You've already forked AstralRinth
forked from didirus/AstralRinth
feat(search): Faceted search based on mod host (curse/modrinth) (#48)
This also adds a command-line argument library (gumdrop) for managing the search indices: resetting them, reconfiguring them, and skipping the initial indexing on startup. I don't know which library is best for this case, but gumdrop has shorter compile times and far fewer dependencies than clap, which is why I chose it.
This commit is contained in:
21
Cargo.lock
generated
21
Cargo.lock
generated
@@ -903,6 +903,26 @@ version = "0.21.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "bcc8e0c9bce37868955864dbecd2b1ab2bdf967e6f28066d65aaac620444b65c"
|
checksum = "bcc8e0c9bce37868955864dbecd2b1ab2bdf967e6f28066d65aaac620444b65c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "gumdrop"
|
||||||
|
version = "0.8.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "46571f5d540478cf70d2a42dd0d6d8e9f4b9cc7531544b93311e657b86568a0b"
|
||||||
|
dependencies = [
|
||||||
|
"gumdrop_derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "gumdrop_derive"
|
||||||
|
version = "0.8.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "915ef07c710d84733522461de2a734d4d62a3fd39a4d4f404c2f385ef8618d05"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "h2"
|
name = "h2"
|
||||||
version = "0.2.5"
|
version = "0.2.5"
|
||||||
@@ -1137,6 +1157,7 @@ dependencies = [
|
|||||||
"env_logger",
|
"env_logger",
|
||||||
"futures",
|
"futures",
|
||||||
"futures-timer",
|
"futures-timer",
|
||||||
|
"gumdrop",
|
||||||
"log",
|
"log",
|
||||||
"meilisearch-sdk",
|
"meilisearch-sdk",
|
||||||
"rand",
|
"rand",
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ serde = { version = "1.0", features = ["derive"] }
|
|||||||
chrono = { version = "0.4", features = ["serde"] }
|
chrono = { version = "0.4", features = ["serde"] }
|
||||||
rand = "0.7"
|
rand = "0.7"
|
||||||
|
|
||||||
|
gumdrop = "0.8"
|
||||||
dotenv = "0.15"
|
dotenv = "0.15"
|
||||||
log = "0.4.8"
|
log = "0.4.8"
|
||||||
env_logger = "0.7.1"
|
env_logger = "0.7.1"
|
||||||
|
|||||||
41
src/main.rs
41
src/main.rs
@@ -1,6 +1,7 @@
|
|||||||
use actix_web::middleware::Logger;
|
use actix_web::middleware::Logger;
|
||||||
use actix_web::{web, App, HttpServer};
|
use actix_web::{web, App, HttpServer};
|
||||||
use env_logger::Env;
|
use env_logger::Env;
|
||||||
|
use gumdrop::Options;
|
||||||
use log::{info, warn};
|
use log::{info, warn};
|
||||||
use search::indexing::index_mods;
|
use search::indexing::index_mods;
|
||||||
use search::indexing::IndexingSettings;
|
use search::indexing::IndexingSettings;
|
||||||
@@ -13,11 +14,26 @@ mod routes;
|
|||||||
mod scheduler;
|
mod scheduler;
|
||||||
mod search;
|
mod search;
|
||||||
|
|
||||||
|
#[derive(Debug, Options)]
|
||||||
|
struct Config {
|
||||||
|
#[options(help = "Print help message")]
|
||||||
|
help: bool,
|
||||||
|
|
||||||
|
#[options(no_short, help = "Skip indexing on startup")]
|
||||||
|
skip_first_index: bool,
|
||||||
|
#[options(no_short, help = "Reset the settings of the indices")]
|
||||||
|
reconfigure_indices: bool,
|
||||||
|
#[options(no_short, help = "Reset the documents in the indices")]
|
||||||
|
reset_indices: bool,
|
||||||
|
}
|
||||||
|
|
||||||
#[actix_rt::main]
|
#[actix_rt::main]
|
||||||
async fn main() -> std::io::Result<()> {
|
async fn main() -> std::io::Result<()> {
|
||||||
dotenv::dotenv().ok();
|
dotenv::dotenv().ok();
|
||||||
env_logger::from_env(Env::default().default_filter_or("info")).init();
|
env_logger::from_env(Env::default().default_filter_or("info")).init();
|
||||||
|
|
||||||
|
let config = Config::parse_args_default_or_exit();
|
||||||
|
|
||||||
check_env_vars();
|
check_env_vars();
|
||||||
|
|
||||||
// Database Connector
|
// Database Connector
|
||||||
@@ -43,10 +59,17 @@ async fn main() -> std::io::Result<()> {
|
|||||||
Arc::new(file_hosting::MockHost::new())
|
Arc::new(file_hosting::MockHost::new())
|
||||||
};
|
};
|
||||||
|
|
||||||
// TODO: use a real arg parsing library
|
if config.reset_indices {
|
||||||
let skip_initial = std::env::args().any(|x| x == "skip");
|
info!("Resetting indices");
|
||||||
|
search::indexing::reset_indices().await.unwrap();
|
||||||
|
} else if config.reconfigure_indices {
|
||||||
|
info!("Reconfiguring indices");
|
||||||
|
search::indexing::reconfigure_indices().await.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
// Allow manually skipping the initial indexing for quicker iteration
|
// Allow manually skipping the initial indexing for quicker iteration
|
||||||
// and startup times.
|
// and startup times.
|
||||||
|
let skip_initial = config.skip_first_index;
|
||||||
if skip_initial {
|
if skip_initial {
|
||||||
info!("Skipping initial indexing");
|
info!("Skipping initial indexing");
|
||||||
}
|
}
|
||||||
@@ -66,9 +89,12 @@ async fn main() -> std::io::Result<()> {
|
|||||||
let mut skip = skip_initial;
|
let mut skip = skip_initial;
|
||||||
scheduler.run(local_index_interval, move || {
|
scheduler.run(local_index_interval, move || {
|
||||||
let pool_ref = pool_ref.clone();
|
let pool_ref = pool_ref.clone();
|
||||||
|
let local_skip = skip;
|
||||||
|
if skip {
|
||||||
|
skip = false;
|
||||||
|
}
|
||||||
async move {
|
async move {
|
||||||
if skip {
|
if local_skip {
|
||||||
skip = false;
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
info!("Indexing local database");
|
info!("Indexing local database");
|
||||||
@@ -90,9 +116,12 @@ async fn main() -> std::io::Result<()> {
|
|||||||
let mut skip = skip_initial;
|
let mut skip = skip_initial;
|
||||||
scheduler.run(std::time::Duration::from_secs(15 * 60), move || {
|
scheduler.run(std::time::Duration::from_secs(15 * 60), move || {
|
||||||
let queue = queue_ref.clone();
|
let queue = queue_ref.clone();
|
||||||
|
let local_skip = skip;
|
||||||
|
if skip {
|
||||||
|
skip = false;
|
||||||
|
}
|
||||||
async move {
|
async move {
|
||||||
if skip {
|
if local_skip {
|
||||||
skip = false;
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
info!("Indexing created mod queue");
|
info!("Indexing created mod queue");
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ use actix_web::{post, HttpResponse};
|
|||||||
use futures::stream::StreamExt;
|
use futures::stream::StreamExt;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use sqlx::postgres::PgPool;
|
use sqlx::postgres::PgPool;
|
||||||
|
use std::borrow::Cow;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
|
|
||||||
@@ -416,7 +417,8 @@ async fn mod_create_inner(
|
|||||||
// TODO: store and return modified time
|
// TODO: store and return modified time
|
||||||
date_modified: formatted,
|
date_modified: formatted,
|
||||||
modified_timestamp: timestamp,
|
modified_timestamp: timestamp,
|
||||||
empty: std::borrow::Cow::Borrowed("{}{}{}"),
|
host: Cow::Borrowed("modrinth"),
|
||||||
|
empty: Cow::Borrowed("{}{}{}"),
|
||||||
};
|
};
|
||||||
|
|
||||||
indexing_queue.add(index_mod);
|
indexing_queue.add(index_mod);
|
||||||
|
|||||||
@@ -13,9 +13,9 @@ impl Scheduler {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn run<F, R>(&mut self, interval: std::time::Duration, task: F)
|
pub fn run<F, R>(&mut self, interval: std::time::Duration, mut task: F)
|
||||||
where
|
where
|
||||||
F: Fn() -> R + Send + 'static,
|
F: FnMut() -> R + Send + 'static,
|
||||||
R: std::future::Future<Output = ()> + Send + 'static,
|
R: std::future::Future<Output = ()> + Send + 'static,
|
||||||
{
|
{
|
||||||
let future = time::interval(interval).for_each_concurrent(2, move |_| task());
|
let future = time::interval(interval).for_each_concurrent(2, move |_| task());
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ use super::IndexingError;
|
|||||||
use crate::search::UploadSearchMod;
|
use crate::search::UploadSearchMod;
|
||||||
use log::info;
|
use log::info;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
use std::borrow::Cow;
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, Debug)]
|
#[derive(Serialize, Deserialize, Debug)]
|
||||||
#[serde(rename_all = "camelCase")]
|
#[serde(rename_all = "camelCase")]
|
||||||
@@ -200,7 +201,8 @@ pub async fn index_curseforge(
|
|||||||
date_modified: modified.to_string(),
|
date_modified: modified.to_string(),
|
||||||
modified_timestamp: modified.timestamp(),
|
modified_timestamp: modified.timestamp(),
|
||||||
latest_version,
|
latest_version,
|
||||||
empty: std::borrow::Cow::Borrowed("{}{}{}"),
|
host: Cow::Borrowed("curseforge"),
|
||||||
|
empty: Cow::Borrowed("{}{}{}"),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ use log::info;
|
|||||||
use super::IndexingError;
|
use super::IndexingError;
|
||||||
use crate::search::UploadSearchMod;
|
use crate::search::UploadSearchMod;
|
||||||
use sqlx::postgres::PgPool;
|
use sqlx::postgres::PgPool;
|
||||||
|
use std::borrow::Cow;
|
||||||
|
|
||||||
pub async fn index_local(pool: PgPool) -> Result<Vec<UploadSearchMod>, IndexingError> {
|
pub async fn index_local(pool: PgPool) -> Result<Vec<UploadSearchMod>, IndexingError> {
|
||||||
info!("Indexing local mods!");
|
info!("Indexing local mods!");
|
||||||
@@ -71,7 +72,8 @@ pub async fn index_local(pool: PgPool) -> Result<Vec<UploadSearchMod>, IndexingE
|
|||||||
date_modified: formatted,
|
date_modified: formatted,
|
||||||
modified_timestamp: timestamp,
|
modified_timestamp: timestamp,
|
||||||
latest_version: "".to_string(), // TODO: Info about latest version
|
latest_version: "".to_string(), // TODO: Info about latest version
|
||||||
empty: std::borrow::Cow::Borrowed("{}{}{}"),
|
host: Cow::Borrowed("modrinth"),
|
||||||
|
empty: Cow::Borrowed("{}{}{}"),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -72,6 +72,76 @@ pub async fn index_mods(pool: PgPool, settings: IndexingSettings) -> Result<(),
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub async fn reset_indices() -> Result<(), IndexingError> {
|
||||||
|
let address = &*dotenv::var("MEILISEARCH_ADDR")?;
|
||||||
|
let client = Client::new(address, "");
|
||||||
|
|
||||||
|
client.delete_index("relevance_mods").await?;
|
||||||
|
client.delete_index("downloads_mods").await?;
|
||||||
|
client.delete_index("updated_mods").await?;
|
||||||
|
client.delete_index("newest_mods").await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn reconfigure_indices() -> Result<(), IndexingError> {
|
||||||
|
let address = &*dotenv::var("MEILISEARCH_ADDR")?;
|
||||||
|
let client = Client::new(address, "");
|
||||||
|
|
||||||
|
// Relevance Index
|
||||||
|
update_index(&client, "relevance_mods", {
|
||||||
|
let mut relevance_rules = default_rules();
|
||||||
|
relevance_rules.push_back("desc(downloads)".to_string());
|
||||||
|
relevance_rules.into()
|
||||||
|
})
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
// Downloads Index
|
||||||
|
update_index(&client, "downloads_mods", {
|
||||||
|
let mut downloads_rules = default_rules();
|
||||||
|
downloads_rules.push_front("desc(downloads)".to_string());
|
||||||
|
downloads_rules.into()
|
||||||
|
})
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
// Updated Index
|
||||||
|
update_index(&client, "updated_mods", {
|
||||||
|
let mut updated_rules = default_rules();
|
||||||
|
updated_rules.push_front("desc(modified_timestamp)".to_string());
|
||||||
|
updated_rules.into()
|
||||||
|
})
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
// Created Index
|
||||||
|
update_index(&client, "newest_mods", {
|
||||||
|
let mut newest_rules = default_rules();
|
||||||
|
newest_rules.push_front("desc(created_timestamp)".to_string());
|
||||||
|
newest_rules.into()
|
||||||
|
})
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn update_index<'a>(
|
||||||
|
client: &'a Client<'a>,
|
||||||
|
name: &'a str,
|
||||||
|
rules: Vec<String>,
|
||||||
|
) -> Result<Index<'a>, IndexingError> {
|
||||||
|
let index = match client.get_index(name).await {
|
||||||
|
Ok(index) => index,
|
||||||
|
Err(meilisearch_sdk::errors::Error::IndexNotFound) => {
|
||||||
|
client.create_index(name, Some("mod_id")).await?
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
return Err(IndexingError::IndexDBError(e));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
index
|
||||||
|
.set_settings(&default_settings().with_ranking_rules(rules))
|
||||||
|
.await?;
|
||||||
|
Ok(index)
|
||||||
|
}
|
||||||
|
|
||||||
async fn create_index<'a>(
|
async fn create_index<'a>(
|
||||||
client: &'a Client<'a>,
|
client: &'a Client<'a>,
|
||||||
name: &'a str,
|
name: &'a str,
|
||||||
@@ -129,7 +199,7 @@ pub async fn add_mods(mods: Vec<UploadSearchMod>) -> Result<(), IndexingError> {
|
|||||||
// Updated Index
|
// Updated Index
|
||||||
let updated_index = create_index(&client, "updated_mods", || {
|
let updated_index = create_index(&client, "updated_mods", || {
|
||||||
let mut updated_rules = default_rules();
|
let mut updated_rules = default_rules();
|
||||||
updated_rules.push_front("desc(updated)".to_string());
|
updated_rules.push_front("desc(modified_timestamp)".to_string());
|
||||||
updated_rules.into()
|
updated_rules.into()
|
||||||
})
|
})
|
||||||
.await?;
|
.await?;
|
||||||
@@ -138,7 +208,7 @@ pub async fn add_mods(mods: Vec<UploadSearchMod>) -> Result<(), IndexingError> {
|
|||||||
// Created Index
|
// Created Index
|
||||||
let newest_index = create_index(&client, "newest_mods", || {
|
let newest_index = create_index(&client, "newest_mods", || {
|
||||||
let mut newest_rules = default_rules();
|
let mut newest_rules = default_rules();
|
||||||
newest_rules.push_front("desc(created)".to_string());
|
newest_rules.push_front("desc(created_timestamp)".to_string());
|
||||||
newest_rules.into()
|
newest_rules.into()
|
||||||
})
|
})
|
||||||
.await?;
|
.await?;
|
||||||
@@ -173,10 +243,9 @@ fn default_settings() -> Settings {
|
|||||||
"icon_url".to_string(),
|
"icon_url".to_string(),
|
||||||
"author_url".to_string(),
|
"author_url".to_string(),
|
||||||
"date_created".to_string(),
|
"date_created".to_string(),
|
||||||
"created".to_string(),
|
|
||||||
"date_modified".to_string(),
|
"date_modified".to_string(),
|
||||||
"updated".to_string(),
|
|
||||||
"latest_version".to_string(),
|
"latest_version".to_string(),
|
||||||
|
"host".to_string(),
|
||||||
];
|
];
|
||||||
|
|
||||||
let searchable_attributes = vec![
|
let searchable_attributes = vec![
|
||||||
@@ -194,7 +263,7 @@ fn default_settings() -> Settings {
|
|||||||
.with_accept_new_fields(true)
|
.with_accept_new_fields(true)
|
||||||
.with_stop_words(vec![])
|
.with_stop_words(vec![])
|
||||||
.with_synonyms(HashMap::new())
|
.with_synonyms(HashMap::new())
|
||||||
.with_attributes_for_faceting(vec![String::from("categories")])
|
.with_attributes_for_faceting(vec![String::from("categories"), String::from("host")])
|
||||||
}
|
}
|
||||||
|
|
||||||
//endregion
|
//endregion
|
||||||
|
|||||||
@@ -71,6 +71,8 @@ pub struct UploadSearchMod {
|
|||||||
/// Unix timestamp of the last major modification
|
/// Unix timestamp of the last major modification
|
||||||
pub modified_timestamp: i64,
|
pub modified_timestamp: i64,
|
||||||
|
|
||||||
|
pub host: Cow<'static, str>,
|
||||||
|
|
||||||
/// Must be "{}{}{}", a hack until meilisearch supports searches
|
/// Must be "{}{}{}", a hack until meilisearch supports searches
|
||||||
/// with empty queries (https://github.com/meilisearch/MeiliSearch/issues/729)
|
/// with empty queries (https://github.com/meilisearch/MeiliSearch/issues/729)
|
||||||
// This is a Cow to prevent unnecessary allocations for a static
|
// This is a Cow to prevent unnecessary allocations for a static
|
||||||
@@ -96,6 +98,9 @@ pub struct ResultSearchMod {
|
|||||||
/// RFC 3339 formatted modification date of the mod
|
/// RFC 3339 formatted modification date of the mod
|
||||||
pub date_modified: String,
|
pub date_modified: String,
|
||||||
pub latest_version: String,
|
pub latest_version: String,
|
||||||
|
|
||||||
|
/// The host of the mod: Either `modrinth` or `curseforge`
|
||||||
|
pub host: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Document for UploadSearchMod {
|
impl Document for UploadSearchMod {
|
||||||
|
|||||||
Reference in New Issue
Block a user