forked from didirus/AstralRinth
feat(search): Faceted search based on mod host (curse/modrinth) (#48)
This also adds a command-line argument library (gumdrop) for dealing with indices — resetting, reconfiguring, and skipping them. I don't know which library is best for this case, but gumdrop has shorter compile times and many fewer dependencies than clap, which is why I chose it.
This commit is contained in:
21
Cargo.lock
generated
21
Cargo.lock
generated
@@ -903,6 +903,26 @@ version = "0.21.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bcc8e0c9bce37868955864dbecd2b1ab2bdf967e6f28066d65aaac620444b65c"
|
||||
|
||||
[[package]]
|
||||
name = "gumdrop"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "46571f5d540478cf70d2a42dd0d6d8e9f4b9cc7531544b93311e657b86568a0b"
|
||||
dependencies = [
|
||||
"gumdrop_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gumdrop_derive"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "915ef07c710d84733522461de2a734d4d62a3fd39a4d4f404c2f385ef8618d05"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "h2"
|
||||
version = "0.2.5"
|
||||
@@ -1137,6 +1157,7 @@ dependencies = [
|
||||
"env_logger",
|
||||
"futures",
|
||||
"futures-timer",
|
||||
"gumdrop",
|
||||
"log",
|
||||
"meilisearch-sdk",
|
||||
"rand",
|
||||
|
||||
@@ -24,6 +24,7 @@ serde = { version = "1.0", features = ["derive"] }
|
||||
chrono = { version = "0.4", features = ["serde"] }
|
||||
rand = "0.7"
|
||||
|
||||
gumdrop = "0.8"
|
||||
dotenv = "0.15"
|
||||
log = "0.4.8"
|
||||
env_logger = "0.7.1"
|
||||
|
||||
41
src/main.rs
41
src/main.rs
@@ -1,6 +1,7 @@
|
||||
use actix_web::middleware::Logger;
|
||||
use actix_web::{web, App, HttpServer};
|
||||
use env_logger::Env;
|
||||
use gumdrop::Options;
|
||||
use log::{info, warn};
|
||||
use search::indexing::index_mods;
|
||||
use search::indexing::IndexingSettings;
|
||||
@@ -13,11 +14,26 @@ mod routes;
|
||||
mod scheduler;
|
||||
mod search;
|
||||
|
||||
#[derive(Debug, Options)]
|
||||
struct Config {
|
||||
#[options(help = "Print help message")]
|
||||
help: bool,
|
||||
|
||||
#[options(no_short, help = "Skip indexing on startup")]
|
||||
skip_first_index: bool,
|
||||
#[options(no_short, help = "Reset the settings of the indices")]
|
||||
reconfigure_indices: bool,
|
||||
#[options(no_short, help = "Reset the documents in the indices")]
|
||||
reset_indices: bool,
|
||||
}
|
||||
|
||||
#[actix_rt::main]
|
||||
async fn main() -> std::io::Result<()> {
|
||||
dotenv::dotenv().ok();
|
||||
env_logger::from_env(Env::default().default_filter_or("info")).init();
|
||||
|
||||
let config = Config::parse_args_default_or_exit();
|
||||
|
||||
check_env_vars();
|
||||
|
||||
// Database Connector
|
||||
@@ -43,10 +59,17 @@ async fn main() -> std::io::Result<()> {
|
||||
Arc::new(file_hosting::MockHost::new())
|
||||
};
|
||||
|
||||
// TODO: use a real arg parsing library
|
||||
let skip_initial = std::env::args().any(|x| x == "skip");
|
||||
if config.reset_indices {
|
||||
info!("Resetting indices");
|
||||
search::indexing::reset_indices().await.unwrap();
|
||||
} else if config.reconfigure_indices {
|
||||
info!("Reconfiguring indices");
|
||||
search::indexing::reconfigure_indices().await.unwrap();
|
||||
}
|
||||
|
||||
// Allow manually skipping the initial indexing for quicker iteration
|
||||
// and startup times.
|
||||
let skip_initial = config.skip_first_index;
|
||||
if skip_initial {
|
||||
info!("Skipping initial indexing");
|
||||
}
|
||||
@@ -66,9 +89,12 @@ async fn main() -> std::io::Result<()> {
|
||||
let mut skip = skip_initial;
|
||||
scheduler.run(local_index_interval, move || {
|
||||
let pool_ref = pool_ref.clone();
|
||||
let local_skip = skip;
|
||||
if skip {
|
||||
skip = false;
|
||||
}
|
||||
async move {
|
||||
if skip {
|
||||
skip = false;
|
||||
if local_skip {
|
||||
return;
|
||||
}
|
||||
info!("Indexing local database");
|
||||
@@ -90,9 +116,12 @@ async fn main() -> std::io::Result<()> {
|
||||
let mut skip = skip_initial;
|
||||
scheduler.run(std::time::Duration::from_secs(15 * 60), move || {
|
||||
let queue = queue_ref.clone();
|
||||
let local_skip = skip;
|
||||
if skip {
|
||||
skip = false;
|
||||
}
|
||||
async move {
|
||||
if skip {
|
||||
skip = false;
|
||||
if local_skip {
|
||||
return;
|
||||
}
|
||||
info!("Indexing created mod queue");
|
||||
|
||||
@@ -11,6 +11,7 @@ use actix_web::{post, HttpResponse};
|
||||
use futures::stream::StreamExt;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sqlx::postgres::PgPool;
|
||||
use std::borrow::Cow;
|
||||
use std::sync::Arc;
|
||||
use thiserror::Error;
|
||||
|
||||
@@ -416,7 +417,8 @@ async fn mod_create_inner(
|
||||
// TODO: store and return modified time
|
||||
date_modified: formatted,
|
||||
modified_timestamp: timestamp,
|
||||
empty: std::borrow::Cow::Borrowed("{}{}{}"),
|
||||
host: Cow::Borrowed("modrinth"),
|
||||
empty: Cow::Borrowed("{}{}{}"),
|
||||
};
|
||||
|
||||
indexing_queue.add(index_mod);
|
||||
|
||||
@@ -13,9 +13,9 @@ impl Scheduler {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn run<F, R>(&mut self, interval: std::time::Duration, task: F)
|
||||
pub fn run<F, R>(&mut self, interval: std::time::Duration, mut task: F)
|
||||
where
|
||||
F: Fn() -> R + Send + 'static,
|
||||
F: FnMut() -> R + Send + 'static,
|
||||
R: std::future::Future<Output = ()> + Send + 'static,
|
||||
{
|
||||
let future = time::interval(interval).for_each_concurrent(2, move |_| task());
|
||||
|
||||
@@ -2,6 +2,7 @@ use super::IndexingError;
|
||||
use crate::search::UploadSearchMod;
|
||||
use log::info;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::borrow::Cow;
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
@@ -200,7 +201,8 @@ pub async fn index_curseforge(
|
||||
date_modified: modified.to_string(),
|
||||
modified_timestamp: modified.timestamp(),
|
||||
latest_version,
|
||||
empty: std::borrow::Cow::Borrowed("{}{}{}"),
|
||||
host: Cow::Borrowed("curseforge"),
|
||||
empty: Cow::Borrowed("{}{}{}"),
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ use log::info;
|
||||
use super::IndexingError;
|
||||
use crate::search::UploadSearchMod;
|
||||
use sqlx::postgres::PgPool;
|
||||
use std::borrow::Cow;
|
||||
|
||||
pub async fn index_local(pool: PgPool) -> Result<Vec<UploadSearchMod>, IndexingError> {
|
||||
info!("Indexing local mods!");
|
||||
@@ -71,7 +72,8 @@ pub async fn index_local(pool: PgPool) -> Result<Vec<UploadSearchMod>, IndexingE
|
||||
date_modified: formatted,
|
||||
modified_timestamp: timestamp,
|
||||
latest_version: "".to_string(), // TODO: Info about latest version
|
||||
empty: std::borrow::Cow::Borrowed("{}{}{}"),
|
||||
host: Cow::Borrowed("modrinth"),
|
||||
empty: Cow::Borrowed("{}{}{}"),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -72,6 +72,76 @@ pub async fn index_mods(pool: PgPool, settings: IndexingSettings) -> Result<(),
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn reset_indices() -> Result<(), IndexingError> {
|
||||
let address = &*dotenv::var("MEILISEARCH_ADDR")?;
|
||||
let client = Client::new(address, "");
|
||||
|
||||
client.delete_index("relevance_mods").await?;
|
||||
client.delete_index("downloads_mods").await?;
|
||||
client.delete_index("updated_mods").await?;
|
||||
client.delete_index("newest_mods").await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn reconfigure_indices() -> Result<(), IndexingError> {
|
||||
let address = &*dotenv::var("MEILISEARCH_ADDR")?;
|
||||
let client = Client::new(address, "");
|
||||
|
||||
// Relevance Index
|
||||
update_index(&client, "relevance_mods", {
|
||||
let mut relevance_rules = default_rules();
|
||||
relevance_rules.push_back("desc(downloads)".to_string());
|
||||
relevance_rules.into()
|
||||
})
|
||||
.await?;
|
||||
|
||||
// Downloads Index
|
||||
update_index(&client, "downloads_mods", {
|
||||
let mut downloads_rules = default_rules();
|
||||
downloads_rules.push_front("desc(downloads)".to_string());
|
||||
downloads_rules.into()
|
||||
})
|
||||
.await?;
|
||||
|
||||
// Updated Index
|
||||
update_index(&client, "updated_mods", {
|
||||
let mut updated_rules = default_rules();
|
||||
updated_rules.push_front("desc(modified_timestamp)".to_string());
|
||||
updated_rules.into()
|
||||
})
|
||||
.await?;
|
||||
|
||||
// Created Index
|
||||
update_index(&client, "newest_mods", {
|
||||
let mut newest_rules = default_rules();
|
||||
newest_rules.push_front("desc(created_timestamp)".to_string());
|
||||
newest_rules.into()
|
||||
})
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn update_index<'a>(
|
||||
client: &'a Client<'a>,
|
||||
name: &'a str,
|
||||
rules: Vec<String>,
|
||||
) -> Result<Index<'a>, IndexingError> {
|
||||
let index = match client.get_index(name).await {
|
||||
Ok(index) => index,
|
||||
Err(meilisearch_sdk::errors::Error::IndexNotFound) => {
|
||||
client.create_index(name, Some("mod_id")).await?
|
||||
}
|
||||
Err(e) => {
|
||||
return Err(IndexingError::IndexDBError(e));
|
||||
}
|
||||
};
|
||||
index
|
||||
.set_settings(&default_settings().with_ranking_rules(rules))
|
||||
.await?;
|
||||
Ok(index)
|
||||
}
|
||||
|
||||
async fn create_index<'a>(
|
||||
client: &'a Client<'a>,
|
||||
name: &'a str,
|
||||
@@ -129,7 +199,7 @@ pub async fn add_mods(mods: Vec<UploadSearchMod>) -> Result<(), IndexingError> {
|
||||
// Updated Index
|
||||
let updated_index = create_index(&client, "updated_mods", || {
|
||||
let mut updated_rules = default_rules();
|
||||
updated_rules.push_front("desc(updated)".to_string());
|
||||
updated_rules.push_front("desc(modified_timestamp)".to_string());
|
||||
updated_rules.into()
|
||||
})
|
||||
.await?;
|
||||
@@ -138,7 +208,7 @@ pub async fn add_mods(mods: Vec<UploadSearchMod>) -> Result<(), IndexingError> {
|
||||
// Created Index
|
||||
let newest_index = create_index(&client, "newest_mods", || {
|
||||
let mut newest_rules = default_rules();
|
||||
newest_rules.push_front("desc(created)".to_string());
|
||||
newest_rules.push_front("desc(created_timestamp)".to_string());
|
||||
newest_rules.into()
|
||||
})
|
||||
.await?;
|
||||
@@ -173,10 +243,9 @@ fn default_settings() -> Settings {
|
||||
"icon_url".to_string(),
|
||||
"author_url".to_string(),
|
||||
"date_created".to_string(),
|
||||
"created".to_string(),
|
||||
"date_modified".to_string(),
|
||||
"updated".to_string(),
|
||||
"latest_version".to_string(),
|
||||
"host".to_string(),
|
||||
];
|
||||
|
||||
let searchable_attributes = vec![
|
||||
@@ -194,7 +263,7 @@ fn default_settings() -> Settings {
|
||||
.with_accept_new_fields(true)
|
||||
.with_stop_words(vec![])
|
||||
.with_synonyms(HashMap::new())
|
||||
.with_attributes_for_faceting(vec![String::from("categories")])
|
||||
.with_attributes_for_faceting(vec![String::from("categories"), String::from("host")])
|
||||
}
|
||||
|
||||
//endregion
|
||||
|
||||
@@ -71,6 +71,8 @@ pub struct UploadSearchMod {
|
||||
/// Unix timestamp of the last major modification
|
||||
pub modified_timestamp: i64,
|
||||
|
||||
pub host: Cow<'static, str>,
|
||||
|
||||
/// Must be "{}{}{}", a hack until meilisearch supports searches
|
||||
/// with empty queries (https://github.com/meilisearch/MeiliSearch/issues/729)
|
||||
// This is a Cow to prevent unnecessary allocations for a static
|
||||
@@ -96,6 +98,9 @@ pub struct ResultSearchMod {
|
||||
/// RFC 3339 formatted modification date of the mod
|
||||
pub date_modified: String,
|
||||
pub latest_version: String,
|
||||
|
||||
/// The host of the mod: Either `modrinth` or `curseforge`
|
||||
pub host: String,
|
||||
}
|
||||
|
||||
impl Document for UploadSearchMod {
|
||||
|
||||
Reference in New Issue
Block a user