feat(search): Faceted search based on mod host (curse/modrinth) (#48)

This also adds a commandline argument library (gumdrop) for dealing
with indices - resetting, reconfiguring, and skipping them. I don't
know which library is best for this case, but gumdrop has shorter
compile times and many fewer dependencies than clap, which is why
I chose it.
This commit is contained in:
Aeledfyr
2020-07-31 20:18:23 -05:00
committed by GitHub
parent c05ae6e94c
commit 8e798dde48
9 changed files with 147 additions and 16 deletions

21
Cargo.lock generated
View File

@@ -903,6 +903,26 @@ version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bcc8e0c9bce37868955864dbecd2b1ab2bdf967e6f28066d65aaac620444b65c"
[[package]]
name = "gumdrop"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46571f5d540478cf70d2a42dd0d6d8e9f4b9cc7531544b93311e657b86568a0b"
dependencies = [
"gumdrop_derive",
]
[[package]]
name = "gumdrop_derive"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "915ef07c710d84733522461de2a734d4d62a3fd39a4d4f404c2f385ef8618d05"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "h2"
version = "0.2.5"
@@ -1137,6 +1157,7 @@ dependencies = [
"env_logger",
"futures",
"futures-timer",
"gumdrop",
"log",
"meilisearch-sdk",
"rand",

View File

@@ -24,6 +24,7 @@ serde = { version = "1.0", features = ["derive"] }
chrono = { version = "0.4", features = ["serde"] }
rand = "0.7"
gumdrop = "0.8"
dotenv = "0.15"
log = "0.4.8"
env_logger = "0.7.1"

View File

@@ -1,6 +1,7 @@
use actix_web::middleware::Logger;
use actix_web::{web, App, HttpServer};
use env_logger::Env;
use gumdrop::Options;
use log::{info, warn};
use search::indexing::index_mods;
use search::indexing::IndexingSettings;
@@ -13,11 +14,26 @@ mod routes;
mod scheduler;
mod search;
/// Command-line options, parsed at startup via `gumdrop`'s `Options` derive.
///
/// All index-management flags are long-form only (`no_short`); they control
/// how the MeiliSearch indices are (re)initialized before the server starts.
#[derive(Debug, Options)]
struct Config {
    #[options(help = "Print help message")]
    help: bool,
    // Skip the initial indexing pass for quicker startup during development.
    #[options(no_short, help = "Skip indexing on startup")]
    skip_first_index: bool,
    // Re-apply index settings (ranking rules etc.) without deleting documents.
    #[options(no_short, help = "Reset the settings of the indices")]
    reconfigure_indices: bool,
    // Delete the indices entirely so their documents are rebuilt from scratch.
    #[options(no_short, help = "Reset the documents in the indices")]
    reset_indices: bool,
}
#[actix_rt::main]
async fn main() -> std::io::Result<()> {
dotenv::dotenv().ok();
env_logger::from_env(Env::default().default_filter_or("info")).init();
let config = Config::parse_args_default_or_exit();
check_env_vars();
// Database Connector
@@ -43,10 +59,17 @@ async fn main() -> std::io::Result<()> {
Arc::new(file_hosting::MockHost::new())
};
// TODO: use a real arg parsing library
let skip_initial = std::env::args().any(|x| x == "skip");
if config.reset_indices {
info!("Resetting indices");
search::indexing::reset_indices().await.unwrap();
} else if config.reconfigure_indices {
info!("Reconfiguring indices");
search::indexing::reconfigure_indices().await.unwrap();
}
// Allow manually skipping the initial indexing for quicker iteration
// and startup times.
let skip_initial = config.skip_first_index;
if skip_initial {
info!("Skipping initial indexing");
}
@@ -66,9 +89,12 @@ async fn main() -> std::io::Result<()> {
let mut skip = skip_initial;
scheduler.run(local_index_interval, move || {
let pool_ref = pool_ref.clone();
let local_skip = skip;
if skip {
skip = false;
}
async move {
if skip {
skip = false;
if local_skip {
return;
}
info!("Indexing local database");
@@ -90,9 +116,12 @@ async fn main() -> std::io::Result<()> {
let mut skip = skip_initial;
scheduler.run(std::time::Duration::from_secs(15 * 60), move || {
let queue = queue_ref.clone();
let local_skip = skip;
if skip {
skip = false;
}
async move {
if skip {
skip = false;
if local_skip {
return;
}
info!("Indexing created mod queue");

View File

@@ -11,6 +11,7 @@ use actix_web::{post, HttpResponse};
use futures::stream::StreamExt;
use serde::{Deserialize, Serialize};
use sqlx::postgres::PgPool;
use std::borrow::Cow;
use std::sync::Arc;
use thiserror::Error;
@@ -416,7 +417,8 @@ async fn mod_create_inner(
// TODO: store and return modified time
date_modified: formatted,
modified_timestamp: timestamp,
empty: std::borrow::Cow::Borrowed("{}{}{}"),
host: Cow::Borrowed("modrinth"),
empty: Cow::Borrowed("{}{}{}"),
};
indexing_queue.add(index_mod);

View File

@@ -13,9 +13,9 @@ impl Scheduler {
}
}
pub fn run<F, R>(&mut self, interval: std::time::Duration, task: F)
pub fn run<F, R>(&mut self, interval: std::time::Duration, mut task: F)
where
F: Fn() -> R + Send + 'static,
F: FnMut() -> R + Send + 'static,
R: std::future::Future<Output = ()> + Send + 'static,
{
let future = time::interval(interval).for_each_concurrent(2, move |_| task());

View File

@@ -2,6 +2,7 @@ use super::IndexingError;
use crate::search::UploadSearchMod;
use log::info;
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
#[derive(Serialize, Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
@@ -200,7 +201,8 @@ pub async fn index_curseforge(
date_modified: modified.to_string(),
modified_timestamp: modified.timestamp(),
latest_version,
empty: std::borrow::Cow::Borrowed("{}{}{}"),
host: Cow::Borrowed("curseforge"),
empty: Cow::Borrowed("{}{}{}"),
})
}

View File

@@ -4,6 +4,7 @@ use log::info;
use super::IndexingError;
use crate::search::UploadSearchMod;
use sqlx::postgres::PgPool;
use std::borrow::Cow;
pub async fn index_local(pool: PgPool) -> Result<Vec<UploadSearchMod>, IndexingError> {
info!("Indexing local mods!");
@@ -71,7 +72,8 @@ pub async fn index_local(pool: PgPool) -> Result<Vec<UploadSearchMod>, IndexingE
date_modified: formatted,
modified_timestamp: timestamp,
latest_version: "".to_string(), // TODO: Info about latest version
empty: std::borrow::Cow::Borrowed("{}{}{}"),
host: Cow::Borrowed("modrinth"),
empty: Cow::Borrowed("{}{}{}"),
});
}
}

View File

@@ -72,6 +72,76 @@ pub async fn index_mods(pool: PgPool, settings: IndexingSettings) -> Result<(),
Ok(())
}
/// Deletes all four sort-order search indices from the MeiliSearch instance.
///
/// The MeiliSearch address is read from the `MEILISEARCH_ADDR` environment
/// variable (via `dotenv`). Returns an error if the variable is missing or
/// any delete request fails.
pub async fn reset_indices() -> Result<(), IndexingError> {
    let address = &*dotenv::var("MEILISEARCH_ADDR")?;
    let client = Client::new(address, "");
    // Drop each index in turn; this mirrors the creation order elsewhere.
    for index_name in [
        "relevance_mods",
        "downloads_mods",
        "updated_mods",
        "newest_mods",
    ]
    .iter()
    {
        client.delete_index(index_name).await?;
    }
    Ok(())
}
/// Re-applies settings (ranking rules, etc.) to every search index without
/// deleting their documents.
///
/// Each index shares the default rule set but differs in one sort criterion;
/// relevance appends its tiebreaker while the others prepend theirs.
pub async fn reconfigure_indices() -> Result<(), IndexingError> {
    let address = &*dotenv::var("MEILISEARCH_ADDR")?;
    let client = Client::new(address, "");

    // Relevance: default rules first, download count as a final tiebreaker.
    let mut relevance_rules = default_rules();
    relevance_rules.push_back("desc(downloads)".to_string());
    update_index(&client, "relevance_mods", relevance_rules.into()).await?;

    // Downloads: download count takes priority over the default rules.
    let mut downloads_rules = default_rules();
    downloads_rules.push_front("desc(downloads)".to_string());
    update_index(&client, "downloads_mods", downloads_rules.into()).await?;

    // Updated: most recently modified mods first.
    let mut updated_rules = default_rules();
    updated_rules.push_front("desc(modified_timestamp)".to_string());
    update_index(&client, "updated_mods", updated_rules.into()).await?;

    // Newest: most recently created mods first.
    let mut newest_rules = default_rules();
    newest_rules.push_front("desc(created_timestamp)".to_string());
    update_index(&client, "newest_mods", newest_rules.into()).await?;

    Ok(())
}
/// Fetches the index `name`, creating it (keyed on `mod_id`) if it does not
/// exist yet, then applies the default settings with the given ranking rules.
///
/// Any error from MeiliSearch other than `IndexNotFound` is surfaced as an
/// `IndexingError::IndexDBError`.
async fn update_index<'a>(
    client: &'a Client<'a>,
    name: &'a str,
    rules: Vec<String>,
) -> Result<Index<'a>, IndexingError> {
    let index = match client.get_index(name).await {
        Ok(existing) => existing,
        // Missing index is expected on first run — create it on demand.
        Err(meilisearch_sdk::errors::Error::IndexNotFound) => {
            client.create_index(name, Some("mod_id")).await?
        }
        Err(other) => return Err(IndexingError::IndexDBError(other)),
    };
    let settings = default_settings().with_ranking_rules(rules);
    index.set_settings(&settings).await?;
    Ok(index)
}
async fn create_index<'a>(
client: &'a Client<'a>,
name: &'a str,
@@ -129,7 +199,7 @@ pub async fn add_mods(mods: Vec<UploadSearchMod>) -> Result<(), IndexingError> {
// Updated Index
let updated_index = create_index(&client, "updated_mods", || {
let mut updated_rules = default_rules();
updated_rules.push_front("desc(updated)".to_string());
updated_rules.push_front("desc(modified_timestamp)".to_string());
updated_rules.into()
})
.await?;
@@ -138,7 +208,7 @@ pub async fn add_mods(mods: Vec<UploadSearchMod>) -> Result<(), IndexingError> {
// Created Index
let newest_index = create_index(&client, "newest_mods", || {
let mut newest_rules = default_rules();
newest_rules.push_front("desc(created)".to_string());
newest_rules.push_front("desc(created_timestamp)".to_string());
newest_rules.into()
})
.await?;
@@ -173,10 +243,9 @@ fn default_settings() -> Settings {
"icon_url".to_string(),
"author_url".to_string(),
"date_created".to_string(),
"created".to_string(),
"date_modified".to_string(),
"updated".to_string(),
"latest_version".to_string(),
"host".to_string(),
];
let searchable_attributes = vec![
@@ -194,7 +263,7 @@ fn default_settings() -> Settings {
.with_accept_new_fields(true)
.with_stop_words(vec![])
.with_synonyms(HashMap::new())
.with_attributes_for_faceting(vec![String::from("categories")])
.with_attributes_for_faceting(vec![String::from("categories"), String::from("host")])
}
//endregion

View File

@@ -71,6 +71,8 @@ pub struct UploadSearchMod {
/// Unix timestamp of the last major modification
pub modified_timestamp: i64,
pub host: Cow<'static, str>,
/// Must be "{}{}{}", a hack until meilisearch supports searches
/// with empty queries (https://github.com/meilisearch/MeiliSearch/issues/729)
// This is a Cow to prevent unnecessary allocations for a static
@@ -96,6 +98,9 @@ pub struct ResultSearchMod {
/// RFC 3339 formatted modification date of the mod
pub date_modified: String,
pub latest_version: String,
/// The host of the mod: Either `modrinth` or `curseforge`
pub host: String,
}
impl Document for UploadSearchMod {