Refactor Meilisearch, update to latest SDK, and implement faceted search (#44)

* feat(indexing): Reindex curseforge & local database at an interval

* fix(indexing): Use strings for meilisearch primary key

Fixes #17 by prefixing curseforge ids with "curse-" and local ids
with "local-".

* feat(indexing): Add newly created mods to the index more quickly

* feat(indexing): Implement faceted search, update to meilisearch master

Fixes #9, but only uses faceted search for categories. It should
be reasonably simple to add support for versions, but that may not
be as useful due to the large number of versions and the large
number of supported versions for each mod.

* feat(indexing): Allow skipping initial indexing

Co-authored-by: Geometrically <18202329+Geometrically@users.noreply.github.com>
This commit is contained in:
Aeledfyr
2020-07-27 18:54:10 -05:00
committed by GitHub
parent 7914e89212
commit ff28ea8fa8
13 changed files with 441 additions and 201 deletions

View File

@@ -1,16 +1,16 @@
use crate::search::indexing::index_mods;
use actix_web::middleware::Logger;
use actix_web::{web, App, HttpServer};
use env_logger::Env;
use log::info;
use std::env;
use std::fs::File;
use log::{info, warn};
use search::indexing::index_mods;
use search::indexing::IndexingSettings;
use std::sync::Arc;
mod database;
mod file_hosting;
mod models;
mod routes;
mod scheduler;
mod search;
#[actix_rt::main]
@@ -24,7 +24,6 @@ async fn main() -> std::io::Result<()> {
let pool = database::connect()
.await
.expect("Database connection failed");
let client_ref = pool.clone();
let backblaze_enabled = dotenv::var("BACKBLAZE_ENABLED")
.ok()
@@ -44,22 +43,97 @@ async fn main() -> std::io::Result<()> {
Arc::new(file_hosting::MockHost::new())
};
// Get executable path
let mut exe_path = env::current_exe()?.parent().unwrap().to_path_buf();
// Create the path to the index lock file
exe_path.push("index.v1.lock");
// TODO: use a real arg parsing library
let skip_initial = std::env::args().any(|x| x == "skip");
// Allow manually skipping the initial indexing for quicker iteration
// and startup times.
if skip_initial {
info!("Skipping initial indexing");
}
// Indexing mods if not already done
if env::args().any(|x| x == "regen") {
// User forced regen of indexing
info!("Forced regeneration of indexes!");
index_mods(pool).await.expect("Mod indexing failed");
} else if !exe_path.exists() {
// The indexes were not created, or the version was upgraded
info!("Indexing of mods for first time...");
index_mods(pool).await.expect("Mod indexing failed");
// Create the lock file
File::create(exe_path)?;
let mut scheduler = scheduler::Scheduler::new();
// The interval in seconds at which the local database is indexed
// for searching. Defaults to 1 hour if unset.
let local_index_interval = std::time::Duration::from_secs(
dotenv::var("LOCAL_INDEX_INTERVAL")
.ok()
.map(|i| i.parse().unwrap())
.unwrap_or(3600),
);
let pool_ref = pool.clone();
let mut skip = skip_initial;
scheduler.run(local_index_interval, move || {
let pool_ref = pool_ref.clone();
async move {
if skip {
skip = false;
return;
}
info!("Indexing local database");
let settings = IndexingSettings {
index_local: true,
index_external: false,
};
let result = index_mods(pool_ref, settings).await;
if let Err(e) = result {
warn!("Local mod indexing failed: {:?}", e);
}
info!("Done indexing local database");
}
});
let indexing_queue = Arc::new(search::indexing::queue::CreationQueue::new());
let queue_ref = indexing_queue.clone();
let mut skip = skip_initial;
scheduler.run(std::time::Duration::from_secs(15 * 60), move || {
let queue = queue_ref.clone();
async move {
if skip {
skip = false;
return;
}
info!("Indexing created mod queue");
let result = search::indexing::queue::index_queue(&*queue).await;
if let Err(e) = result {
warn!("Indexing created mods failed: {:?}", e);
}
info!("Done indexing created mod queue");
}
});
if dotenv::var("INDEX_CURSEFORGE")
.ok()
.and_then(|b| b.parse::<bool>().ok())
.unwrap_or(false)
{
// The interval in seconds at which curseforge is indexed for
// searching. Defaults to 4 hours if unset.
let external_index_interval = std::time::Duration::from_secs(
dotenv::var("EXTERNAL_INDEX_INTERVAL")
.ok()
.map(|i| i.parse().unwrap())
.unwrap_or(3600 * 4),
);
let pool_ref = pool.clone();
scheduler.run(external_index_interval, move || {
info!("Indexing curseforge");
let pool_ref = pool_ref.clone();
async move {
let settings = IndexingSettings {
index_local: false,
index_external: true,
};
let result = index_mods(pool_ref, settings).await;
if let Err(e) = result {
warn!("External mod indexing failed: {:?}", e);
}
info!("Done indexing curseforge");
}
});
}
info!("Starting Actix HTTP server!");
@@ -69,7 +143,7 @@ async fn main() -> std::io::Result<()> {
App::new()
.wrap(Logger::default())
.wrap(Logger::new("%a %{User-Agent}i"))
.data(client_ref.clone())
.data(pool.clone())
.data(file_host.clone())
.service(routes::index_get)
.service(routes::mod_search)
@@ -89,14 +163,14 @@ fn check_env_vars() {
.and_then(|s| s.parse::<T>().ok())
.is_none()
{
log::warn!(
warn!(
"Variable `{}` missing in dotenv or not of type `{}`",
var,
std::any::type_name::<T>()
)
}
}
check_var::<bool>("INDEX_CURSEFORGE");
check_var::<String>("CDN_URL");
check_var::<String>("DATABASE_URL");
check_var::<String>("MEILISEARCH_ADDR");
check_var::<String>("BIND_ADDR");
@@ -109,5 +183,18 @@ fn check_env_vars() {
check_var::<String>("BACKBLAZE_KEY_ID");
check_var::<String>("BACKBLAZE_KEY");
check_var::<String>("BACKBLAZE_BUCKET_ID");
} else {
check_var::<String>("MOCK_FILE_PATH");
}
check_var::<bool>("INDEX_CURSEFORGE");
if dotenv::var("INDEX_CURSEFORGE")
.ok()
.and_then(|s| s.parse::<bool>().ok())
.unwrap_or(false)
{
check_var::<usize>("EXTERNAL_INDEX_INTERVAL");
}
check_var::<usize>("LOCAL_INDEX_INTERVAL");
}