You've already forked AstralRinth
forked from didirus/AstralRinth
Refactor Meilisearch, update to latest SDK, and implement faceted search (#44)
* feat(indexing): Reindex curseforge & local database at an interval * fix(indexing): Use strings for meilisearch primary key Fixes #17 by prefixing curseforge ids with "curse-" and local ids with "local-". * feat(indexing): Add newly created mods to the index more quickly * feat(indexing): Implement faceted search, update to meilisearch master Fixes #9, but only uses faceted search for categories. It should be reasonably simple to add support for versions, but it may not be as useful due to the large number of versions and the large number of supported versions for each mod. * feat(indexing): Allow skipping initial indexing Co-authored-by: Geometrically <18202329+Geometrically@users.noreply.github.com>
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
use super::IndexingError;
|
||||
use crate::search::SearchMod;
|
||||
use crate::search::UploadSearchMod;
|
||||
use log::info;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
@@ -48,10 +48,10 @@ pub struct CurseForgeMod {
|
||||
pub async fn index_curseforge(
|
||||
start_index: i32,
|
||||
end_index: i32,
|
||||
) -> Result<Vec<SearchMod>, IndexingError> {
|
||||
) -> Result<Vec<UploadSearchMod>, IndexingError> {
|
||||
info!("Indexing curseforge mods!");
|
||||
|
||||
let mut docs_to_add: Vec<SearchMod> = vec![];
|
||||
let mut docs_to_add: Vec<UploadSearchMod> = vec![];
|
||||
|
||||
let res = reqwest::Client::new()
|
||||
.post("https://addons-ecs.forgesvc.net/api/v2/addon")
|
||||
@@ -177,32 +177,32 @@ pub async fn index_curseforge(
|
||||
.thumbnail_url
|
||||
.replace("/256/256/", "/64/64/");
|
||||
|
||||
docs_to_add.push(SearchMod {
|
||||
mod_id: -curseforge_mod.id as i64,
|
||||
let created = curseforge_mod
|
||||
.date_created
|
||||
.parse::<chrono::DateTime<chrono::Utc>>()?;
|
||||
let modified = curseforge_mod
|
||||
.date_modified
|
||||
.parse::<chrono::DateTime<chrono::Utc>>()?;
|
||||
|
||||
docs_to_add.push(UploadSearchMod {
|
||||
mod_id: format!("curse-{}", curseforge_mod.id),
|
||||
author: (&curseforge_mod.authors[0].name).to_string(),
|
||||
title: curseforge_mod.name,
|
||||
description: curseforge_mod.summary.chars().take(150).collect(),
|
||||
keywords: mod_categories,
|
||||
categories: mod_categories,
|
||||
versions: mod_game_versions.clone(),
|
||||
downloads: curseforge_mod.download_count as i32,
|
||||
page_url: curseforge_mod.website_url,
|
||||
icon_url,
|
||||
author_url: (&curseforge_mod.authors[0].url).to_string(),
|
||||
date_created: curseforge_mod.date_created.chars().take(10).collect(),
|
||||
created: curseforge_mod
|
||||
.date_created
|
||||
.parse::<chrono::DateTime<chrono::Utc>>()?
|
||||
.timestamp(),
|
||||
date_modified: curseforge_mod.date_modified.chars().take(10).collect(),
|
||||
updated: curseforge_mod
|
||||
.date_modified
|
||||
.parse::<chrono::DateTime<chrono::Utc>>()?
|
||||
.timestamp(),
|
||||
date_created: created.to_string(),
|
||||
created_timestamp: created.timestamp(),
|
||||
date_modified: modified.to_string(),
|
||||
modified_timestamp: modified.timestamp(),
|
||||
latest_version,
|
||||
empty: String::from("{}{}{}"),
|
||||
empty: std::borrow::Cow::Borrowed("{}{}{}"),
|
||||
})
|
||||
}
|
||||
|
||||
//TODO Reindex every hour for new mods.
|
||||
Ok(docs_to_add)
|
||||
}
|
||||
|
||||
@@ -2,13 +2,13 @@ use futures::{StreamExt, TryStreamExt};
|
||||
use log::info;
|
||||
|
||||
use super::IndexingError;
|
||||
use crate::search::SearchMod;
|
||||
use crate::search::UploadSearchMod;
|
||||
use sqlx::postgres::PgPool;
|
||||
|
||||
pub async fn index_local(pool: PgPool) -> Result<Vec<SearchMod>, IndexingError> {
|
||||
pub async fn index_local(pool: PgPool) -> Result<Vec<UploadSearchMod>, IndexingError> {
|
||||
info!("Indexing local mods!");
|
||||
|
||||
let mut docs_to_add: Vec<SearchMod> = vec![];
|
||||
let mut docs_to_add: Vec<UploadSearchMod> = vec![];
|
||||
|
||||
let mut results = sqlx::query!(
|
||||
"
|
||||
@@ -53,23 +53,25 @@ pub async fn index_local(pool: PgPool) -> Result<Vec<SearchMod>, IndexingError>
|
||||
icon_url = url;
|
||||
}
|
||||
|
||||
docs_to_add.push(SearchMod {
|
||||
mod_id: result.id,
|
||||
author: "".to_string(),
|
||||
let formatted = result.published.to_string();
|
||||
let timestamp = result.published.timestamp();
|
||||
docs_to_add.push(UploadSearchMod {
|
||||
mod_id: format!("local-{}", crate::models::ids::ModId(result.id as u64)),
|
||||
title: result.title,
|
||||
description: result.description,
|
||||
keywords: categories,
|
||||
categories,
|
||||
versions,
|
||||
downloads: result.downloads,
|
||||
page_url: result.body_url,
|
||||
icon_url,
|
||||
author: "".to_string(), // TODO: author/team info
|
||||
author_url: "".to_string(),
|
||||
date_created: result.published.to_string(),
|
||||
created: 0,
|
||||
date_modified: "".to_string(),
|
||||
updated: 0,
|
||||
latest_version: "".to_string(),
|
||||
empty: String::from("{}{}{}"),
|
||||
date_created: formatted.clone(),
|
||||
created_timestamp: timestamp,
|
||||
date_modified: formatted,
|
||||
modified_timestamp: timestamp,
|
||||
latest_version: "".to_string(), // TODO: Info about latest version
|
||||
empty: std::borrow::Cow::Borrowed("{}{}{}"),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
/// This module is used for the indexing from any source.
|
||||
pub mod curseforge_import;
|
||||
pub mod local_import;
|
||||
pub mod queue;
|
||||
|
||||
use crate::search::indexing::curseforge_import::index_curseforge;
|
||||
use crate::search::indexing::local_import::index_local;
|
||||
use crate::search::SearchMod;
|
||||
use crate::search::UploadSearchMod;
|
||||
use curseforge_import::index_curseforge;
|
||||
use local_import::index_local;
|
||||
use meilisearch_sdk::client::Client;
|
||||
use meilisearch_sdk::indexes::Index;
|
||||
use meilisearch_sdk::settings::Settings;
|
||||
use sqlx::postgres::PgPool;
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
@@ -14,7 +16,7 @@ use thiserror::Error;
|
||||
#[derive(Error, Debug)]
|
||||
pub enum IndexingError {
|
||||
#[error("Error while connecting to the MeiliSearch database")]
|
||||
IndexDBError(meilisearch_sdk::errors::Error),
|
||||
IndexDBError(#[from] meilisearch_sdk::errors::Error),
|
||||
#[error("Error while importing mods from CurseForge")]
|
||||
CurseforgeImportError(reqwest::Error),
|
||||
#[error("Error while serializing or deserializing JSON: {0}")]
|
||||
@@ -32,95 +34,115 @@ pub enum IndexingError {
|
||||
// assumes a max average size of 1KiB per mod to avoid this cap.
|
||||
const MEILISEARCH_CHUNK_SIZE: usize = 10000;
|
||||
|
||||
pub async fn index_mods(pool: PgPool) -> Result<(), IndexingError> {
|
||||
// Check if the index exists
|
||||
#[derive(Debug)]
|
||||
pub struct IndexingSettings {
|
||||
pub index_external: bool,
|
||||
pub index_local: bool,
|
||||
}
|
||||
|
||||
impl IndexingSettings {
|
||||
pub fn from_env() -> Self {
|
||||
let index_local = true;
|
||||
let index_external = dotenv::var("INDEX_CURSEFORGE")
|
||||
.ok()
|
||||
.and_then(|b| b.parse::<bool>().ok())
|
||||
.unwrap_or(false);
|
||||
|
||||
Self {
|
||||
index_external,
|
||||
index_local,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn index_mods(pool: PgPool, settings: IndexingSettings) -> Result<(), IndexingError> {
|
||||
let mut docs_to_add: Vec<UploadSearchMod> = vec![];
|
||||
|
||||
if settings.index_local {
|
||||
docs_to_add.append(&mut index_local(pool.clone()).await?);
|
||||
}
|
||||
if settings.index_external {
|
||||
docs_to_add.append(&mut index_curseforge(1, 400_000).await?);
|
||||
}
|
||||
|
||||
// Write Indices
|
||||
|
||||
add_mods(docs_to_add).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn create_index<'a>(
|
||||
client: &'a Client<'a>,
|
||||
name: &'a str,
|
||||
rules: impl FnOnce() -> Vec<String>,
|
||||
) -> Result<Index<'a>, IndexingError> {
|
||||
match client.get_index(name).await {
|
||||
// TODO: update index settings on startup (or delete old indices on startup)
|
||||
Ok(index) => Ok(index),
|
||||
Err(meilisearch_sdk::errors::Error::IndexNotFound) => {
|
||||
// Only create index and set settings if the index doesn't already exist
|
||||
let index = client.create_index(name, Some("mod_id")).await?;
|
||||
|
||||
index
|
||||
.set_settings(&default_settings().with_ranking_rules(rules()))
|
||||
.await?;
|
||||
|
||||
Ok(index)
|
||||
}
|
||||
Err(e) => {
|
||||
log::warn!("Unhandled error while creating index: {}", e);
|
||||
Err(IndexingError::IndexDBError(e))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn add_to_index(index: Index<'_>, mods: &[UploadSearchMod]) -> Result<(), IndexingError> {
|
||||
for chunk in mods.chunks(MEILISEARCH_CHUNK_SIZE) {
|
||||
index.add_documents(chunk, Some("mod_id")).await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn add_mods(mods: Vec<UploadSearchMod>) -> Result<(), IndexingError> {
|
||||
let address = &*dotenv::var("MEILISEARCH_ADDR")?;
|
||||
let client = Client::new(address, "");
|
||||
|
||||
let mut docs_to_add: Vec<SearchMod> = vec![];
|
||||
// Relevance Index
|
||||
let relevance_index = create_index(&client, "relevance_mods", || {
|
||||
let mut relevance_rules = default_rules();
|
||||
relevance_rules.push_back("desc(downloads)".to_string());
|
||||
relevance_rules.into()
|
||||
})
|
||||
.await?;
|
||||
add_to_index(relevance_index, &mods).await?;
|
||||
|
||||
docs_to_add.append(&mut index_local(pool.clone()).await?);
|
||||
if dotenv::var("INDEX_CURSEFORGE")?
|
||||
.parse()
|
||||
.expect("`INDEX_CURSEFORGE` is not a boolean.")
|
||||
{
|
||||
docs_to_add.append(&mut index_curseforge(1, 400_000).await?);
|
||||
}
|
||||
//Write Indexes
|
||||
//Relevance Index
|
||||
// Downloads Index
|
||||
let downloads_index = create_index(&client, "downloads_mods", || {
|
||||
let mut downloads_rules = default_rules();
|
||||
downloads_rules.push_front("desc(downloads)".to_string());
|
||||
downloads_rules.into()
|
||||
})
|
||||
.await?;
|
||||
add_to_index(downloads_index, &mods).await?;
|
||||
|
||||
let mut relevance_index = client
|
||||
.get_or_create("relevance_mods")
|
||||
.map_err(IndexingError::IndexDBError)?;
|
||||
// Updated Index
|
||||
let updated_index = create_index(&client, "updated_mods", || {
|
||||
let mut updated_rules = default_rules();
|
||||
updated_rules.push_front("desc(updated)".to_string());
|
||||
updated_rules.into()
|
||||
})
|
||||
.await?;
|
||||
add_to_index(updated_index, &mods).await?;
|
||||
|
||||
let mut relevance_rules = default_rules();
|
||||
relevance_rules.push_back("desc(downloads)".to_string());
|
||||
|
||||
relevance_index
|
||||
.set_settings(&default_settings().with_ranking_rules(relevance_rules.into()))
|
||||
.map_err(IndexingError::IndexDBError)?;
|
||||
|
||||
for chunk in docs_to_add.chunks(MEILISEARCH_CHUNK_SIZE) {
|
||||
// TODO: get meilisearch sdk to not require cloning (ie take a reference to docs_to_add)
|
||||
// This may require making our own fork of it.
|
||||
relevance_index
|
||||
.add_documents(Vec::from(chunk), Some("mod_id"))
|
||||
.map_err(IndexingError::IndexDBError)?;
|
||||
}
|
||||
|
||||
//Downloads Index
|
||||
let mut downloads_index = client
|
||||
.get_or_create("downloads_mods")
|
||||
.map_err(IndexingError::IndexDBError)?;
|
||||
|
||||
let mut downloads_rules = default_rules();
|
||||
downloads_rules.push_front("desc(downloads)".to_string());
|
||||
|
||||
downloads_index
|
||||
.set_settings(&default_settings().with_ranking_rules(downloads_rules.into()))
|
||||
.map_err(IndexingError::IndexDBError)?;
|
||||
|
||||
for chunk in docs_to_add.chunks(MEILISEARCH_CHUNK_SIZE) {
|
||||
downloads_index
|
||||
.add_documents(Vec::from(chunk), Some("mod_id"))
|
||||
.map_err(IndexingError::IndexDBError)?;
|
||||
}
|
||||
|
||||
//Updated Index
|
||||
let mut updated_index = client
|
||||
.get_or_create("updated_mods")
|
||||
.map_err(IndexingError::IndexDBError)?;
|
||||
|
||||
let mut updated_rules = default_rules();
|
||||
updated_rules.push_front("desc(updated)".to_string());
|
||||
|
||||
updated_index
|
||||
.set_settings(&default_settings().with_ranking_rules(updated_rules.into()))
|
||||
.map_err(IndexingError::IndexDBError)?;
|
||||
|
||||
for chunk in docs_to_add.chunks(MEILISEARCH_CHUNK_SIZE) {
|
||||
updated_index
|
||||
.add_documents(Vec::from(chunk), Some("mod_id"))
|
||||
.map_err(IndexingError::IndexDBError)?;
|
||||
}
|
||||
|
||||
//Created Index
|
||||
let mut newest_index = client
|
||||
.get_or_create("newest_mods")
|
||||
.map_err(IndexingError::IndexDBError)?;
|
||||
|
||||
let mut newest_rules = default_rules();
|
||||
newest_rules.push_back("desc(created)".to_string());
|
||||
|
||||
newest_index
|
||||
.set_settings(&default_settings().with_ranking_rules(newest_rules.into()))
|
||||
.map_err(IndexingError::IndexDBError)?;
|
||||
|
||||
for chunk in docs_to_add.chunks(MEILISEARCH_CHUNK_SIZE) {
|
||||
newest_index
|
||||
.add_documents(Vec::from(chunk), Some("mod_id"))
|
||||
.map_err(IndexingError::IndexDBError)?;
|
||||
}
|
||||
// Created Index
|
||||
let newest_index = create_index(&client, "newest_mods", || {
|
||||
let mut newest_rules = default_rules();
|
||||
newest_rules.push_front("desc(created)".to_string());
|
||||
newest_rules.into()
|
||||
})
|
||||
.await?;
|
||||
add_to_index(newest_index, &mods).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -144,7 +166,7 @@ fn default_settings() -> Settings {
|
||||
"author".to_string(),
|
||||
"title".to_string(),
|
||||
"description".to_string(),
|
||||
"keywords".to_string(),
|
||||
"categories".to_string(),
|
||||
"versions".to_string(),
|
||||
"downloads".to_string(),
|
||||
"page_url".to_string(),
|
||||
@@ -155,13 +177,12 @@ fn default_settings() -> Settings {
|
||||
"date_modified".to_string(),
|
||||
"updated".to_string(),
|
||||
"latest_version".to_string(),
|
||||
"empty".to_string(),
|
||||
];
|
||||
|
||||
let searchable_attributes = vec![
|
||||
"title".to_string(),
|
||||
"description".to_string(),
|
||||
"keywords".to_string(),
|
||||
"categories".to_string(),
|
||||
"versions".to_string(),
|
||||
"author".to_string(),
|
||||
"empty".to_string(),
|
||||
@@ -173,6 +194,7 @@ fn default_settings() -> Settings {
|
||||
.with_accept_new_fields(true)
|
||||
.with_stop_words(vec![])
|
||||
.with_synonyms(HashMap::new())
|
||||
.with_attributes_for_faceting(vec![String::from("categories")])
|
||||
}
|
||||
|
||||
//endregion
|
||||
|
||||
31
src/search/indexing/queue.rs
Normal file
31
src/search/indexing/queue.rs
Normal file
@@ -0,0 +1,31 @@
|
||||
use super::{add_mods, IndexingError, UploadSearchMod};
|
||||
use std::sync::Mutex;
|
||||
|
||||
pub struct CreationQueue {
|
||||
// There's probably a better structure for this, but a mutex works
|
||||
// and I don't think this can deadlock. This queue requires fast
|
||||
// writes and then a single potentially slower read/write that
|
||||
// empties the queue.
|
||||
queue: Mutex<Vec<UploadSearchMod>>,
|
||||
}
|
||||
|
||||
impl CreationQueue {
|
||||
pub fn new() -> Self {
|
||||
CreationQueue {
|
||||
queue: Mutex::new(Vec::with_capacity(10)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add(&self, search_mod: UploadSearchMod) {
|
||||
// Can only panic if mutex is poisoned
|
||||
self.queue.lock().unwrap().push(search_mod);
|
||||
}
|
||||
pub fn take(&self) -> Vec<UploadSearchMod> {
|
||||
std::mem::replace(&mut *self.queue.lock().unwrap(), Vec::with_capacity(10))
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn index_queue(queue: &CreationQueue) -> Result<(), IndexingError> {
|
||||
let queue = queue.take();
|
||||
add_mods(queue).await
|
||||
}
|
||||
Reference in New Issue
Block a user