You've already forked AstralRinth
forked from didirus/AstralRinth
* Inital creation stuff * File Reader * Upload bodies * Major rework: * Finish Multiple Files * Proper Error Handling * Switch to database models * Run formatter * Make dependencies dependent on Versions over mods * Fixes * Fix clippy * Run lint one last time * Update src/models/mods.rs Co-authored-by: AppleTheGolden <scotsbox@protonmail.com> Co-authored-by: AppleTheGolden <scotsbox@protonmail.com>
178 lines
5.7 KiB
Rust
178 lines
5.7 KiB
Rust
/// This module is used for the indexing from any source.
|
|
pub mod curseforge_import;
|
|
pub mod local_import;
|
|
|
|
use crate::search::indexing::curseforge_import::index_curseforge;
|
|
use crate::search::indexing::local_import::index_local;
|
|
use crate::search::SearchMod;
|
|
use meilisearch_sdk::client::Client;
|
|
use meilisearch_sdk::settings::Settings;
|
|
use std::collections::{HashMap, VecDeque};
|
|
use thiserror::Error;
|
|
|
|
#[derive(Error, Debug)]
|
|
pub enum IndexingError {
|
|
#[error("Error while connecting to the MeiliSearch database")]
|
|
IndexDBError(meilisearch_sdk::errors::Error),
|
|
#[error("Error while importing mods from CurseForge")]
|
|
CurseforgeImportError(reqwest::Error),
|
|
#[error("Error while serializing or deserializing JSON: {0}")]
|
|
SerDeError(#[from] serde_json::Error),
|
|
#[error("Error while parsing a timestamp: {0}")]
|
|
ParseDateError(#[from] chrono::format::ParseError),
|
|
#[error("Database Error: {0}")]
|
|
DatabaseError(#[from] crate::database::DatabaseError),
|
|
#[error("Environment Error")]
|
|
EnvError(#[from] dotenv::Error),
|
|
}
|
|
|
|
// The chunk size for adding mods to the indexing database. If the request size
|
|
// is too large (>10MiB) then the request fails with an error. This chunk size
|
|
// assumes a max average size of 1KiB per mod to avoid this cap.
|
|
const MEILISEARCH_CHUNK_SIZE: usize = 10000;
|
|
|
|
pub async fn index_mods(db: mongodb::Client) -> Result<(), IndexingError> {
|
|
// Check if the index exists
|
|
let address = &*dotenv::var("MEILISEARCH_ADDR")?;
|
|
let client = Client::new(address, "");
|
|
|
|
let mut docs_to_add: Vec<SearchMod> = vec![];
|
|
|
|
docs_to_add.append(&mut index_local(db.clone()).await?);
|
|
if dotenv::var("INDEX_CURSEFORGE")?
|
|
.parse()
|
|
.expect("`INDEX_CURSEFORGE` is not a boolean.")
|
|
{
|
|
docs_to_add.append(&mut index_curseforge(1, 400_000).await?);
|
|
}
|
|
//Write Indexes
|
|
//Relevance Index
|
|
|
|
let mut relevance_index = client
|
|
.get_or_create("relevance_mods")
|
|
.map_err(IndexingError::IndexDBError)?;
|
|
|
|
let mut relevance_rules = default_rules();
|
|
relevance_rules.push_back("desc(downloads)".to_string());
|
|
|
|
relevance_index
|
|
.set_settings(&default_settings().with_ranking_rules(relevance_rules.into()))
|
|
.map_err(IndexingError::IndexDBError)?;
|
|
|
|
for chunk in docs_to_add.chunks(MEILISEARCH_CHUNK_SIZE) {
|
|
// TODO: get meilisearch sdk to not require cloning (ie take a reference to docs_to_add)
|
|
// This may require making our own fork of it.
|
|
relevance_index
|
|
.add_documents(Vec::from(chunk), Some("mod_id"))
|
|
.map_err(IndexingError::IndexDBError)?;
|
|
}
|
|
|
|
//Downloads Index
|
|
let mut downloads_index = client
|
|
.get_or_create("downloads_mods")
|
|
.map_err(IndexingError::IndexDBError)?;
|
|
|
|
let mut downloads_rules = default_rules();
|
|
downloads_rules.push_front("desc(downloads)".to_string());
|
|
|
|
downloads_index
|
|
.set_settings(&default_settings().with_ranking_rules(downloads_rules.into()))
|
|
.map_err(IndexingError::IndexDBError)?;
|
|
|
|
for chunk in docs_to_add.chunks(MEILISEARCH_CHUNK_SIZE) {
|
|
downloads_index
|
|
.add_documents(Vec::from(chunk), Some("mod_id"))
|
|
.map_err(IndexingError::IndexDBError)?;
|
|
}
|
|
|
|
//Updated Index
|
|
let mut updated_index = client
|
|
.get_or_create("updated_mods")
|
|
.map_err(IndexingError::IndexDBError)?;
|
|
|
|
let mut updated_rules = default_rules();
|
|
updated_rules.push_front("desc(updated)".to_string());
|
|
|
|
updated_index
|
|
.set_settings(&default_settings().with_ranking_rules(updated_rules.into()))
|
|
.map_err(IndexingError::IndexDBError)?;
|
|
|
|
for chunk in docs_to_add.chunks(MEILISEARCH_CHUNK_SIZE) {
|
|
updated_index
|
|
.add_documents(Vec::from(chunk), Some("mod_id"))
|
|
.map_err(IndexingError::IndexDBError)?;
|
|
}
|
|
|
|
//Created Index
|
|
let mut newest_index = client
|
|
.get_or_create("newest_mods")
|
|
.map_err(IndexingError::IndexDBError)?;
|
|
|
|
let mut newest_rules = default_rules();
|
|
newest_rules.push_back("desc(created)".to_string());
|
|
|
|
newest_index
|
|
.set_settings(&default_settings().with_ranking_rules(newest_rules.into()))
|
|
.map_err(IndexingError::IndexDBError)?;
|
|
|
|
for chunk in docs_to_add.chunks(MEILISEARCH_CHUNK_SIZE) {
|
|
newest_index
|
|
.add_documents(Vec::from(chunk), Some("mod_id"))
|
|
.map_err(IndexingError::IndexDBError)?;
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
//region Utils
|
|
fn default_rules() -> VecDeque<String> {
|
|
vec![
|
|
"typo".to_string(),
|
|
"words".to_string(),
|
|
"proximity".to_string(),
|
|
"attribute".to_string(),
|
|
"wordsPosition".to_string(),
|
|
"exactness".to_string(),
|
|
]
|
|
.into()
|
|
}
|
|
|
|
fn default_settings() -> Settings {
|
|
let displayed_attributes = vec![
|
|
"mod_id".to_string(),
|
|
"author".to_string(),
|
|
"title".to_string(),
|
|
"description".to_string(),
|
|
"keywords".to_string(),
|
|
"versions".to_string(),
|
|
"downloads".to_string(),
|
|
"page_url".to_string(),
|
|
"icon_url".to_string(),
|
|
"author_url".to_string(),
|
|
"date_created".to_string(),
|
|
"created".to_string(),
|
|
"date_modified".to_string(),
|
|
"updated".to_string(),
|
|
"latest_version".to_string(),
|
|
"empty".to_string(),
|
|
];
|
|
|
|
let searchable_attributes = vec![
|
|
"title".to_string(),
|
|
"description".to_string(),
|
|
"keywords".to_string(),
|
|
"versions".to_string(),
|
|
"author".to_string(),
|
|
"empty".to_string(),
|
|
];
|
|
|
|
Settings::new()
|
|
.with_displayed_attributes(displayed_attributes)
|
|
.with_searchable_attributes(searchable_attributes)
|
|
.with_accept_new_fields(true)
|
|
.with_stop_words(vec![])
|
|
.with_synonyms(HashMap::new())
|
|
}
|
|
|
|
//endregion
|