You've already forked AstralRinth
forked from didirus/AstralRinth
Refactor Meilisearch, update to latest SDK, and implement faceted search (#44)
* feat(indexing): Reindex curseforge & local database at an interval * fix(indexing): Use strings for meilisearch primary key Fixes #17 by prefixing curseforge ids with "curse-" and local ids with "local-". * feat(indexing): Add newly created mods to the index more quickly * feat(indexing): Implement faceted search, update to meilisearch master Fixes #9, but only uses faceted search for categories. It should be reasonably simple to add support for versions, but it may not be as useful due to the large number of versions and the large number of supported versions for each mod. * feat(indexing): Allow skipping initial indexing Co-authored-by: Geometrically <18202329+Geometrically@users.noreply.github.com>
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
use super::IndexingError;
|
||||
use crate::search::SearchMod;
|
||||
use crate::search::UploadSearchMod;
|
||||
use log::info;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
@@ -48,10 +48,10 @@ pub struct CurseForgeMod {
|
||||
pub async fn index_curseforge(
|
||||
start_index: i32,
|
||||
end_index: i32,
|
||||
) -> Result<Vec<SearchMod>, IndexingError> {
|
||||
) -> Result<Vec<UploadSearchMod>, IndexingError> {
|
||||
info!("Indexing curseforge mods!");
|
||||
|
||||
let mut docs_to_add: Vec<SearchMod> = vec![];
|
||||
let mut docs_to_add: Vec<UploadSearchMod> = vec![];
|
||||
|
||||
let res = reqwest::Client::new()
|
||||
.post("https://addons-ecs.forgesvc.net/api/v2/addon")
|
||||
@@ -177,32 +177,32 @@ pub async fn index_curseforge(
|
||||
.thumbnail_url
|
||||
.replace("/256/256/", "/64/64/");
|
||||
|
||||
docs_to_add.push(SearchMod {
|
||||
mod_id: -curseforge_mod.id as i64,
|
||||
let created = curseforge_mod
|
||||
.date_created
|
||||
.parse::<chrono::DateTime<chrono::Utc>>()?;
|
||||
let modified = curseforge_mod
|
||||
.date_modified
|
||||
.parse::<chrono::DateTime<chrono::Utc>>()?;
|
||||
|
||||
docs_to_add.push(UploadSearchMod {
|
||||
mod_id: format!("curse-{}", curseforge_mod.id),
|
||||
author: (&curseforge_mod.authors[0].name).to_string(),
|
||||
title: curseforge_mod.name,
|
||||
description: curseforge_mod.summary.chars().take(150).collect(),
|
||||
keywords: mod_categories,
|
||||
categories: mod_categories,
|
||||
versions: mod_game_versions.clone(),
|
||||
downloads: curseforge_mod.download_count as i32,
|
||||
page_url: curseforge_mod.website_url,
|
||||
icon_url,
|
||||
author_url: (&curseforge_mod.authors[0].url).to_string(),
|
||||
date_created: curseforge_mod.date_created.chars().take(10).collect(),
|
||||
created: curseforge_mod
|
||||
.date_created
|
||||
.parse::<chrono::DateTime<chrono::Utc>>()?
|
||||
.timestamp(),
|
||||
date_modified: curseforge_mod.date_modified.chars().take(10).collect(),
|
||||
updated: curseforge_mod
|
||||
.date_modified
|
||||
.parse::<chrono::DateTime<chrono::Utc>>()?
|
||||
.timestamp(),
|
||||
date_created: created.to_string(),
|
||||
created_timestamp: created.timestamp(),
|
||||
date_modified: modified.to_string(),
|
||||
modified_timestamp: modified.timestamp(),
|
||||
latest_version,
|
||||
empty: String::from("{}{}{}"),
|
||||
empty: std::borrow::Cow::Borrowed("{}{}{}"),
|
||||
})
|
||||
}
|
||||
|
||||
//TODO Reindex every hour for new mods.
|
||||
Ok(docs_to_add)
|
||||
}
|
||||
|
||||
@@ -2,13 +2,13 @@ use futures::{StreamExt, TryStreamExt};
|
||||
use log::info;
|
||||
|
||||
use super::IndexingError;
|
||||
use crate::search::SearchMod;
|
||||
use crate::search::UploadSearchMod;
|
||||
use sqlx::postgres::PgPool;
|
||||
|
||||
pub async fn index_local(pool: PgPool) -> Result<Vec<SearchMod>, IndexingError> {
|
||||
pub async fn index_local(pool: PgPool) -> Result<Vec<UploadSearchMod>, IndexingError> {
|
||||
info!("Indexing local mods!");
|
||||
|
||||
let mut docs_to_add: Vec<SearchMod> = vec![];
|
||||
let mut docs_to_add: Vec<UploadSearchMod> = vec![];
|
||||
|
||||
let mut results = sqlx::query!(
|
||||
"
|
||||
@@ -53,23 +53,25 @@ pub async fn index_local(pool: PgPool) -> Result<Vec<SearchMod>, IndexingError>
|
||||
icon_url = url;
|
||||
}
|
||||
|
||||
docs_to_add.push(SearchMod {
|
||||
mod_id: result.id,
|
||||
author: "".to_string(),
|
||||
let formatted = result.published.to_string();
|
||||
let timestamp = result.published.timestamp();
|
||||
docs_to_add.push(UploadSearchMod {
|
||||
mod_id: format!("local-{}", crate::models::ids::ModId(result.id as u64)),
|
||||
title: result.title,
|
||||
description: result.description,
|
||||
keywords: categories,
|
||||
categories,
|
||||
versions,
|
||||
downloads: result.downloads,
|
||||
page_url: result.body_url,
|
||||
icon_url,
|
||||
author: "".to_string(), // TODO: author/team info
|
||||
author_url: "".to_string(),
|
||||
date_created: result.published.to_string(),
|
||||
created: 0,
|
||||
date_modified: "".to_string(),
|
||||
updated: 0,
|
||||
latest_version: "".to_string(),
|
||||
empty: String::from("{}{}{}"),
|
||||
date_created: formatted.clone(),
|
||||
created_timestamp: timestamp,
|
||||
date_modified: formatted,
|
||||
modified_timestamp: timestamp,
|
||||
latest_version: "".to_string(), // TODO: Info about latest version
|
||||
empty: std::borrow::Cow::Borrowed("{}{}{}"),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
/// This module is used for the indexing from any source.
|
||||
pub mod curseforge_import;
|
||||
pub mod local_import;
|
||||
pub mod queue;
|
||||
|
||||
use crate::search::indexing::curseforge_import::index_curseforge;
|
||||
use crate::search::indexing::local_import::index_local;
|
||||
use crate::search::SearchMod;
|
||||
use crate::search::UploadSearchMod;
|
||||
use curseforge_import::index_curseforge;
|
||||
use local_import::index_local;
|
||||
use meilisearch_sdk::client::Client;
|
||||
use meilisearch_sdk::indexes::Index;
|
||||
use meilisearch_sdk::settings::Settings;
|
||||
use sqlx::postgres::PgPool;
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
@@ -14,7 +16,7 @@ use thiserror::Error;
|
||||
#[derive(Error, Debug)]
|
||||
pub enum IndexingError {
|
||||
#[error("Error while connecting to the MeiliSearch database")]
|
||||
IndexDBError(meilisearch_sdk::errors::Error),
|
||||
IndexDBError(#[from] meilisearch_sdk::errors::Error),
|
||||
#[error("Error while importing mods from CurseForge")]
|
||||
CurseforgeImportError(reqwest::Error),
|
||||
#[error("Error while serializing or deserializing JSON: {0}")]
|
||||
@@ -32,95 +34,115 @@ pub enum IndexingError {
|
||||
// assumes a max average size of 1KiB per mod to avoid this cap.
|
||||
const MEILISEARCH_CHUNK_SIZE: usize = 10000;
|
||||
|
||||
pub async fn index_mods(pool: PgPool) -> Result<(), IndexingError> {
|
||||
// Check if the index exists
|
||||
/// Selects which mod sources `index_mods` pulls documents from.
#[derive(Debug)]
pub struct IndexingSettings {
    /// When `true`, `index_mods` also indexes mods fetched through
    /// `index_curseforge`.
    pub index_external: bool,
    /// When `true`, `index_mods` indexes the mods returned by `index_local`.
    pub index_local: bool,
}
|
||||
|
||||
impl IndexingSettings {
|
||||
pub fn from_env() -> Self {
|
||||
let index_local = true;
|
||||
let index_external = dotenv::var("INDEX_CURSEFORGE")
|
||||
.ok()
|
||||
.and_then(|b| b.parse::<bool>().ok())
|
||||
.unwrap_or(false);
|
||||
|
||||
Self {
|
||||
index_external,
|
||||
index_local,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn index_mods(pool: PgPool, settings: IndexingSettings) -> Result<(), IndexingError> {
|
||||
let mut docs_to_add: Vec<UploadSearchMod> = vec![];
|
||||
|
||||
if settings.index_local {
|
||||
docs_to_add.append(&mut index_local(pool.clone()).await?);
|
||||
}
|
||||
if settings.index_external {
|
||||
docs_to_add.append(&mut index_curseforge(1, 400_000).await?);
|
||||
}
|
||||
|
||||
// Write Indices
|
||||
|
||||
add_mods(docs_to_add).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn create_index<'a>(
|
||||
client: &'a Client<'a>,
|
||||
name: &'a str,
|
||||
rules: impl FnOnce() -> Vec<String>,
|
||||
) -> Result<Index<'a>, IndexingError> {
|
||||
match client.get_index(name).await {
|
||||
// TODO: update index settings on startup (or delete old indices on startup)
|
||||
Ok(index) => Ok(index),
|
||||
Err(meilisearch_sdk::errors::Error::IndexNotFound) => {
|
||||
// Only create index and set settings if the index doesn't already exist
|
||||
let index = client.create_index(name, Some("mod_id")).await?;
|
||||
|
||||
index
|
||||
.set_settings(&default_settings().with_ranking_rules(rules()))
|
||||
.await?;
|
||||
|
||||
Ok(index)
|
||||
}
|
||||
Err(e) => {
|
||||
log::warn!("Unhandled error while creating index: {}", e);
|
||||
Err(IndexingError::IndexDBError(e))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn add_to_index(index: Index<'_>, mods: &[UploadSearchMod]) -> Result<(), IndexingError> {
|
||||
for chunk in mods.chunks(MEILISEARCH_CHUNK_SIZE) {
|
||||
index.add_documents(chunk, Some("mod_id")).await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn add_mods(mods: Vec<UploadSearchMod>) -> Result<(), IndexingError> {
|
||||
let address = &*dotenv::var("MEILISEARCH_ADDR")?;
|
||||
let client = Client::new(address, "");
|
||||
|
||||
let mut docs_to_add: Vec<SearchMod> = vec![];
|
||||
// Relevance Index
|
||||
let relevance_index = create_index(&client, "relevance_mods", || {
|
||||
let mut relevance_rules = default_rules();
|
||||
relevance_rules.push_back("desc(downloads)".to_string());
|
||||
relevance_rules.into()
|
||||
})
|
||||
.await?;
|
||||
add_to_index(relevance_index, &mods).await?;
|
||||
|
||||
docs_to_add.append(&mut index_local(pool.clone()).await?);
|
||||
if dotenv::var("INDEX_CURSEFORGE")?
|
||||
.parse()
|
||||
.expect("`INDEX_CURSEFORGE` is not a boolean.")
|
||||
{
|
||||
docs_to_add.append(&mut index_curseforge(1, 400_000).await?);
|
||||
}
|
||||
//Write Indexes
|
||||
//Relevance Index
|
||||
// Downloads Index
|
||||
let downloads_index = create_index(&client, "downloads_mods", || {
|
||||
let mut downloads_rules = default_rules();
|
||||
downloads_rules.push_front("desc(downloads)".to_string());
|
||||
downloads_rules.into()
|
||||
})
|
||||
.await?;
|
||||
add_to_index(downloads_index, &mods).await?;
|
||||
|
||||
let mut relevance_index = client
|
||||
.get_or_create("relevance_mods")
|
||||
.map_err(IndexingError::IndexDBError)?;
|
||||
// Updated Index
|
||||
let updated_index = create_index(&client, "updated_mods", || {
|
||||
let mut updated_rules = default_rules();
|
||||
updated_rules.push_front("desc(updated)".to_string());
|
||||
updated_rules.into()
|
||||
})
|
||||
.await?;
|
||||
add_to_index(updated_index, &mods).await?;
|
||||
|
||||
let mut relevance_rules = default_rules();
|
||||
relevance_rules.push_back("desc(downloads)".to_string());
|
||||
|
||||
relevance_index
|
||||
.set_settings(&default_settings().with_ranking_rules(relevance_rules.into()))
|
||||
.map_err(IndexingError::IndexDBError)?;
|
||||
|
||||
for chunk in docs_to_add.chunks(MEILISEARCH_CHUNK_SIZE) {
|
||||
// TODO: get meilisearch sdk to not require cloning (ie take a reference to docs_to_add)
|
||||
// This may require making our own fork of it.
|
||||
relevance_index
|
||||
.add_documents(Vec::from(chunk), Some("mod_id"))
|
||||
.map_err(IndexingError::IndexDBError)?;
|
||||
}
|
||||
|
||||
//Downloads Index
|
||||
let mut downloads_index = client
|
||||
.get_or_create("downloads_mods")
|
||||
.map_err(IndexingError::IndexDBError)?;
|
||||
|
||||
let mut downloads_rules = default_rules();
|
||||
downloads_rules.push_front("desc(downloads)".to_string());
|
||||
|
||||
downloads_index
|
||||
.set_settings(&default_settings().with_ranking_rules(downloads_rules.into()))
|
||||
.map_err(IndexingError::IndexDBError)?;
|
||||
|
||||
for chunk in docs_to_add.chunks(MEILISEARCH_CHUNK_SIZE) {
|
||||
downloads_index
|
||||
.add_documents(Vec::from(chunk), Some("mod_id"))
|
||||
.map_err(IndexingError::IndexDBError)?;
|
||||
}
|
||||
|
||||
//Updated Index
|
||||
let mut updated_index = client
|
||||
.get_or_create("updated_mods")
|
||||
.map_err(IndexingError::IndexDBError)?;
|
||||
|
||||
let mut updated_rules = default_rules();
|
||||
updated_rules.push_front("desc(updated)".to_string());
|
||||
|
||||
updated_index
|
||||
.set_settings(&default_settings().with_ranking_rules(updated_rules.into()))
|
||||
.map_err(IndexingError::IndexDBError)?;
|
||||
|
||||
for chunk in docs_to_add.chunks(MEILISEARCH_CHUNK_SIZE) {
|
||||
updated_index
|
||||
.add_documents(Vec::from(chunk), Some("mod_id"))
|
||||
.map_err(IndexingError::IndexDBError)?;
|
||||
}
|
||||
|
||||
//Created Index
|
||||
let mut newest_index = client
|
||||
.get_or_create("newest_mods")
|
||||
.map_err(IndexingError::IndexDBError)?;
|
||||
|
||||
let mut newest_rules = default_rules();
|
||||
newest_rules.push_back("desc(created)".to_string());
|
||||
|
||||
newest_index
|
||||
.set_settings(&default_settings().with_ranking_rules(newest_rules.into()))
|
||||
.map_err(IndexingError::IndexDBError)?;
|
||||
|
||||
for chunk in docs_to_add.chunks(MEILISEARCH_CHUNK_SIZE) {
|
||||
newest_index
|
||||
.add_documents(Vec::from(chunk), Some("mod_id"))
|
||||
.map_err(IndexingError::IndexDBError)?;
|
||||
}
|
||||
// Created Index
|
||||
let newest_index = create_index(&client, "newest_mods", || {
|
||||
let mut newest_rules = default_rules();
|
||||
newest_rules.push_front("desc(created)".to_string());
|
||||
newest_rules.into()
|
||||
})
|
||||
.await?;
|
||||
add_to_index(newest_index, &mods).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -144,7 +166,7 @@ fn default_settings() -> Settings {
|
||||
"author".to_string(),
|
||||
"title".to_string(),
|
||||
"description".to_string(),
|
||||
"keywords".to_string(),
|
||||
"categories".to_string(),
|
||||
"versions".to_string(),
|
||||
"downloads".to_string(),
|
||||
"page_url".to_string(),
|
||||
@@ -155,13 +177,12 @@ fn default_settings() -> Settings {
|
||||
"date_modified".to_string(),
|
||||
"updated".to_string(),
|
||||
"latest_version".to_string(),
|
||||
"empty".to_string(),
|
||||
];
|
||||
|
||||
let searchable_attributes = vec![
|
||||
"title".to_string(),
|
||||
"description".to_string(),
|
||||
"keywords".to_string(),
|
||||
"categories".to_string(),
|
||||
"versions".to_string(),
|
||||
"author".to_string(),
|
||||
"empty".to_string(),
|
||||
@@ -173,6 +194,7 @@ fn default_settings() -> Settings {
|
||||
.with_accept_new_fields(true)
|
||||
.with_stop_words(vec![])
|
||||
.with_synonyms(HashMap::new())
|
||||
.with_attributes_for_faceting(vec![String::from("categories")])
|
||||
}
|
||||
|
||||
//endregion
|
||||
|
||||
31
src/search/indexing/queue.rs
Normal file
31
src/search/indexing/queue.rs
Normal file
@@ -0,0 +1,31 @@
|
||||
use super::{add_mods, IndexingError, UploadSearchMod};
|
||||
use std::sync::Mutex;
|
||||
|
||||
pub struct CreationQueue {
|
||||
// There's probably a better structure for this, but a mutex works
|
||||
// and I don't think this can deadlock. This queue requires fast
|
||||
// writes and then a single potentially slower read/write that
|
||||
// empties the queue.
|
||||
queue: Mutex<Vec<UploadSearchMod>>,
|
||||
}
|
||||
|
||||
impl CreationQueue {
|
||||
pub fn new() -> Self {
|
||||
CreationQueue {
|
||||
queue: Mutex::new(Vec::with_capacity(10)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add(&self, search_mod: UploadSearchMod) {
|
||||
// Can only panic if mutex is poisoned
|
||||
self.queue.lock().unwrap().push(search_mod);
|
||||
}
|
||||
pub fn take(&self) -> Vec<UploadSearchMod> {
|
||||
std::mem::replace(&mut *self.queue.lock().unwrap(), Vec::with_capacity(10))
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn index_queue(queue: &CreationQueue) -> Result<(), IndexingError> {
|
||||
let queue = queue.take();
|
||||
add_mods(queue).await
|
||||
}
|
||||
@@ -6,6 +6,7 @@ use meilisearch_sdk::client::Client;
|
||||
use meilisearch_sdk::document::Document;
|
||||
use meilisearch_sdk::search::Query;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::borrow::Cow;
|
||||
use thiserror::Error;
|
||||
|
||||
pub mod indexing;
|
||||
@@ -13,7 +14,7 @@ pub mod indexing;
|
||||
#[derive(Error, Debug)]
|
||||
pub enum SearchError {
|
||||
#[error("Error while connecting to the MeiliSearch database")]
|
||||
IndexDBError(meilisearch_sdk::errors::Error),
|
||||
IndexDBError(#[from] meilisearch_sdk::errors::Error),
|
||||
#[error("Error while serializing or deserializing JSON: {0}")]
|
||||
SerDeError(#[from] serde_json::Error),
|
||||
#[error("Error while parsing an integer: {0}")]
|
||||
@@ -45,36 +46,75 @@ impl actix_web::ResponseError for SearchError {
|
||||
}
|
||||
}
|
||||
|
||||
/// A mod document used for uploading mods to meilisearch's indices.
|
||||
/// This contains some extra data that is not returned by search results.
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub struct SearchMod {
|
||||
pub mod_id: i64,
|
||||
pub struct UploadSearchMod {
|
||||
pub mod_id: String,
|
||||
pub author: String,
|
||||
pub title: String,
|
||||
pub description: String,
|
||||
pub keywords: Vec<String>,
|
||||
pub categories: Vec<String>,
|
||||
pub versions: Vec<String>,
|
||||
pub downloads: i32,
|
||||
pub page_url: String,
|
||||
pub icon_url: String,
|
||||
pub author_url: String,
|
||||
pub date_created: String,
|
||||
pub created: i64,
|
||||
pub date_modified: String,
|
||||
pub updated: i64,
|
||||
pub latest_version: String,
|
||||
pub empty: String,
|
||||
|
||||
/// RFC 3339 formatted creation date of the mod
|
||||
pub date_created: String,
|
||||
/// Unix timestamp of the creation date of the mod
|
||||
pub created_timestamp: i64,
|
||||
/// RFC 3339 formatted date/time of last major modification (update)
|
||||
pub date_modified: String,
|
||||
/// Unix timestamp of the last major modification
|
||||
pub modified_timestamp: i64,
|
||||
|
||||
/// Must be "{}{}{}", a hack until meilisearch supports searches
|
||||
/// with empty queries (https://github.com/meilisearch/MeiliSearch/issues/729)
|
||||
// This is a Cow to prevent unnecessary allocations for a static
|
||||
// string
|
||||
pub empty: Cow<'static, str>,
|
||||
}
|
||||
|
||||
impl Document for SearchMod {
|
||||
type UIDType = i64;
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub struct ResultSearchMod {
|
||||
pub mod_id: String,
|
||||
pub author: String,
|
||||
pub title: String,
|
||||
pub description: String,
|
||||
pub categories: Vec<String>,
|
||||
// TODO: more efficient format for listing versions, without many repetitions
|
||||
pub versions: Vec<String>,
|
||||
pub downloads: i32,
|
||||
pub page_url: String,
|
||||
pub icon_url: String,
|
||||
pub author_url: String,
|
||||
/// RFC 3339 formatted creation date of the mod
|
||||
pub date_created: String,
|
||||
/// RFC 3339 formatted modification date of the mod
|
||||
pub date_modified: String,
|
||||
pub latest_version: String,
|
||||
}
|
||||
|
||||
/// Lets `UploadSearchMod` be stored as a MeiliSearch document, with `mod_id`
/// serving as the unique identifier.
impl Document for UploadSearchMod {
    type UIDType = String;

    /// Returns the document's unique identifier, `mod_id`.
    fn get_uid(&self) -> &Self::UIDType {
        &self.mod_id
    }
}
|
||||
|
||||
pub fn search_for_mod(info: &SearchRequest) -> Result<Vec<SearchMod>, SearchError> {
|
||||
use std::borrow::Cow;
|
||||
/// Lets `ResultSearchMod` be read back as a MeiliSearch document, with
/// `mod_id` serving as the unique identifier.
impl Document for ResultSearchMod {
    type UIDType = String;

    /// Returns the document's unique identifier, `mod_id`.
    fn get_uid(&self) -> &Self::UIDType {
        &self.mod_id
    }
}
|
||||
|
||||
pub async fn search_for_mod(info: &SearchRequest) -> Result<Vec<ResultSearchMod>, SearchError> {
|
||||
let address = &*dotenv::var("MEILISEARCH_ADDR")?;
|
||||
let client = Client::new(address, "");
|
||||
|
||||
@@ -98,11 +138,15 @@ pub fn search_for_mod(info: &SearchRequest) -> Result<Vec<SearchMod>, SearchErro
|
||||
if !filters.is_empty() {
|
||||
query = query.with_filters(&filters);
|
||||
}
|
||||
if let Some(facets) = &info.facets {
|
||||
let facets = serde_json::from_str::<Vec<Vec<&str>>>(facets)?;
|
||||
query = query.with_facet_filters(facets);
|
||||
}
|
||||
|
||||
Ok(client
|
||||
.get_index(format!("{}_mods", index).as_ref())
|
||||
.map_err(SearchError::IndexDBError)?
|
||||
.search::<SearchMod>(&query)
|
||||
.map_err(SearchError::IndexDBError)?
|
||||
.await?
|
||||
.search::<ResultSearchMod>(&query)
|
||||
.await?
|
||||
.hits)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user