You've already forked AstralRinth
forked from didirus/AstralRinth
Fix indexing, upgrade MeiliSearch sdk (#100)
* Hotfixes for indexing
* Handles missing INDEX_CACHE_PATH environment variable
* Exits on startup if environment variables are missing. The flag `--allow-missing-vars` disables this, but that is generally a bad idea, since most environment variables are required (and the ones that aren't should be marked as such).
* Disables the query loggers
* Upgrade meilisearch-sdk to 0.4.0 for MeiliSearch 0.16 support
* Fix swap of Forge and Fabric labeling
This commit is contained in:
@@ -119,21 +119,23 @@ lazy_static::lazy_static! {
|
||||
pub async fn index_curseforge(
|
||||
start_index: u32,
|
||||
end_index: u32,
|
||||
cache_path: &std::path::Path,
|
||||
cache_path: Option<&std::path::Path>,
|
||||
) -> Result<Vec<UploadSearchMod>, IndexingError> {
|
||||
info!("Indexing curseforge mods!");
|
||||
let start = std::time::Instant::now();
|
||||
|
||||
let mut docs_to_add: Vec<UploadSearchMod> = vec![];
|
||||
|
||||
let cache = std::fs::File::open(cache_path)
|
||||
let cache = cache_path
|
||||
.map(std::fs::File::open)
|
||||
.and_then(Result::ok)
|
||||
.map(std::io::BufReader::new)
|
||||
.map(serde_json::from_reader::<_, Vec<u32>>);
|
||||
|
||||
let requested_ids;
|
||||
|
||||
// This caching system can't handle segmented indexing
|
||||
if let Ok(Ok(mut cache)) = cache {
|
||||
if let Some(Ok(mut cache)) = cache {
|
||||
let end = cache.last().copied().unwrap_or(start_index);
|
||||
cache.extend(end..end_index);
|
||||
requested_ids = serde_json::to_string(&cache)?;
|
||||
@@ -167,11 +169,13 @@ pub async fn index_curseforge(
|
||||
// Only write to the cache if this doesn't skip mods at the start
|
||||
// The caching system iterates through all ids normally past the last
|
||||
// id in the cache, so the end_index shouldn't matter.
|
||||
if start_index <= 1 {
|
||||
let mut ids = curseforge_mods.iter().map(|m| m.id).collect::<Vec<_>>();
|
||||
ids.sort_unstable();
|
||||
if let Err(e) = std::fs::write(cache_path, serde_json::to_string(&ids)?) {
|
||||
log::warn!("Error writing to index id cache: {}", e);
|
||||
if let Some(path) = cache_path {
|
||||
if start_index <= 1 {
|
||||
let mut ids = curseforge_mods.iter().map(|m| m.id).collect::<Vec<_>>();
|
||||
ids.sort_unstable();
|
||||
if let Err(e) = std::fs::write(path, serde_json::to_string(&ids)?) {
|
||||
log::warn!("Error writing to index id cache: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -192,8 +196,8 @@ pub async fn index_curseforge(
|
||||
for file in curseforge_mod.latest_files {
|
||||
for version in file.game_version {
|
||||
match &*version {
|
||||
"Fabric" => loaders.forge = true,
|
||||
"Forge" => loaders.fabric = true,
|
||||
"Fabric" => loaders.fabric = true,
|
||||
"Forge" => loaders.forge = true,
|
||||
"Rift" => loaders.rift = true,
|
||||
_ => (),
|
||||
}
|
||||
@@ -309,7 +313,6 @@ pub async fn index_curseforge(
|
||||
modified_timestamp: curseforge_mod.date_modified.timestamp(),
|
||||
latest_version,
|
||||
host: Cow::Borrowed("curseforge"),
|
||||
empty: Cow::Borrowed("{}{}{}"),
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -112,7 +112,6 @@ pub async fn index_local(pool: PgPool) -> Result<Vec<UploadSearchMod>, IndexingE
|
||||
modified_timestamp: mod_data.updated.timestamp(),
|
||||
latest_version,
|
||||
host: Cow::Borrowed("modrinth"),
|
||||
empty: Cow::Borrowed("{}{}{}"),
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -225,6 +224,5 @@ pub async fn query_one(
|
||||
modified_timestamp: mod_data.updated.timestamp(),
|
||||
latest_version,
|
||||
host: Cow::Borrowed("modrinth"),
|
||||
empty: Cow::Borrowed("{}{}{}"),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -63,7 +63,7 @@ pub async fn index_mods(
|
||||
) -> Result<(), IndexingError> {
|
||||
let mut docs_to_add: Vec<UploadSearchMod> = vec![];
|
||||
|
||||
let cache_path = std::path::PathBuf::from(std::env::var_os("INDEX_CACHE_PATH").unwrap());
|
||||
let cache_path = std::env::var_os("INDEX_CACHE_PATH").map(std::path::PathBuf::from);
|
||||
|
||||
if settings.index_local {
|
||||
docs_to_add.append(&mut index_local(pool.clone()).await?);
|
||||
@@ -74,7 +74,7 @@ pub async fn index_mods(
|
||||
.map(|i| i.parse().unwrap())
|
||||
.unwrap_or(450_000);
|
||||
|
||||
docs_to_add.append(&mut index_curseforge(1, end_index, &cache_path).await?);
|
||||
docs_to_add.append(&mut index_curseforge(1, end_index, cache_path.as_deref()).await?);
|
||||
}
|
||||
|
||||
// Write Indices
|
||||
@@ -270,7 +270,6 @@ fn default_settings() -> Settings {
|
||||
"categories".to_string(),
|
||||
"versions".to_string(),
|
||||
"author".to_string(),
|
||||
"empty".to_string(),
|
||||
];
|
||||
|
||||
Settings::new()
|
||||
|
||||
@@ -5,7 +5,6 @@ use actix_web::web::HttpResponse;
|
||||
use chrono::{DateTime, Utc};
|
||||
use meilisearch_sdk::client::Client;
|
||||
use meilisearch_sdk::document::Document;
|
||||
use meilisearch_sdk::search::Query;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::borrow::Cow;
|
||||
use std::cmp::min;
|
||||
@@ -84,12 +83,6 @@ pub struct UploadSearchMod {
|
||||
pub modified_timestamp: i64,
|
||||
|
||||
pub host: Cow<'static, str>,
|
||||
|
||||
/// Must be "{}{}{}", a hack until meilisearch supports searches
|
||||
/// with empty queries (https://github.com/meilisearch/MeiliSearch/issues/729)
|
||||
// This is a Cow to prevent unnecessary allocations for a static
|
||||
// string
|
||||
pub empty: Cow<'static, str>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
@@ -155,23 +148,6 @@ pub async fn search_for_mod(
|
||||
let offset = info.offset.as_deref().unwrap_or("0").parse()?;
|
||||
let index = info.index.as_deref().unwrap_or("relevance");
|
||||
let limit = info.limit.as_deref().unwrap_or("10").parse()?;
|
||||
let search_query: &str = info
|
||||
.query
|
||||
.as_deref()
|
||||
.filter(|s| !s.is_empty())
|
||||
.unwrap_or("{}{}{}");
|
||||
|
||||
let mut query = Query::new(search_query)
|
||||
.with_limit(min(100, limit))
|
||||
.with_offset(offset);
|
||||
|
||||
if !filters.is_empty() {
|
||||
query = query.with_filters(&filters);
|
||||
}
|
||||
if let Some(facets) = &info.facets {
|
||||
let facets = serde_json::from_str::<Vec<Vec<&str>>>(facets)?;
|
||||
query = query.with_facet_filters(facets);
|
||||
}
|
||||
|
||||
let index = match index {
|
||||
"relevance" => "relevance_mods",
|
||||
@@ -181,14 +157,44 @@ pub async fn search_for_mod(
|
||||
i => return Err(SearchError::InvalidIndex(i.to_string())),
|
||||
};
|
||||
|
||||
let results = client
|
||||
.get_index(index)
|
||||
.await?
|
||||
.search::<ResultSearchMod>(&query)
|
||||
.await?;
|
||||
let meilisearch_index = client.get_index(index).await?;
|
||||
let mut query = meilisearch_index.search();
|
||||
|
||||
query.with_limit(min(100, limit)).with_offset(offset);
|
||||
|
||||
if let Some(search) = info.query.as_deref() {
|
||||
if !search.is_empty() {
|
||||
query.with_query(search);
|
||||
}
|
||||
}
|
||||
|
||||
if !filters.is_empty() {
|
||||
query.with_filters(&filters);
|
||||
}
|
||||
|
||||
// So the meilisearch sdk's lifetimes are... broken, to say the least
|
||||
// They are overspecified and almost always wrong, and would generally
|
||||
// just be better if they didn't specify them at all.
|
||||
|
||||
// They also decided to have this take a &[&[&str]], which is impossible
|
||||
// to construct efficiently. Instead it should take impl Iterator<Item=&[&str]>,
|
||||
// &[impl AsRef<[&str]>], or one of many other proper solutions to that issue.
|
||||
|
||||
let why_meilisearch;
|
||||
let why_must_you_do_this;
|
||||
if let Some(facets) = &info.facets {
|
||||
why_meilisearch = serde_json::from_str::<Vec<Vec<&str>>>(facets)?;
|
||||
why_must_you_do_this = why_meilisearch
|
||||
.iter()
|
||||
.map(|v| v as &[_])
|
||||
.collect::<Vec<&[_]>>();
|
||||
query.with_facet_filters(&why_must_you_do_this);
|
||||
}
|
||||
|
||||
let results = query.execute::<ResultSearchMod>().await?;
|
||||
|
||||
Ok(SearchResults {
|
||||
hits: results.hits,
|
||||
hits: results.hits.into_iter().map(|r| r.result).collect(),
|
||||
offset: results.offset,
|
||||
limit: results.limit,
|
||||
total_hits: results.nb_hits,
|
||||
|
||||
Reference in New Issue
Block a user