Fix indexing, upgrade MeiliSearch sdk (#100)

* Hotfixes for indexing

* Handles missing INDEX_CACHE_PATH environment variable
* Exits on startup if environment variables are missing. The
  --allow-missing-vars flag disables this, but using it is generally a
  bad idea, since most environment variables are required (and the ones
  that aren't should be marked as such).
* Disables the query loggers

* Upgrade meilisearch-sdk to 0.4.0 for MeiliSearch 0.16 support

* Fix swapped Forge and Fabric labeling
This commit is contained in:
Aeledfyr
2020-11-05 09:38:03 -06:00
committed by GitHub
parent d477874535
commit c8e58a1e5b
7 changed files with 105 additions and 86 deletions

View File

@@ -119,21 +119,23 @@ lazy_static::lazy_static! {
pub async fn index_curseforge(
start_index: u32,
end_index: u32,
cache_path: &std::path::Path,
cache_path: Option<&std::path::Path>,
) -> Result<Vec<UploadSearchMod>, IndexingError> {
info!("Indexing curseforge mods!");
let start = std::time::Instant::now();
let mut docs_to_add: Vec<UploadSearchMod> = vec![];
let cache = std::fs::File::open(cache_path)
let cache = cache_path
.map(std::fs::File::open)
.and_then(Result::ok)
.map(std::io::BufReader::new)
.map(serde_json::from_reader::<_, Vec<u32>>);
let requested_ids;
// This caching system can't handle segmented indexing
if let Ok(Ok(mut cache)) = cache {
if let Some(Ok(mut cache)) = cache {
let end = cache.last().copied().unwrap_or(start_index);
cache.extend(end..end_index);
requested_ids = serde_json::to_string(&cache)?;
@@ -167,11 +169,13 @@ pub async fn index_curseforge(
// Only write to the cache if this doesn't skip mods at the start
// The caching system iterates through all ids normally past the last
// id in the cache, so the end_index shouldn't matter.
if start_index <= 1 {
let mut ids = curseforge_mods.iter().map(|m| m.id).collect::<Vec<_>>();
ids.sort_unstable();
if let Err(e) = std::fs::write(cache_path, serde_json::to_string(&ids)?) {
log::warn!("Error writing to index id cache: {}", e);
if let Some(path) = cache_path {
if start_index <= 1 {
let mut ids = curseforge_mods.iter().map(|m| m.id).collect::<Vec<_>>();
ids.sort_unstable();
if let Err(e) = std::fs::write(path, serde_json::to_string(&ids)?) {
log::warn!("Error writing to index id cache: {}", e);
}
}
}
@@ -192,8 +196,8 @@ pub async fn index_curseforge(
for file in curseforge_mod.latest_files {
for version in file.game_version {
match &*version {
"Fabric" => loaders.forge = true,
"Forge" => loaders.fabric = true,
"Fabric" => loaders.fabric = true,
"Forge" => loaders.forge = true,
"Rift" => loaders.rift = true,
_ => (),
}
@@ -309,7 +313,6 @@ pub async fn index_curseforge(
modified_timestamp: curseforge_mod.date_modified.timestamp(),
latest_version,
host: Cow::Borrowed("curseforge"),
empty: Cow::Borrowed("{}{}{}"),
})
}

View File

@@ -112,7 +112,6 @@ pub async fn index_local(pool: PgPool) -> Result<Vec<UploadSearchMod>, IndexingE
modified_timestamp: mod_data.updated.timestamp(),
latest_version,
host: Cow::Borrowed("modrinth"),
empty: Cow::Borrowed("{}{}{}"),
});
}
}
@@ -225,6 +224,5 @@ pub async fn query_one(
modified_timestamp: mod_data.updated.timestamp(),
latest_version,
host: Cow::Borrowed("modrinth"),
empty: Cow::Borrowed("{}{}{}"),
})
}

View File

@@ -63,7 +63,7 @@ pub async fn index_mods(
) -> Result<(), IndexingError> {
let mut docs_to_add: Vec<UploadSearchMod> = vec![];
let cache_path = std::path::PathBuf::from(std::env::var_os("INDEX_CACHE_PATH").unwrap());
let cache_path = std::env::var_os("INDEX_CACHE_PATH").map(std::path::PathBuf::from);
if settings.index_local {
docs_to_add.append(&mut index_local(pool.clone()).await?);
@@ -74,7 +74,7 @@ pub async fn index_mods(
.map(|i| i.parse().unwrap())
.unwrap_or(450_000);
docs_to_add.append(&mut index_curseforge(1, end_index, &cache_path).await?);
docs_to_add.append(&mut index_curseforge(1, end_index, cache_path.as_deref()).await?);
}
// Write Indices
@@ -270,7 +270,6 @@ fn default_settings() -> Settings {
"categories".to_string(),
"versions".to_string(),
"author".to_string(),
"empty".to_string(),
];
Settings::new()

View File

@@ -5,7 +5,6 @@ use actix_web::web::HttpResponse;
use chrono::{DateTime, Utc};
use meilisearch_sdk::client::Client;
use meilisearch_sdk::document::Document;
use meilisearch_sdk::search::Query;
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
use std::cmp::min;
@@ -84,12 +83,6 @@ pub struct UploadSearchMod {
pub modified_timestamp: i64,
pub host: Cow<'static, str>,
/// Must be "{}{}{}", a hack until meilisearch supports searches
/// with empty queries (https://github.com/meilisearch/MeiliSearch/issues/729)
// This is a Cow to prevent unnecessary allocations for a static
// string
pub empty: Cow<'static, str>,
}
#[derive(Serialize, Deserialize, Debug)]
@@ -155,23 +148,6 @@ pub async fn search_for_mod(
let offset = info.offset.as_deref().unwrap_or("0").parse()?;
let index = info.index.as_deref().unwrap_or("relevance");
let limit = info.limit.as_deref().unwrap_or("10").parse()?;
let search_query: &str = info
.query
.as_deref()
.filter(|s| !s.is_empty())
.unwrap_or("{}{}{}");
let mut query = Query::new(search_query)
.with_limit(min(100, limit))
.with_offset(offset);
if !filters.is_empty() {
query = query.with_filters(&filters);
}
if let Some(facets) = &info.facets {
let facets = serde_json::from_str::<Vec<Vec<&str>>>(facets)?;
query = query.with_facet_filters(facets);
}
let index = match index {
"relevance" => "relevance_mods",
@@ -181,14 +157,44 @@ pub async fn search_for_mod(
i => return Err(SearchError::InvalidIndex(i.to_string())),
};
let results = client
.get_index(index)
.await?
.search::<ResultSearchMod>(&query)
.await?;
let meilisearch_index = client.get_index(index).await?;
let mut query = meilisearch_index.search();
query.with_limit(min(100, limit)).with_offset(offset);
if let Some(search) = info.query.as_deref() {
if !search.is_empty() {
query.with_query(search);
}
}
if !filters.is_empty() {
query.with_filters(&filters);
}
// So the meilisearch sdk's lifetimes are... broken, to say the least
// They are overspecified and almost always wrong, and would generally
// just be better if they didn't specify them at all.
// They also decided to have this take a &[&[&str]], which is impossible
// to construct efficiently. Instead it should take impl Iterator<Item=&[&str]>,
// &[impl AsRef<[&str]>], or one of many other proper solutions to that issue.
let why_meilisearch;
let why_must_you_do_this;
if let Some(facets) = &info.facets {
why_meilisearch = serde_json::from_str::<Vec<Vec<&str>>>(facets)?;
why_must_you_do_this = why_meilisearch
.iter()
.map(|v| v as &[_])
.collect::<Vec<&[_]>>();
query.with_facet_filters(&why_must_you_do_this);
}
let results = query.execute::<ResultSearchMod>().await?;
Ok(SearchResults {
hits: results.hits,
hits: results.hits.into_iter().map(|r| r.result).collect(),
offset: results.offset,
limit: results.limit,
total_hits: results.nb_hits,