move to monorepo dir

This commit is contained in:
Jai A
2024-10-16 14:11:42 -07:00
parent ff7975773e
commit e3a3379615
756 changed files with 0 additions and 0 deletions

View File

@@ -0,0 +1,528 @@
use chrono::{DateTime, Utc};
use dashmap::DashMap;
use futures::TryStreamExt;
use itertools::Itertools;
use log::info;
use std::collections::HashMap;
use super::IndexingError;
use crate::database::models::loader_fields::{
QueryLoaderField, QueryLoaderFieldEnumValue, QueryVersionField, VersionField,
};
use crate::database::models::{
LoaderFieldEnumId, LoaderFieldEnumValueId, LoaderFieldId, ProjectId, VersionId,
};
use crate::models::projects::from_duplicate_version_fields;
use crate::models::v2::projects::LegacyProject;
use crate::routes::v2_reroute;
use crate::search::UploadSearchProject;
use sqlx::postgres::PgPool;
pub async fn index_local(pool: &PgPool) -> Result<Vec<UploadSearchProject>, IndexingError> {
info!("Indexing local projects!");
// todo: loaders, project type, game versions
struct PartialProject {
id: ProjectId,
name: String,
summary: String,
downloads: i32,
follows: i32,
icon_url: Option<String>,
updated: DateTime<Utc>,
approved: DateTime<Utc>,
slug: Option<String>,
color: Option<i32>,
license: String,
}
let db_projects = sqlx::query!(
"
SELECT m.id id, m.name name, m.summary summary, m.downloads downloads, m.follows follows,
m.icon_url icon_url, m.updated updated, m.approved approved, m.published, m.license license, m.slug slug, m.color
FROM mods m
WHERE m.status = ANY($1)
GROUP BY m.id;
",
&*crate::models::projects::ProjectStatus::iterator()
.filter(|x| x.is_searchable())
.map(|x| x.to_string())
.collect::<Vec<String>>(),
)
.fetch(pool)
.map_ok(|m| {
PartialProject {
id: ProjectId(m.id),
name: m.name,
summary: m.summary,
downloads: m.downloads,
follows: m.follows,
icon_url: m.icon_url,
updated: m.updated,
approved: m.approved.unwrap_or(m.published),
slug: m.slug,
color: m.color,
license: m.license,
}
})
.try_collect::<Vec<PartialProject>>()
.await?;
let project_ids = db_projects.iter().map(|x| x.id.0).collect::<Vec<i64>>();
struct PartialGallery {
url: String,
featured: bool,
ordering: i64,
}
info!("Indexing local gallery!");
let mods_gallery: DashMap<ProjectId, Vec<PartialGallery>> = sqlx::query!(
"
SELECT mod_id, image_url, featured, ordering
FROM mods_gallery
WHERE mod_id = ANY($1)
",
&*project_ids,
)
.fetch(pool)
.try_fold(
DashMap::new(),
|acc: DashMap<ProjectId, Vec<PartialGallery>>, m| {
acc.entry(ProjectId(m.mod_id))
.or_default()
.push(PartialGallery {
url: m.image_url,
featured: m.featured.unwrap_or(false),
ordering: m.ordering,
});
async move { Ok(acc) }
},
)
.await?;
info!("Indexing local categories!");
let categories: DashMap<ProjectId, Vec<(String, bool)>> = sqlx::query!(
"
SELECT mc.joining_mod_id mod_id, c.category name, mc.is_additional is_additional
FROM mods_categories mc
INNER JOIN categories c ON mc.joining_category_id = c.id
WHERE joining_mod_id = ANY($1)
",
&*project_ids,
)
.fetch(pool)
.try_fold(
DashMap::new(),
|acc: DashMap<ProjectId, Vec<(String, bool)>>, m| {
acc.entry(ProjectId(m.mod_id))
.or_default()
.push((m.name, m.is_additional));
async move { Ok(acc) }
},
)
.await?;
info!("Indexing local versions!");
let mut versions = index_versions(pool, project_ids.clone()).await?;
info!("Indexing local org owners!");
let mods_org_owners: DashMap<ProjectId, String> = sqlx::query!(
"
SELECT m.id mod_id, u.username
FROM mods m
INNER JOIN organizations o ON o.id = m.organization_id
INNER JOIN team_members tm ON tm.is_owner = TRUE and tm.team_id = o.team_id
INNER JOIN users u ON u.id = tm.user_id
WHERE m.id = ANY($1)
",
&*project_ids,
)
.fetch(pool)
.try_fold(DashMap::new(), |acc: DashMap<ProjectId, String>, m| {
acc.insert(ProjectId(m.mod_id), m.username);
async move { Ok(acc) }
})
.await?;
info!("Indexing local team owners!");
let mods_team_owners: DashMap<ProjectId, String> = sqlx::query!(
"
SELECT m.id mod_id, u.username
FROM mods m
INNER JOIN team_members tm ON tm.is_owner = TRUE and tm.team_id = m.team_id
INNER JOIN users u ON u.id = tm.user_id
WHERE m.id = ANY($1)
",
&project_ids,
)
.fetch(pool)
.try_fold(DashMap::new(), |acc: DashMap<ProjectId, String>, m| {
acc.insert(ProjectId(m.mod_id), m.username);
async move { Ok(acc) }
})
.await?;
info!("Getting all loader fields!");
let loader_fields: Vec<QueryLoaderField> = sqlx::query!(
"
SELECT DISTINCT id, field, field_type, enum_type, min_val, max_val, optional
FROM loader_fields lf
",
)
.fetch(pool)
.map_ok(|m| QueryLoaderField {
id: LoaderFieldId(m.id),
field: m.field,
field_type: m.field_type,
enum_type: m.enum_type.map(LoaderFieldEnumId),
min_val: m.min_val,
max_val: m.max_val,
optional: m.optional,
})
.try_collect()
.await?;
let loader_fields: Vec<&QueryLoaderField> = loader_fields.iter().collect();
info!("Getting all loader field enum values!");
let loader_field_enum_values: Vec<QueryLoaderFieldEnumValue> = sqlx::query!(
"
SELECT DISTINCT id, enum_id, value, ordering, created, metadata
FROM loader_field_enum_values lfev
ORDER BY enum_id, ordering, created DESC
"
)
.fetch(pool)
.map_ok(|m| QueryLoaderFieldEnumValue {
id: LoaderFieldEnumValueId(m.id),
enum_id: LoaderFieldEnumId(m.enum_id),
value: m.value,
ordering: m.ordering,
created: m.created,
metadata: m.metadata,
})
.try_collect()
.await?;
info!("Indexing loaders, project types!");
let mut uploads = Vec::new();
let total_len = db_projects.len();
let mut count = 0;
for project in db_projects {
count += 1;
info!("projects index prog: {count}/{total_len}");
let owner = if let Some((_, org_owner)) = mods_org_owners.remove(&project.id) {
org_owner
} else if let Some((_, team_owner)) = mods_team_owners.remove(&project.id) {
team_owner
} else {
println!(
"org owner not found for project {} id: {}!",
project.name, project.id.0
);
continue;
};
let license = match project.license.split(' ').next() {
Some(license) => license.to_string(),
None => project.license.clone(),
};
let open_source = match spdx::license_id(&license) {
Some(id) => id.is_osi_approved(),
_ => false,
};
let (featured_gallery, gallery) =
if let Some((_, gallery)) = mods_gallery.remove(&project.id) {
let mut vals = Vec::new();
let mut featured = None;
for x in gallery
.into_iter()
.sorted_by(|a, b| a.ordering.cmp(&b.ordering))
{
if x.featured && featured.is_none() {
featured = Some(x.url);
} else {
vals.push(x.url);
}
}
(featured, vals)
} else {
(None, vec![])
};
let (categories, display_categories) =
if let Some((_, categories)) = categories.remove(&project.id) {
let mut vals = Vec::new();
let mut featured_vals = Vec::new();
for (val, is_additional) in categories {
if !is_additional {
featured_vals.push(val.clone());
}
vals.push(val);
}
(vals, featured_vals)
} else {
(vec![], vec![])
};
if let Some(versions) = versions.remove(&project.id) {
// Aggregated project loader fields
let project_version_fields = versions
.iter()
.flat_map(|x| x.version_fields.clone())
.collect::<Vec<_>>();
let aggregated_version_fields = VersionField::from_query_json(
project_version_fields,
&loader_fields,
&loader_field_enum_values,
true,
);
let project_loader_fields = from_duplicate_version_fields(aggregated_version_fields);
// aggregated project loaders
let project_loaders = versions
.iter()
.flat_map(|x| x.loaders.clone())
.collect::<Vec<_>>();
for version in versions {
let version_fields = VersionField::from_query_json(
version.version_fields,
&loader_fields,
&loader_field_enum_values,
false,
);
let unvectorized_loader_fields = version_fields
.iter()
.map(|vf| (vf.field_name.clone(), vf.value.serialize_internal()))
.collect();
let mut loader_fields = from_duplicate_version_fields(version_fields);
let project_types = version.project_types;
let mut version_loaders = version.loaders;
// Uses version loaders, not project loaders.
let mut categories = categories.clone();
categories.append(&mut version_loaders.clone());
let display_categories = display_categories.clone();
categories.append(&mut version_loaders);
// SPECIAL BEHAVIOUR
// Todo: revisit.
// For consistency with v2 searching, we consider the loader field 'mrpack_loaders' to be a category.
// These were previously considered the loader, and in v2, the loader is a category for searching.
// So to avoid breakage or awkward conversions, we just consider those loader_fields to be categories.
// The loaders are kept in loader_fields as well, so that no information is lost on retrieval.
let mrpack_loaders = loader_fields
.get("mrpack_loaders")
.cloned()
.map(|x| {
x.into_iter()
.filter_map(|x| x.as_str().map(String::from))
.collect::<Vec<_>>()
})
.unwrap_or_default();
categories.extend(mrpack_loaders);
if loader_fields.contains_key("mrpack_loaders") {
categories.retain(|x| *x != "mrpack");
}
// SPECIAL BEHAVIOUR:
// For consitency with v2 searching, we manually input the
// client_side and server_side fields from the loader fields into
// separate loader fields.
// 'client_side' and 'server_side' remain supported by meilisearch even though they are no longer v3 fields.
let (_, v2_og_project_type) = LegacyProject::get_project_type(&project_types);
let (client_side, server_side) = v2_reroute::convert_side_types_v2(
&unvectorized_loader_fields,
Some(&v2_og_project_type),
);
if let Ok(client_side) = serde_json::to_value(client_side) {
loader_fields.insert("client_side".to_string(), vec![client_side]);
}
if let Ok(server_side) = serde_json::to_value(server_side) {
loader_fields.insert("server_side".to_string(), vec![server_side]);
}
let usp = UploadSearchProject {
version_id: crate::models::ids::VersionId::from(version.id).to_string(),
project_id: crate::models::ids::ProjectId::from(project.id).to_string(),
name: project.name.clone(),
summary: project.summary.clone(),
categories: categories.clone(),
display_categories: display_categories.clone(),
follows: project.follows,
downloads: project.downloads,
icon_url: project.icon_url.clone(),
author: owner.clone(),
date_created: project.approved,
created_timestamp: project.approved.timestamp(),
date_modified: project.updated,
modified_timestamp: project.updated.timestamp(),
license: license.clone(),
slug: project.slug.clone(),
// TODO
project_types,
gallery: gallery.clone(),
featured_gallery: featured_gallery.clone(),
open_source,
color: project.color.map(|x| x as u32),
loader_fields,
project_loader_fields: project_loader_fields.clone(),
// 'loaders' is aggregate of all versions' loaders
loaders: project_loaders.clone(),
};
uploads.push(usp);
}
}
}
Ok(uploads)
}
/// Per-version data collected by `index_versions` and consumed by `index_local`
/// when building one search document per version.
struct PartialVersion {
// Database id of the version.
id: VersionId,
// Loader names joined to this version (aggregated from `loaders.loader`).
loaders: Vec<String>,
// Project type names reachable through the version's loaders (aggregated from `project_types.name`).
project_types: Vec<String>,
// Raw rows of the `version_fields` table belonging to this version.
version_fields: Vec<QueryVersionField>,
}
/// Loads every version of the given projects together with its loaders,
/// project types and version fields.
///
/// The result maps each project id that has at least one version to the list of
/// its versions. Versions without loader rows or version field rows get empty
/// lists for the corresponding members.
///
/// # Errors
///
/// Returns an [`IndexingError`] if any of the database queries fails.
async fn index_versions(
    pool: &PgPool,
    project_ids: Vec<i64>,
) -> Result<HashMap<ProjectId, Vec<PartialVersion>>, IndexingError> {
    // Step 1: which versions belong to which project.
    let versions: HashMap<ProjectId, Vec<VersionId>> = sqlx::query!(
        "
SELECT v.id, v.mod_id
FROM versions v
WHERE mod_id = ANY($1)
",
        &project_ids,
    )
    .fetch(pool)
    .try_fold(
        HashMap::new(),
        |mut by_project: HashMap<ProjectId, Vec<VersionId>>, row| {
            by_project
                .entry(ProjectId(row.mod_id))
                .or_default()
                .push(VersionId(row.id));
            async move { Ok(by_project) }
        },
    )
    .await?;

    // Get project types, loaders
    #[derive(Default)]
    struct VersionLoaderData {
        loaders: Vec<String>,
        project_types: Vec<String>,
    }

    // Flat list of raw version ids, used as the parameter of the next two queries.
    let all_version_ids = versions
        .values()
        .flatten()
        .map(|version_id| version_id.0)
        .collect::<Vec<i64>>();

    // Step 2: loaders and project types per version.
    let loaders_ptypes: DashMap<VersionId, VersionLoaderData> = sqlx::query!(
        "
SELECT DISTINCT version_id,
ARRAY_AGG(DISTINCT l.loader) filter (where l.loader is not null) loaders,
ARRAY_AGG(DISTINCT pt.name) filter (where pt.name is not null) project_types
FROM versions v
INNER JOIN loaders_versions lv ON v.id = lv.version_id
INNER JOIN loaders l ON lv.loader_id = l.id
INNER JOIN loaders_project_types lpt ON lpt.joining_loader_id = l.id
INNER JOIN project_types pt ON pt.id = lpt.joining_project_type_id
WHERE v.id = ANY($1)
GROUP BY version_id
",
        &all_version_ids
    )
    .fetch(pool)
    .map_ok(|row| {
        (
            VersionId(row.version_id),
            VersionLoaderData {
                loaders: row.loaders.unwrap_or_default(),
                project_types: row.project_types.unwrap_or_default(),
            },
        )
    })
    .try_collect()
    .await?;

    // Step 3: version fields per version.
    let version_fields: DashMap<VersionId, Vec<QueryVersionField>> = sqlx::query!(
        "
SELECT version_id, field_id, int_value, enum_value, string_value
FROM version_fields
WHERE version_id = ANY($1)
",
        &all_version_ids,
    )
    .fetch(pool)
    .try_fold(
        DashMap::new(),
        |by_version: DashMap<VersionId, Vec<QueryVersionField>>, row| {
            let owner = VersionId(row.version_id);
            by_version.entry(owner).or_default().push(QueryVersionField {
                version_id: VersionId(row.version_id),
                field_id: LoaderFieldId(row.field_id),
                int_value: row.int_value,
                enum_value: row.enum_value.map(LoaderFieldEnumValueId),
                string_value: row.string_value,
            });
            async move { Ok(by_version) }
        },
    )
    .await?;

    // Step 4: stitch everything together into partial versions.
    // Entries are taken out with `remove` since every version appears in each map at most once.
    let res_versions = versions
        .into_iter()
        .map(|(project_id, version_ids)| {
            let partials = version_ids
                .into_iter()
                .map(|version_id| {
                    let loader_data = loaders_ptypes
                        .remove(&version_id)
                        .map(|(_, data)| data)
                        .unwrap_or_default();
                    let fields = version_fields
                        .remove(&version_id)
                        .map(|(_, fields)| fields)
                        .unwrap_or_default();

                    PartialVersion {
                        id: version_id,
                        loaders: loader_data.loaders,
                        project_types: loader_data.project_types,
                        version_fields: fields,
                    }
                })
                .collect::<Vec<_>>();
            (project_id, partials)
        })
        .collect::<HashMap<ProjectId, Vec<PartialVersion>>>();

    Ok(res_versions)
}

View File

@@ -0,0 +1,376 @@
/// This module is used for the indexing from any source.
pub mod local_import;
use crate::database::redis::RedisPool;
use crate::models::ids::base62_impl::to_base62;
use crate::search::{SearchConfig, UploadSearchProject};
use local_import::index_local;
use log::info;
use meilisearch_sdk::client::Client;
use meilisearch_sdk::indexes::Index;
use meilisearch_sdk::settings::{PaginationSetting, Settings};
use meilisearch_sdk::SwapIndexes;
use sqlx::postgres::PgPool;
use thiserror::Error;
/// Errors that can occur while building or updating the search indexes.
///
/// Every variant carrying a source error is convertible with `?` through the
/// `#[from]` attribute.
#[derive(Error, Debug)]
pub enum IndexingError {
// Any error reported by the Meilisearch SDK; note that the message does not include the source error.
#[error("Error while connecting to the MeiliSearch database")]
Indexing(#[from] meilisearch_sdk::errors::Error),
// JSON (de)serialization failure.
#[error("Error while serializing or deserializing JSON: {0}")]
Serde(#[from] serde_json::Error),
// Failure of a raw sqlx query.
#[error("Database Error: {0}")]
Sqlx(#[from] sqlx::error::Error),
// Failure reported by the project's database model layer (same message prefix as `Sqlx`).
#[error("Database Error: {0}")]
Database(#[from] crate::database::models::DatabaseError),
// Failure while reading environment configuration through dotenvy.
#[error("Environment Error")]
Env(#[from] dotenvy::Error),
// NOTE(review): not constructed anywhere in the visible part of this module — confirm it is still used.
#[error("Error while awaiting index creation task")]
Task,
}
// Maximum number of documents sent to the indexing database in a single request.
// The original rationale: if the request size is too large (>10MiB) the request
// fails with an error, and a max average size of 4KiB per project was assumed to
// stay under this cap.
// NOTE(review): 10 MiB / 4 KiB is roughly 2,560 documents, far below the value
// configured here — confirm whether the size cap described above still applies.
const MEILISEARCH_CHUNK_SIZE: usize = 10_000_000;

// Default time to wait for a Meilisearch task (settings update, index creation, swap).
const TIMEOUT: std::time::Duration = std::time::Duration::from_secs(60);
/// Deletes the documents of the given versions from every search index.
///
/// Both the current and the "next" (being prepared for a swap) index sets are
/// targeted, so a removed version does not reappear after the next swap.
/// Note that fetching the indexes goes through `get_indexes_for_indexing`,
/// which creates missing indexes and re-applies their settings.
///
/// # Errors
///
/// Returns the Meilisearch SDK error if fetching an index or enqueueing a
/// deletion fails.
pub async fn remove_documents(
    ids: &[crate::models::ids::VersionId],
    config: &SearchConfig,
) -> Result<(), meilisearch_sdk::errors::Error> {
    // The document ids are the same for every index: encode them once up front.
    let document_ids = ids.iter().map(|x| to_base62(x.0)).collect::<Vec<_>>();

    let mut indexes = get_indexes_for_indexing(config, false).await?;
    indexes.extend(get_indexes_for_indexing(config, true).await?);

    for index in indexes {
        index.delete_documents(&document_ids).await?;
    }

    Ok(())
}
/// Rebuilds the search indexes from the local database and swaps them in.
///
/// The documents are written into the "next" index set, which is then swapped
/// with the current one; the index set holding the previous data is deleted
/// afterwards.
///
/// # Errors
///
/// Returns an [`IndexingError`] if a database query, a Meilisearch request or
/// a Meilisearch task fails.
pub async fn index_projects(
    pool: PgPool,
    redis: RedisPool,
    config: &SearchConfig,
) -> Result<(), IndexingError> {
    info!("Indexing projects.");

    // First, ensure current index exists (so no error happens- current index should be worst-case empty, not missing)
    get_indexes_for_indexing(config, false).await?;

    // Then, delete the next index if it still exists
    let indices = get_indexes_for_indexing(config, true).await?;
    for index in indices {
        index.delete().await?;
    }

    // Recreate the next index for indexing
    let indices = get_indexes_for_indexing(config, true).await?;

    let all_loader_fields =
        crate::database::models::loader_fields::LoaderField::get_fields_all(&pool, &redis)
            .await?
            .into_iter()
            .map(|x| x.field)
            .collect::<Vec<_>>();

    let uploads = index_local(&pool).await?;
    // The field list is not needed afterwards, so it is moved rather than cloned.
    add_projects(&indices, uploads, all_loader_fields, config).await?;

    // Swap the index
    swap_index(config, "projects").await?;
    swap_index(config, "projects_filtered").await?;

    // Delete the now-old index
    for index in indices {
        index.delete().await?;
    }

    info!("Done adding projects.");
    Ok(())
}
/// Swaps the "next" variant of `index_name` with the current one and waits
/// (up to `TIMEOUT`) for the swap task to finish.
///
/// # Errors
///
/// Returns an [`IndexingError`] if the swap request or the swap task fails.
pub async fn swap_index(config: &SearchConfig, index_name: &str) -> Result<(), IndexingError> {
    let client = config.make_client();

    let next_name = config.get_index_name(index_name, true);
    let current_name = config.get_index_name(index_name, false);
    let swap = SwapIndexes {
        indexes: (next_name, current_name),
    };

    let task = client.swap_indexes([&swap]).await?;
    task.wait_for_completion(&client, None, Some(TIMEOUT))
        .await?;

    Ok(())
}
/// Returns the `projects` and `projects_filtered` indexes (in that order),
/// creating them or refreshing their settings as needed.
///
/// `next` selects the index set that is being prepared for the next swap
/// instead of the one currently used for searching. The two indexes differ
/// only in where `sort` sits in their ranking rules.
///
/// # Errors
///
/// Returns the Meilisearch SDK error if an index cannot be created or updated.
pub async fn get_indexes_for_indexing(
    config: &SearchConfig,
    next: bool, // Get the 'next' one
) -> Result<Vec<Index>, meilisearch_sdk::errors::Error> {
    // `sort` comes last: explicit sorting only breaks ties between equally relevant hits.
    const PROJECTS_RANKING_RULES: &[&str] = &[
        "words",
        "typo",
        "proximity",
        "attribute",
        "exactness",
        "sort",
    ];
    // `sort` comes first: the requested sort order dominates relevance.
    const PROJECTS_FILTERED_RANKING_RULES: &[&str] = &[
        "sort",
        "words",
        "typo",
        "proximity",
        "attribute",
        "exactness",
    ];

    let client = config.make_client();
    let unfiltered_name = config.get_index_name("projects", next);
    let filtered_name = config.get_index_name("projects_filtered", next);

    let unfiltered =
        create_or_update_index(&client, &unfiltered_name, Some(PROJECTS_RANKING_RULES)).await?;
    let filtered = create_or_update_index(
        &client,
        &filtered_name,
        Some(PROJECTS_FILTERED_RANKING_RULES),
    )
    .await?;

    Ok(vec![unfiltered, filtered])
}
async fn create_or_update_index(
client: &Client,
name: &str,
custom_rules: Option<&'static [&'static str]>,
) -> Result<Index, meilisearch_sdk::errors::Error> {
info!("Updating/creating index {}", name);
match client.get_index(name).await {
Ok(index) => {
info!("Updating index settings.");
let mut settings = default_settings();
if let Some(custom_rules) = custom_rules {
settings = settings.with_ranking_rules(custom_rules);
}
info!("Performing index settings set.");
index
.set_settings(&settings)
.await?
.wait_for_completion(client, None, Some(TIMEOUT))
.await?;
info!("Done performing index settings set.");
Ok(index)
}
_ => {
info!("Creating index.");
// Only create index and set settings if the index doesn't already exist
let task = client.create_index(name, Some("version_id")).await?;
let task = task
.wait_for_completion(client, None, Some(TIMEOUT))
.await?;
let index = task
.try_make_index(client)
.map_err(|x| x.unwrap_failure())?;
let mut settings = default_settings();
if let Some(custom_rules) = custom_rules {
settings = settings.with_ranking_rules(custom_rules);
}
index
.set_settings(&settings)
.await?
.wait_for_completion(client, None, Some(TIMEOUT))
.await?;
Ok(index)
}
}
}
/// Uploads `mods` to `index` in chunks of at most `MEILISEARCH_CHUNK_SIZE`
/// documents, waiting up to one hour for each chunk to be processed.
///
/// # Errors
///
/// Returns an [`IndexingError`] if an upload request or its task fails.
async fn add_to_index(
    client: &Client,
    index: &Index,
    mods: &[UploadSearchProject],
) -> Result<(), IndexingError> {
    // Document uploads can take far longer than the default task timeout.
    let upload_timeout = std::time::Duration::from_secs(3600);

    for batch in mods.chunks(MEILISEARCH_CHUNK_SIZE) {
        // `chunks` never yields an empty slice, so the first element always exists.
        info!(
            "Adding chunk starting with version id {}",
            batch[0].version_id
        );

        let task = index.add_or_replace(batch, Some("version_id")).await?;
        task.wait_for_completion(client, None, Some(upload_timeout))
            .await?;

        info!("Added chunk of {} projects to index", batch.len());
    }

    Ok(())
}
/// Adds `projects` to `index`.
///
/// `_additional_fields` is currently ignored: registering extra
/// filterable/displayed attributes is disabled (see the TODO below) and the
/// attribute lists are hard-coded in the default settings instead.
///
/// # Errors
///
/// Returns an [`IndexingError`] if uploading the documents fails.
async fn update_and_add_to_index(
    client: &Client,
    index: &Index,
    projects: &[UploadSearchProject],
    _additional_fields: &[String],
) -> Result<(), IndexingError> {
    // TODO: Uncomment this- hardcoding loader_fields is a band-aid fix, and will be fixed soon
    // let mut new_filterable_attributes: Vec<String> = index.get_filterable_attributes().await?;
    // let mut new_displayed_attributes = index.get_displayed_attributes().await?;

    // // Check if any 'additional_fields' are not already in the index
    // // Only add if they are not already in the index
    // let new_fields = additional_fields
    //     .iter()
    //     .filter(|x| !new_filterable_attributes.contains(x))
    //     .collect::<Vec<_>>();
    // if !new_fields.is_empty() {
    //     info!("Adding new fields to index: {:?}", new_fields);
    //     new_filterable_attributes.extend(new_fields.iter().map(|s: &&String| s.to_string()));
    //     new_displayed_attributes.extend(new_fields.iter().map(|s| s.to_string()));

    //     // Adds new fields to the index
    //     let filterable_task = index
    //         .set_filterable_attributes(new_filterable_attributes)
    //         .await?;
    //     let displayable_task = index
    //         .set_displayed_attributes(new_displayed_attributes)
    //         .await?;

    //     // Allow a long timeout for adding new attributes- it only needs to happen the once
    //     filterable_task
    //         .wait_for_completion(client, None, Some(TIMEOUT * 100))
    //         .await?;
    //     displayable_task
    //         .wait_for_completion(client, None, Some(TIMEOUT * 100))
    //         .await?;
    // }

    info!("Adding to index.");
    add_to_index(client, index, projects).await
}
/// Uploads `projects` to each of `indices`, one index after the other.
///
/// # Errors
///
/// Returns an [`IndexingError`] as soon as uploading to one index fails; the
/// remaining indexes are not attempted.
pub async fn add_projects(
    indices: &[Index],
    projects: Vec<UploadSearchProject>,
    additional_fields: Vec<String>,
    config: &SearchConfig,
) -> Result<(), IndexingError> {
    let client = config.make_client();
    let documents = projects.as_slice();
    let extra_fields = additional_fields.as_slice();

    for target in indices {
        update_and_add_to_index(&client, target, documents, extra_fields).await?;
    }

    Ok(())
}
/// Settings shared by every search index: documents are deduplicated per
/// `project_id`, and the displayed / searchable / sortable / filterable
/// attribute lists come from the `DEFAULT_*` constants of this module.
fn default_settings() -> Settings {
    // Raise the total-hits cap to the largest positive 32-bit integer.
    let pagination = PaginationSetting {
        max_total_hits: 2_147_483_647,
    };

    let settings = Settings::new()
        .with_distinct_attribute("project_id")
        .with_displayed_attributes(DEFAULT_DISPLAYED_ATTRIBUTES)
        .with_searchable_attributes(DEFAULT_SEARCHABLE_ATTRIBUTES)
        .with_sortable_attributes(DEFAULT_SORTABLE_ATTRIBUTES)
        .with_filterable_attributes(DEFAULT_ATTRIBUTES_FOR_FACETING);

    settings.with_pagination(pagination)
}
// Attributes configured as "displayed" on every index (see `default_settings`).
const DEFAULT_DISPLAYED_ATTRIBUTES: &[&str] = &[
"project_id",
"version_id",
"project_types",
"slug",
"author",
"name",
"summary",
"categories",
"display_categories",
"downloads",
"follows",
"icon_url",
"date_created",
"date_modified",
"latest_version",
"license",
"gallery",
"featured_gallery",
"color",
// Note: loader fields are not here, but are added on as they are needed (so they can be dynamically added depending on which exist).
// TODO: remove these- as they should be automatically populated. This is a band-aid fix.
"server_only",
"client_only",
"game_versions",
"singleplayer",
"client_and_server",
"mrpack_loaders",
// V2 legacy fields for logical consistency
"client_side",
"server_side",
// Non-searchable fields for filling out the Project model.
"license_url",
"monetization_status",
"team_id",
"thread_id",
"versions",
"date_published",
"date_queued",
"status",
"requested_status",
"games",
"organization_id",
"links",
"gallery_items",
"loaders", // search uses loaders as categories- this is purely for the Project model.
"project_loader_fields",
];
// Attributes the free-text query is matched against (see `default_settings`).
const DEFAULT_SEARCHABLE_ATTRIBUTES: &[&str] = &["name", "summary", "author", "slug"];
// Attributes configured as "filterable" on every index (see `default_settings`).
const DEFAULT_ATTRIBUTES_FOR_FACETING: &[&str] = &[
"categories",
"license",
"project_types",
"downloads",
"follows",
"author",
"name",
"date_created",
"created_timestamp",
"date_modified",
"modified_timestamp",
"project_id",
"open_source",
"color",
// Note: loader fields are not here, but are added on as they are needed (so they can be dynamically added depending on which exist).
// TODO: remove these- as they should be automatically populated. This is a band-aid fix.
"server_only",
"client_only",
"game_versions",
"singleplayer",
"client_and_server",
"mrpack_loaders",
// V2 legacy fields for logical consistency
"client_side",
"server_side",
];
// Attributes results may be sorted by; these are the ones referenced by the
// sort rules in `get_sort_index` of the parent search module.
const DEFAULT_SORTABLE_ATTRIBUTES: &[&str] =
&["downloads", "follows", "date_created", "date_modified"];

View File

@@ -0,0 +1,304 @@
use crate::models::error::ApiError;
use crate::models::projects::SearchRequest;
use actix_web::http::StatusCode;
use actix_web::HttpResponse;
use chrono::{DateTime, Utc};
use itertools::Itertools;
use meilisearch_sdk::client::Client;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::borrow::Cow;
use std::collections::HashMap;
use std::fmt::Write;
use thiserror::Error;
pub mod indexing;
/// Errors that can occur while serving a search request.
///
/// The HTTP status and error code reported for each variant are defined by the
/// `actix_web::ResponseError` implementation for this type.
#[derive(Error, Debug)]
pub enum SearchError {
// Any error reported by the Meilisearch SDK.
#[error("MeiliSearch Error: {0}")]
MeiliSearch(#[from] meilisearch_sdk::errors::Error),
// JSON (de)serialization failure, e.g. while parsing the `facets` parameter.
#[error("Error while serializing or deserializing JSON: {0}")]
Serde(#[from] serde_json::Error),
// A numeric request parameter (`offset`, `limit`) could not be parsed.
#[error("Error while parsing an integer: {0}")]
IntParsing(#[from] std::num::ParseIntError),
// Writing into the filter string failed.
#[error("Error while formatting strings: {0}")]
FormatError(#[from] std::fmt::Error),
// Failure while reading environment configuration through dotenvy.
#[error("Environment Error")]
Env(#[from] dotenvy::Error),
// The requested sort index name is not one of the supported values.
#[error("Invalid index to sort by: {0}")]
InvalidIndex(String),
}
impl actix_web::ResponseError for SearchError {
    /// Environment problems are reported as `500`; every other variant as `400`.
    fn status_code(&self) -> StatusCode {
        match self {
            SearchError::Env(..) => StatusCode::INTERNAL_SERVER_ERROR,
            SearchError::MeiliSearch(..)
            | SearchError::Serde(..)
            | SearchError::IntParsing(..)
            | SearchError::InvalidIndex(..)
            | SearchError::FormatError(..) => StatusCode::BAD_REQUEST,
        }
    }

    /// Builds the JSON error body: a short machine-readable code plus the
    /// `Display` text of the error as description.
    fn error_response(&self) -> HttpResponse {
        let code = match self {
            SearchError::Env(..) => "environment_error",
            SearchError::MeiliSearch(..) => "meilisearch_error",
            SearchError::Serde(..)
            | SearchError::IntParsing(..)
            | SearchError::InvalidIndex(..)
            | SearchError::FormatError(..) => "invalid_input",
        };

        HttpResponse::build(self.status_code()).json(ApiError {
            error: code,
            description: self.to_string(),
        })
    }
}
/// Connection settings for the Meilisearch instance plus the namespace used
/// to prefix index names.
#[derive(Debug, Clone)]
pub struct SearchConfig {
// Address of the Meilisearch server (read from `MEILISEARCH_ADDR` by `SearchConfig::new`).
pub address: String,
// API key for the Meilisearch server (read from `MEILISEARCH_KEY` by `SearchConfig::new`).
pub key: String,
// Prefix placed in front of every index name; empty when none was given to `SearchConfig::new`.
pub meta_namespace: String,
}
impl SearchConfig {
    /// Reads the connection settings from `MEILISEARCH_ADDR` and `MEILISEARCH_KEY`.
    ///
    /// # Panics
    ///
    /// Panics if either environment variable is not set; both are already
    /// checked for on startup.
    pub fn new(meta_namespace: Option<String>) -> Self {
        Self {
            address: dotenvy::var("MEILISEARCH_ADDR").expect("MEILISEARCH_ADDR not set"),
            key: dotenvy::var("MEILISEARCH_KEY").expect("MEILISEARCH_KEY not set"),
            meta_namespace: meta_namespace.unwrap_or_default(),
        }
    }

    /// Creates a Meilisearch client for the configured address and key.
    pub fn make_client(&self) -> Client {
        let address: &str = &self.address;
        let key: &str = &self.key;
        Client::new(address, Some(key))
    }

    /// Returns the namespaced name of `index`.
    ///
    /// `next` is true for the index being prepared for the next swap and false
    /// for the index currently used for searching. The resulting name has the
    /// shape `<namespace>_<index>_<suffix>` where the suffix is `_alt` or empty.
    pub fn get_index_name(&self, index: &str, next: bool) -> String {
        let suffix = match next {
            true => "_alt",
            false => "",
        };
        let namespace = &self.meta_namespace;
        format!("{namespace}_{index}_{suffix}")
    }
}
/// A project document used for uploading projects to MeiliSearch's indices.
/// This contains some extra data that is not returned by search results.
///
/// The indexer in this crate emits one document per version of a project.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct UploadSearchProject {
// Id of the version this document represents; used as the primary key of the indexes.
pub version_id: String,
// Id of the owning project; configured as the distinct attribute of the indexes.
pub project_id: String,
//
pub project_types: Vec<String>,
pub slug: Option<String>,
// Username of the project's owner.
pub author: String,
pub name: String,
pub summary: String,
pub categories: Vec<String>,
pub display_categories: Vec<String>,
pub follows: i32,
pub downloads: i32,
pub icon_url: Option<String>,
pub license: String,
pub gallery: Vec<String>,
pub featured_gallery: Option<String>,
/// RFC 3339 formatted creation date of the project
pub date_created: DateTime<Utc>,
/// Unix timestamp of the creation date of the project
pub created_timestamp: i64,
/// RFC 3339 formatted date/time of last major modification (update)
pub date_modified: DateTime<Utc>,
/// Unix timestamp of the last major modification
pub modified_timestamp: i64,
pub open_source: bool,
pub color: Option<u32>,
// Hidden fields to get the Project model out of the search results.
pub loaders: Vec<String>, // Search uses loaders as categories- this is purely for the Project model.
pub project_loader_fields: HashMap<String, Vec<serde_json::Value>>, // Aggregation of loader_fields from all versions of the project, allowing for reconstruction of the Project model.
// Flattened: each loader field is serialized as its own top-level attribute of the document.
#[serde(flatten)]
pub loader_fields: HashMap<String, Vec<serde_json::Value>>,
}
/// One page of search results, as returned by `search_for_project`.
#[derive(Serialize, Deserialize, Debug)]
pub struct SearchResults {
// The matching documents of this page.
pub hits: Vec<ResultSearchProject>,
// Page number reported by the search backend (0 when the backend did not report one).
pub page: usize,
// Page size reported by the search backend (0 when the backend did not report one).
pub hits_per_page: usize,
// Total number of matches reported by the search backend (0 when not reported).
pub total_hits: usize,
}
/// A project document as read back from a search index.
///
/// Mirrors `UploadSearchProject` minus the timestamp and `open_source`
/// attributes; dates are kept as strings.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct ResultSearchProject {
pub version_id: String,
pub project_id: String,
pub project_types: Vec<String>,
pub slug: Option<String>,
pub author: String,
pub name: String,
pub summary: String,
pub categories: Vec<String>,
pub display_categories: Vec<String>,
pub downloads: i32,
pub follows: i32,
pub icon_url: Option<String>,
/// RFC 3339 formatted creation date of the project
pub date_created: String,
/// RFC 3339 formatted modification date of the project
pub date_modified: String,
pub license: String,
pub gallery: Vec<String>,
pub featured_gallery: Option<String>,
pub color: Option<u32>,
// Hidden fields to get the Project model out of the search results.
pub loaders: Vec<String>, // Search uses loaders as categories- this is purely for the Project model.
pub project_loader_fields: HashMap<String, Vec<serde_json::Value>>, // Aggregation of loader_fields from all versions of the project, allowing for reconstruction of the Project model.
// Flattened: collects every remaining top-level attribute of the document.
#[serde(flatten)]
pub loader_fields: HashMap<String, Vec<serde_json::Value>>,
}
/// Maps the user-facing sort name to the index to query and the sort rule to apply.
///
/// Only `"downloads"` targets the `projects_filtered` index; every other
/// supported name (`"relevance"`, `"follows"`, `"updated"`, `"newest"`) uses
/// the `projects` index.
///
/// # Errors
///
/// Returns [`SearchError::InvalidIndex`] for any other name.
pub fn get_sort_index(
    config: &SearchConfig,
    index: &str,
) -> Result<(String, [&'static str; 1]), SearchError> {
    let (base_index, sort_rule) = match index {
        "relevance" => ("projects", "downloads:desc"),
        "downloads" => ("projects_filtered", "downloads:desc"),
        "follows" => ("projects", "follows:desc"),
        "updated" => ("projects", "date_modified:desc"),
        "newest" => ("projects", "date_created:desc"),
        unknown => return Err(SearchError::InvalidIndex(unknown.to_string())),
    };

    Ok((config.get_index_name(base_index, false), [sort_rule]))
}
/// Builds the Meilisearch filter expression for the legacy request parameters
/// (`facets`, `filters`, `version`).
///
/// `facets` is a JSON array shaped as AND-list of OR-lists; each OR entry is
/// either a single `key:value` string or an array of such strings that are
/// AND-ed together. Every `:` in a facet is replaced by ` = `. The `filters`
/// and `version` expressions are AND-ed onto the facet expression when present.
/// Returns an empty string when none of the parameters is set.
fn build_legacy_filter(info: &SearchRequest) -> Result<String, SearchError> {
    let facets = if let Some(facets) = &info.facets {
        Some(serde_json::from_str::<Vec<Vec<Value>>>(facets)?)
    } else {
        None
    };

    let filters: Cow<_> = match (info.filters.as_deref(), info.version.as_deref()) {
        (Some(f), Some(v)) => format!("({f}) AND ({v})").into(),
        (Some(f), None) => f.into(),
        (None, Some(v)) => v.into(),
        (None, None) => "".into(),
    };

    let mut filter_string = String::new();
    if let Some(facets) = facets {
        // Search can now *optionally* have a third inner array: So Vec(AND)<Vec(OR)<Vec(AND)< _ >>>
        // For every inner facet, we will check if it can be deserialized into a Vec<&str>, and do so.
        // If not, we will assume it is a single facet and wrap it in a Vec.
        let facets: Vec<Vec<Vec<String>>> = facets
            .into_iter()
            .map(|facets| {
                facets
                    .into_iter()
                    .map(|facet| {
                        if facet.is_array() {
                            serde_json::from_value::<Vec<String>>(facet).unwrap_or_default()
                        } else {
                            vec![serde_json::from_value::<String>(facet).unwrap_or_default()]
                        }
                    })
                    .collect_vec()
            })
            .collect_vec();

        // Every nesting level is wrapped in its own pair of parentheses.
        let facet_expression = facets
            .iter()
            .map(|or_list| {
                let alternatives = or_list
                    .iter()
                    .map(|and_list| {
                        let conditions = and_list
                            .iter()
                            .map(|facet| facet.replace(':', " = "))
                            .join(" AND ");
                        format!("({conditions})")
                    })
                    .join(" OR ");
                format!("({alternatives})")
            })
            .join(" AND ");
        write!(filter_string, "({facet_expression})")?;

        if !filters.is_empty() {
            write!(filter_string, " AND ({filters})")?;
        }
    } else {
        filter_string.push_str(&filters);
    }

    Ok(filter_string)
}

/// Runs a search request against Meilisearch.
///
/// Request parameters and their defaults: `offset` (0), `limit` (10, capped at
/// 100), `index` (the sort name, `"relevance"`), `query` (empty). When
/// `new_filters` is set it is passed to Meilisearch as-is; otherwise the filter
/// is derived from `facets`, `filters` and `version`.
///
/// `offset` and `limit` are converted to a 1-based page number and a page
/// size. A `limit` of 0 requests page 1 with a page size of 0 instead of
/// dividing by zero.
///
/// # Errors
///
/// Returns a [`SearchError`] if a numeric parameter or the `facets` JSON
/// cannot be parsed, the sort name is unknown, or Meilisearch reports an error.
pub async fn search_for_project(
    info: &SearchRequest,
    config: &SearchConfig,
) -> Result<SearchResults, SearchError> {
    let client = config.make_client();

    let offset: usize = info.offset.as_deref().unwrap_or("0").parse()?;
    let index = info.index.as_deref().unwrap_or("relevance");
    let limit = info
        .limit
        .as_deref()
        .unwrap_or("10")
        .parse::<usize>()?
        .min(100);

    let (index_name, sort_rules) = get_sort_index(config, index)?;
    let meilisearch_index = client.get_index(index_name).await?;

    // Convert offset and limit to page and hits_per_page
    let hits_per_page = limit;
    // `checked_div` guards against a caller-supplied limit of 0.
    let page = offset.checked_div(limit).unwrap_or(0) + 1;

    // Declared before the query so it outlives the borrow taken by `with_filter`.
    let filter_string = if info.new_filters.is_none() {
        build_legacy_filter(info)?
    } else {
        String::new()
    };

    let results = {
        let mut query = meilisearch_index.search();
        query
            .with_page(page)
            .with_hits_per_page(hits_per_page)
            .with_query(info.query.as_deref().unwrap_or_default())
            .with_sort(&sort_rules);

        if let Some(new_filters) = info.new_filters.as_deref() {
            query.with_filter(new_filters);
        } else if !filter_string.is_empty() {
            query.with_filter(&filter_string);
        }

        query.execute::<ResultSearchProject>().await?
    };

    Ok(SearchResults {
        hits: results.hits.into_iter().map(|r| r.result).collect(),
        page: results.page.unwrap_or_default(),
        hits_per_page: results.hits_per_page.unwrap_or_default(),
        total_hits: results.total_hits.unwrap_or_default(),
    })
}