You've already forked AstralRinth
forked from didirus/AstralRinth
* Update Java dependencies * Baselint lint fixes * Update Rust version * Update actix-files 0.6.6 -> 0.6.8 * Update actix-http 3.11.0 -> 3.11.2 * Update actix-rt 2.10.0 -> 2.11.0 * Update async_zip 0.0.17 -> 0.0.18 * Update async-compression 0.4.27 -> 0.4.32 * Update async-trait 0.1.88 -> 0.1.89 * Update async-tungstenite 0.30.0 -> 0.31.0 * Update const_format 0.2.34 -> 0.2.35 * Update bitflags 2.9.1 -> 2.9.4 * Update bytemuck 1.23.1 -> 1.24.0 * Update typed-path 0.11.0 -> 0.12.0 * Update chrono 0.4.41 -> 0.4.42 * Update cidre 0.11.2 -> 0.11.3 * Update clap 4.5.43 -> 4.5.48 * Update data-url 0.3.1 -> 0.3.2 * Update discord-rich-presence 0.2.5 -> 1.0.0 * Update enumset 1.1.7 -> 1.1.10 * Update flate2 1.1.2 -> 1.1.4 * Update hyper 1.6.0 -> 1.7.0 * Update hyper-util 0.1.16 -> 0.1.17 * Update iana-time-zone 0.1.63 -> 0.1.64 * Update image 0.25.6 -> 0.25.8 * Update indexmap 2.10.0 -> 2.11.4 * Update json-patch 4.0.0 -> 4.1.0 * Update meilisearch-sdk 0.29.1 -> 0.30.0 * Update clickhouse 0.13.3 -> 0.14.0 * Fix some prettier things * Update lettre 0.11.18 -> 0.11.19 * Update phf 0.12.1 -> 0.13.1 * Update png 0.17.16 -> 0.18.0 * Update quick-xml 0.38.1 -> 0.38.3 * Update redis 0.32.4 -> 0.32.7 * Update regex 1.11.1 -> 1.11.3 * Update reqwest 0.12.22 -> 0.12.23 * Update rust_decimal 1.37.2 -> 1.38.0 * Update rust-s3 0.35.1 -> 0.37.0 * Update serde 1.0.219 -> 1.0.228 * Update serde_bytes 0.11.17 -> 0.11.19 * Update serde_json 1.0.142 -> 1.0.145 * Update serde_with 3.14.0 -> 3.15.0 * Update sentry 0.42.0 -> 0.45.0 and sentry-actix 0.42.0 -> 0.45.0 * Update spdx 0.10.9 -> 0.12.0 * Update sysinfo 0.36.1 -> 0.37.2 * Update tauri 2.7.0 -> 2.8.5 * Update tauri-build 2.3.1 -> 2.4.1 * Update tauri-plugin-deep-link 2.4.1 -> 2.4.3 * Update tauri-plugin-dialog 2.3.2 -> 2.4.0 * Update tauri-plugin-http 2.5.1 -> 2.5.2 * Update tauri-plugin-opener 2.4.0 -> 2.5.0 * Update tauri-plugin-os 2.3.0 -> 2.3.1 * Update tauri-plugin-single-instance 2.3.2 -> 2.3.4 * Update tempfile 3.20.0 -> 3.23.0 * 
Update thiserror 2.0.12 -> 2.0.17 * Update tracing-subscriber 0.3.19 -> 0.3.20 * Update url 2.5.4 -> 2.5.7 * Update uuid 1.17.0 -> 1.18.1 * Update webp 0.3.0 -> 0.3.1 * Update whoami 1.6.0 -> 1.6.1 * Note that windows and windows-core can't be updated yet * Update zbus 5.9.0 -> 5.11.0 * Update zip 4.3.0 -> 6.0.0 * Fix build * Enforce rustls crypto provider * Refresh Cargo.lock * Update transitive dependencies * Bump Gradle usage to Java 17 * Use ubuntu-latest consistently across workflows * Fix lint * Fix lint in Rust * Update native-dialog 0.9.0 -> 0.9.2 * Update regex 1.11.3 -> 1.12.2 * Update reqwest 0.12.23 -> 0.12.24 * Update rust_decimal 1.38.0 -> 1.39.0 * Remaining lock-only updates * chore: move TLS impl of some other dependencies to aws-lc-rs The AWS bloatware "virus" expands by sheer force of widespread adoption by the ecosystem... 🫣 * chore(fmt): run Tombi --------- Co-authored-by: Alejandro González <me@alegon.dev>
408 lines
12 KiB
Rust
408 lines
12 KiB
Rust
//! This module is used for indexing from any source.
|
|
pub mod local_import;
|
|
|
|
use std::time::Duration;
|
|
|
|
use crate::database::redis::RedisPool;
|
|
use crate::search::{SearchConfig, UploadSearchProject};
|
|
use ariadne::ids::base62_impl::to_base62;
|
|
use futures::StreamExt;
|
|
use futures::stream::FuturesUnordered;
|
|
use local_import::index_local;
|
|
use meilisearch_sdk::client::{Client, SwapIndexes};
|
|
use meilisearch_sdk::indexes::Index;
|
|
use meilisearch_sdk::settings::{PaginationSetting, Settings};
|
|
use sqlx::postgres::PgPool;
|
|
use thiserror::Error;
|
|
use tracing::info;
|
|
|
|
#[derive(Error, Debug)]
|
|
pub enum IndexingError {
|
|
#[error("Error while connecting to the MeiliSearch database")]
|
|
Indexing(#[from] meilisearch_sdk::errors::Error),
|
|
#[error("Error while serializing or deserializing JSON: {0}")]
|
|
Serde(#[from] serde_json::Error),
|
|
#[error("Database Error: {0}")]
|
|
Sqlx(#[from] sqlx::error::Error),
|
|
#[error("Database Error: {0}")]
|
|
Database(#[from] crate::database::models::DatabaseError),
|
|
#[error("Environment Error")]
|
|
Env(#[from] dotenvy::Error),
|
|
#[error("Error while awaiting index creation task")]
|
|
Task,
|
|
}
|
|
|
|
// The chunk size for adding projects to the indexing database. If the request size
// is too large (>10MiB) then the request fails with an error. This chunk size
// assumes a max average size of 4KiB per project to avoid this cap.
// NOTE(review): this value is an element count (it is passed to `slice::chunks`
// in `add_to_index`), but 10MiB / 4KiB would suggest ~2560 elements; a value of
// 10,000,000 effectively disables chunking for realistic project counts.
// Confirm the intended unit before relying on the 10MiB reasoning above.
const MEILISEARCH_CHUNK_SIZE: usize = 10000000;

// Default upper bound on waiting for a single Meilisearch task to complete.
const TIMEOUT: std::time::Duration = std::time::Duration::from_secs(60);
|
|
|
|
pub async fn remove_documents(
|
|
ids: &[crate::models::ids::VersionId],
|
|
config: &SearchConfig,
|
|
) -> Result<(), meilisearch_sdk::errors::Error> {
|
|
let mut indexes = get_indexes_for_indexing(config, false).await?;
|
|
let mut indexes_next = get_indexes_for_indexing(config, true).await?;
|
|
indexes.append(&mut indexes_next);
|
|
|
|
let client = config.make_client()?;
|
|
let client = &client;
|
|
let mut deletion_tasks = FuturesUnordered::new();
|
|
|
|
for index in &indexes {
|
|
deletion_tasks.push(async move {
|
|
// After being successfully submitted, Meilisearch tasks are executed
|
|
// asynchronously, so wait some time for them to complete
|
|
index
|
|
.delete_documents(
|
|
&ids.iter().map(|x| to_base62(x.0)).collect::<Vec<_>>(),
|
|
)
|
|
.await?
|
|
.wait_for_completion(
|
|
client,
|
|
None,
|
|
Some(Duration::from_secs(15)),
|
|
)
|
|
.await
|
|
});
|
|
}
|
|
|
|
while let Some(result) = deletion_tasks.next().await {
|
|
result?;
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
pub async fn index_projects(
|
|
pool: PgPool,
|
|
redis: RedisPool,
|
|
config: &SearchConfig,
|
|
) -> Result<(), IndexingError> {
|
|
info!("Indexing projects.");
|
|
|
|
// First, ensure current index exists (so no error happens- current index should be worst-case empty, not missing)
|
|
get_indexes_for_indexing(config, false).await?;
|
|
|
|
// Then, delete the next index if it still exists
|
|
let indices = get_indexes_for_indexing(config, true).await?;
|
|
for index in indices {
|
|
index.delete().await?;
|
|
}
|
|
// Recreate the next index for indexing
|
|
let indices = get_indexes_for_indexing(config, true).await?;
|
|
|
|
let all_loader_fields =
|
|
crate::database::models::loader_fields::LoaderField::get_fields_all(
|
|
&pool, &redis,
|
|
)
|
|
.await?
|
|
.into_iter()
|
|
.map(|x| x.field)
|
|
.collect::<Vec<_>>();
|
|
|
|
let uploads = index_local(&pool).await?;
|
|
add_projects(&indices, uploads, all_loader_fields.clone(), config).await?;
|
|
|
|
// Swap the index
|
|
swap_index(config, "projects").await?;
|
|
swap_index(config, "projects_filtered").await?;
|
|
|
|
// Delete the now-old index
|
|
for index in indices {
|
|
index.delete().await?;
|
|
}
|
|
|
|
info!("Done adding projects.");
|
|
Ok(())
|
|
}
|
|
|
|
pub async fn swap_index(
|
|
config: &SearchConfig,
|
|
index_name: &str,
|
|
) -> Result<(), IndexingError> {
|
|
let client = config.make_client()?;
|
|
let index_name_next = config.get_index_name(index_name, true);
|
|
let index_name = config.get_index_name(index_name, false);
|
|
let swap_indices = SwapIndexes {
|
|
indexes: (index_name_next, index_name),
|
|
rename: None,
|
|
};
|
|
client
|
|
.swap_indexes([&swap_indices])
|
|
.await?
|
|
.wait_for_completion(&client, None, Some(TIMEOUT))
|
|
.await?;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
pub async fn get_indexes_for_indexing(
|
|
config: &SearchConfig,
|
|
next: bool, // Get the 'next' one
|
|
) -> Result<Vec<Index>, meilisearch_sdk::errors::Error> {
|
|
let client = config.make_client()?;
|
|
let project_name = config.get_index_name("projects", next);
|
|
let project_filtered_name =
|
|
config.get_index_name("projects_filtered", next);
|
|
let projects_index = create_or_update_index(
|
|
&client,
|
|
&project_name,
|
|
Some(&[
|
|
"words",
|
|
"typo",
|
|
"proximity",
|
|
"attribute",
|
|
"exactness",
|
|
"sort",
|
|
]),
|
|
)
|
|
.await?;
|
|
let projects_filtered_index = create_or_update_index(
|
|
&client,
|
|
&project_filtered_name,
|
|
Some(&[
|
|
"sort",
|
|
"words",
|
|
"typo",
|
|
"proximity",
|
|
"attribute",
|
|
"exactness",
|
|
]),
|
|
)
|
|
.await?;
|
|
|
|
Ok(vec![projects_index, projects_filtered_index])
|
|
}
|
|
|
|
async fn create_or_update_index(
|
|
client: &Client,
|
|
name: &str,
|
|
custom_rules: Option<&'static [&'static str]>,
|
|
) -> Result<Index, meilisearch_sdk::errors::Error> {
|
|
info!("Updating/creating index {}", name);
|
|
|
|
match client.get_index(name).await {
|
|
Ok(index) => {
|
|
info!("Updating index settings.");
|
|
|
|
let mut settings = default_settings();
|
|
|
|
if let Some(custom_rules) = custom_rules {
|
|
settings = settings.with_ranking_rules(custom_rules);
|
|
}
|
|
|
|
info!("Performing index settings set.");
|
|
index
|
|
.set_settings(&settings)
|
|
.await?
|
|
.wait_for_completion(client, None, Some(TIMEOUT))
|
|
.await?;
|
|
info!("Done performing index settings set.");
|
|
|
|
Ok(index)
|
|
}
|
|
_ => {
|
|
info!("Creating index.");
|
|
|
|
// Only create index and set settings if the index doesn't already exist
|
|
let task = client.create_index(name, Some("version_id")).await?;
|
|
let task = task
|
|
.wait_for_completion(client, None, Some(TIMEOUT))
|
|
.await?;
|
|
let index = task
|
|
.try_make_index(client)
|
|
.map_err(|x| x.unwrap_failure())?;
|
|
|
|
let mut settings = default_settings();
|
|
|
|
if let Some(custom_rules) = custom_rules {
|
|
settings = settings.with_ranking_rules(custom_rules);
|
|
}
|
|
|
|
index
|
|
.set_settings(&settings)
|
|
.await?
|
|
.wait_for_completion(client, None, Some(TIMEOUT))
|
|
.await?;
|
|
|
|
Ok(index)
|
|
}
|
|
}
|
|
}
|
|
|
|
async fn add_to_index(
|
|
client: &Client,
|
|
index: &Index,
|
|
mods: &[UploadSearchProject],
|
|
) -> Result<(), IndexingError> {
|
|
for chunk in mods.chunks(MEILISEARCH_CHUNK_SIZE) {
|
|
info!(
|
|
"Adding chunk starting with version id {}",
|
|
chunk[0].version_id
|
|
);
|
|
index
|
|
.add_or_replace(chunk, Some("version_id"))
|
|
.await?
|
|
.wait_for_completion(
|
|
client,
|
|
None,
|
|
Some(std::time::Duration::from_secs(3600)),
|
|
)
|
|
.await?;
|
|
info!("Added chunk of {} projects to index", chunk.len());
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Adds `projects` to `index`, after (in principle) extending the index's
/// filterable/displayed attributes with any new loader fields.
///
/// NOTE: the attribute-update step is currently disabled (see the commented
/// block below); loader fields are hard-coded into the default attribute
/// lists as a band-aid, which is why `_additional_fields` is presently unused.
async fn update_and_add_to_index(
    client: &Client,
    index: &Index,
    projects: &[UploadSearchProject],
    _additional_fields: &[String],
) -> Result<(), IndexingError> {
    // TODO: Uncomment this- hardcoding loader_fields is a band-aid fix, and will be fixed soon
    // let mut new_filterable_attributes: Vec<String> = index.get_filterable_attributes().await?;
    // let mut new_displayed_attributes = index.get_displayed_attributes().await?;

    // // Check if any 'additional_fields' are not already in the index
    // // Only add if they are not already in the index
    // let new_fields = additional_fields
    //     .iter()
    //     .filter(|x| !new_filterable_attributes.contains(x))
    //     .collect::<Vec<_>>();
    // if !new_fields.is_empty() {
    //     info!("Adding new fields to index: {:?}", new_fields);
    //     new_filterable_attributes.extend(new_fields.iter().map(|s: &&String| s.to_string()));
    //     new_displayed_attributes.extend(new_fields.iter().map(|s| s.to_string()));

    //     // Adds new fields to the index
    //     let filterable_task = index
    //         .set_filterable_attributes(new_filterable_attributes)
    //         .await?;
    //     let displayable_task = index
    //         .set_displayed_attributes(new_displayed_attributes)
    //         .await?;

    //     // Allow a long timeout for adding new attributes- it only needs to happen the once
    //     filterable_task
    //         .wait_for_completion(client, None, Some(TIMEOUT * 100))
    //         .await?;
    //     displayable_task
    //         .wait_for_completion(client, None, Some(TIMEOUT * 100))
    //         .await?;
    // }

    info!("Adding to index.");

    add_to_index(client, index, projects).await?;

    Ok(())
}
|
|
|
|
pub async fn add_projects(
|
|
indices: &[Index],
|
|
projects: Vec<UploadSearchProject>,
|
|
additional_fields: Vec<String>,
|
|
config: &SearchConfig,
|
|
) -> Result<(), IndexingError> {
|
|
let client = config.make_client()?;
|
|
for index in indices {
|
|
update_and_add_to_index(&client, index, &projects, &additional_fields)
|
|
.await?;
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn default_settings() -> Settings {
|
|
Settings::new()
|
|
.with_distinct_attribute(Some("project_id"))
|
|
.with_displayed_attributes(DEFAULT_DISPLAYED_ATTRIBUTES)
|
|
.with_searchable_attributes(DEFAULT_SEARCHABLE_ATTRIBUTES)
|
|
.with_sortable_attributes(DEFAULT_SORTABLE_ATTRIBUTES)
|
|
.with_filterable_attributes(DEFAULT_ATTRIBUTES_FOR_FACETING)
|
|
.with_pagination(PaginationSetting {
|
|
max_total_hits: 2147483647,
|
|
})
|
|
}
|
|
|
|
// Attributes returned in search results (used to fill out the Project model).
const DEFAULT_DISPLAYED_ATTRIBUTES: &[&str] = &[
    "project_id",
    "version_id",
    "project_types",
    "slug",
    "author",
    "name",
    "summary",
    "categories",
    "display_categories",
    "downloads",
    "follows",
    "icon_url",
    "date_created",
    "date_modified",
    "latest_version",
    "license",
    "gallery",
    "featured_gallery",
    "color",
    // Note: loader fields are not here, but are added on as they are needed (so they can be dynamically added depending on which exist).
    // TODO: remove these- as they should be automatically populated. This is a band-aid fix.
    "environment",
    "game_versions",
    "mrpack_loaders",
    // V2 legacy fields for logical consistency
    "client_side",
    "server_side",
    // Non-searchable fields for filling out the Project model.
    "license_url",
    "monetization_status",
    "team_id",
    "thread_id",
    "versions",
    "date_published",
    "date_queued",
    "status",
    "requested_status",
    "games",
    "organization_id",
    "links",
    "gallery_items",
    "loaders", // search uses loaders as categories- this is purely for the Project model.
    "project_loader_fields",
];
|
|
|
|
// Attributes matched against free-text search queries, in relevance order.
const DEFAULT_SEARCHABLE_ATTRIBUTES: &[&str] =
    &["name", "summary", "author", "slug"];
|
|
|
|
// Attributes usable in filter expressions and facet counts.
const DEFAULT_ATTRIBUTES_FOR_FACETING: &[&str] = &[
    "categories",
    "license",
    "project_types",
    "downloads",
    "follows",
    "author",
    "name",
    "date_created",
    "created_timestamp",
    "date_modified",
    "modified_timestamp",
    "project_id",
    "open_source",
    "color",
    // Note: loader fields are not here, but are added on as they are needed (so they can be dynamically added depending on which exist).
    // TODO: remove these- as they should be automatically populated. This is a band-aid fix.
    "environment",
    "game_versions",
    "mrpack_loaders",
    // V2 legacy fields for logical consistency
    "client_side",
    "server_side",
];
|
|
|
|
// Attributes that search queries may sort results by.
const DEFAULT_SORTABLE_ATTRIBUTES: &[&str] =
    &["downloads", "follows", "date_created", "date_modified"];
|