You've already forked AstralRinth
forked from didirus/AstralRinth
f0224dfff7
* initial elasticsearch impl * working elastic cluster * replace SearchError with ApiError for preparation of search backend * start factoring meili out to trait * move meili to backend * update routes to use search backend trait * wip * Update projects.rs * search backend is only init'd once in config * wip * wip: backend agnostic * change search internal routes to delegate to backend * initial elasticsearch impl * fix filtering * elastic impl * refactor indexing into its own module * clean up elastic code * fix ci * fix tests * fix elastic health check * fix up env rebase * fix compile * dummy commit to update github pr * Fix rebase * Elastic basic https auth * Fix duplicate projects showing up * Fix up tests * Replace search `ApiErrors` with `eyre::Reports`, propagate background task errors * clean up agents files * make index chunk size configurable * make `match_phrase` in elastic case-insensitive * use current/next indices and swap between them * test case for error body * Fix failing case * da merge * factor out common stuff from search backends * allow fetching hit metadata from search results * allow customising elasticsearch search config * bit of docs * add mappings to indices for elastic * Implement Typesense * wip * fix up some sort fields stuff * use different approach to filterable field sets * remove a bunch of search fields which weren't used for filtering * bucket text matches * Bucketing by text_match for typesense * fix tombi lint * fix some sentry errors and dont prioritise 2+ term matches * tweak ts query settings * expose some more search settings * query sort changes * small fixes * should fix pagination stuff * fix healthcheck maybe * ragebait ci * tests * tests * revert environment
350 lines
11 KiB
Rust
350 lines
11 KiB
Rust
use crate::database::redis::RedisPool;
|
|
use crate::models::exp;
|
|
use crate::models::exp::minecraft::JavaServerPing;
|
|
use crate::models::ids::{ProjectId, VersionId};
|
|
use crate::queue::server_ping;
|
|
use crate::routes::ApiError;
|
|
use crate::{database::PgPool, env::ENV};
|
|
use ariadne::ids::base62_impl::parse_base62;
|
|
use async_trait::async_trait;
|
|
use chrono::{DateTime, Utc};
|
|
use serde::{Deserialize, Serialize};
|
|
use serde_json::Value;
|
|
use std::{collections::HashMap, str::FromStr};
|
|
use thiserror::Error;
|
|
use utoipa::ToSchema;
|
|
|
|
pub mod backend;
|
|
pub mod indexing;
|
|
|
|
/// Search parameters which can fit in a URL query string.
|
|
///
|
|
/// Used with `GET /*/search` endpoints.
|
|
///
|
|
/// Can be converted into a [`SearchRequest`] using [`From`].
|
|
#[derive(Serialize, Deserialize, Debug)]
|
|
pub struct SearchQuery {
|
|
pub query: Option<String>,
|
|
pub offset: Option<String>,
|
|
pub index: Option<String>,
|
|
pub limit: Option<String>,
|
|
|
|
pub new_filters: Option<String>,
|
|
|
|
// TODO: Deprecated values below. WILL BE REMOVED V3!
|
|
pub facets: Option<String>,
|
|
pub filters: Option<String>,
|
|
pub version: Option<String>,
|
|
}
|
|
|
|
/// Search parameters which are more complicated and more suitable for a POST
|
|
/// request body.
|
|
///
|
|
/// Used with `POST /*/search` endpoints.
|
|
///
|
|
/// Can be converted from a [`SearchQuery`] using [`From`].
|
|
#[derive(Serialize, Deserialize, Debug)]
|
|
pub struct SearchRequest {
|
|
pub query: Option<String>,
|
|
pub offset: Option<String>,
|
|
pub index: Option<String>,
|
|
pub limit: Option<String>,
|
|
#[serde(default)]
|
|
pub show_metadata: bool,
|
|
#[serde(default)]
|
|
pub elasticsearch_config: backend::elasticsearch::RequestConfig,
|
|
#[serde(default)]
|
|
pub typesense_config: backend::typesense::RequestConfig,
|
|
|
|
pub new_filters: Option<String>,
|
|
|
|
pub facets: Option<String>,
|
|
pub filters: Option<String>,
|
|
pub version: Option<String>,
|
|
}
|
|
|
|
impl From<SearchQuery> for SearchRequest {
|
|
fn from(query: SearchQuery) -> Self {
|
|
Self {
|
|
query: query.query,
|
|
offset: query.offset,
|
|
index: query.index,
|
|
limit: query.limit,
|
|
show_metadata: false,
|
|
elasticsearch_config:
|
|
backend::elasticsearch::RequestConfig::default(),
|
|
typesense_config: backend::typesense::RequestConfig::default(),
|
|
new_filters: query.new_filters,
|
|
facets: query.facets,
|
|
filters: query.filters,
|
|
version: query.version,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[async_trait]
|
|
pub trait SearchBackend: Send + Sync {
|
|
async fn search_for_project(
|
|
&self,
|
|
info: &SearchRequest,
|
|
redis: &RedisPool,
|
|
) -> Result<SearchResults, ApiError> {
|
|
let mut results = self.search_for_project_raw(info).await?;
|
|
hydrate_search_results(&mut results.hits, redis)
|
|
.await
|
|
.map_err(ApiError::Internal)?;
|
|
Ok(results)
|
|
}
|
|
|
|
async fn search_for_project_raw(
|
|
&self,
|
|
info: &SearchRequest,
|
|
) -> Result<SearchResults, ApiError>;
|
|
|
|
async fn index_projects(
|
|
&self,
|
|
ro_pool: PgPool,
|
|
redis: RedisPool,
|
|
) -> eyre::Result<()>;
|
|
|
|
async fn remove_documents(&self, ids: &[VersionId]) -> eyre::Result<()>;
|
|
|
|
async fn tasks(&self) -> eyre::Result<Value>;
|
|
|
|
async fn tasks_cancel(
|
|
&self,
|
|
filter: &TasksCancelFilter,
|
|
) -> eyre::Result<()>;
|
|
}
|
|
|
|
async fn hydrate_search_results(
|
|
hits: &mut [ResultSearchProject],
|
|
redis_pool: &RedisPool,
|
|
) -> eyre::Result<()> {
|
|
// Minecraft Java servers should fetch the latest player count that we have
|
|
// from Redis, rather than the (pretty stale) data from search backend
|
|
// TODO: this block should be made generic over the component type,
|
|
// for now we can hardcode MC java servers tho
|
|
|
|
let project_ids = hits
|
|
.iter()
|
|
.filter(|hit| hit.components.minecraft_java_server.is_some())
|
|
.filter_map(|hit| parse_base62(&hit.project_id).ok().map(ProjectId))
|
|
.collect::<Vec<_>>();
|
|
|
|
let pings_by_project_id = if project_ids.is_empty() {
|
|
HashMap::new()
|
|
} else {
|
|
let mut redis = redis_pool.connect().await?;
|
|
let ping_results = redis
|
|
.get_many_deserialized_from_json::<JavaServerPing>(
|
|
server_ping::REDIS_NAMESPACE,
|
|
&project_ids
|
|
.iter()
|
|
.map(ToString::to_string)
|
|
.collect::<Vec<_>>(),
|
|
)
|
|
.await?;
|
|
|
|
ping_results
|
|
.into_iter()
|
|
.enumerate()
|
|
.filter_map(|(idx, ping)| ping.map(|ping| (project_ids[idx], ping)))
|
|
.collect::<HashMap<_, _>>()
|
|
};
|
|
|
|
for hit in hits {
|
|
let Some(java_server) = hit.components.minecraft_java_server.as_mut()
|
|
else {
|
|
continue;
|
|
};
|
|
if let Ok(project_id) = parse_base62(&hit.project_id).map(ProjectId) {
|
|
java_server.ping = pings_by_project_id.get(&project_id).cloned();
|
|
} else {
|
|
java_server.ping = None;
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[derive(Deserialize, Serialize, ToSchema)]
|
|
#[serde(tag = "type", rename_all = "snake_case")]
|
|
pub enum TasksCancelFilter {
|
|
All,
|
|
AllEnqueued,
|
|
Indexes { indexes: Vec<String> },
|
|
}
|
|
|
|
/// The search backend implementations this module can construct.
///
/// Parsed from its lowercase name through the [`FromStr`] implementation,
/// and matched on by [`backend`] to pick the concrete implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SearchBackendKind {
    /// Parsed from `"meilisearch"`.
    Meilisearch,
    /// Parsed from `"elasticsearch"`.
    Elasticsearch,
    /// Parsed from `"typesense"`.
    Typesense,
}
|
|
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, strum::EnumIter)]
|
|
pub enum SearchField {
|
|
Categories,
|
|
ProjectTypes,
|
|
ProjectId,
|
|
OpenSource,
|
|
Environment,
|
|
GameVersions,
|
|
ClientSide,
|
|
ServerSide,
|
|
MinecraftServerRegion,
|
|
MinecraftServerLanguages,
|
|
MinecraftJavaServerContentKind,
|
|
MinecraftJavaServerContentSupportedGameVersions,
|
|
MinecraftJavaServerPingData,
|
|
}
|
|
|
|
#[derive(Debug, Error)]
|
|
#[error("invalid search backend kind")]
|
|
pub struct InvalidSearchBackendKind;
|
|
|
|
impl FromStr for SearchBackendKind {
|
|
type Err = InvalidSearchBackendKind;
|
|
|
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
|
Ok(match s {
|
|
"meilisearch" => SearchBackendKind::Meilisearch,
|
|
"elasticsearch" => SearchBackendKind::Elasticsearch,
|
|
"typesense" => SearchBackendKind::Typesense,
|
|
_ => return Err(InvalidSearchBackendKind),
|
|
})
|
|
}
|
|
}
|
|
|
|
#[derive(Serialize, Deserialize, Debug, Clone)]
|
|
pub struct UploadSearchProject {
|
|
pub version_id: String,
|
|
pub project_id: String,
|
|
//
|
|
pub project_types: Vec<String>,
|
|
pub slug: Option<String>,
|
|
pub author: String,
|
|
pub name: String,
|
|
pub summary: String,
|
|
pub categories: Vec<String>,
|
|
pub display_categories: Vec<String>,
|
|
pub follows: i32,
|
|
pub downloads: i32,
|
|
pub log_downloads: f64,
|
|
pub icon_url: Option<String>,
|
|
pub license: String,
|
|
pub gallery: Vec<String>,
|
|
pub featured_gallery: Option<String>,
|
|
/// RFC 3339 formatted creation date of the project
|
|
pub date_created: DateTime<Utc>,
|
|
/// Unix timestamp of the creation date of the project
|
|
pub created_timestamp: i64,
|
|
/// RFC 3339 formatted date/time of last major modification (update)
|
|
pub date_modified: DateTime<Utc>,
|
|
/// Unix timestamp of the last major modification
|
|
pub modified_timestamp: i64,
|
|
/// Unix timestamp of the publication date of the version
|
|
pub version_published_timestamp: i64,
|
|
pub open_source: bool,
|
|
pub color: Option<u32>,
|
|
|
|
// Hidden fields to get the Project model out of the search results.
|
|
pub loaders: Vec<String>, // Search uses loaders as categories- this is purely for the Project model.
|
|
pub project_loader_fields: HashMap<String, Vec<serde_json::Value>>, // Aggregation of loader_fields from all versions of the project, allowing for reconstruction of the Project model.
|
|
|
|
#[serde(flatten)]
|
|
pub components: exp::ProjectQuery,
|
|
#[serde(flatten)]
|
|
pub loader_fields: HashMap<String, Vec<serde_json::Value>>,
|
|
}
|
|
|
|
#[derive(Serialize, Deserialize, Debug)]
|
|
pub struct SearchResults {
|
|
pub hits: Vec<ResultSearchProject>,
|
|
pub page: usize,
|
|
pub hits_per_page: usize,
|
|
pub total_hits: usize,
|
|
}
|
|
|
|
#[derive(Serialize, Deserialize, Debug, Clone)]
|
|
pub struct ResultSearchProject {
|
|
pub version_id: String,
|
|
pub project_id: String,
|
|
pub project_types: Vec<String>,
|
|
pub slug: Option<String>,
|
|
pub author: String,
|
|
pub name: String,
|
|
pub summary: String,
|
|
pub categories: Vec<String>,
|
|
pub display_categories: Vec<String>,
|
|
pub downloads: i32,
|
|
pub follows: i32,
|
|
pub icon_url: Option<String>,
|
|
/// RFC 3339 formatted creation date of the project
|
|
pub date_created: String,
|
|
/// RFC 3339 formatted modification date of the project
|
|
pub date_modified: String,
|
|
pub license: String,
|
|
pub gallery: Vec<String>,
|
|
pub featured_gallery: Option<String>,
|
|
pub color: Option<u32>,
|
|
|
|
// Hidden fields to get the Project model out of the search results.
|
|
pub loaders: Vec<String>, // Search uses loaders as categories- this is purely for the Project model.
|
|
pub project_loader_fields: HashMap<String, Vec<serde_json::Value>>, // Aggregation of loader_fields from all versions of the project, allowing for reconstruction of the Project model.
|
|
|
|
#[serde(flatten)]
|
|
pub components: exp::ProjectQuery,
|
|
#[serde(flatten)]
|
|
pub loader_fields: HashMap<String, Vec<serde_json::Value>>,
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
pub search_metadata: Option<Value>,
|
|
}
|
|
|
|
impl From<UploadSearchProject> for ResultSearchProject {
|
|
fn from(source: UploadSearchProject) -> Self {
|
|
Self {
|
|
version_id: source.version_id,
|
|
project_id: source.project_id,
|
|
project_types: source.project_types,
|
|
slug: source.slug,
|
|
author: source.author,
|
|
name: source.name,
|
|
summary: source.summary,
|
|
categories: source.categories,
|
|
display_categories: source.display_categories,
|
|
downloads: source.downloads,
|
|
follows: source.follows,
|
|
icon_url: source.icon_url,
|
|
date_created: source.date_created.to_rfc3339(),
|
|
date_modified: source.date_modified.to_rfc3339(),
|
|
license: source.license,
|
|
gallery: source.gallery,
|
|
featured_gallery: source.featured_gallery,
|
|
color: source.color,
|
|
loaders: source.loaders,
|
|
project_loader_fields: source.project_loader_fields,
|
|
components: source.components,
|
|
loader_fields: source.loader_fields,
|
|
search_metadata: None,
|
|
}
|
|
}
|
|
}
|
|
|
|
pub fn backend(meta_namespace: Option<String>) -> Box<dyn SearchBackend> {
|
|
match ENV.SEARCH_BACKEND {
|
|
SearchBackendKind::Meilisearch => {
|
|
let config = backend::MeilisearchConfig::new(meta_namespace);
|
|
Box::new(backend::Meilisearch::new(config))
|
|
}
|
|
SearchBackendKind::Elasticsearch => {
|
|
Box::new(backend::Elasticsearch::new(meta_namespace).unwrap())
|
|
}
|
|
SearchBackendKind::Typesense => {
|
|
let config = backend::TypesenseConfig::new(meta_namespace);
|
|
Box::new(backend::Typesense::new(config))
|
|
}
|
|
}
|
|
}
|