Search test + v3 (#731)

* search patch for accurate loader/gv filtering

* backup

* basic search test

* finished test

* incomplete commit; backing up

* Working multipart reroute backup

* working rough draft v3

* most tests passing

* works

* search v2 conversion

* added some tags.rs v2 conversions

* Worked through warnings, unwraps, prints

* refactors

* new search test

* fixed version file changes

* redesign to revs

* removed old caches

* removed games

* fmt clippy

* merge conflicts

* fmt, prepare

* moved v2 routes over to v3

* fixes; tests passing

* project type changes

* moved files over

* fmt, clippy, prepare, etc

* loaders to loader_fields, added tests

* fmt, clippy, prepare

* fixed sorting bug

* reverted — previous order was wrong, for consistency

* fmt; clippy; prepare

---------

Co-authored-by: Jai A <jaiagr+gpg@pm.me>
This commit is contained in:
Wyatt Verchere
2023-11-11 16:40:10 -08:00
committed by GitHub
parent 97ccb7df94
commit ae1c5342f2
133 changed files with 18153 additions and 11320 deletions

View File

@@ -1,50 +1,94 @@
use std::collections::HashMap;
use std::sync::Arc;
use dashmap::DashSet;
use futures::TryStreamExt;
use log::info;
use super::IndexingError;
use crate::database::models::loader_fields::VersionField;
use crate::database::models::ProjectId;
use crate::search::UploadSearchProject;
use sqlx::postgres::PgPool;
pub async fn index_local(pool: PgPool) -> Result<Vec<UploadSearchProject>, IndexingError> {
pub async fn index_local(
pool: PgPool,
) -> Result<(Vec<UploadSearchProject>, Vec<String>), IndexingError> {
info!("Indexing local projects!");
Ok(
let loader_field_keys: Arc<DashSet<String>> = Arc::new(DashSet::new());
let uploads =
sqlx::query!(
"
SELECT m.id id, m.project_type project_type, m.title title, m.description description, m.downloads downloads, m.follows follows,
SELECT m.id id, v.id version_id, m.title title, m.description description, m.downloads downloads, m.follows follows,
m.icon_url icon_url, m.published published, m.approved approved, m.updated updated,
m.team_id team_id, m.license license, m.slug slug, m.status status_name, m.color color,
cs.name client_side_type, ss.name server_side_type, pt.name project_type_name, u.username username,
pt.name project_type_name, u.username username,
ARRAY_AGG(DISTINCT c.category) filter (where c.category is not null and mc.is_additional is false) categories,
ARRAY_AGG(DISTINCT c.category) filter (where c.category is not null and mc.is_additional is true) additional_categories,
ARRAY_AGG(DISTINCT lo.loader) filter (where lo.loader is not null) loaders,
ARRAY_AGG(DISTINCT gv.version) filter (where gv.version is not null) versions,
ARRAY_AGG(DISTINCT pt.name) filter (where pt.name is not null) project_types,
ARRAY_AGG(DISTINCT g.name) filter (where g.name is not null) games,
ARRAY_AGG(DISTINCT mg.image_url) filter (where mg.image_url is not null and mg.featured is false) gallery,
ARRAY_AGG(DISTINCT mg.image_url) filter (where mg.image_url is not null and mg.featured is true) featured_gallery
FROM mods m
ARRAY_AGG(DISTINCT mg.image_url) filter (where mg.image_url is not null and mg.featured is true) featured_gallery,
JSONB_AGG(
DISTINCT jsonb_build_object(
'field_id', vf.field_id,
'int_value', vf.int_value,
'enum_value', vf.enum_value,
'string_value', vf.string_value
)
) filter (where vf.field_id is not null) version_fields,
JSONB_AGG(
DISTINCT jsonb_build_object(
'lf_id', lf.id,
'loader_name', lo.loader,
'field', lf.field,
'field_type', lf.field_type,
'enum_type', lf.enum_type,
'min_val', lf.min_val,
'max_val', lf.max_val,
'optional', lf.optional
)
) filter (where lf.id is not null) loader_fields,
JSONB_AGG(
DISTINCT jsonb_build_object(
'id', lfev.id,
'enum_id', lfev.enum_id,
'value', lfev.value,
'ordering', lfev.ordering,
'created', lfev.created,
'metadata', lfev.metadata
)
) filter (where lfev.id is not null) loader_field_enum_values
FROM versions v
INNER JOIN mods m ON v.mod_id = m.id AND m.status = ANY($2)
LEFT OUTER JOIN mods_categories mc ON joining_mod_id = m.id
LEFT OUTER JOIN categories c ON mc.joining_category_id = c.id
LEFT OUTER JOIN versions v ON v.mod_id = m.id AND v.status != ALL($1)
LEFT OUTER JOIN game_versions_versions gvv ON gvv.joining_version_id = v.id
LEFT OUTER JOIN game_versions gv ON gvv.game_version_id = gv.id
LEFT OUTER JOIN loaders_versions lv ON lv.version_id = v.id
LEFT OUTER JOIN loaders lo ON lo.id = lv.loader_id
LEFT JOIN loaders_project_types lpt ON lpt.joining_loader_id = lo.id
LEFT JOIN project_types pt ON pt.id = lpt.joining_project_type_id
LEFT JOIN loaders_project_types_games lptg ON lptg.loader_id = lo.id AND lptg.project_type_id = pt.id
LEFT JOIN games g ON lptg.game_id = g.id
LEFT OUTER JOIN mods_gallery mg ON mg.mod_id = m.id
INNER JOIN project_types pt ON pt.id = m.project_type
INNER JOIN side_types cs ON m.client_side = cs.id
INNER JOIN side_types ss ON m.server_side = ss.id
INNER JOIN team_members tm ON tm.team_id = m.team_id AND tm.role = $3 AND tm.accepted = TRUE
INNER JOIN users u ON tm.user_id = u.id
WHERE m.status = ANY($2)
GROUP BY m.id, cs.id, ss.id, pt.id, u.id;
LEFT OUTER JOIN version_fields vf on v.id = vf.version_id
LEFT OUTER JOIN loader_fields lf on vf.field_id = lf.id
LEFT OUTER JOIN loader_field_enums lfe on lf.enum_type = lfe.id
LEFT OUTER JOIN loader_field_enum_values lfev on lfev.enum_id = lfe.id
WHERE v.status != ANY($1)
GROUP BY v.id, m.id, pt.id, u.id;
",
&*crate::models::projects::VersionStatus::iterator().filter(|x| x.is_hidden()).map(|x| x.to_string()).collect::<Vec<String>>(),
&*crate::models::projects::ProjectStatus::iterator().filter(|x| x.is_searchable()).map(|x| x.to_string()).collect::<Vec<String>>(),
crate::models::teams::OWNER_ROLE,
)
.fetch_many(&pool)
.try_filter_map(|e| async {
.try_filter_map(|e| {
let loader_field_keys = loader_field_keys.clone();
async move {
Ok(e.right().map(|m| {
let mut additional_categories = m.additional_categories.unwrap_or_default();
let mut categories = m.categories.unwrap_or_default();
@@ -54,9 +98,18 @@ pub async fn index_local(pool: PgPool) -> Result<Vec<UploadSearchProject>, Index
let display_categories = categories.clone();
categories.append(&mut additional_categories);
let versions = m.versions.unwrap_or_default();
let version_fields = VersionField::from_query_json(m.id, m.loader_fields, m.version_fields, m.loader_field_enum_values);
let loader_fields : HashMap<String, Vec<String>> = version_fields.into_iter().map(|vf| {
(vf.field_name, vf.value.as_strings())
}).collect();
for v in loader_fields.keys().cloned() {
loader_field_keys.insert(v);
}
let project_id: crate::models::projects::ProjectId = ProjectId(m.id).into();
let version_id: crate::models::projects::ProjectId = ProjectId(m.version_id).into();
let license = match m.license.split(' ').next() {
Some(license) => license.to_string(),
@@ -68,7 +121,17 @@ pub async fn index_local(pool: PgPool) -> Result<Vec<UploadSearchProject>, Index
_ => false,
};
// SPECIAL BEHAVIOUR
// Todo: revisit.
// For consistency with v2 searching, we consider the loader field 'mrpack_loaders' to be a category.
// These were previously considered the loader, and in v2, the loader is a category for searching.
// So to avoid breakage or awkward conversions, we just consider those loader_fields to be categories.
// The loaders are kept in loader_fields as well, so that no information is lost on retrieval.
let mrpack_loaders = loader_fields.get("mrpack_loaders").cloned().unwrap_or_default();
categories.extend(mrpack_loaders);
UploadSearchProject {
version_id: version_id.to_string(),
project_id: project_id.to_string(),
title: m.title,
description: m.description,
@@ -81,11 +144,7 @@ pub async fn index_local(pool: PgPool) -> Result<Vec<UploadSearchProject>, Index
created_timestamp: m.approved.unwrap_or(m.published).timestamp(),
date_modified: m.updated,
modified_timestamp: m.updated.timestamp(),
latest_version: versions.last().cloned().unwrap_or_else(|| "None".to_string()),
versions,
license,
client_side: m.client_side_type,
server_side: m.server_side_type,
slug: m.slug,
project_type: m.project_type_name,
gallery: m.gallery.unwrap_or_default(),
@@ -93,10 +152,17 @@ pub async fn index_local(pool: PgPool) -> Result<Vec<UploadSearchProject>, Index
open_source,
color: m.color.map(|x| x as u32),
featured_gallery: m.featured_gallery.unwrap_or_default().first().cloned(),
loader_fields
}
}))
})
}})
.try_collect::<Vec<_>>()
.await?
)
.await?;
Ok((
uploads,
Arc::try_unwrap(loader_field_keys)
.unwrap_or_default()
.into_iter()
.collect(),
))
}

View File

@@ -32,11 +32,14 @@ const MEILISEARCH_CHUNK_SIZE: usize = 10000;
pub async fn index_projects(pool: PgPool, config: &SearchConfig) -> Result<(), IndexingError> {
let mut docs_to_add: Vec<UploadSearchProject> = vec![];
let mut additional_fields: Vec<String> = vec![];
docs_to_add.append(&mut index_local(pool.clone()).await?);
let (mut uploads, mut loader_fields) = index_local(pool.clone()).await?;
docs_to_add.append(&mut uploads);
additional_fields.append(&mut loader_fields);
// Write Indices
add_projects(docs_to_add, config).await?;
add_projects(docs_to_add, additional_fields, config).await?;
Ok(())
}
@@ -69,7 +72,7 @@ async fn create_index(
},
)) => {
// Only create index and set settings if the index doesn't already exist
let task = client.create_index(name, Some("project_id")).await?;
let task = client.create_index(name, Some("version_id")).await?;
let task = task.wait_for_completion(client, None, None).await?;
let index = task
.try_make_index(client)
@@ -103,7 +106,7 @@ async fn add_to_index(
) -> Result<(), IndexingError> {
for chunk in mods.chunks(MEILISEARCH_CHUNK_SIZE) {
index
.add_documents(chunk, Some("project_id"))
.add_documents(chunk, Some("version_id"))
.await?
.wait_for_completion(client, None, None)
.await?;
@@ -114,25 +117,35 @@ async fn add_to_index(
async fn create_and_add_to_index(
client: &Client,
projects: &[UploadSearchProject],
additional_fields: &[String],
name: &'static str,
custom_rules: Option<&'static [&'static str]>,
) -> Result<(), IndexingError> {
let index = create_index(client, name, custom_rules).await?;
let mut new_filterable_attributes = index.get_filterable_attributes().await?;
new_filterable_attributes.extend(additional_fields.iter().map(|s| s.to_string()));
index
.set_filterable_attributes(new_filterable_attributes)
.await?;
add_to_index(client, index, projects).await?;
Ok(())
}
pub async fn add_projects(
projects: Vec<UploadSearchProject>,
additional_fields: Vec<String>,
config: &SearchConfig,
) -> Result<(), IndexingError> {
let client = config.make_client();
create_and_add_to_index(&client, &projects, "projects", None).await?;
create_and_add_to_index(&client, &projects, &additional_fields, "projects", None).await?;
create_and_add_to_index(
&client,
&projects,
&additional_fields,
"projects_filtered",
Some(&[
"sort",
@@ -150,6 +163,7 @@ pub async fn add_projects(
fn default_settings() -> Settings {
Settings::new()
.with_distinct_attribute("project_id")
.with_displayed_attributes(DEFAULT_DISPLAYED_ATTRIBUTES)
.with_searchable_attributes(DEFAULT_SEARCHABLE_ATTRIBUTES)
.with_sortable_attributes(DEFAULT_SORTABLE_ATTRIBUTES)
@@ -161,6 +175,7 @@ fn default_settings() -> Settings {
const DEFAULT_DISPLAYED_ATTRIBUTES: &[&str] = &[
"project_id",
"version_id",
"project_type",
"slug",
"author",
@@ -168,7 +183,6 @@ const DEFAULT_DISPLAYED_ATTRIBUTES: &[&str] = &[
"description",
"categories",
"display_categories",
"versions",
"downloads",
"follows",
"icon_url",
@@ -176,8 +190,6 @@ const DEFAULT_DISPLAYED_ATTRIBUTES: &[&str] = &[
"date_modified",
"latest_version",
"license",
"client_side",
"server_side",
"gallery",
"featured_gallery",
"color",
@@ -187,10 +199,7 @@ const DEFAULT_SEARCHABLE_ATTRIBUTES: &[&str] = &["title", "description", "author
const DEFAULT_ATTRIBUTES_FOR_FACETING: &[&str] = &[
"categories",
"versions",
"license",
"client_side",
"server_side",
"project_type",
"downloads",
"follows",

View File

@@ -7,6 +7,7 @@ use meilisearch_sdk::client::Client;
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
use std::cmp::min;
use std::collections::HashMap;
use std::fmt::Write;
use thiserror::Error;
@@ -71,6 +72,7 @@ impl SearchConfig {
/// This contains some extra data that is not returned by search results.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct UploadSearchProject {
pub version_id: String,
pub project_id: String,
pub project_type: String,
pub slug: Option<String>,
@@ -79,14 +81,10 @@ pub struct UploadSearchProject {
pub description: String,
pub categories: Vec<String>,
pub display_categories: Vec<String>,
pub versions: Vec<String>,
pub follows: i32,
pub downloads: i32,
pub icon_url: String,
pub latest_version: String,
pub license: String,
pub client_side: String,
pub server_side: String,
pub gallery: Vec<String>,
pub featured_gallery: Option<String>,
/// RFC 3339 formatted creation date of the project
@@ -99,6 +97,9 @@ pub struct UploadSearchProject {
pub modified_timestamp: i64,
pub open_source: bool,
pub color: Option<u32>,
#[serde(flatten)]
pub loader_fields: HashMap<String, Vec<String>>,
}
#[derive(Serialize, Deserialize, Debug)]
@@ -111,6 +112,7 @@ pub struct SearchResults {
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct ResultSearchProject {
pub version_id: String,
pub project_id: String,
pub project_type: String,
pub slug: Option<String>,
@@ -119,7 +121,6 @@ pub struct ResultSearchProject {
pub description: String,
pub categories: Vec<String>,
pub display_categories: Vec<String>,
pub versions: Vec<String>,
pub downloads: i32,
pub follows: i32,
pub icon_url: String,
@@ -127,13 +128,24 @@ pub struct ResultSearchProject {
pub date_created: String,
/// RFC 3339 formatted modification date of the project
pub date_modified: String,
pub latest_version: String,
pub license: String,
pub client_side: String,
pub server_side: String,
pub gallery: Vec<String>,
pub featured_gallery: Option<String>,
pub color: Option<u32>,
#[serde(flatten)]
pub loader_fields: HashMap<String, Vec<String>>,
}
pub fn get_sort_index(index: &str) -> Result<(&str, [&str; 1]), SearchError> {
Ok(match index {
"relevance" => ("projects", ["downloads:desc"]),
"downloads" => ("projects_filtered", ["downloads:desc"]),
"follows" => ("projects", ["follows:desc"]),
"updated" => ("projects", ["date_modified:desc"]),
"newest" => ("projects", ["date_created:desc"]),
i => return Err(SearchError::InvalidIndex(i.to_string())),
})
}
pub async fn search_for_project(
@@ -146,14 +158,7 @@ pub async fn search_for_project(
let index = info.index.as_deref().unwrap_or("relevance");
let limit = info.limit.as_deref().unwrap_or("10").parse()?;
let sort = match index {
"relevance" => ("projects", ["downloads:desc"]),
"downloads" => ("projects_filtered", ["downloads:desc"]),
"follows" => ("projects", ["follows:desc"]),
"updated" => ("projects", ["date_modified:desc"]),
"newest" => ("projects", ["date_created:desc"]),
i => return Err(SearchError::InvalidIndex(i.to_string())),
};
let sort = get_sort_index(index)?;
let meilisearch_index = client.get_index(sort.0).await?;