From bfeff781645b31938660d207eebc870c50994521 Mon Sep 17 00:00:00 2001 From: Geometrically <18202329+Geometrically@users.noreply.github.com> Date: Sat, 13 Jan 2024 18:20:28 -0500 Subject: [PATCH] Update search queries (#854) * Update search queries * Bump accepted payload limit * fixes * push test changes * fmt clippy prepare --------- Co-authored-by: Wyatt Verchere --- ...ebb487bd5fffe0858cd9e0356cea10fea83a3.json | 40 + ...7d340e5970b27edc76f21b903f362329a6542.json | 46 ++ ...7f5433df9b9d86a2a1a695933feb94b093d5d.json | 35 - ...42fa9a2351eab68ddacbd91aa3cdc9c5cff7a.json | 34 + ...bac035931a0bab8c0d0cf63888c8e5616f847.json | 28 + ...b30dc966b10872581c5932ae36dd28e930c6b.json | 34 + ...3087a587633783894a5041889b856d47a4ed5.json | 88 +++ ...e0d771f929c7a6ed96245ba5b37dc9d49844c.json | 56 ++ ...b5812d947582e3573da56e632f2b0b29fac7b.json | 28 + ...120b4ae4068bd086dc08f572b33cfc2476354.json | 28 + ...0d7dfb4e0fa2ee640128d29d6e4beafe60f4c.json | 50 ++ docker-compose.yml | 1 + src/models/v3/projects.rs | 305 ++++---- src/routes/v3/projects.rs | 36 +- src/search/indexing/local_import.rs | 701 ++++++++++++------ src/search/indexing/mod.rs | 46 +- src/search/mod.rs | 30 +- tests/common/api_v3/project.rs | 4 +- tests/project.rs | 64 +- tests/search.rs | 3 +- tests/v2/search.rs | 1 + 21 files changed, 1135 insertions(+), 523 deletions(-) create mode 100644 .sqlx/query-09e411b2d15dd49a62f7b09fd1cebb487bd5fffe0858cd9e0356cea10fea83a3.json create mode 100644 .sqlx/query-53c50911a9e98ac6d0c83fec4117d340e5970b27edc76f21b903f362329a6542.json delete mode 100644 .sqlx/query-594ead968747529638ce41ebd3f7f5433df9b9d86a2a1a695933feb94b093d5d.json create mode 100644 .sqlx/query-5e6c981d0f6b42ee926f59dbe3e42fa9a2351eab68ddacbd91aa3cdc9c5cff7a.json create mode 100644 .sqlx/query-80734c33c16aeacca980cf40070bac035931a0bab8c0d0cf63888c8e5616f847.json create mode 100644 .sqlx/query-8ef92ce880a7fdac4fc3a5dee50b30dc966b10872581c5932ae36dd28e930c6b.json create mode 100644 
.sqlx/query-b30d0365bd116fceee5de03fb9e3087a587633783894a5041889b856d47a4ed5.json create mode 100644 .sqlx/query-d69ee7051e3bf4b66eab2010134e0d771f929c7a6ed96245ba5b37dc9d49844c.json create mode 100644 .sqlx/query-dbdcaf9f2126e15892c28f782d4b5812d947582e3573da56e632f2b0b29fac7b.json create mode 100644 .sqlx/query-e50e308826d1e7fa54cade7daf8120b4ae4068bd086dc08f572b33cfc2476354.json create mode 100644 .sqlx/query-fe34673ce6d7bcb616a5ab2e8900d7dfb4e0fa2ee640128d29d6e4beafe60f4c.json diff --git a/.sqlx/query-09e411b2d15dd49a62f7b09fd1cebb487bd5fffe0858cd9e0356cea10fea83a3.json b/.sqlx/query-09e411b2d15dd49a62f7b09fd1cebb487bd5fffe0858cd9e0356cea10fea83a3.json new file mode 100644 index 00000000..59424ca7 --- /dev/null +++ b/.sqlx/query-09e411b2d15dd49a62f7b09fd1cebb487bd5fffe0858cd9e0356cea10fea83a3.json @@ -0,0 +1,40 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT mod_id, image_url, featured, ordering\n FROM mods_gallery\n WHERE mod_id = ANY($1)\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "mod_id", + "type_info": "Int8" + }, + { + "ordinal": 1, + "name": "image_url", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "featured", + "type_info": "Bool" + }, + { + "ordinal": 3, + "name": "ordering", + "type_info": "Int8" + } + ], + "parameters": { + "Left": [ + "Int8Array" + ] + }, + "nullable": [ + false, + false, + true, + false + ] + }, + "hash": "09e411b2d15dd49a62f7b09fd1cebb487bd5fffe0858cd9e0356cea10fea83a3" +} diff --git a/.sqlx/query-53c50911a9e98ac6d0c83fec4117d340e5970b27edc76f21b903f362329a6542.json b/.sqlx/query-53c50911a9e98ac6d0c83fec4117d340e5970b27edc76f21b903f362329a6542.json new file mode 100644 index 00000000..f932bd38 --- /dev/null +++ b/.sqlx/query-53c50911a9e98ac6d0c83fec4117d340e5970b27edc76f21b903f362329a6542.json @@ -0,0 +1,46 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT version_id, field_id, int_value, enum_value, string_value\n FROM version_fields\n WHERE version_id = ANY($1)\n ", + "describe": 
{ + "columns": [ + { + "ordinal": 0, + "name": "version_id", + "type_info": "Int8" + }, + { + "ordinal": 1, + "name": "field_id", + "type_info": "Int4" + }, + { + "ordinal": 2, + "name": "int_value", + "type_info": "Int4" + }, + { + "ordinal": 3, + "name": "enum_value", + "type_info": "Int4" + }, + { + "ordinal": 4, + "name": "string_value", + "type_info": "Text" + } + ], + "parameters": { + "Left": [ + "Int8Array" + ] + }, + "nullable": [ + false, + false, + true, + true, + true + ] + }, + "hash": "53c50911a9e98ac6d0c83fec4117d340e5970b27edc76f21b903f362329a6542" +} diff --git a/.sqlx/query-594ead968747529638ce41ebd3f7f5433df9b9d86a2a1a695933feb94b093d5d.json b/.sqlx/query-594ead968747529638ce41ebd3f7f5433df9b9d86a2a1a695933feb94b093d5d.json deleted file mode 100644 index 53f55bd3..00000000 --- a/.sqlx/query-594ead968747529638ce41ebd3f7f5433df9b9d86a2a1a695933feb94b093d5d.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "\n SELECT v.id id, m.id mod_id, COALESCE(u.username, ou.username) owner_username\n FROM versions v\n INNER JOIN mods m ON v.mod_id = m.id AND m.status = ANY($2)\n LEFT JOIN team_members tm ON tm.team_id = m.team_id AND tm.is_owner = TRUE AND tm.accepted = TRUE\n LEFT JOIN users u ON tm.user_id = u.id\n LEFT JOIN organizations o ON o.id = m.organization_id\n LEFT JOIN team_members otm ON otm.team_id = o.team_id AND otm.is_owner = TRUE AND otm.accepted = TRUE\n LEFT JOIN users ou ON otm.user_id = ou.id\n WHERE v.status != ANY($1)\n GROUP BY v.id, m.id, u.username, ou.username\n ORDER BY m.id DESC;\n ", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "id", - "type_info": "Int8" - }, - { - "ordinal": 1, - "name": "mod_id", - "type_info": "Int8" - }, - { - "ordinal": 2, - "name": "owner_username", - "type_info": "Varchar" - } - ], - "parameters": { - "Left": [ - "TextArray", - "TextArray" - ] - }, - "nullable": [ - false, - false, - null - ] - }, - "hash": 
"594ead968747529638ce41ebd3f7f5433df9b9d86a2a1a695933feb94b093d5d" -} diff --git a/.sqlx/query-5e6c981d0f6b42ee926f59dbe3e42fa9a2351eab68ddacbd91aa3cdc9c5cff7a.json b/.sqlx/query-5e6c981d0f6b42ee926f59dbe3e42fa9a2351eab68ddacbd91aa3cdc9c5cff7a.json new file mode 100644 index 00000000..d3d2f9b6 --- /dev/null +++ b/.sqlx/query-5e6c981d0f6b42ee926f59dbe3e42fa9a2351eab68ddacbd91aa3cdc9c5cff7a.json @@ -0,0 +1,34 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT DISTINCT version_id,\n ARRAY_AGG(DISTINCT l.loader) filter (where l.loader is not null) loaders,\n ARRAY_AGG(DISTINCT pt.name) filter (where pt.name is not null) project_types\n FROM versions v\n INNER JOIN loaders_versions lv ON v.id = lv.version_id\n INNER JOIN loaders l ON lv.loader_id = l.id\n INNER JOIN loaders_project_types lpt ON lpt.joining_loader_id = l.id\n INNER JOIN project_types pt ON pt.id = lpt.joining_project_type_id\n WHERE v.id = ANY($1)\n GROUP BY version_id\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "version_id", + "type_info": "Int8" + }, + { + "ordinal": 1, + "name": "loaders", + "type_info": "VarcharArray" + }, + { + "ordinal": 2, + "name": "project_types", + "type_info": "VarcharArray" + } + ], + "parameters": { + "Left": [ + "Int8Array" + ] + }, + "nullable": [ + false, + null, + null + ] + }, + "hash": "5e6c981d0f6b42ee926f59dbe3e42fa9a2351eab68ddacbd91aa3cdc9c5cff7a" +} diff --git a/.sqlx/query-80734c33c16aeacca980cf40070bac035931a0bab8c0d0cf63888c8e5616f847.json b/.sqlx/query-80734c33c16aeacca980cf40070bac035931a0bab8c0d0cf63888c8e5616f847.json new file mode 100644 index 00000000..b874da66 --- /dev/null +++ b/.sqlx/query-80734c33c16aeacca980cf40070bac035931a0bab8c0d0cf63888c8e5616f847.json @@ -0,0 +1,28 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT m.id mod_id, u.username\n FROM mods m\n INNER JOIN team_members tm ON tm.is_owner = TRUE and tm.team_id = m.team_id\n INNER JOIN users u ON u.id = tm.user_id\n WHERE m.id = ANY($1)\n ", + "describe": { + 
"columns": [ + { + "ordinal": 0, + "name": "mod_id", + "type_info": "Int8" + }, + { + "ordinal": 1, + "name": "username", + "type_info": "Varchar" + } + ], + "parameters": { + "Left": [ + "Int8Array" + ] + }, + "nullable": [ + false, + false + ] + }, + "hash": "80734c33c16aeacca980cf40070bac035931a0bab8c0d0cf63888c8e5616f847" +} diff --git a/.sqlx/query-8ef92ce880a7fdac4fc3a5dee50b30dc966b10872581c5932ae36dd28e930c6b.json b/.sqlx/query-8ef92ce880a7fdac4fc3a5dee50b30dc966b10872581c5932ae36dd28e930c6b.json new file mode 100644 index 00000000..3007b6d2 --- /dev/null +++ b/.sqlx/query-8ef92ce880a7fdac4fc3a5dee50b30dc966b10872581c5932ae36dd28e930c6b.json @@ -0,0 +1,34 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT mc.joining_mod_id mod_id, c.category name, mc.is_additional is_additional\n FROM mods_categories mc\n INNER JOIN categories c ON mc.joining_category_id = c.id\n WHERE joining_mod_id = ANY($1)\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "mod_id", + "type_info": "Int8" + }, + { + "ordinal": 1, + "name": "name", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "is_additional", + "type_info": "Bool" + } + ], + "parameters": { + "Left": [ + "Int8Array" + ] + }, + "nullable": [ + false, + false, + false + ] + }, + "hash": "8ef92ce880a7fdac4fc3a5dee50b30dc966b10872581c5932ae36dd28e930c6b" +} diff --git a/.sqlx/query-b30d0365bd116fceee5de03fb9e3087a587633783894a5041889b856d47a4ed5.json b/.sqlx/query-b30d0365bd116fceee5de03fb9e3087a587633783894a5041889b856d47a4ed5.json new file mode 100644 index 00000000..6142e7dc --- /dev/null +++ b/.sqlx/query-b30d0365bd116fceee5de03fb9e3087a587633783894a5041889b856d47a4ed5.json @@ -0,0 +1,88 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT m.id id, m.name name, m.summary summary, m.downloads downloads, m.follows follows,\n m.icon_url icon_url, m.updated updated, m.approved approved, m.published, m.license license, m.slug slug, m.color\n FROM mods m\n WHERE m.status = ANY($1)\n GROUP BY 
m.id;\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "id", + "type_info": "Int8" + }, + { + "ordinal": 1, + "name": "name", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "summary", + "type_info": "Varchar" + }, + { + "ordinal": 3, + "name": "downloads", + "type_info": "Int4" + }, + { + "ordinal": 4, + "name": "follows", + "type_info": "Int4" + }, + { + "ordinal": 5, + "name": "icon_url", + "type_info": "Varchar" + }, + { + "ordinal": 6, + "name": "updated", + "type_info": "Timestamptz" + }, + { + "ordinal": 7, + "name": "approved", + "type_info": "Timestamptz" + }, + { + "ordinal": 8, + "name": "published", + "type_info": "Timestamptz" + }, + { + "ordinal": 9, + "name": "license", + "type_info": "Varchar" + }, + { + "ordinal": 10, + "name": "slug", + "type_info": "Varchar" + }, + { + "ordinal": 11, + "name": "color", + "type_info": "Int4" + } + ], + "parameters": { + "Left": [ + "TextArray" + ] + }, + "nullable": [ + false, + false, + false, + false, + false, + true, + false, + true, + false, + false, + true, + true + ] + }, + "hash": "b30d0365bd116fceee5de03fb9e3087a587633783894a5041889b856d47a4ed5" +} diff --git a/.sqlx/query-d69ee7051e3bf4b66eab2010134e0d771f929c7a6ed96245ba5b37dc9d49844c.json b/.sqlx/query-d69ee7051e3bf4b66eab2010134e0d771f929c7a6ed96245ba5b37dc9d49844c.json new file mode 100644 index 00000000..859e603f --- /dev/null +++ b/.sqlx/query-d69ee7051e3bf4b66eab2010134e0d771f929c7a6ed96245ba5b37dc9d49844c.json @@ -0,0 +1,56 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT DISTINCT id, field, field_type, enum_type, min_val, max_val, optional\n FROM loader_fields lf\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "id", + "type_info": "Int4" + }, + { + "ordinal": 1, + "name": "field", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "field_type", + "type_info": "Varchar" + }, + { + "ordinal": 3, + "name": "enum_type", + "type_info": "Int4" + }, + { + "ordinal": 4, + "name": "min_val", + 
"type_info": "Int4" + }, + { + "ordinal": 5, + "name": "max_val", + "type_info": "Int4" + }, + { + "ordinal": 6, + "name": "optional", + "type_info": "Bool" + } + ], + "parameters": { + "Left": [] + }, + "nullable": [ + false, + false, + false, + true, + true, + true, + false + ] + }, + "hash": "d69ee7051e3bf4b66eab2010134e0d771f929c7a6ed96245ba5b37dc9d49844c" +} diff --git a/.sqlx/query-dbdcaf9f2126e15892c28f782d4b5812d947582e3573da56e632f2b0b29fac7b.json b/.sqlx/query-dbdcaf9f2126e15892c28f782d4b5812d947582e3573da56e632f2b0b29fac7b.json new file mode 100644 index 00000000..eab35e7c --- /dev/null +++ b/.sqlx/query-dbdcaf9f2126e15892c28f782d4b5812d947582e3573da56e632f2b0b29fac7b.json @@ -0,0 +1,28 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT v.id, v.mod_id\n FROM versions v\n WHERE mod_id = ANY($1)\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "id", + "type_info": "Int8" + }, + { + "ordinal": 1, + "name": "mod_id", + "type_info": "Int8" + } + ], + "parameters": { + "Left": [ + "Int8Array" + ] + }, + "nullable": [ + false, + false + ] + }, + "hash": "dbdcaf9f2126e15892c28f782d4b5812d947582e3573da56e632f2b0b29fac7b" +} diff --git a/.sqlx/query-e50e308826d1e7fa54cade7daf8120b4ae4068bd086dc08f572b33cfc2476354.json b/.sqlx/query-e50e308826d1e7fa54cade7daf8120b4ae4068bd086dc08f572b33cfc2476354.json new file mode 100644 index 00000000..dadf6296 --- /dev/null +++ b/.sqlx/query-e50e308826d1e7fa54cade7daf8120b4ae4068bd086dc08f572b33cfc2476354.json @@ -0,0 +1,28 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT m.id mod_id, u.username\n FROM mods m\n INNER JOIN organizations o ON o.id = m.organization_id\n INNER JOIN team_members tm ON tm.is_owner = TRUE and tm.team_id = o.team_id\n INNER JOIN users u ON u.id = tm.user_id\n WHERE m.id = ANY($1)\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "mod_id", + "type_info": "Int8" + }, + { + "ordinal": 1, + "name": "username", + "type_info": "Varchar" + } + ], + "parameters": { + 
"Left": [ + "Int8Array" + ] + }, + "nullable": [ + false, + false + ] + }, + "hash": "e50e308826d1e7fa54cade7daf8120b4ae4068bd086dc08f572b33cfc2476354" +} diff --git a/.sqlx/query-fe34673ce6d7bcb616a5ab2e8900d7dfb4e0fa2ee640128d29d6e4beafe60f4c.json b/.sqlx/query-fe34673ce6d7bcb616a5ab2e8900d7dfb4e0fa2ee640128d29d6e4beafe60f4c.json new file mode 100644 index 00000000..0af23b85 --- /dev/null +++ b/.sqlx/query-fe34673ce6d7bcb616a5ab2e8900d7dfb4e0fa2ee640128d29d6e4beafe60f4c.json @@ -0,0 +1,50 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT DISTINCT id, enum_id, value, ordering, created, metadata\n FROM loader_field_enum_values lfev\n ORDER BY enum_id, ordering, created DESC\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "id", + "type_info": "Int4" + }, + { + "ordinal": 1, + "name": "enum_id", + "type_info": "Int4" + }, + { + "ordinal": 2, + "name": "value", + "type_info": "Varchar" + }, + { + "ordinal": 3, + "name": "ordering", + "type_info": "Int4" + }, + { + "ordinal": 4, + "name": "created", + "type_info": "Timestamptz" + }, + { + "ordinal": 5, + "name": "metadata", + "type_info": "Jsonb" + } + ], + "parameters": { + "Left": [] + }, + "nullable": [ + false, + false, + false, + true, + false, + true + ] + }, + "hash": "fe34673ce6d7bcb616a5ab2e8900d7dfb4e0fa2ee640128d29d6e4beafe60f4c" +} diff --git a/docker-compose.yml b/docker-compose.yml index 11a4f806..1c0daf3b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -20,6 +20,7 @@ services: - meilisearch-data:/data.ms environment: MEILI_MASTER_KEY: modrinth + MEILI_HTTP_PAYLOAD_SIZE_LIMIT: 107374182400 redis: image: redis:alpine restart: on-failure diff --git a/src/models/v3/projects.rs b/src/models/v3/projects.rs index f0862bee..5bb0710b 100644 --- a/src/models/v3/projects.rs +++ b/src/models/v3/projects.rs @@ -1,6 +1,5 @@ use std::collections::{HashMap, HashSet}; -use super::ids::base62_impl::parse_base62; use super::ids::{Base62Id, OrganizationId}; use super::teams::TeamId; use 
super::users::UserId; @@ -8,9 +7,7 @@ use crate::database::models::loader_fields::VersionField; use crate::database::models::project_item::{LinkUrl, QueryProject}; use crate::database::models::version_item::QueryVersion; use crate::models::threads::ThreadId; -use crate::search::ResultSearchProject; use chrono::{DateTime, Utc}; -use itertools::Itertools; use serde::{Deserialize, Serialize}; use validator::Validate; @@ -235,157 +232,157 @@ impl From for Project { impl Project { // Matches the from QueryProject, but with a ResultSearchProject - pub fn from_search(m: ResultSearchProject) -> Option { - let project_id = ProjectId(parse_base62(&m.project_id).ok()?); - let team_id = TeamId(parse_base62(&m.team_id).ok()?); - let organization_id = m - .organization_id - .and_then(|id| Some(OrganizationId(parse_base62(&id).ok()?))); - let thread_id = ThreadId(parse_base62(&m.thread_id).ok()?); - let versions = m - .versions - .iter() - .filter_map(|id| Some(VersionId(parse_base62(id).ok()?))) - .collect(); - - let approved = DateTime::parse_from_rfc3339(&m.date_created).ok()?; - let published = DateTime::parse_from_rfc3339(&m.date_published).ok()?.into(); - let approved = if approved == published { - None - } else { - Some(approved.into()) - }; - - let updated = DateTime::parse_from_rfc3339(&m.date_modified).ok()?.into(); - let queued = m - .date_queued - .and_then(|dq| DateTime::parse_from_rfc3339(&dq).ok()) - .map(|d| d.into()); - - let status = ProjectStatus::from_string(&m.status); - let requested_status = m - .requested_status - .map(|mrs| ProjectStatus::from_string(&mrs)); - - let license_url = m.license_url; - let icon_url = m.icon_url; - - // Loaders - let mut loaders = m.loaders; - let mrpack_loaders_strings = - m.project_loader_fields - .get("mrpack_loaders") - .cloned() - .map(|v| { - v.into_iter() - .filter_map(|v| v.as_str().map(String::from)) - .collect_vec() - }); - - // If the project has a mrpack loader, keep only 'loaders' that are not in the mrpack_loaders 
- if let Some(ref mrpack_loaders) = mrpack_loaders_strings { - loaders.retain(|l| !mrpack_loaders.contains(l)); - } - - // Categories - let mut categories = m.display_categories.clone(); - categories.retain(|c| !loaders.contains(c)); - if let Some(ref mrpack_loaders) = mrpack_loaders_strings { - categories.retain(|l| !mrpack_loaders.contains(l)); - } - - // Additional categories - let mut additional_categories = m.categories.clone(); - additional_categories.retain(|c| !categories.contains(c)); - additional_categories.retain(|c| !loaders.contains(c)); - if let Some(ref mrpack_loaders) = mrpack_loaders_strings { - additional_categories.retain(|l| !mrpack_loaders.contains(l)); - } - - let games = m.games; - - let monetization_status = m - .monetization_status - .as_deref() - .map(MonetizationStatus::from_string) - .unwrap_or(MonetizationStatus::Monetized); - - let link_urls = m - .links - .into_iter() - .map(|d| (d.platform_name.clone(), Link::from(d))) - .collect(); - - let gallery = m - .gallery_items - .into_iter() - .map(|x| GalleryItem { - url: x.image_url, - featured: x.featured, - name: x.name, - description: x.description, - created: x.created, - ordering: x.ordering, - }) - .collect(); - - Some(Self { - id: project_id, - slug: m.slug, - project_types: m.project_types, - games, - team_id, - organization: organization_id, - name: m.name, - summary: m.summary, - description: "".to_string(), // Body is potentially huge, do not store in search - published, - updated, - approved, - queued, - status, - requested_status, - moderator_message: None, // Deprecated - license: License { - id: m.license.clone(), - name: match spdx::Expression::parse(&m.license) { - Ok(spdx_expr) => { - let mut vec: Vec<&str> = Vec::new(); - for node in spdx_expr.iter() { - if let spdx::expression::ExprNode::Req(req) = node { - if let Some(id) = req.req.license.id() { - vec.push(id.full_name); - } - } - } - // spdx crate returns AND/OR operations in postfix order - // and it would be a lot 
more effort to make it actually in order - // so let's just ignore that and make them comma-separated - vec.join(", ") - } - Err(_) => "".to_string(), - }, - url: license_url, - }, - downloads: m.downloads as u32, - followers: m.follows as u32, - categories, - additional_categories, - loaders, - versions, - icon_url, - link_urls, - gallery, - color: m.color, - thread_id, - monetization_status, - fields: m - .project_loader_fields - .into_iter() - .map(|(k, v)| (k, v.into_iter().collect())) - .collect(), - }) - } + // pub fn from_search(m: ResultSearchProject) -> Option { + // let project_id = ProjectId(parse_base62(&m.project_id).ok()?); + // let team_id = TeamId(parse_base62(&m.team_id).ok()?); + // let organization_id = m + // .organization_id + // .and_then(|id| Some(OrganizationId(parse_base62(&id).ok()?))); + // let thread_id = ThreadId(parse_base62(&m.thread_id).ok()?); + // let versions = m + // .versions + // .iter() + // .filter_map(|id| Some(VersionId(parse_base62(id).ok()?))) + // .collect(); + // + // let approved = DateTime::parse_from_rfc3339(&m.date_created).ok()?; + // let published = DateTime::parse_from_rfc3339(&m.date_published).ok()?.into(); + // let approved = if approved == published { + // None + // } else { + // Some(approved.into()) + // }; + // + // let updated = DateTime::parse_from_rfc3339(&m.date_modified).ok()?.into(); + // let queued = m + // .date_queued + // .and_then(|dq| DateTime::parse_from_rfc3339(&dq).ok()) + // .map(|d| d.into()); + // + // let status = ProjectStatus::from_string(&m.status); + // let requested_status = m + // .requested_status + // .map(|mrs| ProjectStatus::from_string(&mrs)); + // + // let license_url = m.license_url; + // let icon_url = m.icon_url; + // + // // Loaders + // let mut loaders = m.loaders; + // let mrpack_loaders_strings = + // m.project_loader_fields + // .get("mrpack_loaders") + // .cloned() + // .map(|v| { + // v.into_iter() + // .filter_map(|v| v.as_str().map(String::from)) + // 
.collect_vec() + // }); + // + // // If the project has a mrpack loader, keep only 'loaders' that are not in the mrpack_loaders + // if let Some(ref mrpack_loaders) = mrpack_loaders_strings { + // loaders.retain(|l| !mrpack_loaders.contains(l)); + // } + // + // // Categories + // let mut categories = m.display_categories.clone(); + // categories.retain(|c| !loaders.contains(c)); + // if let Some(ref mrpack_loaders) = mrpack_loaders_strings { + // categories.retain(|l| !mrpack_loaders.contains(l)); + // } + // + // // Additional categories + // let mut additional_categories = m.categories.clone(); + // additional_categories.retain(|c| !categories.contains(c)); + // additional_categories.retain(|c| !loaders.contains(c)); + // if let Some(ref mrpack_loaders) = mrpack_loaders_strings { + // additional_categories.retain(|l| !mrpack_loaders.contains(l)); + // } + // + // let games = m.games; + // + // let monetization_status = m + // .monetization_status + // .as_deref() + // .map(MonetizationStatus::from_string) + // .unwrap_or(MonetizationStatus::Monetized); + // + // let link_urls = m + // .links + // .into_iter() + // .map(|d| (d.platform_name.clone(), Link::from(d))) + // .collect(); + // + // let gallery = m + // .gallery_items + // .into_iter() + // .map(|x| GalleryItem { + // url: x.image_url, + // featured: x.featured, + // name: x.name, + // description: x.description, + // created: x.created, + // ordering: x.ordering, + // }) + // .collect(); + // + // Some(Self { + // id: project_id, + // slug: m.slug, + // project_types: m.project_types, + // games, + // team_id, + // organization: organization_id, + // name: m.name, + // summary: m.summary, + // description: "".to_string(), // Body is potentially huge, do not store in search + // published, + // updated, + // approved, + // queued, + // status, + // requested_status, + // moderator_message: None, // Deprecated + // license: License { + // id: m.license.clone(), + // name: match 
spdx::Expression::parse(&m.license) { + // Ok(spdx_expr) => { + // let mut vec: Vec<&str> = Vec::new(); + // for node in spdx_expr.iter() { + // if let spdx::expression::ExprNode::Req(req) = node { + // if let Some(id) = req.req.license.id() { + // vec.push(id.full_name); + // } + // } + // } + // // spdx crate returns AND/OR operations in postfix order + // // and it would be a lot more effort to make it actually in order + // // so let's just ignore that and make them comma-separated + // vec.join(", ") + // } + // Err(_) => "".to_string(), + // }, + // url: license_url, + // }, + // downloads: m.downloads as u32, + // followers: m.follows as u32, + // categories, + // additional_categories, + // loaders, + // versions, + // icon_url, + // link_urls, + // gallery, + // color: m.color, + // thread_id, + // monetization_status, + // fields: m + // .project_loader_fields + // .into_iter() + // .map(|(k, v)| (k, v.into_iter().collect())) + // .collect(), + // }) + // } } #[derive(Serialize, Deserialize, Clone, Debug)] pub struct GalleryItem { diff --git a/src/routes/v3/projects.rs b/src/routes/v3/projects.rs index c4f82ca7..8a426e3f 100644 --- a/src/routes/v3/projects.rs +++ b/src/routes/v3/projects.rs @@ -893,13 +893,14 @@ pub async fn edit_project_categories( Ok(()) } -#[derive(Serialize, Deserialize)] -pub struct ReturnSearchResults { - pub hits: Vec, - pub page: usize, - pub hits_per_page: usize, - pub total_hits: usize, -} +// TODO: Re-add this if we want to match v3 Projects structure to v3 Search Result structure, otherwise, delete +// #[derive(Serialize, Deserialize)] +// pub struct ReturnSearchResults { +// pub hits: Vec, +// pub page: usize, +// pub hits_per_page: usize, +// pub total_hits: usize, +// } pub async fn project_search( web::Query(info): web::Query, @@ -907,16 +908,17 @@ pub async fn project_search( ) -> Result { let results = search_for_project(&info, &config).await?; - let results = ReturnSearchResults { - hits: results - .hits - .into_iter() 
- .filter_map(Project::from_search) - .collect::>(), - page: results.page, - hits_per_page: results.hits_per_page, - total_hits: results.total_hits, - }; + // TODO: add this back + // let results = ReturnSearchResults { + // hits: results + // .hits + // .into_iter() + // .filter_map(Project::from_search) + // .collect::>(), + // page: results.page, + // hits_per_page: results.hits_per_page, + // total_hits: results.total_hits, + // }; Ok(HttpResponse::Ok().json(results)) } diff --git a/src/search/indexing/local_import.rs b/src/search/indexing/local_import.rs index 6636ed5d..67ef159a 100644 --- a/src/search/indexing/local_import.rs +++ b/src/search/indexing/local_import.rs @@ -1,138 +1,240 @@ +use chrono::{DateTime, Utc}; +use dashmap::DashMap; use futures::TryStreamExt; +use itertools::Itertools; use log::info; use std::collections::HashMap; use super::IndexingError; -use crate::database::models::{project_item, version_item, ProjectId, VersionId}; -use crate::database::redis::RedisPool; -use crate::models; +use crate::database::models::loader_fields::{ + QueryLoaderField, QueryLoaderFieldEnumValue, QueryVersionField, VersionField, +}; +use crate::database::models::{ + LoaderFieldEnumId, LoaderFieldEnumValueId, LoaderFieldId, ProjectId, VersionId, +}; +use crate::models::projects::from_duplicate_version_fields; use crate::models::v2::projects::LegacyProject; use crate::routes::v2_reroute; use crate::search::UploadSearchProject; use sqlx::postgres::PgPool; -pub async fn get_all_ids( - pool: PgPool, -) -> Result, IndexingError> { - // TODO: Currently org owner is set to be considered owner. It may be worth considering - // adding a new facetable 'organization' field to the search index, and using that instead, - // and making owner to be optional. 
- let all_visible_ids: Vec<(VersionId, ProjectId, String)> = sqlx::query!( +pub async fn index_local(pool: &PgPool) -> Result, IndexingError> { + info!("Indexing local projects!"); + + // todo: loaders, project type, game versions + struct PartialProject { + id: ProjectId, + name: String, + summary: String, + downloads: i32, + follows: i32, + icon_url: Option, + updated: DateTime, + approved: DateTime, + slug: Option, + color: Option, + license: String, + } + + let db_projects = sqlx::query!( " - SELECT v.id id, m.id mod_id, COALESCE(u.username, ou.username) owner_username - FROM versions v - INNER JOIN mods m ON v.mod_id = m.id AND m.status = ANY($2) - LEFT JOIN team_members tm ON tm.team_id = m.team_id AND tm.is_owner = TRUE AND tm.accepted = TRUE - LEFT JOIN users u ON tm.user_id = u.id - LEFT JOIN organizations o ON o.id = m.organization_id - LEFT JOIN team_members otm ON otm.team_id = o.team_id AND otm.is_owner = TRUE AND otm.accepted = TRUE - LEFT JOIN users ou ON otm.user_id = ou.id - WHERE v.status != ANY($1) - GROUP BY v.id, m.id, u.username, ou.username - ORDER BY m.id DESC; + SELECT m.id id, m.name name, m.summary summary, m.downloads downloads, m.follows follows, + m.icon_url icon_url, m.updated updated, m.approved approved, m.published, m.license license, m.slug slug, m.color + FROM mods m + WHERE m.status = ANY($1) + GROUP BY m.id; ", - &*crate::models::projects::VersionStatus::iterator() - .filter(|x| x.is_hidden()) - .map(|x| x.to_string()) - .collect::>(), &*crate::models::projects::ProjectStatus::iterator() - .filter(|x| x.is_searchable()) - .map(|x| x.to_string()) - .collect::>(), + .filter(|x| x.is_searchable()) + .map(|x| x.to_string()) + .collect::>(), + ) + .fetch_many(pool) + .try_filter_map(|e| async { + Ok(e.right().map(|m| { + + PartialProject { + id: ProjectId(m.id), + name: m.name, + summary: m.summary, + downloads: m.downloads, + follows: m.follows, + icon_url: m.icon_url, + updated: m.updated, + approved: 
m.approved.unwrap_or(m.published), + slug: m.slug, + color: m.color, + license: m.license, + }})) + }) + .try_collect::>() + .await?; + + let project_ids = db_projects.iter().map(|x| x.id.0).collect::>(); + + struct PartialGallery { + url: String, + featured: bool, + ordering: i64, + } + + info!("Indexing local gallery!"); + + let mods_gallery: DashMap> = sqlx::query!( + " + SELECT mod_id, image_url, featured, ordering + FROM mods_gallery + WHERE mod_id = ANY($1) + ", + &*project_ids, + ) + .fetch(pool) + .try_fold( + DashMap::new(), + |acc: DashMap>, m| { + acc.entry(ProjectId(m.mod_id)) + .or_default() + .push(PartialGallery { + url: m.image_url, + featured: m.featured.unwrap_or(false), + ordering: m.ordering, + }); + async move { Ok(acc) } + }, ) - .fetch_many(&pool) - .try_filter_map(|e| async move { - Ok(e.right().map(|m| { - let project_id: ProjectId = ProjectId(m.mod_id); - let version_id: VersionId = VersionId(m.id); - let owner_username = m.owner_username.unwrap_or_default(); - (version_id, project_id, owner_username) - })) - }) - .try_collect::>() .await?; - Ok(all_visible_ids) -} + info!("Indexing local categories!"); -pub async fn index_local( - pool: &PgPool, - redis: &RedisPool, - visible_ids: HashMap, -) -> Result, IndexingError> { - info!("Indexing local projects!"); - let project_ids = visible_ids - .values() - .map(|(project_id, _)| project_id) - .cloned() - .collect::>(); - let projects: HashMap<_, _> = project_item::Project::get_many_ids(&project_ids, pool, redis) - .await? 
- .into_iter() - .map(|p| (p.inner.id, p)) - .collect(); + let categories: DashMap> = sqlx::query!( + " + SELECT mc.joining_mod_id mod_id, c.category name, mc.is_additional is_additional + FROM mods_categories mc + INNER JOIN categories c ON mc.joining_category_id = c.id + WHERE joining_mod_id = ANY($1) + ", + &*project_ids, + ) + .fetch(pool) + .try_fold( + DashMap::new(), + |acc: DashMap>, m| { + acc.entry(ProjectId(m.mod_id)) + .or_default() + .push((m.name, m.is_additional)); + async move { Ok(acc) } + }, + ) + .await?; - info!("Fetched local projects!"); + info!("Indexing local versions!"); + let mut versions = index_versions(pool, project_ids.clone()).await?; - let version_ids = visible_ids.keys().cloned().collect::>(); - let versions: HashMap<_, _> = version_item::Version::get_many(&version_ids, pool, redis) - .await? - .into_iter() - .map(|v| (v.inner.id, v)) - .collect(); + info!("Indexing local org owners!"); - info!("Fetched local versions!"); + let mods_org_owners: DashMap = sqlx::query!( + " + SELECT m.id mod_id, u.username + FROM mods m + INNER JOIN organizations o ON o.id = m.organization_id + INNER JOIN team_members tm ON tm.is_owner = TRUE and tm.team_id = o.team_id + INNER JOIN users u ON u.id = tm.user_id + WHERE m.id = ANY($1) + ", + &*project_ids, + ) + .fetch(pool) + .try_fold(DashMap::new(), |acc: DashMap, m| { + acc.insert(ProjectId(m.mod_id), m.username); + async move { Ok(acc) } + }) + .await?; + info!("Indexing local team owners!"); + + let mods_team_owners: DashMap = sqlx::query!( + " + SELECT m.id mod_id, u.username + FROM mods m + INNER JOIN team_members tm ON tm.is_owner = TRUE and tm.team_id = m.team_id + INNER JOIN users u ON u.id = tm.user_id + WHERE m.id = ANY($1) + ", + &project_ids, + ) + .fetch(pool) + .try_fold(DashMap::new(), |acc: DashMap, m| { + acc.insert(ProjectId(m.mod_id), m.username); + async move { Ok(acc) } + }) + .await?; + + info!("Getting all loader fields!"); + let loader_fields: Vec = sqlx::query!( + " + SELECT 
DISTINCT id, field, field_type, enum_type, min_val, max_val, optional + FROM loader_fields lf + ", + ) + .fetch(pool) + .map_ok(|m| QueryLoaderField { + id: LoaderFieldId(m.id), + field: m.field, + field_type: m.field_type, + enum_type: m.enum_type.map(LoaderFieldEnumId), + min_val: m.min_val, + max_val: m.max_val, + optional: m.optional, + }) + .try_collect() + .await?; + let loader_fields: Vec<&QueryLoaderField> = loader_fields.iter().collect(); + + info!("Getting all loader field enum values!"); + + let loader_field_enum_values: Vec = sqlx::query!( + " + SELECT DISTINCT id, enum_id, value, ordering, created, metadata + FROM loader_field_enum_values lfev + ORDER BY enum_id, ordering, created DESC + " + ) + .fetch(pool) + .map_ok(|m| QueryLoaderFieldEnumValue { + id: LoaderFieldEnumValueId(m.id), + enum_id: LoaderFieldEnumId(m.enum_id), + value: m.value, + ordering: m.ordering, + created: m.created, + metadata: m.metadata, + }) + .try_collect() + .await?; + + info!("Indexing loaders, project types!"); let mut uploads = Vec::new(); - // TODO: could possibly clone less here? 
- for (version_id, (project_id, owner_username)) in visible_ids { - let m = projects.get(&project_id); - let v = versions.get(&version_id); - let m = match m { - Some(m) => m, - None => continue, + let total_len = db_projects.len(); + let mut count = 0; + for project in db_projects { + count += 1; + info!("projects index prog: {count}/{total_len}"); + + let owner = if let Some((_, org_owner)) = mods_org_owners.remove(&project.id) { + org_owner + } else if let Some((_, team_owner)) = mods_team_owners.remove(&project.id) { + team_owner + } else { + println!( + "org owner not found for project {} id: {}!", + project.name, project.id.0 + ); + continue; }; - let v = match v { - Some(v) => v, - None => continue, - }; - - let version_id: crate::models::projects::VersionId = v.inner.id.into(); - let project_id: crate::models::projects::ProjectId = m.inner.id.into(); - let team_id: crate::models::teams::TeamId = m.inner.team_id.into(); - let organization_id: Option = - m.inner.organization_id.map(|x| x.into()); - let thread_id: crate::models::threads::ThreadId = m.thread_id.into(); - - let all_version_ids = m - .versions - .iter() - .map(|v| (*v).into()) - .collect::>(); - - let mut additional_categories = m.additional_categories.clone(); - let mut categories = m.categories.clone(); - - // Uses version loaders, not project loaders. 
- categories.append(&mut v.loaders.clone()); - - let display_categories = categories.clone(); - categories.append(&mut additional_categories); - - let version_fields = v.version_fields.clone(); - let unvectorized_loader_fields = v - .version_fields - .iter() - .map(|vf| (vf.field_name.clone(), vf.value.serialize_internal())) - .collect(); - let mut loader_fields = models::projects::from_duplicate_version_fields(version_fields); - let project_loader_fields = - models::projects::from_duplicate_version_fields(m.aggregate_version_fields.clone()); - let license = match m.inner.license.split(' ').next() { + let license = match project.license.split(' ').next() { Some(license) => license.to_string(), - None => m.inner.license.clone(), + None => project.license.clone(), }; let open_source = match spdx::license_id(&license) { @@ -140,113 +242,288 @@ pub async fn index_local( _ => false, }; - // For loaders, get ALL loaders across ALL versions - let mut loaders = all_version_ids - .iter() - .fold(vec![], |mut loaders, version_id| { - let version = versions.get(&(*version_id).into()); - if let Some(version) = version { - loaders.extend(version.loaders.clone()); + let (featured_gallery, gallery) = + if let Some((_, gallery)) = mods_gallery.remove(&project.id) { + let mut vals = Vec::new(); + let mut featured = None; + + for x in gallery + .into_iter() + .sorted_by(|a, b| a.ordering.cmp(&b.ordering)) + { + if x.featured && featured.is_none() { + featured = Some(x.url); + } else { + vals.push(x.url); + } } - loaders - }); - loaders.sort(); - loaders.dedup(); - // SPECIAL BEHAVIOUR - // Todo: revisit. - // For consistency with v2 searching, we consider the loader field 'mrpack_loaders' to be a category. - // These were previously considered the loader, and in v2, the loader is a category for searching. - // So to avoid breakage or awkward conversions, we just consider those loader_fields to be categories. 
- // The loaders are kept in loader_fields as well, so that no information is lost on retrieval. - let mrpack_loaders = loader_fields - .get("mrpack_loaders") - .cloned() - .map(|x| { - x.into_iter() - .filter_map(|x| x.as_str().map(String::from)) - .collect::>() - }) - .unwrap_or_default(); - categories.extend(mrpack_loaders); - if loader_fields.contains_key("mrpack_loaders") { - categories.retain(|x| *x != "mrpack"); + (featured, vals) + } else { + (None, vec![]) + }; + + let (categories, display_categories) = + if let Some((_, categories)) = categories.remove(&project.id) { + let mut vals = Vec::new(); + let mut featured_vals = Vec::new(); + + for (val, featured) in categories { + if featured { + featured_vals.push(val.clone()); + } + vals.push(val); + } + + (vals, featured_vals) + } else { + (vec![], vec![]) + }; + + if let Some(versions) = versions.remove(&project.id) { + // Aggregated project loader fields + let project_version_fields = versions + .iter() + .flat_map(|x| x.version_fields.clone()) + .collect::>(); + let aggregated_version_fields = VersionField::from_query_json( + project_version_fields, + &loader_fields, + &loader_field_enum_values, + true, + ); + let project_loader_fields = from_duplicate_version_fields(aggregated_version_fields); + + // aggregated project loaders + let project_loaders = versions + .iter() + .flat_map(|x| x.loaders.clone()) + .collect::>(); + + for version in versions { + let version_fields = VersionField::from_query_json( + version.version_fields, + &loader_fields, + &loader_field_enum_values, + false, + ); + let unvectorized_loader_fields = version_fields + .iter() + .map(|vf| (vf.field_name.clone(), vf.value.serialize_internal())) + .collect(); + let mut loader_fields = from_duplicate_version_fields(version_fields); + let project_types = version.project_types; + + let mut version_loaders = version.loaders; + + // Uses version loaders, not project loaders. 
+ let mut categories = categories.clone(); + categories.append(&mut version_loaders.clone()); + + let display_categories = display_categories.clone(); + categories.append(&mut version_loaders); + + // SPECIAL BEHAVIOUR + // Todo: revisit. + // For consistency with v2 searching, we consider the loader field 'mrpack_loaders' to be a category. + // These were previously considered the loader, and in v2, the loader is a category for searching. + // So to avoid breakage or awkward conversions, we just consider those loader_fields to be categories. + // The loaders are kept in loader_fields as well, so that no information is lost on retrieval. + let mrpack_loaders = loader_fields + .get("mrpack_loaders") + .cloned() + .map(|x| { + x.into_iter() + .filter_map(|x| x.as_str().map(String::from)) + .collect::>() + }) + .unwrap_or_default(); + categories.extend(mrpack_loaders); + if loader_fields.contains_key("mrpack_loaders") { + categories.retain(|x| *x != "mrpack"); + } + + // SPECIAL BEHAVIOUR: + // For consistency with v2 searching, we manually input the + // client_side and server_side fields from the loader fields into + // separate loader fields. + // 'client_side' and 'server_side' remain supported by meilisearch even though they are no longer v3 fields. 
+ let (_, v2_og_project_type) = LegacyProject::get_project_type(&project_types); + let (client_side, server_side) = v2_reroute::convert_side_types_v2( + &unvectorized_loader_fields, + Some(&v2_og_project_type), + ); + + if let Ok(client_side) = serde_json::to_value(client_side) { + loader_fields.insert("client_side".to_string(), vec![client_side]); + } + if let Ok(server_side) = serde_json::to_value(server_side) { + loader_fields.insert("server_side".to_string(), vec![server_side]); + } + + let usp = UploadSearchProject { + version_id: crate::models::ids::VersionId::from(version.id).to_string(), + project_id: crate::models::ids::ProjectId::from(project.id).to_string(), + name: project.name.clone(), + summary: project.summary.clone(), + categories: categories.clone(), + display_categories: display_categories.clone(), + follows: project.follows, + downloads: project.downloads, + icon_url: project.icon_url.clone(), + author: owner.clone(), + date_created: project.approved, + created_timestamp: project.approved.timestamp(), + date_modified: project.updated, + modified_timestamp: project.updated.timestamp(), + license: license.clone(), + slug: project.slug.clone(), + // TODO + project_types, + gallery: gallery.clone(), + featured_gallery: featured_gallery.clone(), + open_source, + color: project.color.map(|x| x as u32), + loader_fields, + project_loader_fields: project_loader_fields.clone(), + // 'loaders' is aggregate of all versions' loaders + loaders: project_loaders.clone(), + }; + + uploads.push(usp); + } } - - // SPECIAL BEHAVIOUR: - // For consitency with v2 searching, we manually input the - // client_side and server_side fields from the loader fields into - // separate loader fields. - // 'client_side' and 'server_side' remain supported by meilisearch even though they are no longer v3 fields. 
- let (_, v2_og_project_type) = LegacyProject::get_project_type(&v.project_types); - let (client_side, server_side) = v2_reroute::convert_side_types_v2( - &unvectorized_loader_fields, - Some(&v2_og_project_type), - ); - - if let Ok(client_side) = serde_json::to_value(client_side) { - loader_fields.insert("client_side".to_string(), vec![client_side]); - } - if let Ok(server_side) = serde_json::to_value(server_side) { - loader_fields.insert("server_side".to_string(), vec![server_side]); - } - - let gallery = m - .gallery_items - .iter() - .filter(|gi| !gi.featured) - .map(|gi| gi.image_url.clone()) - .collect::>(); - let featured_gallery = m - .gallery_items - .iter() - .filter(|gi| gi.featured) - .map(|gi| gi.image_url.clone()) - .collect::>(); - let featured_gallery = featured_gallery.first().cloned(); - - let usp = UploadSearchProject { - version_id: version_id.to_string(), - project_id: project_id.to_string(), - name: m.inner.name.clone(), - summary: m.inner.summary.clone(), - categories, - follows: m.inner.follows, - downloads: m.inner.downloads, - icon_url: m.inner.icon_url.clone(), - author: owner_username, - date_created: m.inner.approved.unwrap_or(m.inner.published), - created_timestamp: m.inner.approved.unwrap_or(m.inner.published).timestamp(), - date_modified: m.inner.updated, - modified_timestamp: m.inner.updated.timestamp(), - license, - slug: m.inner.slug.clone(), - project_types: m.project_types.clone(), - gallery, - featured_gallery, - display_categories, - open_source, - color: m.inner.color, - loader_fields, - license_url: m.inner.license_url.clone(), - monetization_status: Some(m.inner.monetization_status), - team_id: team_id.to_string(), - organization_id: organization_id.map(|x| x.to_string()), - thread_id: thread_id.to_string(), - versions: all_version_ids.iter().map(|x| x.to_string()).collect(), - date_published: m.inner.published, - date_queued: m.inner.queued, - status: m.inner.status, - requested_status: m.inner.requested_status, - games: 
m.games.clone(), - links: m.urls.clone(), - gallery_items: m.gallery_items.clone(), - loaders, - project_loader_fields, - }; - - uploads.push(usp); } Ok(uploads) } + +struct PartialVersion { + id: VersionId, + loaders: Vec, + project_types: Vec, + version_fields: Vec, +} + +async fn index_versions( + pool: &PgPool, + project_ids: Vec, +) -> Result>, IndexingError> { + let versions: HashMap> = sqlx::query!( + " + SELECT v.id, v.mod_id + FROM versions v + WHERE mod_id = ANY($1) + ", + &project_ids, + ) + .fetch(pool) + .try_fold( + HashMap::new(), + |mut acc: HashMap>, m| { + acc.entry(ProjectId(m.mod_id)) + .or_default() + .push(VersionId(m.id)); + async move { Ok(acc) } + }, + ) + .await?; + + // Get project types, loaders + #[derive(Default)] + struct VersionLoaderData { + loaders: Vec, + project_types: Vec, + } + + let all_version_ids = versions + .iter() + .flat_map(|(_, version_ids)| version_ids.iter()) + .map(|x| x.0) + .collect::>(); + + let loaders_ptypes: DashMap = sqlx::query!( + " + SELECT DISTINCT version_id, + ARRAY_AGG(DISTINCT l.loader) filter (where l.loader is not null) loaders, + ARRAY_AGG(DISTINCT pt.name) filter (where pt.name is not null) project_types + FROM versions v + INNER JOIN loaders_versions lv ON v.id = lv.version_id + INNER JOIN loaders l ON lv.loader_id = l.id + INNER JOIN loaders_project_types lpt ON lpt.joining_loader_id = l.id + INNER JOIN project_types pt ON pt.id = lpt.joining_project_type_id + WHERE v.id = ANY($1) + GROUP BY version_id + ", + &all_version_ids + ) + .fetch(pool) + .map_ok(|m| { + let version_id = VersionId(m.version_id); + + let version_loader_data = VersionLoaderData { + loaders: m.loaders.unwrap_or_default(), + project_types: m.project_types.unwrap_or_default(), + }; + (version_id, version_loader_data) + }) + .try_collect() + .await?; + + // Get version fields + let version_fields: DashMap> = sqlx::query!( + " + SELECT version_id, field_id, int_value, enum_value, string_value + FROM version_fields + WHERE 
version_id = ANY($1) + ", + &all_version_ids, + ) + .fetch(pool) + .try_fold( + DashMap::new(), + |acc: DashMap>, m| { + let qvf = QueryVersionField { + version_id: VersionId(m.version_id), + field_id: LoaderFieldId(m.field_id), + int_value: m.int_value, + enum_value: m.enum_value.map(LoaderFieldEnumValueId), + string_value: m.string_value, + }; + + acc.entry(VersionId(m.version_id)).or_default().push(qvf); + async move { Ok(acc) } + }, + ) + .await?; + + // Convert to partial versions + let mut res_versions: HashMap> = HashMap::new(); + for (project_id, version_ids) in versions.iter() { + for version_id in version_ids { + // Extract version-specific data fetched + // We use 'remove' as every version is only in the map once + let version_loader_data = loaders_ptypes + .remove(version_id) + .map(|(_, version_loader_data)| version_loader_data) + .unwrap_or_default(); + + let version_fields = version_fields + .remove(version_id) + .map(|(_, version_fields)| version_fields) + .unwrap_or_default(); + + res_versions + .entry(*project_id) + .or_default() + .push(PartialVersion { + id: *version_id, + loaders: version_loader_data.loaders, + project_types: version_loader_data.project_types, + version_fields, + }); + } + } + + Ok(res_versions) +} diff --git a/src/search/indexing/mod.rs b/src/search/indexing/mod.rs index 05d670ea..f7399a38 100644 --- a/src/search/indexing/mod.rs +++ b/src/search/indexing/mod.rs @@ -1,10 +1,6 @@ /// This module is used for the indexing from any source. 
pub mod local_import; -use itertools::Itertools; -use meilisearch_sdk::SwapIndexes; -use std::collections::HashMap; - use crate::database::redis::RedisPool; use crate::models::ids::base62_impl::to_base62; use crate::search::{SearchConfig, UploadSearchProject}; @@ -13,11 +9,9 @@ use log::info; use meilisearch_sdk::client::Client; use meilisearch_sdk::indexes::Index; use meilisearch_sdk::settings::{PaginationSetting, Settings}; +use meilisearch_sdk::SwapIndexes; use sqlx::postgres::PgPool; use thiserror::Error; - -use self::local_import::get_all_ids; - #[derive(Error, Debug)] pub enum IndexingError { #[error("Error while connecting to the MeiliSearch database")] @@ -37,9 +31,7 @@ pub enum IndexingError { // The chunk size for adding projects to the indexing database. If the request size // is too large (>10MiB) then the request fails with an error. This chunk size // assumes a max average size of 4KiB per project to avoid this cap. -const MEILISEARCH_CHUNK_SIZE: usize = 2500; // Should be less than FETCH_PROJECT_SIZE -const FETCH_PROJECT_SIZE: usize = 5000; - +const MEILISEARCH_CHUNK_SIZE: usize = 10000000; const TIMEOUT: std::time::Duration = std::time::Duration::from_secs(60); pub async fn remove_documents( @@ -84,38 +76,8 @@ pub async fn index_projects( .map(|x| x.field) .collect::>(); - let all_ids = get_all_ids(pool.clone()).await?; - let all_ids_len = all_ids.len(); - info!("Got all ids, indexing {} projects", all_ids_len); - let mut so_far = 0; - let as_chunks: Vec<_> = all_ids - .into_iter() - .chunks(FETCH_PROJECT_SIZE) - .into_iter() - .map(|x| x.collect::>()) - .collect(); - - for id_chunk in as_chunks { - info!( - "Fetching chunk {}-{}/{}, size: {}", - so_far, - so_far + FETCH_PROJECT_SIZE, - all_ids_len, - id_chunk.len() - ); - so_far += FETCH_PROJECT_SIZE; - - let id_chunk = id_chunk - .into_iter() - .map(|(version_id, project_id, owner_username)| { - (version_id, (project_id, owner_username)) - }) - .collect::>(); - let uploads = index_local(&pool, 
&redis, id_chunk).await?; - - info!("Got chunk, adding to docs_to_add"); - add_projects(&indices, uploads, all_loader_fields.clone(), config).await?; - } + let uploads = index_local(&pool).await?; + add_projects(&indices, uploads, all_loader_fields.clone(), config).await?; // Swap the index swap_index(config, "projects").await?; diff --git a/src/search/mod.rs b/src/search/mod.rs index 6f942a85..725958e5 100644 --- a/src/search/mod.rs +++ b/src/search/mod.rs @@ -1,6 +1,5 @@ -use crate::database::models::project_item::{GalleryItem, LinkUrl}; use crate::models::error::ApiError; -use crate::models::projects::{MonetizationStatus, ProjectStatus, SearchRequest}; +use crate::models::projects::SearchRequest; use actix_web::http::StatusCode; use actix_web::HttpResponse; use chrono::{DateTime, Utc}; @@ -96,6 +95,7 @@ impl SearchConfig { pub struct UploadSearchProject { pub version_id: String, pub project_id: String, + // pub project_types: Vec, pub slug: Option, pub author: String, @@ -121,20 +121,7 @@ pub struct UploadSearchProject { pub color: Option, // Hidden fields to get the Project model out of the search results. - pub license_url: Option, - pub monetization_status: Option, - pub team_id: String, - pub thread_id: String, - pub versions: Vec, - pub date_published: DateTime, - pub date_queued: Option>, - pub status: ProjectStatus, - pub requested_status: Option, pub loaders: Vec, // Search uses loaders as categories- this is purely for the Project model. - pub links: Vec, - pub gallery_items: Vec, // Gallery *only* urls are stored in gallery, but the gallery items are stored here- required for the Project model. - pub games: Vec, - pub organization_id: Option, pub project_loader_fields: HashMap>, // Aggregation of loader_fields from all versions of the project, allowing for reconstruction of the Project model. #[serde(flatten)] @@ -173,20 +160,7 @@ pub struct ResultSearchProject { pub color: Option, // Hidden fields to get the Project model out of the search results. 
- pub license_url: Option, - pub monetization_status: Option, - pub team_id: String, - pub thread_id: String, - pub versions: Vec, - pub date_published: String, - pub date_queued: Option, - pub status: String, - pub requested_status: Option, pub loaders: Vec, // Search uses loaders as categories- this is purely for the Project model. - pub links: Vec, - pub gallery_items: Vec, // Gallery *only* urls are stored in gallery, but the gallery items are stored here- required for the Project model. - pub games: Vec, - pub organization_id: Option, pub project_loader_fields: HashMap>, // Aggregation of loader_fields from all versions of the project, allowing for reconstruction of the Project model. #[serde(flatten)] diff --git a/tests/common/api_v3/project.rs b/tests/common/api_v3/project.rs index 8382ae1e..61db1218 100644 --- a/tests/common/api_v3/project.rs +++ b/tests/common/api_v3/project.rs @@ -10,7 +10,7 @@ use bytes::Bytes; use chrono::{DateTime, Utc}; use labrinth::{ models::{organizations::Organization, projects::Project}, - routes::v3::projects::ReturnSearchResults, + search::SearchResults, util::actix::AppendsMultipart, }; use rust_decimal::Decimal; @@ -511,7 +511,7 @@ impl ApiV3 { query: Option<&str>, facets: Option, pat: Option<&str>, - ) -> ReturnSearchResults { + ) -> SearchResults { let query_field = if let Some(query) = query { format!("&query={}", urlencoding::encode(query)) } else { diff --git a/tests/project.rs b/tests/project.rs index 7ef74e3f..74170565 100644 --- a/tests/project.rs +++ b/tests/project.rs @@ -6,11 +6,10 @@ use common::dummy_data::DUMMY_CATEGORIES; use common::environment::{with_test_environment, with_test_environment_all, TestEnvironment}; use common::permissions::{PermissionsTest, PermissionsTestContext}; -use common::search::setup_search_projects; use futures::StreamExt; use labrinth::database::models::project_item::{PROJECTS_NAMESPACE, PROJECTS_SLUGS_NAMESPACE}; use labrinth::models::ids::base62_impl::parse_base62; -use 
labrinth::models::projects::{Project, ProjectId}; +use labrinth::models::projects::ProjectId; use labrinth::models::teams::ProjectPermissions; use labrinth::util::actix::{MultipartSegment, MultipartSegmentData}; use serde_json::json; @@ -1199,41 +1198,42 @@ async fn project_permissions_consistency_test() { .await; } -#[actix_rt::test] -async fn align_search_projects() { - // Test setup and dummy data - with_test_environment(Some(10), |test_env: TestEnvironment| async move { - setup_search_projects(&test_env).await; +// TODO: Re-add this if we want to match v3 Projects structure to v3 Search Result structure, otherwise, delete +// #[actix_rt::test] +// async fn align_search_projects() { +// // Test setup and dummy data +// with_test_environment(Some(10), |test_env: TestEnvironment| async move { +// setup_search_projects(&test_env).await; - let api = &test_env.api; - let test_name = test_env.db.database_name.clone(); +// let api = &test_env.api; +// let test_name = test_env.db.database_name.clone(); - let projects = api - .search_deserialized( - Some(&format!("\"&{test_name}\"")), - Some(json!([["categories:fabric"]])), - USER_USER_PAT, - ) - .await; +// let projects = api +// .search_deserialized( +// Some(&format!("\"&{test_name}\"")), +// Some(json!([["categories:fabric"]])), +// USER_USER_PAT, +// ) +// .await; - for project in projects.hits { - let project_model = api - .get_project(&project.id.to_string(), USER_USER_PAT) - .await; - assert_status!(&project_model, StatusCode::OK); - let mut project_model: Project = test::read_body_json(project_model).await; +// for project in projects.hits { +// let project_model = api +// .get_project(&project.id.to_string(), USER_USER_PAT) +// .await; +// assert_status!(&project_model, StatusCode::OK); +// let mut project_model: Project = test::read_body_json(project_model).await; - // Body/description is huge- don't store it in search, so it's StatusCode::OK if they differ here - // (Search should return "") - 
project_model.description = "".into(); +// // Body/description is huge- don't store it in search, so it's StatusCode::OK if they differ here +// // (Search should return "") +// project_model.description = "".into(); - let project_model = serde_json::to_value(project_model).unwrap(); - let searched_project_serialized = serde_json::to_value(project).unwrap(); - assert_eq!(project_model, searched_project_serialized); - } - }) - .await -} +// let project_model = serde_json::to_value(project_model).unwrap(); +// let searched_project_serialized = serde_json::to_value(project).unwrap(); +// assert_eq!(project_model, searched_project_serialized); +// } +// }) +// .await +// } #[actix_rt::test] async fn projects_various_visibility() { diff --git a/tests/search.rs b/tests/search.rs index 04b94a7d..67db3ada 100644 --- a/tests/search.rs +++ b/tests/search.rs @@ -8,6 +8,7 @@ use common::environment::with_test_environment; use common::environment::TestEnvironment; use common::search::setup_search_projects; use futures::stream::StreamExt; +use labrinth::models::ids::base62_impl::parse_base62; use serde_json::json; use crate::common::api_common::Api; @@ -103,7 +104,7 @@ async fn search_projects() { let mut found_project_ids: Vec = projects .hits .into_iter() - .map(|p| id_conversion[&p.id.0]) + .map(|p| id_conversion[&parse_base62(&p.project_id).unwrap()]) .collect(); let num_hits = projects.total_hits; expected_project_ids.sort(); diff --git a/tests/v2/search.rs b/tests/v2/search.rs index 4d3db368..622bbcab 100644 --- a/tests/v2/search.rs +++ b/tests/v2/search.rs @@ -328,6 +328,7 @@ async fn search_projects() { .collect(); expected_project_ids.sort(); found_project_ids.sort(); + println!("Facets: {:?}", facets); assert_eq!(found_project_ids, expected_project_ids); } })