Chunking searches (#787)

* new attempt

* revised searching CTEs

* prepare fix

* fix tests

* fixes

* restructured project_item to use queries

* search changes! fmt clippy prepare

* small changes
This commit is contained in:
Wyatt Verchere
2023-12-08 22:14:17 -08:00
committed by GitHub
parent 945e5a2dc3
commit 235f4f10ef
28 changed files with 1784 additions and 1026 deletions

View File

@@ -1,4 +1,6 @@
use super::loader_fields::VersionField;
use super::loader_fields::{
QueryLoaderField, QueryLoaderFieldEnumValue, QueryVersionField, VersionField,
};
use super::{ids::*, User};
use crate::database::models;
use crate::database::models::DatabaseError;
@@ -6,6 +8,7 @@ use crate::database::redis::RedisPool;
use crate::models::ids::base62_impl::{parse_base62, to_base62};
use crate::models::projects::{MonetizationStatus, ProjectStatus};
use chrono::{DateTime, Utc};
use dashmap::{DashMap, DashSet};
use futures::TryStreamExt;
use itertools::Itertools;
use serde::{Deserialize, Serialize};
@@ -446,7 +449,7 @@ impl Project {
redis: &RedisPool,
) -> Result<Option<QueryProject>, DatabaseError>
where
E: sqlx::Executor<'a, Database = sqlx::Postgres>,
E: sqlx::Acquire<'a, Database = sqlx::Postgres>,
{
Project::get_many(&[string], executor, redis)
.await
@@ -459,7 +462,7 @@ impl Project {
redis: &RedisPool,
) -> Result<Option<QueryProject>, DatabaseError>
where
E: sqlx::Executor<'a, Database = sqlx::Postgres>,
E: sqlx::Acquire<'a, Database = sqlx::Postgres>,
{
Project::get_many(&[crate::models::ids::ProjectId::from(id)], executor, redis)
.await
@@ -472,7 +475,7 @@ impl Project {
redis: &RedisPool,
) -> Result<Vec<QueryProject>, DatabaseError>
where
E: sqlx::Executor<'a, Database = sqlx::Postgres>,
E: sqlx::Acquire<'a, Database = sqlx::Postgres>,
{
let ids = project_ids
.iter()
@@ -487,13 +490,14 @@ impl Project {
redis: &RedisPool,
) -> Result<Vec<QueryProject>, DatabaseError>
where
E: sqlx::Executor<'a, Database = sqlx::Postgres>,
E: sqlx::Acquire<'a, Database = sqlx::Postgres>,
{
if project_strings.is_empty() {
return Ok(Vec::new());
}
let mut redis = redis.connect().await?;
let mut exec = exec.acquire().await?;
let mut found_projects = Vec::new();
let mut remaining_strings = project_strings
@@ -544,104 +548,200 @@ impl Project {
.flat_map(|x| parse_base62(&x.to_string()).ok())
.map(|x| x as i64)
.collect();
let slugs = remaining_strings
.into_iter()
.map(|x| x.to_string().to_lowercase())
.collect::<Vec<_>>();
let all_version_ids = DashSet::new();
let versions: DashMap<ProjectId, Vec<(VersionId, DateTime<Utc>)>> = sqlx::query!(
"
SELECT DISTINCT mod_id, v.id as id, date_published
FROM mods m
INNER JOIN versions v ON m.id = v.mod_id AND v.status = ANY($3)
WHERE m.id = ANY($1) OR m.slug = ANY($2)
",
&project_ids_parsed,
&slugs,
&*crate::models::projects::VersionStatus::iterator()
.filter(|x| x.is_listed())
.map(|x| x.to_string())
.collect::<Vec<String>>()
)
.fetch(&mut *exec)
.try_fold(DashMap::new(), |acc : DashMap<ProjectId, Vec<(VersionId, DateTime<Utc>)>>, m| {
let version_id = VersionId(m.id);
let date_published = m.date_published;
all_version_ids.insert(version_id);
acc.entry(ProjectId(m.mod_id))
.or_default()
.push((version_id, date_published));
async move { Ok(acc) }
})
.await?;
let loader_field_ids = DashSet::new();
let loader_field_enum_value_ids = DashSet::new();
let version_fields: DashMap<ProjectId, Vec<QueryVersionField>> = sqlx::query!(
"
SELECT DISTINCT mod_id, version_id, field_id, int_value, enum_value, string_value
FROM versions v
INNER JOIN version_fields vf ON v.id = vf.version_id
WHERE v.id = ANY($1)
",
&all_version_ids.iter().map(|x| x.0).collect::<Vec<_>>()
)
.fetch(&mut *exec)
.try_fold(DashMap::new(), |acc : DashMap<ProjectId, Vec<QueryVersionField>>, m| {
let qvf = QueryVersionField {
version_id: VersionId(m.version_id),
field_id: LoaderFieldId(m.field_id),
int_value: m.int_value,
enum_value: m.enum_value.map(LoaderFieldEnumValueId),
string_value: m.string_value,
};
loader_field_ids.insert(LoaderFieldId(m.field_id));
if let Some(enum_value) = m.enum_value {
loader_field_enum_value_ids.insert(LoaderFieldEnumValueId(enum_value));
}
acc.entry(ProjectId(m.mod_id))
.or_default()
.push(qvf);
async move { Ok(acc) }
})
.await?;
let loader_fields: Vec<QueryLoaderField> = sqlx::query!(
"
SELECT DISTINCT id, field, field_type, enum_type, min_val, max_val, optional
FROM loader_fields lf
WHERE id = ANY($1)
",
&loader_field_ids.iter().map(|x| x.0).collect::<Vec<_>>()
)
.fetch(&mut *exec)
.map_ok(|m| QueryLoaderField {
id: LoaderFieldId(m.id),
field: m.field,
field_type: m.field_type,
enum_type: m.enum_type.map(LoaderFieldEnumId),
min_val: m.min_val,
max_val: m.max_val,
optional: m.optional,
})
.try_collect()
.await?;
let loader_field_enum_values: Vec<QueryLoaderFieldEnumValue> = sqlx::query!(
"
SELECT DISTINCT id, enum_id, value, ordering, created, metadata
FROM loader_field_enum_values lfev
WHERE id = ANY($1)
ORDER BY enum_id, ordering, created DESC
",
&loader_field_enum_value_ids
.iter()
.map(|x| x.0)
.collect::<Vec<_>>()
)
.fetch(&mut *exec)
.map_ok(|m| QueryLoaderFieldEnumValue {
id: LoaderFieldEnumValueId(m.id),
enum_id: LoaderFieldEnumId(m.enum_id),
value: m.value,
ordering: m.ordering,
created: m.created,
metadata: m.metadata,
})
.try_collect()
.await?;
let mods_gallery: DashMap<ProjectId, Vec<GalleryItem>> = sqlx::query!(
"
SELECT DISTINCT mod_id, mg.image_url, mg.featured, mg.name, mg.description, mg.created, mg.ordering
FROM mods_gallery mg
INNER JOIN mods m ON mg.mod_id = m.id
WHERE m.id = ANY($1) OR m.slug = ANY($2)
",
&project_ids_parsed,
&slugs
).fetch(&mut *exec)
.try_fold(DashMap::new(), |acc : DashMap<ProjectId, Vec<GalleryItem>>, m| {
acc.entry(ProjectId(m.mod_id))
.or_default()
.push(GalleryItem {
image_url: m.image_url,
featured: m.featured.unwrap_or(false),
name: m.name,
description: m.description,
created: m.created,
ordering: m.ordering,
});
async move { Ok(acc) }
}
).await?;
let links: DashMap<ProjectId, Vec<LinkUrl>> = sqlx::query!(
"
SELECT DISTINCT joining_mod_id as mod_id, joining_platform_id as platform_id, lp.name as platform_name, url, lp.donation as donation
FROM mods_links ml
INNER JOIN mods m ON ml.joining_mod_id = m.id
INNER JOIN link_platforms lp ON ml.joining_platform_id = lp.id
WHERE m.id = ANY($1) OR m.slug = ANY($2)
",
&project_ids_parsed,
&slugs
).fetch(&mut *exec)
.try_fold(DashMap::new(), |acc : DashMap<ProjectId, Vec<LinkUrl>>, m| {
acc.entry(ProjectId(m.mod_id))
.or_default()
.push(LinkUrl {
platform_id: LinkPlatformId(m.platform_id),
platform_name: m.platform_name,
url: m.url,
donation: m.donation,
});
async move { Ok(acc) }
}
).await?;
type StringTriple = (Vec<String>, Vec<String>, Vec<String>);
let loaders_ptypes_games: DashMap<ProjectId, StringTriple> = sqlx::query!(
"
SELECT DISTINCT mod_id,
ARRAY_AGG(DISTINCT l.loader) filter (where l.loader is not null) loaders,
ARRAY_AGG(DISTINCT pt.name) filter (where pt.name is not null) project_types,
ARRAY_AGG(DISTINCT g.slug) filter (where g.slug is not null) games
FROM versions v
INNER JOIN loaders_versions lv ON v.id = lv.version_id
INNER JOIN loaders l ON lv.loader_id = l.id
INNER JOIN loaders_project_types lpt ON lpt.joining_loader_id = l.id
INNER JOIN project_types pt ON pt.id = lpt.joining_project_type_id
INNER JOIN loaders_project_types_games lptg ON lptg.loader_id = l.id AND lptg.project_type_id = pt.id
INNER JOIN games g ON lptg.game_id = g.id
WHERE v.id = ANY($1)
GROUP BY mod_id
",
&all_version_ids.iter().map(|x| x.0).collect::<Vec<_>>()
).fetch(&mut *exec)
.map_ok(|m| {
let project_id = ProjectId(m.mod_id);
let loaders = m.loaders.unwrap_or_default();
let project_types = m.project_types.unwrap_or_default();
let games = m.games.unwrap_or_default();
(project_id, (loaders, project_types, games))
}
).try_collect().await?;
// TODO: Possible improvements to look into:
// - use multiple queries instead of CTES (for cleanliness?)
// - repeated joins to mods in separate CTEs- perhaps 1 CTE for mods and use later (in mods_gallery_json, mods_donations_json, etc.)
let db_projects: Vec<QueryProject> = sqlx::query!(
"
WITH version_fields_cte AS (
SELECT mod_id, version_id, field_id, int_value, enum_value, string_value
FROM mods m
INNER JOIN versions v ON m.id = v.mod_id
INNER JOIN version_fields vf ON v.id = vf.version_id
WHERE m.id = ANY($1) OR m.slug = ANY($2)
),
version_fields_json AS (
SELECT DISTINCT mod_id,
JSONB_AGG(
DISTINCT jsonb_build_object('version_id', version_id, 'field_id', field_id, 'int_value', int_value, 'enum_value', enum_value, 'string_value', string_value)
) version_fields_json
FROM version_fields_cte
GROUP BY mod_id
),
loader_fields_cte AS (
SELECT DISTINCT vf.mod_id, vf.version_id, lf.*, l.loader
FROM loader_fields lf
INNER JOIN version_fields_cte vf ON lf.id = vf.field_id
LEFT JOIN loaders_versions lv ON vf.version_id = lv.version_id
LEFT JOIN loaders l ON lv.loader_id = l.id
GROUP BY vf.mod_id, vf.version_id, lf.enum_type, lf.id, l.loader
),
loader_fields_json AS (
SELECT DISTINCT mod_id,
JSONB_AGG(
DISTINCT jsonb_build_object(
'version_id', lf.version_id,
'lf_id', id, 'loader_name', loader, 'field', field, 'field_type', field_type, 'enum_type', enum_type, 'min_val', min_val, 'max_val', max_val, 'optional', optional
)
) filter (where lf.id is not null) loader_fields_json
FROM loader_fields_cte lf
GROUP BY mod_id
),
loader_field_enum_values_json AS (
SELECT DISTINCT mod_id,
JSONB_AGG(
DISTINCT jsonb_build_object(
'id', lfev.id, 'enum_id', lfev.enum_id, 'value', lfev.value, 'ordering', lfev.ordering, 'created', lfev.created, 'metadata', lfev.metadata
)
) filter (where lfev.id is not null) loader_field_enum_values_json
FROM loader_field_enum_values lfev
INNER JOIN loader_fields_cte lf on lf.enum_type = lfev.enum_id
GROUP BY mod_id
),
versions_cte AS (
SELECT DISTINCT mod_id, v.id as id, date_published
FROM mods m
INNER JOIN versions v ON m.id = v.mod_id AND v.status = ANY($3)
WHERE m.id = ANY($1) OR m.slug = ANY($2)
),
versions_json AS (
SELECT DISTINCT mod_id,
JSONB_AGG(
DISTINCT jsonb_build_object(
'id', id, 'date_published', date_published
)
) filter (where id is not null) versions_json
FROM versions_cte
GROUP BY mod_id
),
loaders_cte AS (
SELECT DISTINCT mod_id, l.id as id, l.loader
FROM versions_cte
INNER JOIN loaders_versions lv ON versions_cte.id = lv.version_id
INNER JOIN loaders l ON lv.loader_id = l.id
),
mods_gallery_json AS (
SELECT DISTINCT mod_id,
JSONB_AGG(
DISTINCT jsonb_build_object(
'image_url', mg.image_url, 'featured', mg.featured, 'name', mg.name, 'description', mg.description, 'created', mg.created, 'ordering', mg.ordering
)
) filter (where image_url is not null) mods_gallery_json
FROM mods_gallery mg
INNER JOIN mods m ON mg.mod_id = m.id
WHERE m.id = ANY($1) OR m.slug = ANY($2)
GROUP BY mod_id
),
links_json AS (
SELECT DISTINCT joining_mod_id as mod_id,
JSONB_AGG(
DISTINCT jsonb_build_object(
'platform_id', ml.joining_platform_id, 'platform_name', lp.name,'url', ml.url, 'donation', lp.donation
)
) filter (where ml.joining_platform_id is not null) links_json
FROM mods_links ml
INNER JOIN mods m ON ml.joining_mod_id = m.id AND m.id = ANY($1) OR m.slug = ANY($2)
INNER JOIN link_platforms lp ON ml.joining_platform_id = lp.id
GROUP BY mod_id
)
SELECT m.id id, m.name name, m.summary summary, m.downloads downloads, m.follows follows,
m.icon_url icon_url, m.description description, m.published published,
m.updated updated, m.approved approved, m.queued, m.status status, m.requested_status requested_status,
@@ -649,43 +749,28 @@ impl Project {
m.team_id team_id, m.organization_id organization_id, m.license license, m.slug slug, m.moderation_message moderation_message, m.moderation_message_body moderation_message_body,
m.webhook_sent, m.color,
t.id thread_id, m.monetization_status monetization_status,
ARRAY_AGG(DISTINCT l.loader) filter (where l.loader is not null) loaders,
ARRAY_AGG(DISTINCT pt.name) filter (where pt.name is not null) project_types,
ARRAY_AGG(DISTINCT g.slug) filter (where g.slug is not null) games,
ARRAY_AGG(DISTINCT c.category) filter (where c.category is not null and mc.is_additional is false) categories,
ARRAY_AGG(DISTINCT c.category) filter (where c.category is not null and mc.is_additional is true) additional_categories,
v.versions_json versions,
mg.mods_gallery_json gallery,
ml.links_json links,
vf.version_fields_json version_fields,
lf.loader_fields_json loader_fields,
lfev.loader_field_enum_values_json loader_field_enum_values
ARRAY_AGG(DISTINCT c.category) filter (where c.category is not null and mc.is_additional is true) additional_categories
FROM mods m
INNER JOIN threads t ON t.mod_id = m.id
LEFT JOIN mods_gallery_json mg ON mg.mod_id = m.id
LEFT JOIN links_json ml ON ml.mod_id = m.id
LEFT JOIN mods_categories mc ON mc.joining_mod_id = m.id
LEFT JOIN categories c ON mc.joining_category_id = c.id
LEFT JOIN versions_json v ON v.mod_id = m.id
LEFT JOIN loaders_cte l on l.mod_id = m.id
LEFT JOIN loaders_project_types lpt ON lpt.joining_loader_id = l.id
LEFT JOIN project_types pt ON pt.id = lpt.joining_project_type_id
LEFT JOIN loaders_project_types_games lptg ON lptg.loader_id = l.id AND lptg.project_type_id = pt.id
LEFT JOIN games g ON lptg.game_id = g.id
LEFT OUTER JOIN version_fields_json vf ON m.id = vf.mod_id
LEFT OUTER JOIN loader_fields_json lf ON m.id = lf.mod_id
LEFT OUTER JOIN loader_field_enum_values_json lfev ON m.id = lfev.mod_id
WHERE m.id = ANY($1) OR m.slug = ANY($2)
GROUP BY t.id, m.id, version_fields_json, loader_fields_json, loader_field_enum_values_json, versions_json, mods_gallery_json, links_json;
GROUP BY t.id, m.id;
",
&project_ids_parsed,
&remaining_strings.into_iter().map(|x| x.to_string().to_lowercase()).collect::<Vec<_>>(),
&*crate::models::projects::VersionStatus::iterator().filter(|x| x.is_listed()).map(|x| x.to_string()).collect::<Vec<String>>()
&slugs,
)
.fetch_many(exec)
.fetch_many(&mut *exec)
.try_filter_map(|e| async {
Ok(e.right().map(|m| {
let id = m.id;
let project_id = ProjectId(id);
let (loaders, project_types, games) = loaders_ptypes_games.remove(&project_id).map(|x| x.1).unwrap_or_default();
let mut versions = versions.remove(&project_id).map(|x| x.1).unwrap_or_default();
let mut gallery = mods_gallery.remove(&project_id).map(|x| x.1).unwrap_or_default();
let urls = links.remove(&project_id).map(|x| x.1).unwrap_or_default();
let version_fields = version_fields.remove(&project_id).map(|x| x.1).unwrap_or_default();
QueryProject {
inner: Project {
id: ProjectId(id),
@@ -717,41 +802,23 @@ impl Project {
monetization_status: MonetizationStatus::from_string(
&m.monetization_status,
),
loaders: m.loaders.unwrap_or_default(),
loaders,
},
categories: m.categories.unwrap_or_default(),
additional_categories: m.additional_categories.unwrap_or_default(),
project_types: m.project_types.unwrap_or_default(),
games: m.games.unwrap_or_default(),
project_types,
games,
versions: {
#[derive(Deserialize)]
struct Version {
pub id: VersionId,
pub date_published: DateTime<Utc>,
}
let mut versions: Vec<Version> = serde_json::from_value(
m.versions.unwrap_or_default(),
)
.ok()
.unwrap_or_default();
versions.sort_by(|a, b| a.date_published.cmp(&b.date_published));
versions.into_iter().map(|x| x.id).collect()
// Each version is a tuple of (VersionId, DateTime<Utc>)
versions.sort_by(|a, b| a.1.cmp(&b.1));
versions.into_iter().map(|x| x.0).collect()
},
gallery_items: {
let mut gallery: Vec<GalleryItem> = serde_json::from_value(
m.gallery.unwrap_or_default(),
).ok().unwrap_or_default();
gallery.sort_by(|a, b| a.ordering.cmp(&b.ordering));
gallery
},
urls: serde_json::from_value(
m.links.unwrap_or_default(),
).unwrap_or_default(),
aggregate_version_fields: VersionField::from_query_json(m.loader_fields, m.version_fields, m.loader_field_enum_values, true),
urls,
aggregate_version_fields: VersionField::from_query_json(version_fields, &loader_fields, &loader_field_enum_values, true),
thread_id: ThreadId(m.thread_id),
}}))
})