Fix cache stampede issues + generalize cache (#884)

* caching changes

* fix cache stampede issues

* Use pub/sub for better DB fetches

* remove pubsub

* remove debugs

* Fix caches not working

* fix search indexing removal
This commit is contained in:
Geometrically
2024-03-26 21:15:50 -07:00
committed by GitHub
parent decfcb6c27
commit a0aa350a08
48 changed files with 1540 additions and 1655 deletions

View File

@@ -8,6 +8,7 @@ use crate::database::redis::RedisPool;
use crate::models::projects::{FileType, VersionStatus};
use chrono::{DateTime, Utc};
use dashmap::{DashMap, DashSet};
use futures::TryStreamExt;
use itertools::Itertools;
use serde::{Deserialize, Serialize};
use std::cmp::Ordering;
@@ -469,301 +470,263 @@ impl Version {
where
E: sqlx::Acquire<'a, Database = sqlx::Postgres>,
{
let version_ids = version_ids
.iter()
.unique()
.copied()
.collect::<Vec<VersionId>>();
let mut val = redis.get_cached_keys(
VERSIONS_NAMESPACE,
&version_ids.iter().map(|x| x.0).collect::<Vec<_>>(),
|version_ids| async move {
let mut exec = exec.acquire().await?;
use futures::stream::TryStreamExt;
let loader_field_enum_value_ids = DashSet::new();
let version_fields: DashMap<VersionId, Vec<QueryVersionField>> = sqlx::query!(
"
SELECT version_id, field_id, int_value, enum_value, string_value
FROM version_fields
WHERE version_id = ANY($1)
",
&version_ids
)
.fetch(&mut *exec)
.try_fold(
DashMap::new(),
|acc: DashMap<VersionId, Vec<QueryVersionField>>, m| {
let qvf = QueryVersionField {
version_id: VersionId(m.version_id),
field_id: LoaderFieldId(m.field_id),
int_value: m.int_value,
enum_value: m.enum_value.map(LoaderFieldEnumValueId),
string_value: m.string_value,
};
if version_ids.is_empty() {
return Ok(Vec::new());
}
if let Some(enum_value) = m.enum_value {
loader_field_enum_value_ids.insert(LoaderFieldEnumValueId(enum_value));
}
let mut exec = exec.acquire().await?;
let mut redis = redis.connect().await?;
acc.entry(VersionId(m.version_id)).or_default().push(qvf);
async move { Ok(acc) }
},
)
.await?;
let mut version_ids_parsed: Vec<i64> = version_ids.iter().map(|x| x.0).collect();
#[derive(Default)]
struct VersionLoaderData {
loaders: Vec<String>,
project_types: Vec<String>,
games: Vec<String>,
loader_loader_field_ids: Vec<LoaderFieldId>,
}
let mut found_versions = Vec::new();
let loader_field_ids = DashSet::new();
let loaders_ptypes_games: DashMap<VersionId, VersionLoaderData> = sqlx::query!(
"
SELECT DISTINCT version_id,
ARRAY_AGG(DISTINCT l.loader) filter (where l.loader is not null) loaders,
ARRAY_AGG(DISTINCT pt.name) filter (where pt.name is not null) project_types,
ARRAY_AGG(DISTINCT g.slug) filter (where g.slug is not null) games,
ARRAY_AGG(DISTINCT lfl.loader_field_id) filter (where lfl.loader_field_id is not null) loader_fields
FROM versions v
INNER JOIN loaders_versions lv ON v.id = lv.version_id
INNER JOIN loaders l ON lv.loader_id = l.id
INNER JOIN loaders_project_types lpt ON lpt.joining_loader_id = l.id
INNER JOIN project_types pt ON pt.id = lpt.joining_project_type_id
INNER JOIN loaders_project_types_games lptg ON lptg.loader_id = l.id AND lptg.project_type_id = pt.id
INNER JOIN games g ON lptg.game_id = g.id
LEFT JOIN loader_fields_loaders lfl ON lfl.loader_id = l.id
WHERE v.id = ANY($1)
GROUP BY version_id
",
&version_ids
).fetch(&mut *exec)
.map_ok(|m| {
let version_id = VersionId(m.version_id);
let versions = redis
.multi_get::<String>(
VERSIONS_NAMESPACE,
version_ids_parsed
.clone()
.iter()
.map(|x| x.to_string())
.collect::<Vec<_>>(),
)
.await?;
// Add loader fields to the set we need to fetch
let loader_loader_field_ids = m.loader_fields.unwrap_or_default().into_iter().map(LoaderFieldId).collect::<Vec<_>>();
for loader_field_id in loader_loader_field_ids.iter() {
loader_field_ids.insert(*loader_field_id);
}
for version in versions {
if let Some(version) =
version.and_then(|x| serde_json::from_str::<QueryVersion>(&x).ok())
{
version_ids_parsed.retain(|x| &version.inner.id.0 != x);
found_versions.push(version);
continue;
}
}
// Add loader + loader associated data to the map
let version_loader_data = VersionLoaderData {
loaders: m.loaders.unwrap_or_default(),
project_types: m.project_types.unwrap_or_default(),
games: m.games.unwrap_or_default(),
loader_loader_field_ids,
};
(version_id,version_loader_data)
if !version_ids_parsed.is_empty() {
let loader_field_enum_value_ids = DashSet::new();
let version_fields: DashMap<VersionId, Vec<QueryVersionField>> = sqlx::query!(
"
SELECT version_id, field_id, int_value, enum_value, string_value
FROM version_fields
WHERE version_id = ANY($1)
",
&version_ids_parsed
)
.fetch(&mut *exec)
.try_fold(
DashMap::new(),
|acc: DashMap<VersionId, Vec<QueryVersionField>>, m| {
let qvf = QueryVersionField {
version_id: VersionId(m.version_id),
field_id: LoaderFieldId(m.field_id),
int_value: m.int_value,
enum_value: m.enum_value.map(LoaderFieldEnumValueId),
string_value: m.string_value,
};
if let Some(enum_value) = m.enum_value {
loader_field_enum_value_ids.insert(LoaderFieldEnumValueId(enum_value));
}
).try_collect().await?;
acc.entry(VersionId(m.version_id)).or_default().push(qvf);
async move { Ok(acc) }
},
)
.await?;
// Fetch all loader fields from any version
let loader_fields: Vec<QueryLoaderField> = sqlx::query!(
"
SELECT DISTINCT id, field, field_type, enum_type, min_val, max_val, optional
FROM loader_fields lf
WHERE id = ANY($1)
",
&loader_field_ids.iter().map(|x| x.0).collect::<Vec<_>>()
)
.fetch(&mut *exec)
.map_ok(|m| QueryLoaderField {
id: LoaderFieldId(m.id),
field: m.field,
field_type: m.field_type,
enum_type: m.enum_type.map(LoaderFieldEnumId),
min_val: m.min_val,
max_val: m.max_val,
optional: m.optional,
})
.try_collect()
.await?;
#[derive(Default)]
struct VersionLoaderData {
loaders: Vec<String>,
project_types: Vec<String>,
games: Vec<String>,
loader_loader_field_ids: Vec<LoaderFieldId>,
}
let loader_field_enum_values: Vec<QueryLoaderFieldEnumValue> = sqlx::query!(
"
SELECT DISTINCT id, enum_id, value, ordering, created, metadata
FROM loader_field_enum_values lfev
WHERE id = ANY($1)
ORDER BY enum_id, ordering, created ASC
",
&loader_field_enum_value_ids
.iter()
.map(|x| x.0)
.collect::<Vec<_>>()
)
.fetch(&mut *exec)
.map_ok(|m| QueryLoaderFieldEnumValue {
id: LoaderFieldEnumValueId(m.id),
enum_id: LoaderFieldEnumId(m.enum_id),
value: m.value,
ordering: m.ordering,
created: m.created,
metadata: m.metadata,
})
.try_collect()
.await?;
let loader_field_ids = DashSet::new();
let loaders_ptypes_games: DashMap<VersionId, VersionLoaderData> = sqlx::query!(
"
SELECT DISTINCT version_id,
ARRAY_AGG(DISTINCT l.loader) filter (where l.loader is not null) loaders,
ARRAY_AGG(DISTINCT pt.name) filter (where pt.name is not null) project_types,
ARRAY_AGG(DISTINCT g.slug) filter (where g.slug is not null) games,
ARRAY_AGG(DISTINCT lfl.loader_field_id) filter (where lfl.loader_field_id is not null) loader_fields
FROM versions v
INNER JOIN loaders_versions lv ON v.id = lv.version_id
INNER JOIN loaders l ON lv.loader_id = l.id
INNER JOIN loaders_project_types lpt ON lpt.joining_loader_id = l.id
INNER JOIN project_types pt ON pt.id = lpt.joining_project_type_id
INNER JOIN loaders_project_types_games lptg ON lptg.loader_id = l.id AND lptg.project_type_id = pt.id
INNER JOIN games g ON lptg.game_id = g.id
LEFT JOIN loader_fields_loaders lfl ON lfl.loader_id = l.id
WHERE v.id = ANY($1)
GROUP BY version_id
",
&version_ids_parsed
).fetch(&mut *exec)
.map_ok(|m| {
let version_id = VersionId(m.version_id);
// Add loader fields to the set we need to fetch
let loader_loader_field_ids = m.loader_fields.unwrap_or_default().into_iter().map(LoaderFieldId).collect::<Vec<_>>();
for loader_field_id in loader_loader_field_ids.iter() {
loader_field_ids.insert(*loader_field_id);
#[derive(Deserialize)]
struct Hash {
pub file_id: FileId,
pub algorithm: String,
pub hash: String,
}
// Add loader + loader associated data to the map
let version_loader_data = VersionLoaderData {
loaders: m.loaders.unwrap_or_default(),
project_types: m.project_types.unwrap_or_default(),
games: m.games.unwrap_or_default(),
loader_loader_field_ids,
};
(version_id,version_loader_data)
#[derive(Deserialize)]
struct File {
pub id: FileId,
pub url: String,
pub filename: String,
pub primary: bool,
pub size: u32,
pub file_type: Option<FileType>,
}
).try_collect().await?;
// Fetch all loader fields from any version
let loader_fields: Vec<QueryLoaderField> = sqlx::query!(
"
SELECT DISTINCT id, field, field_type, enum_type, min_val, max_val, optional
FROM loader_fields lf
WHERE id = ANY($1)
",
&loader_field_ids.iter().map(|x| x.0).collect::<Vec<_>>()
)
.fetch(&mut *exec)
.map_ok(|m| QueryLoaderField {
id: LoaderFieldId(m.id),
field: m.field,
field_type: m.field_type,
enum_type: m.enum_type.map(LoaderFieldEnumId),
min_val: m.min_val,
max_val: m.max_val,
optional: m.optional,
})
.try_collect()
.await?;
let file_ids = DashSet::new();
let reverse_file_map = DashMap::new();
let files : DashMap<VersionId, Vec<File>> = sqlx::query!(
"
SELECT DISTINCT version_id, f.id, f.url, f.filename, f.is_primary, f.size, f.file_type
FROM files f
WHERE f.version_id = ANY($1)
",
&version_ids
).fetch(&mut *exec)
.try_fold(DashMap::new(), |acc : DashMap<VersionId, Vec<File>>, m| {
let file = File {
id: FileId(m.id),
url: m.url,
filename: m.filename,
primary: m.is_primary,
size: m.size as u32,
file_type: m.file_type.map(|x| FileType::from_string(&x)),
};
let loader_field_enum_values: Vec<QueryLoaderFieldEnumValue> = sqlx::query!(
"
SELECT DISTINCT id, enum_id, value, ordering, created, metadata
FROM loader_field_enum_values lfev
WHERE id = ANY($1)
ORDER BY enum_id, ordering, created ASC
",
&loader_field_enum_value_ids
.iter()
.map(|x| x.0)
.collect::<Vec<_>>()
)
.fetch(&mut *exec)
.map_ok(|m| QueryLoaderFieldEnumValue {
id: LoaderFieldEnumValueId(m.id),
enum_id: LoaderFieldEnumId(m.enum_id),
value: m.value,
ordering: m.ordering,
created: m.created,
metadata: m.metadata,
})
.try_collect()
.await?;
file_ids.insert(FileId(m.id));
reverse_file_map.insert(FileId(m.id), VersionId(m.version_id));
#[derive(Deserialize)]
struct Hash {
pub file_id: FileId,
pub algorithm: String,
pub hash: String,
}
#[derive(Deserialize)]
struct File {
pub id: FileId,
pub url: String,
pub filename: String,
pub primary: bool,
pub size: u32,
pub file_type: Option<FileType>,
}
let file_ids = DashSet::new();
let reverse_file_map = DashMap::new();
let files : DashMap<VersionId, Vec<File>> = sqlx::query!(
"
SELECT DISTINCT version_id, f.id, f.url, f.filename, f.is_primary, f.size, f.file_type
FROM files f
WHERE f.version_id = ANY($1)
",
&version_ids_parsed
).fetch(&mut *exec)
.try_fold(DashMap::new(), |acc : DashMap<VersionId, Vec<File>>, m| {
let file = File {
id: FileId(m.id),
url: m.url,
filename: m.filename,
primary: m.is_primary,
size: m.size as u32,
file_type: m.file_type.map(|x| FileType::from_string(&x)),
};
file_ids.insert(FileId(m.id));
reverse_file_map.insert(FileId(m.id), VersionId(m.version_id));
acc.entry(VersionId(m.version_id))
.or_default()
.push(file);
async move { Ok(acc) }
}
).await?;
let hashes: DashMap<VersionId, Vec<Hash>> = sqlx::query!(
"
SELECT DISTINCT file_id, algorithm, encode(hash, 'escape') hash
FROM hashes
WHERE file_id = ANY($1)
",
&file_ids.iter().map(|x| x.0).collect::<Vec<_>>()
)
.fetch(&mut *exec)
.try_fold(DashMap::new(), |acc: DashMap<VersionId, Vec<Hash>>, m| {
if let Some(found_hash) = m.hash {
let hash = Hash {
file_id: FileId(m.file_id),
algorithm: m.algorithm,
hash: found_hash,
};
if let Some(version_id) = reverse_file_map.get(&FileId(m.file_id)) {
acc.entry(*version_id).or_default().push(hash);
acc.entry(VersionId(m.version_id))
.or_default()
.push(file);
async move { Ok(acc) }
}
}
async move { Ok(acc) }
})
.await?;
).await?;
let dependencies : DashMap<VersionId, Vec<QueryDependency>> = sqlx::query!(
"
SELECT DISTINCT dependent_id as version_id, d.mod_dependency_id as dependency_project_id, d.dependency_id as dependency_version_id, d.dependency_file_name as file_name, d.dependency_type as dependency_type
FROM dependencies d
WHERE dependent_id = ANY($1)
",
&version_ids_parsed
).fetch(&mut *exec)
.try_fold(DashMap::new(), |acc : DashMap<_,Vec<QueryDependency>>, m| {
let dependency = QueryDependency {
project_id: m.dependency_project_id.map(ProjectId),
version_id: m.dependency_version_id.map(VersionId),
file_name: m.file_name,
dependency_type: m.dependency_type,
};
let hashes: DashMap<VersionId, Vec<Hash>> = sqlx::query!(
"
SELECT DISTINCT file_id, algorithm, encode(hash, 'escape') hash
FROM hashes
WHERE file_id = ANY($1)
",
&file_ids.iter().map(|x| x.0).collect::<Vec<_>>()
)
.fetch(&mut *exec)
.try_fold(DashMap::new(), |acc: DashMap<VersionId, Vec<Hash>>, m| {
if let Some(found_hash) = m.hash {
let hash = Hash {
file_id: FileId(m.file_id),
algorithm: m.algorithm,
hash: found_hash,
};
acc.entry(VersionId(m.version_id))
.or_default()
.push(dependency);
async move { Ok(acc) }
}
).await?;
if let Some(version_id) = reverse_file_map.get(&FileId(m.file_id)) {
acc.entry(*version_id).or_default().push(hash);
}
}
async move { Ok(acc) }
})
.await?;
let db_versions: Vec<QueryVersion> = sqlx::query!(
"
SELECT v.id id, v.mod_id mod_id, v.author_id author_id, v.name version_name, v.version_number version_number,
v.changelog changelog, v.date_published date_published, v.downloads downloads,
v.version_type version_type, v.featured featured, v.status status, v.requested_status requested_status, v.ordering ordering
FROM versions v
WHERE v.id = ANY($1)
ORDER BY v.ordering ASC NULLS LAST, v.date_published ASC;
",
&version_ids_parsed
)
.fetch_many(&mut *exec)
.try_filter_map(|e| async {
Ok(e.right().map(|v|
{
let dependencies : DashMap<VersionId, Vec<QueryDependency>> = sqlx::query!(
"
SELECT DISTINCT dependent_id as version_id, d.mod_dependency_id as dependency_project_id, d.dependency_id as dependency_version_id, d.dependency_file_name as file_name, d.dependency_type as dependency_type
FROM dependencies d
WHERE dependent_id = ANY($1)
",
&version_ids
).fetch(&mut *exec)
.try_fold(DashMap::new(), |acc : DashMap<_,Vec<QueryDependency>>, m| {
let dependency = QueryDependency {
project_id: m.dependency_project_id.map(ProjectId),
version_id: m.dependency_version_id.map(VersionId),
file_name: m.file_name,
dependency_type: m.dependency_type,
};
acc.entry(VersionId(m.version_id))
.or_default()
.push(dependency);
async move { Ok(acc) }
}
).await?;
let res = sqlx::query!(
"
SELECT v.id id, v.mod_id mod_id, v.author_id author_id, v.name version_name, v.version_number version_number,
v.changelog changelog, v.date_published date_published, v.downloads downloads,
v.version_type version_type, v.featured featured, v.status status, v.requested_status requested_status, v.ordering ordering
FROM versions v
WHERE v.id = ANY($1);
",
&version_ids
)
.fetch(&mut *exec)
.try_fold(DashMap::new(), |acc, v| {
let version_id = VersionId(v.id);
let VersionLoaderData {
loaders,
project_types,
games,
loader_loader_field_ids,
} = loaders_ptypes_games.remove(&version_id).map(|x|x.1).unwrap_or_default();
} = loaders_ptypes_games.remove(&version_id).map(|x|x.1).unwrap_or_default();
let files = files.remove(&version_id).map(|x|x.1).unwrap_or_default();
let hashes = hashes.remove(&version_id).map(|x|x.1).unwrap_or_default();
let version_fields = version_fields.remove(&version_id).map(|x|x.1).unwrap_or_default();
let dependencies = dependencies.remove(&version_id).map(|x|x.1).unwrap_or_default();
let loader_fields = loader_fields.iter()
.filter(|x| loader_loader_field_ids.contains(&x.id))
.collect::<Vec<_>>();
.filter(|x| loader_loader_field_ids.contains(&x.id))
.collect::<Vec<_>>();
QueryVersion {
let query_version = QueryVersion {
inner: Version {
id: VersionId(v.id),
project_id: ProjectId(v.mod_id),
@@ -821,22 +784,20 @@ impl Version {
project_types,
games,
dependencies,
}
}))
})
.try_collect::<Vec<QueryVersion>>()
.await?;
};
for version in db_versions {
redis
.set_serialized_to_json(VERSIONS_NAMESPACE, version.inner.id.0, &version, None)
acc.insert(v.id, query_version);
async move { Ok(acc) }
})
.await?;
found_versions.push(version);
}
}
Ok(res)
},
).await?;
Ok(found_versions)
val.sort();
Ok(val)
}
pub async fn get_file_from_hash<'a, 'b, E>(
@@ -866,110 +827,66 @@ impl Version {
where
E: sqlx::Executor<'a, Database = sqlx::Postgres> + Copy,
{
use futures::stream::TryStreamExt;
let mut redis = redis.connect().await?;
if hashes.is_empty() {
return Ok(Vec::new());
}
let mut file_ids_parsed = hashes.to_vec();
let mut found_files = Vec::new();
let files = redis
.multi_get::<String>(
VERSION_FILES_NAMESPACE,
file_ids_parsed
.iter()
.map(|hash| format!("{}_{}", algorithm, hash))
.collect::<Vec<_>>(),
)
.await?;
for file in files {
if let Some(mut file) =
file.and_then(|x| serde_json::from_str::<Vec<SingleFile>>(&x).ok())
{
file_ids_parsed.retain(|x| {
!file
.iter()
.any(|y| y.hashes.iter().any(|z| z.0 == &algorithm && z.1 == x))
});
found_files.append(&mut file);
continue;
}
}
if !file_ids_parsed.is_empty() {
let db_files: Vec<SingleFile> = sqlx::query!(
"
SELECT f.id, f.version_id, v.mod_id, f.url, f.filename, f.is_primary, f.size, f.file_type,
JSONB_AGG(DISTINCT jsonb_build_object('algorithm', h.algorithm, 'hash', encode(h.hash, 'escape'))) filter (where h.hash is not null) hashes
FROM files f
INNER JOIN versions v on v.id = f.version_id
INNER JOIN hashes h on h.file_id = f.id
WHERE h.algorithm = $1 AND h.hash = ANY($2)
GROUP BY f.id, v.mod_id, v.date_published
ORDER BY v.date_published
",
algorithm,
&file_ids_parsed.into_iter().map(|x| x.as_bytes().to_vec()).collect::<Vec<_>>(),
)
.fetch_many(executor)
.try_filter_map(|e| async {
Ok(e.right().map(|f| {
let val = redis.get_cached_keys(
VERSION_FILES_NAMESPACE,
&hashes.iter().map(|x| format!("{algorithm}_{x}")).collect::<Vec<_>>(),
|file_ids| async move {
let files = sqlx::query!(
"
SELECT f.id, f.version_id, v.mod_id, f.url, f.filename, f.is_primary, f.size, f.file_type,
JSONB_AGG(DISTINCT jsonb_build_object('algorithm', h.algorithm, 'hash', encode(h.hash, 'escape'))) filter (where h.hash is not null) hashes
FROM files f
INNER JOIN versions v on v.id = f.version_id
INNER JOIN hashes h on h.file_id = f.id
WHERE h.algorithm = $1 AND h.hash = ANY($2)
GROUP BY f.id, v.mod_id, v.date_published
ORDER BY v.date_published
",
algorithm,
&file_ids.into_iter().flat_map(|x| x.split('_').last().map(|x| x.as_bytes().to_vec())).collect::<Vec<_>>(),
)
.fetch(executor)
.try_fold(DashMap::new(), |acc, f| {
#[derive(Deserialize)]
struct Hash {
pub algorithm: String,
pub hash: String,
}
SingleFile {
id: FileId(f.id),
version_id: VersionId(f.version_id),
project_id: ProjectId(f.mod_id),
url: f.url,
filename: f.filename,
hashes: serde_json::from_value::<Vec<Hash>>(
f.hashes.unwrap_or_default(),
)
.ok()
.unwrap_or_default().into_iter().map(|x| (x.algorithm, x.hash)).collect(),
primary: f.is_primary,
size: f.size as u32,
file_type: f.file_type.map(|x| FileType::from_string(&x)),
let hashes = serde_json::from_value::<Vec<Hash>>(
f.hashes.unwrap_or_default(),
)
.ok()
.unwrap_or_default().into_iter().map(|x| (x.algorithm, x.hash))
.collect::<HashMap<_, _>>();
if let Some(hash) = hashes.get(&algorithm) {
let key = format!("{algorithm}_{hash}");
let file = SingleFile {
id: FileId(f.id),
version_id: VersionId(f.version_id),
project_id: ProjectId(f.mod_id),
url: f.url,
filename: f.filename,
hashes,
primary: f.is_primary,
size: f.size as u32,
file_type: f.file_type.map(|x| FileType::from_string(&x)),
};
acc.insert(key, file);
}
}
))
})
.try_collect::<Vec<SingleFile>>()
.await?;
let mut save_files: HashMap<String, Vec<SingleFile>> = HashMap::new();
for file in db_files {
for (algo, hash) in &file.hashes {
let key = format!("{}_{}", algo, hash);
if let Some(files) = save_files.get_mut(&key) {
files.push(file.clone());
} else {
save_files.insert(key, vec![file.clone()]);
}
}
}
for (key, mut files) in save_files {
redis
.set_serialized_to_json(VERSION_FILES_NAMESPACE, key, &files, None)
async move { Ok(acc) }
})
.await?;
found_files.append(&mut files);
Ok(files)
}
}
).await?;
Ok(found_files)
Ok(val)
}
pub async fn clear_cache(