Batched search indexing (#5191)

* Use RO pool for search indexing
* Batched search indexing that actually works
* Query cache

commit 5c29a8c7dd (parent 09dead50d2), committed by GitHub

@@ -1,26 +0,0 @@
-{
-  "db_name": "PostgreSQL",
-  "query": "\n        SELECT\n            id,\n            status AS \"status: PayoutStatus\"\n        FROM payouts\n        ORDER BY id\n        ",
-  "describe": {
-    "columns": [
-      {
-        "ordinal": 0,
-        "name": "id",
-        "type_info": "Int8"
-      },
-      {
-        "ordinal": 1,
-        "name": "status: PayoutStatus",
-        "type_info": "Varchar"
-      }
-    ],
-    "parameters": {
-      "Left": []
-    },
-    "nullable": [
-      false,
-      false
-    ]
-  },
-  "hash": "1adbd24d815107e13bc1440c7a8f4eeff66ab4165a9f4980032e114db4dc1286"
-}

@@ -1,6 +1,6 @@
 {
   "db_name": "PostgreSQL",
-  "query": "\n        SELECT m.id id, m.name name, m.summary summary, m.downloads downloads, m.follows follows,\n        m.icon_url icon_url, m.updated updated, m.approved approved, m.published, m.license license, m.slug slug, m.color\n        FROM mods m\n        WHERE m.status = ANY($1)\n        GROUP BY m.id;\n        ",
+  "query": "\n        SELECT m.id id, m.name name, m.summary summary, m.downloads downloads, m.follows follows,\n        m.icon_url icon_url, m.updated updated, m.approved approved, m.published, m.license license, m.slug slug, m.color\n        FROM mods m\n        WHERE m.status = ANY($1) AND m.id > $3\n        GROUP BY m.id\n        ORDER BY m.id ASC\n        LIMIT $2;\n        ",
   "describe": {
     "columns": [
       {
@@ -66,7 +66,9 @@
     ],
     "parameters": {
       "Left": [
-        "TextArray"
+        "TextArray",
+        "Int8",
+        "Int8"
       ]
     },
     "nullable": [
@@ -84,5 +86,5 @@
       true
     ]
   },
-  "hash": "b30d0365bd116fceee5de03fb9e3087a587633783894a5041889b856d47a4ed5"
+  "hash": "702a2826d5857dc51b1a7a79c9043ae8987441bb5e89c9ea62d347e47899e3c2"
 }

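The regenerated query cache entry above captures the heart of the change: the project listing now uses keyset (cursor-based) pagination, fetching only rows whose id exceeds the previous batch's largest id ($3), ordered by id and capped at $2 rows. A minimal sketch of the same pattern with sqlx, assuming a simplified mods(id BIGINT, name TEXT) table; the row type, column list, and function name are illustrative, not part of this codebase:

    use sqlx::{FromRow, PgPool};

    // Illustrative row shape; the real query selects many more columns from `mods`.
    #[derive(FromRow)]
    struct ModRow {
        id: i64,
        name: String,
    }

    // One keyset-paginated batch: rows with id greater than `cursor`, ordered by
    // id so the batch's last id can serve as the next cursor, capped at `limit`.
    async fn fetch_batch(
        pool: &PgPool,
        cursor: i64,
        limit: i64,
    ) -> Result<(Vec<ModRow>, i64), sqlx::Error> {
        let rows: Vec<ModRow> = sqlx::query_as(
            "SELECT id, name FROM mods WHERE id > $1 ORDER BY id ASC LIMIT $2",
        )
        .bind(cursor)
        .bind(limit)
        .fetch_all(pool)
        .await?;

        // An empty batch signals exhaustion; i64::MAX mirrors the sentinel used
        // by the changed index_local function further down.
        let next_cursor = rows.last().map(|r| r.id).unwrap_or(i64::MAX);
        Ok((rows, next_cursor))
    }

Compared with OFFSET-based paging, the id > cursor predicate lets Postgres seek directly into the primary-key index, so each batch stays cheap even deep into the table.
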
@@ -1,20 +0,0 @@
-{
-  "db_name": "PostgreSQL",
-  "query": "SELECT status AS \"status: PayoutStatus\" FROM payouts WHERE id = 1",
-  "describe": {
-    "columns": [
-      {
-        "ordinal": 0,
-        "name": "status: PayoutStatus",
-        "type_info": "Varchar"
-      }
-    ],
-    "parameters": {
-      "Left": []
-    },
-    "nullable": [
-      false
-    ]
-  },
-  "hash": "b92b5bb7d179c4fcdbc45600ccfd2402f52fea71e27b08e7926fcc2a9e62c0f3"
-}

@@ -1,18 +0,0 @@
-{
-  "db_name": "PostgreSQL",
-  "query": "\n        INSERT INTO payouts (id, method, platform_id, status, user_id, amount, created)\n        VALUES ($1, $2, $3, $4, $5, 10.0, NOW())\n        ",
-  "describe": {
-    "columns": [],
-    "parameters": {
-      "Left": [
-        "Int8",
-        "Text",
-        "Text",
-        "Varchar",
-        "Int8"
-      ]
-    },
-    "nullable": []
-  },
-  "hash": "cd5ccd618fb3cc41646a6de86f9afedb074492b4ec7f2457c14113f5fd13aa02"
-}

@@ -1,17 +0,0 @@
-{
-  "db_name": "PostgreSQL",
-  "query": "\n        INSERT INTO payouts (id, method, platform_id, status, user_id, amount, created)\n        VALUES ($1, $2, NULL, $3, $4, 10.00, NOW())\n        ",
-  "describe": {
-    "columns": [],
-    "parameters": {
-      "Left": [
-        "Int8",
-        "Text",
-        "Varchar",
-        "Int8"
-      ]
-    },
-    "nullable": []
-  },
-  "hash": "cec4240c7c848988b3dfd13e3f8e5c93783c7641b019fdb698a1ec0be1393606"
-}

@@ -31,7 +31,8 @@ impl BackgroundTask {
     #[allow(clippy::too_many_arguments)]
     pub async fn run(
         self,
-        pool: sqlx::Pool<Postgres>,
+        pool: sqlx::PgPool,
+        ro_pool: sqlx::PgPool,
         redis_pool: RedisPool,
         search_config: search::SearchConfig,
         clickhouse: clickhouse::Client,

@@ -43,7 +44,9 @@ impl BackgroundTask {
         use BackgroundTask::*;
         match self {
             Migrations => run_migrations().await,
-            IndexSearch => index_search(pool, redis_pool, search_config).await,
+            IndexSearch => {
+                index_search(ro_pool, redis_pool, search_config).await
+            }
             ReleaseScheduled => release_scheduled(pool).await,
             UpdateVersions => update_versions(pool, redis_pool).await,
             Payouts => payouts(pool, clickhouse, redis_pool).await,

@@ -117,12 +120,12 @@ pub async fn run_migrations() {
 }

 pub async fn index_search(
-    pool: sqlx::Pool<Postgres>,
+    ro_pool: sqlx::PgPool,
     redis_pool: RedisPool,
     search_config: search::SearchConfig,
 ) {
     info!("Indexing local database");
-    let result = index_projects(pool, redis_pool, &search_config).await;
+    let result = index_projects(ro_pool, redis_pool, &search_config).await;
     if let Err(e) = result {
         warn!("Local project indexing failed: {:?}", e);
     }

@@ -17,6 +17,12 @@ impl From<PgPool> for ReadOnlyPgPool {
     }
 }

+impl ReadOnlyPgPool {
+    pub fn into_inner(self) -> PgPool {
+        self.0
+    }
+}
+
 impl Deref for ReadOnlyPgPool {
     type Target = PgPool;

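The new into_inner escape hatch above lets callers hand the wrapped read-only pool to code that still expects a plain PgPool, such as BackgroundTask::run's new ro_pool argument. A rough sketch of how the whole wrapper plausibly fits together, based only on what this diff shows; the tuple-struct field and the Clone derive are assumptions:

    use std::ops::Deref;

    use sqlx::PgPool;

    // Assumed shape: a tuple-struct newtype over PgPool (the diff only shows
    // From, into_inner, and Deref).
    #[derive(Clone)]
    pub struct ReadOnlyPgPool(PgPool);

    impl From<PgPool> for ReadOnlyPgPool {
        fn from(pool: PgPool) -> Self {
            ReadOnlyPgPool(pool)
        }
    }

    impl ReadOnlyPgPool {
        // Hands the underlying pool to code that still takes a plain PgPool,
        // e.g. the new ro_pool argument threaded through BackgroundTask::run.
        pub fn into_inner(self) -> PgPool {
            self.0
        }
    }

    impl Deref for ReadOnlyPgPool {
        type Target = PgPool;

        fn deref(&self) -> &Self::Target {
            &self.0
        }
    }
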
@@ -176,6 +176,7 @@ async fn app() -> std::io::Result<()> {
         info!("Running task {task:?} and exiting");
         task.run(
             pool,
+            ro_pool.into_inner(),
             redis_pool,
             search_config,
             clickhouse,

@@ -22,7 +22,9 @@ use sqlx::postgres::PgPool;

 pub async fn index_local(
     pool: &PgPool,
-) -> Result<Vec<UploadSearchProject>, IndexingError> {
+    cursor: i64,
+    limit: i64,
+) -> Result<(Vec<UploadSearchProject>, i64), IndexingError> {
     info!("Indexing local projects!");

     // todo: loaders, project type, game versions

@@ -45,13 +47,17 @@ pub async fn index_local(
         SELECT m.id id, m.name name, m.summary summary, m.downloads downloads, m.follows follows,
         m.icon_url icon_url, m.updated updated, m.approved approved, m.published, m.license license, m.slug slug, m.color
         FROM mods m
-        WHERE m.status = ANY($1)
-        GROUP BY m.id;
+        WHERE m.status = ANY($1) AND m.id > $3
+        GROUP BY m.id
+        ORDER BY m.id ASC
+        LIMIT $2;
         ",
         &*crate::models::projects::ProjectStatus::iterator()
             .filter(|x| x.is_searchable())
             .map(|x| x.to_string())
             .collect::<Vec<String>>(),
+        limit,
+        cursor,
     )
     .fetch(pool)
     .map_ok(|m| {

@@ -74,6 +80,10 @@ pub async fn index_local(

     let project_ids = db_projects.iter().map(|x| x.id.0).collect::<Vec<i64>>();

+    let Some(largest) = project_ids.iter().max() else {
+        return Ok((vec![], i64::MAX));
+    };
+
     struct PartialGallery {
         url: String,
         featured: bool,

@@ -415,7 +425,7 @@ pub async fn index_local(
         }
     }

-    Ok(uploads)
+    Ok((uploads, *largest))
 }

 struct PartialVersion {

@@ -86,7 +86,7 @@ pub async fn remove_documents(
 }

 pub async fn index_projects(
-    pool: PgPool,
+    ro_pool: PgPool,
     redis: RedisPool,
     config: &SearchConfig,
 ) -> Result<(), IndexingError> {

@@ -111,7 +111,7 @@ pub async fn index_projects(

     let all_loader_fields =
         crate::database::models::loader_fields::LoaderField::get_fields_all(
-            &pool, &redis,
+            &ro_pool, &redis,
         )
         .await?
         .into_iter()

@@ -120,17 +120,35 @@ pub async fn index_projects(

     info!("Gathering local projects");

-    let uploads = index_local(&pool).await?;
+    let mut cursor = 0;
+    let mut idx = 0;
+    let mut total = 0;

-    info!("Adding projects to index");
+    loop {
+        info!("Gathering index data chunk {idx}");
+        idx += 1;

-    add_projects_batch_client(
-        &indices,
-        uploads,
-        all_loader_fields.clone(),
-        config,
-    )
-    .await?;
+        let (uploads, next_cursor) =
+            index_local(&ro_pool, cursor, 10000).await?;
+        total += uploads.len();
+
+        if uploads.is_empty() {
+            info!(
+                "No more projects to index, indexed {total} projects after {idx} chunks"
+            );
+            break;
+        }
+
+        cursor = next_cursor;
+
+        add_projects_batch_client(
+            &indices,
+            uploads,
+            all_loader_fields.clone(),
+            config,
+        )
+        .await?;
+    }

     info!("Swapping indexes");

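The batching loop above terminates because every non-empty batch advances the cursor to its largest id (the m.id > $3 predicate then guarantees forward progress), and an exhausted index_local returns an empty batch with an i64::MAX sentinel cursor, which the is_empty check turns into a break. A self-contained, purely illustrative sketch of that shape with the database call stubbed out:

    // Not the project's code: a stubbed model of the same cursor-batching loop.
    fn drain_batches(mut fetch: impl FnMut(i64, usize) -> (Vec<i64>, i64)) -> usize {
        let mut cursor = 0i64;
        let mut total = 0usize;
        loop {
            let (batch, next_cursor) = fetch(cursor, 10_000);
            if batch.is_empty() {
                // First empty batch means the cursor is exhausted.
                break;
            }
            total += batch.len();
            cursor = next_cursor;
        }
        total
    }

    fn main() {
        // Stub data source: ids 1..=25_000, served in keyset-paginated chunks.
        let ids: Vec<i64> = (1..=25_000i64).collect();
        let fetch = |cursor: i64, limit: usize| {
            let batch: Vec<i64> = ids
                .iter()
                .copied()
                .filter(|&id| id > cursor)
                .take(limit)
                .collect();
            let next = batch.last().copied().unwrap_or(i64::MAX);
            (batch, next)
        };
        assert_eq!(drain_batches(fetch), 25_000);
        println!("drained all ids in keyset batches");
    }
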
@@ -326,7 +344,7 @@ async fn add_to_index(
     monitor_task(
         client,
         task,
-        Duration::from_secs(60 * 10), // Timeout after 10 minutes
+        Duration::from_secs(60 * 5), // Timeout after 5 minutes
         Some(Duration::from_secs(1)), // Poll once every second
     )
     .await?;