forked from didirus/AstralRinth
Batched search indexing (#5191)
* Use RO pool for search indexing * Batched search indexing that actually works * Query cache
This commit is contained in:
committed by
GitHub
parent
09dead50d2
commit
5c29a8c7dd
@@ -1,26 +0,0 @@
|
||||
{
|
||||
"db_name": "PostgreSQL",
|
||||
"query": "\n SELECT\n id,\n status AS \"status: PayoutStatus\"\n FROM payouts\n ORDER BY id\n ",
|
||||
"describe": {
|
||||
"columns": [
|
||||
{
|
||||
"ordinal": 0,
|
||||
"name": "id",
|
||||
"type_info": "Int8"
|
||||
},
|
||||
{
|
||||
"ordinal": 1,
|
||||
"name": "status: PayoutStatus",
|
||||
"type_info": "Varchar"
|
||||
}
|
||||
],
|
||||
"parameters": {
|
||||
"Left": []
|
||||
},
|
||||
"nullable": [
|
||||
false,
|
||||
false
|
||||
]
|
||||
},
|
||||
"hash": "1adbd24d815107e13bc1440c7a8f4eeff66ab4165a9f4980032e114db4dc1286"
|
||||
}
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"db_name": "PostgreSQL",
|
||||
"query": "\n SELECT m.id id, m.name name, m.summary summary, m.downloads downloads, m.follows follows,\n m.icon_url icon_url, m.updated updated, m.approved approved, m.published, m.license license, m.slug slug, m.color\n FROM mods m\n WHERE m.status = ANY($1)\n GROUP BY m.id;\n ",
|
||||
"query": "\n SELECT m.id id, m.name name, m.summary summary, m.downloads downloads, m.follows follows,\n m.icon_url icon_url, m.updated updated, m.approved approved, m.published, m.license license, m.slug slug, m.color\n FROM mods m\n WHERE m.status = ANY($1) AND m.id > $3\n GROUP BY m.id\n ORDER BY m.id ASC\n LIMIT $2;\n ",
|
||||
"describe": {
|
||||
"columns": [
|
||||
{
|
||||
@@ -66,7 +66,9 @@
|
||||
],
|
||||
"parameters": {
|
||||
"Left": [
|
||||
"TextArray"
|
||||
"TextArray",
|
||||
"Int8",
|
||||
"Int8"
|
||||
]
|
||||
},
|
||||
"nullable": [
|
||||
@@ -84,5 +86,5 @@
|
||||
true
|
||||
]
|
||||
},
|
||||
"hash": "b30d0365bd116fceee5de03fb9e3087a587633783894a5041889b856d47a4ed5"
|
||||
"hash": "702a2826d5857dc51b1a7a79c9043ae8987441bb5e89c9ea62d347e47899e3c2"
|
||||
}
|
||||
@@ -1,20 +0,0 @@
|
||||
{
|
||||
"db_name": "PostgreSQL",
|
||||
"query": "SELECT status AS \"status: PayoutStatus\" FROM payouts WHERE id = 1",
|
||||
"describe": {
|
||||
"columns": [
|
||||
{
|
||||
"ordinal": 0,
|
||||
"name": "status: PayoutStatus",
|
||||
"type_info": "Varchar"
|
||||
}
|
||||
],
|
||||
"parameters": {
|
||||
"Left": []
|
||||
},
|
||||
"nullable": [
|
||||
false
|
||||
]
|
||||
},
|
||||
"hash": "b92b5bb7d179c4fcdbc45600ccfd2402f52fea71e27b08e7926fcc2a9e62c0f3"
|
||||
}
|
||||
@@ -1,18 +0,0 @@
|
||||
{
|
||||
"db_name": "PostgreSQL",
|
||||
"query": "\n INSERT INTO payouts (id, method, platform_id, status, user_id, amount, created)\n VALUES ($1, $2, $3, $4, $5, 10.0, NOW())\n ",
|
||||
"describe": {
|
||||
"columns": [],
|
||||
"parameters": {
|
||||
"Left": [
|
||||
"Int8",
|
||||
"Text",
|
||||
"Text",
|
||||
"Varchar",
|
||||
"Int8"
|
||||
]
|
||||
},
|
||||
"nullable": []
|
||||
},
|
||||
"hash": "cd5ccd618fb3cc41646a6de86f9afedb074492b4ec7f2457c14113f5fd13aa02"
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
{
|
||||
"db_name": "PostgreSQL",
|
||||
"query": "\n INSERT INTO payouts (id, method, platform_id, status, user_id, amount, created)\n VALUES ($1, $2, NULL, $3, $4, 10.00, NOW())\n ",
|
||||
"describe": {
|
||||
"columns": [],
|
||||
"parameters": {
|
||||
"Left": [
|
||||
"Int8",
|
||||
"Text",
|
||||
"Varchar",
|
||||
"Int8"
|
||||
]
|
||||
},
|
||||
"nullable": []
|
||||
},
|
||||
"hash": "cec4240c7c848988b3dfd13e3f8e5c93783c7641b019fdb698a1ec0be1393606"
|
||||
}
|
||||
@@ -31,7 +31,8 @@ impl BackgroundTask {
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub async fn run(
|
||||
self,
|
||||
pool: sqlx::Pool<Postgres>,
|
||||
pool: sqlx::PgPool,
|
||||
ro_pool: sqlx::PgPool,
|
||||
redis_pool: RedisPool,
|
||||
search_config: search::SearchConfig,
|
||||
clickhouse: clickhouse::Client,
|
||||
@@ -43,7 +44,9 @@ impl BackgroundTask {
|
||||
use BackgroundTask::*;
|
||||
match self {
|
||||
Migrations => run_migrations().await,
|
||||
IndexSearch => index_search(pool, redis_pool, search_config).await,
|
||||
IndexSearch => {
|
||||
index_search(ro_pool, redis_pool, search_config).await
|
||||
}
|
||||
ReleaseScheduled => release_scheduled(pool).await,
|
||||
UpdateVersions => update_versions(pool, redis_pool).await,
|
||||
Payouts => payouts(pool, clickhouse, redis_pool).await,
|
||||
@@ -117,12 +120,12 @@ pub async fn run_migrations() {
|
||||
}
|
||||
|
||||
pub async fn index_search(
|
||||
pool: sqlx::Pool<Postgres>,
|
||||
ro_pool: sqlx::PgPool,
|
||||
redis_pool: RedisPool,
|
||||
search_config: search::SearchConfig,
|
||||
) {
|
||||
info!("Indexing local database");
|
||||
let result = index_projects(pool, redis_pool, &search_config).await;
|
||||
let result = index_projects(ro_pool, redis_pool, &search_config).await;
|
||||
if let Err(e) = result {
|
||||
warn!("Local project indexing failed: {:?}", e);
|
||||
}
|
||||
|
||||
@@ -17,6 +17,12 @@ impl From<PgPool> for ReadOnlyPgPool {
|
||||
}
|
||||
}
|
||||
|
||||
impl ReadOnlyPgPool {
|
||||
/// Consumes this wrapper and returns the wrapped `PgPool` by value.
pub fn into_inner(self) -> PgPool {
|
||||
self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for ReadOnlyPgPool {
|
||||
type Target = PgPool;
|
||||
|
||||
|
||||
@@ -176,6 +176,7 @@ async fn app() -> std::io::Result<()> {
|
||||
info!("Running task {task:?} and exiting");
|
||||
task.run(
|
||||
pool,
|
||||
ro_pool.into_inner(),
|
||||
redis_pool,
|
||||
search_config,
|
||||
clickhouse,
|
||||
|
||||
@@ -22,7 +22,9 @@ use sqlx::postgres::PgPool;
|
||||
|
||||
pub async fn index_local(
|
||||
pool: &PgPool,
|
||||
) -> Result<Vec<UploadSearchProject>, IndexingError> {
|
||||
cursor: i64,
|
||||
limit: i64,
|
||||
) -> Result<(Vec<UploadSearchProject>, i64), IndexingError> {
|
||||
info!("Indexing local projects!");
|
||||
|
||||
// todo: loaders, project type, game versions
|
||||
@@ -45,13 +47,17 @@ pub async fn index_local(
|
||||
SELECT m.id id, m.name name, m.summary summary, m.downloads downloads, m.follows follows,
|
||||
m.icon_url icon_url, m.updated updated, m.approved approved, m.published, m.license license, m.slug slug, m.color
|
||||
FROM mods m
|
||||
WHERE m.status = ANY($1)
|
||||
GROUP BY m.id;
|
||||
WHERE m.status = ANY($1) AND m.id > $3
|
||||
GROUP BY m.id
|
||||
ORDER BY m.id ASC
|
||||
LIMIT $2;
|
||||
",
|
||||
&*crate::models::projects::ProjectStatus::iterator()
|
||||
.filter(|x| x.is_searchable())
|
||||
.map(|x| x.to_string())
|
||||
.collect::<Vec<String>>(),
|
||||
limit,
|
||||
cursor,
|
||||
)
|
||||
.fetch(pool)
|
||||
.map_ok(|m| {
|
||||
@@ -74,6 +80,10 @@ pub async fn index_local(
|
||||
|
||||
let project_ids = db_projects.iter().map(|x| x.id.0).collect::<Vec<i64>>();
|
||||
|
||||
let Some(largest) = project_ids.iter().max() else {
|
||||
return Ok((vec![], i64::MAX));
|
||||
};
|
||||
|
||||
struct PartialGallery {
|
||||
url: String,
|
||||
featured: bool,
|
||||
@@ -415,7 +425,7 @@ pub async fn index_local(
|
||||
}
|
||||
}
|
||||
|
||||
Ok(uploads)
|
||||
Ok((uploads, *largest))
|
||||
}
|
||||
|
||||
struct PartialVersion {
|
||||
|
||||
@@ -86,7 +86,7 @@ pub async fn remove_documents(
|
||||
}
|
||||
|
||||
pub async fn index_projects(
|
||||
pool: PgPool,
|
||||
ro_pool: PgPool,
|
||||
redis: RedisPool,
|
||||
config: &SearchConfig,
|
||||
) -> Result<(), IndexingError> {
|
||||
@@ -111,7 +111,7 @@ pub async fn index_projects(
|
||||
|
||||
let all_loader_fields =
|
||||
crate::database::models::loader_fields::LoaderField::get_fields_all(
|
||||
&pool, &redis,
|
||||
&ro_pool, &redis,
|
||||
)
|
||||
.await?
|
||||
.into_iter()
|
||||
@@ -120,17 +120,35 @@ pub async fn index_projects(
|
||||
|
||||
info!("Gathering local projects");
|
||||
|
||||
let uploads = index_local(&pool).await?;
|
||||
let mut cursor = 0;
|
||||
let mut idx = 0;
|
||||
let mut total = 0;
|
||||
|
||||
info!("Adding projects to index");
|
||||
loop {
|
||||
info!("Gathering index data chunk {idx}");
|
||||
idx += 1;
|
||||
|
||||
add_projects_batch_client(
|
||||
&indices,
|
||||
uploads,
|
||||
all_loader_fields.clone(),
|
||||
config,
|
||||
)
|
||||
.await?;
|
||||
let (uploads, next_cursor) =
|
||||
index_local(&ro_pool, cursor, 10000).await?;
|
||||
total += uploads.len();
|
||||
|
||||
if uploads.is_empty() {
|
||||
info!(
|
||||
"No more projects to index, indexed {total} projects after {idx} chunks"
|
||||
);
|
||||
break;
|
||||
}
|
||||
|
||||
cursor = next_cursor;
|
||||
|
||||
add_projects_batch_client(
|
||||
&indices,
|
||||
uploads,
|
||||
all_loader_fields.clone(),
|
||||
config,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
||||
info!("Swapping indexes");
|
||||
|
||||
@@ -326,7 +344,7 @@ async fn add_to_index(
|
||||
monitor_task(
|
||||
client,
|
||||
task,
|
||||
Duration::from_secs(60 * 10), // Timeout after 10 minutes
|
||||
Duration::from_secs(60 * 5), // Timeout after 5 minutes
|
||||
Some(Duration::from_secs(1)), // Poll once every second
|
||||
)
|
||||
.await?;
|
||||
|
||||
Reference in New Issue
Block a user