Batched search indexing (#5191)

* Use RO pool for search indexing

* Batched search indexing that actually works

* Query cache
This commit is contained in:
François-Xavier Talbot
2026-01-23 07:32:02 -05:00
committed by GitHub
parent 09dead50d2
commit 5c29a8c7dd
10 changed files with 63 additions and 104 deletions

View File

@@ -31,7 +31,8 @@ impl BackgroundTask {
#[allow(clippy::too_many_arguments)]
pub async fn run(
self,
pool: sqlx::Pool<Postgres>,
pool: sqlx::PgPool,
ro_pool: sqlx::PgPool,
redis_pool: RedisPool,
search_config: search::SearchConfig,
clickhouse: clickhouse::Client,
@@ -43,7 +44,9 @@ impl BackgroundTask {
use BackgroundTask::*;
match self {
Migrations => run_migrations().await,
IndexSearch => index_search(pool, redis_pool, search_config).await,
IndexSearch => {
index_search(ro_pool, redis_pool, search_config).await
}
ReleaseScheduled => release_scheduled(pool).await,
UpdateVersions => update_versions(pool, redis_pool).await,
Payouts => payouts(pool, clickhouse, redis_pool).await,
@@ -117,12 +120,12 @@ pub async fn run_migrations() {
}
pub async fn index_search(
pool: sqlx::Pool<Postgres>,
ro_pool: sqlx::PgPool,
redis_pool: RedisPool,
search_config: search::SearchConfig,
) {
info!("Indexing local database");
let result = index_projects(pool, redis_pool, &search_config).await;
let result = index_projects(ro_pool, redis_pool, &search_config).await;
if let Err(e) = result {
warn!("Local project indexing failed: {:?}", e);
}

View File

@@ -17,6 +17,12 @@ impl From<PgPool> for ReadOnlyPgPool {
}
}
impl ReadOnlyPgPool {
/// Consumes the read-only wrapper and returns the underlying `PgPool`.
///
/// Used at call sites that need to pass the pool by value (e.g. handing
/// the read-only pool to `BackgroundTask::run`), since the `Deref` impl
/// below only yields a borrowed `&PgPool`.
pub fn into_inner(self) -> PgPool {
self.0
}
}
impl Deref for ReadOnlyPgPool {
type Target = PgPool;

View File

@@ -176,6 +176,7 @@ async fn app() -> std::io::Result<()> {
info!("Running task {task:?} and exiting");
task.run(
pool,
ro_pool.into_inner(),
redis_pool,
search_config,
clickhouse,

View File

@@ -22,7 +22,9 @@ use sqlx::postgres::PgPool;
pub async fn index_local(
pool: &PgPool,
) -> Result<Vec<UploadSearchProject>, IndexingError> {
cursor: i64,
limit: i64,
) -> Result<(Vec<UploadSearchProject>, i64), IndexingError> {
info!("Indexing local projects!");
// todo: loaders, project type, game versions
@@ -45,13 +47,17 @@ pub async fn index_local(
SELECT m.id id, m.name name, m.summary summary, m.downloads downloads, m.follows follows,
m.icon_url icon_url, m.updated updated, m.approved approved, m.published, m.license license, m.slug slug, m.color
FROM mods m
WHERE m.status = ANY($1)
GROUP BY m.id;
WHERE m.status = ANY($1) AND m.id > $3
GROUP BY m.id
ORDER BY m.id ASC
LIMIT $2;
",
&*crate::models::projects::ProjectStatus::iterator()
.filter(|x| x.is_searchable())
.map(|x| x.to_string())
.collect::<Vec<String>>(),
limit,
cursor,
)
.fetch(pool)
.map_ok(|m| {
@@ -74,6 +80,10 @@ pub async fn index_local(
let project_ids = db_projects.iter().map(|x| x.id.0).collect::<Vec<i64>>();
let Some(largest) = project_ids.iter().max() else {
return Ok((vec![], i64::MAX));
};
struct PartialGallery {
url: String,
featured: bool,
@@ -415,7 +425,7 @@ pub async fn index_local(
}
}
Ok(uploads)
Ok((uploads, *largest))
}
struct PartialVersion {

View File

@@ -86,7 +86,7 @@ pub async fn remove_documents(
}
pub async fn index_projects(
pool: PgPool,
ro_pool: PgPool,
redis: RedisPool,
config: &SearchConfig,
) -> Result<(), IndexingError> {
@@ -111,7 +111,7 @@ pub async fn index_projects(
let all_loader_fields =
crate::database::models::loader_fields::LoaderField::get_fields_all(
&pool, &redis,
&ro_pool, &redis,
)
.await?
.into_iter()
@@ -120,17 +120,35 @@ pub async fn index_projects(
info!("Gathering local projects");
let uploads = index_local(&pool).await?;
let mut cursor = 0;
let mut idx = 0;
let mut total = 0;
info!("Adding projects to index");
loop {
info!("Gathering index data chunk {idx}");
idx += 1;
add_projects_batch_client(
&indices,
uploads,
all_loader_fields.clone(),
config,
)
.await?;
let (uploads, next_cursor) =
index_local(&ro_pool, cursor, 10000).await?;
total += uploads.len();
if uploads.is_empty() {
info!(
"No more projects to index, indexed {total} projects after {idx} chunks"
);
break;
}
cursor = next_cursor;
add_projects_batch_client(
&indices,
uploads,
all_loader_fields.clone(),
config,
)
.await?;
}
info!("Swapping indexes");
@@ -326,7 +344,7 @@ async fn add_to_index(
monitor_task(
client,
task,
Duration::from_secs(60 * 10), // Timeout after 10 minutes
Duration::from_secs(60 * 5), // Timeout after 5 minutes
Some(Duration::from_secs(1)), // Poll once every second
)
.await?;