diff --git a/.env b/.env index 77ba0bf8..c774ce17 100644 --- a/.env +++ b/.env @@ -24,14 +24,8 @@ S3_URL=none S3_REGION=none S3_BUCKET_NAME=none -INDEX_CURSEFORGE=false -MAX_CURSEFORGE_ID=450000 # 1 hour LOCAL_INDEX_INTERVAL=3600 -# 12 hours -EXTERNAL_INDEX_INTERVAL=43200 - -INDEX_CACHE_PATH=/tmp/modrinth-id-cache.json GITHUB_CLIENT_ID=3acffb2e808d16d4b226 GITHUB_CLIENT_SECRET=none \ No newline at end of file diff --git a/sqlx-data.json b/sqlx-data.json index 5704b390..820ae4fc 100644 --- a/sqlx-data.json +++ b/sqlx-data.json @@ -218,27 +218,6 @@ ] } }, - "15978ec367b2768eea87dcdf1ee2497aa03b8a926139fecffbca22031e3ae7f9": { - "query": "SELECT EXISTS(SELECT 1 FROM team_members WHERE id = $1 AND user_id = $2)", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "exists", - "type_info": "Bool" - } - ], - "parameters": { - "Left": [ - "Int8", - "Int8" - ] - }, - "nullable": [ - null - ] - } - }, "16871e66d8762452be3ca0c80f4733f2db49980205fbf7cb6f9829cdd99cdb65": { "query": "\n INSERT INTO dependencies (dependent_id, dependency_id)\n VALUES ($1, $2)\n ", "describe": { @@ -1390,6 +1369,27 @@ ] } }, + "618472f46632ddf15b01bb0df27c9d5e6f5b56a9413a6f7393d6d7c29b852459": { + "query": "SELECT EXISTS(SELECT 1 FROM team_members tm INNER JOIN mods m ON m.team_id = tm.team_id AND m.id = $1 WHERE tm.user_id = $2)", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "exists", + "type_info": "Bool" + } + ], + "parameters": { + "Left": [ + "Int8", + "Int8" + ] + }, + "nullable": [ + null + ] + } + }, "637fd5f9564a79b625e00a705b3c9fe70ba3cba9050c0993557ca46f50d89623": { "query": "\n SELECT * FROM mods\n WHERE status = (\n SELECT id FROM statuses WHERE status = $1\n )\n ORDER BY updated ASC\n LIMIT $2;\n ", "describe": { @@ -1938,27 +1938,6 @@ "nullable": [] } }, - "75a1099a12e73484cf0e7dd4b346ea154ea1ff915fe9ee15f936e1e8faed4118": { - "query": "SELECT EXISTS(SELECT 1 FROM team_members tm INNER JOIN mods m ON m.team_id = tm.id AND m.id = $1 WHERE tm.user_id = $2)", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "exists", - "type_info": "Bool" - } - ], - "parameters": { - "Left": [ - "Int8", - "Int8" - ] - }, - "nullable": [ - null - ] - } - }, "763eaff18057e579472960e9e8256c22ae275f24a45da96bc3e47385376faae3": { "query": "\n UPDATE mods\n SET downloads = downloads + 1\n WHERE id = $1\n ", "describe": { @@ -2021,6 +2000,27 @@ ] } }, + "796f057ea8eb5b01d3eedeee9840fb37464ea567f32871953fb07e14ed86af1c": { + "query": "SELECT EXISTS(SELECT 1 FROM team_members WHERE team_id = $1 AND user_id = $2)", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "exists", + "type_info": "Bool" + } + ], + "parameters": { + "Left": [ + "Int8", + "Int8" + ] + }, + "nullable": [ + null + ] + } + }, "79b896b1a8ddab285294638302976b75d0d915f36036383cc21bd2fc48d4502c": { "query": "\n DELETE FROM loaders_versions WHERE version_id = $1\n ", "describe": { diff --git a/src/main.rs b/src/main.rs index 8e5f1704..3a7157a9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -228,40 +228,6 @@ async fn main() -> std::io::Result<()> { } }); - if dotenv::var("INDEX_CURSEFORGE") - .ok() - .and_then(|b| b.parse::().ok()) - .unwrap_or(false) - { - // The interval in seconds at which curseforge is indexed for - // searching. Defaults to 4 hours if unset. - let external_index_interval = std::time::Duration::from_secs( - dotenv::var("EXTERNAL_INDEX_INTERVAL") - .ok() - .map(|i| i.parse().unwrap()) - .unwrap_or(3600 * 12), - ); - - let pool_ref = pool.clone(); - let thread_search_config = search_config.clone(); - scheduler.run(external_index_interval, move || { - info!("Indexing curseforge"); - let pool_ref = pool_ref.clone(); - let thread_search_config = thread_search_config.clone(); - async move { - let settings = IndexingSettings { - index_local: false, - index_external: true, - }; - let result = index_mods(pool_ref, settings, &thread_search_config).await; - if let Err(e) = result { - warn!("External mod indexing failed: {:?}", e); - } - info!("Done indexing curseforge"); - } - }); - } - scheduler::schedule_versions(&mut scheduler, pool.clone(), skip_initial); let ip_salt = Pepper { @@ -375,23 +341,8 @@ fn check_env_vars() -> bool { failed |= true; } - failed |= check_var::("INDEX_CURSEFORGE"); - if dotenv::var("INDEX_CURSEFORGE") - .ok() - .and_then(|s| s.parse::().ok()) - .unwrap_or(false) - { - failed |= check_var::("EXTERNAL_INDEX_INTERVAL"); - failed |= check_var::("MAX_CURSEFORGE_ID"); - } - failed |= check_var::("LOCAL_INDEX_INTERVAL"); - // In theory this should be an OsString since it's a path, but - // dotenv doesn't support that. The usage of this does treat - // it as an OsString, though. - failed |= check_var::("INDEX_CACHE_PATH"); - failed |= check_var::("GITHUB_CLIENT_ID"); failed |= check_var::("GITHUB_CLIENT_SECRET"); diff --git a/src/routes/mod.rs b/src/routes/mod.rs index bfa74c72..c6022a29 100644 --- a/src/routes/mod.rs +++ b/src/routes/mod.rs @@ -106,6 +106,8 @@ pub enum ApiError { InvalidInputError(String), #[error("Search Error: {0}")] SearchError(#[from] meilisearch_sdk::errors::Error), + #[error("Indexing Error: {0}")] + IndexingError(#[from] crate::search::indexing::IndexingError), } impl actix_web::ResponseError for ApiError { @@ -117,6 +119,7 @@ impl actix_web::ResponseError for ApiError { ApiError::CustomAuthenticationError(..) => actix_web::http::StatusCode::UNAUTHORIZED, ApiError::JsonError(..) => actix_web::http::StatusCode::BAD_REQUEST, ApiError::SearchError(..) => actix_web::http::StatusCode::INTERNAL_SERVER_ERROR, + ApiError::IndexingError(..) => actix_web::http::StatusCode::INTERNAL_SERVER_ERROR, ApiError::FileHostingError(..) => actix_web::http::StatusCode::INTERNAL_SERVER_ERROR, ApiError::InvalidInputError(..) => actix_web::http::StatusCode::BAD_REQUEST, } @@ -132,6 +135,7 @@ impl actix_web::ResponseError for ApiError { ApiError::CustomAuthenticationError(..) => "unauthorized", ApiError::JsonError(..) => "json_error", ApiError::SearchError(..) => "search_error", + ApiError::IndexingError(..) => "indexing_error", ApiError::FileHostingError(..) => "file_hosting_error", ApiError::InvalidInputError(..) => "invalid_input", }, diff --git a/src/routes/mod_creation.rs b/src/routes/mod_creation.rs index b67bb4c6..5927d01a 100644 --- a/src/routes/mod_creation.rs +++ b/src/routes/mod_creation.rs @@ -165,7 +165,7 @@ pub async fn mod_create( &mut transaction, &***file_host, &mut uploaded_files, - &***indexing_queue, + &***indexing_queue ) .await; @@ -557,7 +557,7 @@ async fn mod_create_inner( body_url: mod_builder.body_url.clone(), published: now, updated: now, - status, + status: status.clone(), license: License { id: mod_create_data.license_id.clone(), name: "".to_string(), @@ -582,10 +582,12 @@ async fn mod_create_inner( let _mod_id = mod_builder.insert(&mut *transaction).await?; - let index_mod = - crate::search::indexing::local_import::query_one(mod_id.into(), &mut *transaction) - .await?; - indexing_queue.add(index_mod); + if status.is_searchable() { + let index_mod = + crate::search::indexing::local_import::query_one(mod_id.into(), &mut *transaction) + .await?; + indexing_queue.add(index_mod); + } Ok(HttpResponse::Ok().json(response)) } diff --git a/src/routes/mods.rs b/src/routes/mods.rs index 270016f2..190cbe1a 100644 --- a/src/routes/mods.rs +++ b/src/routes/mods.rs @@ -11,6 +11,8 @@ use futures::StreamExt; use serde::{Deserialize, Serialize}; use sqlx::PgPool; use std::sync::Arc; +use crate::search::indexing::queue::CreationQueue; +use actix_web::web::Data; #[get("mod")] pub async fn mod_search( @@ -58,7 +60,7 @@ pub async fn mods_get( let user_id: database::models::ids::UserId = user.id.into(); let mod_exists = sqlx::query!( - "SELECT EXISTS(SELECT 1 FROM team_members WHERE id = $1 AND user_id = $2)", + "SELECT EXISTS(SELECT 1 FROM team_members WHERE team_id = $1 AND user_id = $2)", mod_data.inner.team_id as database::models::ids::TeamId, user_id as database::models::ids::UserId, ) @@ -104,7 +106,7 @@ pub async fn mod_slug_get( let user_id: database::models::ids::UserId = user.id.into(); let mod_exists = sqlx::query!( - "SELECT EXISTS(SELECT 1 FROM team_members WHERE id = $1 AND user_id = $2)", + "SELECT EXISTS(SELECT 1 FROM team_members WHERE team_id = $1 AND user_id = $2)", data.inner.team_id as database::models::ids::TeamId, user_id as database::models::ids::UserId, ) @@ -151,7 +153,7 @@ pub async fn mod_get( let user_id: database::models::ids::UserId = user.id.into(); let mod_exists = sqlx::query!( - "SELECT EXISTS(SELECT 1 FROM team_members WHERE id = $1 AND user_id = $2)", + "SELECT EXISTS(SELECT 1 FROM team_members WHERE team_id = $1 AND user_id = $2)", data.inner.team_id as database::models::ids::TeamId, user_id as database::models::ids::UserId, ) @@ -265,6 +267,7 @@ pub async fn mod_edit( config: web::Data, file_host: web::Data>, new_mod: web::Json, + indexing_queue: Data>, ) -> Result { let user = get_user_from_headers(req.headers(), &**pool).await?; @@ -378,8 +381,14 @@ pub async fn mod_edit( .await .map_err(|e| ApiError::DatabaseError(e.into()))?; - if mod_item.status.is_searchable() && status.is_searchable() { + if mod_item.status.is_searchable() && !status.is_searchable() { delete_from_index(id.into(), config).await?; + } else if !mod_item.status.is_searchable() && status.is_searchable() { + let index_mod = + crate::search::indexing::local_import::query_one(mod_id.into(), &mut *transaction) + .await?; + + indexing_queue.add(index_mod); } } diff --git a/src/routes/versions.rs b/src/routes/versions.rs index 31b2b0f3..239d580e 100644 --- a/src/routes/versions.rs +++ b/src/routes/versions.rs @@ -81,7 +81,7 @@ pub async fn versions_get( let user_id: database::models::ids::UserId = user.id.into(); let member_exists = sqlx::query!( - "SELECT EXISTS(SELECT 1 FROM team_members tm INNER JOIN mods m ON m.team_id = tm.id AND m.id = $1 WHERE tm.user_id = $2)", + "SELECT EXISTS(SELECT 1 FROM team_members tm INNER JOIN mods m ON m.team_id = tm.team_id AND m.id = $1 WHERE tm.user_id = $2)", version.mod_id as database::models::ModId, user_id as database::models::ids::UserId, ) @@ -123,7 +123,7 @@ pub async fn version_get( let user_id: database::models::ids::UserId = user.id.into(); let member_exists = sqlx::query!( - "SELECT EXISTS(SELECT 1 FROM team_members tm INNER JOIN mods m ON m.team_id = tm.id AND m.id = $1 WHERE tm.user_id = $2)", + "SELECT EXISTS(SELECT 1 FROM team_members tm INNER JOIN mods m ON m.team_id = tm.team_id AND m.id = $1 WHERE tm.user_id = $2)", data.mod_id as database::models::ModId, user_id as database::models::ids::UserId, ) diff --git a/src/search/indexing/curseforge_import.rs b/src/search/indexing/curseforge_import.rs deleted file mode 100644 index 1cd4bc1d..00000000 --- a/src/search/indexing/curseforge_import.rs +++ /dev/null @@ -1,326 +0,0 @@ -use super::IndexingError; -use crate::search::UploadSearchMod; -use log::info; -use serde::{Deserialize, Serialize}; -use std::borrow::Cow; - -#[derive(Serialize, Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct Attachment<'a> { - pub url: Cow<'a, str>, - pub thumbnail_url: Cow<'a, str>, - pub is_default: bool, -} - -#[derive(Serialize, Deserialize, Debug)] -pub struct Category<'a> { - pub name: Cow<'a, str>, -} - -#[derive(Serialize, Deserialize, Debug)] -pub struct Author<'a> { - pub name: Cow<'a, str>, - pub url: Cow<'a, str>, -} - -#[derive(Serialize, Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct CurseVersion<'a> { - pub game_version: Cow<'a, str>, -} - -#[derive(Serialize, Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct LatestFile<'a> { - pub game_version: Vec>, - pub modules: Vec>, -} - -#[derive(Serialize, Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct VersionModule<'a> { - pub foldername: Cow<'a, str>, -} - -#[derive(Serialize, Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct CurseForgeMod<'a> { - pub id: u32, - pub name: Cow<'a, str>, - pub authors: Vec>>, - pub attachments: Vec>, - pub website_url: Cow<'a, str>, - pub summary: Cow<'a, str>, - pub download_count: f32, - pub categories: Vec>, - pub latest_files: Vec>, - pub game_version_latest_files: Vec>, - pub date_created: chrono::DateTime, - pub date_modified: chrono::DateTime, - pub category_section: CategorySection, -} - -#[derive(Serialize, Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct CategorySection { - pub id: u32, -} - -#[derive(Default)] -struct Loaders { - forge: bool, - fabric: bool, - liteloader: bool, - rift: bool, -} - -lazy_static::lazy_static! { - static ref CURSEFORGE_CATEGORIES: std::collections::HashMap<&'static str, &'static str> = { - let mut map = std::collections::HashMap::new(); - map.insert("World Gen", "worldgen"); - map.insert("Biomes", "worldgen"); - map.insert("Ores and Resources", "worldgen"); - map.insert("Structures", "worldgen"); - map.insert("Dimensions", "worldgen"); - map.insert("Mobs", "worldgen"); - map.insert("Technology", "technology"); - map.insert("Processing", "technology"); - map.insert("Player Transport", "technology"); - map.insert("Energy, Fluid, and Item Transport", "technology"); - map.insert("Food", "food"); - map.insert("Farming", "food"); - map.insert("Energy", "technology"); - map.insert("Redstone", "technology"); - map.insert("Genetics", "technology"); - map.insert("Magic", "magic"); - map.insert("Storage", "storage"); - map.insert("API and Library", "library"); - map.insert("Adventure and RPG", "adventure"); - map.insert("Map and Information", "utility"); - map.insert("Cosmetic", "decoration"); - map.insert("Addons", "misc"); - map.insert("Thermal Expansion", "misc"); - map.insert("Tinker's Construct", "misc"); - map.insert("Industrial Craft", "misc"); - map.insert("Thaumcraft", "misc"); - map.insert("Buildcraft", "misc"); - map.insert("Forestry", "misc"); - map.insert("Blood Magic", "misc"); - map.insert("Lucky Blocks", "misc"); - map.insert("Applied Energistics 2", "misc"); - map.insert("CraftTweaker", "misc"); - map.insert("Miscellaneous", "misc"); - map.insert("Armor, Tools, and Weapons", "equipment"); - map.insert("Server Utility", "utility"); - map - }; -} - -pub async fn index_curseforge( - start_index: u32, - end_index: u32, - cache_path: Option<&std::path::Path>, -) -> Result, IndexingError> { - info!("Indexing curseforge mods!"); - let start = std::time::Instant::now(); - - let mut docs_to_add: Vec = vec![]; - - let cache = cache_path - .map(std::fs::File::open) - .and_then(Result::ok) - .map(std::io::BufReader::new) - .map(serde_json::from_reader::<_, Vec>); - - let requested_ids; - - // This caching system can't handle segmented indexing - if let Some(Ok(mut cache)) = cache { - let end = cache.last().copied().unwrap_or(start_index); - cache.extend(end..end_index); - requested_ids = serde_json::to_string(&cache)?; - } else { - // This ends up being around 3 MiB - // Serde json is better than using debug formatting since it doesn't - // include spaces after commas, removing a lot of the extra size - requested_ids = serde_json::to_string(&(start_index..end_index).collect::>())?; - } - - let res = reqwest::Client::new() - .post("https://addons-ecs.forgesvc.net/api/v2/addon") - .header(reqwest::header::CONTENT_TYPE, "application/json") - .body(requested_ids) - .send() - .await?; - - // The response ends up being about 300MiB, so we have to deal with - // it efficiently. Reading it as bytes and then deserializing with - // borrowed data should avoid copying it, but it may take a bit more - // memory. To do this efficiently, we would have to get serde_json - // to skip deserializing mods with category_section.id != 8 - // It's only 100MiB when using the cached ids, since that eliminates - // all "addons" that aren't minecraft mods - let buffer = res.bytes().await?; - - let mut curseforge_mods: Vec = serde_json::from_slice(&buffer)?; - // This should remove many of the mods from the list before processing - curseforge_mods.retain(|m| m.category_section.id == 8); - - // Only write to the cache if this doesn't skip mods at the start - // The caching system iterates through all ids normally past the last - // id in the cache, so the end_index shouldn't matter. - if let Some(path) = cache_path { - if start_index <= 1 { - let mut ids = curseforge_mods.iter().map(|m| m.id).collect::>(); - ids.sort_unstable(); - if let Err(e) = std::fs::write(path, serde_json::to_string(&ids)?) { - log::warn!("Error writing to index id cache: {}", e); - } - } - } - - for mut curseforge_mod in curseforge_mods { - // The gameId of minecraft is 432 - // The categorySection.id for mods is always 8 - // The categorySection.id 8 appears to be unique to minecraft mods - // if curseforge_mod.game_slug != "minecraft" - // || !curseforge_mod.website_url.contains("/mc-mods/") - // if curseforge_mod.category_section.id != 8 { - // continue; - // } - - let mut mod_game_versions = vec![]; - - let mut loaders = Loaders::default(); - - for file in curseforge_mod.latest_files { - for version in file.game_version { - match &*version { - "Fabric" => loaders.fabric = true, - "Forge" => loaders.forge = true, - "Rift" => loaders.rift = true, - _ => (), - } - } - for module in file.modules { - match &*module.foldername { - "fabric.mod.json" => loaders.fabric = true, - "mcmod.info" => loaders.forge = true, // 1.13+ forge uses META-INF/mods.toml - "riftmod.json" => loaders.rift = true, - "litemod.json" => loaders.liteloader = true, - _ => (), - } - } - // TODO: files ending with .litemod should also enable liteloader - // if we decide to add true support for it; That requires extra - // deserializing work, so I'm not adding it for now - } - - let mut latest = None; - - for version in curseforge_mod.game_version_latest_files { - let mut split = version.game_version.split('.'); - let version_numbers = ( - split.next().and_then(|s| s.parse::().ok()).unwrap_or(0), - split.next().and_then(|s| s.parse::().ok()).unwrap_or(0), - split.next().and_then(|s| s.parse::().ok()).unwrap_or(0), - ); - - if let Some((number, _)) = latest { - if version_numbers > number { - latest = Some((version_numbers, version.game_version.clone())); - } - } else { - latest = Some((version_numbers, version.game_version.clone())) - } - - if ((1, 0, 0)..(1, 14, 0)).contains(&version_numbers) { - // Is this a reasonable assumption to make? - loaders.forge = true; - } - mod_game_versions.push(version.game_version); - } - - let mut mod_categories = std::collections::HashSet::new(); - - for category in curseforge_mod.categories { - if category.name == "Fabric" { - loaders.fabric = true; - } else if let Some(category) = CURSEFORGE_CATEGORIES.get(&*category.name) { - mod_categories.insert(*category); - } - } - - if !(loaders.fabric || loaders.rift || loaders.liteloader || loaders.forge) { - // Assume that mods without loaders will be - loaders.forge = true; - } - - let mut mod_categories = mod_categories - .into_iter() - .take(3) - .map(Cow::Borrowed) - .collect::>(); - - if loaders.forge { - mod_categories.push(Cow::Borrowed("forge")); - } - if loaders.fabric { - mod_categories.push(Cow::Borrowed("fabric")); - } - - let latest_version = latest - .map(|(_, name)| name) - .unwrap_or_else(|| Cow::Borrowed("None")); - - let icon_url = curseforge_mod - .attachments - .iter() - .find(|a| a.is_default) - .map(|a| a.thumbnail_url.replace("/256/256/", "/64/64/")) - .unwrap_or_default(); - - let author; - let author_url; - - if let Some(user) = curseforge_mod - .authors - .get_mut(0) - .map(Option::take) - .flatten() - { - author = user.name.into_owned(); - author_url = user.url.into_owned(); - } else { - author = "unknown".to_owned(); - author_url = String::from(&*curseforge_mod.website_url); - } - - docs_to_add.push(UploadSearchMod { - mod_id: format!("curse-{}", curseforge_mod.id), - author, - title: curseforge_mod.name.into_owned(), - description: curseforge_mod.summary.chars().take(150).collect(), - categories: mod_categories, - versions: mod_game_versions.into_iter().map(String::from).collect(), - downloads: curseforge_mod.download_count as i32, - page_url: curseforge_mod.website_url.into_owned(), - icon_url, - author_url, - date_created: curseforge_mod.date_created, - created_timestamp: curseforge_mod.date_created.timestamp(), - date_modified: curseforge_mod.date_modified, - modified_timestamp: curseforge_mod.date_modified.timestamp(), - latest_version, - host: Cow::Borrowed("curseforge"), - }) - } - - let duration = start.elapsed(); - info!( - "Finished indexing curseforge; Took {:5.2}s", - duration.as_secs_f32() - ); - - Ok(docs_to_add) -} diff --git a/src/search/indexing/mod.rs b/src/search/indexing/mod.rs index ed268913..b215bc3f 100644 --- a/src/search/indexing/mod.rs +++ b/src/search/indexing/mod.rs @@ -1,10 +1,8 @@ /// This module is used for the indexing from any source. -pub mod curseforge_import; pub mod local_import; pub mod queue; use crate::search::{SearchConfig, UploadSearchMod}; -use curseforge_import::index_curseforge; use local_import::index_local; use meilisearch_sdk::client::Client; use meilisearch_sdk::indexes::Index; @@ -63,20 +61,9 @@ pub async fn index_mods( ) -> Result<(), IndexingError> { let mut docs_to_add: Vec = vec![]; - let cache_path = std::env::var_os("INDEX_CACHE_PATH").map(std::path::PathBuf::from); - if settings.index_local { docs_to_add.append(&mut index_local(pool.clone()).await?); } - if settings.index_external { - let end_index = dotenv::var("MAX_CURSEFORGE_ID") - .ok() - .map(|i| i.parse().unwrap()) - .unwrap_or(450_000); - - docs_to_add.append(&mut index_curseforge(1, end_index, cache_path.as_deref()).await?); - } - // Write Indices add_mods(docs_to_add, config).await?;