Rustic cleanups, dedups and making the code less hard to read in general (#251)

* typos :help_me:

* (part 1/?) massive cleanup to make the code more Rust-ic and cut down heap allocations.

* (part 2/?) massive cleanup to make the code more Rust-ic and cut down heap allocations.

* (part 3/?) cut down some pretty major heap allocations here - more Bytes and BytesMuts, fewer Vec<u8>s

also, I don't really understand why `to_vec` was needed when the result isn't used again afterwards

* (part 4/?) deduplicate error handling in backblaze logic

* (part 5/?) fixes, cleanups, refactors, and reformatting

* (part 6/?) cleanups and refactors

* remove loads of `as_str` in types that already are `Display`

* Revert "remove loads of `as_str` in types that already are `Display`"

This reverts commit 4f974310cfb167ceba03001d81388db4f0fbb509.

* reformat and move routes util to the util module

* use streams

* Run prepare + formatting issues

Co-authored-by: Jai A <jaiagr+gpg@pm.me>
Co-authored-by: Geometrically <18202329+Geometrically@users.noreply.github.com>
This commit is contained in:
Leo Chen
2021-10-12 11:26:59 +08:00
committed by GitHub
parent 0010119440
commit 13187de97d
53 changed files with 997 additions and 1129 deletions

View File

@@ -3,7 +3,6 @@ use log::info;
use super::IndexingError;
use crate::database::models::ProjectId;
use crate::models::projects::ProjectStatus;
use crate::search::UploadSearchProject;
use sqlx::postgres::PgPool;
@@ -12,6 +11,8 @@ pub async fn index_local(pool: PgPool) -> Result<Vec<UploadSearchProject>, Index
info!("Indexing local projects!");
Ok(
sqlx::query!(
//FIXME: there must be a way to reduce the duplicate lines between this query and the one in `query_one` here...
//region query
"
SELECT m.id id, m.project_type project_type, m.title title, m.description description, m.downloads downloads, m.follows follows,
m.icon_url icon_url, m.published published,
@@ -39,19 +40,22 @@ pub async fn index_local(pool: PgPool) -> Result<Vec<UploadSearchProject>, Index
WHERE s.status = $1
GROUP BY m.id, s.id, cs.id, ss.id, l.id, pt.id, u.id;
",
ProjectStatus::Approved.as_str(),
//endregion query
crate::models::projects::ProjectStatus::Approved.as_str(),
crate::models::teams::OWNER_ROLE,
)
.fetch_many(&pool)
.try_filter_map(|e| async {
Ok(e.right().map(|m| {
let mut categories = m.categories.map(|x| x.split(',').map(|x| x.to_string()).collect::<Vec<String>>()).unwrap_or_default();
categories.append(&mut m.loaders.map(|x| x.split(',').map(|x| x.to_string()).collect::<Vec<String>>()).unwrap_or_default());
let mut categories = split_to_strings(m.categories);
categories.append(&mut split_to_strings(m.loaders));
let versions = split_to_strings(m.versions);
let versions : Vec<String> = m.versions.map(|x| x.split(',').map(|x| x.to_string()).collect()).unwrap_or_default();
let project_id : crate::models::projects::ProjectId = ProjectId(m.id).into();
let project_id: crate::models::projects::ProjectId = ProjectId(m.id).into();
// TODO: Cleanup - This method has a lot of code in common with the method below.
// But, since the macro returns an (de facto) unnamed struct,
// We cannot reuse the code easily. Ugh.
UploadSearchProject {
project_id: format!("{}", project_id),
title: m.title,
@@ -76,64 +80,53 @@ pub async fn index_local(pool: PgPool) -> Result<Vec<UploadSearchProject>, Index
}
}))
})
.try_collect::<Vec<UploadSearchProject>>()
.try_collect::<Vec<_>>()
.await?
)
}
pub async fn query_one(
id: ProjectId,
exec: &mut sqlx::PgConnection,
) -> Result<UploadSearchProject, IndexingError> {
let m = sqlx::query!(
"
SELECT m.id id, m.project_type project_type, m.title title, m.description description, m.downloads downloads, m.follows follows,
m.icon_url icon_url, m.published published,
m.updated updated,
m.team_id team_id, m.license license, m.slug slug,
s.status status_name, cs.name client_side_type, ss.name server_side_type, l.short short, pt.name project_type_name, u.username username,
STRING_AGG(DISTINCT c.category, ',') categories, STRING_AGG(DISTINCT lo.loader, ',') loaders, STRING_AGG(DISTINCT gv.version, ',') versions,
STRING_AGG(DISTINCT mg.image_url, ',') gallery
FROM mods m
LEFT OUTER JOIN mods_categories mc ON joining_mod_id = m.id
LEFT OUTER JOIN categories c ON mc.joining_category_id = c.id
LEFT OUTER JOIN versions v ON v.mod_id = m.id
LEFT OUTER JOIN game_versions_versions gvv ON gvv.joining_version_id = v.id
LEFT OUTER JOIN game_versions gv ON gvv.game_version_id = gv.id
LEFT OUTER JOIN loaders_versions lv ON lv.version_id = v.id
LEFT OUTER JOIN loaders lo ON lo.id = lv.loader_id
LEFT OUTER JOIN mods_gallery mg ON mg.mod_id = m.id
INNER JOIN statuses s ON s.id = m.status
INNER JOIN project_types pt ON pt.id = m.project_type
INNER JOIN side_types cs ON m.client_side = cs.id
INNER JOIN side_types ss ON m.server_side = ss.id
INNER JOIN licenses l ON m.license = l.id
INNER JOIN team_members tm ON tm.team_id = m.team_id AND tm.role = $2
INNER JOIN users u ON tm.user_id = u.id
WHERE m.id = $1
GROUP BY m.id, s.id, cs.id, ss.id, l.id, pt.id, u.id;
",
id as ProjectId,
crate::models::teams::OWNER_ROLE,
)
//region query
"
SELECT m.id id, m.project_type project_type, m.title title, m.description description, m.downloads downloads, m.follows follows,
m.icon_url icon_url, m.published published,
m.updated updated,
m.team_id team_id, m.license license, m.slug slug,
s.status status_name, cs.name client_side_type, ss.name server_side_type, l.short short, pt.name project_type_name, u.username username,
STRING_AGG(DISTINCT c.category, ',') categories, STRING_AGG(DISTINCT lo.loader, ',') loaders, STRING_AGG(DISTINCT gv.version, ',') versions,
STRING_AGG(DISTINCT mg.image_url, ',') gallery
FROM mods m
LEFT OUTER JOIN mods_categories mc ON joining_mod_id = m.id
LEFT OUTER JOIN categories c ON mc.joining_category_id = c.id
LEFT OUTER JOIN versions v ON v.mod_id = m.id
LEFT OUTER JOIN game_versions_versions gvv ON gvv.joining_version_id = v.id
LEFT OUTER JOIN game_versions gv ON gvv.game_version_id = gv.id
LEFT OUTER JOIN loaders_versions lv ON lv.version_id = v.id
LEFT OUTER JOIN loaders lo ON lo.id = lv.loader_id
LEFT OUTER JOIN mods_gallery mg ON mg.mod_id = m.id
INNER JOIN statuses s ON s.id = m.status
INNER JOIN project_types pt ON pt.id = m.project_type
INNER JOIN side_types cs ON m.client_side = cs.id
INNER JOIN side_types ss ON m.server_side = ss.id
INNER JOIN licenses l ON m.license = l.id
INNER JOIN team_members tm ON tm.team_id = m.team_id AND tm.role = $2
INNER JOIN users u ON tm.user_id = u.id
WHERE m.id = $1
GROUP BY m.id, s.id, cs.id, ss.id, l.id, pt.id, u.id;
",
//endregion query
id as ProjectId,
crate::models::teams::OWNER_ROLE
)
.fetch_one(exec)
.await?;
let mut categories = m
.categories
.map(|x| x.split(',').map(|x| x.to_string()).collect::<Vec<String>>())
.unwrap_or_default();
categories.append(
&mut m
.loaders
.map(|x| x.split(',').map(|x| x.to_string()).collect::<Vec<String>>())
.unwrap_or_default(),
);
let versions: Vec<String> = m
.versions
.map(|x| x.split(',').map(|x| x.to_string()).collect())
.unwrap_or_default();
let mut categories = split_to_strings(m.categories);
categories.append(&mut split_to_strings(m.loaders));
let versions = split_to_strings(m.versions);
let project_id: crate::models::projects::ProjectId = ProjectId(m.id).into();
@@ -160,9 +153,11 @@ pub async fn query_one(
server_side: m.server_side_type,
slug: m.slug,
project_type: m.project_type_name,
gallery: m
.gallery
.map(|x| x.split(',').map(|x| x.to_string()).collect())
.unwrap_or_default(),
gallery: split_to_strings(m.gallery),
})
}
/// Splits an optional comma-joined string (as produced by SQL `STRING_AGG`)
/// into owned segments; `None` yields an empty vector.
fn split_to_strings(s: Option<String>) -> Vec<String> {
    match s {
        Some(joined) => joined.split(',').map(String::from).collect(),
        None => Vec::new(),
    }
}

View File

@@ -16,7 +16,7 @@ pub enum IndexingError {
#[error("Error while connecting to the MeiliSearch database")]
IndexDBError(#[from] meilisearch_sdk::errors::Error),
#[error("Error while serializing or deserializing JSON: {0}")]
SerDeError(#[from] serde_json::Error),
SerdeError(#[from] serde_json::Error),
#[error("Error while parsing a timestamp: {0}")]
ParseDateError(#[from] chrono::format::ParseError),
#[error("Database Error: {0}")]
@@ -40,6 +40,7 @@ pub struct IndexingSettings {
impl IndexingSettings {
#[allow(dead_code)]
pub fn from_env() -> Self {
//FIXME: what?
let index_local = true;
Self { index_local }
@@ -64,7 +65,7 @@ pub async fn index_projects(
}
pub async fn reset_indices(config: &SearchConfig) -> Result<(), IndexingError> {
let client = Client::new(&*config.address, &*config.key);
let client = config.make_client();
client.delete_index("relevance_projects").await?;
client.delete_index("downloads_projects").await?;
@@ -74,48 +75,28 @@ pub async fn reset_indices(config: &SearchConfig) -> Result<(), IndexingError> {
Ok(())
}
/// Updates (or creates) the named index using the default ranking rules with
/// `rule` appended at the end, forwarding any error from `update_index`.
async fn update_index_helper<'a>(
    client: &'a Client<'a>,
    name: &'static str,
    rule: &'static str,
) -> Result<Index<'a>, IndexingError> {
    let mut rules = default_rules();
    rules.push_back(rule);
    update_index(client, name, rules.into()).await
}
pub async fn reconfigure_indices(config: &SearchConfig) -> Result<(), IndexingError> {
let client = Client::new(&*config.address, &*config.key);
let client = config.make_client();
// Relevance Index
update_index(&client, "relevance_projects", {
let mut relevance_rules = default_rules();
relevance_rules.push_back("desc(downloads)".to_string());
relevance_rules.into()
})
.await?;
// Downloads Index
update_index(&client, "downloads_projects", {
let mut downloads_rules = default_rules();
downloads_rules.push_front("desc(downloads)".to_string());
downloads_rules.into()
})
.await?;
// Follows Index
update_index(&client, "follows_projects", {
let mut follows_rules = default_rules();
follows_rules.push_front("desc(follows)".to_string());
follows_rules.into()
})
.await?;
// Updated Index
update_index(&client, "updated_projects", {
let mut updated_rules = default_rules();
updated_rules.push_front("desc(modified_timestamp)".to_string());
updated_rules.into()
})
.await?;
// Created Index
update_index(&client, "newest_projects", {
let mut newest_rules = default_rules();
newest_rules.push_front("desc(created_timestamp)".to_string());
newest_rules.into()
})
.await?;
update_index_helper(&client, "relevance_projects", "desc(downloads)").await?;
update_index_helper(&client, "downloads_projects", "desc(downloads)").await?;
update_index_helper(&client, "follows_projects", "desc(follows)").await?;
update_index_helper(&client, "updated_projects", "desc(modified_timestamp)").await?;
update_index_helper(&client, "newest_projects", "desc(created_timestamp)").await?;
Ok(())
}
@@ -123,7 +104,7 @@ pub async fn reconfigure_indices(config: &SearchConfig) -> Result<(), IndexingEr
async fn update_index<'a>(
client: &'a Client<'a>,
name: &'a str,
rules: Vec<String>,
rules: Vec<&'static str>,
) -> Result<Index<'a>, IndexingError> {
let index = match client.get_index(name).await {
Ok(index) => index,
@@ -143,8 +124,8 @@ async fn update_index<'a>(
async fn create_index<'a>(
client: &'a Client<'a>,
name: &'a str,
rules: impl FnOnce() -> Vec<String>,
name: &'static str,
rules: impl FnOnce() -> Vec<&'static str>,
) -> Result<Index<'a>, IndexingError> {
match client.get_index(name).await {
// TODO: update index settings on startup (or delete old indices on startup)
@@ -176,127 +157,109 @@ async fn add_to_index(index: Index<'_>, mods: &[UploadSearchProject]) -> Result<
Ok(())
}
/// Creates (or fetches) the named index — configured with the default ranking
/// rules plus `rule` appended — and uploads `projects` to it.
///
/// # Errors
/// Propagates any `IndexingError` from index creation or document upload.
async fn create_and_add_to_index<'a>(
    client: &'a Client<'a>,
    // `&[T]` instead of `&Vec<T>`: callers' `&projects` coerces, and
    // `add_to_index` already takes a slice.
    projects: &'a [UploadSearchProject],
    name: &'static str,
    rule: &'static str,
) -> Result<(), IndexingError> {
    let index = create_index(client, name, || {
        let mut rules = default_rules();
        rules.push_back(rule);
        rules.into()
    })
    .await?;
    // Return directly instead of `?; Ok(())`.
    add_to_index(index, projects).await
}
pub async fn add_projects(
projects: Vec<UploadSearchProject>,
config: &SearchConfig,
) -> Result<(), IndexingError> {
let client = Client::new(&*config.address, &*config.key);
let client = config.make_client();
// Relevance Index
let relevance_index = create_index(&client, "relevance_projects", || {
let mut relevance_rules = default_rules();
relevance_rules.push_back("desc(downloads)".to_string());
relevance_rules.into()
})
create_and_add_to_index(&client, &projects, "relevance_projects", "desc(downloads)").await?;
create_and_add_to_index(&client, &projects, "downloads_projects", "desc(downloads)").await?;
create_and_add_to_index(&client, &projects, "follows_projects", "desc(follows)").await?;
create_and_add_to_index(
&client,
&projects,
"updated_projects",
"desc(modified_timestamp)",
)
.await?;
add_to_index(relevance_index, &projects).await?;
// Downloads Index
let downloads_index = create_index(&client, "downloads_projects", || {
let mut downloads_rules = default_rules();
downloads_rules.push_front("desc(downloads)".to_string());
downloads_rules.into()
})
create_and_add_to_index(
&client,
&projects,
"newest_projects",
"desc(created_timestamp)",
)
.await?;
add_to_index(downloads_index, &projects).await?;
// Follows Index
let follows_index = create_index(&client, "follows_projects", || {
let mut follows_rules = default_rules();
follows_rules.push_front("desc(follows)".to_string());
follows_rules.into()
})
.await?;
add_to_index(follows_index, &projects).await?;
// Updated Index
let updated_index = create_index(&client, "updated_projects", || {
let mut updated_rules = default_rules();
updated_rules.push_front("desc(modified_timestamp)".to_string());
updated_rules.into()
})
.await?;
add_to_index(updated_index, &projects).await?;
// Created Index
let newest_index = create_index(&client, "newest_projects", || {
let mut newest_rules = default_rules();
newest_rules.push_front("desc(created_timestamp)".to_string());
newest_rules.into()
})
.await?;
add_to_index(newest_index, &projects).await?;
Ok(())
}
//region Utils
fn default_rules() -> VecDeque<String> {
fn default_rules() -> VecDeque<&'static str> {
vec![
"typo".to_string(),
"words".to_string(),
"proximity".to_string(),
"attribute".to_string(),
"wordsPosition".to_string(),
"exactness".to_string(),
"typo",
"words",
"proximity",
"attribute",
"wordsPosition",
"exactness",
]
.into()
}
fn default_settings() -> Settings {
let displayed_attributes = vec![
"project_id".to_string(),
"project_type".to_string(),
"slug".to_string(),
"author".to_string(),
"title".to_string(),
"description".to_string(),
"categories".to_string(),
"versions".to_string(),
"downloads".to_string(),
"follows".to_string(),
"icon_url".to_string(),
"date_created".to_string(),
"date_modified".to_string(),
"latest_version".to_string(),
"license".to_string(),
"client_side".to_string(),
"server_side".to_string(),
"gallery".to_string(),
];
let searchable_attributes = vec![
"title".to_string(),
"description".to_string(),
"categories".to_string(),
"versions".to_string(),
"author".to_string(),
];
let stop_words: Vec<String> = Vec::new();
let synonyms: HashMap<String, Vec<String>> = HashMap::new();
Settings::new()
.with_displayed_attributes(displayed_attributes)
.with_searchable_attributes(searchable_attributes)
.with_stop_words(stop_words)
.with_synonyms(synonyms)
.with_attributes_for_faceting(vec![
String::from("categories"),
String::from("host"),
String::from("versions"),
String::from("license"),
String::from("client_side"),
String::from("server_side"),
String::from("project_type"),
])
.with_displayed_attributes(DEFAULT_DISPLAYED_ATTRIBUTES)
.with_searchable_attributes(DEFAULT_SEARCHABLE_ATTRIBUTES)
.with_stop_words(Vec::<String>::new())
.with_synonyms(HashMap::<String, Vec<String>>::new())
.with_attributes_for_faceting(DEFAULT_ATTRIBUTES_FOR_FACETING)
}
const DEFAULT_DISPLAYED_ATTRIBUTES: &[&str] = &[
"project_id",
"project_type",
"slug",
"author",
"title",
"description",
"categories",
"versions",
"downloads",
"follows",
"icon_url",
"date_created",
"date_modified",
"latest_version",
"license",
"client_side",
"server_side",
"gallery",
];
const DEFAULT_SEARCHABLE_ATTRIBUTES: &[&str] =
&["title", "description", "categories", "versions", "author"];
const DEFAULT_ATTRIBUTES_FOR_FACETING: &[&str] = &[
"categories",
"host",
"versions",
"license",
"client_side",
"server_side",
"project_type",
];
//endregion
// This shouldn't be relied on for proper sorting, but it makes an
// attempt at getting proper sorting for mojang's versions.
// This isn't currenly used, but I wrote it and it works, so I'm
// attempt at getting proper sorting for Mojang's versions.
// This isn't currently used, but I wrote it and it works, so I'm
// keeping this mess in case someone needs it in the future.
#[allow(dead_code)]
pub fn sort_projects(a: &str, b: &str) -> std::cmp::Ordering {
@@ -346,7 +309,7 @@ pub fn sort_projects(a: &str, b: &str) -> std::cmp::Ordering {
(false, false) => a.0.cmp(&b.0),
(true, false) => Ordering::Greater,
(false, true) => Ordering::Less,
(true, true) => Ordering::Equal, // unreachable
(true, true) => unreachable!(),
}
}
}

View File

@@ -16,7 +16,6 @@ impl CreationQueue {
queue: Mutex::new(Vec::with_capacity(10)),
}
}
pub fn add(&self, search_project: UploadSearchProject) {
// Can only panic if mutex is poisoned
self.queue.lock().unwrap().push(search_project);
@@ -24,12 +23,8 @@ impl CreationQueue {
pub fn take(&self) -> Vec<UploadSearchProject> {
std::mem::replace(&mut *self.queue.lock().unwrap(), Vec::with_capacity(10))
}
}
pub async fn index_queue(
queue: &CreationQueue,
config: &SearchConfig,
) -> Result<(), IndexingError> {
let queue = queue.take();
add_projects(queue, config).await
pub async fn index(&self, config: &SearchConfig) -> Result<(), IndexingError> {
let queue = self.take();
add_projects(queue, config).await
}
}

View File

@@ -17,7 +17,7 @@ pub enum SearchError {
#[error("MeiliSearch Error: {0}")]
MeiliSearchError(#[from] meilisearch_sdk::errors::Error),
#[error("Error while serializing or deserializing JSON: {0}")]
SerDeError(#[from] serde_json::Error),
SerdeError(#[from] serde_json::Error),
#[error("Error while parsing an integer: {0}")]
IntParsingError(#[from] std::num::ParseIntError),
#[error("Environment Error")]
@@ -31,7 +31,7 @@ impl actix_web::ResponseError for SearchError {
match self {
SearchError::EnvError(..) => StatusCode::INTERNAL_SERVER_ERROR,
SearchError::MeiliSearchError(..) => StatusCode::BAD_REQUEST,
SearchError::SerDeError(..) => StatusCode::BAD_REQUEST,
SearchError::SerdeError(..) => StatusCode::BAD_REQUEST,
SearchError::IntParsingError(..) => StatusCode::BAD_REQUEST,
SearchError::InvalidIndex(..) => StatusCode::BAD_REQUEST,
}
@@ -42,7 +42,7 @@ impl actix_web::ResponseError for SearchError {
error: match self {
SearchError::EnvError(..) => "environment_error",
SearchError::MeiliSearchError(..) => "meilisearch_error",
SearchError::SerDeError(..) => "invalid_input",
SearchError::SerdeError(..) => "invalid_input",
SearchError::IntParsingError(..) => "invalid_input",
SearchError::InvalidIndex(..) => "invalid_input",
},
@@ -57,7 +57,13 @@ pub struct SearchConfig {
pub key: String,
}
/// A project document used for uploading projects to meilisearch's indices.
impl SearchConfig {
/// Builds a MeiliSearch [`Client`] from this config's `address` and `key`.
/// The client borrows from `self`, so it must not outlive the config.
pub fn make_client(&self) -> Client {
Client::new(self.address.as_str(), self.key.as_str())
}
}
/// A project document used for uploading projects to MeiliSearch's indices.
/// This contains some extra data that is not returned by search results.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct UploadSearchProject {