Fix download counts (#746) (#747)

* Fix download counts (#746)

* Fix download counts

* remove unsafe send

* update indexing time

* run prep

* run prep again
This commit is contained in:
Geometrically
2023-11-06 15:04:32 -07:00
committed by GitHub
parent 40f28be3b4
commit aab95444a8
15 changed files with 495 additions and 578 deletions

View File

@@ -0,0 +1,28 @@
{
"db_name": "PostgreSQL",
"query": "\n SELECT v.id id, v.mod_id mod_id FROM files f\n INNER JOIN versions v ON v.id = f.version_id\n WHERE f.url = $1\n ",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "id",
"type_info": "Int8"
},
{
"ordinal": 1,
"name": "mod_id",
"type_info": "Int8"
}
],
"parameters": {
"Left": [
"Text"
]
},
"nullable": [
false,
false
]
},
"hash": "155361716f9d697c0d961b7bbad30e70698a8e5c9ceaa03b2091e058b58fb938"
}

View File

@@ -1,14 +0,0 @@
{
"db_name": "PostgreSQL",
"query": "UPDATE mods\n SET downloads = downloads + 1\n WHERE (id = $1)",
"describe": {
"columns": [],
"parameters": {
"Left": [
"Int8"
]
},
"nullable": []
},
"hash": "49a5d21a1454afc6383b78e468fd0decc75b9163e7286f34ceab22d563a0d3f7"
}

View File

@@ -1,14 +0,0 @@
{
"db_name": "PostgreSQL",
"query": "UPDATE versions\n SET downloads = downloads + 1\n WHERE (id = $1)",
"describe": {
"columns": [],
"parameters": {
"Left": [
"Int8"
]
},
"nullable": []
},
"hash": "4d54032b02c860f4facec39eacb4548a0701d4505e7a80b4834650696df69c2b"
}

View File

@@ -0,0 +1,14 @@
{
"db_name": "PostgreSQL",
"query": "UPDATE versions\n SET downloads = downloads + 1\n WHERE id = ANY($1)",
"describe": {
"columns": [],
"parameters": {
"Left": [
"Int8Array"
]
},
"nullable": []
},
"hash": "b993ec7579f06603a2a308dccd1ea1fbffd94286db48bc0e36a30f4f6a9d39af"
}

View File

@@ -0,0 +1,14 @@
{
"db_name": "PostgreSQL",
"query": "UPDATE mods\n SET downloads = downloads + 1\n WHERE id = ANY($1)",
"describe": {
"columns": [],
"parameters": {
"Left": [
"Int8Array"
]
},
"nullable": []
},
"hash": "d08c9ef6a8829ce1d23d66f27c58f4b9b64f4ce985e60ded871d1f31eb0c818b"
}

View File

@@ -1,34 +0,0 @@
{
"db_name": "PostgreSQL",
"query": "\n SELECT v.id id, v.mod_id mod_id, file_type FROM files f\n INNER JOIN versions v ON v.id = f.version_id\n WHERE f.url = $1\n ",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "id",
"type_info": "Int8"
},
{
"ordinal": 1,
"name": "mod_id",
"type_info": "Int8"
},
{
"ordinal": 2,
"name": "file_type",
"type_info": "Varchar"
}
],
"parameters": {
"Left": [
"Text"
]
},
"nullable": [
false,
false,
true
]
},
"hash": "dd57a6dd89fefedbde796ef02b308ce7dba17ca0c65ffd5f9e35e296a72d4c1c"
}

527
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -86,8 +86,8 @@ rust_decimal = { version = "1.30.0", features = [
"serde-with-float",
"serde-with-str",
] }
redis = { version = "0.23.0", features = ["tokio-comp", "ahash", "r2d2"] }
deadpool-redis = "0.12.0"
redis = { version = "0.23.3", features = ["tokio-comp", "ahash", "r2d2"]}
deadpool-redis = "0.13.0"
clickhouse = { version = "0.11.2", features = ["uuid", "time"] }
uuid = { version = "1.2.2", features = ["v4", "fast-rng", "serde"] }

View File

@@ -7,7 +7,7 @@ const DEFAULT_EXPIRY: i64 = 1800; // 30 minutes
#[derive(Clone)]
pub struct RedisPool {
pool: deadpool_redis::Pool,
pub pool: deadpool_redis::Pool,
meta_namespace: String,
}

View File

@@ -4,7 +4,7 @@ use actix_web::web;
use database::redis::RedisPool;
use log::{info, warn};
use queue::{
analytics::AnalyticsQueue, download::DownloadQueue, payouts::PayoutsQueue, session::AuthQueue,
analytics::AnalyticsQueue, payouts::PayoutsQueue, session::AuthQueue,
socket::ActiveSockets,
};
use scheduler::Scheduler;
@@ -49,7 +49,6 @@ pub struct LabrinthConfig {
pub scheduler: Arc<Scheduler>,
pub ip_salt: Pepper,
pub search_config: search::SearchConfig,
pub download_queue: web::Data<DownloadQueue>,
pub session_queue: web::Data<AuthQueue>,
pub payouts_queue: web::Data<Mutex<PayoutsQueue>>,
pub analytics_queue: Arc<AnalyticsQueue>,
@@ -139,24 +138,6 @@ pub fn app_setup(
scheduler::schedule_versions(&mut scheduler, pool.clone());
let download_queue = web::Data::new(DownloadQueue::new());
let pool_ref = pool.clone();
let download_queue_ref = download_queue.clone();
scheduler.run(std::time::Duration::from_secs(60 * 5), move || {
let pool_ref = pool_ref.clone();
let download_queue_ref = download_queue_ref.clone();
async move {
info!("Indexing download queue");
let result = download_queue_ref.index(&pool_ref).await;
if let Err(e) = result {
warn!("Indexing download queue failed: {:?}", e);
}
info!("Done indexing download queue");
}
});
let session_queue = web::Data::new(AuthQueue::new());
let pool_ref = pool.clone();
@@ -202,13 +183,19 @@ pub fn app_setup(
{
let client_ref = clickhouse.clone();
let analytics_queue_ref = analytics_queue.clone();
scheduler.run(std::time::Duration::from_secs(60 * 5), move || {
let pool_ref = pool.clone();
let redis_ref = redis_pool.clone();
scheduler.run(std::time::Duration::from_secs(15), move || {
let client_ref = client_ref.clone();
let analytics_queue_ref = analytics_queue_ref.clone();
let pool_ref = pool_ref.clone();
let redis_ref = redis_ref.clone();
async move {
info!("Indexing analytics queue");
let result = analytics_queue_ref.index(client_ref).await;
let result = analytics_queue_ref
.index(client_ref, &redis_ref, &pool_ref)
.await;
if let Err(e) = result {
warn!("Indexing analytics queue failed: {:?}", e);
}
@@ -252,7 +239,6 @@ pub fn app_setup(
maxmind,
scheduler: Arc::new(scheduler),
ip_salt,
download_queue,
search_config,
session_queue,
payouts_queue,
@@ -282,7 +268,6 @@ pub fn app_config(cfg: &mut web::ServiceConfig, labrinth_config: LabrinthConfig)
.app_data(web::Data::new(labrinth_config.pool.clone()))
.app_data(web::Data::new(labrinth_config.file_host.clone()))
.app_data(web::Data::new(labrinth_config.search_config.clone()))
.app_data(labrinth_config.download_queue.clone())
.app_data(labrinth_config.session_queue.clone())
.app_data(labrinth_config.payouts_queue.clone())
.app_data(web::Data::new(labrinth_config.ip_salt.clone()))

View File

@@ -1,16 +1,16 @@
use crate::database::models::DatabaseError;
use crate::models::analytics::{Download, PageView, Playtime};
use dashmap::DashSet;
use crate::routes::ApiError;
use dashmap::{DashMap, DashSet};
use redis::cmd;
use sqlx::PgPool;
use crate::database::redis::RedisPool;
#[cfg(test)]
mod tests;
const VIEWS_TABLENAME: &str = "views";
const DOWNLOADS_TABLENAME: &str = "downloads";
const PLAYTIME_TABLENAME: &str = "playtime";
const DOWNLOADS_NAMESPACE: &str = "downloads";
pub struct AnalyticsQueue {
views_queue: DashSet<PageView>,
downloads_queue: DashSet<Download>,
downloads_queue: DashMap<String, Download>,
playtime_queue: DashSet<Playtime>,
}
@@ -25,7 +25,7 @@ impl AnalyticsQueue {
pub fn new() -> Self {
AnalyticsQueue {
views_queue: DashSet::with_capacity(1000),
downloads_queue: DashSet::with_capacity(1000),
downloads_queue: DashMap::with_capacity(1000),
playtime_queue: DashSet::with_capacity(1000),
}
}
@@ -35,45 +35,138 @@ impl AnalyticsQueue {
}
/// Queues a download for the next indexing pass.
///
/// Entries are keyed by `"<ip prefix>-<project id>"`, so a later download
/// from the same address prefix for the same project replaces the earlier
/// queued one instead of adding a second entry.
///
/// The prefix is the upper 64 bits of a native IPv6 address. An
/// IPv4-mapped address (`::ffff:a.b.c.d`) has all-zero upper 64 bits, so
/// for those the four IPv4 octets are used instead; otherwise every IPv4
/// client would share the key `0-<project id>`.
pub fn add_download(&self, download: Download) {
    let ip_stripped = if let Some(v4) = download.ip.to_ipv4_mapped() {
        let [a, b, c, d] = v4.octets();
        u64::from_be_bytes([a, b, c, d, 0, 0, 0, 0])
    } else {
        let octets = download.ip.octets();
        u64::from_be_bytes([
            octets[0], octets[1], octets[2], octets[3], octets[4], octets[5], octets[6],
            octets[7],
        ])
    };

    self.downloads_queue
        .insert(format!("{}-{}", ip_stripped, download.project_id), download);
}
/// Adds a playtime record to the in-memory playtime queue.
pub fn add_playtime(&self, playtime: Playtime) {
self.playtime_queue.insert(playtime);
}
pub async fn index(&self, client: clickhouse::Client) -> Result<(), clickhouse::error::Error> {
Self::index_queue(&client, &self.views_queue, VIEWS_TABLENAME).await?;
Self::index_queue(&client, &self.downloads_queue, DOWNLOADS_TABLENAME).await?;
Self::index_queue(&client, &self.playtime_queue, PLAYTIME_TABLENAME).await?;
/// Flushes the queued views, playtimes and downloads.
///
/// * Views and playtimes are written to the ClickHouse tables `views` and
///   `playtime`.
/// * Each queued download is checked against a per-key counter stored in
///   Redis under the `downloads:` namespace. A download whose stored count
///   is already greater than 5 is dropped; otherwise the counter is
///   incremented and rewritten with a six hour expiry.
/// * Surviving downloads are written to the ClickHouse table `downloads`,
///   and the `downloads` columns of `versions` and `mods` in Postgres are
///   increased inside one transaction.
///
/// # Errors
///
/// Returns the first ClickHouse, Redis or Postgres error encountered. The
/// queues are emptied before any write happens, so entries taken for a
/// failed pass are not re-queued.
pub async fn index(
    &self,
    client: clickhouse::Client,
    redis: &RedisPool,
    pool: &PgPool,
) -> Result<(), ApiError> {
    // Snapshot each queue, then empty it so new events go into the next pass.
    let views_queue = self.views_queue.clone();
    self.views_queue.clear();

    let downloads_queue = self.downloads_queue.clone();
    self.downloads_queue.clear();

    let playtime_queue = self.playtime_queue.clone();
    self.playtime_queue.clear();

    if !views_queue.is_empty() {
        let mut views = client.insert("views")?;

        for view in views_queue {
            views.write(&view).await?;
        }

        views.end().await?;
    }

    if !playtime_queue.is_empty() {
        let mut playtimes = client.insert("playtime")?;

        for playtime in playtime_queue {
            playtimes.write(&playtime).await?;
        }

        playtimes.end().await?;
    }

    if !downloads_queue.is_empty() {
        // `downloads_keys[i]` is the queue key of `raw_downloads[i]`; the
        // shared index lets the MGET results be matched back to downloads.
        let mut downloads_keys = Vec::new();
        let raw_downloads = DashMap::new();

        for (index, (key, download)) in downloads_queue.into_iter().enumerate() {
            downloads_keys.push(key);
            raw_downloads.insert(index, download);
        }

        let mut redis = redis.pool.get().await.map_err(DatabaseError::RedisPool)?;

        let results = cmd("MGET")
            .arg(
                downloads_keys
                    .iter()
                    .map(|x| format!("{}:{}", DOWNLOADS_NAMESPACE, x))
                    .collect::<Vec<_>>(),
            )
            .query_async::<_, Vec<Option<u32>>>(&mut redis)
            .await
            .map_err(DatabaseError::CacheError)?;

        let mut pipe = redis::pipe();
        for (idx, count) in results.into_iter().enumerate() {
            let key = &downloads_keys[idx];

            let new_count = if let Some(count) = count {
                if count > 5 {
                    // Over the limit for this key: do not count the download.
                    raw_downloads.remove(&idx);
                    continue;
                }

                count + 1
            } else {
                1
            };

            pipe.atomic().set_ex(
                format!("{}:{}", DOWNLOADS_NAMESPACE, key),
                new_count,
                6 * 60 * 60,
            );
        }
        pipe.query_async(&mut *redis)
            .await
            .map_err(DatabaseError::CacheError)?;

        // Count accepted downloads per version and per project. A plain
        // `WHERE id = ANY($1)` adds 1 per *distinct* id, which under-counts
        // when one batch holds several downloads of the same version/project.
        let mut version_downloads: std::collections::HashMap<i64, i32> =
            std::collections::HashMap::new();
        let mut project_downloads: std::collections::HashMap<i64, i32> =
            std::collections::HashMap::new();
        for entry in raw_downloads.iter() {
            *version_downloads
                .entry(entry.version_id as i64)
                .or_default() += 1;
            *project_downloads
                .entry(entry.project_id as i64)
                .or_default() += 1;
        }
        let (version_ids, version_amounts): (Vec<i64>, Vec<i32>) =
            version_downloads.into_iter().unzip();
        let (project_ids, project_amounts): (Vec<i64>, Vec<i32>) =
            project_downloads.into_iter().unzip();

        let mut transaction = pool.begin().await?;
        let mut downloads = client.insert("downloads")?;

        for (_, download) in raw_downloads {
            downloads.write(&download).await?;
        }

        // NOTE(review): runtime-checked `sqlx::query` is used because these
        // statements are not in the offline query cache; switch back to
        // `sqlx::query!` once the cache has been regenerated.
        sqlx::query(
            "UPDATE versions v
            SET downloads = v.downloads + x.amount
            FROM unnest($1::BIGINT[], $2::INT[]) AS x(id, amount)
            WHERE v.id = x.id",
        )
        .bind(version_ids)
        .bind(version_amounts)
        .execute(&mut *transaction)
        .await?;

        sqlx::query(
            "UPDATE mods m
            SET downloads = m.downloads + x.amount
            FROM unnest($1::BIGINT[], $2::INT[]) AS x(id, amount)
            WHERE m.id = x.id",
        )
        .bind(project_ids)
        .bind(project_amounts)
        .execute(&mut *transaction)
        .await?;

        transaction.commit().await?;
        downloads.end().await?;
    }

    Ok(())
}
/// Drains `queue` into the ClickHouse table `table_name`.
///
/// An empty queue is a no-op. Otherwise the queue is snapshotted and
/// cleared, and every snapshotted row is written through one inserter,
/// with `commit` called after each row and `end` called once at the end.
async fn index_queue<T>(
    client: &clickhouse::Client,
    queue: &DashSet<T>,
    table_name: &str,
) -> Result<(), clickhouse::error::Error>
where
    T: serde::Serialize + Eq + std::hash::Hash + Clone + clickhouse::Row,
{
    if !queue.is_empty() {
        // Take a private copy so the shared queue can be emptied right away.
        let pending = queue.clone();
        queue.clear();

        let mut sink = client.inserter(table_name)?;
        for item in pending {
            sink.write(&item).await?;
            sink.commit().await?;
        }
        sink.end().await?;
    }

    Ok(())
}
}
}

View File

@@ -1,128 +0,0 @@
use futures::Future;
use uuid::Uuid;
use super::*;
use crate::clickhouse::init_client_with_database;
use std::net::Ipv6Addr;
// Queues one download, one playtime and one view, indexes them, and checks
// that each ClickHouse table holds exactly one row.
#[tokio::test]
async fn test_indexing() {
with_test_clickhouse_db(|clickhouse_client| async move {
let analytics = AnalyticsQueue::new();
analytics.add_download(get_default_download());
analytics.add_playtime(get_default_playtime());
analytics.add_view(get_default_views());
analytics.index(clickhouse_client.clone()).await.unwrap();
assert_table_counts(&clickhouse_client, 1, 1, 1).await;
// Indexing again without queuing anything must not add rows.
analytics.index(clickhouse_client.clone()).await.unwrap();
assert_table_counts(&clickhouse_client, 1, 1, 1).await;
})
.await;
}
// Queues 100_000 downloads and checks that a single index pass writes all of
// them to the downloads table.
#[tokio::test]
async fn can_insert_many_downloads() {
with_test_clickhouse_db(|clickhouse_client| async move {
let analytics = AnalyticsQueue::new();
let n_downloads = 100_000;
// Each default download gets a fresh random `id`, so the values differ.
for _ in 0..n_downloads {
analytics.add_download(get_default_download());
}
analytics.index(clickhouse_client.clone()).await.unwrap();
assert_table_count(DOWNLOADS_TABLENAME, &clickhouse_client, n_downloads).await;
})
.await;
}
/// Asserts the row counts of the downloads, playtime and views tables, in
/// that order.
async fn assert_table_counts(
client: &clickhouse::Client,
downloads: u64,
playtimes: u64,
views: u64,
) {
assert_table_count(DOWNLOADS_TABLENAME, client, downloads).await;
assert_table_count(PLAYTIME_TABLENAME, client, playtimes).await;
assert_table_count(VIEWS_TABLENAME, client, views).await;
}
/// Runs `SELECT COUNT(*)` on `table_name` and asserts it equals
/// `expected_count`. Panics if the query fails.
async fn assert_table_count(table_name: &str, client: &clickhouse::Client, expected_count: u64) {
let count = client
.query(&format!("SELECT COUNT(*) from {table_name}"))
.fetch_one::<u64>()
.await
.unwrap();
assert_eq!(expected_count, count);
}
/// Runs `f` against a ClickHouse client initialised for a uniquely named
/// `test_<uuid>` database, then drops that database.
///
/// Panics if the client cannot be initialised or the drop fails. The drop
/// happens only after `f` returns, so it is skipped if `f` panics.
async fn with_test_clickhouse_db<Fut>(f: impl FnOnce(clickhouse::Client) -> Fut)
where
Fut: Future<Output = ()>,
{
let db_name = format!("test_{}", uuid::Uuid::new_v4().as_simple());
println!("Clickhouse test db: {}", db_name);
let clickhouse_client = init_client_with_database(&db_name)
.await
.expect("A real clickhouse instance should be running locally");
f(clickhouse_client.clone()).await;
clickhouse_client
.query(&format!("DROP DATABASE IF EXISTS {db_name}"))
.execute()
.await
.unwrap();
}
/// Builds a `Download` with a random `id`, the all-zero IPv6 address, and
/// default values for every other field.
fn get_default_download() -> Download {
Download {
id: Uuid::new_v4(),
recorded: Default::default(),
domain: Default::default(),
site_path: Default::default(),
user_id: Default::default(),
project_id: Default::default(),
version_id: Default::default(),
ip: get_default_ipv6(),
country: Default::default(),
user_agent: Default::default(),
headers: Default::default(),
}
}
/// Builds a `Playtime` with a random `id` and default values for every other
/// field.
fn get_default_playtime() -> Playtime {
Playtime {
id: Uuid::new_v4(),
recorded: Default::default(),
seconds: Default::default(),
user_id: Default::default(),
project_id: Default::default(),
version_id: Default::default(),
loader: Default::default(),
game_version: Default::default(),
parent: Default::default(),
}
}
/// Builds a `PageView` with a random `id`, the all-zero IPv6 address, and
/// default values for every other field.
fn get_default_views() -> PageView {
PageView {
id: Uuid::new_v4(),
recorded: Default::default(),
domain: Default::default(),
site_path: Default::default(),
user_id: Default::default(),
project_id: Default::default(),
ip: get_default_ipv6(),
country: Default::default(),
user_agent: Default::default(),
headers: Default::default(),
}
}
/// Returns the all-zero IPv6 address (`::`) used by the test fixtures.
fn get_default_ipv6() -> Ipv6Addr {
    Ipv6Addr::UNSPECIFIED
}

View File

@@ -1,64 +0,0 @@
use crate::database::models::{DatabaseError, ProjectId, VersionId};
use sqlx::PgPool;
use tokio::sync::Mutex;
/// In-memory queue of `(project, version)` pairs, one per recorded download,
/// waiting to be applied to the database.
pub struct DownloadQueue {
// Guarded by an async mutex; each entry stands for one download.
queue: Mutex<Vec<(ProjectId, VersionId)>>,
}
/// `Default` delegates to [`DownloadQueue::new`].
impl Default for DownloadQueue {
fn default() -> Self {
Self::new()
}
}
// Batches download-count updates in memory; `index` applies a whole batch in
// one database transaction. How often `index` runs is decided by the caller.
impl DownloadQueue {
/// Creates an empty queue with capacity for 1000 entries pre-allocated.
pub fn new() -> Self {
DownloadQueue {
queue: Mutex::new(Vec::with_capacity(1000)),
}
}
/// Appends one download of `version_id` (belonging to `project_id`) to the
/// queue.
pub async fn add(&self, project_id: ProjectId, version_id: VersionId) {
self.queue.lock().await.push((project_id, version_id));
}
/// Removes and returns every queued pair, leaving an empty vector whose
/// capacity is at least the number of entries just taken.
pub async fn take(&self) -> Vec<(ProjectId, VersionId)> {
let mut queue = self.queue.lock().await;
let len = queue.len();
std::mem::replace(&mut queue, Vec::with_capacity(len))
}
/// Takes the queued pairs and, if there are any, increments `downloads` on
/// the matching `versions` and `mods` rows once per pair inside a single
/// transaction.
///
/// On error the transaction is not committed; the pairs already taken
/// from the queue are not put back.
pub async fn index(&self, pool: &PgPool) -> Result<(), DatabaseError> {
let queue = self.take().await;
if !queue.is_empty() {
let mut transaction = pool.begin().await?;
// Two UPDATE statements per queued download, duplicates included.
for (project_id, version_id) in queue {
sqlx::query!(
"UPDATE versions
SET downloads = downloads + 1
WHERE (id = $1)",
version_id as VersionId
)
.execute(&mut *transaction)
.await?;
sqlx::query!(
"UPDATE mods
SET downloads = downloads + 1
WHERE (id = $1)",
project_id as ProjectId
)
.execute(&mut *transaction)
.await?;
}
transaction.commit().await?;
}
Ok(())
}
}

View File

@@ -1,5 +1,4 @@
pub mod analytics;
pub mod download;
pub mod maxmind;
pub mod payouts;
pub mod session;

View File

@@ -6,7 +6,6 @@ use crate::models::ids::ProjectId;
use crate::models::pats::Scopes;
use crate::models::users::{PayoutStatus, RecipientStatus};
use crate::queue::analytics::AnalyticsQueue;
use crate::queue::download::DownloadQueue;
use crate::queue::maxmind::MaxMindIndexer;
use crate::queue::session::AuthQueue;
use crate::routes::ApiError;
@@ -53,7 +52,6 @@ pub async fn count_download(
analytics_queue: web::Data<Arc<AnalyticsQueue>>,
session_queue: web::Data<AuthQueue>,
download_body: web::Json<DownloadBody>,
download_queue: web::Data<DownloadQueue>,
) -> Result<HttpResponse, ApiError> {
let token = download_body
.headers
@@ -72,9 +70,9 @@ pub async fn count_download(
.ok()
.map(|x| x as i64);
let (version_id, project_id, file_type) = if let Some(version) = sqlx::query!(
let (version_id, project_id) = if let Some(version) = sqlx::query!(
"
SELECT v.id id, v.mod_id mod_id, file_type FROM files f
SELECT v.id id, v.mod_id mod_id FROM files f
INNER JOIN versions v ON v.id = f.version_id
WHERE f.url = $1
",
@@ -83,7 +81,7 @@ pub async fn count_download(
.fetch_optional(pool.as_ref())
.await?
{
(version.id, version.mod_id, version.file_type)
(version.id, version.mod_id)
} else if let Some(version) = sqlx::query!(
"
SELECT id, mod_id FROM versions
@@ -96,22 +94,13 @@ pub async fn count_download(
.fetch_optional(pool.as_ref())
.await?
{
(version.id, version.mod_id, None)
(version.id, version.mod_id)
} else {
return Err(ApiError::InvalidInput(
"Specified version does not exist!".to_string(),
));
};
if file_type.is_none() {
download_queue
.add(
crate::database::models::ProjectId(project_id),
crate::database::models::VersionId(version_id),
)
.await;
}
let url = url::Url::parse(&download_body.url)
.map_err(|_| ApiError::InvalidInput("invalid download URL specified!".to_string()))?;