Files
AstralRinth/apps/labrinth/src/routes/internal/delphi/mod.rs
T
aecsocket 34997bada5 Rescan tech review reports when a new version of Delphi is run (#5433)
* Delphi rescan when version changes

* Fix inserting duplicate reports when rescanning

* upsert report issue details instead of deleting

* fix up rescan stuff
2026-03-03 17:44:30 +00:00

574 lines
18 KiB
Rust

use std::{collections::HashMap, fmt::Write, sync::LazyLock, time::Instant};
use crate::database::PgPool;
use crate::env::ENV;
use actix_web::{HttpRequest, HttpResponse, get, post, web};
use chrono::{DateTime, Utc};
use eyre::eyre;
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
use serde::Deserialize;
use tokio::sync::Mutex;
use tracing::info;
use crate::{
auth::check_is_moderator_from_headers,
database::{
models::{
DBFileId, DBProjectId, DBThreadId, DelphiReportId,
DelphiReportIssueDetailsId, DelphiReportIssueId,
delphi_report_item::{
DBDelphiReport, DBDelphiReportIssue, DelphiSeverity,
DelphiStatus, ReportIssueDetail,
},
thread_item::ThreadMessageBuilder,
},
redis::RedisPool,
},
models::{
ids::{ProjectId, VersionId},
pats::Scopes,
threads::MessageBody,
},
queue::session::AuthQueue,
routes::ApiError,
util::{error::Context, guards::admin_key_guard},
};
pub mod rescan;
/// Registers all Delphi routes under the `delphi` scope.
pub fn config(cfg: &mut web::ServiceConfig) {
    let delphi_scope = web::scope("delphi")
        .service(ingest_report)
        .service(_run)
        .service(version)
        .service(issue_type_schema);
    cfg.service(delphi_scope);
}
/// HTTP client shared by every request this module sends to Delphi.
///
/// Built lazily on first use. Each request carries a `User-Agent` of
/// `Labrinth/` followed by the `COMPILATION_DATE` build-time environment
/// variable.
static DELPHI_CLIENT: LazyLock<reqwest::Client> = LazyLock::new(|| {
    let user_agent = HeaderValue::from_static(concat!(
        "Labrinth/",
        env!("COMPILATION_DATE")
    ));
    let mut default_headers = HeaderMap::new();
    default_headers.insert(USER_AGENT, user_agent);

    reqwest::Client::builder()
        .default_headers(default_headers)
        .build()
        .unwrap()
});
/// Type of [`DelphiReportIssueDetails::key`].
///
/// Keys are opaque strings generated by Delphi which refer to some "unique
/// location" in a JAR file, such that subsequent Delphi scans of different
/// JARs with the same issue detail will result in having the same key.
///
/// Delphi may send `null` instead of a key. We still need one to store issue
/// details in the database, since detail verdicts are keyed by
/// (project id, issue detail key), so a missing key is replaced by a randomly
/// generated one while deserializing.
#[derive(Debug, Clone)]
pub struct IssueDetailKey(pub String);

impl<'de> Deserialize<'de> for IssueDetailKey {
    /// Reads an optional string, substituting a random `<no-key-…>`
    /// placeholder when Delphi sent `null`.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let key = match Option::<String>::deserialize(deserializer)? {
            Some(provided) => provided,
            None => format!("<no-key-{:016x}>", rand::random::<u64>()),
        };
        Ok(Self(key))
    }
}
/// A single issue detail inside a Delphi report, as deserialized from the
/// payload Delphi sends to the ingest route.
#[derive(Debug, Deserialize)]
struct DelphiReportIssueDetails {
/// Name used to look up the decompiled source in
/// [`DelphiReport::decompiled_sources`]; stored as the detail's file path.
pub file: String,
/// Key identifying this detail; see [`IssueDetailKey`].
pub key: IssueDetailKey,
/// Optional JAR name, stored alongside the detail.
/// NOTE(review): presumably the (nested) JAR the file was found in - confirm
/// against the Delphi schema.
pub jar: Option<String>,
/// Arbitrary JSON data attached to the detail, stored as-is.
pub data: HashMap<String, serde_json::Value>,
/// Severity Delphi assigned to this individual detail.
pub severity: DelphiSeverity,
}
#[derive(Debug, Deserialize)]
struct DelphiReport {
pub url: String,
pub project_id: crate::models::ids::ProjectId,
#[serde(rename = "version_id")]
pub version_id: crate::models::ids::VersionId,
pub file_id: crate::models::ids::FileId,
/// A sequential, monotonically increasing version number for the
/// Delphi version that generated this report.
pub delphi_version: i32,
pub issues: HashMap<String, Vec<DelphiReportIssueDetails>>,
pub severity: DelphiSeverity,
/// Map of [`DelphiReportIssueDetails::file`] to the decompiled Java source
/// code.
pub decompiled_sources: HashMap<String, Option<String>>,
}
impl DelphiReport {
    /// Posts a summary of this report to the Delphi Slack webhook.
    ///
    /// The message lists every issue detail together with the decompiled
    /// source of the file it was found in, or a placeholder when no
    /// decompiled source is available.
    async fn send_to_slack(
        &self,
        pool: &PgPool,
        redis: &RedisPool,
    ) -> Result<(), ApiError> {
        let webhook_url = ENV.DELPHI_SLACK_WEBHOOK.clone();

        let mut message =
            format!("⚠️ Suspicious traces found at {}", self.url);
        for (issue, details) in &self.issues {
            for DelphiReportIssueDetails { file, .. } in details {
                let source_text: &str = self
                    .decompiled_sources
                    .get(file)
                    .and_then(|source| source.as_deref())
                    .unwrap_or("No decompiled source available");

                // Writing into a `String` cannot fail; the result is ignored.
                let _ = write!(
                    &mut message,
                    "\n issue {issue} found at class `{file}`:\n```\n{}\n```",
                    source_text
                );
            }
        }

        crate::util::webhook::send_slack_project_webhook(
            self.project_id,
            pool,
            redis,
            &webhook_url,
            Some(message),
        )
        .await
    }
}
/// Parameters selecting the file that Delphi should scan.
///
/// Deserialized from the query string of the `run` route and passed to
/// [`run`].
#[derive(Deserialize)]
pub struct DelphiRunParameters {
/// ID of the file to submit to Delphi.
pub file_id: crate::models::ids::FileId,
}
#[post("ingest", guard = "admin_key_guard")]
async fn ingest_report(
pool: web::Data<PgPool>,
redis: web::Data<RedisPool>,
web::Json(report): web::Json<serde_json::Value>,
) -> Result<(), ApiError> {
// treat this as an internal error, since it's not a bad request from the
// client's side - it's *our* fault for handling the Delphi schema wrong
// this could happen if Delphi updates and Labrinth doesn't
let report = serde_json::from_value::<DelphiReport>(report.clone())
.wrap_internal_err_with(|| {
eyre!(
"Delphi sent a response which does not match our schema\n\n{}",
serde_json::to_string_pretty(&report).unwrap()
)
})?;
ingest_report_deserialized(pool, redis, report).await
}
/// Stores a deserialized Delphi report and, if needed, moves the file's
/// project into the tech review queue.
///
/// A report without issues is only logged. Otherwise the report is announced
/// on Slack (best-effort), and the report, its issues and their details are
/// written in a single database transaction. The project enters tech review
/// when it has no pending issue details yet and the report contains at least
/// one issue detail key without a verdict for this project.
///
/// # Errors
///
/// Returns an internal [`ApiError`] if any database operation fails. A failed
/// Slack notification is logged and does not fail the ingest.
#[tracing::instrument(
    level = "info",
    skip_all,
    fields(
        %report.url,
        %report.file_id,
        %report.project_id,
        %report.version_id,
    )
)]
async fn ingest_report_deserialized(
    pool: web::Data<PgPool>,
    redis: web::Data<RedisPool>,
    report: DelphiReport,
) -> Result<(), ApiError> {
    if report.issues.is_empty() {
        info!("No issues found for file");
        return Ok(());
    }

    // Slack is best-effort: a failed notification must not block ingestion,
    // but it should leave a trace in the logs instead of vanishing silently.
    if let Err(err) = report.send_to_slack(&pool, &redis).await {
        tracing::warn!("failed to send Delphi report to Slack: {err:?}");
    }

    let mut transaction = pool
        .begin()
        .await
        .wrap_internal_err("failed to begin Delphi ingest transaction")?;

    let report_id = DBDelphiReport {
        id: DelphiReportId(0), // This will be set by the database
        file_id: Some(DBFileId(report.file_id.0 as i64)),
        delphi_version: report.delphi_version,
        artifact_url: report.url.clone(),
        created: DateTime::<Utc>::MIN_UTC, // This will be set by the database
        severity: report.severity,
    }
    .upsert(&mut transaction)
    .await
    .wrap_internal_err("failed to upsert Delphi report")?;

    info!(
        num_issues = %report.issues.len(),
        "Delphi found issues in file",
    );

    // Restrict `mods` to the report's project. Without the `WHERE` clause the
    // join yields one row per project in the table, of which only the first
    // is ever read.
    let record = sqlx::query!(
        r#"
SELECT
EXISTS(
SELECT 1 FROM delphi_issue_details_with_statuses didws
WHERE didws.project_id = $1 AND didws.status = 'pending'
) AS "pending_issue_details_exist!",
t.id AS "thread_id: DBThreadId"
FROM mods m
INNER JOIN threads t ON t.mod_id = $1
WHERE m.id = $1
"#,
        DBProjectId::from(report.project_id) as _,
    )
    .fetch_one(&mut transaction)
    .await
    .wrap_internal_err("failed to check if pending issue details exist")?;

    let issue_detail_keys = report
        .issues
        .values()
        .flatten()
        .map(|issue_detail| issue_detail.key.0.clone())
        .collect::<Vec<_>>();

    // An incoming detail is "unflagged" when no verdict exists yet for its
    // (project id, detail key) pair.
    let has_unflagged_issue_details = sqlx::query!(
        r#"
SELECT EXISTS(
SELECT 1
FROM unnest($2::text[]) AS incoming(detail_key)
LEFT JOIN delphi_issue_detail_verdicts didv
ON didv.project_id = $1 AND didv.detail_key = incoming.detail_key
WHERE didv.project_id IS NULL
) AS "has_unflagged_issue_details!"
"#,
        DBProjectId::from(report.project_id) as _,
        &issue_detail_keys
    )
    .fetch_one(&mut transaction)
    .await
    .wrap_internal_err("failed to check if report has unflagged issue details")?;

    let should_enter_tech_review = !record.pending_issue_details_exist
        && has_unflagged_issue_details.has_unflagged_issue_details;

    if should_enter_tech_review {
        info!("File's project is entering tech review queue");

        ThreadMessageBuilder {
            author_id: None,
            body: MessageBody::TechReviewEntered,
            thread_id: record.thread_id,
            hide_identity: false,
        }
        .insert(&mut transaction)
        .await
        .wrap_internal_err("failed to add entering tech review message")?;
    } else {
        info!(
            "File's project is not entering tech review queue (already pending or no new unflagged issue details)"
        );
    }

    // TODO: Currently, the way we determine if an issue is in tech review or not
    // is if it has any issue details which are pending.
    // If you mark all issue details as safe or not safe - even if you don't
    // submit the final report - the project will be taken out of tech review
    // queue, and into moderation queue.
    //
    // This is undesirable, but we can't rework the database schema to fix it
    // right now. As a hack, we add a dummy report issue which blocks the
    // project from exiting the tech review queue.
    if should_enter_tech_review {
        let dummy_issue_id = DBDelphiReportIssue {
            id: DelphiReportIssueId(0), // This will be set by the database
            report_id,
            issue_type: "__dummy".into(),
        }
        .upsert(&mut transaction)
        .await
        .wrap_internal_err("failed to upsert dummy Delphi report issue")?;

        ReportIssueDetail {
            id: DelphiReportIssueDetailsId(0), // This will be set by the database
            issue_id: dummy_issue_id,
            key: "".into(),
            jar: None,
            file_path: "".into(),
            decompiled_source: None,
            data: HashMap::new(),
            severity: DelphiSeverity::Low,
            status: DelphiStatus::Pending,
        }
        .insert(&mut transaction)
        .await
        .wrap_internal_err(
            "failed to insert dummy Delphi report issue detail",
        )?;
    }

    for (issue_type, issue_details) in report.issues {
        let issue_id = DBDelphiReportIssue {
            id: DelphiReportIssueId(0), // This will be set by the database
            report_id,
            issue_type,
        }
        .upsert(&mut transaction)
        .await
        .wrap_internal_err("failed to upsert Delphi report issue")?;

        // This is required to handle the case where the same Delphi version is re-run on the same file
        ReportIssueDetail::remove_all_by_issue_id(issue_id, &mut transaction)
            .await
            .wrap_internal_err("failed to remove old Delphi issue details")?;

        for issue_detail in issue_details {
            let decompiled_source =
                report.decompiled_sources.get(&issue_detail.file);

            ReportIssueDetail {
                id: DelphiReportIssueDetailsId(0), // This will be set by the database
                issue_id,
                key: issue_detail.key.0,
                jar: issue_detail.jar,
                file_path: issue_detail.file,
                decompiled_source: decompiled_source.cloned().flatten(),
                data: issue_detail.data,
                severity: issue_detail.severity,
                status: DelphiStatus::Pending,
            }
            .insert(&mut transaction)
            .await
            .wrap_internal_err("failed to insert Delphi issue detail")?;
        }
    }

    transaction
        .commit()
        .await
        .wrap_internal_err("failed to commit Delphi ingest transaction")?;

    Ok(())
}
pub async fn run(
exec: impl crate::database::Executor<'_, Database = sqlx::Postgres>,
run_parameters: DelphiRunParameters,
) -> Result<HttpResponse, ApiError> {
let file_data = sqlx::query!(
r#"
SELECT
version_id AS "version_id: crate::database::models::DBVersionId",
versions.mod_id AS "project_id: crate::database::models::DBProjectId",
files.url AS "url"
FROM files INNER JOIN versions ON files.version_id = versions.id
WHERE files.id = $1
"#,
run_parameters.file_id.0 as i64
)
.fetch_one(exec)
.await?;
tracing::debug!(
"Running Delphi for project {}, version {}, file {}",
file_data.project_id.0,
file_data.version_id.0,
run_parameters.file_id.0
);
DELPHI_CLIENT
.post(&ENV.DELPHI_URL)
.json(&serde_json::json!({
"url": file_data.url,
"project_id": ProjectId(file_data.project_id.0 as u64),
"version_id": VersionId(file_data.version_id.0 as u64),
"file_id": run_parameters.file_id,
}))
.send()
.await
.and_then(|res| res.error_for_status())
.map_err(ApiError::delphi)?;
Ok(HttpResponse::NoContent().finish())
}
/// Reports whether a project currently has pending Delphi issue details.
///
/// Details belonging to the `__dummy` issue type are ignored.
///
/// # Errors
///
/// Returns an internal error if the query fails.
pub async fn is_project_in_tech_review(
    project_id: DBProjectId,
    exec: impl crate::database::Executor<'_, Database = sqlx::Postgres>,
) -> Result<bool, ApiError> {
    let is_in_tech_review = sqlx::query!(
        r#"
SELECT EXISTS(
SELECT 1
FROM delphi_issue_details_with_statuses didws
INNER JOIN delphi_report_issues dri ON dri.id = didws.issue_id
WHERE
didws.project_id = $1
AND didws.status = 'pending'
-- see delphi.rs todo comment
AND dri.issue_type != '__dummy'
) AS "is_in_tech_review!"
"#,
        project_id as _,
    )
    .fetch_one(exec)
    .await
    .wrap_internal_err("failed to fetch project tech review state")?
    .is_in_tech_review;

    Ok(is_in_tech_review)
}
/// Adds a "tech review exited because a file was deleted" message to the
/// project's thread.
///
/// Does nothing when the project has no thread.
///
/// # Errors
///
/// Returns an internal error if the thread lookup or the message insert
/// fails.
pub async fn send_tech_review_exit_file_deleted_message(
    project_id: DBProjectId,
    txn: &mut crate::database::PgTransaction<'_>,
) -> Result<(), ApiError> {
    let maybe_thread = sqlx::query!(
        r#"
SELECT id AS "thread_id: DBThreadId"
FROM threads
WHERE mod_id = $1
LIMIT 1
"#,
        project_id as _,
    )
    .fetch_optional(&mut *txn)
    .await
    .wrap_internal_err("failed to fetch thread for tech review exit message")?;

    let Some(row) = maybe_thread else {
        return Ok(());
    };

    let exit_message = ThreadMessageBuilder {
        author_id: None,
        body: MessageBody::TechReviewExitFileDeleted,
        thread_id: row.thread_id,
        hide_identity: false,
    };
    exit_message
        .insert(txn)
        .await
        .wrap_internal_err("failed to add tech review exit message")?;

    Ok(())
}
/// Sends the tech review exit message only if the project actually left tech
/// review.
///
/// `was_in_tech_review` is the state the caller observed earlier. When it is
/// `false` nothing is queried or sent. Otherwise the current state is
/// re-checked and the message is sent only if the project is no longer in
/// tech review.
pub async fn send_tech_review_exit_file_deleted_message_if_exited(
    project_id: DBProjectId,
    was_in_tech_review: bool,
    txn: &mut crate::database::PgTransaction<'_>,
) -> Result<(), ApiError> {
    // `&&` short-circuits, so the database is only consulted when the project
    // was in tech review to begin with.
    let has_exited = was_in_tech_review
        && !is_project_in_tech_review(project_id, &mut *txn).await?;

    if has_exited {
        send_tech_review_exit_file_deleted_message(project_id, txn).await?;
    }
    Ok(())
}
/// Moderator-only route which submits the given file to Delphi.
///
/// See [`run`] for the actual submission.
#[post("run")]
async fn _run(
    req: HttpRequest,
    pool: web::Data<PgPool>,
    redis: web::Data<RedisPool>,
    session_queue: web::Data<AuthQueue>,
    run_parameters: web::Query<DelphiRunParameters>,
) -> Result<HttpResponse, ApiError> {
    let parameters = run_parameters.into_inner();

    check_is_moderator_from_headers(
        &req,
        &**pool,
        &redis,
        &session_queue,
        Scopes::PROJECT_READ,
    )
    .await?;

    run(&**pool, parameters).await
}
/// Moderator-only route returning the highest Delphi version found among the
/// stored reports, as JSON.
#[get("version")]
async fn version(
    req: HttpRequest,
    pool: web::Data<PgPool>,
    redis: web::Data<RedisPool>,
    session_queue: web::Data<AuthQueue>,
) -> Result<HttpResponse, ApiError> {
    check_is_moderator_from_headers(
        &req,
        &**pool,
        &redis,
        &session_queue,
        Scopes::PROJECT_READ,
    )
    .await?;

    let latest_version =
        sqlx::query_scalar!("SELECT MAX(delphi_version) FROM delphi_reports")
            .fetch_one(&**pool)
            .await?;

    Ok(HttpResponse::Ok().json(latest_version))
}
/// Moderator-only route returning Delphi's issue type schema as JSON.
///
/// The schema is fetched from Delphi's `/schema` route and cached in memory;
/// a cached copy is reused while it is less than 60 seconds old.
#[get("issue_type/schema")]
async fn issue_type_schema(
    req: HttpRequest,
    pool: web::Data<PgPool>,
    redis: web::Data<RedisPool>,
    session_queue: web::Data<AuthQueue>,
) -> Result<HttpResponse, ApiError> {
    check_is_moderator_from_headers(
        &req,
        &**pool,
        &redis,
        &session_queue,
        Scopes::PROJECT_READ,
    )
    .await?;

    // This route is expected to be called often by the frontend, and Delphi is not necessarily
    // built to scale beyond malware analysis, so cache the result of its quasi-constant-valued
    // schema route to alleviate the load on it
    static CACHED_ISSUE_TYPE_SCHEMA: Mutex<
        Option<(serde_json::Map<String, serde_json::Value>, Instant)>,
    > = Mutex::const_new(None);

    // The lock is held for the whole refresh, including the request to Delphi.
    let mut cache = CACHED_ISSUE_TYPE_SCHEMA.lock().await;

    if let Some((schema, last_fetch)) = cache.as_ref() {
        if last_fetch.elapsed().as_secs() < 60 {
            return Ok(HttpResponse::Ok().json(schema));
        }
    }

    let response = DELPHI_CLIENT
        .get(format!("{}/schema", ENV.DELPHI_URL))
        .send()
        .await
        .and_then(|res| res.error_for_status())
        .map_err(ApiError::delphi)?;
    let fresh_schema = response
        .json::<serde_json::Map<String, serde_json::Value>>()
        .await
        .map_err(ApiError::delphi)?;

    // On failure the early returns above leave any stale entry untouched.
    let entry = cache.insert((fresh_schema, Instant::now()));
    Ok(HttpResponse::Ok().json(&entry.0))
}