Gotenberg/PDF gen implementation (#4574)

* Gotenberg/PDF gen implementation

* Security, PDF type enum, propagate client

* chore: query cache, clippy, fmt

* clippy fixes + tombi

* Update env example, add GOTENBERG_CALLBACK_URL

* Remove test code

* Fix .env, docker-compose

* Update purpose of payment

* Add internal networking guards to gotenberg webhooks

* Fix error

* Fix lint
This commit is contained in:
François-Xavier Talbot
2025-10-20 00:56:26 +01:00
committed by GitHub
parent 6a70acef25
commit 4b17eb5d35
14 changed files with 421 additions and 13 deletions

View File

@@ -36,7 +36,7 @@ async-stripe = { version = "0.41.0", default-features = false, features = [
] }
async-trait = "0.1.89"
async-tungstenite = { version = "0.31.0", default-features = false, features = [
"futures-03-sink",
"futures-03-sink"
] }
async-walkdir = "2.1.0"
async_zip = "0.0.18"
@@ -48,7 +48,7 @@ censor = "0.3.0"
chardetng = "0.1.17"
chrono = "0.4.42"
cidre = { version = "0.11.3", default-features = false, features = [
"macos_15_0",
"macos_15_0"
] }
clap = "4.5.48"
clickhouse = "0.14.0"
@@ -129,7 +129,7 @@ reqwest = { version = "0.12.24", default-features = false }
rgb = "0.8.52"
rust_decimal = { version = "1.39.0", features = [
"serde-with-float",
"serde-with-str",
"serde-with-str"
] }
rust_iso3166 = "0.1.14"
rust-s3 = { version = "0.37.0", default-features = false, features = [

View File

@@ -90,10 +90,10 @@ import StyledDoc from '../shared/StyledDoc.vue'
Purpose of Payment
</Text>
<Text class="m-0 text-sm leading-relaxed text-secondary">
This payout reflects revenue earned by the creator through their activity on the Modrinth
platform. Earnings are based on advertising revenue, subscriptions, and/or affiliate
commissions tied to the creator's published projects, in accordance with the Rewards Program
Terms.
This payout reflects the creator's earnings from their activity on the Modrinth platform.
Such earnings are based on advertising revenue derived from user engagement with the
creator's published projects and/or affiliate commissions in accordance with the Rewards
Program Terms.
</Text>
</Section>

View File

@@ -142,4 +142,7 @@ COMPLIANCE_PAYOUT_THRESHOLD=disabled
ANROK_API_KEY=none
ANROK_API_URL=none
GOTENBERG_URL=http://labrinth-gotenberg:13000
GOTENBERG_CALLBACK_BASE=http://host.docker.internal:8000/_internal/gotenberg
ARCHON_URL=none

View File

@@ -143,4 +143,7 @@ COMPLIANCE_PAYOUT_THRESHOLD=disabled
ANROK_API_KEY=none
ANROK_API_URL=none
GOTENBERG_URL=http://localhost:13000
GOTENBERG_CALLBACK_BASE=http://host.docker.internal:8000/_internal/gotenberg
ARCHON_URL=none

View File

@@ -14,6 +14,7 @@ use tracing::{info, warn};
extern crate clickhouse as clickhouse_crate;
use clickhouse_crate::Client;
use util::cors::default_cors;
use util::gotenberg::GotenbergClient;
use crate::background_task::update_versions;
use crate::database::ReadOnlyPgPool;
@@ -63,6 +64,7 @@ pub struct LabrinthConfig {
pub stripe_client: stripe::Client,
pub anrok_client: anrok::Client,
pub email_queue: web::Data<EmailQueue>,
pub gotenberg_client: GotenbergClient,
}
#[allow(clippy::too_many_arguments)]
@@ -77,6 +79,7 @@ pub fn app_setup(
stripe_client: stripe::Client,
anrok_client: anrok::Client,
email_queue: EmailQueue,
gotenberg_client: GotenbergClient,
enable_background_tasks: bool,
) -> LabrinthConfig {
info!(
@@ -279,6 +282,7 @@ pub fn app_setup(
rate_limiter: limiter,
stripe_client,
anrok_client,
gotenberg_client,
email_queue: web::Data::new(email_queue),
}
}
@@ -304,6 +308,7 @@ pub fn app_config(
.app_data(web::Data::new(labrinth_config.ro_pool.clone()))
.app_data(web::Data::new(labrinth_config.file_host.clone()))
.app_data(web::Data::new(labrinth_config.search_config.clone()))
.app_data(web::Data::new(labrinth_config.gotenberg_client.clone()))
.app_data(labrinth_config.session_queue.clone())
.app_data(labrinth_config.payouts_queue.clone())
.app_data(labrinth_config.email_queue.clone())
@@ -477,6 +482,9 @@ pub fn check_env_vars() -> bool {
failed |= check_var::<String>("FLAME_ANVIL_URL");
failed |= check_var::<String>("GOTENBERG_URL");
failed |= check_var::<String>("GOTENBERG_CALLBACK_BASE");
failed |= check_var::<String>("STRIPE_API_KEY");
failed |= check_var::<String>("STRIPE_WEBHOOK_SECRET");

View File

@@ -12,6 +12,7 @@ use labrinth::queue::email::EmailQueue;
use labrinth::search;
use labrinth::util::anrok;
use labrinth::util::env::parse_var;
use labrinth::util::gotenberg::GotenbergClient;
use labrinth::util::ratelimit::rate_limit_middleware;
use labrinth::{check_env_vars, clickhouse, database, file_hosting};
use std::ffi::CStr;
@@ -200,6 +201,9 @@ async fn main() -> std::io::Result<()> {
let email_queue =
EmailQueue::init(pool.clone(), redis_pool.clone()).unwrap();
let gotenberg_client =
GotenbergClient::from_env().expect("Failed to create Gotenberg client");
if let Some(task) = args.run_background_task {
info!("Running task {task:?} and exiting");
task.run(
@@ -249,6 +253,7 @@ async fn main() -> std::io::Result<()> {
stripe_client,
anrok_client.clone(),
email_queue,
gotenberg_client,
!args.no_background_tasks,
);

View File

@@ -57,8 +57,8 @@ pub struct OAuthClientAuthorization {
pub created: DateTime<Utc>,
}
#[serde_as]
#[derive(Deserialize, Serialize)]
#[serde_as]
pub struct GetOAuthClientsRequest {
#[serde_as(
as = "serde_with::StringWithSeparator::<serde_with::formats::CommaSeparator, String>"

View File

@@ -0,0 +1,150 @@
use actix_web::{
HttpMessage, HttpResponse, error::ParseError, http::header, post, web,
};
use serde::Deserialize;
use tracing::trace;
use crate::routes::ApiError;
use crate::util::gotenberg::{
GeneratedPdfType, MODRINTH_GENERATED_PDF_TYPE, MODRINTH_PAYMENT_ID,
};
use crate::util::guards::internal_network_guard;
pub fn config(cfg: &mut actix_web::web::ServiceConfig) {
cfg.service(success).service(error);
}
/// Webhook hit by Gotenberg when a PDF was generated successfully.
///
/// Only reachable when `internal_network_guard` accepts the request. The
/// handler currently just records the request metadata (trace id, content
/// disposition, PDF type, optional payment id and body size) at `trace`
/// level and answers `200 OK`; the PDF bytes are not stored.
#[post("/gotenberg/success", guard = "internal_network_guard")]
pub async fn success(
    content_disposition: web::Header<header::ContentDisposition>,
    gotenberg_trace: web::Header<GotenbergTrace>,
    pdf_type: web::Header<ModrinthGeneratedPdfType>,
    maybe_payment_id: Option<web::Header<ModrinthPaymentId>>,
    body: web::Bytes,
) -> Result<HttpResponse, ApiError> {
    // Unwrap the typed header extractors down to the values we log.
    let header::ContentDisposition {
        disposition,
        parameters,
    } = content_disposition.0;
    let GotenbergTrace(trace_id) = gotenberg_trace.0;
    let ModrinthGeneratedPdfType(pdf_type) = pdf_type.0;

    trace!(
        trace = %trace_id,
        %disposition,
        disposition_parameters = ?parameters,
        r#type = pdf_type.as_str(),
        ?maybe_payment_id,
        body.len = body.len(),
        "Received Gotenberg generated PDF"
    );

    Ok(HttpResponse::Ok().finish())
}
/// JSON body accepted by the `/gotenberg/error` webhook.
///
/// Both fields are optional, so a body missing either of them still
/// deserializes.
// The fields are never read directly in this file; they are only surfaced
// through the derived `Debug` output when the webhook is logged, hence the
// `dead_code` allowance.
#[allow(dead_code)]
#[derive(Debug, Deserialize)]
pub struct ErrorBody {
// NOTE(review): presumably the status reported by Gotenberg — confirm against its webhook docs.
status: Option<String>,
// NOTE(review): presumably a human-readable failure description — confirm.
message: Option<String>,
}
/// Webhook hit by Gotenberg when PDF generation failed.
///
/// Only reachable when `internal_network_guard` accepts the request. The
/// handler logs the trace id, PDF type, optional payment id and the decoded
/// error body at `trace` level, then answers `200 OK`.
#[post("/gotenberg/error", guard = "internal_network_guard")]
pub async fn error(
    gotenberg_trace: web::Header<GotenbergTrace>,
    pdf_type: web::Header<ModrinthGeneratedPdfType>,
    maybe_payment_id: Option<web::Header<ModrinthPaymentId>>,
    payload: web::Json<ErrorBody>,
) -> Result<HttpResponse, ApiError> {
    // Unwrap the typed extractors down to the values we log.
    let GotenbergTrace(trace_id) = gotenberg_trace.0;
    let ModrinthGeneratedPdfType(pdf_type) = pdf_type.0;
    let error_body = payload.0;

    trace!(
        trace = %trace_id,
        r#type = pdf_type.as_str(),
        ?maybe_payment_id,
        ?error_body,
        "Received Gotenberg error webhook"
    );

    Ok(HttpResponse::Ok().finish())
}
/// Typed wrapper around the value of the `gotenberg-trace` request header.
#[derive(Debug)]
struct GotenbergTrace(String);

impl header::TryIntoHeaderValue for GotenbergTrace {
    type Error = header::InvalidHeaderValue;

    /// Converts the wrapped string back into a header value.
    fn try_into_value(self) -> Result<header::HeaderValue, Self::Error> {
        let Self(raw) = self;
        header::HeaderValue::from_str(&raw)
    }
}

impl header::Header for GotenbergTrace {
    fn name() -> header::HeaderName {
        header::HeaderName::from_static("gotenberg-trace")
    }

    /// Reads the header from `m`; a missing header or a value that cannot be
    /// represented as a string yields `ParseError::Header`.
    fn parse<M: HttpMessage>(m: &M) -> Result<Self, ParseError> {
        let Some(raw) = m.headers().get(Self::name()) else {
            return Err(ParseError::Header);
        };
        match raw.to_str() {
            Ok(text) => Ok(Self(text.to_owned())),
            Err(_) => Err(ParseError::Header),
        }
    }
}
/// Typed wrapper around the `MODRINTH_GENERATED_PDF_TYPE` request header,
/// holding the parsed [`GeneratedPdfType`].
#[derive(Debug)]
struct ModrinthGeneratedPdfType(GeneratedPdfType);

impl header::TryIntoHeaderValue for ModrinthGeneratedPdfType {
    type Error = header::InvalidHeaderValue;

    /// Serializes the PDF type using its string identifier.
    fn try_into_value(self) -> Result<header::HeaderValue, Self::Error> {
        let Self(kind) = self;
        header::HeaderValue::from_str(kind.as_str())
    }
}

impl header::Header for ModrinthGeneratedPdfType {
    fn name() -> header::HeaderName {
        MODRINTH_GENERATED_PDF_TYPE
    }

    /// Reads and parses the header from `m`. A missing header, a value that
    /// cannot be represented as a string, or an unrecognised PDF type all
    /// yield `ParseError::Header`.
    fn parse<M: HttpMessage>(m: &M) -> Result<Self, ParseError> {
        let raw = m.headers().get(Self::name()).ok_or(ParseError::Header)?;
        let text = raw.to_str().map_err(|_| ParseError::Header)?;
        match text.parse::<GeneratedPdfType>() {
            Ok(kind) => Ok(Self(kind)),
            Err(_) => Err(ParseError::Header),
        }
    }
}
/// Typed wrapper around the value of the `MODRINTH_PAYMENT_ID` request header.
#[derive(Debug)]
struct ModrinthPaymentId(String);

impl header::TryIntoHeaderValue for ModrinthPaymentId {
    type Error = header::InvalidHeaderValue;

    /// Converts the wrapped string back into a header value.
    fn try_into_value(self) -> Result<header::HeaderValue, Self::Error> {
        let Self(raw) = self;
        header::HeaderValue::from_str(&raw)
    }
}

impl header::Header for ModrinthPaymentId {
    fn name() -> header::HeaderName {
        MODRINTH_PAYMENT_ID
    }

    /// Reads the header from `m`; a missing header or a value that cannot be
    /// represented as a string yields `ParseError::Header`.
    fn parse<M: HttpMessage>(m: &M) -> Result<Self, ParseError> {
        let Some(raw) = m.headers().get(Self::name()) else {
            return Err(ParseError::Header);
        };
        match raw.to_str() {
            Ok(text) => Ok(Self(text.to_owned())),
            Err(_) => Err(ParseError::Header),
        }
    }
}

View File

@@ -4,6 +4,7 @@ pub mod billing;
pub mod external_notifications;
pub mod flows;
pub mod gdpr;
pub mod gotenberg;
pub mod medal;
pub mod moderation;
pub mod pats;
@@ -26,6 +27,7 @@ pub fn config(cfg: &mut actix_web::web::ServiceConfig) {
.configure(moderation::config)
.configure(billing::config)
.configure(gdpr::config)
.configure(gotenberg::config)
.configure(statuses::config)
.configure(medal::config)
.configure(external_notifications::config)

View File

@@ -0,0 +1,219 @@
use crate::routes::ApiError;
use crate::util::error::Context;
use actix_web::http::header::HeaderName;
use serde::{Deserialize, Serialize};
use std::str::FromStr;
/// Name of the HTTP header identifying which kind of PDF was generated
/// (its value is a [`GeneratedPdfType`] string such as `payment-statement`).
pub const MODRINTH_GENERATED_PDF_TYPE: HeaderName =
HeaderName::from_static("modrinth-generated-pdf-type");
/// Name of the HTTP header carrying the payment id a generated PDF refers to.
pub const MODRINTH_PAYMENT_ID: HeaderName =
HeaderName::from_static("modrinth-payment-id");
/// Data substituted into the payment statement HTML template before it is
/// submitted to Gotenberg.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct PaymentStatement {
// Also used for the output filename and the `Modrinth-Payment-Id` header.
pub payment_id: String,
// Missing address lines are rendered as empty strings.
pub recipient_address_line_1: Option<String>,
pub recipient_address_line_2: Option<String>,
pub recipient_address_line_3: Option<String>,
pub recipient_email: String,
// Inserted into the template as-is; no date formatting is applied here.
pub payment_date: String,
// Amounts are in minor currency units and are formatted together with
// `currency_code` when the template is filled.
pub gross_amount_cents: i64,
pub net_amount_cents: i64,
pub fees_cents: i64,
// Currency code looked up in the ISO currency table; an unknown code
// falls back to USD when formatting.
pub currency_code: String,
}
/// Kinds of PDF documents that can be generated.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum GeneratedPdfType {
    PaymentStatement,
}

impl GeneratedPdfType {
    /// Returns the stable string identifier for this PDF type.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::PaymentStatement => "payment-statement",
        }
    }
}

impl FromStr for GeneratedPdfType {
    /// The rejected input, returned verbatim.
    type Err = String;

    /// Parses the identifier produced by [`GeneratedPdfType::as_str`].
    /// Matching is exact (case-sensitive).
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        if s == Self::PaymentStatement.as_str() {
            Ok(Self::PaymentStatement)
        } else {
            Err(s.to_owned())
        }
    }
}
/// HTTP client used to submit documents to a Gotenberg instance for PDF
/// rendering.
#[derive(Clone)]
pub struct GotenbergClient {
    client: reqwest::Client,
    gotenberg_url: String,
    site_url: String,
    callback_base: String,
}

impl GotenbergClient {
    /// Builds the client from the `GOTENBERG_URL`, `SITE_URL` and
    /// `GOTENBERG_CALLBACK_BASE` environment variables.
    ///
    /// Trailing slashes are stripped from all three values so paths can be
    /// appended with a single `/`.
    ///
    /// # Errors
    ///
    /// Returns an internal error if the HTTP client cannot be built or if any
    /// of the variables cannot be read.
    pub fn from_env() -> Result<Self, ApiError> {
        let http = reqwest::Client::builder()
            .user_agent("Modrinth")
            .build()
            .wrap_internal_err("failed to build reqwest client")?;

        let gotenberg_url = dotenvy::var("GOTENBERG_URL")
            .wrap_internal_err("GOTENBERG_URL is not set")?;
        let site_url = dotenvy::var("SITE_URL")
            .wrap_internal_err("SITE_URL is not set")?;
        let callback_base = dotenvy::var("GOTENBERG_CALLBACK_BASE")
            .wrap_internal_err("GOTENBERG_CALLBACK_BASE is not set")?;

        let without_trailing_slash =
            |raw: String| raw.trim_end_matches('/').to_owned();

        Ok(Self {
            client: http,
            gotenberg_url: without_trailing_slash(gotenberg_url),
            site_url: without_trailing_slash(site_url),
            callback_base: without_trailing_slash(callback_base),
        })
    }

    /// Requests a PDF payment statement from Gotenberg.
    ///
    /// Steps:
    /// 1. Download the HTML template from
    ///    `{SITE_URL}/_internal/templates/doc/payment-statement`.
    /// 2. Substitute the fields of `statement` into the template.
    /// 3. POST the filled HTML to Gotenberg's Chromium HTML conversion route,
    ///    with webhook headers pointing at `{GOTENBERG_CALLBACK_BASE}/success`
    ///    and `{GOTENBERG_CALLBACK_BASE}/error`.
    ///
    /// The response body of the conversion request is not read; `Ok(())` only
    /// means Gotenberg answered with a success status.
    ///
    /// # Errors
    ///
    /// Returns an internal error if the template cannot be fetched or read,
    /// or if the submission to Gotenberg fails or returns an error status.
    pub async fn generate_payment_statement(
        &self,
        statement: &PaymentStatement,
    ) -> Result<(), ApiError> {
        // 1. Fetch the template.
        let template_html = self
            .client
            .get(format!(
                "{}/_internal/templates/doc/payment-statement",
                self.site_url
            ))
            .send()
            .await
            .wrap_internal_err("failed to request payment statement template")?
            .error_for_status()
            .wrap_internal_err(
                "failed to fetch payment statement template (bad status)",
            )?
            .text()
            .await
            .wrap_internal_err(
                "failed to read payment statement template body",
            )?;

        // 2. Fill it and wrap it in the multipart form the route expects.
        let html_part = reqwest::multipart::Part::text(
            fill_statement_template(&template_html, statement),
        )
        .file_name("index.html")
        .mime_str("text/html")
        .wrap_internal_err("invalid mime type for html part")?;
        let form = reqwest::multipart::Form::new().part("files", html_part);

        // 3. Submit, asking Gotenberg to echo our identifying headers back
        //    on whichever webhook it calls.
        let convert_url =
            format!("{}/forms/chromium/convert/html", self.gotenberg_url);
        let success_webhook = format!("{}/success", self.callback_base);
        let error_webhook = format!("{}/error", self.callback_base);
        let extra_headers = serde_json::json!({
            "Modrinth-Payment-Id": statement.payment_id,
            "Modrinth-Generated-Pdf-Type": GeneratedPdfType::PaymentStatement.as_str(),
        })
        .to_string();
        let output_filename =
            format!("payment-statement-{}", statement.payment_id);

        self.client
            .post(convert_url)
            .header("Gotenberg-Webhook-Url", success_webhook)
            .header("Gotenberg-Webhook-Error-Url", error_webhook)
            .header("Gotenberg-Webhook-Extra-Http-Headers", extra_headers)
            .header("Modrinth-Payment-Id", &statement.payment_id)
            .header("Gotenberg-Output-Filename", output_filename)
            .multipart(form)
            .send()
            .await
            .wrap_internal_err("failed to submit HTML to Gotenberg")?
            .error_for_status()
            .wrap_internal_err("Gotenberg returned an error status")?;

        Ok(())
    }
}
/// Fills the payment statement template with the fields of `s`.
///
/// Every `{key}` placeholder in `html` is replaced by the matching statement
/// value (surrounding whitespace inside the braces is ignored). Placeholders
/// whose key is unknown are replaced by an empty string.
///
/// Substituted values are HTML-escaped: the recipient e-mail and address
/// lines are not under our control, and splicing them in verbatim would let
/// them inject markup into the rendered document.
///
/// An opening `{` with no closing `}` is kept as-is together with the rest
/// of the input.
fn fill_statement_template(html: &str, s: &PaymentStatement) -> String {
    let gross_amount = format_money(s.gross_amount_cents, &s.currency_code);
    let net_amount = format_money(s.net_amount_cents, &s.currency_code);
    let fees = format_money(s.fees_cents, &s.currency_code);

    let variables: [(&str, &str); 9] = [
        ("statement.payment_id", s.payment_id.as_str()),
        (
            "statement.recipient_address_line_1",
            s.recipient_address_line_1.as_deref().unwrap_or_default(),
        ),
        (
            "statement.recipient_address_line_2",
            s.recipient_address_line_2.as_deref().unwrap_or_default(),
        ),
        (
            "statement.recipient_address_line_3",
            s.recipient_address_line_3.as_deref().unwrap_or_default(),
        ),
        ("statement.recipient_email", s.recipient_email.as_str()),
        ("statement.payment_date", s.payment_date.as_str()),
        ("statement.gross_amount", gross_amount.as_str()),
        ("statement.net_amount", net_amount.as_str()),
        ("statement.fees", fees.as_str()),
    ];

    let mut out = String::with_capacity(html.len());
    let mut remaining = html;
    while let Some((before, rest)) = remaining.split_once('{') {
        out.push_str(before);

        let Some((key, after)) = rest.split_once('}') else {
            // Unmatched '{': nothing left to substitute, so emit the brace
            // and the tail verbatim instead of silently dropping the brace.
            out.push('{');
            out.push_str(rest);
            return out;
        };

        let key = key.trim();
        // Unknown keys intentionally render as an empty string.
        if let Some((_, value)) =
            variables.iter().find(|(name, _)| *name == key)
        {
            push_html_escaped(&mut out, value);
        }
        remaining = after;
    }
    out.push_str(remaining);
    out
}

/// Appends `raw` to `out`, escaping the characters that are significant in
/// HTML text and quoted attribute values.
fn push_html_escaped(out: &mut String, raw: &str) {
    for ch in raw.chars() {
        match ch {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            '\'' => out.push_str("&#39;"),
            other => out.push(other),
        }
    }
}
/// Formats an amount given in minor units as a money string for `currency`.
///
/// If `currency` is not found in the ISO currency table, USD is used instead.
fn format_money(amount_cents: i64, currency: &str) -> String {
    let iso_currency = match rusty_money::iso::find(currency) {
        Some(found) => found,
        None => rusty_money::iso::USD,
    };
    rusty_money::Money::from_minor(amount_cents, iso_currency).to_string()
}

View File

@@ -1,4 +1,5 @@
use actix_web::guard::GuardContext;
use actix_web::http::header::X_FORWARDED_FOR;
pub const ADMIN_KEY_HEADER: &str = "Modrinth-Admin";
pub const MEDAL_KEY_HEADER: &str = "X-Medal-Access-Key";
@@ -42,3 +43,10 @@ pub fn external_notification_key_guard(ctx: &GuardContext) -> bool {
}),
}
}
/// Guard that only admits requests coming directly from a private IPv4 peer.
///
/// A request passes when both hold:
/// - the peer address is known and, after canonicalisation (so IPv4-mapped
///   IPv6 addresses count as IPv4), is a private IPv4 address;
/// - the request carries no `X-Forwarded-For` header.
pub fn internal_network_guard(ctx: &GuardContext) -> bool {
    let head = ctx.head();

    let Some(peer) = head.peer_addr else {
        return false;
    };

    let from_private_v4 = match peer.ip().to_canonical() {
        std::net::IpAddr::V4(v4) => v4.is_private(),
        std::net::IpAddr::V6(_) => false,
    };

    from_private_v4 && head.headers().get(X_FORWARDED_FOR).is_none()
}

View File

@@ -9,6 +9,7 @@ pub mod date;
pub mod env;
pub mod error;
pub mod ext;
pub mod gotenberg;
pub mod guards;
pub mod img;
pub mod ip;

View File

@@ -1,5 +1,6 @@
use labrinth::queue::email::EmailQueue;
use labrinth::util::anrok;
use labrinth::util::gotenberg::GotenbergClient;
use labrinth::{LabrinthConfig, file_hosting};
use labrinth::{check_env_vars, clickhouse};
use modrinth_maxmind::MaxMind;
@@ -46,6 +47,8 @@ pub async fn setup(db: &database::TemporaryDatabase) -> LabrinthConfig {
let anrok_client = anrok::Client::from_env().unwrap();
let email_queue =
EmailQueue::init(pool.clone(), redis_pool.clone()).unwrap();
let gotenberg_client =
GotenbergClient::from_env().expect("Failed to create Gotenberg client");
labrinth::app_setup(
pool.clone(),
@@ -58,6 +61,7 @@ pub async fn setup(db: &database::TemporaryDatabase) -> LabrinthConfig {
stripe_client,
anrok_client,
email_queue,
gotenberg_client,
false,
)
}

View File

@@ -12,7 +12,7 @@ services:
POSTGRES_PASSWORD: labrinth
POSTGRES_HOST_AUTH_METHOD: trust
healthcheck:
test: ['CMD', 'pg_isready']
test: [ 'CMD', 'pg_isready' ]
interval: 3s
timeout: 5s
retries: 3
@@ -28,7 +28,7 @@ services:
MEILI_MASTER_KEY: modrinth
MEILI_HTTP_PAYLOAD_SIZE_LIMIT: 107374182400
healthcheck:
test: ['CMD', 'curl', '--fail', 'http://localhost:7700/health']
test: [ 'CMD', 'curl', '--fail', 'http://localhost:7700/health' ]
interval: 3s
timeout: 5s
retries: 3
@@ -41,7 +41,7 @@ services:
volumes:
- redis-data:/data
healthcheck:
test: ['CMD', 'redis-cli', 'PING']
test: [ 'CMD', 'redis-cli', 'PING' ]
interval: 3s
timeout: 5s
retries: 3
@@ -54,7 +54,7 @@ services:
CLICKHOUSE_USER: default
CLICKHOUSE_PASSWORD: default
healthcheck:
test: ['CMD-SHELL', 'clickhouse-client --query "SELECT 1"']
test: [ 'CMD-SHELL', 'clickhouse-client --query "SELECT 1"' ]
interval: 3s
timeout: 5s
retries: 3
@@ -67,10 +67,15 @@ services:
environment:
MP_ENABLE_SPAMASSASSIN: postmark
healthcheck:
test: ['CMD', 'wget', '-q', '-O/dev/null', 'http://localhost:8025/api/v1/info']
test: [ 'CMD', 'wget', '-q', '-O/dev/null', 'http://localhost:8025/api/v1/info' ]
interval: 3s
timeout: 5s
retries: 3
gotenberg:
  image: gotenberg/gotenberg:8
  container_name: labrinth-gotenberg
  ports:
    # Short syntax is HOST:CONTAINER. Gotenberg listens on port 3000 inside
    # the container by default; publish it on host port 13000, which is what
    # GOTENBERG_URL=http://localhost:13000 expects.
    - "13000:3000"
labrinth:
profiles:
- with-labrinth