Improve error logging and observability (#4443)

* Replace actix tracing with custom error tracing

* Fix logging

* wip: JSON logging

* Use LABRINTH_FORMAT to change to JSON output

* sqlx fix?

* CI fix

* Add tracing span info to HTTP requests

* Merge Result and Option error wrapping

* Add http.authorized to tracing
This commit is contained in:
aecsocket
2025-10-03 14:02:20 +01:00
committed by GitHub
parent 7e84659249
commit b96c5cd5ab
9 changed files with 326 additions and 31 deletions

View File

@@ -23,6 +23,10 @@ actix-web-prom = { workspace = true, features = ["process"] }
tracing.workspace = true
tracing-actix-web.workspace = true
console-subscriber.workspace = true
tracing-subscriber.workspace = true
tracing-ecs.workspace = true
eyre.workspace = true
color-eyre.workspace = true
tokio = { workspace = true, features = ["sync", "rt-multi-thread"] }
tokio-stream.workspace = true

View File

@@ -1,7 +1,9 @@
use actix_web::dev::Service;
use actix_web::middleware::from_fn;
use actix_web::{App, HttpServer};
use actix_web_prom::PrometheusMetricsBuilder;
use clap::Parser;
use labrinth::app_config;
use labrinth::background_task::BackgroundTask;
use labrinth::database::redis::RedisPool;
@@ -13,9 +15,15 @@ use labrinth::util::env::parse_var;
use labrinth::util::ratelimit::rate_limit_middleware;
use labrinth::{check_env_vars, clickhouse, database, file_hosting, queue};
use std::ffi::CStr;
use std::str::FromStr;
use std::sync::Arc;
use tracing::{error, info};
use tracing::level_filters::LevelFilter;
use tracing::{Instrument, error, info, info_span};
use tracing_actix_web::TracingLogger;
use tracing_ecs::ECSLayerBuilder;
use tracing_subscriber::EnvFilter;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::util::SubscriberInitExt;
#[cfg(target_os = "linux")]
#[global_allocator]
@@ -46,12 +54,59 @@ struct Args {
run_background_task: Option<BackgroundTask>,
}
#[derive(Debug, Clone, Default, PartialEq, Eq)]
enum OutputFormat {
#[default]
Human,
Json,
}
impl FromStr for OutputFormat {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"human" => Ok(Self::Human),
"json" => Ok(Self::Json),
_ => Err(()),
}
}
}
#[actix_rt::main]
async fn main() -> std::io::Result<()> {
let args = Args::parse();
color_eyre::install().expect("failed to install `color-eyre`");
dotenvy::dotenv().ok();
console_subscriber::init();
let console_layer = console_subscriber::spawn();
let env_filter = EnvFilter::builder()
.with_default_directive(LevelFilter::INFO.into())
.from_env_lossy();
let output_format =
dotenvy::var("LABRINTH_FORMAT").map_or(OutputFormat::Human, |format| {
format
.parse::<OutputFormat>()
.unwrap_or_else(|_| panic!("invalid output format '{format}'"))
});
match output_format {
OutputFormat::Human => {
tracing_subscriber::registry()
.with(console_layer)
.with(env_filter)
.with(tracing_subscriber::fmt::layer())
.init();
}
OutputFormat::Json => {
tracing_subscriber::registry()
.with(console_layer)
.with(env_filter)
.with(ECSLayerBuilder::default().stdout())
.init();
}
}
if check_env_vars() {
error!("Some environment variables are missing!");
@@ -199,6 +254,33 @@ async fn main() -> std::io::Result<()> {
HttpServer::new(move || {
App::new()
.wrap(TracingLogger::default())
.wrap_fn(|req, srv| {
// We capture the same fields as `tracing-actix-web`'s `RootSpanBuilder`.
// See `root_span!` macro.
let span = info_span!(
"HTTP request",
http.method = %req.method(),
http.client_ip = %req.connection_info().realip_remote_addr().unwrap_or(""),
http.user_agent = %req.headers().get("User-Agent").map_or("", |h| h.to_str().unwrap_or("")),
http.target = %req.uri().path_and_query().map_or("", |p| p.as_str()),
http.authenticated = %req.headers().get("Authorization").is_some()
);
let fut = srv.call(req);
async move {
fut.await.inspect(|resp| {
let _span = info_span!(
"HTTP response",
http.status = %resp.response().status().as_u16(),
).entered();
resp.response()
.error()
.inspect(|err| log_error(err));
})
}
.instrument(span)
})
.wrap(prometheus.clone())
.wrap(from_fn(rate_limit_middleware))
.wrap(actix_web::middleware::Compress::default())
@@ -209,3 +291,15 @@ async fn main() -> std::io::Result<()> {
.run()
.await
}
fn log_error(err: &actix_web::Error) {
if err.as_response_error().status_code().is_client_error() {
tracing::debug!(
"Error encountered while processing the incoming HTTP request: {err}"
);
} else {
tracing::error!(
"Error encountered while processing the incoming HTTP request: {err}"
);
}
}

View File

@@ -87,53 +87,57 @@ pub fn root_config(cfg: &mut web::ServiceConfig) {
#[derive(thiserror::Error, Debug)]
pub enum ApiError {
#[error("Environment Error")]
#[error(transparent)]
Internal(eyre::Report),
#[error(transparent)]
Request(eyre::Report),
#[error("Invalid input: {0}")]
InvalidInput(String),
#[error("Environment error")]
Env(#[from] dotenvy::Error),
#[error("Error while uploading file: {0}")]
FileHosting(#[from] FileHostingError),
#[error("Database Error: {0}")]
#[error("Database error: {0}")]
Database(#[from] crate::database::models::DatabaseError),
#[error("Database Error: {0}")]
#[error("SQLx database error: {0}")]
SqlxDatabase(#[from] sqlx::Error),
#[error("Database Error: {0}")]
#[error("Redis database error: {0}")]
RedisDatabase(#[from] redis::RedisError),
#[error("Clickhouse Error: {0}")]
#[error("Clickhouse error: {0}")]
Clickhouse(#[from] clickhouse::error::Error),
#[error("Internal server error: {0}")]
#[error("XML error: {0}")]
Xml(String),
#[error("Deserialization error: {0}")]
Json(#[from] serde_json::Error),
#[error("Authentication Error: {0}")]
#[error("Authentication error: {0}")]
Authentication(#[from] crate::auth::AuthenticationError),
#[error("Authentication Error: {0}")]
#[error("Authentication error: {0}")]
CustomAuthentication(String),
#[error("Invalid Input: {0}")]
InvalidInput(String),
#[error("Error while validating input: {0}")]
Validation(String),
#[error("Search Error: {0}")]
#[error("Search error: {0}")]
Search(#[from] meilisearch_sdk::errors::Error),
#[error("Indexing Error: {0}")]
#[error("Indexing error: {0}")]
Indexing(#[from] crate::search::indexing::IndexingError),
#[error("Payments Error: {0}")]
#[error("Payments error: {0}")]
Payments(String),
#[error("Discord Error: {0}")]
#[error("Discord error: {0}")]
Discord(String),
#[error("Slack Webhook Error: {0}")]
#[error("Slack webhook error: {0}")]
Slack(String),
#[error("Captcha Error. Try resubmitting the form.")]
#[error("Captcha error. Try resubmitting the form.")]
Turnstile,
#[error("Error while decoding Base62: {0}")]
Decoding(#[from] ariadne::ids::DecodingError),
#[error("Image Parsing Error: {0}")]
#[error("Image parsing error: {0}")]
ImageParse(#[from] image::ImageError),
#[error("Password Hashing Error: {0}")]
#[error("Password hashing error: {0}")]
PasswordHashing(#[from] argon2::password_hash::Error),
#[error("{0}")]
Mail(#[from] crate::queue::email::MailError),
#[error("Error while rerouting request: {0:?}")]
Reroute(#[from] reqwest::Error),
#[error("Unable to read Zip Archive: {0}")]
#[error("Unable to read zip archive: {0}")]
Zip(#[from] zip::result::ZipError),
#[error("IO Error: {0}")]
Io(#[from] std::io::Error),
@@ -141,7 +145,7 @@ pub enum ApiError {
NotFound,
#[error("Conflict: {0}")]
Conflict(String),
#[error("External tax compliance API Error")]
#[error("External tax compliance API error")]
TaxComplianceApi,
#[error(transparent)]
TaxProcessor(#[from] crate::util::anrok::AnrokError),
@@ -157,6 +161,8 @@ impl ApiError {
pub fn as_api_error<'a>(&self) -> crate::models::error::ApiError<'a> {
crate::models::error::ApiError {
error: match self {
ApiError::Internal(..) => "internal_error",
Self::Request(..) => "request_error",
ApiError::Env(..) => "environment_error",
ApiError::Database(..) => "database_error",
ApiError::SqlxDatabase(..) => "database_error",
@@ -197,6 +203,9 @@ impl ApiError {
impl actix_web::ResponseError for ApiError {
fn status_code(&self) -> StatusCode {
match self {
ApiError::Internal(..) => StatusCode::INTERNAL_SERVER_ERROR,
ApiError::Request(..) => StatusCode::BAD_REQUEST,
ApiError::InvalidInput(..) => StatusCode::BAD_REQUEST,
ApiError::Env(..) => StatusCode::INTERNAL_SERVER_ERROR,
ApiError::Database(..) => StatusCode::INTERNAL_SERVER_ERROR,
ApiError::SqlxDatabase(..) => StatusCode::INTERNAL_SERVER_ERROR,
@@ -209,7 +218,6 @@ impl actix_web::ResponseError for ApiError {
ApiError::Search(..) => StatusCode::INTERNAL_SERVER_ERROR,
ApiError::Indexing(..) => StatusCode::INTERNAL_SERVER_ERROR,
ApiError::FileHosting(..) => StatusCode::INTERNAL_SERVER_ERROR,
ApiError::InvalidInput(..) => StatusCode::BAD_REQUEST,
ApiError::Validation(..) => StatusCode::BAD_REQUEST,
ApiError::Payments(..) => StatusCode::FAILED_DEPENDENCY,
ApiError::Discord(..) => StatusCode::FAILED_DEPENDENCY,

View File

@@ -14,6 +14,7 @@ use crate::{
use actix_web::{HttpRequest, HttpResponse, web};
use ariadne::ids::base62_impl::to_base62;
use chrono::{DateTime, Duration, Utc};
use eyre::eyre;
use serde::{Deserialize, Serialize};
use sqlx::PgPool;
use sqlx::postgres::types::PgInterval;
@@ -331,7 +332,7 @@ pub async fn revenue_get(
let duration: PgInterval = Duration::minutes(resolution_minutes as i64)
.try_into()
.map_err(|_| {
ApiError::InvalidInput("Invalid resolution_minutes".to_string())
ApiError::Request(eyre!("Invalid `resolution_minutes`"))
})?;
// Get the revenue data
let project_ids = project_ids.unwrap_or_default();

View File

@@ -12,6 +12,7 @@ use crate::models::v3::user_limits::UserLimits;
use crate::queue::session::AuthQueue;
use crate::routes::ApiError;
use crate::routes::v3::project_creation::CreateError;
use crate::util::error::Context;
use crate::util::img::delete_old_images;
use crate::util::routes::read_limited_from_payload;
use crate::util::validate::validation_errors_to_string;
@@ -20,6 +21,7 @@ use actix_web::web::Data;
use actix_web::{HttpRequest, HttpResponse, web};
use ariadne::ids::base62_impl::parse_base62;
use chrono::Utc;
use eyre::eyre;
use itertools::Itertools;
use serde::{Deserialize, Serialize};
use sqlx::PgPool;
@@ -335,10 +337,8 @@ pub async fn collection_edit(
project_id, &**pool, &redis,
)
.await?
.ok_or_else(|| {
ApiError::InvalidInput(format!(
"The specified project {project_id} does not exist!"
))
.wrap_request_err_with(|| {
eyre!("The specified project {project_id} does not exist!")
})?;
validated_project_ids.push(project.inner.id.0);
}

View File

@@ -0,0 +1,115 @@
use std::{
convert::Infallible,
fmt::{Debug, Display},
};
use crate::routes::ApiError;
pub trait Context<T, E>: Sized {
fn wrap_request_err_with<D>(
self,
f: impl FnOnce() -> D,
) -> Result<T, ApiError>
where
D: Debug + Display + Send + Sync + 'static;
fn wrap_request_err<D>(self, msg: D) -> Result<T, ApiError>
where
D: Debug + Display + Send + Sync + 'static,
{
self.wrap_request_err_with(|| msg)
}
fn wrap_internal_err_with<D>(
self,
f: impl FnOnce() -> D,
) -> Result<T, ApiError>
where
D: Debug + Display + Send + Sync + 'static;
fn wrap_internal_err<D>(self, msg: D) -> Result<T, ApiError>
where
D: Debug + Display + Send + Sync + 'static,
{
self.wrap_internal_err_with(|| msg)
}
}
impl<T, E> Context<T, E> for Result<T, E>
where
E: std::error::Error + Send + Sync + Sized + 'static,
{
fn wrap_request_err_with<D>(
self,
f: impl FnOnce() -> D,
) -> Result<T, ApiError>
where
D: Display + Send + Sync + 'static,
{
self.map_err(|err| {
let report = eyre::Report::new(err).wrap_err(f());
ApiError::Request(report)
})
}
fn wrap_internal_err_with<D>(
self,
f: impl FnOnce() -> D,
) -> Result<T, ApiError>
where
D: Display + Send + Sync + 'static,
{
self.map_err(|err| {
let report = eyre::Report::new(err).wrap_err(f());
ApiError::Internal(report)
})
}
}
impl<T> Context<T, Infallible> for Option<T> {
fn wrap_request_err_with<D>(
self,
f: impl FnOnce() -> D,
) -> Result<T, ApiError>
where
D: Debug + Display + Send + Sync + 'static,
{
self.ok_or_else(|| ApiError::Request(eyre::Report::msg(f())))
}
fn wrap_internal_err_with<D>(
self,
f: impl FnOnce() -> D,
) -> Result<T, ApiError>
where
D: Debug + Display + Send + Sync + 'static,
{
self.ok_or_else(|| ApiError::Internal(eyre::Report::msg(f())))
}
}
#[cfg(test)]
mod tests {
use super::*;
fn sqlx_result() -> Result<(), sqlx::Error> {
Err(sqlx::Error::RowNotFound)
}
// these just test that code written with the above API compiles
fn propagating() -> Result<(), ApiError> {
sqlx_result()
.wrap_internal_err("failed to perform database operation")?;
sqlx_result().wrap_request_err("invalid request parameter")?;
None::<()>.wrap_internal_err("something is missing")?;
Ok(())
}
// just so we don't get a dead code warning
#[test]
fn test_propagating() {
_ = propagating();
}
}

View File

@@ -7,6 +7,7 @@ pub mod captcha;
pub mod cors;
pub mod date;
pub mod env;
pub mod error;
pub mod ext;
pub mod guards;
pub mod img;