rust-lang
diff --git a/‎Cargo.lock
Lines changed: 22 additions & 0 deletions b/‎Cargo.lock
Lines changed: 22 additions & 0 deletions
diff --git a/‎Cargo.toml
Lines changed: 1 addition & 0 deletions b/‎Cargo.toml
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/app.rs
Lines changed: 10 additions & 0 deletions b/‎src/app.rs
Lines changed: 10 additions & 0 deletions
diff --git a/‎src/config.rs
Lines changed: 5 additions & 1 deletion b/‎src/config.rs
Lines changed: 5 additions & 1 deletion
diff --git a/‎src/controllers.rs
Lines changed: 1 addition & 0 deletions b/‎src/controllers.rs
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/controllers/metrics.rs
Lines changed: 39 additions & 0 deletions b/‎src/controllers/metrics.rs
Lines changed: 39 additions & 0 deletions
diff --git a/‎src/db.rs
Lines changed: 18 additions & 3 deletions b/‎src/db.rs
Lines changed: 18 additions & 3 deletions
diff --git a/‎src/lib.rs
Lines changed: 1 addition & 0 deletions b/‎src/lib.rs
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/metrics/instance.rs
Lines changed: 64 additions & 0 deletions b/‎src/metrics/instance.rs
Lines changed: 64 additions & 0 deletions
diff --git a/‎src/metrics/macros.rs
Lines changed: 81 additions & 0 deletions b/‎src/metrics/macros.rs
Lines changed: 81 additions & 0 deletions
diff --git a/‎src/metrics/mod.rs
Lines changed: 12 additions & 0 deletions b/‎src/metrics/mod.rs
Lines changed: 12 additions & 0 deletions
diff --git a/‎src/metrics/service.rs
Lines changed: 39 additions & 0 deletions b/‎src/metrics/service.rs
Lines changed: 39 additions & 0 deletions
@@ -68,6 +68,7 @@ lettre = { version = "0.10.0-beta.3", default-features = false, features = ["fil
 license-exprs = "1.6"
 oauth2 = { version = "4.0.0-beta.1", default-features = false, features = ["reqwest"] }
 parking_lot = "0.11"
+prometheus = "0.12.0"
 rand = "0.8"
 reqwest = { version = "0.11", features = ["blocking", "gzip", "json"] }
 scheduled-thread-pool = "0.2.0"
 
@@ -6,6 +6,7 @@ use std::{sync::Arc, time::Duration};
 use crate::downloads_counter::DownloadsCounter;
 use crate::email::Emails;
 use crate::github::GitHubClient;
+use crate::metrics::{InstanceMetrics, ServiceMetrics};
 use diesel::r2d2;
 use oauth2::basic::BasicClient;
 use reqwest::blocking::Client;
@@ -40,6 +41,12 @@ pub struct App {
     /// Backend used to send emails
     pub emails: Emails,
 
+    /// Metrics related to the service as a whole
+    pub service_metrics: ServiceMetrics,
+
+    /// Metrics related to this specific instance of the service
+    pub instance_metrics: InstanceMetrics,
+
     /// A configured client for outgoing HTTP requests
     ///
     /// In production this shares a single connection pool across requests.  In tests
@@ -141,6 +148,9 @@ impl App {
             config,
             downloads_counter: DownloadsCounter::new(),
             emails: Emails::from_environment(),
+            service_metrics: ServiceMetrics::new().expect("could not initialize service metrics"),
+            instance_metrics: InstanceMetrics::new()
+                .expect("could not initialize instance metrics"),
             http_client,
         }
     }
 
@@ -21,6 +21,7 @@ pub struct Config {
     pub allowed_origins: Vec<String>,
     pub downloads_persist_interval_ms: usize,
     pub ownership_invitations_expiration_days: u64,
+    pub metrics_authorization_token: Option<String>,
 }
 
 impl Default for Config {
@@ -47,8 +48,10 @@ impl Default for Config {
     /// - `DATABASE_URL`: The URL of the postgres database to use.
     /// - `READ_ONLY_REPLICA_URL`: The URL of an optional postgres read-only replica database.
     /// - `BLOCKED_TRAFFIC`: A list of headers and environment variables to use for blocking
-    ///.  traffic. See the `block_traffic` module for more documentation.
+    ///   traffic. See the `block_traffic` module for more documentation.
     /// - `DOWNLOADS_PERSIST_INTERVAL_MS`: how frequent to persist download counts (in ms).
+    /// - `METRICS_AUTHORIZATION_TOKEN`: authorization token needed to query metrics. If missing,
+    ///   querying metrics will be completely disabled.
     fn default() -> Config {
         let api_protocol = String::from("https");
         let mirror = if dotenv::var("MIRROR").is_ok() {
@@ -156,6 +159,7 @@ impl Default for Config {
                 })
                 .unwrap_or(60_000), // 1 minute
             ownership_invitations_expiration_days: 30,
+            metrics_authorization_token: dotenv::var("METRICS_AUTHORIZATION_TOKEN").ok(),
         }
     }
 }
 
@@ -101,6 +101,7 @@ pub mod category;
 pub mod crate_owner_invitation;
 pub mod keyword;
 pub mod krate;
+pub mod metrics;
 pub mod site_metadata;
 pub mod team;
 pub mod token;
 
@@ -0,0 +1,39 @@
+use crate::controllers::frontend_prelude::*;
+use crate::util::errors::{forbidden, not_found, MetricsDisabled};
+use conduit::{Body, Response};
+use prometheus::{Encoder, TextEncoder};
+
+/// Handles the `GET /api/private/metrics/:kind` endpoint.
+///
+/// `:kind` selects which set of metrics is returned: `service` or `instance`. Any other value
+/// results in a "not found" error.
+///
+/// Access requires an `Authorization: Bearer <token>` header whose token matches the
+/// configured `metrics_authorization_token`; otherwise a "forbidden" error is returned. When no
+/// token is configured, or the configured token is empty, the endpoint is disabled and always
+/// fails with `MetricsDisabled`.
+///
+/// On success the gathered metrics are encoded with `TextEncoder` and returned as
+/// `text/plain`.
+pub fn prometheus(req: &mut dyn RequestExt) -> EndpointResult {
+    let app = req.app();
+
+    // To avoid accidentally leaking metrics if the environment variable is not set, prevent
+    // access to any metrics endpoint if the authorization token is not configured. An empty
+    // token is treated the same way: it offers no protection, so it must not count as a
+    // configured secret.
+    let expected_token = match app.config.metrics_authorization_token.as_deref() {
+        Some(token) if !token.is_empty() => token,
+        _ => return Err(Box::new(MetricsDisabled)),
+    };
+
+    let provided_token = req
+        .headers()
+        .get(header::AUTHORIZATION)
+        .and_then(|value| value.to_str().ok())
+        .and_then(|value| value.strip_prefix("Bearer "));
+
+    // The header is attacker-controlled: compare without short-circuiting on the first
+    // mismatching byte so response timing doesn't help guessing the token.
+    let authorized = provided_token.map_or(false, |token| {
+        constant_time_eq(token.as_bytes(), expected_token.as_bytes())
+    });
+    if !authorized {
+        return Err(forbidden());
+    }
+
+    let metrics = match req.params()["kind"].as_str() {
+        "service" => app.service_metrics.gather(&*req.db_read_only()?)?,
+        "instance" => app.instance_metrics.gather(app)?,
+        _ => return Err(not_found()),
+    };
+
+    let mut output = Vec::new();
+    TextEncoder::new().encode(&metrics, &mut output)?;
+
+    Ok(Response::builder()
+        .header(header::CONTENT_TYPE, "text/plain; charset=utf-8")
+        .header(header::CONTENT_LENGTH, output.len())
+        .body(Body::from_vec(output))?)
+}
+
+/// Returns whether `a` and `b` are equal, always inspecting every byte when the lengths match.
+///
+/// Only a length mismatch returns early, so the comparison can reveal the length of the
+/// expected value but not how many of its leading bytes were guessed correctly.
+fn constant_time_eq(a: &[u8], b: &[u8]) -> bool {
+    a.len() == b.len() && a.iter().zip(b).fold(0u8, |diff, (x, y)| diff | (x ^ y)) == 0
+}
@@ -24,10 +24,19 @@ impl DieselPool {
         }
     }
 
-    pub fn state(&self) -> r2d2::State {
+    /// Returns the total and idle connection counts of this pool.
+    ///
+    /// For a real pool the values are copied from the state reported by the wrapped pool.
+    /// A test pool reports zero for both counts.
+    pub fn state(&self) -> PoolState {
         match self {
-            DieselPool::Pool(pool) => pool.state(),
-            DieselPool::Test(_) => panic!("Cannot get the state of a test pool"),
+            DieselPool::Pool(pool) => {
+                let state = pool.state();
+                PoolState {
+                    connections: state.connections,
+                    idle_connections: state.idle_connections,
+                }
+            }
+            // This arm used to panic; it now reports an empty pool instead (presumably so
+            // callers such as metrics gathering also work with test pools).
+            DieselPool::Test(_) => PoolState {
+                connections: 0,
+                idle_connections: 0,
+            },
         }
     }
 
@@ -36,6 +45,12 @@ impl DieselPool {
     }
 }
 
+/// Connection counts of a `DieselPool`, as returned by `DieselPool::state`.
+#[derive(Debug, Copy, Clone)]
+pub struct PoolState {
+    /// Number of connections reported by the pool. Consumers compute the connections in use
+    /// as `connections - idle_connections`, i.e. this count is expected to include idle ones.
+    pub connections: u32,
+    /// Number of connections reported as idle by the pool.
+    pub idle_connections: u32,
+}
+
 #[allow(missing_debug_implementations)]
 pub enum DieselPooledConn<'a> {
     Pool(r2d2::PooledConnection<ConnectionManager<PgConnection>>),
 
@@ -40,6 +40,7 @@ mod downloads_counter;
 pub mod email;
 pub mod git;
 pub mod github;
+mod metrics;
 pub mod middleware;
 mod publish_rate_limit;
 pub mod render;
 
@@ -0,0 +1,64 @@
+//! This module defines all the instance-level metrics of crates.io.
+//!
+//! Instance-level metrics are collected separately for each instance of the crates.io application,
+//! and are then aggregated at the Prometheus level. They're not suited for service-level metrics
+//! (like "how many users are there").
+//!
+//! There are two ways to update instance-level metrics:
+//!
+//! * Continuously as things happen in the instance: every time something worth recording happens
+//!   the application updates the value of the metrics, accessing the metric through
+//!   `req.app().instance_metrics.$metric_name`.
+//!
+//! * When metrics are scraped by Prometheus: every `N` seconds Prometheus sends a request to the
+//!   instance asking for the current values of the metrics, and you can update metrics when
+//!   that happens by calculating them in the `gather` method.
+//!
+//! As a rule of thumb, if the metric requires a database query to be updated it's probably a
+//! service-level metric, and you should add it to `src/metrics/service.rs` instead.
+
+use crate::util::errors::AppResult;
+use crate::{app::App, db::DieselPool};
+use prometheus::{proto::MetricFamily, IntCounter, IntGauge, IntGaugeVec};
+
+// Each `///` comment below is handed to the `metrics!` macro and becomes the help text of the
+// corresponding metric, so it is part of what gets exported and not only documentation.
+//
+// The bracketed list after a vector type (`["pool"]`) declares the metric's label names. The
+// two pool gauges are private to this module and are refreshed in `gather`, using the label
+// values `primary` and `follower`.
+metrics! {
+    pub struct InstanceMetrics {
+        /// Number of idle database connections in the pool
+        database_idle_conns: IntGaugeVec["pool"],
+        /// Number of used database connections in the pool
+        database_used_conns: IntGaugeVec["pool"],
+
+        /// Number of requests processed by this instance
+        pub requests_total: IntCounter,
+        /// Number of requests currently being processed
+        pub requests_in_flight: IntGauge,
+    }
+
+    // All instance metrics will be prefixed with this namespace.
+    namespace: "cratesio_instance",
+}
+
+impl InstanceMetrics {
+    /// Refreshes the metrics that are computed at scrape time and returns every metric
+    /// family registered in this instance's registry.
+    ///
+    /// The pool gauges are updated for the primary database (label value `primary`) and, when
+    /// a read-only replica is configured, for the replica as well (label value `follower`).
+    pub(crate) fn gather(&self, app: &App) -> AppResult<Vec<MetricFamily>> {
+        self.refresh_pool_stats("primary", &app.primary_database)?;
+
+        if let Some(replica) = app.read_only_replica_database.as_ref() {
+            self.refresh_pool_stats("follower", replica)?;
+        }
+
+        let families = self.registry.gather();
+        Ok(families)
+    }
+
+    /// Stores the idle and in-use connection counts of `pool` in the gauges labelled `name`.
+    ///
+    /// Fails if either gauge cannot be resolved for the given label value.
+    fn refresh_pool_stats(&self, name: &str, pool: &DieselPool) -> AppResult<()> {
+        let snapshot = pool.state();
+        let labels = [name];
+
+        let idle_gauge = self.database_idle_conns.get_metric_with_label_values(&labels)?;
+        idle_gauge.set(i64::from(snapshot.idle_connections));
+
+        // Connections in use are the ones the pool holds that are not idle.
+        let used_gauge = self.database_used_conns.get_metric_with_label_values(&labels)?;
+        used_gauge.set(i64::from(
+            snapshot.connections - snapshot.idle_connections,
+        ));
+
+        Ok(())
+    }
+}
@@ -0,0 +1,81 @@
+/// Uniform constructor for metric types, building a metric from a `prometheus::Opts`.
+///
+/// The `metrics!` macro calls `from_opts` for every declared field, and `load_metric_type!`
+/// generates the implementations for the individual `prometheus` metric types.
+pub(super) trait MetricFromOpts: Sized {
+    /// Builds the metric described by `opts`, or returns the error reported by `prometheus`.
+    fn from_opts(opts: prometheus::Opts) -> Result<Self, prometheus::Error>;
+}
+
+/// Declares a struct holding a set of Prometheus metrics, together with the registry they are
+/// registered in.
+///
+/// Every field must start with a doc comment, which is used as the help text of the metric
+/// (surrounding whitespace is trimmed). The metric is named after the field and placed in
+/// the given `namespace`. A field type may be followed by a bracketed list of label names
+/// (`IntGaugeVec["pool"]`), which become the metric's variable labels.
+///
+/// The generated struct has a `new()` constructor, which builds and registers all the metrics
+/// and fails with the first `prometheus::Error` encountered, and a `Debug` implementation that
+/// prints only the struct's name.
+#[macro_export]
+macro_rules! metrics {
+    (
+        $vis:vis struct $name:ident {
+            $(
+                #[doc = $help:expr]
+                $(#[$meta:meta])*
+                $metric_vis:vis $metric:ident: $ty:ty $([$($label:expr),* $(,)?])?
+            ),* $(,)?
+        }
+        namespace: $namespace:expr,
+    ) => {
+        $vis struct $name {
+            registry: prometheus::Registry,
+            $(
+                $(#[$meta])*
+                $metric_vis $metric: $ty,
+            )*
+        }
+        impl $name {
+            $vis fn new() -> Result<Self, prometheus::Error> {
+                use crate::metrics::macros::MetricFromOpts;
+
+                let registry = prometheus::Registry::new();
+                $(
+                    $(#[$meta])*
+                    let $metric = <$ty>::from_opts(
+                        // A `/// text` comment reaches the macro as `#[doc = " text"]`: trim
+                        // it so the leading space doesn't end up in the exported help text.
+                        prometheus::Opts::new(stringify!($metric), $help.trim())
+                            .namespace($namespace)
+                            $(.variable_labels(vec![$($label.into()),*]))?
+                    )?;
+                    // The registry gets a clone of the metric while the struct keeps the
+                    // original, so the application can update it through the field.
+                    $(#[$meta])*
+                    registry.register(Box::new($metric.clone()))?;
+                )*
+                Ok(Self {
+                    registry,
+                    $(
+                        $(#[$meta])*
+                        $metric,
+                    )*
+                })
+            }
+        }
+        impl std::fmt::Debug for $name {
+            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+                write!(f, "{}", stringify!($name))
+            }
+        }
+    };
+}
+
+/// Imports a `prometheus` metric type and implements `MetricFromOpts` for it.
+///
+/// * `as single`: the metric is built directly from the options with `with_opts`.
+/// * `as vec`: the metric is built with `new`, passing the options together with the label
+///   names taken from the options' `variable_labels`.
+#[macro_export]
+macro_rules! load_metric_type {
+    ($name:ident as single) => {
+        use prometheus::$name;
+        impl crate::metrics::macros::MetricFromOpts for $name {
+            fn from_opts(opts: prometheus::Opts) -> Result<Self, prometheus::Error> {
+                Self::with_opts(opts)
+            }
+        }
+    };
+    ($name:ident as vec) => {
+        use prometheus::$name;
+        impl crate::metrics::macros::MetricFromOpts for $name {
+            fn from_opts(opts: prometheus::Opts) -> Result<Self, prometheus::Error> {
+                // The constructor wants the label names as string slices.
+                let label_names: Vec<&str> =
+                    opts.variable_labels.iter().map(String::as_str).collect();
+                Self::new(opts.clone().into(), &label_names)
+            }
+        }
+    };
+}
@@ -0,0 +1,12 @@
+pub use self::instance::InstanceMetrics;
+pub use self::service::ServiceMetrics;
+
+// Declared before the modules below so that the `metrics!` macro is in textual scope when
+// they invoke it by its bare name.
+#[macro_use]
+mod macros;
+
+mod instance;
+mod service;
+
+// Implement `MetricFromOpts` for the metric types used by the `metrics!` declarations in
+// `instance` and `service`.
+load_metric_type!(IntGauge as single);
+load_metric_type!(IntCounter as single);
+load_metric_type!(IntGaugeVec as vec);
@@ -0,0 +1,39 @@
+//! This module defines all the service-level metrics of crates.io.
+//!
+//! Service-level metrics are collected for the whole service, without querying the individual
+//! instances of the application. They're not suited for instance-level metrics (like "how many
+//! requests were processed" or "how many connections are left in the database pool").
+//!
+//! Service-level metrics should **never** be updated around the codebase: instead all the updates
+//! should happen inside the `gather` method. A database connection is available inside the method.
+//!
+//! As a rule of thumb, if the metric is not straight up fetched from the database it's probably an
+//! instance-level metric, and you should add it to `src/metrics/instance.rs`.
+
+use crate::schema::{crates, versions};
+use crate::util::errors::AppResult;
+use diesel::{dsl::count_star, prelude::*, PgConnection};
+use prometheus::{proto::MetricFamily, IntGauge};
+
+// Each `///` comment below is handed to the `metrics!` macro and becomes the help text of the
+// corresponding metric, so it is part of what gets exported and not only documentation.
+// Both gauges are private to this module and are set inside `gather`.
+metrics! {
+    pub struct ServiceMetrics {
+        /// Number of crates ever published
+        crates_total: IntGauge,
+        /// Number of versions ever published
+        versions_total: IntGauge,
+    }
+
+    // All service metrics will be prefixed with this namespace.
+    namespace: "cratesio_service",
+}
+
+impl ServiceMetrics {
+    /// Recomputes every service-level metric from the database and returns the metric
+    /// families gathered from the registry.
+    ///
+    /// Each gauge is set to the row count of its table (`crates` and `versions`), queried
+    /// through `conn`. A failing query is propagated to the caller.
+    pub(crate) fn gather(&self, conn: &PgConnection) -> AppResult<Vec<MetricFamily>> {
+        let crate_count: i64 = crates::table.select(count_star()).first(conn)?;
+        self.crates_total.set(crate_count);
+
+        let version_count: i64 = versions::table.select(count_star()).first(conn)?;
+        self.versions_total.set(version_count);
+
+        Ok(self.registry.gather())
+    }
+}
Original file line number	Diff line number	Diff line change
`@@ -24,10 +24,19 @@ impl DieselPool {`
`24`	`24`	`}`
`25`	`25`	`}`
`26`	`26`
`27`		`- pub fn state(&self) -> r2d2::State {`
	`27`	`+ pub fn state(&self) -> PoolState {`
`28`	`28`	`match self {`
`29`		`- DieselPool::Pool(pool) => pool.state(),`
`30`		`- DieselPool::Test(_) => panic!("Cannot get the state of a test pool"),`
	`29`	`+ DieselPool::Pool(pool) => {`
	`30`	`+ let state = pool.state();`
	`31`	`+ PoolState {`
	`32`	`+ connections: state.connections,`
	`33`	`+ idle_connections: state.idle_connections,`
	`34`	`+ }`
	`35`	`+ }`
	`36`	`+ DieselPool::Test(_) => PoolState {`
	`37`	`+ connections: 0,`
	`38`	`+ idle_connections: 0,`
	`39`	`+ },`
`31`	`40`	`}`
`32`	`41`	`}`
`33`	`42`
`@@ -36,6 +45,12 @@ impl DieselPool {`
`36`	`45`	`}`
`37`	`46`	`}`
`38`	`47`
	`48`	`+#[derive(Debug, Copy, Clone)]`
	`49`	`+pub struct PoolState {`
	`50`	`+ pub connections: u32,`
	`51`	`+ pub idle_connections: u32,`
	`52`	`+}`
	`53`	`+`
`39`	`54`	`#[allow(missing_debug_implementations)]`
`40`	`55`	`pub enum DieselPooledConn<'a> {`
`41`	`56`	`Pool(r2d2::PooledConnection<ConnectionManager<PgConnection>>),`