Skip to content

Commit e4e28f5

Browse files
committed
Auto merge of #3999 - Turbo87:moka, r=pietroalbini
Implement `version_id` cache for `download` endpoint using `moka` crate This PR builds on top of #3848, but uses https://github.com/moka-rs/moka instead of https://github.com/xacrimon/dashmap for caching the `version_id` in the `download` API endpoint. I've implemented it using environment variables and default values that will allow us to fine tune the values in production, if necessary.
2 parents a02e862 + b296c42 commit e4e28f5

File tree

8 files changed

+443
-120
lines changed

8 files changed

+443
-120
lines changed

Cargo.lock

+260-22
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ tikv-jemallocator = { version = "0.4.1", features = ['unprefixed_malloc_on_suppo
6565
lettre = { version = "0.10.0-rc.4", default-features = false, features = ["file-transport", "smtp-transport", "native-tls", "hostname", "builder"] }
6666
license-exprs = "1.6"
6767
minijinja = "0.8.1"
68+
moka = "0.6.1"
6869
oauth2 = { version = "4.1.0", default-features = false, features = ["reqwest"] }
6970
parking_lot = "0.11"
7071
prometheus = { version = "0.13.0", default-features = false }

src/app.rs

+12
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use crate::email::Emails;
99
use crate::github::GitHubClient;
1010
use crate::metrics::{InstanceMetrics, ServiceMetrics};
1111
use diesel::r2d2;
12+
use moka::sync::{Cache, CacheBuilder};
1213
use oauth2::basic::BasicClient;
1314
use reqwest::blocking::Client;
1415
use scheduled_thread_pool::ScheduledThreadPool;
@@ -31,6 +32,12 @@ pub struct App {
3132
/// The server configuration
3233
pub config: config::Server,
3334

35+
/// Cache the `version_id` of a `canonical_crate_name:semver` pair
36+
///
37+
/// This is used by the download endpoint to reduce the number of database queries. The
38+
/// `version_id` is only cached under the canonical spelling of the crate name.
39+
pub(crate) version_id_cacher: Cache<(String, String), i32>,
40+
3441
/// Count downloads and periodically persist them in the database
3542
pub downloads_counter: DownloadsCounter,
3643

@@ -148,12 +155,17 @@ impl App {
148155
None
149156
};
150157

158+
let version_id_cacher = CacheBuilder::new(config.version_id_cache_size)
159+
.time_to_live(config.version_id_cache_ttl)
160+
.build();
161+
151162
App {
152163
primary_database,
153164
read_only_replica_database: replica_database,
154165
github,
155166
github_oauth,
156167
config,
168+
version_id_cacher,
157169
downloads_counter: DownloadsCounter::new(),
158170
emails: Emails::from_environment(),
159171
service_metrics: ServiceMetrics::new().expect("could not initialize service metrics"),

src/config.rs

+11
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ mod database_pools;
77
pub use self::base::Base;
88
pub use self::database_pools::DatabasePools;
99
use std::collections::HashSet;
10+
use std::time::Duration;
11+
12+
const DEFAULT_VERSION_ID_CACHE_SIZE: usize = 10_000;
13+
const DEFAULT_VERSION_ID_CACHE_TTL: u64 = 5 * 60; // 5 minutes
1014

1115
pub struct Server {
1216
pub base: Base,
@@ -30,6 +34,8 @@ pub struct Server {
3034
pub instance_metrics_log_every_seconds: Option<u64>,
3135
pub force_unconditional_redirects: bool,
3236
pub blocked_routes: HashSet<String>,
37+
pub version_id_cache_size: usize,
38+
pub version_id_cache_ttl: Duration,
3339
}
3440

3541
impl Default for Server {
@@ -107,6 +113,11 @@ impl Default for Server {
107113
blocked_routes: env_optional("BLOCKED_ROUTES")
108114
.map(|routes: String| routes.split(',').map(|s| s.into()).collect())
109115
.unwrap_or_else(HashSet::new),
116+
version_id_cache_size: env_optional("VERSION_ID_CACHE_SIZE")
117+
.unwrap_or(DEFAULT_VERSION_ID_CACHE_SIZE),
118+
version_id_cache_ttl: Duration::from_secs(
119+
env_optional("VERSION_ID_CACHE_TTL").unwrap_or(DEFAULT_VERSION_ID_CACHE_TTL),
120+
),
110121
}
111122
}
112123
}

src/controllers/version/downloads.rs

+75-62
Original file line numberDiff line numberDiff line change
@@ -18,75 +18,88 @@ pub fn download(req: &mut dyn RequestExt) -> EndpointResult {
1818
let mut crate_name = req.params()["crate_id"].clone();
1919
let version = req.params()["version"].as_str();
2020

21-
// When no database connection is ready unconditional redirects will be performed. This could
22-
// happen if the pool is not healthy or if an operator manually configured the application to
23-
// always perform unconditional redirects (for example as part of the mitigations for an
24-
// outage). See the comments below for a description of what unconditional redirects do.
25-
let conn = if app.config.force_unconditional_redirects {
26-
None
27-
} else {
28-
match req.db_conn() {
29-
Ok(conn) => Some(conn),
30-
Err(PoolError::UnhealthyPool) => None,
31-
Err(err) => return Err(err.into()),
32-
}
33-
};
34-
3521
let mut log_metadata = None;
36-
if let Some(conn) = &conn {
37-
use self::versions::dsl::*;
38-
39-
// Returns the crate name as stored in the database, or an error if we could
40-
// not load the version ID from the database.
41-
let (version_id, canonical_crate_name) = app
42-
.instance_metrics
43-
.downloads_select_query_execution_time
44-
.observe_closure_duration(|| {
45-
versions
46-
.inner_join(crates::table)
47-
.select((id, crates::name))
48-
.filter(Crate::with_name(&crate_name))
49-
.filter(num.eq(version))
50-
.first::<(i32, String)>(&**conn)
51-
})?;
52-
53-
if canonical_crate_name != crate_name {
54-
app.instance_metrics
55-
.downloads_non_canonical_crate_name_total
56-
.inc();
57-
log_metadata = Some(("bot", "dl"));
58-
}
59-
crate_name = canonical_crate_name;
22+
23+
let cache_key = (crate_name.to_string(), version.to_string());
24+
if let Some(version_id) = app.version_id_cacher.get(&cache_key) {
25+
app.instance_metrics.version_id_cache_hits.inc();
6026

6127
// The increment does not happen instantly, but it's deferred to be executed in a batch
6228
// along with other downloads. See crate::downloads_counter for the implementation.
6329
app.downloads_counter.increment(version_id);
6430
} else {
65-
// The download endpoint is the most critical route in the whole crates.io application,
66-
// as it's relied upon by users and automations to download crates. Keeping it working
67-
// is the most important thing for us.
68-
//
69-
// The endpoint relies on the database to fetch the canonical crate name (with the
70-
// right capitalization and hyphenation), but that's only needed to serve clients who
71-
// don't call the endpoint with the crate's canonical name.
72-
//
73-
// Thankfully Cargo always uses the right name when calling the endpoint, and we can
74-
// keep it working during a full database outage by unconditionally redirecting without
75-
// checking whether the crate exists or the rigth name is used. Non-Cargo clients might
76-
// get a 404 response instead of a 500, but that's worth it.
77-
//
78-
// Without a working database we also can't count downloads, but that's also less
79-
// critical than keeping Cargo downloads operational.
80-
81-
app.instance_metrics
82-
.downloads_unconditional_redirects_total
83-
.inc();
84-
log_metadata = Some(("unconditional_redirect", "true"));
85-
}
31+
app.instance_metrics.version_id_cache_misses.inc();
32+
33+
// When no database connection is ready unconditional redirects will be performed. This could
34+
// happen if the pool is not healthy or if an operator manually configured the application to
35+
// always perform unconditional redirects (for example as part of the mitigations for an
36+
// outage). See the comments below for a description of what unconditional redirects do.
37+
let conn = if app.config.force_unconditional_redirects {
38+
None
39+
} else {
40+
match req.db_conn() {
41+
Ok(conn) => Some(conn),
42+
Err(PoolError::UnhealthyPool) => None,
43+
Err(err) => return Err(err.into()),
44+
}
45+
};
46+
47+
if let Some(conn) = &conn {
48+
use self::versions::dsl::*;
49+
50+
// Returns the crate name as stored in the database, or an error if we could
51+
// not load the version ID from the database.
52+
let (version_id, canonical_crate_name) = app
53+
.instance_metrics
54+
.downloads_select_query_execution_time
55+
.observe_closure_duration(|| {
56+
versions
57+
.inner_join(crates::table)
58+
.select((id, crates::name))
59+
.filter(Crate::with_name(&crate_name))
60+
.filter(num.eq(version))
61+
.first::<(i32, String)>(&**conn)
62+
})?;
63+
64+
if canonical_crate_name != crate_name {
65+
app.instance_metrics
66+
.downloads_non_canonical_crate_name_total
67+
.inc();
68+
log_metadata = Some(("bot", "dl"));
69+
crate_name = canonical_crate_name;
70+
} else {
71+
// The version_id is only cached if the provided crate name was canonical.
72+
// Non-canonical requests fallback to the "slow" path with a DB query, but
73+
// we typically only get a few hundred non-canonical requests in a day anyway.
74+
app.version_id_cacher.insert(cache_key, version_id);
75+
}
76+
77+
// The increment does not happen instantly, but it's deferred to be executed in a batch
78+
// along with other downloads. See crate::downloads_counter for the implementation.
79+
app.downloads_counter.increment(version_id);
80+
} else {
81+
// The download endpoint is the most critical route in the whole crates.io application,
82+
// as it's relied upon by users and automations to download crates. Keeping it working
83+
// is the most important thing for us.
84+
//
85+
// The endpoint relies on the database to fetch the canonical crate name (with the
86+
// right capitalization and hyphenation), but that's only needed to serve clients who
87+
// don't call the endpoint with the crate's canonical name.
88+
//
89+
// Thankfully Cargo always uses the right name when calling the endpoint, and we can
90+
// keep it working during a full database outage by unconditionally redirecting without
91+
// checking whether the crate exists or the rigth name is used. Non-Cargo clients might
92+
// get a 404 response instead of a 500, but that's worth it.
93+
//
94+
// Without a working database we also can't count downloads, but that's also less
95+
// critical than keeping Cargo downloads operational.
8696

87-
// Ensure the connection is released to the pool as soon as possible, as the download endpoint
88-
// covers the majority of our traffic and we don't want it to starve other requests.
89-
drop(conn);
97+
app.instance_metrics
98+
.downloads_unconditional_redirects_total
99+
.inc();
100+
log_metadata = Some(("unconditional_redirect", "true"));
101+
}
102+
};
90103

91104
let redirect_url = req
92105
.app()

src/metrics/instance.rs

+5
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,11 @@ metrics! {
5050
pub downloads_select_query_execution_time: Histogram,
5151
/// Number of download requests that are not counted yet.
5252
downloads_not_counted_total: IntGauge,
53+
54+
/// Number of version ID cache hits on the download endpoint.
55+
pub version_id_cache_hits: IntCounter,
56+
/// Number of version ID cache misses on the download endpoint.
57+
pub version_id_cache_misses: IntCounter,
5358
}
5459

5560
// All instance metrics will be prefixed with this namespace.

src/tests/krate/downloads.rs

+77-36
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use crate::builders::{CrateBuilder, VersionBuilder};
2-
use crate::util::{RequestHelper, TestApp};
2+
use crate::util::{MockAnonymousUser, RequestHelper, TestApp};
33
use cargo_registry::views::EncodableVersionDownload;
44
use chrono::{Duration, Utc};
55
use http::StatusCode;
@@ -9,6 +9,35 @@ struct Downloads {
99
version_downloads: Vec<EncodableVersionDownload>,
1010
}
1111

12+
fn persist_downloads_count(app: &TestApp) {
13+
app.as_inner()
14+
.downloads_counter
15+
.persist_all_shards(app.as_inner())
16+
.expect("failed to persist downloads count")
17+
.log();
18+
}
19+
20+
#[track_caller]
21+
fn assert_dl_count(
22+
anon: &MockAnonymousUser,
23+
name_and_version: &str,
24+
query: Option<&str>,
25+
count: i32,
26+
) {
27+
let url = format!("/api/v1/crates/{}/downloads", name_and_version);
28+
let downloads: Downloads = if let Some(query) = query {
29+
anon.get_with_query(&url, query).good()
30+
} else {
31+
anon.get(&url).good()
32+
};
33+
let total_downloads = downloads
34+
.version_downloads
35+
.iter()
36+
.map(|vd| vd.downloads)
37+
.sum::<i32>();
38+
assert_eq!(total_downloads, count);
39+
}
40+
1241
#[test]
1342
fn download() {
1443
let (app, anon, user) = TestApp::init().with_user();
@@ -20,60 +49,37 @@ fn download() {
2049
.expect_build(conn);
2150
});
2251

23-
let assert_dl_count = |name_and_version: &str, query: Option<&str>, count: i32| {
24-
let url = format!("/api/v1/crates/{}/downloads", name_and_version);
25-
let downloads: Downloads = if let Some(query) = query {
26-
anon.get_with_query(&url, query).good()
27-
} else {
28-
anon.get(&url).good()
29-
};
30-
let total_downloads = downloads
31-
.version_downloads
32-
.iter()
33-
.map(|vd| vd.downloads)
34-
.sum::<i32>();
35-
assert_eq!(total_downloads, count);
36-
};
37-
3852
let download = |name_and_version: &str| {
3953
let url = format!("/api/v1/crates/{}/download", name_and_version);
4054
let response = anon.get::<()>(&url);
4155
assert_eq!(response.status(), StatusCode::FOUND);
4256
// TODO: test the with_json code path
4357
};
4458

45-
let persist_downloads_count = || {
46-
app.as_inner()
47-
.downloads_counter
48-
.persist_all_shards(app.as_inner())
49-
.expect("failed to persist downloads count")
50-
.log();
51-
};
52-
5359
download("foo_download/1.0.0");
5460
// No downloads are counted until the counters are persisted
55-
assert_dl_count("foo_download/1.0.0", None, 0);
56-
assert_dl_count("foo_download", None, 0);
57-
persist_downloads_count();
61+
assert_dl_count(&anon, "foo_download/1.0.0", None, 0);
62+
assert_dl_count(&anon, "foo_download", None, 0);
63+
persist_downloads_count(&app);
5864
// Now that the counters are persisted the download counts show up.
59-
assert_dl_count("foo_download/1.0.0", None, 1);
60-
assert_dl_count("foo_download", None, 1);
65+
assert_dl_count(&anon, "foo_download/1.0.0", None, 1);
66+
assert_dl_count(&anon, "foo_download", None, 1);
6167

6268
download("FOO_DOWNLOAD/1.0.0");
63-
persist_downloads_count();
64-
assert_dl_count("FOO_DOWNLOAD/1.0.0", None, 2);
65-
assert_dl_count("FOO_DOWNLOAD", None, 2);
69+
persist_downloads_count(&app);
70+
assert_dl_count(&anon, "FOO_DOWNLOAD/1.0.0", None, 2);
71+
assert_dl_count(&anon, "FOO_DOWNLOAD", None, 2);
6672

6773
let yesterday = (Utc::today() + Duration::days(-1)).format("%F");
6874
let query = format!("before_date={}", yesterday);
69-
assert_dl_count("FOO_DOWNLOAD/1.0.0", Some(&query), 0);
75+
assert_dl_count(&anon, "FOO_DOWNLOAD/1.0.0", Some(&query), 0);
7076
// crate/downloads always returns the last 90 days and ignores date params
71-
assert_dl_count("FOO_DOWNLOAD", Some(&query), 2);
77+
assert_dl_count(&anon, "FOO_DOWNLOAD", Some(&query), 2);
7278

7379
let tomorrow = (Utc::today() + Duration::days(1)).format("%F");
7480
let query = format!("before_date={}", tomorrow);
75-
assert_dl_count("FOO_DOWNLOAD/1.0.0", Some(&query), 2);
76-
assert_dl_count("FOO_DOWNLOAD", Some(&query), 2);
81+
assert_dl_count(&anon, "FOO_DOWNLOAD/1.0.0", Some(&query), 2);
82+
assert_dl_count(&anon, "FOO_DOWNLOAD", Some(&query), 2);
7783
}
7884

7985
#[test]
@@ -136,3 +142,38 @@ fn force_unconditional_redirect() {
136142
anon.get::<()>("/api/v1/crates/bar-download/1.0.0/download")
137143
.assert_redirect_ends_with("/crates/bar-download/bar-download-1.0.0.crate");
138144
}
145+
146+
#[test]
147+
fn download_caches_version_id() {
148+
use cargo_registry::schema::crates::dsl::*;
149+
use diesel::prelude::*;
150+
151+
let (app, anon, user) = TestApp::init().with_user();
152+
let user = user.as_model();
153+
154+
app.db(|conn| {
155+
CrateBuilder::new("foo_download", user.id)
156+
.version(VersionBuilder::new("1.0.0"))
157+
.expect_build(conn);
158+
});
159+
160+
anon.get::<()>("/api/v1/crates/foo_download/1.0.0/download")
161+
.assert_redirect_ends_with("/crates/foo_download/foo_download-1.0.0.crate");
162+
163+
// Rename the crate, so that `foo_download` will not be found if its version_id was not cached
164+
app.db(|conn| {
165+
diesel::update(crates.filter(name.eq("foo_download")))
166+
.set(name.eq("other"))
167+
.execute(conn)
168+
.unwrap();
169+
});
170+
171+
// This would result in a 404 if the endpoint tried to read from the database
172+
anon.get::<()>("/api/v1/crates/foo_download/1.0.0/download")
173+
.assert_redirect_ends_with("/crates/foo_download/foo_download-1.0.0.crate");
174+
175+
// Downloads are persisted by version_id, so the rename doesn't matter
176+
persist_downloads_count(&app);
177+
// Check download count against the new name, rather than rename it back to the original value
178+
assert_dl_count(&anon, "other/1.0.0", None, 2);
179+
}

0 commit comments

Comments
 (0)