Skip to content

Commit 6f096e3

Browse files
jtgeibelTurbo87
authored andcommitted
Cache the version_id for download requests
This adds a caching layer in the app backend to reduce the number of database queries. While this query is extremely fast, a massive number of concurrent download requests can still result in a request backlog. In this implementation cached values are never dropped. This means that memory usage will grow over time, however the memory usage is expected to be well within our instance size and the application is automatically rebooted at least once every 24 hours. Additionally, on the rare occasion that a crate version is deleted, the cache will continue to serve redirects and attempt to record download counts for that version. If a version is deleted and continues to see download requests, it is expected that the entire shard of downloads containing that version_id will fail to persist. Even if we expired cached entries after a certain time period, there would still be a window where a deleted version could cause download counts for other versions to not be persisted. It is possible we could make the download counter logic robust against this scenario, but given that deleting versions is rare we can mitigate this by restarting the app after deleting crates or versions.
1 parent 69e9687 commit 6f096e3

File tree

3 files changed

+131
-66
lines changed

3 files changed

+131
-66
lines changed

src/app.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use crate::downloads_counter::DownloadsCounter;
88
use crate::email::Emails;
99
use crate::github::GitHubClient;
1010
use crate::metrics::{InstanceMetrics, ServiceMetrics};
11+
use dashmap::DashMap;
1112
use diesel::r2d2;
1213
use oauth2::basic::BasicClient;
1314
use reqwest::blocking::Client;
@@ -31,6 +32,12 @@ pub struct App {
3132
/// The server configuration
3233
pub config: config::Server,
3334

35+
/// Cache the `version_id` of a `canonical_crate_name:semver` pair
36+
///
37+
/// This is used by the download endpoint to reduce the number of database queries. The
38+
/// `version_id` is only cached under the canonical spelling of the crate name.
39+
pub(crate) version_id_cacher: DashMap<String, i32>,
40+
3441
/// Count downloads and periodically persist them in the database
3542
pub downloads_counter: DownloadsCounter,
3643

@@ -154,6 +161,7 @@ impl App {
154161
github,
155162
github_oauth,
156163
config,
164+
version_id_cacher: DashMap::new(),
157165
downloads_counter: DownloadsCounter::new(),
158166
emails: Emails::from_environment(),
159167
service_metrics: ServiceMetrics::new().expect("could not initialize service metrics"),

src/controllers/version/downloads.rs

Lines changed: 88 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use crate::models::{Crate, VersionDownload};
99
use crate::schema::*;
1010
use crate::views::EncodableVersionDownload;
1111
use chrono::{Duration, NaiveDate, Utc};
12+
use dashmap::mapref::entry::Entry;
1213

1314
/// Handles the `GET /crates/:crate_id/:version/download` route.
1415
/// This returns a URL to the location where the crate is stored.
@@ -18,75 +19,96 @@ pub fn download(req: &mut dyn RequestExt) -> EndpointResult {
1819
let mut crate_name = req.params()["crate_id"].clone();
1920
let version = req.params()["version"].as_str();
2021

21-
// When no database connection is ready unconditional redirects will be performed. This could
22-
// happen if the pool is not healthy or if an operator manually configured the application to
23-
// always perform unconditional redirects (for example as part of the mitigations for an
24-
// outage). See the comments below for a description of what unconditional redirects do.
25-
let conn = if app.config.force_unconditional_redirects {
26-
None
27-
} else {
28-
match req.db_conn() {
29-
Ok(conn) => Some(conn),
30-
Err(PoolError::UnhealthyPool) => None,
31-
Err(err) => return Err(err.into()),
32-
}
33-
};
34-
3522
let mut log_metadata = None;
36-
if let Some(conn) = &conn {
37-
use self::versions::dsl::*;
38-
39-
// Returns the crate name as stored in the database, or an error if we could
40-
// not load the version ID from the database.
41-
let (version_id, canonical_crate_name) = app
42-
.instance_metrics
43-
.downloads_select_query_execution_time
44-
.observe_closure_duration(|| {
45-
versions
46-
.inner_join(crates::table)
47-
.select((id, crates::name))
48-
.filter(Crate::with_name(&crate_name))
49-
.filter(num.eq(version))
50-
.first::<(i32, String)>(&**conn)
51-
})?;
52-
53-
if canonical_crate_name != crate_name {
54-
app.instance_metrics
55-
.downloads_non_canonical_crate_name_total
56-
.inc();
57-
log_metadata = Some(("bot", "dl"));
58-
}
59-
crate_name = canonical_crate_name;
6023

61-
// The increment does not happen instantly, but it's deferred to be executed in a batch
62-
// along with other downloads. See crate::downloads_counter for the implementation.
63-
app.downloads_counter.increment(version_id);
64-
} else {
65-
// The download endpoint is the most critical route in the whole crates.io application,
66-
// as it's relied upon by users and automations to download crates. Keeping it working
67-
// is the most important thing for us.
68-
//
69-
// The endpoint relies on the database to fetch the canonical crate name (with the
70-
// right capitalization and hyphenation), but that's only needed to serve clients who
71-
// don't call the endpoint with the crate's canonical name.
72-
//
73-
// Thankfully Cargo always uses the right name when calling the endpoint, and we can
74-
// keep it working during a full database outage by unconditionally redirecting without
75-
// checking whether the crate exists or the rigth name is used. Non-Cargo clients might
76-
// get a 404 response instead of a 500, but that's worth it.
77-
//
78-
// Without a working database we also can't count downloads, but that's also less
79-
// critical than keeping Cargo downloads operational.
80-
81-
app.instance_metrics
82-
.downloads_unconditional_redirects_total
83-
.inc();
84-
log_metadata = Some(("unconditional_redirect", "true"));
85-
}
24+
match app
25+
.version_id_cacher
26+
.entry(format!("{}:{}", crate_name, version))
27+
{
28+
// The version_id is cached. This also means that the provided crate_name is canonical
29+
// and that no fixup is necessary before redirecting.
30+
Entry::Occupied(entry) => {
31+
let version_id = *entry.get();
32+
33+
// The increment does not happen instantly, but it's deferred to be executed in a batch
34+
// along with other downloads. See crate::downloads_counter for the implementation.
35+
app.downloads_counter.increment(version_id);
36+
}
8637

87-
// Ensure the connection is released to the pool as soon as possible, as the download endpoint
88-
// covers the majority of our traffic and we don't want it to starve other requests.
89-
drop(conn);
38+
// The version_id is not cached, so either query the database or fallback to an
39+
// unconditional redirect if the database pool is unhealthy.
40+
Entry::Vacant(entry) => {
41+
// When no database connection is ready unconditional redirects will be performed. This could
42+
// happen if the pool is not healthy or if an operator manually configured the application to
43+
// always perform unconditional redirects (for example as part of the mitigations for an
44+
// outage). See the comments below for a description of what unconditional redirects do.
45+
let conn = if app.config.force_unconditional_redirects {
46+
None
47+
} else {
48+
match req.db_conn() {
49+
Ok(conn) => Some(conn),
50+
Err(PoolError::UnhealthyPool) => None,
51+
Err(err) => return Err(err.into()),
52+
}
53+
};
54+
55+
if let Some(conn) = &conn {
56+
use self::versions::dsl::*;
57+
58+
// Returns the crate name as stored in the database, or an error if we could
59+
// not load the version ID from the database.
60+
let (version_id, canonical_crate_name) = app
61+
.instance_metrics
62+
.downloads_select_query_execution_time
63+
.observe_closure_duration(|| {
64+
versions
65+
.inner_join(crates::table)
66+
.select((id, crates::name))
67+
.filter(Crate::with_name(&crate_name))
68+
.filter(num.eq(version))
69+
.first::<(i32, String)>(&**conn)
70+
})?;
71+
72+
if canonical_crate_name != crate_name {
73+
app.instance_metrics
74+
.downloads_non_canonical_crate_name_total
75+
.inc();
76+
log_metadata = Some(("bot", "dl"));
77+
crate_name = canonical_crate_name;
78+
} else {
79+
// The version_id is only cached if the provided crate name was canonical.
80+
// Non-canonical requests fallback to the "slow" path with a DB query, but
81+
// we typically only get a few hundred non-canonical requests in a day anyway.
82+
entry.insert(version_id);
83+
}
84+
85+
// The increment does not happen instantly, but it's deferred to be executed in a batch
86+
// along with other downloads. See crate::downloads_counter for the implementation.
87+
app.downloads_counter.increment(version_id);
88+
} else {
89+
// The download endpoint is the most critical route in the whole crates.io application,
90+
// as it's relied upon by users and automations to download crates. Keeping it working
91+
// is the most important thing for us.
92+
//
93+
// The endpoint relies on the database to fetch the canonical crate name (with the
94+
// right capitalization and hyphenation), but that's only needed to serve clients who
95+
// don't call the endpoint with the crate's canonical name.
96+
//
97+
// Thankfully Cargo always uses the right name when calling the endpoint, and we can
98+
// keep it working during a full database outage by unconditionally redirecting without
99+
// checking whether the crate exists or the rigth name is used. Non-Cargo clients might
100+
// get a 404 response instead of a 500, but that's worth it.
101+
//
102+
// Without a working database we also can't count downloads, but that's also less
103+
// critical than keeping Cargo downloads operational.
104+
105+
app.instance_metrics
106+
.downloads_unconditional_redirects_total
107+
.inc();
108+
log_metadata = Some(("unconditional_redirect", "true"));
109+
}
110+
}
111+
};
90112

91113
let redirect_url = req
92114
.app()

src/tests/krate/downloads.rs

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,3 +142,38 @@ fn force_unconditional_redirect() {
142142
anon.get::<()>("/api/v1/crates/bar-download/1.0.0/download")
143143
.assert_redirect_ends_with("/crates/bar-download/bar-download-1.0.0.crate");
144144
}
145+
146+
#[test]
147+
fn download_caches_version_id() {
148+
use cargo_registry::schema::crates::dsl::*;
149+
use diesel::prelude::*;
150+
151+
let (app, anon, user) = TestApp::init().with_user();
152+
let user = user.as_model();
153+
154+
app.db(|conn| {
155+
CrateBuilder::new("foo_download", user.id)
156+
.version(VersionBuilder::new("1.0.0"))
157+
.expect_build(conn);
158+
});
159+
160+
anon.get::<()>("/api/v1/crates/foo_download/1.0.0/download")
161+
.assert_redirect_ends_with("/crates/foo_download/foo_download-1.0.0.crate");
162+
163+
// Rename the crate, so that `foo_download` will not be found if its version_id was not cached
164+
app.db(|conn| {
165+
diesel::update(crates.filter(name.eq("foo_download")))
166+
.set(name.eq("other"))
167+
.execute(conn)
168+
.unwrap();
169+
});
170+
171+
// This would result in a 404 if the endpoint tried to read from the database
172+
anon.get::<()>("/api/v1/crates/foo_download/1.0.0/download")
173+
.assert_redirect_ends_with("/crates/foo_download/foo_download-1.0.0.crate");
174+
175+
// Downloads are persisted by version_id, so the rename doesn't matter
176+
persist_downloads_count(&app);
177+
// Check download count against the new name, rather than rename it back to the original value
178+
assert_dl_count(&anon, "other/1.0.0", None, 2);
179+
}

0 commit comments

Comments
 (0)