Skip to content

Commit 78e39c5

Browse files
authored
Implement ArchiveVersionDownloads background job (#8596)
1 parent 180c7b2 commit 78e39c5

9 files changed

+499
-2
lines changed

.env.sample

+7
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,13 @@ export TEST_DATABASE_URL=
5252
# export CDN_LOG_QUEUE_URL=
5353
# export CDN_LOG_QUEUE_REGION=
5454

55+
# Configuration for the version downloads data archive.
56+
# You can leave these commented out if you're not using the archival process.
57+
# export DOWNLOADS_ARCHIVE_ACCESS_KEY=
58+
# export DOWNLOADS_ARCHIVE_SECRET_KEY=
59+
# export DOWNLOADS_ARCHIVE_REGION=
60+
# export DOWNLOADS_ARCHIVE_BUCKET=
61+
5562
# Upstream location of the registry index. Background jobs will push to
5663
# this URL. The default points to a local index for development.
5764
# Run `./script/init-local-index.sh` to initialize this repo.

Cargo.lock

+22
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+2-1
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ crates_io_index = { path = "crates/crates_io_index" }
6565
crates_io_markdown = { path = "crates/crates_io_markdown" }
6666
crates_io_tarball = { path = "crates/crates_io_tarball" }
6767
crates_io_worker = { path = "crates/crates_io_worker" }
68+
csv = "=1.3.0"
6869
chrono = { version = "=0.4.38", default-features = false, features = ["serde"] }
6970
clap = { version = "=4.5.4", features = ["derive", "env", "unicode", "wrap_help"] }
7071
cookie = { version = "=0.18.1", features = ["secure"] }
@@ -110,7 +111,7 @@ spdx = "=0.10.4"
110111
tar = "=0.4.40"
111112
tempfile = "=3.10.1"
112113
thiserror = "=1.0.60"
113-
tokio = { version = "=1.37.0", features = ["net", "signal", "io-std", "io-util", "rt-multi-thread", "macros"]}
114+
tokio = { version = "=1.37.0", features = ["net", "signal", "io-std", "io-util", "rt-multi-thread", "macros", "process"]}
114115
toml = "=0.8.12"
115116
tower = "=0.4.13"
116117
tower-http = { version = "=0.5.2", features = ["add-extension", "fs", "catch-panic", "timeout", "compression-full"] }

src/admin/enqueue_job.rs

+12
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use crate::db;
22
use crate::schema::{background_jobs, crates};
33
use crate::worker::jobs;
44
use anyhow::Result;
5+
use chrono::NaiveDate;
56
use crates_io_worker::BackgroundJob;
67
use diesel::dsl::exists;
78
use diesel::prelude::*;
@@ -14,6 +15,11 @@ use secrecy::{ExposeSecret, SecretString};
1415
rename_all = "snake_case"
1516
)]
1617
pub enum Command {
18+
ArchiveVersionDownloads {
19+
#[arg(long)]
20+
/// The date before which to archive version downloads (default: 90 days ago)
21+
before: Option<NaiveDate>,
22+
},
1723
UpdateDownloads,
1824
CleanProcessedLogFiles,
1925
DumpDb {
@@ -45,6 +51,12 @@ pub fn run(command: Command) -> Result<()> {
4551
println!("Enqueueing background job: {command:?}");
4652

4753
match command {
54+
Command::ArchiveVersionDownloads { before } => {
55+
before
56+
.map(jobs::ArchiveVersionDownloads::before)
57+
.unwrap_or_default()
58+
.enqueue(conn)?;
59+
}
4860
Command::UpdateDownloads => {
4961
let count: i64 = background_jobs::table
5062
.filter(background_jobs::job_type.eq(jobs::UpdateDownloads::JOB_NAME))

src/bin/background-worker.rs

+3
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ use crates_io::cloudfront::CloudFront;
1818
use crates_io::fastly::Fastly;
1919
use crates_io::storage::Storage;
2020
use crates_io::team_repo::TeamRepoImpl;
21+
use crates_io::worker::jobs::ArchiveVersionDownloads;
2122
use crates_io::worker::{Environment, RunnerExt};
2223
use crates_io::{config, Emails};
2324
use crates_io::{db, ssh};
@@ -69,6 +70,7 @@ fn main() -> anyhow::Result<()> {
6970

7071
let cloudfront = CloudFront::from_environment();
7172
let storage = Arc::new(Storage::from_config(&config.storage));
73+
let downloads_archive_store = ArchiveVersionDownloads::store_from_environment()?;
7274

7375
let client = Client::builder()
7476
.timeout(Duration::from_secs(45))
@@ -88,6 +90,7 @@ fn main() -> anyhow::Result<()> {
8890
.cloudfront(cloudfront)
8991
.fastly(fastly)
9092
.storage(storage)
93+
.downloads_archive_store(downloads_archive_store)
9194
.deadpool(deadpool.clone())
9295
.emails(emails)
9396
.team_repo(Box::new(team_repo))

src/worker/environment.rs

+3
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use crates_io_index::{Repository, RepositoryConfig};
88
use deadpool_diesel::postgres::Pool as DeadpoolPool;
99
use derive_builder::Builder;
1010
use diesel::PgConnection;
11+
use object_store::ObjectStore;
1112
use parking_lot::{Mutex, MutexGuard};
1213
use std::ops::{Deref, DerefMut};
1314
use std::sync::{Arc, OnceLock};
@@ -26,6 +27,8 @@ pub struct Environment {
2627
#[builder(default)]
2728
fastly: Option<Fastly>,
2829
pub storage: Arc<Storage>,
30+
#[builder(default)]
31+
pub downloads_archive_store: Option<Box<dyn ObjectStore>>,
2932
pub deadpool: DeadpoolPool,
3033
pub emails: Emails,
3134
pub team_repo: Box<dyn TeamRepo + Send + Sync>,

0 commit comments

Comments
 (0)