Skip to content

refactor random crate search, the random view size is configurable and we don't retry any more #1260

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,15 @@ pub struct Config {
// Time between 'git gc --auto' calls in seconds
pub(crate) registry_gc_interval: u64,

// random crate search generates a number of random IDs to
// efficiently find a random crate with > 100 GH stars.
// The amount depends on the ratio of crates with >100 stars
// to the count of all crates.
// At the time of creating this setting, it is set to
// `500` for a ratio of 7249 over 54k crates.
// For unit-tests the number has to be higher.
pub(crate) random_crate_search_view_size: u32,

// Build params
pub(crate) build_attempts: u16,
pub(crate) rustwide_workspace: PathBuf,
Expand Down Expand Up @@ -84,6 +93,8 @@ impl Config {
max_parse_memory: env("DOCSRS_MAX_PARSE_MEMORY", 5 * 1024 * 1024)?,
registry_gc_interval: env("DOCSRS_REGISTRY_GC_INTERVAL", 60 * 60)?,

random_crate_search_view_size: env("DOCSRS_RANDOM_CRATE_SEARCH_VIEW_SIZE", 500)?,

rustwide_workspace: env("CRATESFYI_RUSTWIDE_WORKSPACE", PathBuf::from(".workspace"))?,
inside_docker: env("DOCS_RS_DOCKER", false)?,
local_docker_image: maybe_env("DOCS_RS_LOCAL_DOCKER_IMAGE")?,
Expand Down
96 changes: 51 additions & 45 deletions src/web/releases.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use crate::{
db::{Pool, PoolClient},
impl_webpage,
web::{error::Nope, match_version, page::WebPage, redirect_base},
BuildQueue,
BuildQueue, Config,
};
use chrono::{DateTime, Utc};
use iron::{
Expand Down Expand Up @@ -498,13 +498,18 @@ impl Default for Search {
}

fn redirect_to_random_crate(req: &Request, conn: &mut PoolClient) -> IronResult<Response> {
// since there is a chance of this query returning an empty result,
// we retry a certain amount of times, and log a warning.
for i in 0..20 {
let rows = ctry!(
req,
conn.query(
"WITH params AS (
// We try to find a random crate and redirect to it.
//
// The query is efficient, but relies on a static factor which depends
// on the amount of crates with > 100 GH stars over the amount of all crates.
//
// If random-crate-searches end up being empty, increase that value.

let config = extension!(req, Config);
let rows = ctry!(
req,
conn.query(
"WITH params AS (
-- get maximum possible id-value in crates-table
SELECT last_value AS max_id FROM crates_id_seq
)
Expand All @@ -513,55 +518,44 @@ fn redirect_to_random_crate(req: &Request, conn: &mut PoolClient) -> IronResult<
releases.version,
releases.target_name
FROM (
-- generate 500 random numbers in the ID-range.
-- this might have to be increased when we have to repeat
-- this query too often.
-- it depends on the percentage of crates with > 100 stars
-- generate random numbers in the ID-range.
SELECT DISTINCT 1 + trunc(random() * params.max_id)::INTEGER AS id
FROM params, generate_series(1, 500)
FROM params, generate_series(1, $1)
) AS r
INNER JOIN crates ON r.id = crates.id
INNER JOIN releases ON crates.latest_version_id = releases.id
INNER JOIN github_repos ON releases.github_repo = github_repos.id
WHERE
releases.rustdoc_status = TRUE AND
github_repos.stars >= 100
LIMIT 1
",
&[]
),
);

if let Some(row) = rows.into_iter().next() {
let name: String = row.get("name");
let version: String = row.get("version");
let target_name: String = row.get("target_name");
let url = ctry!(
req,
Url::parse(&format!(
"{}/{}/{}/{}/",
redirect_base(req),
name,
version,
target_name
)),
);
LIMIT 1",
&[&(config.random_crate_search_view_size as i32)]
)
);

let mut resp = Response::with((status::Found, Redirect(url)));
resp.headers.set(Expires(HttpDate(time::now())));
if let Some(row) = rows.into_iter().next() {
let name: String = row.get("name");
let version: String = row.get("version");
let target_name: String = row.get("target_name");
let url = ctry!(
req,
Url::parse(&format!(
"{}/{}/{}/{}/",
redirect_base(req),
name,
version,
target_name
)),
);

let metrics = extension!(req, crate::Metrics).clone();
metrics.im_feeling_lucky_searches.inc();
let metrics = extension!(req, crate::Metrics).clone();
metrics.im_feeling_lucky_searches.inc();

log::debug!("finished random crate search on iteration {}", i);
return Ok(resp);
} else {
log::warn!("retrying random crate search");
}
Ok(super::redirect(url))
} else {
log::error!("found no result in random crate search");
Err(Nope::NoResults.into())
}
log::error!("found no result in random crate search");

Err(Nope::NoResults.into())
}

impl_webpage! {
Expand Down Expand Up @@ -1095,6 +1089,18 @@ mod tests {
#[test]
fn im_feeling_lucky_with_stars() {
wrapper(|env| {
{
// The normal test-setup will offset all primary sequences by 10k
// to prevent errors with foreign key relations.
// Random-crate-search relies on the sequence for the crates-table
// to find a maximum possible ID. This combined with only one actual
// crate in the db breaks this test.
// That's why we reset the id-sequence to zero for this test.

let mut conn = env.db().conn();
conn.execute(r#"ALTER SEQUENCE crates_id_seq RESTART WITH 1"#, &[])?;
}

let web = env.frontend();
env.fake_release()
.github_stats("some/repo", 333, 22, 11)
Expand Down