From 9598ae705221fabf306904e5ae634a9bf152f681 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 21 Jan 2021 00:02:35 +0100 Subject: [PATCH 1/7] Set CDN caching headers for rustdoc redirects --- src/config.rs | 6 ++++++ src/web/rustdoc.rs | 14 ++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/config.rs b/src/config.rs index 0b26acffc..a603cadfa 100644 --- a/src/config.rs +++ b/src/config.rs @@ -38,6 +38,7 @@ pub struct Config { // Time between 'git gc --auto' calls in seconds pub(crate) registry_gc_interval: u64, +<<<<<<< HEAD // random crate search generates a number of random IDs to // efficiently find a random crate with > 100 GH stars. // The amount depends on the ratio of crates with >100 stars @@ -46,6 +47,10 @@ pub struct Config { // `500` for a ratio of 7249 over 54k crates. // For unit-tests the number has to be higher. pub(crate) random_crate_search_view_size: u32, +======= + // CDN / caching settings + pub(crate) cache_rustdoc_redirects: u32, +>>>>>>> 53d35de (Set CDN caching headers for rustdoc redirects) // Build params pub(crate) build_attempts: u16, @@ -94,6 +99,7 @@ impl Config { registry_gc_interval: env("DOCSRS_REGISTRY_GC_INTERVAL", 60 * 60)?, random_crate_search_view_size: env("DOCSRS_RANDOM_CRATE_SEARCH_VIEW_SIZE", 500)?, + cache_rustdoc_redirects: env("DOCSRS_CACHE_RUSTDOC_REDIRECTS", 30 * 60)?, rustwide_workspace: env("CRATESFYI_RUSTWIDE_WORKSPACE", PathBuf::from(".workspace"))?, inside_docker: env("DOCS_RS_DOCKER", false)?, diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index 195c1a158..8ec13100b 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -69,8 +69,13 @@ pub fn rustdoc_redirector_handler(req: &mut Request) -> IronResult { url_str.push_str(query); } let url = ctry!(req, Url::parse(&url_str)); + let config = extension!(req, Config); let mut resp = Response::with((status::Found, Redirect(url))); - resp.headers.set(Expires(HttpDate(time::now()))); + resp.headers.set(CacheControl(vec![ + CacheDirective::MaxAge(0), + // s-maxage is only for the CDN. + CacheDirective::SMaxAge(config.cache_rustdoc_redirects), + ])); Ok(resp) } @@ -81,8 +86,13 @@ pub fn rustdoc_redirector_handler(req: &mut Request) -> IronResult { Url::parse(&format!("{}/crate/{}/{}", redirect_base(req), name, vers)), ); + let config = extension!(req, Config); let mut resp = Response::with((status::Found, Redirect(url))); - resp.headers.set(Expires(HttpDate(time::now()))); + resp.headers.set(CacheControl(vec![ + CacheDirective::MaxAge(0), + // s-maxage is only for the CDN. + CacheDirective::SMaxAge(config.cache_rustdoc_redirects), + ])); Ok(resp) } From 644ae509941fc2b6c510add9434ef5251a35b748 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Fri, 22 Jan 2021 15:35:52 +0100 Subject: [PATCH 2/7] add CDN caching to rustdoc redirects only --- src/web/mod.rs | 24 +++++++++++++++++++++++- src/web/rustdoc.rs | 26 +++++--------------------- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/src/web/mod.rs b/src/web/mod.rs index 0b60e4dbf..83c5af8ce 100644 --- a/src/web/mod.rs +++ b/src/web/mod.rs @@ -99,7 +99,7 @@ use extensions::InjectExtensions; use failure::Error; use iron::{ self, - headers::{Expires, HttpDate}, + headers::{CacheControl, CacheDirective, Expires, HttpDate}, modifiers::Redirect, status, status::Status, @@ -496,6 +496,28 @@ fn redirect(url: Url) -> Response { resp } +/// creates a redirect-response which is cached on the CDN level for +/// the given amount of seconds. Browser-Local caching is deactivated. +/// The used s-maxage is respected by CloudFront (which we can invalidate +/// if we need to), and perhaps by other proxies in between. +/// When the cache-seconds are reasonably small, this should be fine. +/// +/// CloudFront doesn't have `surrogate-control` headers which would be only +/// used by CloudFront and dropped (Fastly has them). +/// +/// The `Expires` header from `redirect` is still kept to be safe, +/// CloudFront ignores it when it gets `Cache-Control: max-age` or +/// `s-maxage`. +fn cached_redirect(url: Url, cache_seconds: u32) -> Response { + let mut resp = redirect(url); + resp.headers.set(CacheControl(vec![ + CacheDirective::MaxAge(0), + CacheDirective::SMaxAge(cache_seconds), + ])); + + resp +} + fn redirect_base(req: &Request) -> String { // Try to get the scheme from CloudFront first, and then from iron let scheme = req diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index 8ec13100b..9de753e83 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -70,14 +70,8 @@ pub fn rustdoc_redirector_handler(req: &mut Request) -> IronResult { } let url = ctry!(req, Url::parse(&url_str)); let config = extension!(req, Config); - let mut resp = Response::with((status::Found, Redirect(url))); - resp.headers.set(CacheControl(vec![ - CacheDirective::MaxAge(0), - // s-maxage is only for the CDN. - CacheDirective::SMaxAge(config.cache_rustdoc_redirects), - ])); - - Ok(resp) + + Ok(super::cached_redirect(url, config.cache_rustdoc_redirects)) } fn redirect_to_crate(req: &Request, name: &str, vers: &str) -> IronResult { @@ -87,14 +81,7 @@ pub fn rustdoc_redirector_handler(req: &mut Request) -> IronResult { ); let config = extension!(req, Config); - let mut resp = Response::with((status::Found, Redirect(url))); - resp.headers.set(CacheControl(vec![ - CacheDirective::MaxAge(0), - // s-maxage is only for the CDN. - CacheDirective::SMaxAge(config.cache_rustdoc_redirects), - ])); - - Ok(resp) + Ok(super::cached_redirect(url, config.cache_rustdoc_redirects)) } let metrics = extension!(req, Metrics).clone(); @@ -292,7 +279,7 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult { ); let url = ctry!(req, Url::parse(&redirect_path)); - Ok(super::redirect(url)) + Ok(super::cached_redirect(url, config.cache_rustdoc_redirects)) }; rendering_time.step("match version"); @@ -561,10 +548,7 @@ pub fn target_redirect_handler(req: &mut Request) -> IronResult { ); let url = ctry!(req, Url::parse(&url)); - let mut resp = Response::with((status::Found, Redirect(url))); - resp.headers.set(Expires(HttpDate(time::now()))); - - Ok(resp) + Ok(super::cached_redirect(url, config.cache_rustdoc_redirects)) } pub fn badge_handler(req: &mut Request) -> IronResult { From 39db3b1de1bd898d2715749bf1ec90851bc63a43 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Fri, 22 Jan 2021 17:39:18 +0100 Subject: [PATCH 3/7] cached rustlangredirector --- src/config.rs | 3 --- src/web/rustdoc.rs | 9 ++++++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/config.rs b/src/config.rs index a603cadfa..21439e39c 100644 --- a/src/config.rs +++ b/src/config.rs @@ -38,7 +38,6 @@ pub struct Config { // Time between 'git gc --auto' calls in seconds pub(crate) registry_gc_interval: u64, -<<<<<<< HEAD // random crate search generates a number of random IDs to // efficiently find a random crate with > 100 GH stars. // The amount depends on the ratio of crates with >100 stars @@ -47,10 +46,8 @@ pub struct Config { // `500` for a ratio of 7249 over 54k crates. // For unit-tests the number has to be higher. pub(crate) random_crate_search_view_size: u32, -======= // CDN / caching settings pub(crate) cache_rustdoc_redirects: u32, ->>>>>>> 53d35de (Set CDN caching headers for rustdoc redirects) // Build params pub(crate) build_attempts: u16, diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index 9de753e83..5363cb929 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -12,7 +12,6 @@ use crate::{ use iron::url::percent_encoding::percent_decode; use iron::{ headers::{CacheControl, CacheDirective, Expires, HttpDate}, - modifiers::Redirect, status, Handler, IronResult, Request, Response, Url, }; use lol_html::errors::RewritingError; @@ -37,8 +36,12 @@ impl RustLangRedirector { } impl iron::Handler for RustLangRedirector { - fn handle(&self, _req: &mut Request) -> IronResult { - Ok(Response::with((status::Found, Redirect(self.url.clone())))) + fn handle(&self, req: &mut Request) -> IronResult { + let config = extension!(req, Config); + Ok(super::cached_redirect( + self.url.clone(), + config.cache_rustdoc_redirects, + )) } } From 58e4fbfb24b2addec624d6b9b3b37a70ff3610cf Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Sat, 23 Jan 2021 23:37:59 +0100 Subject: [PATCH 4/7] split cache durations into short and long cache times --- src/config.rs | 19 ++++++++++++++++--- src/web/mod.rs | 2 +- src/web/rustdoc.rs | 45 +++++++++++++++++++++++++++++++++++---------- 3 files changed, 52 insertions(+), 14 deletions(-) diff --git a/src/config.rs b/src/config.rs index 21439e39c..2142cffc8 100644 --- a/src/config.rs +++ b/src/config.rs @@ -46,8 +46,17 @@ pub struct Config { // `500` for a ratio of 7249 over 54k crates. // For unit-tests the number has to be higher. pub(crate) random_crate_search_view_size: u32, - // CDN / caching settings - pub(crate) cache_rustdoc_redirects: u32, + + // Time to cache crate-level redirects in rustdoc, in seconds. + // This is for redirects where the destination + // can change after the release of a new version + // of a crate. + pub(crate) cache_rustdoc_redirects_crate: u32, + + // Time to cache release-level redirects in rustdoc, in seconds. + // Here the destination can only change after + // rebuilds or yanks, so very infrequently. + pub(crate) cache_rustdoc_redirects_version: u32, // Build params pub(crate) build_attempts: u16, @@ -96,7 +105,11 @@ impl Config { registry_gc_interval: env("DOCSRS_REGISTRY_GC_INTERVAL", 60 * 60)?, random_crate_search_view_size: env("DOCSRS_RANDOM_CRATE_SEARCH_VIEW_SIZE", 500)?, - cache_rustdoc_redirects: env("DOCSRS_CACHE_RUSTDOC_REDIRECTS", 30 * 60)?, + cache_rustdoc_redirects_crate: env("DOCSRS_CACHE_RUSTDOC_REDIRECTS_CRATE", 15 * 60)?, // 15 minutes + cache_rustdoc_redirects_version: env( + "DOCSRS_CACHE_RUSTDOC_REDIRECTS_RELEASE", + 7 * 24 * 60 * 60, // 7 days + )?, rustwide_workspace: env("CRATESFYI_RUSTWIDE_WORKSPACE", PathBuf::from(".workspace"))?, inside_docker: env("DOCS_RS_DOCKER", false)?, diff --git a/src/web/mod.rs b/src/web/mod.rs index 83c5af8ce..2fb2a3413 100644 --- a/src/web/mod.rs +++ b/src/web/mod.rs @@ -496,7 +496,7 @@ fn redirect(url: Url) -> Response { resp } -/// creates a redirect-response which is cached on the CDN level for +/// Creates a redirect-response which is cached on the CDN level for /// the given amount of seconds. Browser-Local caching is deactivated. /// The used s-maxage is respected by CloudFront (which we can invalidate /// if we need to), and perhaps by other proxies in between. diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index 5363cb929..fbb2a62c8 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -40,7 +40,7 @@ impl iron::Handler for RustLangRedirector { let config = extension!(req, Config); Ok(super::cached_redirect( self.url.clone(), - config.cache_rustdoc_redirects, + config.cache_rustdoc_redirects_crate, )) } } @@ -54,6 +54,7 @@ pub fn rustdoc_redirector_handler(req: &mut Request) -> IronResult { vers: &str, target: Option<&str>, target_name: &str, + cache_seconds: u32, ) -> IronResult { let mut url_str = if let Some(target) = target { format!( @@ -72,21 +73,25 @@ pub fn rustdoc_redirector_handler(req: &mut Request) -> IronResult { url_str.push_str(query); } let url = ctry!(req, Url::parse(&url_str)); - let config = extension!(req, Config); - Ok(super::cached_redirect(url, config.cache_rustdoc_redirects)) + Ok(super::cached_redirect(url, cache_seconds)) } - fn redirect_to_crate(req: &Request, name: &str, vers: &str) -> IronResult { + fn redirect_to_crate( + req: &Request, + name: &str, + vers: &str, + cache_seconds: u32, + ) -> IronResult { let url = ctry!( req, Url::parse(&format!("{}/crate/{}/{}", redirect_base(req), name, vers)), ); - let config = extension!(req, Config); - Ok(super::cached_redirect(url, config.cache_rustdoc_redirects)) + Ok(super::cached_redirect(url, cache_seconds)) } + let config = extension!(req, Config); let metrics = extension!(req, Metrics).clone(); let mut rendering_time = RenderingTimesRecorder::new(&metrics.rustdoc_redirect_rendering_times); @@ -145,6 +150,13 @@ pub fn rustdoc_redirector_handler(req: &mut Request) -> IronResult { let req_version = router.find("version"); let mut target = router.find("target"); + // redirects with an existing version in the URL can be cached for longer. + let cache_redirect_for = if let Some(_) = req_version { + config.cache_rustdoc_redirects_version + } else { + config.cache_rustdoc_redirects_crate + }; + // it doesn't matter if the version that was given was exact or not, since we're redirecting // anyway rendering_time.step("match version"); @@ -179,10 +191,17 @@ pub fn rustdoc_redirector_handler(req: &mut Request) -> IronResult { if has_docs { rendering_time.step("redirect to doc"); - redirect_to_doc(req, &crate_name, &version, target, &target_name) + redirect_to_doc( + req, + &crate_name, + &version, + target, + &target_name, + cache_redirect_for, + ) } else { rendering_time.step("redirect to crate"); - redirect_to_crate(req, &crate_name, &version) + redirect_to_crate(req, &crate_name, &version, cache_redirect_for) } } @@ -282,7 +301,10 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult { ); let url = ctry!(req, Url::parse(&redirect_path)); - Ok(super::cached_redirect(url, config.cache_rustdoc_redirects)) + Ok(super::cached_redirect( + url, + config.cache_rustdoc_redirects_version, + )) }; rendering_time.step("match version"); @@ -551,7 +573,10 @@ pub fn target_redirect_handler(req: &mut Request) -> IronResult { ); let url = ctry!(req, Url::parse(&url)); - Ok(super::cached_redirect(url, config.cache_rustdoc_redirects)) + Ok(super::cached_redirect( + url, + config.cache_rustdoc_redirects_version, + )) } pub fn badge_handler(req: &mut Request) -> IronResult { From 535be778aca3f43059f296c5feb5e7d6e5219076 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Mon, 25 Jan 2021 19:17:24 +0100 Subject: [PATCH 5/7] review fixes --- src/config.rs | 4 ++-- src/web/mod.rs | 5 +++-- src/web/rustdoc.rs | 6 +++--- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/config.rs b/src/config.rs index 2142cffc8..8b83f0123 100644 --- a/src/config.rs +++ b/src/config.rs @@ -56,7 +56,7 @@ pub struct Config { // Time to cache release-level redirects in rustdoc, in seconds. // Here the destination can only change after // rebuilds or yanks, so very infrequently. - pub(crate) cache_rustdoc_redirects_version: u32, + pub(crate) cache_rustdoc_redirects_release: u32, // Build params pub(crate) build_attempts: u16, @@ -106,7 +106,7 @@ impl Config { random_crate_search_view_size: env("DOCSRS_RANDOM_CRATE_SEARCH_VIEW_SIZE", 500)?, cache_rustdoc_redirects_crate: env("DOCSRS_CACHE_RUSTDOC_REDIRECTS_CRATE", 15 * 60)?, // 15 minutes - cache_rustdoc_redirects_version: env( + cache_rustdoc_redirects_release: env( "DOCSRS_CACHE_RUSTDOC_REDIRECTS_RELEASE", 7 * 24 * 60 * 60, // 7 days )?, diff --git a/src/web/mod.rs b/src/web/mod.rs index 2fb2a3413..61d43d337 100644 --- a/src/web/mod.rs +++ b/src/web/mod.rs @@ -491,7 +491,8 @@ fn duration_to_str(init: DateTime) -> String { /// `Request`. fn redirect(url: Url) -> Response { let mut resp = Response::with((status::Found, Redirect(url))); - resp.headers.set(Expires(HttpDate(time::now()))); + resp.headers + .set(CacheControl(vec![CacheDirective::MaxAge(0)])); resp } @@ -509,7 +510,7 @@ fn redirect(url: Url) -> Response { /// CloudFront ignores it when it gets `Cache-Control: max-age` or /// `s-maxage`. fn cached_redirect(url: Url, cache_seconds: u32) -> Response { - let mut resp = redirect(url); + let mut resp = Response::with((status::Found, Redirect(url))); resp.headers.set(CacheControl(vec![ CacheDirective::MaxAge(0), CacheDirective::SMaxAge(cache_seconds), diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index fbb2a62c8..362ce1de8 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -152,7 +152,7 @@ pub fn rustdoc_redirector_handler(req: &mut Request) -> IronResult { // redirects with an existing version in the URL can be cached for longer. let cache_redirect_for = if let Some(_) = req_version { - config.cache_rustdoc_redirects_version + config.cache_rustdoc_redirects_release } else { config.cache_rustdoc_redirects_crate }; @@ -303,7 +303,7 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult { Ok(super::cached_redirect( url, - config.cache_rustdoc_redirects_version, + config.cache_rustdoc_redirects_release, )) }; @@ -575,7 +575,7 @@ pub fn target_redirect_handler(req: &mut Request) -> IronResult { let url = ctry!(req, Url::parse(&url)); Ok(super::cached_redirect( url, - config.cache_rustdoc_redirects_version, + config.cache_rustdoc_redirects_release, )) } From cec4735beda241c9166ef20b644bf6e751b16f10 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Mon, 25 Jan 2021 20:05:05 +0100 Subject: [PATCH 6/7] differentiate between exact- and semver-matches --- src/web/mod.rs | 2 +- src/web/rustdoc.rs | 16 +++++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/web/mod.rs b/src/web/mod.rs index 61d43d337..d731e1a31 100644 --- a/src/web/mod.rs +++ b/src/web/mod.rs @@ -99,7 +99,7 @@ use extensions::InjectExtensions; use failure::Error; use iron::{ self, - headers::{CacheControl, CacheDirective, Expires, HttpDate}, + headers::{CacheControl, CacheDirective}, modifiers::Redirect, status, status::Status, diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index 362ce1de8..b6c5d9e63 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -150,13 +150,6 @@ pub fn rustdoc_redirector_handler(req: &mut Request) -> IronResult { let req_version = router.find("version"); let mut target = router.find("target"); - // redirects with an existing version in the URL can be cached for longer. - let cache_redirect_for = if let Some(_) = req_version { - config.cache_rustdoc_redirects_release - } else { - config.cache_rustdoc_redirects_crate - }; - // it doesn't matter if the version that was given was exact or not, since we're redirecting // anyway rendering_time.step("match version"); @@ -166,6 +159,15 @@ pub fn rustdoc_redirector_handler(req: &mut Request) -> IronResult { // use that instead crate_name = new_name; } + + // Redirects with an exact SemVer match can be cached for longer. + // This is also true for corrected names. + let cache_redirect_for = if let MatchSemver::Exact(_) = v.version { + config.cache_rustdoc_redirects_release + } else { + config.cache_rustdoc_redirects_crate + }; + let (version, id) = v.version.into_parts(); // get target name and whether it has docs From 2c9d239fe025abd88dc1ceee296ef3e9d4ec25bc Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Tue, 26 Jan 2021 22:18:29 +0100 Subject: [PATCH 7/7] wip, possibly move assert-redirect tests --- src/test/mod.rs | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/src/test/mod.rs b/src/test/mod.rs index 5e839ecc4..10dee291b 100644 --- a/src/test/mod.rs +++ b/src/test/mod.rs @@ -11,7 +11,7 @@ use once_cell::unsync::OnceCell; use postgres::Client as Connection; use reqwest::{ blocking::{Client, RequestBuilder}, - Method, + redirect, Method, StatusCode, Url, }; use std::{panic, sync::Arc}; @@ -52,21 +52,39 @@ pub(crate) fn assert_redirect( expected_target: &str, web: &TestFrontend, ) -> Result<(), Error> { - // Reqwest follows redirects automatically let response = web.get(path).send()?; + let status = response.status(); + assert_eq!(status, StatusCode::FOUND); + + println!("path: {:?}", path); + println!("expected target: {:?}", expected_target); + + let redirect_target = Url::parse( + response + .headers() + .get("Location") + .expect("Response doesn't have Location header") + .to_str() + .unwrap(), + ) + .expect("could not parse redirect location"); + + println!("\nredirect target: {:?}", redirect_target); let mut tmp; let redirect_target = if expected_target.starts_with("https://") { - response.url().as_str() + redirect_target.as_str() } else { - tmp = String::from(response.url().path()); - if let Some(query) = response.url().query() { + tmp = String::from(redirect_target.path()); + if let Some(query) = redirect_target.query() { tmp.push('?'); tmp.push_str(query); } &tmp }; + println!("converted redirect target: {:?}", redirect_target); + // Either we followed a redirect to the wrong place, or there was no redirect if redirect_target != expected_target { // wrong place @@ -83,6 +101,9 @@ pub(crate) fn assert_redirect( ); } } + + let target_response = web.get(redirect_target).send()?; + let status = target_response.status(); assert!( status.is_success(), "failed to GET {}: {}", @@ -333,7 +354,10 @@ impl TestFrontend { Self { server: Server::start(Some("127.0.0.1:0"), false, context) .expect("failed to start the web server"), - client: Client::new(), + client: Client::builder() + .redirect(redirect::Policy::none()) + .build() + .unwrap(), } }