Throttle calls to package registry for version resolution

The 'HEAD' call that is done to resolve package revisions from
  unpinned versions is already quite cheap, but it would still be better
  to avoid overloading GitHub with such calls; especially for users of a
  language-server that would compile on-the-fly very often. Upstream
  packages don't change often so there's no need to constantly check the
  etag.

  So we now keep a local version of etags that we fetched, as well as a
  timestamp from the last time we fetched them so that we only re-fetch
  them if more than an hour has elapsed. This should be fairly resilient
  while still massively improving the UX for people showing up after a
  day and trying to use latest 'main' features.

  This means that we now effectively have two caching levels:

  - In the manifest, we store previously fetched etags.
  - In the filesystem, we have a cache of already downloaded zip archives.

  The first cache is basically invalidated every hour, while the second
  cache is only invalidated when an etag changes. For pinned versions,
  nothing is invalidated as they are considered immutable.
This commit is contained in:
KtorZ 2023-09-08 15:54:25 +02:00 committed by Lucas
parent 15efeb3069
commit c711a97e69
5 changed files with 90 additions and 41 deletions

View File

@ -100,11 +100,10 @@ impl LocalPackages {
pub fn missing_local_packages<'a>( pub fn missing_local_packages<'a>(
&self, &self,
manifest: &'a Manifest, packages: &'a [Package],
root: &PackageName, root: &PackageName,
) -> Vec<&'a Package> { ) -> Vec<&'a Package> {
manifest packages
.packages
.iter() .iter()
.filter(|p| { .filter(|p| {
&p.name != root &p.name != root
@ -159,14 +158,14 @@ where
let runtime = tokio::runtime::Runtime::new().expect("Unable to start Tokio"); let runtime = tokio::runtime::Runtime::new().expect("Unable to start Tokio");
let (manifest, changed) = Manifest::load(event_listener, config, root_path)?; let (mut manifest, changed) = Manifest::load(event_listener, config, root_path)?;
let local = LocalPackages::load(root_path)?; let local = LocalPackages::load(root_path)?;
local.remove_extra_packages(&manifest, root_path)?; local.remove_extra_packages(&manifest, root_path)?;
runtime.block_on(fetch_missing_packages( runtime.block_on(fetch_missing_packages(
&manifest, &mut manifest,
&local, &local,
project_name, project_name,
root_path, root_path,
@ -183,7 +182,7 @@ where
} }
async fn fetch_missing_packages<T>( async fn fetch_missing_packages<T>(
manifest: &Manifest, manifest: &mut Manifest,
local: &LocalPackages, local: &LocalPackages,
project_name: PackageName, project_name: PackageName,
root_path: &Path, root_path: &Path,
@ -192,8 +191,10 @@ async fn fetch_missing_packages<T>(
where where
T: EventListener, T: EventListener,
{ {
let packages = manifest.packages.to_owned();
let mut missing = local let mut missing = local
.missing_local_packages(manifest, &project_name) .missing_local_packages(&packages, &project_name)
.into_iter() .into_iter()
.peekable(); .peekable();
@ -207,7 +208,7 @@ where
let downloader = Downloader::new(root_path); let downloader = Downloader::new(root_path);
let statuses = downloader let statuses = downloader
.download_packages(event_listener, missing, &project_name) .download_packages(event_listener, missing, &project_name, manifest)
.await?; .await?;
let downloaded_from_network = statuses let downloaded_from_network = statuses
@ -235,5 +236,5 @@ where
} }
} }
Ok(()) manifest.save(root_path)
} }

View File

@ -8,6 +8,7 @@ use reqwest::Client;
use zip::result::ZipError; use zip::result::ZipError;
use crate::{ use crate::{
deps::manifest::Manifest,
error::Error, error::Error,
package_name::PackageName, package_name::PackageName,
paths::{self, CacheKey}, paths::{self, CacheKey},
@ -34,28 +35,29 @@ impl<'a> Downloader<'a> {
event_listener: &T, event_listener: &T,
packages: I, packages: I,
project_name: &PackageName, project_name: &PackageName,
manifest: &mut Manifest,
) -> Result<Vec<(PackageName, bool)>, Error> ) -> Result<Vec<(PackageName, bool)>, Error>
where where
T: EventListener, T: EventListener,
I: Iterator<Item = &'a Package>, I: Iterator<Item = &'a Package>,
{ {
future::try_join_all( let mut tasks = vec![];
packages
.filter(|package| project_name != &package.name) for package in packages.filter(|package| project_name != &package.name) {
.map(|package| self.ensure_package_in_build_directory(event_listener, package)), let cache_key =
) paths::CacheKey::new(&self.http, event_listener, package, manifest).await?;
.await let task = self.ensure_package_in_build_directory(package, cache_key);
tasks.push(task);
} }
pub async fn ensure_package_in_build_directory<T>( future::try_join_all(tasks).await
}
pub async fn ensure_package_in_build_directory(
&self, &self,
event_listener: &T,
package: &Package, package: &Package,
) -> Result<(PackageName, bool), Error> cache_key: CacheKey,
where ) -> Result<(PackageName, bool), Error> {
T: EventListener,
{
let cache_key = paths::CacheKey::new(&self.http, event_listener, package).await?;
let downloaded = self let downloaded = self
.ensure_package_downloaded(package, &cache_key) .ensure_package_downloaded(package, &cache_key)
.await .await

View File

@ -1,8 +1,12 @@
use std::{fs, path::Path};
use aiken_lang::ast::Span; use aiken_lang::ast::Span;
use miette::NamedSource; use miette::NamedSource;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::{
collections::BTreeMap,
fs,
path::Path,
time::{Duration, SystemTime},
};
use crate::{ use crate::{
config::{Config, Dependency, Platform}, config::{Config, Dependency, Platform},
@ -16,6 +20,8 @@ use crate::{
pub struct Manifest { pub struct Manifest {
pub requirements: Vec<Dependency>, pub requirements: Vec<Dependency>,
pub packages: Vec<Package>, pub packages: Vec<Package>,
#[serde(default)]
pub etags: BTreeMap<String, (SystemTime, String)>,
} }
impl Manifest { impl Manifest {
@ -76,6 +82,34 @@ impl Manifest {
Ok(()) Ok(())
} }
pub fn lookup_etag(&self, package: &Package) -> Option<String> {
match self.etags.get(&etag_key(package)) {
None => None,
Some((last_fetched, etag)) => {
let elapsed = SystemTime::now().duration_since(*last_fetched).unwrap();
                // Discard any etag older than an hour. So that we throttle calls to the package
                // registry but we ensure a relatively good synchronization of local packages.
if elapsed > Duration::from_secs(3600) {
None
} else {
Some(etag.clone())
}
}
}
}
pub fn insert_etag(&mut self, package: &Package, etag: String) {
self.etags
.insert(etag_key(package), (SystemTime::now(), etag));
}
}
fn etag_key(package: &Package) -> String {
format!(
"{}/{}@{}",
package.name.owner, package.name.repo, package.version
)
} }
#[derive(Deserialize, Serialize, Clone, Debug)] #[derive(Deserialize, Serialize, Clone, Debug)]
@ -104,6 +138,7 @@ where
}) })
.collect(), .collect(),
requirements: config.dependencies.clone(), requirements: config.dependencies.clone(),
etags: BTreeMap::new(),
}; };
Ok(manifest) Ok(manifest)

View File

@ -1,5 +1,6 @@
use crate::deps::manifest::Package; use crate::deps::manifest::Package;
use crate::{ use crate::{
deps::manifest::Manifest,
error::Error, error::Error,
package_name::PackageName, package_name::PackageName,
telemetry::{Event, EventListener}, telemetry::{Event, EventListener},
@ -56,6 +57,7 @@ impl CacheKey {
http: &Client, http: &Client,
event_listener: &T, event_listener: &T,
package: &Package, package: &Package,
manifest: &mut Manifest,
) -> Result<CacheKey, Error> ) -> Result<CacheKey, Error>
where where
T: EventListener, T: EventListener,
@ -65,14 +67,26 @@ impl CacheKey {
if is_git_sha_or_tag(&package.version) { if is_git_sha_or_tag(&package.version) {
Ok(package.version.to_string()) Ok(package.version.to_string())
} else { } else {
match new_cache_key_from_network(http, package).await { match manifest.lookup_etag(package) {
None => match new_etag_from_network(http, package).await {
Err(_) => { Err(_) => {
event_listener.handle_event(Event::PackageResolveFallback { event_listener.handle_event(Event::PackageResolveFallback {
name: format!("{}", package.name), name: format!("{}", package.name),
}); });
new_cache_key_from_cache(package) new_cache_key_from_cache(package)
} }
Ok(cache_key) => Ok(cache_key), Ok(etag) => {
manifest.insert_etag(package, etag.clone());
Ok(format!(
"{version}@{etag}",
version = package.version.replace('/', "_")
))
}
},
Some(etag) => Ok(format!(
"{version}@{etag}",
version = package.version.replace('/', "_")
)),
} }
}?, }?,
)) ))
@ -89,7 +103,7 @@ impl CacheKey {
} }
} }
async fn new_cache_key_from_network(http: &Client, package: &Package) -> Result<String, Error> { async fn new_etag_from_network(http: &Client, package: &Package) -> Result<String, Error> {
let url = format!( let url = format!(
"https://api.github.com/repos/{}/{}/zipball/{}", "https://api.github.com/repos/{}/{}/zipball/{}",
package.name.owner, package.name.repo, package.version package.name.owner, package.name.repo, package.version
@ -104,14 +118,8 @@ async fn new_cache_key_from_network(http: &Client, package: &Package) -> Result<
.get("etag") .get("etag")
.ok_or(Error::UnknownPackageVersion { .ok_or(Error::UnknownPackageVersion {
package: package.clone(), package: package.clone(),
})? })?;
.to_str() Ok(etag.to_str().unwrap().replace('"', ""))
.unwrap()
.replace('"', "");
Ok(format!(
"{version}@{etag}",
version = package.version.replace('/', "_")
))
} }
fn new_cache_key_from_cache(target: &Package) -> Result<String, Error> { fn new_cache_key_from_cache(target: &Package) -> Result<String, Error> {

View File

@ -11,3 +11,6 @@ name = "aiken-lang/stdlib"
version = "main" version = "main"
requirements = [] requirements = []
source = "github" source = "github"
[etags]
"aiken-lang/stdlib@main" = [{ secs_since_epoch = 1694181032, nanos_since_epoch = 345700000 }, "e9abc03ee3dbf98637e8e2d8b115e1c95573a4168dbbbe517e8499208b67b020"]