Only compile modules the project depends on

This change ensures that we only compile modules from dependencies
  that are used (or transitively used) in the project. This allows us
  to discard entire compilation steps at the module level, for modules
  that we do not use.

  The main goal of this change isn't performance. It's about making
  dependency management slightly easier while we decide whether
  and how we want to manage transitive dependencies in Aiken.

  A concrete case here is aiken-lang/stdlib, which will soon depend on
  aiken-lang/fuzz. However, we do not want to require every single
  project depending on stdlib to also require fuzz. So instead, we want
  to segregate the fuzz API from stdlib into separate modules, and only
  compile those if they appear in the pruned dependency graph.

  While the goal isn't performance, here are some benchmarks analyzing
  the performance of deps pruning on a simple project that depends on a
  few modules from stdlib:

	Benchmark 1: ./aiken-without-deps-pruning check scratchpad
	  Time (mean ± σ):     190.3 ms ± 101.1 ms    [User: 584.5 ms, System: 14.2 ms]
	  Range (min … max):   153.0 ms … 477.7 ms    10 runs

	Benchmark 2: ./aiken-with-deps-pruning check scratchpad
	  Time (mean ± σ):     162.3 ms ±  46.3 ms    [User: 572.6 ms, System: 14.0 ms]
	  Range (min … max):   142.8 ms … 293.7 ms    10 runs

  As we can see, this change seems to have an overall positive impact on
  the compilation time.
This commit is contained in:
KtorZ 2024-03-14 19:29:51 +01:00
parent 038c5b2d34
commit fd50473a32
No known key found for this signature in database
GPG Key ID: 33173CB6F77F4277
2 changed files with 63 additions and 28 deletions

View File

@ -51,7 +51,7 @@ use pallas::ledger::{
traverse::ComputeHash,
};
use std::{
collections::HashMap,
collections::{HashMap, HashSet},
fs::{self, File},
io::BufReader,
path::{Path, PathBuf},
@ -185,8 +185,6 @@ where
destination: Option<PathBuf>,
include_dependencies: bool,
) -> Result<(), Vec<Error>> {
self.compile_deps()?;
self.event_listener
.handle_event(Event::BuildingDocumentation {
root: self.root.clone(),
@ -198,9 +196,13 @@ where
let destination = destination.unwrap_or_else(|| self.root.join("docs"));
let parsed_modules = self.parse_sources(self.config.name.clone())?;
let mut modules = self.parse_sources(self.config.name.clone())?;
self.type_check(parsed_modules, Tracing::silent(), false, false)?;
let our_modules: HashSet<String> = modules.keys().cloned().collect();
self.with_dependencies(&mut modules)?;
self.type_check(&our_modules, modules, Tracing::silent(), false)?;
self.event_listener.handle_event(Event::GeneratingDocFiles {
output_path: destination.clone(),
@ -283,8 +285,6 @@ where
}
pub fn compile(&mut self, options: Options) -> Result<(), Vec<Error>> {
self.compile_deps()?;
self.event_listener
.handle_event(Event::StartingCompilation {
root: self.root.clone(),
@ -294,9 +294,13 @@ where
self.read_source_files()?;
let parsed_modules = self.parse_sources(self.config.name.clone())?;
let mut modules = self.parse_sources(self.config.name.clone())?;
self.type_check(parsed_modules, options.tracing, true, false)?;
let our_modules: HashSet<String> = modules.keys().cloned().collect();
self.with_dependencies(&mut modules)?;
self.type_check(&our_modules, modules, options.tracing, true)?;
match options.code_gen_mode {
CodeGenMode::Build(uplc_dump) => {
@ -537,7 +541,7 @@ where
Ok(blueprint)
}
fn compile_deps(&mut self) -> Result<(), Vec<Error>> {
fn with_dependencies(&mut self, parsed_packages: &mut ParsedModules) -> Result<(), Vec<Error>> {
let manifest = deps::download(&self.event_listener, &self.root, &self.config)?;
for package in manifest.packages {
@ -565,7 +569,7 @@ where
.retain(|def| !matches!(def, Definition::Test { .. }))
});
self.type_check(parsed_modules, Tracing::silent(), true, true)?;
parsed_packages.extend(Into::<HashMap<_, _>>::into(parsed_modules));
}
Ok(())
@ -680,12 +684,12 @@ where
fn type_check(
&mut self,
mut parsed_modules: ParsedModules,
our_modules: &HashSet<String>,
mut all_modules: ParsedModules,
tracing: Tracing,
validate_module_name: bool,
is_dependency: bool,
) -> Result<(), Error> {
let processing_sequence = parsed_modules.sequence()?;
let processing_sequence = all_modules.sequence(our_modules)?;
for name in processing_sequence {
if let Some(ParsedModule {
@ -696,7 +700,7 @@ where
extra,
package,
ast,
}) = parsed_modules.remove(&name)
}) = all_modules.remove(&name)
{
let mut type_warnings = Vec::new();
@ -725,7 +729,7 @@ where
.into_iter()
.map(|w| Warning::from_type_warning(w, path.clone(), code.clone()));
if !is_dependency {
if our_modules.contains(name.as_str()) {
self.warnings.extend(type_warnings);
}

View File

@ -47,7 +47,7 @@ impl ParsedModules {
Self(HashMap::new())
}
pub fn sequence(&self) -> Result<Vec<String>, Error> {
pub fn sequence(&self, our_modules: &HashSet<String>) -> Result<Vec<String>, Error> {
let inputs = self
.0
.values()
@ -56,18 +56,18 @@ impl ParsedModules {
let capacity = inputs.len();
let mut graph = Graph::<(), ()>::with_capacity(capacity, capacity * 5);
let mut graph = Graph::<String, ()>::with_capacity(capacity, capacity * 5);
// TODO: maybe use a bimap?
let mut indices = HashMap::with_capacity(capacity);
let mut values = HashMap::with_capacity(capacity);
let mut our_indices = HashSet::with_capacity(our_modules.len());
for (value, _) in &inputs {
let index = graph.add_node(());
let index = graph.add_node(value.to_string());
indices.insert(value.clone(), index);
values.insert(index, value.clone());
if our_modules.contains(value) {
our_indices.insert(index);
}
}
for (value, deps) in inputs {
@ -80,12 +80,42 @@ impl ParsedModules {
}
}
let mut messed_up_indices = false;
// Prune the dependency graph to only keep nodes that have a path to one of our (i.e. the
// current project) module. This effectively prunes dependencies that are unused from the
// graph to ensure that we only compile the modules we actually depend on.
graph.retain_nodes(|graph, ix| {
// When discarding a node, indices in the graph end up being rewritten. Yet, we need to
// know starting indices for our search, so when we remove a dependency, we need find
// back what those indices are.
if messed_up_indices {
our_indices = HashSet::with_capacity(our_modules.len());
for j in graph.node_indices() {
if our_modules.contains(graph[j].as_str()) {
our_indices.insert(j);
}
}
}
for start in our_indices.iter() {
if algo::astar(&*graph, *start, |end| end == ix, |_| 1, |_| 0).is_some() {
messed_up_indices = false;
return true;
}
}
messed_up_indices = true;
false
});
match algo::toposort(&graph, None) {
Ok(sequence) => {
let sequence = sequence
.iter()
.filter_map(|i| values.remove(i))
.filter_map(|i| graph.node_weight(*i))
.rev()
.cloned()
.collect();
Ok(sequence)
@ -99,7 +129,8 @@ impl ParsedModules {
let modules = path
.iter()
.filter_map(|index| values.remove(index))
.filter_map(|i| graph.node_weight(*i))
.cloned()
.collect();
Err(Error::ImportCycle { modules })
@ -140,10 +171,10 @@ impl DerefMut for ParsedModules {
}
}
fn find_cycle(
fn find_cycle<W>(
origin: NodeIndex,
parent: NodeIndex,
graph: &petgraph::Graph<(), ()>,
graph: &petgraph::Graph<W, ()>,
path: &mut Vec<NodeIndex>,
seen: &mut HashSet<NodeIndex>,
) -> bool {