From fd50473a327646830fe7ac80f43f90321afa8a2b Mon Sep 17 00:00:00 2001 From: KtorZ Date: Thu, 14 Mar 2024 19:29:51 +0100 Subject: [PATCH 1/3] Only compile modules the project depends on MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change ensures that we only compile modules from dependencies that are used (or transitively used) in the project. This allows discarding entire compilation steps at a module level, for modules that we do not use. The main goal of this change isn't performance. It's about making dependency management slightly easier while we decide whether and how we want to manage transitive dependencies in Aiken. A concrete case here is aiken-lang/stdlib, which will soon depend on aiken-lang/fuzz. However, we do not want to require every single project depending on stdlib to also require fuzz. So instead, we want to segregate the fuzz API from stdlib into a separate module, and only compile those modules if they appear in the pruned dependency graph. While the goal isn't performance, here are some benchmarks analyzing the performance of deps pruning on a simple project depending on a few modules from stdlib: Benchmark 1: ./aiken-without-deps-pruning check scratchpad Time (mean ± σ): 190.3 ms ± 101.1 ms [User: 584.5 ms, System: 14.2 ms] Range (min … max): 153.0 ms … 477.7 ms 10 runs Benchmark 2: ./aiken-with-deps-pruning check scratchpad Time (mean ± σ): 162.3 ms ± 46.3 ms [User: 572.6 ms, System: 14.0 ms] Range (min … max): 142.8 ms … 293.7 ms 10 runs As we can see, this change seems to have an overall positive impact on the compilation time. 
--- crates/aiken-project/src/lib.rs | 36 ++++++++++--------- crates/aiken-project/src/module.rs | 55 +++++++++++++++++++++++------- 2 files changed, 63 insertions(+), 28 deletions(-) diff --git a/crates/aiken-project/src/lib.rs b/crates/aiken-project/src/lib.rs index a8fba735..212d4545 100644 --- a/crates/aiken-project/src/lib.rs +++ b/crates/aiken-project/src/lib.rs @@ -51,7 +51,7 @@ use pallas::ledger::{ traverse::ComputeHash, }; use std::{ - collections::HashMap, + collections::{HashMap, HashSet}, fs::{self, File}, io::BufReader, path::{Path, PathBuf}, @@ -185,8 +185,6 @@ where destination: Option, include_dependencies: bool, ) -> Result<(), Vec> { - self.compile_deps()?; - self.event_listener .handle_event(Event::BuildingDocumentation { root: self.root.clone(), @@ -198,9 +196,13 @@ where let destination = destination.unwrap_or_else(|| self.root.join("docs")); - let parsed_modules = self.parse_sources(self.config.name.clone())?; + let mut modules = self.parse_sources(self.config.name.clone())?; - self.type_check(parsed_modules, Tracing::silent(), false, false)?; + let our_modules: HashSet = modules.keys().cloned().collect(); + + self.with_dependencies(&mut modules)?; + + self.type_check(&our_modules, modules, Tracing::silent(), false)?; self.event_listener.handle_event(Event::GeneratingDocFiles { output_path: destination.clone(), @@ -283,8 +285,6 @@ where } pub fn compile(&mut self, options: Options) -> Result<(), Vec> { - self.compile_deps()?; - self.event_listener .handle_event(Event::StartingCompilation { root: self.root.clone(), @@ -294,9 +294,13 @@ where self.read_source_files()?; - let parsed_modules = self.parse_sources(self.config.name.clone())?; + let mut modules = self.parse_sources(self.config.name.clone())?; - self.type_check(parsed_modules, options.tracing, true, false)?; + let our_modules: HashSet = modules.keys().cloned().collect(); + + self.with_dependencies(&mut modules)?; + + self.type_check(&our_modules, modules, options.tracing, true)?; match 
options.code_gen_mode { CodeGenMode::Build(uplc_dump) => { @@ -537,7 +541,7 @@ where Ok(blueprint) } - fn compile_deps(&mut self) -> Result<(), Vec> { + fn with_dependencies(&mut self, parsed_packages: &mut ParsedModules) -> Result<(), Vec> { let manifest = deps::download(&self.event_listener, &self.root, &self.config)?; for package in manifest.packages { @@ -565,7 +569,7 @@ where .retain(|def| !matches!(def, Definition::Test { .. })) }); - self.type_check(parsed_modules, Tracing::silent(), true, true)?; + parsed_packages.extend(Into::>::into(parsed_modules)); } Ok(()) @@ -680,12 +684,12 @@ where fn type_check( &mut self, - mut parsed_modules: ParsedModules, + our_modules: &HashSet, + mut all_modules: ParsedModules, tracing: Tracing, validate_module_name: bool, - is_dependency: bool, ) -> Result<(), Error> { - let processing_sequence = parsed_modules.sequence()?; + let processing_sequence = all_modules.sequence(our_modules)?; for name in processing_sequence { if let Some(ParsedModule { @@ -696,7 +700,7 @@ where extra, package, ast, - }) = parsed_modules.remove(&name) + }) = all_modules.remove(&name) { let mut type_warnings = Vec::new(); @@ -725,7 +729,7 @@ where .into_iter() .map(|w| Warning::from_type_warning(w, path.clone(), code.clone())); - if !is_dependency { + if our_modules.contains(name.as_str()) { self.warnings.extend(type_warnings); } diff --git a/crates/aiken-project/src/module.rs b/crates/aiken-project/src/module.rs index 2d90dbd1..cf6812e5 100644 --- a/crates/aiken-project/src/module.rs +++ b/crates/aiken-project/src/module.rs @@ -47,7 +47,7 @@ impl ParsedModules { Self(HashMap::new()) } - pub fn sequence(&self) -> Result, Error> { + pub fn sequence(&self, our_modules: &HashSet) -> Result, Error> { let inputs = self .0 .values() @@ -56,18 +56,18 @@ impl ParsedModules { let capacity = inputs.len(); - let mut graph = Graph::<(), ()>::with_capacity(capacity, capacity * 5); + let mut graph = Graph::::with_capacity(capacity, capacity * 5); - // TODO: maybe 
use a bimap? let mut indices = HashMap::with_capacity(capacity); - let mut values = HashMap::with_capacity(capacity); + + let mut our_indices = HashSet::with_capacity(our_modules.len()); for (value, _) in &inputs { - let index = graph.add_node(()); - + let index = graph.add_node(value.to_string()); indices.insert(value.clone(), index); - - values.insert(index, value.clone()); + if our_modules.contains(value) { + our_indices.insert(index); + } } for (value, deps) in inputs { @@ -80,12 +80,42 @@ impl ParsedModules { } } + let mut messed_up_indices = false; + + // Prune the dependency graph to only keep nodes that have a path to one of our (i.e. the + // current project) module. This effectively prunes dependencies that are unused from the + // graph to ensure that we only compile the modules we actually depend on. + graph.retain_nodes(|graph, ix| { + // When discarding a node, indices in the graph end up being rewritten. Yet, we need to + // know starting indices for our search, so when we remove a dependency, we need find + // back what those indices are. 
+ if messed_up_indices { + our_indices = HashSet::with_capacity(our_modules.len()); + for j in graph.node_indices() { + if our_modules.contains(graph[j].as_str()) { + our_indices.insert(j); + } + } + } + + for start in our_indices.iter() { + if algo::astar(&*graph, *start, |end| end == ix, |_| 1, |_| 0).is_some() { + messed_up_indices = false; + return true; + } + } + + messed_up_indices = true; + false + }); + match algo::toposort(&graph, None) { Ok(sequence) => { let sequence = sequence .iter() - .filter_map(|i| values.remove(i)) + .filter_map(|i| graph.node_weight(*i)) .rev() + .cloned() .collect(); Ok(sequence) @@ -99,7 +129,8 @@ impl ParsedModules { let modules = path .iter() - .filter_map(|index| values.remove(index)) + .filter_map(|i| graph.node_weight(*i)) + .cloned() .collect(); Err(Error::ImportCycle { modules }) @@ -140,10 +171,10 @@ impl DerefMut for ParsedModules { } } -fn find_cycle( +fn find_cycle( origin: NodeIndex, parent: NodeIndex, - graph: &petgraph::Graph<(), ()>, + graph: &petgraph::Graph, path: &mut Vec, seen: &mut HashSet, ) -> bool { From 1caed3e87c155c226671bbbc8b8d8bd799a396c3 Mon Sep 17 00:00:00 2001 From: KtorZ Date: Thu, 14 Mar 2024 23:08:39 +0100 Subject: [PATCH 2/3] Use BTreeSet instead of HashSet whenever possible. 
--- crates/aiken-project/src/lib.rs | 8 ++++---- crates/aiken-project/src/module.rs | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/crates/aiken-project/src/lib.rs b/crates/aiken-project/src/lib.rs index 212d4545..cea3b478 100644 --- a/crates/aiken-project/src/lib.rs +++ b/crates/aiken-project/src/lib.rs @@ -51,7 +51,7 @@ use pallas::ledger::{ traverse::ComputeHash, }; use std::{ - collections::{HashMap, HashSet}, + collections::{BTreeSet, HashMap}, fs::{self, File}, io::BufReader, path::{Path, PathBuf}, @@ -198,7 +198,7 @@ where let mut modules = self.parse_sources(self.config.name.clone())?; - let our_modules: HashSet = modules.keys().cloned().collect(); + let our_modules: BTreeSet = modules.keys().cloned().collect(); self.with_dependencies(&mut modules)?; @@ -296,7 +296,7 @@ where let mut modules = self.parse_sources(self.config.name.clone())?; - let our_modules: HashSet = modules.keys().cloned().collect(); + let our_modules: BTreeSet = modules.keys().cloned().collect(); self.with_dependencies(&mut modules)?; @@ -684,7 +684,7 @@ where fn type_check( &mut self, - our_modules: &HashSet, + our_modules: &BTreeSet, mut all_modules: ParsedModules, tracing: Tracing, validate_module_name: bool, diff --git a/crates/aiken-project/src/module.rs b/crates/aiken-project/src/module.rs index cf6812e5..5d3f281e 100644 --- a/crates/aiken-project/src/module.rs +++ b/crates/aiken-project/src/module.rs @@ -8,7 +8,7 @@ use aiken_lang::{ }; use petgraph::{algo, graph::NodeIndex, Direction, Graph}; use std::{ - collections::{HashMap, HashSet}, + collections::{BTreeSet, HashMap}, io, ops::{Deref, DerefMut}, path::PathBuf, @@ -47,7 +47,7 @@ impl ParsedModules { Self(HashMap::new()) } - pub fn sequence(&self, our_modules: &HashSet) -> Result, Error> { + pub fn sequence(&self, our_modules: &BTreeSet) -> Result, Error> { let inputs = self .0 .values() @@ -60,7 +60,7 @@ impl ParsedModules { let mut indices = HashMap::with_capacity(capacity); - let mut 
our_indices = HashSet::with_capacity(our_modules.len()); + let mut our_indices = BTreeSet::new(); for (value, _) in &inputs { let index = graph.add_node(value.to_string()); @@ -90,7 +90,7 @@ impl ParsedModules { // know starting indices for our search, so when we remove a dependency, we need find // back what those indices are. if messed_up_indices { - our_indices = HashSet::with_capacity(our_modules.len()); + our_indices = BTreeSet::new(); for j in graph.node_indices() { if our_modules.contains(graph[j].as_str()) { our_indices.insert(j); @@ -125,7 +125,7 @@ impl ParsedModules { let mut path = vec![]; - find_cycle(origin, origin, &graph, &mut path, &mut HashSet::new()); + find_cycle(origin, origin, &graph, &mut path, &mut BTreeSet::new()); let modules = path .iter() @@ -176,7 +176,7 @@ fn find_cycle( parent: NodeIndex, graph: &petgraph::Graph, path: &mut Vec, - seen: &mut HashSet, + seen: &mut BTreeSet, ) -> bool { seen.insert(parent); From 9986bc6bfd6d7f67ad94e325b6a141ec989f6673 Mon Sep 17 00:00:00 2001 From: KtorZ Date: Fri, 15 Mar 2024 00:05:37 +0100 Subject: [PATCH 3/3] Remove duplication between docs & compile And move some logic out of project/lib to be near the CheckedModule instead. The project API is already quite heavy and long, so making it more lightweight is generally what we want to tend to. 
--- crates/aiken-project/src/lib.rs | 99 +++++++----------------------- crates/aiken-project/src/module.rs | 80 +++++++++++++++++++++++- 2 files changed, 100 insertions(+), 79 deletions(-) diff --git a/crates/aiken-project/src/lib.rs b/crates/aiken-project/src/lib.rs index cea3b478..e0b8f657 100644 --- a/crates/aiken-project/src/lib.rs +++ b/crates/aiken-project/src/lib.rs @@ -194,15 +194,11 @@ where self.read_source_files()?; - let destination = destination.unwrap_or_else(|| self.root.join("docs")); - let mut modules = self.parse_sources(self.config.name.clone())?; - let our_modules: BTreeSet = modules.keys().cloned().collect(); + self.type_check(&mut modules, Tracing::silent(), false)?; - self.with_dependencies(&mut modules)?; - - self.type_check(&our_modules, modules, Tracing::silent(), false)?; + let destination = destination.unwrap_or_else(|| self.root.join("docs")); self.event_listener.handle_event(Event::GeneratingDocFiles { output_path: destination.clone(), @@ -296,11 +292,7 @@ where let mut modules = self.parse_sources(self.config.name.clone())?; - let our_modules: BTreeSet = modules.keys().cloned().collect(); - - self.with_dependencies(&mut modules)?; - - self.type_check(&our_modules, modules, options.tracing, true)?; + self.type_check(&mut modules, options.tracing, true)?; match options.code_gen_mode { CodeGenMode::Build(uplc_dump) => { @@ -684,78 +676,33 @@ where fn type_check( &mut self, - our_modules: &BTreeSet, - mut all_modules: ParsedModules, + modules: &mut ParsedModules, tracing: Tracing, validate_module_name: bool, - ) -> Result<(), Error> { - let processing_sequence = all_modules.sequence(our_modules)?; + ) -> Result<(), Vec> { + let our_modules: BTreeSet = modules.keys().cloned().collect(); - for name in processing_sequence { - if let Some(ParsedModule { - name, - path, - code, - kind, - extra, - package, - ast, - }) = all_modules.remove(&name) - { - let mut type_warnings = Vec::new(); + self.with_dependencies(modules)?; - let ast = ast - 
.infer( - &self.id_gen, - kind, - &self.config.name.to_string(), - &self.module_types, - tracing, - &mut type_warnings, - ) - .map_err(|error| Error::Type { - path: path.clone(), - src: code.clone(), - named: NamedSource::new(path.display().to_string(), code.clone()), - error, - })?; + for name in modules.sequence(&our_modules)? { + if let Some(module) = modules.remove(&name) { + let (checked_module, warnings) = module.infer( + &self.id_gen, + &self.config.name.to_string(), + tracing, + validate_module_name, + &mut self.module_sources, + &mut self.module_types, + &mut self.functions, + &mut self.data_types, + )?; - if validate_module_name { - ast.validate_module_name()?; + if our_modules.contains(checked_module.name.as_str()) { + self.warnings.extend(warnings); } - // Register any warnings emitted as type warnings - let type_warnings = type_warnings - .into_iter() - .map(|w| Warning::from_type_warning(w, path.clone(), code.clone())); - - if our_modules.contains(name.as_str()) { - self.warnings.extend(type_warnings); - } - - // Register module sources for an easier access later. - self.module_sources - .insert(name.clone(), (code.clone(), LineNumbers::new(&code))); - - // Register the types from this module so they can be - // imported into other modules. - self.module_types - .insert(name.clone(), ast.type_info.clone()); - - // Register function definitions & data-types for easier access later. 
- ast.register_definitions(&mut self.functions, &mut self.data_types); - - let checked_module = CheckedModule { - kind, - extra, - name: name.clone(), - code, - ast, - package, - input_path: path, - }; - - self.checked_modules.insert(name, checked_module); + self.checked_modules + .insert(checked_module.name.clone(), checked_module); } } diff --git a/crates/aiken-project/src/module.rs b/crates/aiken-project/src/module.rs index 5d3f281e..ff70e38a 100644 --- a/crates/aiken-project/src/module.rs +++ b/crates/aiken-project/src/module.rs @@ -1,11 +1,17 @@ -use crate::error::Error; +use crate::{Error, Warning}; use aiken_lang::{ ast::{ - DataType, Definition, Function, Located, ModuleKind, TypedModule, TypedValidator, - UntypedModule, Validator, + DataType, DataTypeKey, Definition, Function, FunctionAccessKey, Located, ModuleKind, + Tracing, TypedDataType, TypedFunction, TypedModule, TypedValidator, UntypedModule, + Validator, }, + line_numbers::LineNumbers, parser::extra::{comments_before, Comment, ModuleExtra}, + tipo::TypeInfo, + IdGenerator, }; +use indexmap::IndexMap; +use miette::NamedSource; use petgraph::{algo, graph::NodeIndex, Direction, Graph}; use std::{ collections::{BTreeSet, HashMap}, @@ -38,6 +44,74 @@ impl ParsedModule { (name, deps) } + + #[allow(clippy::too_many_arguments)] + pub fn infer( + self, + id_gen: &IdGenerator, + package: &str, + tracing: Tracing, + validate_module_name: bool, + module_sources: &mut HashMap, + module_types: &mut HashMap, + functions: &mut IndexMap, + data_types: &mut IndexMap, + ) -> Result<(CheckedModule, Vec), Error> { + let mut warnings = Vec::new(); + + let ast = self + .ast + .infer( + id_gen, + self.kind, + package, + module_types, + tracing, + &mut warnings, + ) + .map_err(|error| Error::Type { + path: self.path.clone(), + src: self.code.clone(), + named: NamedSource::new(self.path.display().to_string(), self.code.clone()), + error, + })?; + + let warnings = warnings + .into_iter() + .map(|w| 
Warning::from_type_warning(w, self.path.clone(), self.code.clone())) + .collect::>(); + + // Unless we're compiling prelude documentation, prevent keywords in module name + if validate_module_name { + ast.validate_module_name()?; + } + + // Register module sources for an easier access later. + module_sources.insert( + self.name.clone(), + (self.code.clone(), LineNumbers::new(&self.code)), + ); + + // Register the types from this module so they can be + // imported into other modules. + module_types.insert(self.name.clone(), ast.type_info.clone()); + + // Register function definitions & data-types for easier access later. + ast.register_definitions(functions, data_types); + + Ok(( + CheckedModule { + ast, + kind: self.kind, + extra: self.extra, + name: self.name, + code: self.code, + package: self.package, + input_path: self.path, + }, + warnings, + )) + } } pub struct ParsedModules(HashMap);