From eadf7094110850ceffc20dd8186a694621b1b290 Mon Sep 17 00:00:00 2001 From: KtorZ Date: Wed, 15 May 2024 13:05:52 +0200 Subject: [PATCH 1/4] Fix scope management issue when deep-inferring callee. Fixes #941. However, this currently breaks the stdlib somehow with some FreeUnique on the shrinker step of the optimizer. --- crates/aiken-lang/src/tests/check.rs | 33 ++++++++++ crates/aiken-lang/src/tipo/error.rs | 11 +++- crates/aiken-lang/src/tipo/expr.rs | 99 +++++++++++++++++++--------- crates/aiken-lang/src/tipo/infer.rs | 8 +-- 4 files changed, 113 insertions(+), 38 deletions(-) diff --git a/crates/aiken-lang/src/tests/check.rs b/crates/aiken-lang/src/tests/check.rs index 273ff485..68fd3b7d 100644 --- a/crates/aiken-lang/src/tests/check.rs +++ b/crates/aiken-lang/src/tests/check.rs @@ -2483,3 +2483,36 @@ fn not_indexable() { Err((_, Error::NotIndexable { .. })) )) } + +#[test] +fn out_of_scope_access() { + let source_code = r#" + pub fn a(x: Int) { + b(x) + } + + fn b(y: Int) { + x + y + } + "#; + + assert!(matches!( + dbg!(check_validator(parse(source_code))), + Err((_, Error::UnknownVariable { .. })) + )) +} + +#[test] +fn mutually_recursive_1() { + let source_code = r#" + pub fn foo(x) { + bar(x) + } + + pub fn bar(y) { + foo(y) + } + "#; + + assert!(check(parse(source_code)).is_ok()); +} diff --git a/crates/aiken-lang/src/tipo/error.rs b/crates/aiken-lang/src/tipo/error.rs index 58639924..53d5192a 100644 --- a/crates/aiken-lang/src/tipo/error.rs +++ b/crates/aiken-lang/src/tipo/error.rs @@ -1,6 +1,6 @@ use super::Type; use crate::{ - ast::{Annotation, BinOp, CallArg, LogicalOpChainKind, Span, UntypedPattern}, + ast::{Annotation, BinOp, CallArg, LogicalOpChainKind, Span, UntypedFunction, UntypedPattern}, error::ExtraData, expr::{self, UntypedExpr}, format::Formatter, @@ -1028,6 +1028,12 @@ The best thing to do from here is to remove it."#))] #[label("unbound generic at boundary")] location: Span, }, + + #[error("Cannot infer caller without inferring callee first")] + MustInferFirst { + function: UntypedFunction, + location: Span, + }, } impl ExtraData for Error { @@ -1085,7 +1091,8 @@ impl ExtraData for Error { | Error::GenericLeftAtBoundary { .. } | Error::UnexpectedMultiPatternAssignment { .. } | Error::ExpectOnOpaqueType { .. } - | Error::ValidatorMustReturnBool { .. } => None, + | Error::ValidatorMustReturnBool { .. } + | Error::MustInferFirst { .. } => None, Error::UnknownType { name, .. } | Error::UnknownTypeConstructor { name, .. } diff --git a/crates/aiken-lang/src/tipo/expr.rs b/crates/aiken-lang/src/tipo/expr.rs index 2b40100b..6045150d 100644 --- a/crates/aiken-lang/src/tipo/expr.rs +++ b/crates/aiken-lang/src/tipo/expr.rs @@ -26,7 +26,12 @@ use crate::{ line_numbers::LineNumbers, tipo::{fields::FieldMap, PatternConstructor, TypeVar}, }; -use std::{cmp::Ordering, collections::HashMap, ops::Deref, rc::Rc}; +use std::{ + cmp::Ordering, + collections::{BTreeSet, HashMap}, + ops::Deref, + rc::Rc, +}; use vec1::Vec1; pub(crate) fn infer_function( @@ -66,33 +71,67 @@ pub(crate) fn infer_function( .function_types() .unwrap_or_else(|| panic!("Preregistered type for fn {name} was not a fn")); + // ━━━ open new scope ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + let initial_scope = environment.open_new_scope(); + + let arguments = arguments + .iter() + .zip(&args_types) + .map(|(arg_name, tipo)| arg_name.to_owned().set_type(tipo.clone())) + .collect(); + + let hydrator = hydrators + .remove(name) + .unwrap_or_else(|| panic!("Could not find hydrator for fn {name}")); + + let mut expr_typer = ExprTyper::new(environment, lines, tracing); + expr_typer.hydrator = hydrator; + expr_typer.unseen = BTreeSet::from_iter(hydrators.keys().cloned()); + // Infer the type using the preregistered args + return types as a starting point - let (tipo, arguments, body, safe_to_generalise) = environment.in_new_scope(|environment| { - let args = arguments - .iter() - .zip(&args_types) - .map(|(arg_name, tipo)| arg_name.to_owned().set_type(tipo.clone())) - .collect(); + let inferred = + expr_typer.infer_fn_with_known_types(arguments, body.to_owned(), Some(return_type)); - let hydrator = hydrators - .remove(name) - .unwrap_or_else(|| panic!("Could not find hydrator for fn {name}")); + // We try to always perform a deep-first inferrence. So callee are inferred before callers, + // since this provides better -- and necessary -- information in particular with regards to + // generics. + // + // In principle, the compiler requires function definitions to be processed *in order*. So if + // A calls B, we must have inferred B before A. This is detected during inferrence, and we + // raise an error about it. Here however, we backtrack from that error and infer the caller + // first. Then, re-attempt to infer the current function. It may takes multiple attempts, but + // should eventually succeed. + // + // Note that we need to close the scope before backtracking to not mess with the scope of the + // callee. Otherwise, identifiers present in the caller's scope may become available to the + // callee. + if let Err(Error::MustInferFirst { function, .. }) = inferred { + hydrators.insert(name.to_string(), expr_typer.hydrator); - let mut expr_typer = ExprTyper::new(environment, hydrators, lines, tracing); + environment.close_scope(initial_scope); - expr_typer.hydrator = hydrator; + infer_function( + &function, + environment.current_module, + hydrators, + environment, + lines, + tracing, + )?; - let (args, body, return_type) = - expr_typer.infer_fn_with_known_types(args, body.to_owned(), Some(return_type))?; + return infer_function(fun, module_name, hydrators, environment, lines, tracing); + } - let args_types = args.iter().map(|a| a.tipo.clone()).collect(); + let (arguments, body, return_type) = inferred?; - let tipo = function(args_types, return_type); + let args_types = arguments.iter().map(|a| a.tipo.clone()).collect(); - let safe_to_generalise = !expr_typer.ungeneralised_function_used; + let tipo = function(args_types, return_type); - Ok::<_, Error>((tipo, args, body, safe_to_generalise)) - })?; + let safe_to_generalise = !expr_typer.ungeneralised_function_used; + + environment.close_scope(initial_scope); + // ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ // Assert that the inferred type matches the type of any recursive call environment.unify(preregistered_type, tipo.clone(), *location, false)?; @@ -147,8 +186,6 @@ pub(crate) struct ExprTyper<'a, 'b> { pub(crate) environment: &'a mut Environment<'b>, - pub(crate) hydrators: &'a mut HashMap, - // We tweak the tracing behavior during type-check. Traces are either kept or left out of the // typed AST depending on this setting. pub(crate) tracing: Tracing, @@ -156,6 +193,9 @@ pub(crate) struct ExprTyper<'a, 'b> { // Type hydrator for creating types from annotations pub(crate) hydrator: Hydrator, + // A static set of remaining function names that are known but not yet inferred + pub(crate) unseen: BTreeSet, + // We keep track of whether any ungeneralised functions have been used // to determine whether it is safe to generalise this expression after // it has been inferred. @@ -165,14 +205,13 @@ pub(crate) struct ExprTyper<'a, 'b> { impl<'a, 'b> ExprTyper<'a, 'b> { pub fn new( environment: &'a mut Environment<'b>, - hydrators: &'a mut HashMap, lines: &'a LineNumbers, tracing: Tracing, ) -> Self { Self { hydrator: Hydrator::new(), + unseen: BTreeSet::new(), environment, - hydrators, tracing, ungeneralised_function_used: false, lines, @@ -2346,15 +2385,11 @@ impl<'a, 'b> ExprTyper<'a, 'b> { // NOTE: Recursive functions should not run into this multiple time. // If we have no hydrator for this function, it means that we have already // encountered it. - if self.hydrators.get(&fun.name).is_some() { - infer_function( - fun, - self.environment.current_module, - self.hydrators, - self.environment, - self.lines, - self.tracing, - )?; + if self.unseen.contains(&fun.name) { + return Err(Error::MustInferFirst { + function: fun.clone(), + location: *location, + }); } } } diff --git a/crates/aiken-lang/src/tipo/infer.rs b/crates/aiken-lang/src/tipo/infer.rs index f43d061a..51c70828 100644 --- a/crates/aiken-lang/src/tipo/infer.rs +++ b/crates/aiken-lang/src/tipo/infer.rs @@ -330,8 +330,8 @@ fn infer_definition( }); } - let typed_via = ExprTyper::new(environment, hydrators, lines, tracing) - .infer(arg.via.clone())?; + let typed_via = + ExprTyper::new(environment, lines, tracing).infer(arg.via.clone())?; let hydrator: &mut Hydrator = hydrators.get_mut(&f.name).unwrap(); @@ -624,8 +624,8 @@ fn infer_definition( value, tipo: _, }) => { - let typed_expr = ExprTyper::new(environment, hydrators, lines, tracing) - .infer_const(&annotation, *value)?; + let typed_expr = + ExprTyper::new(environment, lines, tracing).infer_const(&annotation, *value)?; let tipo = typed_expr.tipo(); From 27b3536f0976306123e1284d8755ca636fedb8b0 Mon Sep 17 00:00:00 2001 From: KtorZ Date: Thu, 16 May 2024 23:20:52 +0200 Subject: [PATCH 2/4] Also preserve warnings when resetting scope for backtracking. This is crucial as some checks regarding variable usages depends on warnings; so we may accidentally remove variables from the AST as a consequence of backtracking for deep inferrence. --- crates/aiken-lang/src/tipo/expr.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/crates/aiken-lang/src/tipo/expr.rs b/crates/aiken-lang/src/tipo/expr.rs index 6045150d..e0abb1ee 100644 --- a/crates/aiken-lang/src/tipo/expr.rs +++ b/crates/aiken-lang/src/tipo/expr.rs @@ -71,6 +71,8 @@ pub(crate) fn infer_function( .function_types() .unwrap_or_else(|| panic!("Preregistered type for fn {name} was not a fn")); + let warnings = environment.warnings.clone(); + // ━━━ open new scope ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ let initial_scope = environment.open_new_scope(); @@ -106,10 +108,12 @@ pub(crate) fn infer_function( // callee. Otherwise, identifiers present in the caller's scope may become available to the // callee. if let Err(Error::MustInferFirst { function, .. }) = inferred { + // Reset the environment & scope. hydrators.insert(name.to_string(), expr_typer.hydrator); - environment.close_scope(initial_scope); + *environment.warnings = warnings; + // Backtrack and infer callee first. infer_function( &function, environment.current_module, @@ -119,6 +123,7 @@ pub(crate) fn infer_function( tracing, )?; + // Then, try again the entire function definition. return infer_function(fun, module_name, hydrators, environment, lines, tracing); } From ea3e79c1329cf2a1c4cec6c2e9c569fc110a4284 Mon Sep 17 00:00:00 2001 From: KtorZ Date: Thu, 16 May 2024 23:33:23 +0200 Subject: [PATCH 3/4] Renamed 'unseed' -> 'not_yet_inferred' --- crates/aiken-lang/src/tipo/expr.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/aiken-lang/src/tipo/expr.rs b/crates/aiken-lang/src/tipo/expr.rs index e0abb1ee..5e1fb612 100644 --- a/crates/aiken-lang/src/tipo/expr.rs +++ b/crates/aiken-lang/src/tipo/expr.rs @@ -88,7 +88,7 @@ pub(crate) fn infer_function( let mut expr_typer = ExprTyper::new(environment, lines, tracing); expr_typer.hydrator = hydrator; - expr_typer.unseen = BTreeSet::from_iter(hydrators.keys().cloned()); + expr_typer.not_yet_inferred = BTreeSet::from_iter(hydrators.keys().cloned()); // Infer the type using the preregistered args + return types as a starting point let inferred = @@ -199,7 +199,7 @@ pub(crate) struct ExprTyper<'a, 'b> { pub(crate) hydrator: Hydrator, // A static set of remaining function names that are known but not yet inferred - pub(crate) unseen: BTreeSet, + pub(crate) not_yet_inferred: BTreeSet, // We keep track of whether any ungeneralised functions have been used // to determine whether it is safe to generalise this expression after @@ -215,7 +215,7 @@ impl<'a, 'b> ExprTyper<'a, 'b> { ) -> Self { Self { hydrator: Hydrator::new(), - unseen: BTreeSet::new(), + not_yet_inferred: BTreeSet::new(), environment, tracing, ungeneralised_function_used: false, @@ -2390,7 +2390,7 @@ impl<'a, 'b> ExprTyper<'a, 'b> { // NOTE: Recursive functions should not run into this multiple time. // If we have no hydrator for this function, it means that we have already // encountered it. - if self.unseen.contains(&fun.name) { + if self.not_yet_inferred.contains(&fun.name) { return Err(Error::MustInferFirst { function: fun.clone(), location: *location, From 7ff6eba869cb63ccd31c2ef4f276ba0183ef8560 Mon Sep 17 00:00:00 2001 From: KtorZ Date: Thu, 16 May 2024 23:42:53 +0200 Subject: [PATCH 4/4] Prefer '.clone_from' over mutating a clone. Clippy says it's more efficient. I trust clippy. Clippy good. --- crates/aiken-lang/src/gen_uplc.rs | 2 +- crates/aiken-lang/src/parser/definition/data_type.rs | 5 ++--- crates/aiken-lang/src/tipo/error.rs | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/crates/aiken-lang/src/gen_uplc.rs b/crates/aiken-lang/src/gen_uplc.rs index 5ce4b72b..dfd9a093 100644 --- a/crates/aiken-lang/src/gen_uplc.rs +++ b/crates/aiken-lang/src/gen_uplc.rs @@ -3918,7 +3918,7 @@ impl<'a> CodeGenerator<'a> { .map(|(_, tipo)| get_generic_variant_name(tipo)) .join(""); - *variant_name = variant.clone(); + variant_name.clone_from(&variant); if !dependency_functions .iter() diff --git a/crates/aiken-lang/src/parser/definition/data_type.rs b/crates/aiken-lang/src/parser/definition/data_type.rs index 1137481f..2785ddce 100644 --- a/crates/aiken-lang/src/parser/definition/data_type.rs +++ b/crates/aiken-lang/src/parser/definition/data_type.rs @@ -1,9 +1,8 @@ -use chumsky::prelude::*; - use crate::{ ast, parser::{annotation, error::ParseError, token::Token, utils}, }; +use chumsky::prelude::*; pub fn parser() -> impl Parser { let unlabeled_constructor_type_args = annotation() @@ -67,7 +66,7 @@ pub fn parser() -> impl Parser { - *annotated_names = new_names.clone(); + annotated_names.clone_from(new_names); self } _ => self,