Merge pull request #1093 from aiken-lang/benchmarks-wrapup

Wrapping-up benchmarks
Matthias Benkort 2025-02-09 17:04:39 +01:00 committed by GitHub
commit 94246bdb2b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 836 additions and 606 deletions

View File

@ -6,7 +6,6 @@ on:
jobs: jobs:
nix-build: nix-build:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Checkout repository - name: Checkout repository
uses: actions/checkout@v3 uses: actions/checkout@v3
@ -20,5 +19,14 @@ jobs:
uses: DeterminateSystems/magic-nix-cache-action@v1 uses: DeterminateSystems/magic-nix-cache-action@v1
- name: Build Aiken - name: Build Aiken
run: nix build shell: bash
run: |
set +e
nix build
exitcode="$?"
if [[ "$exitcode" != "0" ]] ; then
echo "::warning::Nix build failed with exit code $exitcode"
exit 0
else
exit "$exitcode"
fi

View File

@ -2,9 +2,21 @@
## v1.1.11 - UNRELEASED ## v1.1.11 - UNRELEASED
### Added
- **aiken**: New `aiken bench` command to run benchmarks. @Riley-Kilgore, @KtorZ
The command is very similar to `aiken check`: it collects and runs benchmarks found across the codebase. By default, the output is a set of pretty terminal plots for each dimension (mem & cpu) of each benchmark. The complete dataset of points can be obtained in a structured (JSON) format by redirecting the output to a file.
- **aiken-lang**: New `bench` keyword and capabilities to the test framework. @Riley-Kilgore, @KtorZ
A `bench` is a new type of test that takes a single `Sampler<a> = fn(Int) -> Fuzzer<a>` as parameter, similar to how property-based tests receive a `Fuzzer<a>`. A `Sampler` is, in fact, a _scaled Fuzzer_ which receives a monotonically increasing size as parameter. This allows fine-grained control over the generated values. Unlike tests, benchmarks can return _anything_ since their output is ignored.
Read more about benchmarks in the [user manual](https://aiken-lang.org/language-tour/bench).
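For instance, a benchmark might look like the following minimal sketch (the `aiken/fuzz` helpers `int` and `list_between` are assumed to be available from the fuzz package; `sum` is just an arbitrary function to measure):

```aiken
use aiken/fuzz

// A sampler is a scaled fuzzer: it receives the current size and returns a Fuzzer.
fn sample_ints(size: Int) -> Fuzzer<List<Int>> {
  fuzz.list_between(fuzz.int(), 0, size)
}

// The return value of a bench is ignored; only mem & cpu usage are measured
// for each size handed to the sampler.
bench sum_of_ints(xs via sample_ints) {
  sum(xs)
}

fn sum(xs: List<Int>) -> Int {
  when xs is {
    [] -> 0
    [x, ..rest] -> x + sum(rest)
  }
}
```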
### Changed ### Changed
- **aiken**: support for `bench` keyword to define benchmarks. @Riley-Kilgore
- **aiken-lang**: The compiler now raises a warning when attempting to destructure a record constructor without using named fields. See [#1084](https://github.com/aiken-lang/aiken/issues/1084). @KtorZ - **aiken-lang**: The compiler now raises a warning when attempting to destructure a record constructor without using named fields. See [#1084](https://github.com/aiken-lang/aiken/issues/1084). @KtorZ
- **aiken-lang**: Fix blueprint schema definitions related to pairs (no longer omit (sometimes) Pairs definitions, and generate them as data List). See [#1086](https://github.com/aiken-lang/aiken/issues/1086) and [#970](https://github.com/aiken-lang/aiken/issues/970). @KtorZ - **aiken-lang**: Fix blueprint schema definitions related to pairs (no longer omit (sometimes) Pairs definitions, and generate them as data List). See [#1086](https://github.com/aiken-lang/aiken/issues/1086) and [#970](https://github.com/aiken-lang/aiken/issues/970). @KtorZ
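As an illustration of the destructuring warning above, positional patterns on record constructors now produce a warning while named-field patterns do not (a minimal, hypothetical sketch):

```aiken
pub type Point {
  Point { x: Int, y: Int }
}

fn sum_coords(p: Point) -> Int {
  // Destructuring by position, e.g. `let Point(x, y) = p`, now raises a warning.
  // Preferred form, using named fields:
  let Point { x, y } = p
  x + y
}
```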
@ -16,34 +28,6 @@
- **aiken-lang**: `write_bits` can now be used from aiken/builtins. @Microproofs - **aiken-lang**: `write_bits` can now be used from aiken/builtins. @Microproofs
### Changed
- **aiken-project**: The `aiken.toml` file no longer supports `v1` and `v2` for the plutus version field. @rvcas
- **aiken-project**: `Error::TomlLoading` now looks much better - [see](https://github.com/aiken-lang/aiken/issues/1032#issuecomment-2562122101). @rvcas
- **aiken-lang**: 10-20% optimization improvements via case-constr, rearranging function definitions (while maintaining dependency ordering),
and allowing inlining in if_then_else_error cases which preserve the same error semantics for a program. @Microproofs
### Fixed
- **aiken**: panic error when using `aiken uplc decode` on cbor encoded flat bytes. @rvcas
- **aiken-lang**: comment formatting in pipelines leading to confusion. @rvcas
- **aiken-lang**: preserve holes discard name in function captures (see [#1080](https://github.com/aiken-lang/aiken/issues/1080)). @KtorZ
- **uplc**: Added deserialization match for the new builtin indices.
## v1.1.11 - UNRELEASED
### Added
- **aiken**: support for `bench` keyword to define benchmarks. @Riley-Kilgore
## v1.1.10 - 2025-01-21
### Added
- **aiken-project**: `export` output now supports the function's `return_type`. @rvcas
- **aiken-lang**: `write_bits` can now be used from aiken/builtins. @Microproofs
### Changed ### Changed
- **aiken-project**: The `aiken.toml` file no longer supports `v1` and `v2` for the plutus version field. @rvcas - **aiken-project**: The `aiken.toml` file no longer supports `v1` and `v2` for the plutus version field. @rvcas

Cargo.lock (generated, vendored): 439 changes

File diff suppressed because it is too large

View File

@ -28,6 +28,12 @@ use uplc::{
}; };
use vec1::{vec1, Vec1}; use vec1::{vec1, Vec1};
#[derive(Debug, Clone, Copy)]
pub enum RunnableKind {
Test,
Bench,
}
/// ----- Test ----------------------------------------------------------------- /// ----- Test -----------------------------------------------------------------
/// ///
/// Aiken supports two kinds of tests: unit and property. A unit test is a simply /// Aiken supports two kinds of tests: unit and property. A unit test is a simply
@ -117,15 +123,15 @@ impl Test {
}) })
} }
pub fn from_test_definition( pub fn from_function_definition(
generator: &mut CodeGenerator<'_>, generator: &mut CodeGenerator<'_>,
test: TypedTest, test: TypedTest,
module_name: String, module_name: String,
input_path: PathBuf, input_path: PathBuf,
is_benchmark: bool, kind: RunnableKind,
) -> Test { ) -> Test {
if test.arguments.is_empty() { if test.arguments.is_empty() {
if is_benchmark { if matches!(kind, RunnableKind::Bench) {
unreachable!("benchmark must have at least one argument"); unreachable!("benchmark must have at least one argument");
} else { } else {
Self::unit_test(generator, test, module_name, input_path) Self::unit_test(generator, test, module_name, input_path)
@ -153,8 +159,8 @@ impl Test {
// apply onto it later. // apply onto it later.
let generator_program = generator.clone().generate_raw(&via, &[], &module_name); let generator_program = generator.clone().generate_raw(&via, &[], &module_name);
if is_benchmark { match kind {
Test::Benchmark(Benchmark { RunnableKind::Bench => Test::Benchmark(Benchmark {
input_path, input_path,
module: module_name, module: module_name,
name: test.name, name: test.name,
@ -165,9 +171,8 @@ impl Test {
type_info, type_info,
stripped_type_info, stripped_type_info,
}, },
}) }),
} else { RunnableKind::Test => Self::property_test(
Self::property_test(
input_path, input_path,
module_name, module_name,
test.name, test.name,
@ -178,27 +183,26 @@ impl Test {
stripped_type_info, stripped_type_info,
type_info, type_info,
}, },
) ),
} }
} }
} }
pub fn from_benchmark_definition( pub fn run(
generator: &mut CodeGenerator<'_>, self,
test: TypedTest, seed: u32,
module_name: String, max_success: usize,
input_path: PathBuf, plutus_version: &PlutusVersion,
) -> Test { ) -> TestResult<(Constant, Rc<Type>), PlutusData> {
Self::from_test_definition(generator, test, module_name, input_path, true) match self {
Test::UnitTest(unit_test) => TestResult::UnitTestResult(unit_test.run(plutus_version)),
Test::PropertyTest(property_test) => {
TestResult::PropertyTestResult(property_test.run(seed, max_success, plutus_version))
}
Test::Benchmark(benchmark) => {
TestResult::BenchmarkResult(benchmark.run(seed, max_success, plutus_version))
}
} }
pub fn from_function_definition(
generator: &mut CodeGenerator<'_>,
test: TypedTest,
module_name: String,
input_path: PathBuf,
) -> Test {
Self::from_test_definition(generator, test, module_name, input_path, false)
} }
} }
@ -217,7 +221,7 @@ pub struct UnitTest {
unsafe impl Send for UnitTest {} unsafe impl Send for UnitTest {}
impl UnitTest { impl UnitTest {
pub fn run<T>(self, plutus_version: &PlutusVersion) -> TestResult<(Constant, Rc<Type>), T> { pub fn run(self, plutus_version: &PlutusVersion) -> UnitTestResult<(Constant, Rc<Type>)> {
let mut eval_result = Program::<NamedDeBruijn>::try_from(self.program.clone()) let mut eval_result = Program::<NamedDeBruijn>::try_from(self.program.clone())
.unwrap() .unwrap()
.eval_version(ExBudget::max(), &plutus_version.into()); .eval_version(ExBudget::max(), &plutus_version.into());
@ -233,13 +237,13 @@ impl UnitTest {
} }
traces.extend(eval_result.logs()); traces.extend(eval_result.logs());
TestResult::UnitTestResult(UnitTestResult { UnitTestResult {
success, success,
test: self.to_owned(), test: self.to_owned(),
spent_budget: eval_result.cost(), spent_budget: eval_result.cost(),
traces, traces,
assertion: self.assertion, assertion: self.assertion,
}) }
} }
} }
@ -270,7 +274,7 @@ pub struct Fuzzer<T> {
} }
#[derive(Debug, Clone, thiserror::Error, miette::Diagnostic)] #[derive(Debug, Clone, thiserror::Error, miette::Diagnostic)]
#[error("Fuzzer exited unexpectedly: {uplc_error}")] #[error("Fuzzer exited unexpectedly: {uplc_error}.")]
pub struct FuzzerError { pub struct FuzzerError {
traces: Vec<String>, traces: Vec<String>,
uplc_error: uplc::machine::Error, uplc_error: uplc::machine::Error,
@ -317,12 +321,12 @@ impl PropertyTest {
/// Run a property test from a given seed. The property is run at most DEFAULT_MAX_SUCCESS times. It /// Run a property test from a given seed. The property is run at most DEFAULT_MAX_SUCCESS times. It
/// may stop earlier on failure, in which case a 'counterexample' is returned. /// may stop earlier on failure, in which case a 'counterexample' is returned.
pub fn run<U>( pub fn run(
self, self,
seed: u32, seed: u32,
n: usize, n: usize,
plutus_version: &PlutusVersion, plutus_version: &PlutusVersion,
) -> TestResult<U, PlutusData> { ) -> PropertyTestResult<PlutusData> {
let mut labels = BTreeMap::new(); let mut labels = BTreeMap::new();
let mut remaining = n; let mut remaining = n;
@ -352,13 +356,13 @@ impl PropertyTest {
), ),
}; };
TestResult::PropertyTestResult(PropertyTestResult { PropertyTestResult {
test: self, test: self,
counterexample, counterexample,
iterations, iterations,
labels, labels,
traces, traces,
}) }
} }
pub fn run_n_times<'a>( pub fn run_n_times<'a>(
@ -372,9 +376,7 @@ impl PropertyTest {
let mut counterexample = None; let mut counterexample = None;
while *remaining > 0 && counterexample.is_none() { while *remaining > 0 && counterexample.is_none() {
let (next_prng, cex) = self.run_once(prng, labels, plutus_version)?; (prng, counterexample) = self.run_once(prng, labels, plutus_version)?;
prng = next_prng;
counterexample = cex;
*remaining -= 1; *remaining -= 1;
} }
@ -492,6 +494,29 @@ pub struct Sampler<T> {
pub stripped_type_info: Rc<Type>, pub stripped_type_info: Rc<Type>,
} }
#[derive(Debug, Clone, thiserror::Error, miette::Diagnostic)]
pub enum BenchmarkError {
#[error("Sampler exited unexpectedly: {uplc_error}.")]
SamplerError {
traces: Vec<String>,
uplc_error: uplc::machine::Error,
},
#[error("Bench exited unexpectedly: {uplc_error}.")]
BenchError {
traces: Vec<String>,
uplc_error: uplc::machine::Error,
},
}
impl BenchmarkError {
pub fn traces(&self) -> &[String] {
match self {
BenchmarkError::SamplerError { traces, .. }
| BenchmarkError::BenchError { traces, .. } => traces.as_slice(),
}
}
}
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Benchmark { pub struct Benchmark {
pub input_path: PathBuf, pub input_path: PathBuf,
@ -505,50 +530,61 @@ pub struct Benchmark {
unsafe impl Send for Benchmark {} unsafe impl Send for Benchmark {}
impl Benchmark { impl Benchmark {
pub fn benchmark( pub const DEFAULT_MAX_SIZE: usize = 30;
pub fn run(
self, self,
seed: u32, seed: u32,
max_iterations: usize, max_size: usize,
plutus_version: &PlutusVersion, plutus_version: &PlutusVersion,
) -> Vec<BenchmarkResult> { ) -> BenchmarkResult {
let mut results = Vec::with_capacity(max_iterations); let mut measures = Vec::with_capacity(max_size);
let mut iteration = 0;
let mut prng = Prng::from_seed(seed); let mut prng = Prng::from_seed(seed);
let mut error = None;
let mut size = 0;
while max_iterations > iteration { while error.is_none() && max_size >= size {
let fuzzer = self let fuzzer = self
.sampler .sampler
.program .program
.apply_data(Data::integer(num_bigint::BigInt::from(iteration as i64))); .apply_term(&Term::Constant(Constant::Integer(size.into()).into()));
match prng.sample(&fuzzer) { match prng.sample(&fuzzer) {
Ok(None) => {
panic!("A seeded PRNG returned 'None' which indicates a sampler is ill-formed and implemented wrongly; please contact the library's authors.");
}
Ok(Some((new_prng, value))) => { Ok(Some((new_prng, value))) => {
prng = new_prng; prng = new_prng;
let mut eval_result = self.eval(&value, plutus_version); let mut result = self.eval(&value, plutus_version);
results.push(BenchmarkResult { match result.result() {
test: self.clone(), Ok(_) => measures.push((size, result.cost())),
cost: eval_result.cost(), Err(uplc_error) => {
success: true, error = Some(BenchmarkError::BenchError {
traces: eval_result.logs().to_vec(), traces: result
.logs()
.into_iter()
.filter(|s| PropertyTest::extract_label(s).is_none())
.collect(),
uplc_error,
}); });
} }
Ok(None) => {
break;
} }
Err(e) => {
results.push(BenchmarkResult {
test: self.clone(),
cost: ExBudget::default(),
success: false,
traces: vec![format!("Fuzzer error: {}", e)],
});
break;
}
}
iteration += 1;
} }
results Err(FuzzerError { traces, uplc_error }) => {
error = Some(BenchmarkError::SamplerError { traces, uplc_error });
}
}
size += 1;
}
BenchmarkResult {
bench: self,
measures,
error,
}
} }
pub fn eval(&self, value: &PlutusData, plutus_version: &PlutusVersion) -> EvalResult { pub fn eval(&self, value: &PlutusData, plutus_version: &PlutusVersion) -> EvalResult {
@ -650,7 +686,6 @@ impl Prng {
pub fn sample( pub fn sample(
&self, &self,
fuzzer: &Program<Name>, fuzzer: &Program<Name>,
// iteration: usize,
) -> Result<Option<(Prng, PlutusData)>, FuzzerError> { ) -> Result<Option<(Prng, PlutusData)>, FuzzerError> {
let program = Program::<NamedDeBruijn>::try_from(fuzzer.apply_data(self.uplc())).unwrap(); let program = Program::<NamedDeBruijn>::try_from(fuzzer.apply_data(self.uplc())).unwrap();
let mut result = program.eval(ExBudget::max()); let mut result = program.eval(ExBudget::max());
@ -1069,7 +1104,7 @@ where
pub enum TestResult<U, T> { pub enum TestResult<U, T> {
UnitTestResult(UnitTestResult<U>), UnitTestResult(UnitTestResult<U>),
PropertyTestResult(PropertyTestResult<T>), PropertyTestResult(PropertyTestResult<T>),
Benchmark(BenchmarkResult), BenchmarkResult(BenchmarkResult),
} }
unsafe impl<U, T> Send for TestResult<U, T> {} unsafe impl<U, T> Send for TestResult<U, T> {}
@ -1084,7 +1119,7 @@ impl TestResult<(Constant, Rc<Type>), PlutusData> {
TestResult::PropertyTestResult(test) => { TestResult::PropertyTestResult(test) => {
TestResult::PropertyTestResult(test.reify(data_types)) TestResult::PropertyTestResult(test.reify(data_types))
} }
TestResult::Benchmark(result) => TestResult::Benchmark(result), TestResult::BenchmarkResult(result) => TestResult::BenchmarkResult(result),
} }
} }
} }
@ -1107,7 +1142,7 @@ impl<U, T> TestResult<U, T> {
} }
OnTestFailure::SucceedImmediately => counterexample.is_some(), OnTestFailure::SucceedImmediately => counterexample.is_some(),
}, },
TestResult::Benchmark(BenchmarkResult { success, .. }) => *success, TestResult::BenchmarkResult(BenchmarkResult { error, .. }) => error.is_none(),
} }
} }
@ -1117,7 +1152,7 @@ impl<U, T> TestResult<U, T> {
TestResult::PropertyTestResult(PropertyTestResult { ref test, .. }) => { TestResult::PropertyTestResult(PropertyTestResult { ref test, .. }) => {
test.module.as_str() test.module.as_str()
} }
TestResult::Benchmark(BenchmarkResult { ref test, .. }) => test.module.as_str(), TestResult::BenchmarkResult(BenchmarkResult { ref bench, .. }) => bench.module.as_str(),
} }
} }
@ -1127,7 +1162,7 @@ impl<U, T> TestResult<U, T> {
TestResult::PropertyTestResult(PropertyTestResult { ref test, .. }) => { TestResult::PropertyTestResult(PropertyTestResult { ref test, .. }) => {
test.name.as_str() test.name.as_str()
} }
TestResult::Benchmark(BenchmarkResult { ref test, .. }) => test.name.as_str(), TestResult::BenchmarkResult(BenchmarkResult { ref bench, .. }) => bench.name.as_str(),
} }
} }
@ -1135,7 +1170,9 @@ impl<U, T> TestResult<U, T> {
match self { match self {
TestResult::UnitTestResult(UnitTestResult { traces, .. }) TestResult::UnitTestResult(UnitTestResult { traces, .. })
| TestResult::PropertyTestResult(PropertyTestResult { traces, .. }) => traces, | TestResult::PropertyTestResult(PropertyTestResult { traces, .. }) => traces,
TestResult::Benchmark(BenchmarkResult { traces, .. }) => traces, TestResult::BenchmarkResult(BenchmarkResult { error, .. }) => {
error.as_ref().map(|e| e.traces()).unwrap_or_default()
}
} }
} }
} }
@ -1473,10 +1510,9 @@ impl Assertion<UntypedExpr> {
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct BenchmarkResult { pub struct BenchmarkResult {
pub test: Benchmark, pub bench: Benchmark,
pub cost: ExBudget, pub measures: Vec<(usize, ExBudget)>,
pub success: bool, pub error: Option<BenchmarkError>,
pub traces: Vec<String>,
} }
unsafe impl Send for BenchmarkResult {} unsafe impl Send for BenchmarkResult {}

View File

@ -317,6 +317,14 @@ You can use '{discard}' and numbers to distinguish between similar names.
location: Span, location: Span,
}, },
#[error("I notice a benchmark definition without any argument.\n")]
#[diagnostic(url("https://aiken-lang.org/language-tour/bench"))]
#[diagnostic(code("arity::bench"))]
IncorrectBenchmarkArity {
#[label("must have exactly one argument")]
location: Span,
},
#[error( #[error(
"I saw {} field{} in a context where there should be {}.\n", "I saw {} field{} in a context where there should be {}.\n",
given.if_supports_color(Stdout, |s| s.purple()), given.if_supports_color(Stdout, |s| s.purple()),
@ -1158,6 +1166,7 @@ impl ExtraData for Error {
| Error::UnknownPurpose { .. } | Error::UnknownPurpose { .. }
| Error::UnknownValidatorHandler { .. } | Error::UnknownValidatorHandler { .. }
| Error::UnexpectedValidatorFallback { .. } | Error::UnexpectedValidatorFallback { .. }
| Error::IncorrectBenchmarkArity { .. }
| Error::MustInferFirst { .. } => None, | Error::MustInferFirst { .. } => None,
Error::UnknownType { name, .. } Error::UnknownType { name, .. }

View File

@ -12,7 +12,8 @@ use crate::{
TypedDefinition, TypedModule, TypedValidator, UntypedArg, UntypedDefinition, UntypedModule, TypedDefinition, TypedModule, TypedValidator, UntypedArg, UntypedDefinition, UntypedModule,
UntypedPattern, UntypedValidator, Use, Validator, UntypedPattern, UntypedValidator, Use, Validator,
}, },
expr::{TypedExpr, UntypedAssignmentKind}, expr::{TypedExpr, UntypedAssignmentKind, UntypedExpr},
parser::token::Token,
tipo::{expr::infer_function, Span, Type, TypeVar}, tipo::{expr::infer_function, Span, Type, TypeVar},
IdGenerator, IdGenerator,
}; };
@ -347,67 +348,8 @@ fn infer_definition(
}); });
} }
let typed_via = ExprTyper::new(environment, tracing).infer(arg.via.clone())?; extract_via_information(&f, arg, hydrators, environment, tracing, infer_fuzzer)
.map(|(typed_via, annotation)| (Some(typed_via), Some(annotation)))
let hydrator: &mut Hydrator = hydrators.get_mut(&f.name).unwrap();
let provided_inner_type = arg
.arg
.annotation
.as_ref()
.map(|ann| hydrator.type_from_annotation(ann, environment))
.transpose()?;
let (inferred_annotation, inferred_inner_type) = infer_fuzzer(
environment,
provided_inner_type.clone(),
&typed_via.tipo(),
&arg.via.location(),
)?;
// Ensure that the annotation, if any, matches the type inferred from the
// Fuzzer.
if let Some(provided_inner_type) = provided_inner_type {
if !arg
.arg
.annotation
.as_ref()
.unwrap()
.is_logically_equal(&inferred_annotation)
{
return Err(Error::CouldNotUnify {
location: arg.arg.location,
expected: inferred_inner_type.clone(),
given: provided_inner_type.clone(),
situation: Some(UnifyErrorSituation::FuzzerAnnotationMismatch),
rigid_type_names: hydrator.rigid_names(),
});
}
}
// Replace the pre-registered type for the test function, to allow inferring
// the function body with the right type arguments.
let scope = environment
.scope
.get_mut(&f.name)
.expect("Could not find preregistered type for test");
if let Type::Fn {
ref ret,
ref alias,
args: _,
} = scope.tipo.as_ref()
{
scope.tipo = Rc::new(Type::Fn {
ret: ret.clone(),
args: vec![inferred_inner_type.clone()],
alias: alias.clone(),
})
}
Ok((
Some((typed_via, inferred_inner_type)),
Some(inferred_annotation),
))
} }
None => Ok((None, None)), None => Ok((None, None)),
}?; }?;
@ -466,130 +408,50 @@ fn infer_definition(
} }
Definition::Benchmark(f) => { Definition::Benchmark(f) => {
let err_incorrect_arity = || {
Err(Error::IncorrectBenchmarkArity {
location: f
.location
.map(|start, end| (start + Token::Benchmark.to_string().len() + 1, end)),
})
};
let (typed_via, annotation) = match f.arguments.first() { let (typed_via, annotation) = match f.arguments.first() {
None => return err_incorrect_arity(),
Some(arg) => { Some(arg) => {
if f.arguments.len() > 1 { if f.arguments.len() > 1 {
return Err(Error::IncorrectTestArity { return err_incorrect_arity();
count: f.arguments.len(),
location: f
.arguments
.get(1)
.expect("arguments.len() > 1")
.arg
.location,
});
} }
let typed_via = ExprTyper::new(environment, tracing).infer(arg.via.clone())?; extract_via_information(&f, arg, hydrators, environment, tracing, infer_sampler)
let hydrator: &mut Hydrator = hydrators.get_mut(&f.name).unwrap();
let provided_inner_type = arg
.arg
.annotation
.as_ref()
.map(|ann| hydrator.type_from_annotation(ann, environment))
.transpose()?;
let (inferred_annotation, inferred_inner_type) = infer_sampler(
environment,
provided_inner_type.clone(),
&typed_via.tipo(),
&arg.via.location(),
)?;
// Ensure that the annotation, if any, matches the type inferred from the
// Sampler.
if let Some(provided_inner_type) = provided_inner_type {
if !arg
.arg
.annotation
.as_ref()
.unwrap()
.is_logically_equal(&inferred_annotation)
{
return Err(Error::CouldNotUnify {
location: arg.arg.location,
expected: inferred_inner_type.clone(),
given: provided_inner_type.clone(),
situation: Some(UnifyErrorSituation::SamplerAnnotationMismatch),
rigid_type_names: hydrator.rigid_names(),
});
} }
}
// Replace the pre-registered type for the benchmark function, to allow inferring
// the function body with the right type arguments.
let scope = environment
.scope
.get_mut(&f.name)
.expect("Could not find preregistered type for benchmark");
if let Type::Fn {
ref ret,
ref alias,
args: _,
} = scope.tipo.as_ref()
{
scope.tipo = Rc::new(Type::Fn {
ret: ret.clone(),
args: vec![inferred_inner_type.clone()],
alias: alias.clone(),
})
}
Ok((
Some((typed_via, inferred_inner_type)),
Some(inferred_annotation),
))
}
None => Ok((None, None)),
}?; }?;
let typed_f = infer_function(&f.into(), module_name, hydrators, environment, tracing)?; let typed_f = infer_function(&f.into(), module_name, hydrators, environment, tracing)?;
let is_bool = environment.unify( let arguments = {
typed_f.return_type.clone(), let arg = typed_f
Type::bool(), .arguments
typed_f.location, .first()
false, .expect("has exactly one argument")
); .to_owned();
let is_void = environment.unify( vec![ArgVia {
typed_f.return_type.clone(), arg: TypedArg {
Type::void(), tipo: typed_via.1,
typed_f.location, annotation: Some(annotation),
false, ..arg
); },
via: typed_via.0,
if is_bool.or(is_void).is_err() { }]
return Err(Error::IllegalTestType { };
location: typed_f.location,
});
}
Ok(Definition::Benchmark(Function { Ok(Definition::Benchmark(Function {
doc: typed_f.doc, doc: typed_f.doc,
location: typed_f.location, location: typed_f.location,
name: typed_f.name, name: typed_f.name,
public: typed_f.public, public: typed_f.public,
arguments: match typed_via { arguments,
Some((via, tipo)) => {
let arg = typed_f
.arguments
.first()
.expect("has exactly one argument")
.to_owned();
vec![ArgVia {
arg: TypedArg {
tipo,
annotation,
..arg
},
via,
}]
}
None => vec![],
},
return_annotation: typed_f.return_annotation, return_annotation: typed_f.return_annotation,
return_type: typed_f.return_type, return_type: typed_f.return_type,
body: typed_f.body, body: typed_f.body,
@ -823,6 +685,83 @@ fn infer_definition(
} }
} }
#[allow(clippy::result_large_err)]
fn extract_via_information<F>(
f: &Function<(), UntypedExpr, ArgVia<UntypedArg, UntypedExpr>>,
arg: &ArgVia<UntypedArg, UntypedExpr>,
hydrators: &mut HashMap<String, Hydrator>,
environment: &mut Environment<'_>,
tracing: Tracing,
infer_via: F,
) -> Result<((TypedExpr, Rc<Type>), Annotation), Error>
where
F: FnOnce(
&mut Environment<'_>,
Option<Rc<Type>>,
&Rc<Type>,
&Span,
) -> Result<(Annotation, Rc<Type>), Error>,
{
let typed_via = ExprTyper::new(environment, tracing).infer(arg.via.clone())?;
let hydrator: &mut Hydrator = hydrators.get_mut(&f.name).unwrap();
let provided_inner_type = arg
.arg
.annotation
.as_ref()
.map(|ann| hydrator.type_from_annotation(ann, environment))
.transpose()?;
let (inferred_annotation, inferred_inner_type) = infer_via(
environment,
provided_inner_type.clone(),
&typed_via.tipo(),
&arg.via.location(),
)?;
// Ensure that the annotation, if any, matches the type inferred from the
// Fuzzer.
if let Some(provided_inner_type) = provided_inner_type {
if !arg
.arg
.annotation
.as_ref()
.unwrap()
.is_logically_equal(&inferred_annotation)
{
return Err(Error::CouldNotUnify {
location: arg.arg.location,
expected: inferred_inner_type.clone(),
given: provided_inner_type.clone(),
situation: Some(UnifyErrorSituation::FuzzerAnnotationMismatch),
rigid_type_names: hydrator.rigid_names(),
});
}
}
// Replace the pre-registered type for the test function, to allow inferring
// the function body with the right type arguments.
let scope = environment
.scope
.get_mut(&f.name)
.expect("Could not find preregistered type for test");
if let Type::Fn {
ref ret,
ref alias,
args: _,
} = scope.tipo.as_ref()
{
scope.tipo = Rc::new(Type::Fn {
ret: ret.clone(),
args: vec![inferred_inner_type.clone()],
alias: alias.clone(),
})
}
Ok(((typed_via, inferred_inner_type), inferred_annotation))
}
#[allow(clippy::result_large_err)] #[allow(clippy::result_large_err)]
fn infer_fuzzer( fn infer_fuzzer(
environment: &mut Environment<'_>, environment: &mut Environment<'_>,

View File

@ -685,6 +685,7 @@ mod tests {
}), }),
"Identity<fn(Bool) -> Bool>", "Identity<fn(Bool) -> Bool>",
); );
assert_string!(Type::sampler(Type::int()), "Sampler<Int>");
} }
#[test] #[test]

View File

@ -11,7 +11,7 @@ authors = [
"Kasey White <kwhitemsg@gmail.com>", "Kasey White <kwhitemsg@gmail.com>",
"KtorZ <matthias.benkort@gmail.com>", "KtorZ <matthias.benkort@gmail.com>",
] ]
rust-version = "1.70.0" rust-version = "1.80.0"
build = "build.rs" build = "build.rs"
[dependencies] [dependencies]
@ -42,10 +42,12 @@ pulldown-cmark = { version = "0.12.0", default-features = false, features = [
rayon = "1.7.0" rayon = "1.7.0"
regex = "1.7.1" regex = "1.7.1"
reqwest = { version = "0.11.14", features = ["blocking", "json"] } reqwest = { version = "0.11.14", features = ["blocking", "json"] }
rgb = "0.8.50"
semver = { version = "1.0.23", features = ["serde"] } semver = { version = "1.0.23", features = ["serde"] }
serde = { version = "1.0.152", features = ["derive"] } serde = { version = "1.0.152", features = ["derive"] }
serde_json = { version = "1.0.94", features = ["preserve_order"] } serde_json = { version = "1.0.94", features = ["preserve_order"] }
strip-ansi-escapes = "0.1.1" strip-ansi-escapes = "0.1.1"
textplots = { git = "https://github.com/aiken-lang/textplots-rs.git" }
thiserror = "1.0.39" thiserror = "1.0.39"
tokio = { version = "1.26.0", features = ["full"] } tokio = { version = "1.26.0", features = ["full"] }
toml = "0.7.2" toml = "0.7.2"

View File

@ -3,7 +3,7 @@ use aiken_lang::{
ast::{self, Span}, ast::{self, Span},
error::ExtraData, error::ExtraData,
parser::error::ParseError, parser::error::ParseError,
test_framework::{PropertyTestResult, TestResult, UnitTestResult}, test_framework::{BenchmarkResult, PropertyTestResult, TestResult, UnitTestResult},
tipo, tipo,
}; };
use miette::{ use miette::{
@ -193,7 +193,11 @@ impl Error {
test.input_path.to_path_buf(), test.input_path.to_path_buf(),
test.program.to_pretty(), test.program.to_pretty(),
), ),
TestResult::Benchmark(_) => ("bench".to_string(), PathBuf::new(), String::new()), // todo TestResult::BenchmarkResult(BenchmarkResult { bench, .. }) => (
bench.name.to_string(),
bench.input_path.to_path_buf(),
bench.program.to_pretty(),
),
}; };
Error::TestFailure { Error::TestFailure {

View File

@ -40,7 +40,7 @@ use aiken_lang::{
format::{Formatter, MAX_COLUMNS}, format::{Formatter, MAX_COLUMNS},
gen_uplc::CodeGenerator, gen_uplc::CodeGenerator,
line_numbers::LineNumbers, line_numbers::LineNumbers,
test_framework::{Test, TestResult}, test_framework::{RunnableKind, Test, TestResult},
tipo::{Type, TypeInfo}, tipo::{Type, TypeInfo},
utils, IdGenerator, utils, IdGenerator,
}; };
@ -299,20 +299,21 @@ where
pub fn benchmark( pub fn benchmark(
&mut self, &mut self,
match_tests: Option<Vec<String>>, match_benchmarks: Option<Vec<String>>,
exact_match: bool, exact_match: bool,
seed: u32, seed: u32,
times_to_run: usize, max_size: usize,
tracing: Tracing,
env: Option<String>, env: Option<String>,
) -> Result<(), Vec<Error>> { ) -> Result<(), Vec<Error>> {
let options = Options { let options = Options {
tracing: Tracing::silent(), tracing,
env, env,
code_gen_mode: CodeGenMode::Benchmark { code_gen_mode: CodeGenMode::Benchmark {
match_tests, match_benchmarks,
exact_match, exact_match,
seed, seed,
times_to_run, max_size,
}, },
blueprint_path: self.blueprint_path(None), blueprint_path: self.blueprint_path(None),
}; };
@ -432,7 +433,7 @@ where
self.event_listener.handle_event(Event::RunningTests); self.event_listener.handle_event(Event::RunningTests);
} }
let tests = self.run_tests(tests, seed, property_max_success); let tests = self.run_runnables(tests, seed, property_max_success);
self.checks_count = if tests.is_empty() { self.checks_count = if tests.is_empty() {
None None
@ -466,33 +467,39 @@ where
} }
} }
CodeGenMode::Benchmark { CodeGenMode::Benchmark {
match_tests, match_benchmarks,
exact_match, exact_match,
seed, seed,
times_to_run, max_size,
} => { } => {
let tests = let verbose = false;
self.collect_benchmarks(false, match_tests, exact_match, options.tracing)?;
if !tests.is_empty() { let benchmarks = self.collect_benchmarks(
verbose,
match_benchmarks,
exact_match,
options.tracing,
)?;
if !benchmarks.is_empty() {
self.event_listener.handle_event(Event::RunningBenchmarks); self.event_listener.handle_event(Event::RunningBenchmarks);
} }
let tests = self.run_benchmarks(tests, seed, times_to_run); let benchmarks = self.run_runnables(benchmarks, seed, max_size);
let errors: Vec<Error> = tests let errors: Vec<Error> = benchmarks
.iter() .iter()
.filter_map(|e| { .filter_map(|e| {
if e.is_success() { if e.is_success() {
None None
} else { } else {
Some(Error::from_test_result(e, false)) Some(Error::from_test_result(e, verbose))
} }
}) })
.collect(); .collect();
self.event_listener self.event_listener
.handle_event(Event::FinishedBenchmarks { seed, tests }); .handle_event(Event::FinishedBenchmarks { seed, benchmarks });
if !errors.is_empty() { if !errors.is_empty() {
Err(errors) Err(errors)
@ -954,7 +961,7 @@ where
fn collect_test_items( fn collect_test_items(
&mut self, &mut self,
kind: &str, // "test" or "bench" kind: RunnableKind,
verbose: bool, verbose: bool,
match_tests: Option<Vec<String>>, match_tests: Option<Vec<String>>,
exact_match: bool, exact_match: bool,
@ -993,8 +1000,8 @@ where
for def in checked_module.ast.definitions() { for def in checked_module.ast.definitions() {
let func = match (kind, def) { let func = match (kind, def) {
("test", Definition::Test(func)) => Some(func), (RunnableKind::Test, Definition::Test(func)) => Some(func),
("bench", Definition::Benchmark(func)) => Some(func), (RunnableKind::Bench, Definition::Benchmark(func)) => Some(func),
_ => None, _ => None,
}; };
@ -1048,21 +1055,13 @@ where
}) })
} }
tests.push(match kind { tests.push(Test::from_function_definition(
"test" => Test::from_function_definition(
&mut generator, &mut generator,
test.to_owned(), test.to_owned(),
module_name, module_name,
input_path, input_path,
), kind,
"bench" => Test::from_benchmark_definition( ));
&mut generator,
test.to_owned(),
module_name,
input_path,
),
_ => unreachable!("Invalid test kind"),
});
} }
Ok(tests) Ok(tests)
@ -1075,7 +1074,13 @@ where
exact_match: bool, exact_match: bool,
tracing: Tracing, tracing: Tracing,
) -> Result<Vec<Test>, Error> { ) -> Result<Vec<Test>, Error> {
self.collect_test_items("test", verbose, match_tests, exact_match, tracing) self.collect_test_items(
RunnableKind::Test,
verbose,
match_tests,
exact_match,
tracing,
)
} }
fn collect_benchmarks( fn collect_benchmarks(
@ -1085,14 +1090,20 @@ where
exact_match: bool, exact_match: bool,
tracing: Tracing, tracing: Tracing,
) -> Result<Vec<Test>, Error> { ) -> Result<Vec<Test>, Error> {
self.collect_test_items("bench", verbose, match_tests, exact_match, tracing) self.collect_test_items(
RunnableKind::Bench,
verbose,
match_tests,
exact_match,
tracing,
)
} }
fn run_tests( fn run_runnables(
&self, &self,
tests: Vec<Test>, tests: Vec<Test>,
seed: u32, seed: u32,
property_max_success: usize, max_success: usize,
) -> Vec<TestResult<UntypedExpr, UntypedExpr>> { ) -> Vec<TestResult<UntypedExpr, UntypedExpr>> {
use rayon::prelude::*; use rayon::prelude::*;
@ -1102,42 +1113,7 @@ where
tests tests
.into_par_iter() .into_par_iter()
.map(|test| match test { .map(|test| test.run(seed, max_success, plutus_version))
Test::UnitTest(unit_test) => unit_test.run(plutus_version),
Test::PropertyTest(property_test) => {
property_test.run(seed, property_max_success, plutus_version)
}
Test::Benchmark(_) => unreachable!("Benchmarks cannot be run in PBT."),
})
.collect::<Vec<TestResult<(Constant, Rc<Type>), PlutusData>>>()
.into_iter()
.map(|test| test.reify(&data_types))
.collect()
}
fn run_benchmarks(
&self,
tests: Vec<Test>,
seed: u32,
property_max_success: usize,
) -> Vec<TestResult<UntypedExpr, UntypedExpr>> {
use rayon::prelude::*;
let data_types = utils::indexmap::as_ref_values(&self.data_types);
let plutus_version = &self.config.plutus;
tests
.into_par_iter()
.flat_map(|test| match test {
Test::UnitTest(_) | Test::PropertyTest(_) => {
unreachable!("Tests cannot be ran during benchmarking.")
}
Test::Benchmark(benchmark) => benchmark
.benchmark(seed, property_max_success, plutus_version)
.into_iter()
.map(TestResult::Benchmark)
.collect::<Vec<_>>(),
})
.collect::<Vec<TestResult<(Constant, Rc<Type>), PlutusData>>>() .collect::<Vec<TestResult<(Constant, Rc<Type>), PlutusData>>>()
.into_iter() .into_iter()
.map(|test| test.reify(&data_types)) .map(|test| test.reify(&data_types))

View File

@ -30,10 +30,10 @@ pub enum CodeGenMode {
}, },
Build(bool), Build(bool),
Benchmark { Benchmark {
match_tests: Option<Vec<String>>, match_benchmarks: Option<Vec<String>>,
exact_match: bool, exact_match: bool,
seed: u32, seed: u32,
times_to_run: usize, max_size: usize,
}, },
NoOp, NoOp,
} }

View File

@ -1,6 +1,6 @@
use aiken_lang::{ use aiken_lang::{
expr::UntypedExpr, expr::UntypedExpr,
test_framework::{PropertyTestResult, TestResult, UnitTestResult}, test_framework::{BenchmarkResult, PropertyTestResult, TestResult, UnitTestResult},
}; };
pub use json::{json_schema, Json}; pub use json::{json_schema, Json};
use std::{ use std::{
@ -10,6 +10,7 @@ use std::{
path::PathBuf, path::PathBuf,
}; };
pub use terminal::Terminal; pub use terminal::Terminal;
use uplc::machine::cost_model::ExBudget;
mod json; mod json;
mod terminal; mod terminal;
@ -50,7 +51,7 @@ pub enum Event {
}, },
FinishedBenchmarks { FinishedBenchmarks {
seed: u32, seed: u32,
tests: Vec<TestResult<UntypedExpr, UntypedExpr>>, benchmarks: Vec<TestResult<UntypedExpr, UntypedExpr>>,
}, },
WaitingForBuildDirLock, WaitingForBuildDirLock,
ResolvingPackages { ResolvingPackages {
@ -117,6 +118,18 @@ pub(crate) fn group_by_module(
} }
pub(crate) fn find_max_execution_units<T>(xs: &[TestResult<T, T>]) -> (usize, usize, usize) { pub(crate) fn find_max_execution_units<T>(xs: &[TestResult<T, T>]) -> (usize, usize, usize) {
fn max_execution_units(max_mem: i64, max_cpu: i64, cost: &ExBudget) -> (i64, i64) {
if cost.mem >= max_mem && cost.cpu >= max_cpu {
(cost.mem, cost.cpu)
} else if cost.mem > max_mem {
(cost.mem, max_cpu)
} else if cost.cpu > max_cpu {
(max_mem, cost.cpu)
} else {
(max_mem, max_cpu)
}
}
let (max_mem, max_cpu, max_iter) = let (max_mem, max_cpu, max_iter) =
xs.iter() xs.iter()
.fold((0, 0, 0), |(max_mem, max_cpu, max_iter), test| match test { .fold((0, 0, 0), |(max_mem, max_cpu, max_iter), test| match test {
@ -124,18 +137,15 @@ pub(crate) fn find_max_execution_units<T>(xs: &[TestResult<T, T>]) -> (usize, us
(max_mem, max_cpu, std::cmp::max(max_iter, *iterations)) (max_mem, max_cpu, std::cmp::max(max_iter, *iterations))
} }
TestResult::UnitTestResult(UnitTestResult { spent_budget, .. }) => { TestResult::UnitTestResult(UnitTestResult { spent_budget, .. }) => {
if spent_budget.mem >= max_mem && spent_budget.cpu >= max_cpu { let (max_mem, max_cpu) = max_execution_units(max_mem, max_cpu, spent_budget);
(spent_budget.mem, spent_budget.cpu, max_iter)
} else if spent_budget.mem > max_mem {
(spent_budget.mem, max_cpu, max_iter)
} else if spent_budget.cpu > max_cpu {
(max_mem, spent_budget.cpu, max_iter)
} else {
(max_mem, max_cpu, max_iter) (max_mem, max_cpu, max_iter)
} }
TestResult::BenchmarkResult(BenchmarkResult { measures, .. }) => {
let (mut max_mem, mut max_cpu) = (max_mem, max_cpu);
for (_, measure) in measures {
(max_mem, max_cpu) = max_execution_units(max_mem, max_cpu, measure);
} }
TestResult::Benchmark(..) => { (max_mem, max_cpu, max_iter)
unreachable!("property returned benchmark result ?!")
} }
}); });

View File

@ -39,16 +39,22 @@ impl EventListener for Json {
}); });
println!("{}", serde_json::to_string_pretty(&json_output).unwrap()); println!("{}", serde_json::to_string_pretty(&json_output).unwrap());
} }
Event::FinishedBenchmarks { tests, seed } => { Event::FinishedBenchmarks { benchmarks, seed } => {
let benchmark_results: Vec<_> = tests let benchmark_results: Vec<_> = benchmarks
.into_iter() .into_iter()
.filter_map(|test| { .filter_map(|test| {
if let TestResult::Benchmark(result) = test { if let TestResult::BenchmarkResult(result) = test {
Some(serde_json::json!({ Some(serde_json::json!({
"name": result.test.name, "name": result.bench.name,
"module": result.test.module, "module": result.bench.module,
"memory": result.cost.mem, "measures": result.measures
"cpu": result.cost.cpu .into_iter()
.map(|measure| serde_json::json!({
"size": measure.0,
"memory": measure.1.mem,
"cpu": measure.1.cpu
}))
.collect::<Vec<_>>()
})) }))
} else { } else {
None None
@ -74,7 +80,7 @@ fn fmt_test_json(result: &TestResult<UntypedExpr, UntypedExpr>) -> serde_json::V
TestResult::PropertyTestResult(PropertyTestResult { ref test, .. }) => { TestResult::PropertyTestResult(PropertyTestResult { ref test, .. }) => {
&test.on_test_failure &test.on_test_failure
} }
TestResult::Benchmark(_) => unreachable!("benchmark returned in JSON output"), TestResult::BenchmarkResult(_) => unreachable!("benchmark returned in JSON output"),
}; };
let mut test = json!({ let mut test = json!({
@ -120,7 +126,7 @@ fn fmt_test_json(result: &TestResult<UntypedExpr, UntypedExpr>) -> serde_json::V
Err(err) => json!({"error": err.to_string()}), Err(err) => json!({"error": err.to_string()}),
}; };
} }
TestResult::Benchmark(_) => unreachable!("benchmark returned in JSON output"), TestResult::BenchmarkResult(_) => unreachable!("benchmark returned in JSON output"),
} }
if !result.traces().is_empty() { if !result.traces().is_empty() {

View File

@ -4,11 +4,21 @@ use aiken_lang::{
ast::OnTestFailure, ast::OnTestFailure,
expr::UntypedExpr, expr::UntypedExpr,
format::Formatter, format::Formatter,
test_framework::{AssertionStyleOptions, PropertyTestResult, TestResult, UnitTestResult}, test_framework::{
AssertionStyleOptions, BenchmarkResult, PropertyTestResult, TestResult, UnitTestResult,
},
}; };
use owo_colors::{OwoColorize, Stream::Stderr}; use owo_colors::{OwoColorize, Stream::Stderr};
use rgb::RGB8;
use std::sync::LazyLock;
use uplc::machine::cost_model::ExBudget; use uplc::machine::cost_model::ExBudget;
static BENCH_PLOT_COLOR: LazyLock<RGB8> = LazyLock::new(|| RGB8 {
r: 250,
g: 211,
b: 144,
});
#[derive(Debug, Default, Clone, Copy)] #[derive(Debug, Default, Clone, Copy)]
pub struct Terminal; pub struct Terminal;
@ -224,14 +234,47 @@ impl EventListener for Terminal {
"...".if_supports_color(Stderr, |s| s.bold()) "...".if_supports_color(Stderr, |s| s.bold())
); );
} }
Event::FinishedBenchmarks { tests, .. } => { Event::FinishedBenchmarks { seed, benchmarks } => {
for test in tests { let (max_mem, max_cpu, max_iter) = find_max_execution_units(&benchmarks);
if let TestResult::Benchmark(result) = test {
println!("{} {} ", result.test.name.bold(), "BENCH".blue(),); for (module, results) in &group_by_module(&benchmarks) {
println!(" Memory: {} bytes", result.cost.mem); let title = module
println!(" CPU: {} units", result.cost.cpu); .if_supports_color(Stderr, |s| s.bold())
.if_supports_color(Stderr, |s| s.blue())
.to_string();
let benchmarks = results
.iter()
.map(|r| fmt_test(r, max_mem, max_cpu, max_iter, true))
.collect::<Vec<String>>()
.join("\n")
.chars()
.skip(1) // Remove extra first newline
.collect::<String>();
let seed_info = format!(
"with {opt}={seed}",
opt = "--seed".if_supports_color(Stderr, |s| s.bold()),
seed = format!("{seed}").if_supports_color(Stderr, |s| s.bold())
);
if !benchmarks.is_empty() {
println!(); println!();
} }
println!(
"{}\n",
pretty::indent(
&pretty::open_box(&title, &benchmarks, &seed_info, |border| border
.if_supports_color(Stderr, |s| s.bright_black())
.to_string()),
4
)
);
}
if !benchmarks.is_empty() {
println!();
} }
} }
} }
@ -246,7 +289,23 @@ fn fmt_test(
styled: bool, styled: bool,
) -> String { ) -> String {
// Status // Status
let mut test = if result.is_success() { let mut test = if matches!(result, TestResult::BenchmarkResult { .. }) {
format!(
"\n{label}{title}\n",
label = if result.is_success() {
String::new()
} else {
pretty::style_if(styled, "FAIL ".to_string(), |s| {
s.if_supports_color(Stderr, |s| s.bold())
.if_supports_color(Stderr, |s| s.red())
.to_string()
})
},
title = pretty::style_if(styled, result.title().to_string(), |s| s
.if_supports_color(Stderr, |s| s.bright_blue())
.to_string())
)
} else if result.is_success() {
pretty::style_if(styled, "PASS".to_string(), |s| { pretty::style_if(styled, "PASS".to_string(), |s| {
s.if_supports_color(Stderr, |s| s.bold()) s.if_supports_color(Stderr, |s| s.bold())
.if_supports_color(Stderr, |s| s.green()) .if_supports_color(Stderr, |s| s.green())
@ -292,29 +351,76 @@ fn fmt_test(
if *iterations > 1 { "s" } else { "" } if *iterations > 1 { "s" } else { "" }
); );
} }
TestResult::Benchmark(benchmark) => { TestResult::BenchmarkResult(BenchmarkResult { error: Some(e), .. }) => {
let mem_pad = pretty::pad_left(benchmark.cost.mem.to_string(), max_mem, " ");
let cpu_pad = pretty::pad_left(benchmark.cost.cpu.to_string(), max_cpu, " ");
test = format!( test = format!(
"{test} [mem: {mem_unit}, cpu: {cpu_unit}]", "{test}{}",
mem_unit = pretty::style_if(styled, mem_pad, |s| s e.to_string().if_supports_color(Stderr, |s| s.red())
.if_supports_color(Stderr, |s| s.cyan())
.to_string()),
cpu_unit = pretty::style_if(styled, cpu_pad, |s| s
.if_supports_color(Stderr, |s| s.cyan())
.to_string()),
); );
} }
TestResult::BenchmarkResult(BenchmarkResult {
measures,
error: None,
..
}) => {
let max_size = measures
.iter()
.map(|(size, _)| *size)
.max()
.unwrap_or_default();
let mem_chart = format!(
"{title}\n{chart}",
title = "memory units"
.if_supports_color(Stderr, |s| s.yellow())
.if_supports_color(Stderr, |s| s.bold()),
chart = plot(
&BENCH_PLOT_COLOR,
measures
.iter()
.map(|(size, budget)| (*size as f32, budget.mem as f32))
.collect::<Vec<_>>(),
max_size
)
);
let cpu_chart = format!(
"{title}\n{chart}",
title = "cpu units"
.if_supports_color(Stderr, |s| s.yellow())
.if_supports_color(Stderr, |s| s.bold()),
chart = plot(
&BENCH_PLOT_COLOR,
measures
.iter()
.map(|(size, budget)| (*size as f32, budget.cpu as f32))
.collect::<Vec<_>>(),
max_size
)
);
let charts = mem_chart
.lines()
.zip(cpu_chart.lines())
.map(|(l, r)| format!(" {}{r}", pretty::pad_right(l.to_string(), 55, " ")))
.collect::<Vec<_>>()
.join("\n");
test = format!("{test}{charts}",);
}
} }
// Title // Title
test = format!( test = match result {
TestResult::BenchmarkResult(..) => test,
TestResult::UnitTestResult(..) | TestResult::PropertyTestResult(..) => {
format!(
"{test} {title}", "{test} {title}",
title = pretty::style_if(styled, result.title().to_string(), |s| s title = pretty::style_if(styled, result.title().to_string(), |s| s
.if_supports_color(Stderr, |s| s.bright_blue()) .if_supports_color(Stderr, |s| s.bright_blue())
.to_string()) .to_string())
); )
}
};
// Annotations // Annotations
match result { match result {
@ -470,3 +576,14 @@ fn fmt_test_summary<T>(tests: &[&TestResult<T, T>], styled: bool) -> String {
.to_string()), .to_string()),
) )
} }
fn plot(color: &RGB8, points: Vec<(f32, f32)>, max_size: usize) -> String {
use textplots::{Chart, ColorPlot, Shape};
let mut chart = Chart::new(80, 50, 1.0, max_size as f32);
let plot = Shape::Lines(&points);
let chart = chart.linecolorplot(&plot, *color);
chart.borders();
chart.axis();
chart.figures();
chart.to_string()
}

View File

@ -101,6 +101,7 @@ mod test {
test.to_owned(), test.to_owned(),
module_name.to_string(), module_name.to_string(),
PathBuf::new(), PathBuf::new(),
RunnableKind::Test,
), ),
data_types, data_types,
) )
@ -245,12 +246,11 @@ mod test {
} }
"#}); "#});
assert!(prop assert!(TestResult::PropertyTestResult::<(), _>(prop.run(
.run::<()>(
42, 42,
PropertyTest::DEFAULT_MAX_SUCCESS, PropertyTest::DEFAULT_MAX_SUCCESS,
&PlutusVersion::default() &PlutusVersion::default()
) ))
.is_success()); .is_success());
} }
@ -273,13 +273,12 @@ mod test {
} }
"#}); "#});
match prop.run::<()>( let result = prop.run(
42, 42,
PropertyTest::DEFAULT_MAX_SUCCESS, PropertyTest::DEFAULT_MAX_SUCCESS,
&PlutusVersion::default(), &PlutusVersion::default(),
) { );
TestResult::UnitTestResult(..) => unreachable!("property returned unit-test result ?!"),
TestResult::PropertyTestResult(result) => {
assert!( assert!(
result result
.labels .labels
@ -287,10 +286,7 @@ mod test {
.eq(vec![(&"head".to_string(), &53), (&"tail".to_string(), &47)]), .eq(vec![(&"head".to_string(), &53), (&"tail".to_string(), &47)]),
"labels: {:#?}", "labels: {:#?}",
result.labels result.labels
) );
}
TestResult::Benchmark(..) => unreachable!("property returned benchmark result ?!"),
}
} }
#[test] #[test]

View File

@ -1,4 +1,8 @@
use aiken_lang::test_framework::PropertyTest; use super::build::{trace_filter_parser, trace_level_parser};
use aiken_lang::{
ast::{TraceLevel, Tracing},
test_framework::Benchmark,
};
use aiken_project::watch::with_project; use aiken_project::watch::with_project;
use rand::prelude::*; use rand::prelude::*;
use std::{ use std::{
@ -13,37 +17,69 @@ pub struct Args {
/// Path to project /// Path to project
directory: Option<PathBuf>, directory: Option<PathBuf>,
/// An initial seed to initialize the pseudo-random generator for property-tests. /// An initial seed to initialize the pseudo-random generator for benchmarks.
#[clap(long)] #[clap(long)]
seed: Option<u32>, seed: Option<u32>,
/// How many times we will run each benchmark in the relevant project. /// The maximum size to benchmark with. Note that this does not necessarily equate to the number
#[clap(long, default_value_t = PropertyTest::DEFAULT_MAX_SUCCESS)] /// of measurements actually performed but controls the maximum size given to a Sampler.
times_to_run: usize, #[clap(long, default_value_t = Benchmark::DEFAULT_MAX_SIZE)]
max_size: usize,
/// Only run tests if they match any of these strings. /// Only run benchmarks if they match any of these strings.
///
/// You can match a module with `-m aiken/list` or `-m list`. /// You can match a module with `-m aiken/list` or `-m list`.
/// You can match a test with `-m "aiken/list.{map}"` or `-m "aiken/option.{flatten_1}"` /// You can match a benchmark with `-m "aiken/list.{map}"` or `-m "aiken/option.{flatten_1}"`
#[clap(short, long)] #[clap(short, long)]
match_tests: Option<Vec<String>>, match_benchmarks: Option<Vec<String>>,
/// This is meant to be used with `--match-tests`. /// This is meant to be used with `--match-benchmarks`.
/// It forces test names to match exactly /// It forces benchmark names to match exactly
#[clap(short, long)] #[clap(short, long)]
exact_match: bool, exact_match: bool,
/// Environment to use for benchmarking /// Environment to use for benchmarking
env: Option<String>, env: Option<String>,
/// Filter traces to be included in the generated program(s).
///
/// - user-defined:
/// only consider traces that you've explicitly defined
/// either through the 'trace' keyword or via the trace-if-false
/// ('?') operator.
///
/// - compiler-generated:
/// only include internal traces generated by the
/// Aiken compiler, for example in usage of 'expect'.
///
/// - all:
/// include both user-defined and compiler-generated traces.
///
/// [default: all]
#[clap(short = 'f', long, value_parser=trace_filter_parser(), default_missing_value="all", verbatim_doc_comment, alias="filter_traces")]
trace_filter: Option<fn(TraceLevel) -> Tracing>,
/// Choose the verbosity level of traces:
///
/// - silent: disable traces altogether
/// - compact: only culprit line numbers are shown on failures
/// - verbose: enable full verbose traces as provided by the user or the compiler
///
/// [optional]
#[clap(short, long, value_parser=trace_level_parser(), default_value_t=TraceLevel::Silent, verbatim_doc_comment)]
trace_level: TraceLevel,
} }
pub fn exec( pub fn exec(
Args { Args {
directory, directory,
match_tests, match_benchmarks,
exact_match, exact_match,
seed, seed,
times_to_run, max_size,
env, env,
trace_filter,
trace_level,
}: Args, }: Args,
) -> miette::Result<()> { ) -> miette::Result<()> {
let mut rng = rand::thread_rng(); let mut rng = rand::thread_rng();
@ -55,12 +91,15 @@ pub fn exec(
false, false,
!io::stdout().is_terminal(), !io::stdout().is_terminal(),
|p| { |p| {
// We don't want to check here, we want to benchmark
p.benchmark( p.benchmark(
match_tests.clone(), match_benchmarks.clone(),
exact_match, exact_match,
seed, seed,
times_to_run, max_size,
match trace_filter {
Some(trace_filter) => trace_filter(trace_level),
None => Tracing::All(trace_level),
},
env.clone(), env.clone(),
) )
}, },