fuse together bench & test runners, and collect all bench measures.
This commit removes some duplication between the bench and test runners, and fixes the results coming out of running benchmarks. Running a benchmark is expected to yield multiple measures, one for each iteration. For now, it suffices to show results for each size; eventually, we may try to interpolate the results with different candidate curves and pick the best fit.

Signed-off-by: KtorZ <5680256+KtorZ@users.noreply.github.com>
parent a7f4ecef9d
commit 2dbc33e91f
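As a sketch of where the per-size measures could go (an illustration, not code from this commit): with one ExBudget recorded per input size, a later pass could score a few candidate growth curves against the (size, cpu) points and keep the best fit. Everything below is hypothetical; only the idea of per-size measures comes from the diff.

// Hypothetical helper, not part of this commit: pick the curve that best
// explains (size, cost) measures, using least squares through the origin.
fn best_fit(points: &[(f64, f64)]) -> &'static str {
    let candidates: [(&'static str, fn(f64) -> f64); 3] = [
        ("constant", |_n| 1.0),
        ("linear", |n| n),
        ("n log n", |n| if n > 1.0 { n * n.ln() } else { n }),
    ];
    let mut best = ("constant", f64::INFINITY);
    for (name, shape) in candidates {
        // Optimal scalar coefficient a = sum(x*y) / sum(x*x) for y ~ a * shape(n).
        let (mut sxy, mut sxx) = (0.0, 0.0);
        for &(n, y) in points {
            let x = shape(n);
            sxy += x * y;
            sxx += x * x;
        }
        let a = if sxx == 0.0 { 0.0 } else { sxy / sxx };
        let err: f64 = points.iter().map(|&(n, y)| (y - a * shape(n)).powi(2)).sum();
        if err < best.1 {
            best = (name, err);
        }
    }
    best.0
}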
@@ -28,6 +28,12 @@ use uplc::{
 };
 use vec1::{vec1, Vec1};
 
+#[derive(Debug, Clone, Copy)]
+pub enum RunnableKind {
+    Test,
+    Bench,
+}
+
 /// ----- Test -----------------------------------------------------------------
 ///
 /// Aiken supports two kinds of tests: unit and property. A unit test is a simply

@@ -117,15 +123,15 @@ impl Test {
         })
     }
 
-    pub fn from_test_definition(
+    pub fn from_function_definition(
         generator: &mut CodeGenerator<'_>,
         test: TypedTest,
         module_name: String,
         input_path: PathBuf,
-        is_benchmark: bool,
+        kind: RunnableKind,
     ) -> Test {
         if test.arguments.is_empty() {
-            if is_benchmark {
+            if matches!(kind, RunnableKind::Bench) {
                 unreachable!("benchmark must have at least one argument");
             } else {
                 Self::unit_test(generator, test, module_name, input_path)

@@ -153,8 +159,8 @@ impl Test {
            // apply onto it later.
            let generator_program = generator.clone().generate_raw(&via, &[], &module_name);
 
-            if is_benchmark {
-                Test::Benchmark(Benchmark {
+            match kind {
+                RunnableKind::Bench => Test::Benchmark(Benchmark {
                     input_path,
                     module: module_name,
                     name: test.name,

@@ -165,9 +171,8 @@ impl Test {
                         type_info,
                         stripped_type_info,
                     },
-                })
-            } else {
-                Self::property_test(
+                }),
+                RunnableKind::Test => Self::property_test(
                     input_path,
                     module_name,
                     test.name,

@@ -178,27 +183,26 @@ impl Test {
                         stripped_type_info,
                         type_info,
                     },
-                )
+                ),
             }
         }
     }
 
-    pub fn from_benchmark_definition(
-        generator: &mut CodeGenerator<'_>,
-        test: TypedTest,
-        module_name: String,
-        input_path: PathBuf,
-    ) -> Test {
-        Self::from_test_definition(generator, test, module_name, input_path, true)
-    }
-
-    pub fn from_function_definition(
-        generator: &mut CodeGenerator<'_>,
-        test: TypedTest,
-        module_name: String,
-        input_path: PathBuf,
-    ) -> Test {
-        Self::from_test_definition(generator, test, module_name, input_path, false)
+    pub fn run(
+        self,
+        seed: u32,
+        max_success: usize,
+        plutus_version: &PlutusVersion,
+    ) -> TestResult<(Constant, Rc<Type>), PlutusData> {
+        match self {
+            Test::UnitTest(unit_test) => TestResult::UnitTestResult(unit_test.run(plutus_version)),
+            Test::PropertyTest(property_test) => {
+                TestResult::PropertyTestResult(property_test.run(seed, max_success, plutus_version))
+            }
+            Test::Benchmark(benchmark) => {
+                TestResult::BenchmarkResult(benchmark.run(seed, max_success, plutus_version))
+            }
+        }
     }
 
@@ -217,7 +221,7 @@ pub struct UnitTest {
 unsafe impl Send for UnitTest {}
 
 impl UnitTest {
-    pub fn run<T>(self, plutus_version: &PlutusVersion) -> TestResult<(Constant, Rc<Type>), T> {
+    pub fn run(self, plutus_version: &PlutusVersion) -> UnitTestResult<(Constant, Rc<Type>)> {
         let mut eval_result = Program::<NamedDeBruijn>::try_from(self.program.clone())
             .unwrap()
             .eval_version(ExBudget::max(), &plutus_version.into());

@@ -233,13 +237,13 @@ impl UnitTest {
         }
         traces.extend(eval_result.logs());
 
-        TestResult::UnitTestResult(UnitTestResult {
+        UnitTestResult {
             success,
             test: self.to_owned(),
             spent_budget: eval_result.cost(),
             traces,
             assertion: self.assertion,
-        })
+        }
     }
 }
 
@@ -317,12 +321,12 @@ impl PropertyTest {
 
     /// Run a property test from a given seed. The property is run at most DEFAULT_MAX_SUCCESS times. It
     /// may stops earlier on failure; in which case a 'counterexample' is returned.
-    pub fn run<U>(
+    pub fn run(
         self,
         seed: u32,
         n: usize,
         plutus_version: &PlutusVersion,
-    ) -> TestResult<U, PlutusData> {
+    ) -> PropertyTestResult<PlutusData> {
         let mut labels = BTreeMap::new();
         let mut remaining = n;
 
@@ -352,13 +356,13 @@ impl PropertyTest {
             ),
         };
 
-        TestResult::PropertyTestResult(PropertyTestResult {
+        PropertyTestResult {
             test: self,
             counterexample,
             iterations,
             labels,
             traces,
-        })
+        }
     }
 
     pub fn run_n_times<'a>(

@@ -503,51 +507,43 @@ pub struct Benchmark {
 unsafe impl Send for Benchmark {}
 
 impl Benchmark {
-    pub fn benchmark(
+    pub fn run(
         self,
         seed: u32,
         max_iterations: usize,
         plutus_version: &PlutusVersion,
-    ) -> Vec<BenchmarkResult> {
-        let mut results = Vec::with_capacity(max_iterations);
+    ) -> BenchmarkResult {
+        let mut measures = Vec::with_capacity(max_iterations);
         let mut iteration = 0;
         let mut prng = Prng::from_seed(seed);
+        let mut success = true;
 
-        while max_iterations > iteration {
-            let fuzzer = self
-                .sampler
-                .program
-                .apply_data(Data::integer(num_bigint::BigInt::from(iteration as i64)));
+        while success && max_iterations > iteration {
+            let size = Data::integer(num_bigint::BigInt::from(iteration as i64));
+            let fuzzer = self.sampler.program.apply_data(size);
             match prng.sample(&fuzzer) {
-                Ok(Some((new_prng, value))) => {
-                    prng = new_prng;
-                    let mut eval_result = self.eval(&value, plutus_version);
-                    results.push(BenchmarkResult {
-                        bench: self.clone(),
-                        cost: eval_result.cost(),
-                        success: true,
-                        traces: eval_result.logs().to_vec(),
-                    });
-                }
-
                 Ok(None) => {
                     panic!("A seeded PRNG returned 'None' which indicates a sampler is ill-formed and implemented wrongly; please contact library's authors.");
                 }
 
-                Err(e) => {
-                    results.push(BenchmarkResult {
-                        bench: self.clone(),
-                        cost: ExBudget::default(),
-                        success: false,
-                        traces: vec![e.to_string()],
-                    });
-                    break;
+                Ok(Some((new_prng, value))) => {
+                    prng = new_prng;
+                    measures.push(self.eval(&value, plutus_version).cost())
+                }
+
+                Err(_e) => {
+                    success = false;
                 }
             }
             iteration += 1;
         }
 
-        results
+        BenchmarkResult {
+            bench: self,
+            measures,
+            success,
+        }
     }
 
     pub fn eval(&self, value: &PlutusData, plutus_version: &PlutusVersion) -> EvalResult {

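A standalone analogue of the new loop above, for readers outside the codebase (the Budget type and sample closure are hypothetical; the real code samples a UPLC fuzzer and evaluates the benchmark program): one measure is collected per iteration, and the loop stops early on the first sampler failure.

// Minimal sketch of the collect-measures-per-size idea, under assumed types.
#[derive(Debug, Default, Clone, Copy)]
struct Budget {
    mem: u64,
    cpu: u64,
}

fn run_bench(
    max_iterations: usize,
    mut sample: impl FnMut(usize) -> Result<Budget, String>,
) -> (Vec<Budget>, bool) {
    let mut measures = Vec::with_capacity(max_iterations);
    let mut success = true;
    let mut iteration = 0;
    // One measure per iteration; `iteration` doubles as the input size.
    while success && max_iterations > iteration {
        match sample(iteration) {
            Ok(cost) => measures.push(cost),
            Err(_) => success = false, // stop early and flag the failure
        }
        iteration += 1;
    }
    (measures, success)
}

fn main() {
    // Invented linear-cost sampler, purely for demonstration.
    let (measures, ok) = run_bench(5, |size| {
        Ok(Budget { mem: 64 * size as u64, cpu: 1_000 * size as u64 })
    });
    println!("{ok}: {measures:?}");
}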
@@ -1133,8 +1129,8 @@ impl<U, T> TestResult<U, T> {
     pub fn traces(&self) -> &[String] {
         match self {
             TestResult::UnitTestResult(UnitTestResult { traces, .. })
-            | TestResult::PropertyTestResult(PropertyTestResult { traces, .. })
-            | TestResult::BenchmarkResult(BenchmarkResult { traces, .. }) => traces,
+            | TestResult::PropertyTestResult(PropertyTestResult { traces, .. }) => traces,
+            TestResult::BenchmarkResult(BenchmarkResult { .. }) => &[],
         }
     }
 }

@@ -1473,9 +1469,8 @@ impl Assertion<UntypedExpr> {
 #[derive(Debug, Clone)]
 pub struct BenchmarkResult {
     pub bench: Benchmark,
-    pub cost: ExBudget,
+    pub measures: Vec<ExBudget>,
     pub success: bool,
-    pub traces: Vec<String>,
 }
 
 unsafe impl Send for BenchmarkResult {}

@@ -40,7 +40,7 @@ use aiken_lang::{
     format::{Formatter, MAX_COLUMNS},
     gen_uplc::CodeGenerator,
     line_numbers::LineNumbers,
-    test_framework::{Test, TestResult},
+    test_framework::{RunnableKind, Test, TestResult},
     tipo::{Type, TypeInfo},
     utils, IdGenerator,
 };

@@ -83,12 +83,6 @@ enum AddModuleBy {
     Path(PathBuf),
 }
 
-#[derive(Debug, Clone, Copy)]
-enum Runnable {
-    Test,
-    Bench,
-}
-
 pub struct Project<T>
 where
     T: EventListener,

@@ -305,20 +299,20 @@ where
 
     pub fn benchmark(
         &mut self,
-        match_tests: Option<Vec<String>>,
+        match_benchmarks: Option<Vec<String>>,
         exact_match: bool,
         seed: u32,
-        times_to_run: usize,
+        iterations: usize,
         env: Option<String>,
     ) -> Result<(), Vec<Error>> {
         let options = Options {
             tracing: Tracing::silent(),
             env,
             code_gen_mode: CodeGenMode::Benchmark {
-                match_tests,
+                match_benchmarks,
                 exact_match,
                 seed,
-                times_to_run,
+                iterations,
             },
             blueprint_path: self.blueprint_path(None),
         };

@@ -438,7 +432,7 @@ where
             self.event_listener.handle_event(Event::RunningTests);
         }
 
-        let tests = self.run_tests(tests, seed, property_max_success);
+        let tests = self.run_runnables(tests, seed, property_max_success);
 
         self.checks_count = if tests.is_empty() {
             None

@@ -472,21 +466,25 @@ where
                 }
             }
             CodeGenMode::Benchmark {
-                match_tests,
+                match_benchmarks,
                 exact_match,
                 seed,
-                times_to_run,
+                iterations,
             } => {
                 let verbose = false;
 
-                let tests =
-                    self.collect_benchmarks(verbose, match_tests, exact_match, options.tracing)?;
+                let benchmarks = self.collect_benchmarks(
+                    verbose,
+                    match_benchmarks,
+                    exact_match,
+                    options.tracing,
+                )?;
 
-                if !tests.is_empty() {
+                if !benchmarks.is_empty() {
                     self.event_listener.handle_event(Event::RunningBenchmarks);
                 }
 
-                let benchmarks = self.run_benchmarks(tests, seed, times_to_run);
+                let benchmarks = self.run_runnables(benchmarks, seed, iterations);
 
                 let errors: Vec<Error> = benchmarks
                     .iter()

@@ -962,7 +960,7 @@ where
 
     fn collect_test_items(
         &mut self,
-        kind: Runnable,
+        kind: RunnableKind,
         verbose: bool,
         match_tests: Option<Vec<String>>,
         exact_match: bool,

@@ -1001,8 +999,8 @@ where
 
         for def in checked_module.ast.definitions() {
             let func = match (kind, def) {
-                (Runnable::Test, Definition::Test(func)) => Some(func),
-                (Runnable::Bench, Definition::Benchmark(func)) => Some(func),
+                (RunnableKind::Test, Definition::Test(func)) => Some(func),
+                (RunnableKind::Bench, Definition::Benchmark(func)) => Some(func),
                 _ => None,
             };
 
@@ -1056,20 +1054,13 @@ where
                 })
             }
 
-            tests.push(match kind {
-                Runnable::Test => Test::from_function_definition(
-                    &mut generator,
-                    test.to_owned(),
-                    module_name,
-                    input_path,
-                ),
-                Runnable::Bench => Test::from_benchmark_definition(
-                    &mut generator,
-                    test.to_owned(),
-                    module_name,
-                    input_path,
-                ),
-            });
+            tests.push(Test::from_function_definition(
+                &mut generator,
+                test.to_owned(),
+                module_name,
+                input_path,
+                kind,
+            ));
         }
 
         Ok(tests)

@@ -1082,7 +1073,13 @@ where
         exact_match: bool,
         tracing: Tracing,
     ) -> Result<Vec<Test>, Error> {
-        self.collect_test_items(Runnable::Test, verbose, match_tests, exact_match, tracing)
+        self.collect_test_items(
+            RunnableKind::Test,
+            verbose,
+            match_tests,
+            exact_match,
+            tracing,
+        )
     }
 
     fn collect_benchmarks(

@@ -1092,14 +1089,20 @@ where
         exact_match: bool,
         tracing: Tracing,
     ) -> Result<Vec<Test>, Error> {
-        self.collect_test_items(Runnable::Bench, verbose, match_tests, exact_match, tracing)
+        self.collect_test_items(
+            RunnableKind::Bench,
+            verbose,
+            match_tests,
+            exact_match,
+            tracing,
+        )
     }
 
-    fn run_tests(
+    fn run_runnables(
         &self,
         tests: Vec<Test>,
         seed: u32,
-        property_max_success: usize,
+        max_success: usize,
     ) -> Vec<TestResult<UntypedExpr, UntypedExpr>> {
         use rayon::prelude::*;
 
@@ -1109,44 +1112,7 @@ where
 
         tests
             .into_par_iter()
-            .map(|test| match test {
-                Test::UnitTest(unit_test) => unit_test.run(plutus_version),
-                Test::PropertyTest(property_test) => {
-                    property_test.run(seed, property_max_success, plutus_version)
-                }
-                Test::Benchmark(_) => {
-                    unreachable!("found unexpected benchmark amongst collected tests.")
-                }
-            })
-            .collect::<Vec<TestResult<(Constant, Rc<Type>), PlutusData>>>()
-            .into_iter()
-            .map(|test| test.reify(&data_types))
-            .collect()
-    }
-
-    fn run_benchmarks(
-        &self,
-        tests: Vec<Test>,
-        seed: u32,
-        property_max_success: usize,
-    ) -> Vec<TestResult<UntypedExpr, UntypedExpr>> {
-        use rayon::prelude::*;
-
-        let data_types = utils::indexmap::as_ref_values(&self.data_types);
-        let plutus_version = &self.config.plutus;
-
-        tests
-            .into_par_iter()
-            .flat_map(|test| match test {
-                Test::UnitTest(_) | Test::PropertyTest(_) => {
-                    unreachable!("Tests cannot be ran during benchmarking.")
-                }
-                Test::Benchmark(benchmark) => benchmark
-                    .benchmark(seed, property_max_success, plutus_version)
-                    .into_iter()
-                    .map(TestResult::BenchmarkResult)
-                    .collect::<Vec<_>>(),
-            })
+            .map(|test| test.run(seed, max_success, plutus_version))
             .collect::<Vec<TestResult<(Constant, Rc<Type>), PlutusData>>>()
             .into_iter()
             .map(|test| test.reify(&data_types))

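With this change, one parallel map over Test::run replaces the two near-identical runner loops deleted above. A toy analogue of that shape (assumes the rayon crate; all names are illustrative, not from the codebase):

// Sketch: a single dispatching run() lets one parallel pipeline serve
// unit tests, property tests, and benchmarks alike.
use rayon::prelude::*;

enum Runnable {
    Unit(u32),
    Property(u32),
    Bench(u32),
}

fn run(r: Runnable) -> String {
    match r {
        Runnable::Unit(n) => format!("unit:{n}"),
        Runnable::Property(n) => format!("prop:{n}"),
        Runnable::Bench(n) => format!("bench:{n}"),
    }
}

fn main() {
    let results: Vec<String> = vec![Runnable::Unit(1), Runnable::Bench(2)]
        .into_par_iter()
        .map(run)
        .collect();
    println!("{results:?}");
}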
@@ -30,10 +30,10 @@ pub enum CodeGenMode {
     },
     Build(bool),
     Benchmark {
-        match_tests: Option<Vec<String>>,
+        match_benchmarks: Option<Vec<String>>,
         exact_match: bool,
         seed: u32,
-        times_to_run: usize,
+        iterations: usize,
     },
     NoOp,
 }

@@ -135,7 +135,7 @@ pub(crate) fn find_max_execution_units<T>(xs: &[TestResult<T, T>]) -> (usize, us
             }
         }
         TestResult::BenchmarkResult(..) => {
-            unreachable!("property returned benchmark result ?!")
+            unreachable!("unexpected benchmark found amongst test results.")
         }
     });
 
@@ -47,8 +47,13 @@ impl EventListener for Json {
                 Some(serde_json::json!({
                     "name": result.bench.name,
                     "module": result.bench.module,
-                    "memory": result.cost.mem,
-                    "cpu": result.cost.cpu
+                    "measures": result.measures
+                        .into_iter()
+                        .map(|measure| serde_json::json!({
+                            "memory": measure.mem,
+                            "cpu": measure.cpu
+                        }))
+                        .collect::<Vec<_>>()
                 }))
             } else {
                 None

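The JSON listener thus emits one entry per measure instead of a single cost. A minimal sketch of the resulting payload shape (field values and names invented; assumes the serde_json crate):

// Illustration only: build the per-measure JSON array this listener produces.
use serde_json::json;

fn main() {
    let measures = vec![(512u64, 100_000u64), (1024, 250_000)]; // (mem, cpu), invented
    let payload = json!({
        "name": "bench_fibonacci", // hypothetical bench name
        "module": "my_module",     // hypothetical module
        "measures": measures
            .into_iter()
            .map(|(mem, cpu)| json!({ "memory": mem, "cpu": cpu }))
            .collect::<Vec<_>>(),
    });
    println!("{}", serde_json::to_string_pretty(&payload).unwrap());
}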
@@ -224,15 +224,8 @@ impl EventListener for Terminal {
                     "...".if_supports_color(Stderr, |s| s.bold())
                 );
             }
-            Event::FinishedBenchmarks { benchmarks, .. } => {
-                for bench in benchmarks {
-                    if let TestResult::BenchmarkResult(result) = bench {
-                        println!("{} {} ", result.bench.name.bold(), "BENCH".blue(),);
-                        println!(" Memory: {} bytes", result.cost.mem);
-                        println!(" CPU: {} units", result.cost.cpu);
-                        println!();
-                    }
-                }
+            Event::FinishedBenchmarks { .. } => {
+                eprintln!("TODO: FinishedBenchmarks");
             }
         }
     }

|
@ -292,19 +285,8 @@ fn fmt_test(
|
|||
if *iterations > 1 { "s" } else { "" }
|
||||
);
|
||||
}
|
||||
TestResult::BenchmarkResult(benchmark) => {
|
||||
let mem_pad = pretty::pad_left(benchmark.cost.mem.to_string(), max_mem, " ");
|
||||
let cpu_pad = pretty::pad_left(benchmark.cost.cpu.to_string(), max_cpu, " ");
|
||||
|
||||
test = format!(
|
||||
"{test} [mem: {mem_unit}, cpu: {cpu_unit}]",
|
||||
mem_unit = pretty::style_if(styled, mem_pad, |s| s
|
||||
.if_supports_color(Stderr, |s| s.cyan())
|
||||
.to_string()),
|
||||
cpu_unit = pretty::style_if(styled, cpu_pad, |s| s
|
||||
.if_supports_color(Stderr, |s| s.cyan())
|
||||
.to_string()),
|
||||
);
|
||||
TestResult::BenchmarkResult(..) => {
|
||||
unreachable!("unexpected benchmark found amongst test results.")
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@@ -101,6 +101,7 @@ mod test {
                 test.to_owned(),
                 module_name.to_string(),
                 PathBuf::new(),
+                RunnableKind::Test,
             ),
             data_types,
         )

@@ -245,13 +246,12 @@ mod test {
         }
     "#});
 
-        assert!(prop
-            .run::<()>(
-                42,
-                PropertyTest::DEFAULT_MAX_SUCCESS,
-                &PlutusVersion::default()
-            )
-            .is_success());
+        assert!(TestResult::PropertyTestResult::<(), _>(prop.run(
+            42,
+            PropertyTest::DEFAULT_MAX_SUCCESS,
+            &PlutusVersion::default()
+        ))
+        .is_success());
     }
 
     #[test]

@@ -273,25 +273,20 @@ mod test {
         }
     "#});
 
-        match prop.run::<()>(
+        let result = prop.run(
             42,
             PropertyTest::DEFAULT_MAX_SUCCESS,
             &PlutusVersion::default(),
-        ) {
-            TestResult::BenchmarkResult(..) | TestResult::UnitTestResult(..) => {
-                unreachable!("property returned non-property result ?!")
-            }
-            TestResult::PropertyTestResult(result) => {
-                assert!(
-                    result
-                        .labels
-                        .iter()
-                        .eq(vec![(&"head".to_string(), &53), (&"tail".to_string(), &47)]),
-                    "labels: {:#?}",
-                    result.labels
-                )
-            }
-        }
+        );
+
+        assert!(
+            result
+                .labels
+                .iter()
+                .eq(vec![(&"head".to_string(), &53), (&"tail".to_string(), &47)]),
+            "labels: {:#?}",
+            result.labels
+        );
     }
 
     #[test]