rework sizing of benchmarks, taking measures at different points

The idea is to get a good sample of measures from running benchmarks
  with various sizes, so one can get an idea of how well a function
  performs at various sizes.

  Given that size can be made arbitrarily large, and that we currently
  report all benchmarks, I implemented a Fibonacci heuristic to gather
  data points from 0 to the max size using an increasing step size.

  Defined as a trait as I already anticipate we might need different
  sizing strategies, likely driven by the user via a command-line option;
  but for now, this will do.

Signed-off-by: KtorZ <5680256+KtorZ@users.noreply.github.com>
This commit is contained in:
KtorZ 2025-02-08 18:26:02 +01:00
parent 2dbc33e91f
commit 41440f131b
No known key found for this signature in database
GPG Key ID: 33173CB6F77F4277
3 changed files with 89 additions and 25 deletions

View File

@ -14,7 +14,7 @@ use pallas_primitives::alonzo::{Constr, PlutusData};
use patricia_tree::PatriciaMap; use patricia_tree::PatriciaMap;
use std::{ use std::{
borrow::Borrow, borrow::Borrow,
collections::BTreeMap, collections::{BTreeMap, VecDeque},
convert::TryFrom, convert::TryFrom,
fmt::{Debug, Display}, fmt::{Debug, Display},
ops::Deref, ops::Deref,
@ -506,21 +506,84 @@ pub struct Benchmark {
unsafe impl Send for Benchmark {} unsafe impl Send for Benchmark {}
/// Strategy for choosing the sizes at which a benchmark is sampled.
///
/// NOTE(review): per the commit message, this is a trait to leave room for
/// alternative sizing strategies (possibly user-selected via a CLI option).
trait Sizer {
    /// Returns `true` once the sizer has produced every size up to its maximum.
    fn is_done(&self) -> bool;
    /// Produces the next size to benchmark at.
    fn next(&mut self) -> usize;
}
/// A [`Sizer`] that walks the Fibonacci sequence, clamped at `max_size`,
/// so that larger sizes are sampled with increasingly coarse steps.
struct FibonacciSizer {
    /// Inclusive upper bound on the sizes produced.
    max_size: usize,
    /// The most recent Fibonacci terms (seeded lazily on the first two
    /// calls to `next`); only two entries are retained at a time.
    previous_sizes: VecDeque<usize>,
    /// Latest Fibonacci term reached so far.
    current_size: usize,
}

impl FibonacciSizer {
    /// Build a sizer producing sizes from 0 up to (and including) `max_size`.
    fn new(max_size: usize) -> Self {
        FibonacciSizer {
            previous_sizes: VecDeque::new(),
            current_size: 1,
            max_size,
        }
    }
}
impl Sizer for FibonacciSizer {
    /// The sequence is exhausted once the running Fibonacci term has
    /// reached or passed the configured maximum.
    fn is_done(&self) -> bool {
        self.current_size >= self.max_size
    }

    /// Yields 0, then 1, then successive Fibonacci numbers, with the final
    /// value clamped to `max_size` so the cap itself is always sampled.
    fn next(&mut self) -> usize {
        let seen = self.previous_sizes.len();

        if seen == 0 {
            // First call: plant the first seed and start sampling at size 0.
            self.previous_sizes.push_front(1);
            return 0;
        }

        if seen == 1 {
            // Second call: plant the second seed and sample at size 1.
            self.previous_sizes.push_front(1);
            return 1;
        }

        // Advance the sequence: fold the oldest retained term into the
        // current one, keeping only the two most recent terms around.
        let oldest = self.previous_sizes.pop_back().unwrap();
        self.current_size += oldest;
        self.previous_sizes.push_front(self.current_size);

        self.current_size.min(self.max_size)
    }
}
#[cfg(test)]
mod test_sizer {
    use super::{FibonacciSizer, Sizer};

    /// Over [0, 100] the sizer must visit 0, 1, every Fibonacci number
    /// below the cap, and finally the cap itself.
    #[test]
    pub fn fib_sizer_sequence() {
        let mut sizer = FibonacciSizer::new(100);

        let mut collected: Vec<usize> = Vec::new();
        loop {
            if sizer.is_done() {
                break;
            }
            collected.push(sizer.next());
        }

        assert_eq!(collected, vec![0, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 100])
    }
}
impl Benchmark { impl Benchmark {
pub const DEFAULT_MAX_SIZE: usize = 10;
pub fn run( pub fn run(
self, self,
seed: u32, seed: u32,
max_iterations: usize, max_size: usize,
plutus_version: &PlutusVersion, plutus_version: &PlutusVersion,
) -> BenchmarkResult { ) -> BenchmarkResult {
let mut measures = Vec::with_capacity(max_iterations); let mut measures = Vec::with_capacity(max_size);
let mut iteration = 0; let mut sizer = FibonacciSizer::new(max_size);
let mut prng = Prng::from_seed(seed); let mut prng = Prng::from_seed(seed);
let mut success = true; let mut success = true;
while success && max_iterations > iteration { while success && !sizer.is_done() {
let size = Data::integer(num_bigint::BigInt::from(iteration as i64)); let size = sizer.next();
let fuzzer = self.sampler.program.apply_data(size); let size_as_data = Data::integer(num_bigint::BigInt::from(size));
let fuzzer = self.sampler.program.apply_data(size_as_data);
match prng.sample(&fuzzer) { match prng.sample(&fuzzer) {
Ok(None) => { Ok(None) => {
@ -529,14 +592,13 @@ impl Benchmark {
Ok(Some((new_prng, value))) => { Ok(Some((new_prng, value))) => {
prng = new_prng; prng = new_prng;
measures.push(self.eval(&value, plutus_version).cost()) measures.push((size, self.eval(&value, plutus_version).cost()))
} }
Err(_e) => { Err(_e) => {
success = false; success = false;
} }
} }
iteration += 1;
} }
BenchmarkResult { BenchmarkResult {
@ -1469,7 +1531,7 @@ impl Assertion<UntypedExpr> {
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct BenchmarkResult { pub struct BenchmarkResult {
pub bench: Benchmark, pub bench: Benchmark,
pub measures: Vec<ExBudget>, pub measures: Vec<(usize, ExBudget)>,
pub success: bool, pub success: bool,
} }

View File

@ -50,8 +50,9 @@ impl EventListener for Json {
"measures": result.measures "measures": result.measures
.into_iter() .into_iter()
.map(|measure| serde_json::json!({ .map(|measure| serde_json::json!({
"memory": measure.mem, "size": measure.0,
"cpu": measure.cpu "memory": measure.1.mem,
"cpu": measure.1.cpu
})) }))
.collect::<Vec<_>>() .collect::<Vec<_>>()
})) }))

View File

@ -1,4 +1,4 @@
use aiken_lang::test_framework::PropertyTest; use aiken_lang::test_framework::Benchmark;
use aiken_project::watch::with_project; use aiken_project::watch::with_project;
use rand::prelude::*; use rand::prelude::*;
use std::{ use std::{
@ -17,18 +17,20 @@ pub struct Args {
#[clap(long)] #[clap(long)]
seed: Option<u32>, seed: Option<u32>,
/// How many times we will run each benchmark in the relevant project. /// The maximum size to benchmark with. Note that this does not necessarily equates the number
#[clap(long, default_value_t = PropertyTest::DEFAULT_MAX_SUCCESS)] /// of measurements actually performed but controls the maximum size given to a Sampler.
times_to_run: usize, #[clap(long, default_value_t = Benchmark::DEFAULT_MAX_SIZE)]
max_size: usize,
/// Only run tests if they match any of these strings. /// Only run benchmarks if they match any of these strings.
///
/// You can match a module with `-m aiken/list` or `-m list`. /// You can match a module with `-m aiken/list` or `-m list`.
/// You can match a test with `-m "aiken/list.{map}"` or `-m "aiken/option.{flatten_1}"` /// You can match a test with `-m "aiken/list.{map}"` or `-m "aiken/option.{flatten_1}"`
#[clap(short, long)] #[clap(short, long)]
match_tests: Option<Vec<String>>, match_benchmarks: Option<Vec<String>>,
/// This is meant to be used with `--match-tests`. /// This is meant to be used with `--match-benchmarks`.
/// It forces test names to match exactly /// It forces benchmark names to match exactly
#[clap(short, long)] #[clap(short, long)]
exact_match: bool, exact_match: bool,
@ -39,10 +41,10 @@ pub struct Args {
pub fn exec( pub fn exec(
Args { Args {
directory, directory,
match_tests, match_benchmarks,
exact_match, exact_match,
seed, seed,
times_to_run, max_size,
env, env,
}: Args, }: Args,
) -> miette::Result<()> { ) -> miette::Result<()> {
@ -55,12 +57,11 @@ pub fn exec(
false, false,
!io::stdout().is_terminal(), !io::stdout().is_terminal(),
|p| { |p| {
// We don't want to check here, we want to benchmark
p.benchmark( p.benchmark(
match_tests.clone(), match_benchmarks.clone(),
exact_match, exact_match,
seed, seed,
times_to_run, max_size,
env.clone(), env.clone(),
) )
}, },