rework sizing of benchmarks, taking measurements at different points

The idea is to gather a good sample of measurements by running benchmarks
  at various sizes, so one can get an idea of how well a function
  performs as its input grows.

  Given that the size can be made arbitrarily large, and that we currently
  report all measurements, I installed a Fibonacci heuristic to gather
  data points from 0 up to the max size using an increasing stepping.

  It is defined as a trait since I already anticipate we might need different
  sizing strategies, likely driven by the user via a command-line option;
  but for now, this will do.
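
  For intuition, the stepping walks the Fibonacci sequence from 0 and
  always finishes on the maximum itself. A minimal standalone sketch of
  the heuristic (an approximation for illustration, not the exact
  implementation in the diff below):

    fn fibonacci_sizes(max_size: usize) -> Vec<usize> {
        // Walk 0, 1, 2, 3, 5, 8, ... and end on max_size itself,
        // mirroring the capped stepping of FibonacciSizer.
        let mut sizes = vec![0];
        let (mut previous, mut current) = (1_usize, 1_usize);
        while current < max_size {
            sizes.push(current);
            let next = previous + current;
            previous = current;
            current = next;
        }
        sizes.push(max_size);
        sizes
    }

    // fibonacci_sizes(100) == [0, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 100]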

Signed-off-by: KtorZ <5680256+KtorZ@users.noreply.github.com>
KtorZ 2025-02-08 18:26:02 +01:00
parent 2dbc33e91f
commit 41440f131b
No known key found for this signature in database
GPG Key ID: 33173CB6F77F4277
3 changed files with 89 additions and 25 deletions


@@ -14,7 +14,7 @@ use pallas_primitives::alonzo::{Constr, PlutusData};
 use patricia_tree::PatriciaMap;
 use std::{
     borrow::Borrow,
-    collections::BTreeMap,
+    collections::{BTreeMap, VecDeque},
     convert::TryFrom,
     fmt::{Debug, Display},
     ops::Deref,
@@ -506,21 +506,84 @@ pub struct Benchmark {
 unsafe impl Send for Benchmark {}
 
+trait Sizer {
+    fn is_done(&self) -> bool;
+    fn next(&mut self) -> usize;
+}
+
+struct FibonacciSizer {
+    max_size: usize,
+    previous_sizes: VecDeque<usize>,
+    current_size: usize,
+}
+
+impl FibonacciSizer {
+    fn new(max_size: usize) -> Self {
+        Self {
+            max_size,
+            previous_sizes: VecDeque::new(),
+            current_size: 1,
+        }
+    }
+}
+
+impl Sizer for FibonacciSizer {
+    fn is_done(&self) -> bool {
+        self.current_size >= self.max_size
+    }
+
+    fn next(&mut self) -> usize {
+        match self.previous_sizes.len() {
+            0 => {
+                self.previous_sizes.push_front(1);
+                return 0;
+            }
+            1 => {
+                self.previous_sizes.push_front(1);
+                return 1;
+            }
+            _ => self.current_size += self.previous_sizes.pop_back().unwrap(),
+        }
+
+        self.previous_sizes.push_front(self.current_size);
+
+        self.current_size.min(self.max_size)
+    }
+}
+
+#[cfg(test)]
+mod test_sizer {
+    use super::{FibonacciSizer, Sizer};
+
+    #[test]
+    pub fn fib_sizer_sequence() {
+        let mut sizer = FibonacciSizer::new(100);
+        let mut sizes = Vec::new();
+
+        while !sizer.is_done() {
+            sizes.push(sizer.next())
+        }
+
+        assert_eq!(sizes, vec![0, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 100])
+    }
+}
+
 impl Benchmark {
+    pub const DEFAULT_MAX_SIZE: usize = 10;
+
     pub fn run(
         self,
         seed: u32,
-        max_iterations: usize,
+        max_size: usize,
         plutus_version: &PlutusVersion,
     ) -> BenchmarkResult {
-        let mut measures = Vec::with_capacity(max_iterations);
-        let mut iteration = 0;
+        let mut measures = Vec::with_capacity(max_size);
+        let mut sizer = FibonacciSizer::new(max_size);
         let mut prng = Prng::from_seed(seed);
         let mut success = true;
 
-        while success && max_iterations > iteration {
-            let size = Data::integer(num_bigint::BigInt::from(iteration as i64));
-            let fuzzer = self.sampler.program.apply_data(size);
+        while success && !sizer.is_done() {
+            let size = sizer.next();
+            let size_as_data = Data::integer(num_bigint::BigInt::from(size));
+            let fuzzer = self.sampler.program.apply_data(size_as_data);
 
             match prng.sample(&fuzzer) {
                 Ok(None) => {
@@ -529,14 +592,13 @@ impl Benchmark {
                 Ok(Some((new_prng, value))) => {
                     prng = new_prng;
-                    measures.push(self.eval(&value, plutus_version).cost())
+                    measures.push((size, self.eval(&value, plutus_version).cost()))
                 }
 
                 Err(_e) => {
                     success = false;
                 }
             }
 
-            iteration += 1;
         }
 
         BenchmarkResult {
@@ -1469,7 +1531,7 @@ impl Assertion<UntypedExpr> {
 #[derive(Debug, Clone)]
 pub struct BenchmarkResult {
     pub bench: Benchmark,
-    pub measures: Vec<ExBudget>,
+    pub measures: Vec<(usize, ExBudget)>,
     pub success: bool,
 }
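
Since measures is now Vec<(usize, ExBudget)>, downstream code can relate cost
to input size directly. A hypothetical consumer, sketched under the assumption
that ExBudget exposes the integer mem and cpu fields used above (print_growth
is not part of this change):

fn print_growth(measures: &[(usize, ExBudget)]) {
    // Compare consecutive data points to see how the CPU budget
    // scales as the input size grows.
    for pair in measures.windows(2) {
        let (size_a, cost_a) = &pair[0];
        let (size_b, cost_b) = &pair[1];
        let ratio = cost_b.cpu as f64 / cost_a.cpu.max(1) as f64;
        println!("size {size_a} -> {size_b}: cpu x{ratio:.2}");
    }
}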


@@ -50,8 +50,9 @@ impl EventListener for Json {
                 "measures": result.measures
                     .into_iter()
                     .map(|measure| serde_json::json!({
-                        "memory": measure.mem,
-                        "cpu": measure.cpu
+                        "size": measure.0,
+                        "memory": measure.1.mem,
+                        "cpu": measure.1.cpu
                     }))
                     .collect::<Vec<_>>()
             }))
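
With the size threaded through, each benchmark entry in the JSON report now
carries one object per sampled size. Illustratively, with entirely
hypothetical numbers:

"measures": [
  { "size": 0, "memory": 1200, "cpu": 250000 },
  { "size": 1, "memory": 2400, "cpu": 510000 },
  { "size": 2, "memory": 4800, "cpu": 1030000 }
]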


@@ -1,4 +1,4 @@
-use aiken_lang::test_framework::PropertyTest;
+use aiken_lang::test_framework::Benchmark;
 use aiken_project::watch::with_project;
 use rand::prelude::*;
 use std::{
@@ -17,18 +17,20 @@ pub struct Args {
     #[clap(long)]
     seed: Option<u32>,
 
-    /// How many times we will run each benchmark in the relevant project.
-    #[clap(long, default_value_t = PropertyTest::DEFAULT_MAX_SUCCESS)]
-    times_to_run: usize,
+    /// The maximum size to benchmark with. Note that this does not necessarily equate to the
+    /// number of measurements actually performed, but controls the maximum size given to a
+    /// Sampler.
+    #[clap(long, default_value_t = Benchmark::DEFAULT_MAX_SIZE)]
+    max_size: usize,
 
-    /// Only run tests if they match any of these strings.
+    /// Only run benchmarks if they match any of these strings.
     ///
     /// You can match a module with `-m aiken/list` or `-m list`.
     /// You can match a test with `-m "aiken/list.{map}"` or `-m "aiken/option.{flatten_1}"`
     #[clap(short, long)]
-    match_tests: Option<Vec<String>>,
+    match_benchmarks: Option<Vec<String>>,
 
-    /// This is meant to be used with `--match-tests`.
-    /// It forces test names to match exactly
+    /// This is meant to be used with `--match-benchmarks`.
+    /// It forces benchmark names to match exactly
     #[clap(short, long)]
     exact_match: bool,
@@ -39,10 +41,10 @@ pub struct Args {
 pub fn exec(
     Args {
         directory,
-        match_tests,
+        match_benchmarks,
         exact_match,
         seed,
-        times_to_run,
+        max_size,
         env,
     }: Args,
 ) -> miette::Result<()> {
@@ -55,12 +57,11 @@ pub fn exec(
             false,
             !io::stdout().is_terminal(),
             |p| {
-                // We don't want to check here, we want to benchmark
                 p.benchmark(
-                    match_tests.clone(),
+                    match_benchmarks.clone(),
                     exact_match,
                     seed,
-                    times_to_run,
+                    max_size,
                     env.clone(),
                 )
             },
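
On the command line, the new flag replaces --times-to-run. Assuming the
command is mounted as `aiken bench` (the subcommand name does not appear in
this diff) and with hypothetical module/benchmark names, an invocation could
look like:

aiken bench --max-size 255 --seed 42 -m "aiken/list.{foldr}" --exact-match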