From 8edd8d37dbed78e9efc6668dcb76ec17add6ab4a Mon Sep 17 00:00:00 2001
From: KtorZ <5680256+KtorZ@users.noreply.github.com>
Date: Sun, 9 Feb 2025 16:17:15 +0100
Subject: [PATCH] fix benchmark output when either the sampler or bench fails

  This is likely even better than what was done for property testing. We
  shall revise that one perhaps one day.

Signed-off-by: KtorZ <5680256+KtorZ@users.noreply.github.com>
---
 crates/aiken-lang/src/test_framework.rs       | 59 +++++++++++++++----
 crates/aiken-project/src/error.rs             |  8 ++-
 crates/aiken-project/src/lib.rs               | 11 ++--
 crates/aiken-project/src/options.rs           |  2 +-
 .../aiken-project/src/telemetry/terminal.rs   | 42 +++++++++----
 crates/aiken/src/cmd/benchmark.rs             | 40 ++++++++++++-
 6 files changed, 131 insertions(+), 31 deletions(-)
diff --git a/crates/aiken-lang/src/test_framework.rs b/crates/aiken-lang/src/test_framework.rs
index 01887356..821d04f3 100644
--- a/crates/aiken-lang/src/test_framework.rs
+++ b/crates/aiken-lang/src/test_framework.rs
@@ -274,7 +274,7 @@ pub struct Fuzzer<T> {
 }
 
 #[derive(Debug, Clone, thiserror::Error, miette::Diagnostic)]
-#[error("Fuzzer exited unexpectedly: {uplc_error}")]
+#[error("Fuzzer exited unexpectedly: {uplc_error}.")]
 pub struct FuzzerError {
     traces: Vec<String>,
     uplc_error: uplc::machine::Error,
@@ -494,6 +494,29 @@ pub struct Sampler<T> {
     pub stripped_type_info: Rc<Type>,
 }
 
+#[derive(Debug, Clone, thiserror::Error, miette::Diagnostic)]
+pub enum BenchmarkError {
+    #[error("Sampler exited unexpectedly: {uplc_error}.")]
+    SamplerError {
+        traces: Vec<String>,
+        uplc_error: uplc::machine::Error,
+    },
+    #[error("Bench exited unexpectedly: {uplc_error}.")]
+    BenchError {
+        traces: Vec<String>,
+        uplc_error: uplc::machine::Error,
+    },
+}
+
+impl BenchmarkError {
+    pub fn traces(&self) -> &[String] {
+        match self {
+            BenchmarkError::SamplerError { traces, .. }
+            | BenchmarkError::BenchError { traces, .. } => traces.as_slice(),
+        }
+    }
+}
+
 #[derive(Debug, Clone)]
 pub struct Benchmark {
     pub input_path: PathBuf,
@@ -517,10 +540,10 @@ impl Benchmark {
     ) -> BenchmarkResult {
         let mut measures = Vec::with_capacity(max_size);
         let mut prng = Prng::from_seed(seed);
-        let mut success = true;
+        let mut error = None;
         let mut size = 0;
 
-        while success && max_size >= size {
+        while error.is_none() && max_size >= size {
             let fuzzer = self
                 .sampler
                 .program
@@ -533,11 +556,24 @@ impl Benchmark {
 
                 Ok(Some((new_prng, value))) => {
                     prng = new_prng;
-                    measures.push((size, self.eval(&value, plutus_version).cost()))
+                    let mut result = self.eval(&value, plutus_version);
+                    match result.result() {
+                        Ok(_) => measures.push((size, result.cost())),
+                        Err(uplc_error) => {
+                            error = Some(BenchmarkError::BenchError {
+                                traces: result
+                                    .logs()
+                                    .into_iter()
+                                    .filter(|s| PropertyTest::extract_label(s).is_none())
+                                    .collect(),
+                                uplc_error,
+                            });
+                        }
+                    }
                 }
 
-                Err(_e) => {
-                    success = false;
+                Err(FuzzerError { traces, uplc_error }) => {
+                    error = Some(BenchmarkError::SamplerError { traces, uplc_error });
                 }
             }
 
@@ -547,7 +583,7 @@ impl Benchmark {
         BenchmarkResult {
             bench: self,
             measures,
-            success,
+            error,
         }
     }
 
@@ -650,7 +686,6 @@ impl Prng {
     pub fn sample(
         &self,
         fuzzer: &Program<Name>,
-        // iteration: usize,
     ) -> Result<Option<(Prng, PlutusData)>, FuzzerError> {
         let program = Program::<NamedDeBruijn>::try_from(fuzzer.apply_data(self.uplc())).unwrap();
         let mut result = program.eval(ExBudget::max());
@@ -1107,7 +1142,7 @@ impl<U, T> TestResult<U, T> {
                 }
                 OnTestFailure::SucceedImmediately => counterexample.is_some(),
             },
-            TestResult::BenchmarkResult(BenchmarkResult { success, .. }) => *success,
+            TestResult::BenchmarkResult(BenchmarkResult { error, .. }) => error.is_none(),
         }
     }
 
@@ -1135,7 +1170,9 @@ impl<U, T> TestResult<U, T> {
         match self {
             TestResult::UnitTestResult(UnitTestResult { traces, .. })
             | TestResult::PropertyTestResult(PropertyTestResult { traces, .. }) => traces,
-            TestResult::BenchmarkResult(BenchmarkResult { .. }) => &[],
+            TestResult::BenchmarkResult(BenchmarkResult { error, .. }) => {
+                error.as_ref().map(|e| e.traces()).unwrap_or_default()
+            }
         }
     }
 }
@@ -1475,7 +1512,7 @@ impl Assertion<UntypedExpr> {
 pub struct BenchmarkResult {
     pub bench: Benchmark,
     pub measures: Vec<(usize, ExBudget)>,
-    pub success: bool,
+    pub error: Option<BenchmarkError>,
 }
 
 unsafe impl Send for BenchmarkResult {}
diff --git a/crates/aiken-project/src/error.rs b/crates/aiken-project/src/error.rs
index 656e19f8..0c210121 100644
--- a/crates/aiken-project/src/error.rs
+++ b/crates/aiken-project/src/error.rs
@@ -3,7 +3,7 @@ use aiken_lang::{
     ast::{self, Span},
     error::ExtraData,
     parser::error::ParseError,
-    test_framework::{PropertyTestResult, TestResult, UnitTestResult},
+    test_framework::{BenchmarkResult, PropertyTestResult, TestResult, UnitTestResult},
     tipo,
 };
 use miette::{
@@ -193,7 +193,11 @@ impl Error {
                 test.input_path.to_path_buf(),
                 test.program.to_pretty(),
             ),
-            TestResult::BenchmarkResult(_) => ("bench".to_string(), PathBuf::new(), String::new()), // todo
+            TestResult::BenchmarkResult(BenchmarkResult { bench, .. }) => (
+                bench.name.to_string(),
+                bench.input_path.to_path_buf(),
+                bench.program.to_pretty(),
+            ),
         };
 
         Error::TestFailure {
diff --git a/crates/aiken-project/src/lib.rs b/crates/aiken-project/src/lib.rs
index 9e343be5..7b1cc13f 100644
--- a/crates/aiken-project/src/lib.rs
+++ b/crates/aiken-project/src/lib.rs
@@ -302,17 +302,18 @@ where
         match_benchmarks: Option<Vec<String>>,
         exact_match: bool,
         seed: u32,
-        iterations: usize,
+        max_size: usize,
+        tracing: Tracing,
         env: Option<String>,
     ) -> Result<(), Vec<Error>> {
         let options = Options {
-            tracing: Tracing::silent(),
+            tracing,
             env,
             code_gen_mode: CodeGenMode::Benchmark {
                 match_benchmarks,
                 exact_match,
                 seed,
-                iterations,
+                max_size,
             },
             blueprint_path: self.blueprint_path(None),
         };
@@ -469,7 +470,7 @@ where
                 match_benchmarks,
                 exact_match,
                 seed,
-                iterations,
+                max_size,
             } => {
                 let verbose = false;
 
@@ -484,7 +485,7 @@ where
                     self.event_listener.handle_event(Event::RunningBenchmarks);
                 }
 
-                let benchmarks = self.run_runnables(benchmarks, seed, iterations);
+                let benchmarks = self.run_runnables(benchmarks, seed, max_size);
 
                 let errors: Vec<Error> = benchmarks
                     .iter()
diff --git a/crates/aiken-project/src/options.rs b/crates/aiken-project/src/options.rs
index 8b4fdf9b..0e5706d6 100644
--- a/crates/aiken-project/src/options.rs
+++ b/crates/aiken-project/src/options.rs
@@ -33,7 +33,7 @@ pub enum CodeGenMode {
         match_benchmarks: Option<Vec<String>>,
         exact_match: bool,
         seed: u32,
-        iterations: usize,
+        max_size: usize,
     },
     NoOp,
 }
diff --git a/crates/aiken-project/src/telemetry/terminal.rs b/crates/aiken-project/src/telemetry/terminal.rs
index 813ab488..afacfcef 100644
--- a/crates/aiken-project/src/telemetry/terminal.rs
+++ b/crates/aiken-project/src/telemetry/terminal.rs
@@ -247,7 +247,10 @@ impl EventListener for Terminal {
                         .iter()
                         .map(|r| fmt_test(r, max_mem, max_cpu, max_iter, true))
                         .collect::<Vec<String>>()
-                        .join("\n");
+                        .join("\n")
+                        .chars()
+                        .skip(1) // Remove extra first newline
+                        .collect::<String>();
 
                     let seed_info = format!(
                         "with {opt}={seed}",
@@ -287,7 +290,21 @@ fn fmt_test(
 ) -> String {
     // Status
     let mut test = if matches!(result, TestResult::BenchmarkResult { .. }) {
-        String::new()
+        format!(
+            "\n{label}{title}\n",
+            label = if result.is_success() {
+                String::new()
+            } else {
+                pretty::style_if(styled, "FAIL ".to_string(), |s| {
+                    s.if_supports_color(Stderr, |s| s.bold())
+                        .if_supports_color(Stderr, |s| s.red())
+                        .to_string()
+                })
+            },
+            title = pretty::style_if(styled, result.title().to_string(), |s| s
+                .if_supports_color(Stderr, |s| s.bright_blue())
+                .to_string())
+        )
     } else if result.is_success() {
         pretty::style_if(styled, "PASS".to_string(), |s| {
             s.if_supports_color(Stderr, |s| s.bold())
@@ -334,7 +351,17 @@ fn fmt_test(
                 if *iterations > 1 { "s" } else { "" }
             );
         }
-        TestResult::BenchmarkResult(BenchmarkResult { measures, .. }) => {
+        TestResult::BenchmarkResult(BenchmarkResult { error: Some(e), .. }) => {
+            test = format!(
+                "{test}{}",
+                e.to_string().if_supports_color(Stderr, |s| s.red())
+            );
+        }
+        TestResult::BenchmarkResult(BenchmarkResult {
+            measures,
+            error: None,
+            ..
+        }) => {
             let max_size = measures
                 .iter()
                 .map(|(size, _)| *size)
@@ -384,14 +411,7 @@ fn fmt_test(
 
     // Title
     test = match result {
-        TestResult::BenchmarkResult(..) => {
-            format!(
-                "{title}\n{test}\n",
-                title = pretty::style_if(styled, result.title().to_string(), |s| s
-                    .if_supports_color(Stderr, |s| s.bright_blue())
-                    .to_string())
-            )
-        }
+        TestResult::BenchmarkResult(..) => test,
         TestResult::UnitTestResult(..) | TestResult::PropertyTestResult(..) => {
             format!(
                 "{test} {title}",
diff --git a/crates/aiken/src/cmd/benchmark.rs b/crates/aiken/src/cmd/benchmark.rs
index 1a495f8d..11121f60 100644
--- a/crates/aiken/src/cmd/benchmark.rs
+++ b/crates/aiken/src/cmd/benchmark.rs
@@ -1,4 +1,8 @@
-use aiken_lang::test_framework::Benchmark;
+use super::build::{trace_filter_parser, trace_level_parser};
+use aiken_lang::{
+    ast::{TraceLevel, Tracing},
+    test_framework::Benchmark,
+};
 use aiken_project::watch::with_project;
 use rand::prelude::*;
 use std::{
@@ -36,6 +40,34 @@ pub struct Args {
 
     /// Environment to use for benchmarking
     env: Option<String>,
+
+    /// Filter traces to be included in the generated program(s).
+    ///
+    ///   - user-defined:
+    ///       only consider traces that you've explicitly defined
+    ///       either through the 'trace' keyword or via the trace-if-false
+    ///       ('?') operator.
+    ///
+    ///   - compiler-generated:
+    ///       only include internal traces generated by the
+    ///       Aiken compiler, for example in usage of 'expect'.
+    ///
+    ///   - all:
+    ///       include both user-defined and compiler-generated traces.
+    ///
+    /// [default: all]
+    #[clap(short = 'f', long, value_parser=trace_filter_parser(), default_missing_value="all", verbatim_doc_comment, alias="filter_traces")]
+    trace_filter: Option<fn(TraceLevel) -> Tracing>,
+
+    /// Choose the verbosity level of traces:
+    ///
+    ///   - silent: disable traces altogether
+    ///   - compact: only culprit line numbers are shown on failures
+    ///   - verbose: enable full verbose traces as provided by the user or the compiler
+    ///
+    /// [optional]
+    #[clap(short, long, value_parser=trace_level_parser(), default_value_t=TraceLevel::Silent, verbatim_doc_comment)]
+    trace_level: TraceLevel,
 }
 
 pub fn exec(
@@ -46,6 +78,8 @@ pub fn exec(
         seed,
         max_size,
         env,
+        trace_filter,
+        trace_level,
     }: Args,
 ) -> miette::Result<()> {
     let mut rng = rand::thread_rng();
@@ -62,6 +96,10 @@ pub fn exec(
                 exact_match,
                 seed,
                 max_size,
+                match trace_filter {
+                    Some(trace_filter) => trace_filter(trace_level),
+                    None => Tracing::All(trace_level),
+                },
                 env.clone(),
             )
         },