Borrow integrated shrinking approach from MiniThesis.

2024-02-26 21:59:23 +01:00
parent 3762473a60
commit a703db4d14
8 changed files with 773 additions and 305 deletions
--- a/crates/aiken-lang/src/tipo/infer.rs
+++ b/crates/aiken-lang/src/tipo/infer.rs
@@ -339,10 +339,24 @@ fn infer_definition(
                    Type::Fn { ret, .. } => {
                        let ann = tipo_to_annotation(ret, location)?;
                        match ann {
-                            Annotation::Tuple { elems, .. } if elems.len() == 2 => {
-                                Ok(elems.get(1).expect("Tuple has two elements").to_owned())
+                            Annotation::Constructor {
+                                module,
+                                name,
+                                arguments,
+                                ..
+                            } if module.as_ref().unwrap_or(&String::new()).is_empty()
+                                && name == "Option" =>
+                            {
+                                match &arguments[..] {
+                                    [Annotation::Tuple { elems, .. }] if elems.len() == 2 => {
+                                        Ok(elems.get(1).expect("Tuple has two elements").to_owned())
+                                    }
+                                    _ => {
+                                        todo!("expected a single generic argument unifying as 2-tuple")
+                                    }
+                                }
                            }
-                            _ => todo!("Fuzzer returns something else than a 2-tuple? "),
+                            _ => todo!("expected an Option<a>"),
                        }
                    }
                    Type::Var { .. } | Type::App { .. } | Type::Tuple { .. } => {
--- a/crates/aiken-project/src/error.rs
+++ b/crates/aiken-project/src/error.rs
@@ -93,7 +93,7 @@ pub enum Error {
        path: PathBuf,
        verbose: bool,
        src: String,
-        evaluation_hint: Option<String>,
+        assertion: Option<String>,
    },

    #[error(
@@ -323,7 +323,7 @@ impl Diagnostic for Error {
            Error::MissingManifest { .. } => Some(Box::new("Try running `aiken new <REPOSITORY/PROJECT>` to initialise a project with an example manifest.")),
            Error::TomlLoading { .. } => None,
            Error::Format { .. } => None,
-            Error::TestFailure { evaluation_hint, .. }  => match evaluation_hint {
+            Error::TestFailure { assertion, .. }  => match assertion {
                None => None,
                Some(hint) => Some(Box::new(hint.to_string()))
            },
--- a/crates/aiken-project/src/lib.rs
+++ b/crates/aiken-project/src/lib.rs
@@ -21,13 +21,19 @@ use crate::blueprint::{
    schema::{Annotated, Schema},
    Blueprint,
 };
+use crate::{
+    config::Config,
+    error::{Error, Warning},
+    module::{CheckedModule, CheckedModules, ParsedModule, ParsedModules},
+    telemetry::Event,
+};
 use aiken_lang::{
    ast::{
        Definition, Function, ModuleKind, Span, Tracing, TypedDataType, TypedFunction, Validator,
    },
    builtins,
    expr::TypedExpr,
-    gen_uplc::builder::{cast_validator_args, DataTypeKey, FunctionAccessKey},
+    gen_uplc::builder::{DataTypeKey, FunctionAccessKey},
    tipo::{Type, TypeInfo},
    IdGenerator,
 };
@@ -40,8 +46,7 @@ use pallas::ledger::{
    primitives::babbage::{self as cardano, PolicyId},
    traverse::ComputeHash,
 };
-
-use script::{EvalHint, EvalInfo, PropertyTest, Test};
+use script::{Assertion, Test, TestResult};
 use std::{
    collections::HashMap,
    fs::{self, File},
@@ -56,13 +61,6 @@ use uplc::{
    PlutusData,
 };

-use crate::{
-    config::Config,
-    error::{Error, Warning},
-    module::{CheckedModule, CheckedModules, ParsedModule, ParsedModules},
-    telemetry::Event,
-};
-
 #[derive(Debug)]
 pub struct Source {
    pub path: PathBuf,
@@ -323,24 +321,15 @@ where
                    self.event_listener.handle_event(Event::RunningTests);
                }

-                let results = self.run_tests(tests.iter().collect());
+                let results = self.run_tests(tests);

                let errors: Vec<Error> = results
                    .iter()
                    .filter_map(|e| {
-                        if e.success {
+                        if e.is_success() {
                            None
                        } else {
-                            Some(Error::TestFailure {
-                                name: e.test.name().to_string(),
-                                path: e.test.input_path().to_path_buf(),
-                                evaluation_hint: e
-                                    .test
-                                    .evaluation_hint()
-                                    .map(|hint| hint.to_string()),
-                                src: e.test.program().to_pretty(),
-                                verbose,
-                            })
+                            Some(e.into_error(verbose))
                        }
                    })
                    .collect();
@@ -816,7 +805,7 @@ where
                })
            }

-            let evaluation_hint = func_def.test_hint().map(|(bin_op, left_src, right_src)| {
+            let assertion = func_def.test_hint().map(|(bin_op, left_src, right_src)| {
                let left = generator
                    .clone()
                    .generate_raw(&left_src, &module_name)
@@ -829,7 +818,7 @@ where
                    .try_into()
                    .unwrap();

-                EvalHint {
+                Assertion {
                    bin_op,
                    left,
                    right,
@@ -846,7 +835,7 @@ where
                    name.to_string(),
                    *can_error,
                    program.try_into().unwrap(),
-                    evaluation_hint,
+                    assertion,
                );

                programs.push(test);
@@ -862,14 +851,16 @@ where
                        ret: body.tipo(),
                    }),
                    is_capture: false,
-                    args: vec![parameter.clone().into()],
+                    args: vec![parameter.into()],
                    body: Box::new(body.clone()),
                    return_annotation: None,
                };

-                let program = generator.clone().generate_raw(&body, &module_name);
-
-                let term = cast_validator_args(program.term, &[parameter.into()]);
+                let program = generator
+                    .clone()
+                    .generate_raw(&body, &module_name)
+                    .try_into()
+                    .unwrap();

                let fuzzer: Program<NamedDeBruijn> = generator
                    .clone()
@@ -882,7 +873,7 @@ where
                    module_name,
                    name.to_string(),
                    *can_error,
-                    Program { term, ..program }.try_into().unwrap(),
+                    program,
                    fuzzer,
                );

@@ -893,36 +884,16 @@ where
        Ok(programs)
    }

-    fn run_tests<'a>(&'a self, tests: Vec<&'a Test>) -> Vec<EvalInfo<'a>> {
-        // FIXME: Find a way to re-introduce parallel testing despite the references (which aren't
-        // sizeable).
-        // We do now hold references to tests because the property tests results are all pointing
-        // to the same test, so we end up copying the same test over and over.
-        //
-        // So we might want to rework the evaluation result to avoid that and keep parallel testing
-        // possible.
-        // use rayon::prelude::*;
+    fn run_tests(&self, tests: Vec<Test>) -> Vec<TestResult> {
+        use rayon::prelude::*;

        tests
-            .iter()
-            .flat_map(|test| match test {
-                Test::UnitTest(unit_test) => {
-                    let mut result = unit_test.run();
-                    vec![test.report(&mut result)]
-                }
-                Test::PropertyTest(ref property_test) => {
-                    let mut seed = PropertyTest::new_seed(42);
-
-                    let mut results = vec![];
-                    for _ in 0..100 {
-                        let (new_seed, sample) = property_test.sample(seed);
-                        seed = new_seed;
-                        let mut result = property_test.run(&sample);
-                        results.push(test.report(&mut result));
-                    }
-
-                    results
-                }
+            .into_par_iter()
+            .map(|test| match test {
+                Test::UnitTest(unit_test) => unit_test.run(),
+                // TODO: Get the seed from the command-line, defaulting to a random one when not
+                // provided.
+                Test::PropertyTest(property_test) => property_test.run(42),
            })
            .collect()
    }
--- a/crates/aiken-project/src/pretty.rs
+++ b/crates/aiken-project/src/pretty.rs
@@ -7,8 +7,8 @@ pub fn ansi_len(s: &str) -> usize {
        .count()
 }

-pub fn len_longest_line(s: &str) -> usize {
-    s.lines().fold(0, |max, l| {
+pub fn len_longest_line(zero: usize, s: &str) -> usize {
+    s.lines().fold(zero, |max, l| {
        let n = ansi_len(l);
        if n > max {
            n
@@ -23,7 +23,7 @@ pub fn boxed(title: &str, content: &str) -> String {
 }

 pub fn boxed_with(title: &str, content: &str, border_style: fn(&str) -> String) -> String {
-    let n = len_longest_line(content);
+    let n = len_longest_line(ansi_len(title) + 1, content);

    let content = content
        .lines()
@@ -62,7 +62,7 @@ pub fn open_box(
    border_style: fn(&str) -> String,
 ) -> String {
    let i = ansi_len(content.lines().collect::<Vec<_>>().first().unwrap());
-    let j = len_longest_line(content);
+    let j = len_longest_line(ansi_len(title) + 1, content);
    let k = ansi_len(footer);

    let content = content
@@ -79,7 +79,11 @@ pub fn open_box(

    let bottom = format!(
        "{} {}",
-        pad_right(border_style("┕"), j - k + 1, &border_style("━")),
+        pad_right(
+            border_style("┕"),
+            if j < k { 0 } else { j + 1 - k },
+            &border_style("━")
+        ),
        footer
    );

--- a/crates/aiken-project/src/script.rs
+++ b/crates/aiken-project/src/script.rs
@@ -1,16 +1,38 @@
 use crate::{pretty, ExBudget};
 use aiken_lang::ast::BinOp;
+use pallas::codec::utils::Int;
+use pallas::ledger::primitives::alonzo::{BigInt, Constr, PlutusData};
 use std::{
    borrow::Borrow,
    fmt::{self, Display},
-    path::{Path, PathBuf},
+    path::PathBuf,
    rc::Rc,
 };
 use uplc::{
    ast::{Constant, Data, NamedDeBruijn, Program, Term},
-    machine::eval_result::EvalResult,
+    machine::{eval_result::EvalResult, value::from_pallas_bigint},
 };

+// ----------------------------------------------------------------------------
+//
+// Test
+//
+// Aiken supports two kinds of tests: unit and property. A unit test is simply a
+// UPLC program which must be a lambda that returns a boolean.
+//
+// A property, on the other hand, is a template for generating tests, which is also
+// a lambda but that takes an extra argument. The argument is generated from a
+// fuzzer which is meant to yield random values in a pseudo-random (albeit seeded)
+// sequence. On failures, the value that caused a failure is simplified using an
+// approach similar to what's described in MiniThesis<https://github.com/DRMacIver/minithesis>,
+// which is a simplified version of Hypothesis, a property-based testing framework
+// with integrated shrinking.
+//
+// Our approach could perhaps be called "microthesis", as it implements a subset of
+// minithesis. More specifically, we do not currently support pre-conditions, nor
+// targets.
+// ----------------------------------------------------------------------------
+
 #[derive(Debug, Clone)]
 pub enum Test {
    UnitTest(UnitTest),
@@ -26,7 +48,7 @@ impl Test {
        name: String,
        can_error: bool,
        program: Program<NamedDeBruijn>,
-        evaluation_hint: Option<EvalHint>,
+        assertion: Option<Assertion>,
    ) -> Test {
        Test::UnitTest(UnitTest {
            input_path,
@@ -34,7 +56,7 @@ impl Test {
            name,
            program,
            can_error,
-            evaluation_hint,
+            assertion,
        })
    }

@@ -55,58 +77,14 @@ impl Test {
            fuzzer,
        })
    }
-
-    pub fn name(&self) -> &str {
-        match self {
-            Test::UnitTest(test) => &test.name,
-            Test::PropertyTest(test) => &test.name,
-        }
-    }
-
-    pub fn module(&self) -> &str {
-        match self {
-            Test::UnitTest(test) => &test.module,
-            Test::PropertyTest(test) => &test.module,
-        }
-    }
-
-    pub fn input_path(&self) -> &Path {
-        match self {
-            Test::UnitTest(test) => &test.input_path,
-            Test::PropertyTest(test) => &test.input_path,
-        }
-    }
-
-    pub fn program(&self) -> &Program<NamedDeBruijn> {
-        match self {
-            Test::UnitTest(test) => &test.program,
-            Test::PropertyTest(test) => &test.program,
-        }
-    }
-
-    pub fn evaluation_hint(&self) -> Option<&EvalHint> {
-        match self {
-            Test::UnitTest(test) => test.evaluation_hint.as_ref(),
-            Test::PropertyTest(_) => None,
-        }
-    }
-
-    pub fn report<'a>(&'a self, eval_result: &mut EvalResult) -> EvalInfo<'a> {
-        let can_error = match self {
-            Test::UnitTest(test) => test.can_error,
-            Test::PropertyTest(test) => test.can_error,
-        };
-
-        EvalInfo {
-            test: self,
-            success: !eval_result.failed(can_error),
-            spent_budget: eval_result.cost(),
-            logs: eval_result.logs(),
-            output: eval_result.result().ok(),
-        }
-    }
 }

+// ----------------------------------------------------------------------------
+//
+// UnitTest
+//
+// ----------------------------------------------------------------------------
+
 #[derive(Debug, Clone)]
 pub struct UnitTest {
    pub input_path: PathBuf,
@@ -114,17 +92,30 @@ pub struct UnitTest {
    pub name: String,
    pub can_error: bool,
    pub program: Program<NamedDeBruijn>,
-    pub evaluation_hint: Option<EvalHint>,
+    pub assertion: Option<Assertion>,
 }

 unsafe impl Send for UnitTest {}

 impl UnitTest {
-    pub fn run(&self) -> EvalResult {
-        self.program.clone().eval(ExBudget::max())
+    pub fn run(self) -> TestResult {
+        let mut eval_result = self.program.clone().eval(ExBudget::max());
+        TestResult::UnitTestResult(UnitTestResult {
+            test: self.to_owned(),
+            success: !eval_result.failed(self.can_error),
+            spent_budget: eval_result.cost(),
+            logs: eval_result.logs(),
+            output: eval_result.result().ok(),
+        })
    }
 }

+// ----------------------------------------------------------------------------
+//
+// PropertyTest
+//
+// ----------------------------------------------------------------------------
+
 #[derive(Debug, Clone)]
 pub struct PropertyTest {
    pub input_path: PathBuf,
@@ -138,46 +129,514 @@ pub struct PropertyTest {
 unsafe impl Send for PropertyTest {}

 impl PropertyTest {
-    pub fn new_seed(seed: u32) -> Term<NamedDeBruijn> {
-        Term::Constant(Rc::new(Constant::Data(Data::constr(
-            0,
-            vec![
-                Data::integer(seed.into()),
-                Data::integer(0.into()), // Size
-            ],
-        ))))
+    const MAX_TEST_RUN: usize = 100;
+
+    /// Run a property test from a given seed. The property is run at most MAX_TEST_RUN times. It
+    /// may stop earlier on failure, in which case a 'counterexample' is returned.
+    pub fn run(self, seed: u32) -> TestResult {
+        let n = PropertyTest::MAX_TEST_RUN;
+
+        let (counterexample, iterations) = match self.run_n_times(n, seed, None) {
+            None => (None, n),
+            Some((remaining, counterexample)) => (Some(counterexample), n - remaining + 1),
+        };
+
+        TestResult::PropertyTestResult(PropertyTestResult {
+            test: self,
+            counterexample,
+            iterations,
+        })
    }

-    pub fn sample(&self, seed: Term<NamedDeBruijn>) -> (Term<NamedDeBruijn>, Term<NamedDeBruijn>) {
-        let term = self.fuzzer.apply_term(&seed).eval(ExBudget::max()).result();
-
-        if let Ok(Term::Constant(rc)) = term {
-            match &rc.borrow() {
-                Constant::ProtoPair(_, _, new_seed, value) => (
-                    Term::Constant(new_seed.clone()),
-                    Term::Constant(value.clone()),
-                ),
-                _ => todo!("Fuzzer yielded a new seed that isn't an integer?"),
-            }
+    fn run_n_times(
+        &self,
+        remaining: usize,
+        seed: u32,
+        counterexample: Option<(usize, Term<NamedDeBruijn>)>,
+    ) -> Option<(usize, Term<NamedDeBruijn>)> {
+        // We short-circuit failures in case we have any. The counterexample is already simplified
+        // at this point.
+        if remaining > 0 && counterexample.is_none() {
+            let (next_seed, counterexample) = self.run_once(seed);
+            self.run_n_times(
+                remaining - 1,
+                next_seed,
+                counterexample.map(|c| (remaining, c)),
+            )
        } else {
-            todo!("Fuzzer yielded something else than a pair? {:#?}", term)
+            counterexample
        }
    }

-    pub fn run(&self, sample: &Term<NamedDeBruijn>) -> EvalResult {
-        self.program.apply_term(sample).eval(ExBudget::max())
+    fn run_once(&self, seed: u32) -> (u32, Option<Term<NamedDeBruijn>>) {
+        let (next_prng, value) = Prng::from_seed(seed)
+            .sample(&self.fuzzer)
+            .expect("running seeded Prng cannot fail.");
+
+        let result = self.program.apply_term(&value).eval(ExBudget::max());
+
+        if let Prng::Seeded {
+            seed: next_seed, ..
+        } = next_prng
+        {
+            if result.failed(self.can_error) {
+                let mut counterexample = Counterexample {
+                    result,
+                    value,
+                    choices: next_prng.choices(),
+                    can_error: self.can_error,
+                    program: &self.program,
+                    fuzzer: &self.fuzzer,
+                };
+
+                if !counterexample.choices.is_empty() {
+                    counterexample.simplify();
+                }
+
+                (next_seed, Some(counterexample.value))
+            } else {
+                (next_seed, None)
+            }
+        } else {
+            unreachable!("Prng constructed from a seed necessarily yield a seed.");
+        }
    }
 }

+// ----------------------------------------------------------------------------
+//
+// Prng
+//
+// ----------------------------------------------------------------------------
+
+#[derive(Debug)]
+pub enum Prng {
+    Seeded {
+        seed: u32,
+        choices: Vec<u32>,
+        uplc: PlutusData,
+    },
+    Replayed {
+        choices: Vec<u32>,
+        uplc: PlutusData,
+    },
+}
+
+impl Prng {
+    /// Constructor tag for Prng's 'Seeded'
+    const SEEDED: u64 = 0;
+    /// Constructor tag for Prng's 'Replayed'
+    const REPLAYED: u64 = 1;
+
+    /// Constructor tag for Option's 'Some'
+    const OK: u64 = 0;
+    /// Constructor tag for Option's 'None'
+    const ERR: u64 = 1;
+
+    pub fn uplc(&self) -> PlutusData {
+        match self {
+            Prng::Seeded { uplc, .. } => uplc.clone(),
+            Prng::Replayed { uplc, .. } => uplc.clone(),
+        }
+    }
+
+    pub fn choices(&self) -> Vec<u32> {
+        match self {
+            Prng::Seeded { choices, .. } => {
+                let mut choices = choices.to_vec();
+                choices.reverse();
+                choices
+            }
+            Prng::Replayed { choices, .. } => choices.to_vec(),
+        }
+    }
+
+    /// Construct a Pseudo-random number generator from a seed.
+    pub fn from_seed(seed: u32) -> Prng {
+        Prng::Seeded {
+            seed,
+            choices: vec![],
+            uplc: Data::constr(
+                Prng::SEEDED,
+                vec![
+                    Data::integer(seed.into()), // Prng's seed
+                    Data::list(vec![]),         // Random choices
+                ],
+            ),
+        }
+    }
+
+    /// Construct a Pseudo-random number generator from a pre-defined list of choices.
+    pub fn from_choices(choices: &[u32]) -> Prng {
+        Prng::Replayed {
+            choices: choices.to_vec(),
+            uplc: Data::constr(
+                Prng::REPLAYED,
+                vec![Data::list(
+                    choices.iter().map(|i| Data::integer((*i).into())).collect(),
+                )],
+            ),
+        }
+    }
+
+    /// Generate a pseudo-random value from a fuzzer using the given PRNG.
+    pub fn sample(&self, fuzzer: &Program<NamedDeBruijn>) -> Option<(Prng, Term<NamedDeBruijn>)> {
+        let result = fuzzer
+            .apply_data(self.uplc())
+            .eval(ExBudget::max())
+            .result()
+            .expect("Fuzzer crashed?");
+
+        Prng::from_result(result)
+    }
+
+    /// Obtain a Prng back from a fuzzer execution. As a reminder, fuzzers have the following
+    /// signature:
+    ///
+    ///     type Fuzzer<a> = fn(Prng) -> Option<(Prng, a)>
+    ///
+    /// In nominal scenarios (i.e. when the fuzzer is made from a seed and evolves pseudo-randomly),
+    /// it cannot yield 'None'. When replayed however, we can't easily guarantee that the changes
+    /// made during shrinking aren't breaking underlying invariants (if only because we run out of
+    /// values to replay). In such cases, the replayed sequence is simply invalid and the fuzzer
+    /// aborts altogether with 'None'.
+    pub fn from_result(result: Term<NamedDeBruijn>) -> Option<(Self, Term<NamedDeBruijn>)> {
+        /// Interpret the given 'PlutusData' as one of two Prng constructors.
+        fn as_prng(cst: &PlutusData) -> Prng {
+            if let PlutusData::Constr(Constr { tag, fields, .. }) = cst {
+                if *tag == 121 + Prng::SEEDED {
+                    if let [seed, PlutusData::Array(choices)] = &fields[..] {
+                        return Prng::Seeded {
+                            seed: as_u32(seed),
+                            choices: choices.iter().map(as_u32).collect(),
+                            uplc: cst.clone(),
+                        };
+                    }
+                }
+
+                if *tag == 121 + Prng::REPLAYED {
+                    return Prng::Replayed {
+                        choices: fields.iter().map(as_u32).collect(),
+                        uplc: cst.clone(),
+                    };
+                }
+            }
+
+            panic!("Malformed Prng: {cst:#?}")
+        }
+
+        fn as_u32(field: &PlutusData) -> u32 {
+            if let PlutusData::BigInt(BigInt::Int(Int(i))) = field {
+                return u32::try_from(*i).expect("Choice doesn't fit in u32?");
+            }
+
+            panic!("Malformed choice's value: {field:#?}")
+        }
+
+        /// Convert wrapped integers & bytearrays into raw constant terms. Because fuzzers
+        /// return a pair, those values end up being wrapped in 'Data', but test
+        /// functions will expect them in their raw constant form.
+        ///
+        /// Anything else is Data, so we're good.
+        fn as_value(data: &PlutusData) -> Term<NamedDeBruijn> {
+            Term::Constant(Rc::new(match data {
+                PlutusData::BigInt(n) => Constant::Integer(from_pallas_bigint(n)),
+                PlutusData::BoundedBytes(bytes) => Constant::ByteString(bytes.clone().into()),
+                _ => Constant::Data(data.clone()),
+            }))
+        }
+
+        if let Term::Constant(rc) = &result {
+            if let Constant::Data(PlutusData::Constr(Constr { tag, fields, .. })) = &rc.borrow() {
+                if *tag == 121 + Prng::OK {
+                    if let [PlutusData::Array(elems)] = &fields[..] {
+                        if let [new_seed, value] = &elems[..] {
+                            return Some((as_prng(new_seed), as_value(value)));
+                        }
+                    }
+                }
+
+                // May occur when replaying a fuzzer from a shrunk sequence of
+                // choices. If we run out of choices, or a choice ends up being
+                // invalid as per the expectation, the fuzzer can't go further and
+                // fails.
+                if *tag == 121 + Prng::ERR {
+                    return None;
+                }
+            }
+        }
+
+        // In principle, this cannot happen provided that the 'result' was produced from a
+        // type-checked fuzzer. The type-checker enforces that fuzzers are of the right shape
+        // described above.
+        unreachable!("Fuzzer yielded a malformed result? {result:#?}")
+    }
+}
+
+// ----------------------------------------------------------------------------
+//
+// Counterexample
+//
+// A counterexample is constructed on test failures.
+//
+// ----------------------------------------------------------------------------
+
+#[derive(Debug)]
+pub struct Counterexample<'a> {
+    pub value: Term<NamedDeBruijn>,
+    pub choices: Vec<u32>,
+    pub result: EvalResult,
+    pub can_error: bool,
+    pub program: &'a Program<NamedDeBruijn>,
+    pub fuzzer: &'a Program<NamedDeBruijn>,
+}
+
+impl<'a> Counterexample<'a> {
+    fn consider(&mut self, choices: &[u32]) -> bool {
+        if choices == self.choices {
+            return true;
+        }
+
+        // TODO: Memoize test cases & choices in a cache. Due to the nature of
+        // our integrated shrinking approach, we may end up re-executing the same
+        // test cases many times. Given that tests are fully deterministic, we can
+        // memoize the already seen choices to avoid re-running the generators and
+        // the test (which can be quite expensive).
+        match Prng::from_choices(choices).sample(self.fuzzer) {
+            // Shrunk choices led to an impossible generation.
+            None => false,
+
+            // Shrunk choices led to a new valid generated value; now, is it better?
+            Some((_, value)) => {
+                let result = self.program.apply_term(&value).eval(ExBudget::max());
+
+                // If the test no longer fails, it isn't better as we're only
+                // interested in counterexamples.
+                if !result.failed(self.can_error) {
+                    return false;
+                }
+
+                // If these new choices are shorter or smaller, then we pick them
+                // as new choices and inform that it's been an improvement.
+                if choices.len() <= self.choices.len() || choices < &self.choices {
+                    self.value = value;
+                    self.choices = choices.to_vec();
+                    true
+                } else {
+                    false
+                }
+            }
+        }
+    }
+
+    /// Try to simplify a 'Counterexample' by manipulating the random sequence of generated values
+    /// (a.k.a. choices). While the implementation is quite involved, the strategy is rather simple
+    /// at least conceptually:
+    ///
+    /// Each time a (seeded) fuzzer generates a new value and a new seed, it also stores the
+    /// generated value in a vector, which we call 'choices'. If we re-run the test case with this
+    /// exact choice sequence, we end up with the exact same outcome.
+    ///
+    /// But, we can tweak chunks of this sequence in the hope of generating a _smaller sequence_,
+    /// thus generally resulting in a _smaller counterexample_. Each transformation is applied on
+    /// chunks of size 8, 4, 2 and 1; until we no longer make progress (i.e. hit a fixed point).
+    ///
+    /// As per MiniThesis, we consider the following transformations:
+    ///
+    /// - Deleting chunks
+    /// - Transforming chunks into sequence of zeroes
+    /// - Decrementing chunks of values
+    /// - Replacing chunks of values
+    /// - Sorting chunks
+    /// - Redistribute values between nearby pairs
+    fn simplify(&mut self) {
+        let mut prev;
+
+        loop {
+            prev = self.choices.clone();
+
+            // Delete choices by chunks of size 8, 4, 2, 1.
+            let mut k: isize = 8;
+            while k > 0 {
+                let mut i: isize = (self.choices.len() as isize) - k - 1;
+                while i >= 0 {
+                    if i >= self.choices.len() as isize {
+                        i -= 1;
+                        continue;
+                    }
+                    let mut choices = self.choices[0..(i + k) as usize].to_vec();
+                    if !self.consider(&choices) {
+                        // Perform an extra reduction step that decreases the size of choices near
+                        // the end, to cope with dependencies between choices, e.g. drawing a
+                        // number as a list length, and then drawing that many elements.
+                        //
+                        // This isn't perfect, but allows making progress in many cases.
+                        if i > 0 && *choices.get((i - 1) as usize).unwrap_or(&0) > 0 {
+                            choices[(i - 1) as usize] -= 1;
+                            if self.consider(&choices) {
+                                i += 1;
+                            }
+                        }
+                        i -= 1;
+                    }
+                }
+                k /= 2;
+            }
+
+            // Now we try replacing regions of choices with zeroes. Note that unlike the above we
+            // skip k = 1 because we handle that in the next step. Often (but not always) a block
+            // of all zeroes is the smallest value that a region can be.
+            let mut k: isize = 8;
+            while k > 1 {
+                let mut i: isize = self.choices.len() as isize - k;
+
+                while i >= 0 {
+                    i -= if self.zeroes(i, k) { k } else { 1 }
+                }
+
+                k /= 2
+            }
+
+            // TODO: Remaining shrinking strategies...
+            //
+            // - Swaps
+            // - Sorting
+            // - Pair adjustments
+
+            // If we've reached a fixed point, then we cannot shrink further. We've reached a
+            // (local) minimum, which is as good a counterexample as we'll get with this approach.
+            if prev.as_slice() == self.choices.as_slice() {
+                break;
+            }
+        }
+    }
+
+    // Replace the block of 'k' choices starting at index 'i' with zeroes.
+    fn zeroes(&mut self, i: isize, k: isize) -> bool {
+        let mut choices = self.choices.clone();
+
+        for j in i..(i + k) {
+            if j >= self.choices.len() as isize {
+                return false;
+            }
+            choices[j as usize] = 0;
+        }
+
+        self.consider(&choices)
+    }
+}
+
+// ----------------------------------------------------------------------------
+//
+// TestResult
+//
+// ----------------------------------------------------------------------------
+
+#[derive(Debug)]
+pub enum TestResult {
+    UnitTestResult(UnitTestResult),
+    PropertyTestResult(PropertyTestResult),
+}
+
+unsafe impl Send for TestResult {}
+
+impl TestResult {
+    pub fn is_success(&self) -> bool {
+        match self {
+            TestResult::UnitTestResult(UnitTestResult { success, .. }) => *success,
+            TestResult::PropertyTestResult(PropertyTestResult {
+                counterexample,
+                test,
+                ..
+            }) => {
+                if test.can_error {
+                    counterexample.is_some()
+                } else {
+                    counterexample.is_none()
+                }
+            }
+        }
+    }
+
+    pub fn module(&self) -> &str {
+        match self {
+            TestResult::UnitTestResult(UnitTestResult { ref test, .. }) => test.module.as_str(),
+            TestResult::PropertyTestResult(PropertyTestResult { ref test, .. }) => {
+                test.module.as_str()
+            }
+        }
+    }
+
+    pub fn title(&self) -> &str {
+        match self {
+            TestResult::UnitTestResult(UnitTestResult { ref test, .. }) => test.name.as_str(),
+            TestResult::PropertyTestResult(PropertyTestResult { ref test, .. }) => {
+                test.name.as_str()
+            }
+        }
+    }
+
+    pub fn logs(&self) -> &[String] {
+        match self {
+            TestResult::UnitTestResult(UnitTestResult { ref logs, .. }) => logs.as_slice(),
+            TestResult::PropertyTestResult(..) => &[],
+        }
+    }
+
+    pub fn into_error(&self, verbose: bool) -> crate::Error {
+        let (name, path, assertion, src) = match self {
+            TestResult::UnitTestResult(UnitTestResult { test, .. }) => (
+                test.name.to_string(),
+                test.input_path.to_path_buf(),
+                test.assertion.as_ref().map(|hint| hint.to_string()),
+                test.program.to_pretty(),
+            ),
+            TestResult::PropertyTestResult(PropertyTestResult { test, .. }) => (
+                test.name.to_string(),
+                test.input_path.to_path_buf(),
+                None,
+                test.program.to_pretty(),
+            ),
+        };
+        crate::Error::TestFailure {
+            name,
+            path,
+            assertion,
+            src,
+            verbose,
+        }
+    }
+}
+
+#[derive(Debug)]
+pub struct UnitTestResult {
+    pub success: bool,
+    pub spent_budget: ExBudget,
+    pub output: Option<Term<NamedDeBruijn>>,
+    pub logs: Vec<String>,
+    pub test: UnitTest,
+}
+
+unsafe impl Send for UnitTestResult {}
+
+#[derive(Debug)]
+pub struct PropertyTestResult {
+    pub test: PropertyTest,
+    pub counterexample: Option<Term<NamedDeBruijn>>,
+    pub iterations: usize,
+}
+
+unsafe impl Send for PropertyTestResult {}
+
 #[derive(Debug, Clone)]
-pub struct EvalHint {
+pub struct Assertion {
    pub bin_op: BinOp,
    pub left: Program<NamedDeBruijn>,
    pub right: Program<NamedDeBruijn>,
    pub can_error: bool,
 }

-impl Display for EvalHint {
+impl Display for Assertion {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let unlimited_budget = ExBudget {
            mem: i64::MAX,
@@ -240,12 +699,3 @@ impl Display for EvalHint {
        f.write_str(&msg)
    }
 }
-
-#[derive(Debug)]
-pub struct EvalInfo<'a> {
-    pub success: bool,
-    pub spent_budget: ExBudget,
-    pub output: Option<Term<NamedDeBruijn>>,
-    pub logs: Vec<String>,
-    pub test: &'a Test,
-}
--- a/crates/aiken-project/src/telemetry.rs
+++ b/crates/aiken-project/src/telemetry.rs
@@ -1,9 +1,6 @@
 use crate::pretty;
-use crate::script::EvalInfo;
-use owo_colors::{
-    OwoColorize,
-    Stream::{self, Stderr},
-};
+use crate::script::{PropertyTestResult, TestResult, UnitTestResult};
+use owo_colors::{OwoColorize, Stream::Stderr};
 use std::{collections::BTreeMap, fmt::Display, path::PathBuf};
 use uplc::machine::cost_model::ExBudget;

@@ -11,7 +8,7 @@ pub trait EventListener {
    fn handle_event(&self, _event: Event) {}
 }

-pub enum Event<'a> {
+pub enum Event {
    StartingCompilation {
        name: String,
        version: String,
@@ -35,12 +32,9 @@ pub enum Event<'a> {
        name: String,
        path: PathBuf,
    },
-    EvaluatingFunction {
-        results: Vec<EvalInfo<'a>>,
-    },
    RunningTests,
    FinishedTests {
-        tests: Vec<EvalInfo<'a>>,
+        tests: Vec<TestResult>,
    },
    WaitingForBuildDirLock,
    ResolvingPackages {
@@ -164,20 +158,6 @@ impl EventListener for Terminal {
                    name.if_supports_color(Stderr, |s| s.bright_blue()),
                );
            }
-            Event::EvaluatingFunction { results } => {
-                eprintln!(
-                    "{}\n",
-                    "  Evaluating function ..."
-                        .if_supports_color(Stderr, |s| s.bold())
-                        .if_supports_color(Stderr, |s| s.purple())
-                );
-
-                let (max_mem, max_cpu) = find_max_execution_units(&results);
-
-                for eval_info in &results {
-                    println!("    {}", fmt_eval(eval_info, max_mem, max_cpu, Stderr))
-                }
-            }
            Event::RunningTests => {
                eprintln!(
                    "{} {}\n",
@@ -190,19 +170,19 @@ impl EventListener for Terminal {
            Event::FinishedTests { tests } => {
                let (max_mem, max_cpu) = find_max_execution_units(&tests);

-                for (module, infos) in &group_by_module(&tests) {
+                for (module, results) in &group_by_module(&tests) {
                    let title = module
                        .if_supports_color(Stderr, |s| s.bold())
                        .if_supports_color(Stderr, |s| s.blue())
                        .to_string();

-                    let tests = infos
+                    let tests = results
                        .iter()
-                        .map(|eval_info| fmt_test(eval_info, max_mem, max_cpu, true))
+                        .map(|r| fmt_test(r, max_mem, max_cpu, true))
                        .collect::<Vec<String>>()
                        .join("\n");

-                    let summary = fmt_test_summary(infos, true);
+                    let summary = fmt_test_summary(results, true);

                    eprintln!(
                        "{}\n",
@@ -269,81 +249,116 @@ impl EventListener for Terminal {
    }
 }

-fn fmt_test(eval_info: &EvalInfo, max_mem: usize, max_cpu: usize, styled: bool) -> String {
-    let EvalInfo {
-        success,
-        test,
-        spent_budget,
-        logs,
-        ..
-    } = eval_info;
-
-    let ExBudget { mem, cpu } = spent_budget;
-    let mem_pad = pretty::pad_left(mem.to_string(), max_mem, " ");
-    let cpu_pad = pretty::pad_left(cpu.to_string(), max_cpu, " ");
-
-    let test = format!(
-        "{status} [mem: {mem_unit}, cpu: {cpu_unit}] {module}",
-        status = if *success {
-            pretty::style_if(styled, "PASS".to_string(), |s| {
-                s.if_supports_color(Stderr, |s| s.bold())
-                    .if_supports_color(Stderr, |s| s.green())
-                    .to_string()
-            })
-        } else {
-            pretty::style_if(styled, "FAIL".to_string(), |s| {
-                s.if_supports_color(Stderr, |s| s.bold())
-                    .if_supports_color(Stderr, |s| s.red())
-                    .to_string()
-            })
-        },
-        mem_unit = pretty::style_if(styled, mem_pad, |s| s
-            .if_supports_color(Stderr, |s| s.cyan())
-            .to_string()),
-        cpu_unit = pretty::style_if(styled, cpu_pad, |s| s
-            .if_supports_color(Stderr, |s| s.cyan())
-            .to_string()),
-        module = pretty::style_if(styled, test.name().to_string(), |s| s
-            .if_supports_color(Stderr, |s| s.bright_blue())
-            .to_string()),
-    );
-
-    let logs = if logs.is_empty() {
-        String::new()
+/// Renders one test result as a report entry.
+///
+/// The entry is made of a `PASS` / `FAIL` status, then either the spent
+/// `mem` / `cpu` budget (unit tests) or the number of iterations (property
+/// tests), then the test title, and finally one `↳`-prefixed line per trace.
+///
+/// `max_mem` and `max_cpu` are the column widths used to align the budget;
+/// `styled` is forwarded to `pretty::style_if` for status, budget and title.
+fn fmt_test(result: &TestResult, max_mem: usize, max_cpu: usize, styled: bool) -> String {
+    // Status
+    let mut test = if result.is_success() {
+        pretty::style_if(styled, "PASS".to_string(), |s| {
+            s.if_supports_color(Stderr, |s| s.bold())
+                .if_supports_color(Stderr, |s| s.green())
+                .to_string()
+        })
    } else {
-        logs.iter()
-            .map(|line| {
-                format!(
-                    "{arrow} {styled_line}",
-                    arrow = "↳".if_supports_color(Stderr, |s| s.bright_yellow()),
-                    styled_line = line
-                        .split('\n')
-                        .map(|l| format!("{}", l.if_supports_color(Stderr, |s| s.bright_black())))
-                        .collect::<Vec<_>>()
-                        .join("\n")
-                )
-            })
-            .collect::<Vec<_>>()
-            .join("\n")
+        pretty::style_if(styled, "FAIL".to_string(), |s| {
+            s.if_supports_color(Stderr, |s| s.bold())
+                .if_supports_color(Stderr, |s| s.red())
+                .to_string()
+        })
    };

-    if logs.is_empty() {
-        test
-    } else {
-        [test, logs].join("\n")
+    // Execution units / iteration steps
+    match result {
+        TestResult::UnitTestResult(UnitTestResult { spent_budget, .. }) => {
+            let ExBudget { mem, cpu } = spent_budget;
+            let mem_pad = pretty::pad_left(mem.to_string(), max_mem, " ");
+            let cpu_pad = pretty::pad_left(cpu.to_string(), max_cpu, " ");
+
+            test = format!(
+                "{test} [mem: {mem_unit}, cpu: {cpu_unit}]",
+                mem_unit = pretty::style_if(styled, mem_pad, |s| s
+                    .if_supports_color(Stderr, |s| s.cyan())
+                    .to_string()),
+                cpu_unit = pretty::style_if(styled, cpu_pad, |s| s
+                    .if_supports_color(Stderr, |s| s.cyan())
+                    .to_string()),
+            );
+        }
+        TestResult::PropertyTestResult(PropertyTestResult { iterations, .. }) => {
+            test = pretty::pad_right(
+                format!(
+                    "{test} [after {} test{}]",
+                    pretty::pad_left(iterations.to_string(), 3, " "),
+                    // Only exactly one iteration is singular ("0 tests", "1 test").
+                    if *iterations != 1 { "s" } else { "" }
+                ),
+                14 + max_mem + max_cpu,
+                " ",
+            );
+        }
    }
+
+    // Title
+    test = format!(
+        "{test} {title}",
+        title = pretty::style_if(styled, result.title().to_string(), |s| s
+            .if_supports_color(Stderr, |s| s.bright_blue())
+            .to_string())
+    );
+
+    // CounterExample
+    //    if let TestResult::PropertyTestResult(PropertyTestResult {
+    //        counterexample: Some(counterexample),
+    //        ..
+    //    }) = result
+    //    {
+    //        test = format!(
+    //            "{test}\n{}",
+    //            pretty::boxed_with(
+    //                &pretty::style_if(styled, "counterexample".to_string(), |s| s
+    //                    .if_supports_color(Stderr, |s| s.red())
+    //                    .if_supports_color(Stderr, |s| s.bold())
+    //                    .to_string()),
+    //                &counterexample.to_pretty(),
+    //                |s| s.red().to_string()
+    //            )
+    //        )
+    //    }
+
+    // Traces
+    if !result.logs().is_empty() {
+        test = format!(
+            "{test}\n{logs}",
+            logs = result
+                .logs()
+                .iter()
+                .map(|line| {
+                    format!(
+                        "{arrow} {styled_line}",
+                        arrow = "↳".if_supports_color(Stderr, |s| s.bright_yellow()),
+                        styled_line = line
+                            .split('\n')
+                            .map(|l| format!(
+                                "{}",
+                                l.if_supports_color(Stderr, |s| s.bright_black())
+                            ))
+                            .collect::<Vec<_>>()
+                            .join("\n")
+                    )
+                })
+                .collect::<Vec<_>>()
+                .join("\n")
+        );
+    };
+
+    test
 }

-fn fmt_test_summary(tests: &[&EvalInfo], styled: bool) -> String {
-    let (n_passed, n_failed) = tests
-        .iter()
-        .fold((0, 0), |(n_passed, n_failed), test_info| {
-            if test_info.success {
-                (n_passed + 1, n_failed)
-            } else {
-                (n_passed, n_failed + 1)
-            }
-        });
+fn fmt_test_summary(tests: &[&TestResult], styled: bool) -> String {
+    let (n_passed, n_failed) = tests.iter().fold((0, 0), |(n_passed, n_failed), result| {
+        if result.is_success() {
+            (n_passed + 1, n_failed)
+        } else {
+            (n_passed, n_failed + 1)
+        }
+    });
    format!(
        "{} | {} | {}",
        pretty::style_if(styled, format!("{} tests", tests.len()), |s| s
@@ -360,55 +375,32 @@ fn fmt_test_summary(tests: &[&EvalInfo], styled: bool) -> String {
    )
 }

-fn fmt_eval(eval_info: &EvalInfo, max_mem: usize, max_cpu: usize, stream: Stream) -> String {
-    let EvalInfo {
-        output,
-        test,
-        spent_budget,
-        ..
-    } = eval_info;
-
-    let ExBudget { mem, cpu } = spent_budget;
-
-    format!(
-        "    {}::{} [mem: {}, cpu: {}]\n    │\n    ╰─▶ {}",
-        test.module().if_supports_color(stream, |s| s.blue()),
-        test.name().if_supports_color(stream, |s| s.bright_blue()),
-        pretty::pad_left(mem.to_string(), max_mem, " "),
-        pretty::pad_left(cpu.to_string(), max_cpu, " "),
-        output
-            .as_ref()
-            .map(|x| format!("{x}"))
-            .unwrap_or_else(|| "Error.".to_string()),
-    )
-}
-
-fn group_by_module<'a>(infos: &'a Vec<EvalInfo<'a>>) -> BTreeMap<String, Vec<&'a EvalInfo<'a>>> {
+/// Groups test results by the module they belong to.
+///
+/// Returns a map from module name to references to that module's results,
+/// kept in their original relative order. Takes a slice rather than a
+/// `&Vec` so any contiguous collection of results can be passed.
+fn group_by_module(results: &[TestResult]) -> BTreeMap<String, Vec<&TestResult>> {
    let mut modules = BTreeMap::new();
-    for eval_info in infos {
-        let xs: &mut Vec<&EvalInfo> = modules
-            .entry(eval_info.test.module().to_string())
-            .or_default();
-        xs.push(eval_info);
+    for r in results {
+        let xs: &mut Vec<&TestResult> = modules.entry(r.module().to_string()).or_default();
+        xs.push(r);
    }
    modules
 }

-fn find_max_execution_units(xs: &[EvalInfo]) -> (usize, usize) {
-    let (max_mem, max_cpu) = xs.iter().fold(
-        (0, 0),
-        |(max_mem, max_cpu), EvalInfo { spent_budget, .. }| {
-            if spent_budget.mem >= max_mem && spent_budget.cpu >= max_cpu {
-                (spent_budget.mem, spent_budget.cpu)
-            } else if spent_budget.mem > max_mem {
-                (spent_budget.mem, max_cpu)
-            } else if spent_budget.cpu > max_cpu {
-                (max_mem, spent_budget.cpu)
-            } else {
-                (max_mem, max_cpu)
+/// Computes the column widths needed to align execution units.
+///
+/// Takes the component-wise maximum of `mem` and `cpu` over every unit test
+/// result (property test results are ignored) and returns the number of
+/// characters each maximum occupies once converted to a string.
+fn find_max_execution_units(xs: &[TestResult]) -> (usize, usize) {
+    let (max_mem, max_cpu) = xs
+        .iter()
+        .fold((0, 0), |(mem_so_far, cpu_so_far), result| match result {
+            TestResult::PropertyTestResult(..) => (mem_so_far, cpu_so_far),
+            TestResult::UnitTestResult(UnitTestResult { spent_budget, .. }) => {
+                (
+                    spent_budget.mem.max(mem_so_far),
+                    spent_budget.cpu.max(cpu_so_far),
+                )
            }
-        },
-    );
+        });

    (max_mem.to_string().len(), max_cpu.to_string().len())
 }
--- a/crates/uplc/src/machine/eval_result.rs
+++ b/crates/uplc/src/machine/eval_result.rs
@@ -40,7 +40,7 @@ impl EvalResult {
        } else {
            self.result.is_err()
                || matches!(self.result, Ok(Term::Error))
-                || matches!(self.result, Ok(Term::Constant(ref con)) if matches!(con.as_ref(), Constant::Bool(false)))
+                || !matches!(self.result, Ok(Term::Constant(ref con)) if matches!(con.as_ref(), Constant::Bool(true)))
        }
    }

--- a/examples/acceptance_tests/093/lib/foo.ak
+++ b/examples/acceptance_tests/093/lib/foo.ak
@@ -1,12 +1,49 @@
+use aiken/builtin
+
+const max_int: Int = 255
+
+/// Generator state threaded through `any_int`.
+///
+/// - `Seeded`: new values are derived from `seed`, and each drawn value is
+///   prepended to `choices`.
+/// - `Replayed`: values are taken from the front of `choices`.
 type PRNG {
-  seed: Int,
-  size: Int,
+  Seeded { seed: Int, choices: List<Int> }
+  Replayed { choices: List<Int> }
 }

-fn any_int(prng: PRNG) {
-  (prng, prng.seed)
+/// Produces an `Int` together with the next `PRNG` state.
+///
+/// Returns `None` only when replaying: either no choice is left, or the next
+/// choice falls outside `[0, max_int]`.
+fn any_int(prng: PRNG) -> Option<(PRNG, Int)> {
+  when prng is {
+    Seeded { seed, choices } -> {
+      // Hash the current seed; the digest feeds both the value and the next seed.
+      let digest =
+        seed
+          |> builtin.integer_to_bytearray(True, 32, _)
+          |> builtin.blake2b_256()
+
+      // The generated value is the byte at index 0 of the digest.
+      let choice =
+        digest
+          |> builtin.index_bytearray(0)
+
+      // NOTE(review): presumably 4 bytes starting at offset 1 — confirm the
+      // argument order of `slice_bytearray`.
+      let new_seed =
+        digest
+          |> builtin.slice_bytearray(1, 4, _)
+          |> builtin.bytearray_to_integer(True, _)
+
+      // Record the value at the front of `choices` alongside the new seed.
+      Some((Seeded { seed: new_seed, choices: [choice, ..choices] }, choice))
+    }
+
+    Replayed { choices } ->
+      when choices is {
+        // Nothing left to replay.
+        [] -> None
+        // Replay the head only if it lies within [0, max_int].
+        [head, ..tail] ->
+          if head >= 0 && head <= max_int {
+            Some((Replayed { choices: tail }, head))
+          } else {
+            None
+          }
+      }
+  }
 }

-test prop_test_foo(n via any_int) {
-  n > 0
+// Every value drawn via `any_int` must lie within [0, 255].
+test prop_foo_1(n via any_int) {
+  n >= 0 && n <= 255
+}
+
+// Declared with `fail`. NOTE(review): presumably expected to be falsified,
+// i.e. `any_int` should eventually yield a value >= 100 — confirm.
+test prop_foo_2(n via any_int) fail {
+  n < 100
 }