Merge pull request #1093 from aiken-lang/benchmarks-wrapup

Wrapping-up benchmarks
Matthias Benkort 2025-02-09 17:04:39 +01:00 committed by GitHub
commit 94246bdb2b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 836 additions and 606 deletions

View File

@ -6,7 +6,6 @@ on:
jobs:
nix-build:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v3
@ -20,5 +19,14 @@ jobs:
uses: DeterminateSystems/magic-nix-cache-action@v1
- name: Build Aiken
run: nix build
shell: bash
run: |
set +e
nix build
exitcode="$?"
if [[ "$exitcode" != "0" ]] ; then
echo "::warning::Nix build failed with exit code $exitcode"
exit 0
else
exit "$exitcode"
fi

View File

@ -2,9 +2,21 @@
## v1.1.11 - UNRELEASED
### Added
- **aiken**: New `aiken bench` command to run benchmarks. @Riley-Kilgore, @KtorZ
The command is very similar to `aiken check`: it collects and runs the benchmarks found across the codebase. By default, the output is a set of pretty terminal plots for each dimension (mem & cpu) of each benchmark. The complete dataset of points can be obtained in a structured (JSON) format by redirecting the output to a file.
- **aiken-lang**: New `bench` keyword and capabilities to the test framework. @Riley-Kilgore, @KtorZ
A `bench` is a new type of test that takes a single `Sampler<a> = fn(Int) -> Fuzzer<a>` as parameter, similar to how property-based tests receive a `Fuzzer<a>`. A `Sampler` is, in fact, a _scaled Fuzzer_ which receives a monotonically increasing size as parameter. This allows fine-grained control over generated values. Unlike tests, benchmarks can return _anything_, since their output is ignored; see the sketch below.
Read more about benchmarks in the [user manual](https://aiken-lang.org/language-tour/bench).
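For illustration, a minimal sketch of a benchmark driven by a sampler. The `aiken/fuzz` helpers (`int`, `list_between`) and the `quicksort` function under test are assumptions made for the example, not part of this change:

```aiken
use aiken/fuzz

// A Sampler<List<Int>> is just a function from a size to a Fuzzer.
// The size, monotonically increasing, bounds the length of the generated list.
fn sample_list(size: Int) -> Fuzzer<List<Int>> {
  fuzz.list_between(fuzz.int(), 0, size)
}

// Benchmarks use `via` like property tests, but receive a Sampler instead of a Fuzzer.
// The returned value is ignored, so the benchmark may return anything.
bench quicksort_scaling(xs via sample_list) {
  quicksort(xs)
}
```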
### Changed
- **aiken**: support for `bench` keyword to define benchmarks. @Riley-Kilgore
- **aiken-lang**: The compiler now raises a warning when attempting to destructure a record constructor without using named fields. See [#1084](https://github.com/aiken-lang/aiken/issues/1084). @KtorZ
- **aiken-lang**: Fix blueprint schema definitions related to pairs (no longer occasionally omit Pairs definitions, and generate them as data List). See [#1086](https://github.com/aiken-lang/aiken/issues/1086) and [#970](https://github.com/aiken-lang/aiken/issues/970). @KtorZ
@ -16,34 +28,6 @@
- **aiken-lang**: `write_bits` can now be used from aiken/builtins. @Microproofs
### Changed
- **aiken-project**: The `aiken.toml` file no longer supports `v1` and `v2` for the plutus version field. @rvcas
- **aiken-project**: `Error::TomlLoading` now looks much better - [see](https://github.com/aiken-lang/aiken/issues/1032#issuecomment-2562122101). @rvcas
- **aiken-lang**: 10-20% optimization improvements via case-constr, rearranging function definitions (while maintaining dependency ordering),
and allowing inlining in if_then_else_error cases which preserve the same error semantics for a program. @Microproofs
### Fixed
- **aiken**: panic error when using `aiken uplc decode` on cbor encoded flat bytes. @rvcas
- **aiken-lang**: comment formatting in pipelines leading to confusion. @rvcas
- **aiken-lang**: preserve holes discard name in function captures (see [#1080](https://github.com/aiken-lang/aiken/issues/1080)). @KtorZ
- **uplc**: Added deserialization match for the new builtin indices.
## v1.1.11 - UNRELEASED
### Added
- **aiken**: support for `bench` keyword to define benchmarks. @Riley-Kilgore
## v1.1.10 - 2025-01-21
### Added
- **aiken-project**: `export` output now supports the function's `return_type`. @rvcas
- **aiken-lang**: `write_bits` can now be used from aiken/builtins. @Microproofs
### Changed
- **aiken-project**: The `aiken.toml` file no longer supports `v1` and `v2` for the plutus version field. @rvcas

Cargo.lock (generated, vendored): 439 changes

File diff suppressed because it is too large

View File

@ -28,6 +28,12 @@ use uplc::{
};
use vec1::{vec1, Vec1};
#[derive(Debug, Clone, Copy)]
pub enum RunnableKind {
Test,
Bench,
}
/// ----- Test -----------------------------------------------------------------
///
/// Aiken supports two kinds of tests: unit and property. A unit test is simply a
@ -117,15 +123,15 @@ impl Test {
})
}
pub fn from_test_definition(
pub fn from_function_definition(
generator: &mut CodeGenerator<'_>,
test: TypedTest,
module_name: String,
input_path: PathBuf,
is_benchmark: bool,
kind: RunnableKind,
) -> Test {
if test.arguments.is_empty() {
if is_benchmark {
if matches!(kind, RunnableKind::Bench) {
unreachable!("benchmark must have at least one argument");
} else {
Self::unit_test(generator, test, module_name, input_path)
@ -153,8 +159,8 @@ impl Test {
// apply onto it later.
let generator_program = generator.clone().generate_raw(&via, &[], &module_name);
if is_benchmark {
Test::Benchmark(Benchmark {
match kind {
RunnableKind::Bench => Test::Benchmark(Benchmark {
input_path,
module: module_name,
name: test.name,
@ -165,9 +171,8 @@ impl Test {
type_info,
stripped_type_info,
},
})
} else {
Self::property_test(
}),
RunnableKind::Test => Self::property_test(
input_path,
module_name,
test.name,
@ -178,27 +183,26 @@ impl Test {
stripped_type_info,
type_info,
},
)
),
}
}
}
pub fn from_benchmark_definition(
generator: &mut CodeGenerator<'_>,
test: TypedTest,
module_name: String,
input_path: PathBuf,
) -> Test {
Self::from_test_definition(generator, test, module_name, input_path, true)
}
pub fn from_function_definition(
generator: &mut CodeGenerator<'_>,
test: TypedTest,
module_name: String,
input_path: PathBuf,
) -> Test {
Self::from_test_definition(generator, test, module_name, input_path, false)
pub fn run(
self,
seed: u32,
max_success: usize,
plutus_version: &PlutusVersion,
) -> TestResult<(Constant, Rc<Type>), PlutusData> {
match self {
Test::UnitTest(unit_test) => TestResult::UnitTestResult(unit_test.run(plutus_version)),
Test::PropertyTest(property_test) => {
TestResult::PropertyTestResult(property_test.run(seed, max_success, plutus_version))
}
Test::Benchmark(benchmark) => {
TestResult::BenchmarkResult(benchmark.run(seed, max_success, plutus_version))
}
}
}
}
@ -217,7 +221,7 @@ pub struct UnitTest {
unsafe impl Send for UnitTest {}
impl UnitTest {
pub fn run<T>(self, plutus_version: &PlutusVersion) -> TestResult<(Constant, Rc<Type>), T> {
pub fn run(self, plutus_version: &PlutusVersion) -> UnitTestResult<(Constant, Rc<Type>)> {
let mut eval_result = Program::<NamedDeBruijn>::try_from(self.program.clone())
.unwrap()
.eval_version(ExBudget::max(), &plutus_version.into());
@ -233,13 +237,13 @@ impl UnitTest {
}
traces.extend(eval_result.logs());
TestResult::UnitTestResult(UnitTestResult {
UnitTestResult {
success,
test: self.to_owned(),
spent_budget: eval_result.cost(),
traces,
assertion: self.assertion,
})
}
}
}
@ -270,7 +274,7 @@ pub struct Fuzzer<T> {
}
#[derive(Debug, Clone, thiserror::Error, miette::Diagnostic)]
#[error("Fuzzer exited unexpectedly: {uplc_error}")]
#[error("Fuzzer exited unexpectedly: {uplc_error}.")]
pub struct FuzzerError {
traces: Vec<String>,
uplc_error: uplc::machine::Error,
@ -317,12 +321,12 @@ impl PropertyTest {
/// Run a property test from a given seed. The property is run at most DEFAULT_MAX_SUCCESS times. It
/// may stop earlier on failure, in which case a 'counterexample' is returned.
pub fn run<U>(
pub fn run(
self,
seed: u32,
n: usize,
plutus_version: &PlutusVersion,
) -> TestResult<U, PlutusData> {
) -> PropertyTestResult<PlutusData> {
let mut labels = BTreeMap::new();
let mut remaining = n;
@ -352,13 +356,13 @@ impl PropertyTest {
),
};
TestResult::PropertyTestResult(PropertyTestResult {
PropertyTestResult {
test: self,
counterexample,
iterations,
labels,
traces,
})
}
}
pub fn run_n_times<'a>(
@ -372,9 +376,7 @@ impl PropertyTest {
let mut counterexample = None;
while *remaining > 0 && counterexample.is_none() {
let (next_prng, cex) = self.run_once(prng, labels, plutus_version)?;
prng = next_prng;
counterexample = cex;
(prng, counterexample) = self.run_once(prng, labels, plutus_version)?;
*remaining -= 1;
}
@ -492,6 +494,29 @@ pub struct Sampler<T> {
pub stripped_type_info: Rc<Type>,
}
#[derive(Debug, Clone, thiserror::Error, miette::Diagnostic)]
pub enum BenchmarkError {
#[error("Sampler exited unexpectedly: {uplc_error}.")]
SamplerError {
traces: Vec<String>,
uplc_error: uplc::machine::Error,
},
#[error("Bench exited unexpectedly: {uplc_error}.")]
BenchError {
traces: Vec<String>,
uplc_error: uplc::machine::Error,
},
}
impl BenchmarkError {
pub fn traces(&self) -> &[String] {
match self {
BenchmarkError::SamplerError { traces, .. }
| BenchmarkError::BenchError { traces, .. } => traces.as_slice(),
}
}
}
#[derive(Debug, Clone)]
pub struct Benchmark {
pub input_path: PathBuf,
@ -505,50 +530,61 @@ pub struct Benchmark {
unsafe impl Send for Benchmark {}
impl Benchmark {
pub fn benchmark(
pub const DEFAULT_MAX_SIZE: usize = 30;
pub fn run(
self,
seed: u32,
max_iterations: usize,
max_size: usize,
plutus_version: &PlutusVersion,
) -> Vec<BenchmarkResult> {
let mut results = Vec::with_capacity(max_iterations);
let mut iteration = 0;
) -> BenchmarkResult {
let mut measures = Vec::with_capacity(max_size);
let mut prng = Prng::from_seed(seed);
let mut error = None;
let mut size = 0;
while max_iterations > iteration {
while error.is_none() && max_size >= size {
let fuzzer = self
.sampler
.program
.apply_data(Data::integer(num_bigint::BigInt::from(iteration as i64)));
.apply_term(&Term::Constant(Constant::Integer(size.into()).into()));
match prng.sample(&fuzzer) {
Ok(None) => {
panic!("A seeded PRNG returned 'None' which indicates a sampler is ill-formed and implemented wrongly; please contact library's authors.");
}
Ok(Some((new_prng, value))) => {
prng = new_prng;
let mut eval_result = self.eval(&value, plutus_version);
results.push(BenchmarkResult {
test: self.clone(),
cost: eval_result.cost(),
success: true,
traces: eval_result.logs().to_vec(),
});
let mut result = self.eval(&value, plutus_version);
match result.result() {
Ok(_) => measures.push((size, result.cost())),
Err(uplc_error) => {
error = Some(BenchmarkError::BenchError {
traces: result
.logs()
.into_iter()
.filter(|s| PropertyTest::extract_label(s).is_none())
.collect(),
uplc_error,
});
}
}
}
Ok(None) => {
break;
}
Err(e) => {
results.push(BenchmarkResult {
test: self.clone(),
cost: ExBudget::default(),
success: false,
traces: vec![format!("Fuzzer error: {}", e)],
});
break;
Err(FuzzerError { traces, uplc_error }) => {
error = Some(BenchmarkError::SamplerError { traces, uplc_error });
}
}
iteration += 1;
size += 1;
}
results
BenchmarkResult {
bench: self,
measures,
error,
}
}
pub fn eval(&self, value: &PlutusData, plutus_version: &PlutusVersion) -> EvalResult {
@ -650,7 +686,6 @@ impl Prng {
pub fn sample(
&self,
fuzzer: &Program<Name>,
// iteration: usize,
) -> Result<Option<(Prng, PlutusData)>, FuzzerError> {
let program = Program::<NamedDeBruijn>::try_from(fuzzer.apply_data(self.uplc())).unwrap();
let mut result = program.eval(ExBudget::max());
@ -1069,7 +1104,7 @@ where
pub enum TestResult<U, T> {
UnitTestResult(UnitTestResult<U>),
PropertyTestResult(PropertyTestResult<T>),
Benchmark(BenchmarkResult),
BenchmarkResult(BenchmarkResult),
}
unsafe impl<U, T> Send for TestResult<U, T> {}
@ -1084,7 +1119,7 @@ impl TestResult<(Constant, Rc<Type>), PlutusData> {
TestResult::PropertyTestResult(test) => {
TestResult::PropertyTestResult(test.reify(data_types))
}
TestResult::Benchmark(result) => TestResult::Benchmark(result),
TestResult::BenchmarkResult(result) => TestResult::BenchmarkResult(result),
}
}
}
@ -1107,7 +1142,7 @@ impl<U, T> TestResult<U, T> {
}
OnTestFailure::SucceedImmediately => counterexample.is_some(),
},
TestResult::Benchmark(BenchmarkResult { success, .. }) => *success,
TestResult::BenchmarkResult(BenchmarkResult { error, .. }) => error.is_none(),
}
}
@ -1117,7 +1152,7 @@ impl<U, T> TestResult<U, T> {
TestResult::PropertyTestResult(PropertyTestResult { ref test, .. }) => {
test.module.as_str()
}
TestResult::Benchmark(BenchmarkResult { ref test, .. }) => test.module.as_str(),
TestResult::BenchmarkResult(BenchmarkResult { ref bench, .. }) => bench.module.as_str(),
}
}
@ -1127,7 +1162,7 @@ impl<U, T> TestResult<U, T> {
TestResult::PropertyTestResult(PropertyTestResult { ref test, .. }) => {
test.name.as_str()
}
TestResult::Benchmark(BenchmarkResult { ref test, .. }) => test.name.as_str(),
TestResult::BenchmarkResult(BenchmarkResult { ref bench, .. }) => bench.name.as_str(),
}
}
@ -1135,7 +1170,9 @@ impl<U, T> TestResult<U, T> {
match self {
TestResult::UnitTestResult(UnitTestResult { traces, .. })
| TestResult::PropertyTestResult(PropertyTestResult { traces, .. }) => traces,
TestResult::Benchmark(BenchmarkResult { traces, .. }) => traces,
TestResult::BenchmarkResult(BenchmarkResult { error, .. }) => {
error.as_ref().map(|e| e.traces()).unwrap_or_default()
}
}
}
}
@ -1473,10 +1510,9 @@ impl Assertion<UntypedExpr> {
#[derive(Debug, Clone)]
pub struct BenchmarkResult {
pub test: Benchmark,
pub cost: ExBudget,
pub success: bool,
pub traces: Vec<String>,
pub bench: Benchmark,
pub measures: Vec<(usize, ExBudget)>,
pub error: Option<BenchmarkError>,
}
unsafe impl Send for BenchmarkResult {}

View File

@ -317,6 +317,14 @@ You can use '{discard}' and numbers to distinguish between similar names.
location: Span,
},
#[error("I notice a benchmark definition without any argument.\n")]
#[diagnostic(url("https://aiken-lang.org/language-tour/bench"))]
#[diagnostic(code("arity::bench"))]
IncorrectBenchmarkArity {
#[label("must have exactly one argument")]
location: Span,
},
#[error(
"I saw {} field{} in a context where there should be {}.\n",
given.if_supports_color(Stdout, |s| s.purple()),
@ -1158,6 +1166,7 @@ impl ExtraData for Error {
| Error::UnknownPurpose { .. }
| Error::UnknownValidatorHandler { .. }
| Error::UnexpectedValidatorFallback { .. }
| Error::IncorrectBenchmarkArity { .. }
| Error::MustInferFirst { .. } => None,
Error::UnknownType { name, .. }

View File

@ -12,7 +12,8 @@ use crate::{
TypedDefinition, TypedModule, TypedValidator, UntypedArg, UntypedDefinition, UntypedModule,
UntypedPattern, UntypedValidator, Use, Validator,
},
expr::{TypedExpr, UntypedAssignmentKind},
expr::{TypedExpr, UntypedAssignmentKind, UntypedExpr},
parser::token::Token,
tipo::{expr::infer_function, Span, Type, TypeVar},
IdGenerator,
};
@ -347,67 +348,8 @@ fn infer_definition(
});
}
let typed_via = ExprTyper::new(environment, tracing).infer(arg.via.clone())?;
let hydrator: &mut Hydrator = hydrators.get_mut(&f.name).unwrap();
let provided_inner_type = arg
.arg
.annotation
.as_ref()
.map(|ann| hydrator.type_from_annotation(ann, environment))
.transpose()?;
let (inferred_annotation, inferred_inner_type) = infer_fuzzer(
environment,
provided_inner_type.clone(),
&typed_via.tipo(),
&arg.via.location(),
)?;
// Ensure that the annotation, if any, matches the type inferred from the
// Fuzzer.
if let Some(provided_inner_type) = provided_inner_type {
if !arg
.arg
.annotation
.as_ref()
.unwrap()
.is_logically_equal(&inferred_annotation)
{
return Err(Error::CouldNotUnify {
location: arg.arg.location,
expected: inferred_inner_type.clone(),
given: provided_inner_type.clone(),
situation: Some(UnifyErrorSituation::FuzzerAnnotationMismatch),
rigid_type_names: hydrator.rigid_names(),
});
}
}
// Replace the pre-registered type for the test function, to allow inferring
// the function body with the right type arguments.
let scope = environment
.scope
.get_mut(&f.name)
.expect("Could not find preregistered type for test");
if let Type::Fn {
ref ret,
ref alias,
args: _,
} = scope.tipo.as_ref()
{
scope.tipo = Rc::new(Type::Fn {
ret: ret.clone(),
args: vec![inferred_inner_type.clone()],
alias: alias.clone(),
})
}
Ok((
Some((typed_via, inferred_inner_type)),
Some(inferred_annotation),
))
extract_via_information(&f, arg, hydrators, environment, tracing, infer_fuzzer)
.map(|(typed_via, annotation)| (Some(typed_via), Some(annotation)))
}
None => Ok((None, None)),
}?;
@ -466,130 +408,50 @@ fn infer_definition(
}
Definition::Benchmark(f) => {
let err_incorrect_arity = || {
Err(Error::IncorrectBenchmarkArity {
location: f
.location
.map(|start, end| (start + Token::Benchmark.to_string().len() + 1, end)),
})
};
let (typed_via, annotation) = match f.arguments.first() {
None => return err_incorrect_arity(),
Some(arg) => {
if f.arguments.len() > 1 {
return Err(Error::IncorrectTestArity {
count: f.arguments.len(),
location: f
.arguments
.get(1)
.expect("arguments.len() > 1")
.arg
.location,
});
return err_incorrect_arity();
}
let typed_via = ExprTyper::new(environment, tracing).infer(arg.via.clone())?;
let hydrator: &mut Hydrator = hydrators.get_mut(&f.name).unwrap();
let provided_inner_type = arg
.arg
.annotation
.as_ref()
.map(|ann| hydrator.type_from_annotation(ann, environment))
.transpose()?;
let (inferred_annotation, inferred_inner_type) = infer_sampler(
environment,
provided_inner_type.clone(),
&typed_via.tipo(),
&arg.via.location(),
)?;
// Ensure that the annotation, if any, matches the type inferred from the
// Sampler.
if let Some(provided_inner_type) = provided_inner_type {
if !arg
.arg
.annotation
.as_ref()
.unwrap()
.is_logically_equal(&inferred_annotation)
{
return Err(Error::CouldNotUnify {
location: arg.arg.location,
expected: inferred_inner_type.clone(),
given: provided_inner_type.clone(),
situation: Some(UnifyErrorSituation::SamplerAnnotationMismatch),
rigid_type_names: hydrator.rigid_names(),
});
}
}
// Replace the pre-registered type for the benchmark function, to allow inferring
// the function body with the right type arguments.
let scope = environment
.scope
.get_mut(&f.name)
.expect("Could not find preregistered type for benchmark");
if let Type::Fn {
ref ret,
ref alias,
args: _,
} = scope.tipo.as_ref()
{
scope.tipo = Rc::new(Type::Fn {
ret: ret.clone(),
args: vec![inferred_inner_type.clone()],
alias: alias.clone(),
})
}
Ok((
Some((typed_via, inferred_inner_type)),
Some(inferred_annotation),
))
extract_via_information(&f, arg, hydrators, environment, tracing, infer_sampler)
}
None => Ok((None, None)),
}?;
let typed_f = infer_function(&f.into(), module_name, hydrators, environment, tracing)?;
let is_bool = environment.unify(
typed_f.return_type.clone(),
Type::bool(),
typed_f.location,
false,
);
let arguments = {
let arg = typed_f
.arguments
.first()
.expect("has exactly one argument")
.to_owned();
let is_void = environment.unify(
typed_f.return_type.clone(),
Type::void(),
typed_f.location,
false,
);
if is_bool.or(is_void).is_err() {
return Err(Error::IllegalTestType {
location: typed_f.location,
});
}
vec![ArgVia {
arg: TypedArg {
tipo: typed_via.1,
annotation: Some(annotation),
..arg
},
via: typed_via.0,
}]
};
Ok(Definition::Benchmark(Function {
doc: typed_f.doc,
location: typed_f.location,
name: typed_f.name,
public: typed_f.public,
arguments: match typed_via {
Some((via, tipo)) => {
let arg = typed_f
.arguments
.first()
.expect("has exactly one argument")
.to_owned();
vec![ArgVia {
arg: TypedArg {
tipo,
annotation,
..arg
},
via,
}]
}
None => vec![],
},
arguments,
return_annotation: typed_f.return_annotation,
return_type: typed_f.return_type,
body: typed_f.body,
@ -823,6 +685,83 @@ fn infer_definition(
}
}
#[allow(clippy::result_large_err)]
fn extract_via_information<F>(
f: &Function<(), UntypedExpr, ArgVia<UntypedArg, UntypedExpr>>,
arg: &ArgVia<UntypedArg, UntypedExpr>,
hydrators: &mut HashMap<String, Hydrator>,
environment: &mut Environment<'_>,
tracing: Tracing,
infer_via: F,
) -> Result<((TypedExpr, Rc<Type>), Annotation), Error>
where
F: FnOnce(
&mut Environment<'_>,
Option<Rc<Type>>,
&Rc<Type>,
&Span,
) -> Result<(Annotation, Rc<Type>), Error>,
{
let typed_via = ExprTyper::new(environment, tracing).infer(arg.via.clone())?;
let hydrator: &mut Hydrator = hydrators.get_mut(&f.name).unwrap();
let provided_inner_type = arg
.arg
.annotation
.as_ref()
.map(|ann| hydrator.type_from_annotation(ann, environment))
.transpose()?;
let (inferred_annotation, inferred_inner_type) = infer_via(
environment,
provided_inner_type.clone(),
&typed_via.tipo(),
&arg.via.location(),
)?;
// Ensure that the annotation, if any, matches the type inferred from the
// Fuzzer.
if let Some(provided_inner_type) = provided_inner_type {
if !arg
.arg
.annotation
.as_ref()
.unwrap()
.is_logically_equal(&inferred_annotation)
{
return Err(Error::CouldNotUnify {
location: arg.arg.location,
expected: inferred_inner_type.clone(),
given: provided_inner_type.clone(),
situation: Some(UnifyErrorSituation::FuzzerAnnotationMismatch),
rigid_type_names: hydrator.rigid_names(),
});
}
}
// Replace the pre-registered type for the test function, to allow inferring
// the function body with the right type arguments.
let scope = environment
.scope
.get_mut(&f.name)
.expect("Could not find preregistered type for test");
if let Type::Fn {
ref ret,
ref alias,
args: _,
} = scope.tipo.as_ref()
{
scope.tipo = Rc::new(Type::Fn {
ret: ret.clone(),
args: vec![inferred_inner_type.clone()],
alias: alias.clone(),
})
}
Ok(((typed_via, inferred_inner_type), inferred_annotation))
}
#[allow(clippy::result_large_err)]
fn infer_fuzzer(
environment: &mut Environment<'_>,

View File

@ -685,6 +685,7 @@ mod tests {
}),
"Identity<fn(Bool) -> Bool>",
);
assert_string!(Type::sampler(Type::int()), "Sampler<Int>");
}
#[test]

View File

@ -11,7 +11,7 @@ authors = [
"Kasey White <kwhitemsg@gmail.com>",
"KtorZ <matthias.benkort@gmail.com>",
]
rust-version = "1.70.0"
rust-version = "1.80.0"
build = "build.rs"
[dependencies]
@ -42,10 +42,12 @@ pulldown-cmark = { version = "0.12.0", default-features = false, features = [
rayon = "1.7.0"
regex = "1.7.1"
reqwest = { version = "0.11.14", features = ["blocking", "json"] }
rgb = "0.8.50"
semver = { version = "1.0.23", features = ["serde"] }
serde = { version = "1.0.152", features = ["derive"] }
serde_json = { version = "1.0.94", features = ["preserve_order"] }
strip-ansi-escapes = "0.1.1"
textplots = { git = "https://github.com/aiken-lang/textplots-rs.git" }
thiserror = "1.0.39"
tokio = { version = "1.26.0", features = ["full"] }
toml = "0.7.2"

View File

@ -3,7 +3,7 @@ use aiken_lang::{
ast::{self, Span},
error::ExtraData,
parser::error::ParseError,
test_framework::{PropertyTestResult, TestResult, UnitTestResult},
test_framework::{BenchmarkResult, PropertyTestResult, TestResult, UnitTestResult},
tipo,
};
use miette::{
@ -193,7 +193,11 @@ impl Error {
test.input_path.to_path_buf(),
test.program.to_pretty(),
),
TestResult::Benchmark(_) => ("bench".to_string(), PathBuf::new(), String::new()), // todo
TestResult::BenchmarkResult(BenchmarkResult { bench, .. }) => (
bench.name.to_string(),
bench.input_path.to_path_buf(),
bench.program.to_pretty(),
),
};
Error::TestFailure {

View File

@ -40,7 +40,7 @@ use aiken_lang::{
format::{Formatter, MAX_COLUMNS},
gen_uplc::CodeGenerator,
line_numbers::LineNumbers,
test_framework::{Test, TestResult},
test_framework::{RunnableKind, Test, TestResult},
tipo::{Type, TypeInfo},
utils, IdGenerator,
};
@ -299,20 +299,21 @@ where
pub fn benchmark(
&mut self,
match_tests: Option<Vec<String>>,
match_benchmarks: Option<Vec<String>>,
exact_match: bool,
seed: u32,
times_to_run: usize,
max_size: usize,
tracing: Tracing,
env: Option<String>,
) -> Result<(), Vec<Error>> {
let options = Options {
tracing: Tracing::silent(),
tracing,
env,
code_gen_mode: CodeGenMode::Benchmark {
match_tests,
match_benchmarks,
exact_match,
seed,
times_to_run,
max_size,
},
blueprint_path: self.blueprint_path(None),
};
@ -432,7 +433,7 @@ where
self.event_listener.handle_event(Event::RunningTests);
}
let tests = self.run_tests(tests, seed, property_max_success);
let tests = self.run_runnables(tests, seed, property_max_success);
self.checks_count = if tests.is_empty() {
None
@ -466,33 +467,39 @@ where
}
}
CodeGenMode::Benchmark {
match_tests,
match_benchmarks,
exact_match,
seed,
times_to_run,
max_size,
} => {
let tests =
self.collect_benchmarks(false, match_tests, exact_match, options.tracing)?;
let verbose = false;
if !tests.is_empty() {
let benchmarks = self.collect_benchmarks(
verbose,
match_benchmarks,
exact_match,
options.tracing,
)?;
if !benchmarks.is_empty() {
self.event_listener.handle_event(Event::RunningBenchmarks);
}
let tests = self.run_benchmarks(tests, seed, times_to_run);
let benchmarks = self.run_runnables(benchmarks, seed, max_size);
let errors: Vec<Error> = tests
let errors: Vec<Error> = benchmarks
.iter()
.filter_map(|e| {
if e.is_success() {
None
} else {
Some(Error::from_test_result(e, false))
Some(Error::from_test_result(e, verbose))
}
})
.collect();
self.event_listener
.handle_event(Event::FinishedBenchmarks { seed, tests });
.handle_event(Event::FinishedBenchmarks { seed, benchmarks });
if !errors.is_empty() {
Err(errors)
@ -954,7 +961,7 @@ where
fn collect_test_items(
&mut self,
kind: &str, // "test" or "bench"
kind: RunnableKind,
verbose: bool,
match_tests: Option<Vec<String>>,
exact_match: bool,
@ -993,8 +1000,8 @@ where
for def in checked_module.ast.definitions() {
let func = match (kind, def) {
("test", Definition::Test(func)) => Some(func),
("bench", Definition::Benchmark(func)) => Some(func),
(RunnableKind::Test, Definition::Test(func)) => Some(func),
(RunnableKind::Bench, Definition::Benchmark(func)) => Some(func),
_ => None,
};
@ -1048,21 +1055,13 @@ where
})
}
tests.push(match kind {
"test" => Test::from_function_definition(
&mut generator,
test.to_owned(),
module_name,
input_path,
),
"bench" => Test::from_benchmark_definition(
&mut generator,
test.to_owned(),
module_name,
input_path,
),
_ => unreachable!("Invalid test kind"),
});
tests.push(Test::from_function_definition(
&mut generator,
test.to_owned(),
module_name,
input_path,
kind,
));
}
Ok(tests)
@ -1075,7 +1074,13 @@ where
exact_match: bool,
tracing: Tracing,
) -> Result<Vec<Test>, Error> {
self.collect_test_items("test", verbose, match_tests, exact_match, tracing)
self.collect_test_items(
RunnableKind::Test,
verbose,
match_tests,
exact_match,
tracing,
)
}
fn collect_benchmarks(
@ -1085,14 +1090,20 @@ where
exact_match: bool,
tracing: Tracing,
) -> Result<Vec<Test>, Error> {
self.collect_test_items("bench", verbose, match_tests, exact_match, tracing)
self.collect_test_items(
RunnableKind::Bench,
verbose,
match_tests,
exact_match,
tracing,
)
}
fn run_tests(
fn run_runnables(
&self,
tests: Vec<Test>,
seed: u32,
property_max_success: usize,
max_success: usize,
) -> Vec<TestResult<UntypedExpr, UntypedExpr>> {
use rayon::prelude::*;
@ -1102,42 +1113,7 @@ where
tests
.into_par_iter()
.map(|test| match test {
Test::UnitTest(unit_test) => unit_test.run(plutus_version),
Test::PropertyTest(property_test) => {
property_test.run(seed, property_max_success, plutus_version)
}
Test::Benchmark(_) => unreachable!("Benchmarks cannot be run in PBT."),
})
.collect::<Vec<TestResult<(Constant, Rc<Type>), PlutusData>>>()
.into_iter()
.map(|test| test.reify(&data_types))
.collect()
}
fn run_benchmarks(
&self,
tests: Vec<Test>,
seed: u32,
property_max_success: usize,
) -> Vec<TestResult<UntypedExpr, UntypedExpr>> {
use rayon::prelude::*;
let data_types = utils::indexmap::as_ref_values(&self.data_types);
let plutus_version = &self.config.plutus;
tests
.into_par_iter()
.flat_map(|test| match test {
Test::UnitTest(_) | Test::PropertyTest(_) => {
unreachable!("Tests cannot be ran during benchmarking.")
}
Test::Benchmark(benchmark) => benchmark
.benchmark(seed, property_max_success, plutus_version)
.into_iter()
.map(TestResult::Benchmark)
.collect::<Vec<_>>(),
})
.map(|test| test.run(seed, max_success, plutus_version))
.collect::<Vec<TestResult<(Constant, Rc<Type>), PlutusData>>>()
.into_iter()
.map(|test| test.reify(&data_types))

View File

@ -30,10 +30,10 @@ pub enum CodeGenMode {
},
Build(bool),
Benchmark {
match_tests: Option<Vec<String>>,
match_benchmarks: Option<Vec<String>>,
exact_match: bool,
seed: u32,
times_to_run: usize,
max_size: usize,
},
NoOp,
}

View File

@ -1,6 +1,6 @@
use aiken_lang::{
expr::UntypedExpr,
test_framework::{PropertyTestResult, TestResult, UnitTestResult},
test_framework::{BenchmarkResult, PropertyTestResult, TestResult, UnitTestResult},
};
pub use json::{json_schema, Json};
use std::{
@ -10,6 +10,7 @@ use std::{
path::PathBuf,
};
pub use terminal::Terminal;
use uplc::machine::cost_model::ExBudget;
mod json;
mod terminal;
@ -50,7 +51,7 @@ pub enum Event {
},
FinishedBenchmarks {
seed: u32,
tests: Vec<TestResult<UntypedExpr, UntypedExpr>>,
benchmarks: Vec<TestResult<UntypedExpr, UntypedExpr>>,
},
WaitingForBuildDirLock,
ResolvingPackages {
@ -117,6 +118,18 @@ pub(crate) fn group_by_module(
}
pub(crate) fn find_max_execution_units<T>(xs: &[TestResult<T, T>]) -> (usize, usize, usize) {
fn max_execution_units(max_mem: i64, max_cpu: i64, cost: &ExBudget) -> (i64, i64) {
if cost.mem >= max_mem && cost.cpu >= max_cpu {
(cost.mem, cost.cpu)
} else if cost.mem > max_mem {
(cost.mem, max_cpu)
} else if cost.cpu > max_cpu {
(max_mem, cost.cpu)
} else {
(max_mem, max_cpu)
}
}
let (max_mem, max_cpu, max_iter) =
xs.iter()
.fold((0, 0, 0), |(max_mem, max_cpu, max_iter), test| match test {
@ -124,18 +137,15 @@ pub(crate) fn find_max_execution_units<T>(xs: &[TestResult<T, T>]) -> (usize, us
(max_mem, max_cpu, std::cmp::max(max_iter, *iterations))
}
TestResult::UnitTestResult(UnitTestResult { spent_budget, .. }) => {
if spent_budget.mem >= max_mem && spent_budget.cpu >= max_cpu {
(spent_budget.mem, spent_budget.cpu, max_iter)
} else if spent_budget.mem > max_mem {
(spent_budget.mem, max_cpu, max_iter)
} else if spent_budget.cpu > max_cpu {
(max_mem, spent_budget.cpu, max_iter)
} else {
(max_mem, max_cpu, max_iter)
}
let (max_mem, max_cpu) = max_execution_units(max_mem, max_cpu, spent_budget);
(max_mem, max_cpu, max_iter)
}
TestResult::Benchmark(..) => {
unreachable!("property returned benchmark result ?!")
TestResult::BenchmarkResult(BenchmarkResult { measures, .. }) => {
let (mut max_mem, mut max_cpu) = (max_mem, max_cpu);
for (_, measure) in measures {
(max_mem, max_cpu) = max_execution_units(max_mem, max_cpu, measure);
}
(max_mem, max_cpu, max_iter)
}
});

View File

@ -39,16 +39,22 @@ impl EventListener for Json {
});
println!("{}", serde_json::to_string_pretty(&json_output).unwrap());
}
Event::FinishedBenchmarks { tests, seed } => {
let benchmark_results: Vec<_> = tests
Event::FinishedBenchmarks { benchmarks, seed } => {
let benchmark_results: Vec<_> = benchmarks
.into_iter()
.filter_map(|test| {
if let TestResult::Benchmark(result) = test {
if let TestResult::BenchmarkResult(result) = test {
Some(serde_json::json!({
"name": result.test.name,
"module": result.test.module,
"memory": result.cost.mem,
"cpu": result.cost.cpu
"name": result.bench.name,
"module": result.bench.module,
"measures": result.measures
.into_iter()
.map(|measure| serde_json::json!({
"size": measure.0,
"memory": measure.1.mem,
"cpu": measure.1.cpu
}))
.collect::<Vec<_>>()
}))
} else {
None
@ -74,7 +80,7 @@ fn fmt_test_json(result: &TestResult<UntypedExpr, UntypedExpr>) -> serde_json::V
TestResult::PropertyTestResult(PropertyTestResult { ref test, .. }) => {
&test.on_test_failure
}
TestResult::Benchmark(_) => unreachable!("benchmark returned in JSON output"),
TestResult::BenchmarkResult(_) => unreachable!("benchmark returned in JSON output"),
};
let mut test = json!({
@ -120,7 +126,7 @@ fn fmt_test_json(result: &TestResult<UntypedExpr, UntypedExpr>) -> serde_json::V
Err(err) => json!({"error": err.to_string()}),
};
}
TestResult::Benchmark(_) => unreachable!("benchmark returned in JSON output"),
TestResult::BenchmarkResult(_) => unreachable!("benchmark returned in JSON output"),
}
if !result.traces().is_empty() {

View File

@ -4,11 +4,21 @@ use aiken_lang::{
ast::OnTestFailure,
expr::UntypedExpr,
format::Formatter,
test_framework::{AssertionStyleOptions, PropertyTestResult, TestResult, UnitTestResult},
test_framework::{
AssertionStyleOptions, BenchmarkResult, PropertyTestResult, TestResult, UnitTestResult,
},
};
use owo_colors::{OwoColorize, Stream::Stderr};
use rgb::RGB8;
use std::sync::LazyLock;
use uplc::machine::cost_model::ExBudget;
static BENCH_PLOT_COLOR: LazyLock<RGB8> = LazyLock::new(|| RGB8 {
r: 250,
g: 211,
b: 144,
});
#[derive(Debug, Default, Clone, Copy)]
pub struct Terminal;
@ -224,14 +234,47 @@ impl EventListener for Terminal {
"...".if_supports_color(Stderr, |s| s.bold())
);
}
Event::FinishedBenchmarks { tests, .. } => {
for test in tests {
if let TestResult::Benchmark(result) = test {
println!("{} {} ", result.test.name.bold(), "BENCH".blue(),);
println!(" Memory: {} bytes", result.cost.mem);
println!(" CPU: {} units", result.cost.cpu);
Event::FinishedBenchmarks { seed, benchmarks } => {
let (max_mem, max_cpu, max_iter) = find_max_execution_units(&benchmarks);
for (module, results) in &group_by_module(&benchmarks) {
let title = module
.if_supports_color(Stderr, |s| s.bold())
.if_supports_color(Stderr, |s| s.blue())
.to_string();
let benchmarks = results
.iter()
.map(|r| fmt_test(r, max_mem, max_cpu, max_iter, true))
.collect::<Vec<String>>()
.join("\n")
.chars()
.skip(1) // Remove extra first newline
.collect::<String>();
let seed_info = format!(
"with {opt}={seed}",
opt = "--seed".if_supports_color(Stderr, |s| s.bold()),
seed = format!("{seed}").if_supports_color(Stderr, |s| s.bold())
);
if !benchmarks.is_empty() {
println!();
}
println!(
"{}\n",
pretty::indent(
&pretty::open_box(&title, &benchmarks, &seed_info, |border| border
.if_supports_color(Stderr, |s| s.bright_black())
.to_string()),
4
)
);
}
if !benchmarks.is_empty() {
println!();
}
}
}
@ -246,7 +289,23 @@ fn fmt_test(
styled: bool,
) -> String {
// Status
let mut test = if result.is_success() {
let mut test = if matches!(result, TestResult::BenchmarkResult { .. }) {
format!(
"\n{label}{title}\n",
label = if result.is_success() {
String::new()
} else {
pretty::style_if(styled, "FAIL ".to_string(), |s| {
s.if_supports_color(Stderr, |s| s.bold())
.if_supports_color(Stderr, |s| s.red())
.to_string()
})
},
title = pretty::style_if(styled, result.title().to_string(), |s| s
.if_supports_color(Stderr, |s| s.bright_blue())
.to_string())
)
} else if result.is_success() {
pretty::style_if(styled, "PASS".to_string(), |s| {
s.if_supports_color(Stderr, |s| s.bold())
.if_supports_color(Stderr, |s| s.green())
@ -292,29 +351,76 @@ fn fmt_test(
if *iterations > 1 { "s" } else { "" }
);
}
TestResult::Benchmark(benchmark) => {
let mem_pad = pretty::pad_left(benchmark.cost.mem.to_string(), max_mem, " ");
let cpu_pad = pretty::pad_left(benchmark.cost.cpu.to_string(), max_cpu, " ");
TestResult::BenchmarkResult(BenchmarkResult { error: Some(e), .. }) => {
test = format!(
"{test} [mem: {mem_unit}, cpu: {cpu_unit}]",
mem_unit = pretty::style_if(styled, mem_pad, |s| s
.if_supports_color(Stderr, |s| s.cyan())
.to_string()),
cpu_unit = pretty::style_if(styled, cpu_pad, |s| s
.if_supports_color(Stderr, |s| s.cyan())
.to_string()),
"{test}{}",
e.to_string().if_supports_color(Stderr, |s| s.red())
);
}
TestResult::BenchmarkResult(BenchmarkResult {
measures,
error: None,
..
}) => {
let max_size = measures
.iter()
.map(|(size, _)| *size)
.max()
.unwrap_or_default();
let mem_chart = format!(
"{title}\n{chart}",
title = "memory units"
.if_supports_color(Stderr, |s| s.yellow())
.if_supports_color(Stderr, |s| s.bold()),
chart = plot(
&BENCH_PLOT_COLOR,
measures
.iter()
.map(|(size, budget)| (*size as f32, budget.mem as f32))
.collect::<Vec<_>>(),
max_size
)
);
let cpu_chart = format!(
"{title}\n{chart}",
title = "cpu units"
.if_supports_color(Stderr, |s| s.yellow())
.if_supports_color(Stderr, |s| s.bold()),
chart = plot(
&BENCH_PLOT_COLOR,
measures
.iter()
.map(|(size, budget)| (*size as f32, budget.cpu as f32))
.collect::<Vec<_>>(),
max_size
)
);
let charts = mem_chart
.lines()
.zip(cpu_chart.lines())
.map(|(l, r)| format!(" {}{r}", pretty::pad_right(l.to_string(), 55, " ")))
.collect::<Vec<_>>()
.join("\n");
test = format!("{test}{charts}",);
}
}
// Title
test = format!(
"{test} {title}",
title = pretty::style_if(styled, result.title().to_string(), |s| s
.if_supports_color(Stderr, |s| s.bright_blue())
.to_string())
);
test = match result {
TestResult::BenchmarkResult(..) => test,
TestResult::UnitTestResult(..) | TestResult::PropertyTestResult(..) => {
format!(
"{test} {title}",
title = pretty::style_if(styled, result.title().to_string(), |s| s
.if_supports_color(Stderr, |s| s.bright_blue())
.to_string())
)
}
};
// Annotations
match result {
@ -470,3 +576,14 @@ fn fmt_test_summary<T>(tests: &[&TestResult<T, T>], styled: bool) -> String {
.to_string()),
)
}
fn plot(color: &RGB8, points: Vec<(f32, f32)>, max_size: usize) -> String {
use textplots::{Chart, ColorPlot, Shape};
let mut chart = Chart::new(80, 50, 1.0, max_size as f32);
let plot = Shape::Lines(&points);
let chart = chart.linecolorplot(&plot, *color);
chart.borders();
chart.axis();
chart.figures();
chart.to_string()
}

View File

@ -101,6 +101,7 @@ mod test {
test.to_owned(),
module_name.to_string(),
PathBuf::new(),
RunnableKind::Test,
),
data_types,
)
@ -245,13 +246,12 @@ mod test {
}
"#});
assert!(prop
.run::<()>(
42,
PropertyTest::DEFAULT_MAX_SUCCESS,
&PlutusVersion::default()
)
.is_success());
assert!(TestResult::PropertyTestResult::<(), _>(prop.run(
42,
PropertyTest::DEFAULT_MAX_SUCCESS,
&PlutusVersion::default()
))
.is_success());
}
#[test]
@ -273,24 +273,20 @@ mod test {
}
"#});
match prop.run::<()>(
let result = prop.run(
42,
PropertyTest::DEFAULT_MAX_SUCCESS,
&PlutusVersion::default(),
) {
TestResult::UnitTestResult(..) => unreachable!("property returned unit-test result ?!"),
TestResult::PropertyTestResult(result) => {
assert!(
result
.labels
.iter()
.eq(vec![(&"head".to_string(), &53), (&"tail".to_string(), &47)]),
"labels: {:#?}",
result.labels
)
}
TestResult::Benchmark(..) => unreachable!("property returned benchmark result ?!"),
}
);
assert!(
result
.labels
.iter()
.eq(vec![(&"head".to_string(), &53), (&"tail".to_string(), &47)]),
"labels: {:#?}",
result.labels
);
}
#[test]

View File

@ -1,4 +1,8 @@
use aiken_lang::test_framework::PropertyTest;
use super::build::{trace_filter_parser, trace_level_parser};
use aiken_lang::{
ast::{TraceLevel, Tracing},
test_framework::Benchmark,
};
use aiken_project::watch::with_project;
use rand::prelude::*;
use std::{
@ -13,37 +17,69 @@ pub struct Args {
/// Path to project
directory: Option<PathBuf>,
/// An initial seed to initialize the pseudo-random generator for property-tests.
/// An initial seed to initialize the pseudo-random generator for benchmarks.
#[clap(long)]
seed: Option<u32>,
/// How many times we will run each benchmark in the relevant project.
#[clap(long, default_value_t = PropertyTest::DEFAULT_MAX_SUCCESS)]
times_to_run: usize,
/// The maximum size to benchmark with. Note that this does not necessarily equal the number
/// of measurements actually performed, but controls the maximum size given to a Sampler.
#[clap(long, default_value_t = Benchmark::DEFAULT_MAX_SIZE)]
max_size: usize,
/// Only run tests if they match any of these strings.
/// Only run benchmarks if they match any of these strings.
///
/// You can match a module with `-m aiken/list` or `-m list`.
/// You can match a test with `-m "aiken/list.{map}"` or `-m "aiken/option.{flatten_1}"`
/// You can match a benchmark with `-m "aiken/list.{map}"` or `-m "aiken/option.{flatten_1}"`
#[clap(short, long)]
match_tests: Option<Vec<String>>,
match_benchmarks: Option<Vec<String>>,
/// This is meant to be used with `--match-tests`.
/// It forces test names to match exactly
/// This is meant to be used with `--match-benchmarks`.
/// It forces benchmark names to match exactly
#[clap(short, long)]
exact_match: bool,
/// Environment to use for benchmarking
env: Option<String>,
/// Filter traces to be included in the generated program(s).
///
/// - user-defined:
/// only consider traces that you've explicitly defined
/// either through the 'trace' keyword or via the trace-if-false
/// ('?') operator.
///
/// - compiler-generated:
/// only include internal traces generated by the
/// Aiken compiler, for example in usage of 'expect'.
///
/// - all:
/// include both user-defined and compiler-generated traces.
///
/// [default: all]
#[clap(short = 'f', long, value_parser=trace_filter_parser(), default_missing_value="all", verbatim_doc_comment, alias="filter_traces")]
trace_filter: Option<fn(TraceLevel) -> Tracing>,
/// Choose the verbosity level of traces:
///
/// - silent: disable traces altogether
/// - compact: only culprit line numbers are shown on failures
/// - verbose: enable full verbose traces as provided by the user or the compiler
///
/// [optional]
#[clap(short, long, value_parser=trace_level_parser(), default_value_t=TraceLevel::Silent, verbatim_doc_comment)]
trace_level: TraceLevel,
}
pub fn exec(
Args {
directory,
match_tests,
match_benchmarks,
exact_match,
seed,
times_to_run,
max_size,
env,
trace_filter,
trace_level,
}: Args,
) -> miette::Result<()> {
let mut rng = rand::thread_rng();
@ -55,12 +91,15 @@ pub fn exec(
false,
!io::stdout().is_terminal(),
|p| {
// We don't want to check here, we want to benchmark
p.benchmark(
match_tests.clone(),
match_benchmarks.clone(),
exact_match,
seed,
times_to_run,
max_size,
match trace_filter {
Some(trace_filter) => trace_filter(trace_level),
None => Tracing::All(trace_level),
},
env.clone(),
)
},