diff --git a/Cargo.lock b/Cargo.lock index 37cb592a..1557548c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -205,6 +205,33 @@ version = "6.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "029d8d0b2f198229de29dca79676f2738ff952edf3fde542eb8bf94d8c21b435" +[[package]] +name = "peg" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af728fe826811af3b38c37e93de6d104485953ea373d656eebae53d6987fcd2c" +dependencies = [ + "peg-macros", + "peg-runtime", +] + +[[package]] +name = "peg-macros" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4536be147b770b824895cbad934fccce8e49f14b4c4946eaa46a6e4a12fcdc16" +dependencies = [ + "peg-runtime", + "proc-macro2", + "quote", +] + +[[package]] +name = "peg-runtime" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9b0efd3ba03c3a409d44d60425f279ec442bcf0b9e63ff4e410da31c8b0f69f" + [[package]] name = "proc-macro-error" version = "1.0.4" @@ -352,6 +379,7 @@ dependencies = [ "chumsky", "flat", "hex", + "peg", "strum", "strum_macros", "thiserror", diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs index 2a1ee50e..634ff773 100644 --- a/crates/cli/src/main.rs +++ b/crates/cli/src/main.rs @@ -13,7 +13,7 @@ fn main() -> anyhow::Result<()> { UplcCommand::Flat { input } => { let code = std::fs::read_to_string(&input)?; - let program = parser::program(&code).unwrap(); + let program = parser::program(&code)?; let program = Program::::try_from(program)?; diff --git a/crates/uplc/Cargo.toml b/crates/uplc/Cargo.toml index c989772b..890e2223 100644 --- a/crates/uplc/Cargo.toml +++ b/crates/uplc/Cargo.toml @@ -9,6 +9,7 @@ edition = "2021" chumsky = "0.8.0" flat = { path = "../flat" } hex = "0.4.3" +peg = "0.8.0" strum = "0.24.0" strum_macros = "0.24.0" thiserror = "1.0.31" diff --git a/crates/uplc/src/lib.rs b/crates/uplc/src/lib.rs index e766d546..9afe7885 100644 --- a/crates/uplc/src/lib.rs +++ b/crates/uplc/src/lib.rs @@ -1,3 +1,5 @@ +#![recursion_limit = "10000"] + pub mod ast; pub mod builtins; mod debruijn; diff --git a/crates/uplc/src/parser.rs b/crates/uplc/src/parser.rs index e85bd00e..666f588d 100644 --- a/crates/uplc/src/parser.rs +++ b/crates/uplc/src/parser.rs @@ -11,6 +11,95 @@ use crate::{ builtins::DefaultFunction, }; +peg::parser! { + grammar parser() for str { + pub rule program() -> Program + = "(" _* "program" _+ v:version() _+ t:term() _* ")" { Program {version: v, term: t} } + + rule version() -> (usize, usize, usize) + = major:number() "." minor:number() "." patch:number() { + (major, minor, patch) + } + + rule term() -> Term + = constant() + / builtin() + / var() + / lambda() + / apply() + / delay() + / force() + / error() + + rule constant() -> Term + = "(" _* "con" _+ con:( + constant_integer() + / constant_bytestring() + / constant_string() + / constant_unit() + / constant_bool() + ) _* ")" { + Term::Constant(con) + } + + rule builtin() -> Term + = "(" b:ident() ")" { Term::Builtin(DefaultFunction::from_str(&b).unwrap()) } + + rule var() -> Term + = n:name() { Term::Var(n) } + + rule lambda() -> Term + = "(" _* "lam" _+ parameter_name:name() _+ t:term() _* ")" { + Term::Lambda { parameter_name, body: Box::new(t) } + } + + #[cache_left_rec] + rule apply() -> Term + = "[" _* initial:term() _+ terms:(t:term() _* { t })+ "]" { + terms + .into_iter() + .fold(initial, |lhs, rhs| Term::Apply { + function: Box::new(lhs), + argument: Box::new(rhs) + }) + } + + rule delay() -> Term + = "(" _* "delay" _+ t:term() _* ")" { Term::Delay(Box::new(t)) } + + rule force() -> Term + = "(" _* "force" _+ t:term() _* ")" { Term::Force(Box::new(t)) } + + rule error() -> Term + = "(" _* "error" _* ")" { Term::Error } + + rule constant_integer() -> Constant + = "integer" _+ i:number() { Constant::Integer(i as isize) } + + rule constant_bytestring() -> Constant + = "bytestring" _+ "#" i:ident() { Constant::ByteString(hex::decode(i).unwrap()) } + + rule constant_string() -> Constant + = "string" _+ "\"" s:[^ '"']* "\"" { Constant::String(String::from_iter(s)) } + + rule constant_bool() -> Constant + = "bool" _+ b:$("True" / "False") { Constant::Bool(b == "True") } + + rule constant_unit() -> Constant + = "unit" _+ "()" { Constant::Unit } + + rule number() -> usize + = n:$(['0'..='9']+) {? n.parse().or(Err("usize")) } + + rule name() -> Name + = text:ident() { Name { text, unique: 0.into() } } + + rule ident() -> String = i:['a'..='z' | 'A'..='Z' | '0'..='9' | '_']+ { String::from_iter(i) } + + rule _ = [' ' | '\n'] + } +} + struct ParserState { identifiers: HashMap, current: Unique, @@ -39,10 +128,8 @@ impl ParserState { } } -pub fn program(src: &str) -> Result, Vec>> { - let parser = program_(); - - parser.parse(src) +pub fn program(src: &str) -> Result, peg::error::ParseError> { + parser::program(src) } fn program_() -> impl Parser, Error = Simple> { @@ -71,21 +158,19 @@ fn version() -> impl Parser> { fn term() -> impl Parser, Error = Simple> { recursive(|term| { - let atom = || var().or(term.clone()); - let delay = keyword("delay") - .ignore_then(atom().padded()) + .ignore_then(term.clone().padded()) .delimited_by(just('(').padded(), just(')').padded()) .map(|t| dbg!(Term::Delay(Box::new(t)))); let force = keyword("force") - .ignore_then(atom().padded()) + .ignore_then(term.clone().padded()) .delimited_by(just('(').padded(), just(')').padded()) .map(|t| dbg!(Term::Force(Box::new(t)))); let lambda = keyword("lam") .ignore_then(name().padded()) - .then(atom()) + .then(term.clone()) .delimited_by(just('(').padded(), just(')').padded()) .map(|(parameter_name, t)| { dbg!(Term::Lambda { @@ -94,15 +179,14 @@ fn term() -> impl Parser, Error = Simple> { }) }); - let apply = atom() + let apply = term + .clone() .padded() - .then(atom()) + .then(term.clone().padded().repeated()) .delimited_by(just('[').padded(), just(']').padded()) - .map(|(function, argument)| { - dbg!(Term::Apply { - function: Box::new(function), - argument: Box::new(argument), - }) + .foldl(|lhs, rhs| Term::Apply { + function: Box::new(lhs), + argument: Box::new(rhs), }); constant()