fix: moving to a parser that can handle left recursion

This commit is contained in:
rvcas 2022-06-10 01:55:28 -04:00
parent 86089b4bee
commit b05f0846a8
No known key found for this signature in database
GPG Key ID: C09B64E263F7D68C
5 changed files with 132 additions and 17 deletions

28
Cargo.lock generated
View File

@ -205,6 +205,33 @@ version = "6.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "029d8d0b2f198229de29dca79676f2738ff952edf3fde542eb8bf94d8c21b435" checksum = "029d8d0b2f198229de29dca79676f2738ff952edf3fde542eb8bf94d8c21b435"
[[package]]
name = "peg"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af728fe826811af3b38c37e93de6d104485953ea373d656eebae53d6987fcd2c"
dependencies = [
"peg-macros",
"peg-runtime",
]
[[package]]
name = "peg-macros"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4536be147b770b824895cbad934fccce8e49f14b4c4946eaa46a6e4a12fcdc16"
dependencies = [
"peg-runtime",
"proc-macro2",
"quote",
]
[[package]]
name = "peg-runtime"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9b0efd3ba03c3a409d44d60425f279ec442bcf0b9e63ff4e410da31c8b0f69f"
[[package]] [[package]]
name = "proc-macro-error" name = "proc-macro-error"
version = "1.0.4" version = "1.0.4"
@ -352,6 +379,7 @@ dependencies = [
"chumsky", "chumsky",
"flat", "flat",
"hex", "hex",
"peg",
"strum", "strum",
"strum_macros", "strum_macros",
"thiserror", "thiserror",

View File

@ -13,7 +13,7 @@ fn main() -> anyhow::Result<()> {
UplcCommand::Flat { input } => { UplcCommand::Flat { input } => {
let code = std::fs::read_to_string(&input)?; let code = std::fs::read_to_string(&input)?;
let program = parser::program(&code).unwrap(); let program = parser::program(&code)?;
let program = Program::<DeBruijn>::try_from(program)?; let program = Program::<DeBruijn>::try_from(program)?;

View File

@ -9,6 +9,7 @@ edition = "2021"
chumsky = "0.8.0" chumsky = "0.8.0"
flat = { path = "../flat" } flat = { path = "../flat" }
hex = "0.4.3" hex = "0.4.3"
peg = "0.8.0"
strum = "0.24.0" strum = "0.24.0"
strum_macros = "0.24.0" strum_macros = "0.24.0"
thiserror = "1.0.31" thiserror = "1.0.31"

View File

@ -1,3 +1,5 @@
#![recursion_limit = "10000"]
pub mod ast; pub mod ast;
pub mod builtins; pub mod builtins;
mod debruijn; mod debruijn;

View File

@ -11,6 +11,95 @@ use crate::{
builtins::DefaultFunction, builtins::DefaultFunction,
}; };
peg::parser! {
    // PEG grammar for the textual form of a program: a parenthesised
    // `program` form holding a three-part version and a single term.
    //
    // Every fallible conversion (numbers, builtin names, hex bytes) is done in
    // a `{? ... }` action so that bad input is reported as a parse error
    // instead of panicking inside the generated parser.
    grammar parser() for str {
        // Entry point. Leading and trailing whitespace is accepted so that a
        // source file ending in a newline still parses.
        pub rule program() -> Program<Name>
            = _* "(" _* "program" _+ v:version() _+ t:term() _* ")" _* {
                Program { version: v, term: t }
            }

        // `major.minor.patch`, each component an unsigned decimal number.
        rule version() -> (usize, usize, usize)
            = major:number() "." minor:number() "." patch:number() {
                (major, minor, patch)
            }

        // Any term. Alternatives are tried in order; all parenthesised forms
        // start with a distinct keyword, `apply` starts with "[", and `var`
        // is a bare identifier.
        rule term() -> Term<Name>
            = constant()
            / builtin()
            / var()
            / lambda()
            / apply()
            / delay()
            / force()
            / error()

        // `(con <type> <value>)`
        rule constant() -> Term<Name>
            = "(" _* "con" _+ con:(
                constant_integer()
                / constant_bytestring()
                / constant_string()
                / constant_unit()
                / constant_bool()
            ) _* ")" {
                Term::Constant(con)
            }

        // `(builtin <name>)`. The keyword is required so that other
        // single-word forms such as `(error)` are not mistaken for a builtin,
        // and an unknown name is a parse error rather than a panic.
        rule builtin() -> Term<Name>
            = "(" _* "builtin" _+ b:ident() _* ")" {?
                DefaultFunction::from_str(&b)
                    .map(Term::Builtin)
                    .or(Err("builtin function name"))
            }

        // A bare identifier used as a variable reference.
        rule var() -> Term<Name>
            = n:name() { Term::Var(n) }

        // `(lam <name> <body>)`
        rule lambda() -> Term<Name>
            = "(" _* "lam" _+ parameter_name:name() _+ t:term() _* ")" {
                Term::Lambda { parameter_name, body: Box::new(t) }
            }

        // `[f a b ...]`: one function term followed by one or more argument
        // terms, folded to the left so `[f a b]` becomes `Apply(Apply(f, a), b)`.
        #[cache_left_rec]
        rule apply() -> Term<Name>
            = "[" _* initial:term() _+ terms:(t:term() _* { t })+ "]" {
                terms
                    .into_iter()
                    .fold(initial, |lhs, rhs| Term::Apply {
                        function: Box::new(lhs),
                        argument: Box::new(rhs),
                    })
            }

        // `(delay <term>)`
        rule delay() -> Term<Name>
            = "(" _* "delay" _+ t:term() _* ")" { Term::Delay(Box::new(t)) }

        // `(force <term>)`
        rule force() -> Term<Name>
            = "(" _* "force" _+ t:term() _* ")" { Term::Force(Box::new(t)) }

        // `(error)`
        rule error() -> Term<Name>
            = "(" _* "error" _* ")" { Term::Error }

        // `integer <digits>` with an optional leading `-`. Parsed directly as
        // `isize` so out-of-range literals are rejected instead of wrapping.
        rule constant_integer() -> Constant
            = "integer" _+ i:$("-"? ['0'..='9']+) {?
                i.parse::<isize>()
                    .map(Constant::Integer)
                    .or(Err("integer"))
            }

        // `bytestring #<hex digits>`. Zero digits yields an empty byte string;
        // an odd number of digits is a parse error rather than a panic.
        rule constant_bytestring() -> Constant
            = "bytestring" _+ "#" i:$(['0'..='9' | 'a'..='f' | 'A'..='F']*) {?
                hex::decode(i)
                    .map(Constant::ByteString)
                    .or(Err("hex-encoded bytes"))
            }

        // `string "<chars>"`: everything up to the next double quote; no
        // escape sequences are recognised.
        rule constant_string() -> Constant
            = "string" _+ "\"" s:$([^ '"']*) "\"" { Constant::String(s.to_string()) }

        // `bool True` / `bool False`
        rule constant_bool() -> Constant
            = "bool" _+ b:$("True" / "False") { Constant::Bool(b == "True") }

        // `unit ()`
        rule constant_unit() -> Constant
            = "unit" _+ "()" { Constant::Unit }

        // Unsigned decimal number; fails the parse if it does not fit in `usize`.
        rule number() -> usize
            = n:$(['0'..='9']+) {? n.parse().or(Err("usize")) }

        // An identifier wrapped as a `Name`; every name gets unique 0 here.
        rule name() -> Name
            = text:ident() { Name { text, unique: 0.into() } }

        // One or more ASCII letters, digits or underscores.
        rule ident() -> String
            = i:$(['a'..='z' | 'A'..='Z' | '0'..='9' | '_']+) { i.to_string() }

        // A single whitespace character (space, newline, carriage return, tab).
        rule _ = [' ' | '\n' | '\r' | '\t']
    }
}
struct ParserState { struct ParserState {
identifiers: HashMap<String, Unique>, identifiers: HashMap<String, Unique>,
current: Unique, current: Unique,
@ -39,10 +128,8 @@ impl ParserState {
} }
} }
pub fn program(src: &str) -> Result<Program<Name>, Vec<Simple<char>>> { pub fn program(src: &str) -> Result<Program<Name>, peg::error::ParseError<peg::str::LineCol>> {
let parser = program_(); parser::program(src)
parser.parse(src)
} }
fn program_() -> impl Parser<char, Program<Name>, Error = Simple<char>> { fn program_() -> impl Parser<char, Program<Name>, Error = Simple<char>> {
@ -71,21 +158,19 @@ fn version() -> impl Parser<char, (usize, usize, usize), Error = Simple<char>> {
fn term() -> impl Parser<char, Term<Name>, Error = Simple<char>> { fn term() -> impl Parser<char, Term<Name>, Error = Simple<char>> {
recursive(|term| { recursive(|term| {
let atom = || var().or(term.clone());
let delay = keyword("delay") let delay = keyword("delay")
.ignore_then(atom().padded()) .ignore_then(term.clone().padded())
.delimited_by(just('(').padded(), just(')').padded()) .delimited_by(just('(').padded(), just(')').padded())
.map(|t| dbg!(Term::Delay(Box::new(t)))); .map(|t| dbg!(Term::Delay(Box::new(t))));
let force = keyword("force") let force = keyword("force")
.ignore_then(atom().padded()) .ignore_then(term.clone().padded())
.delimited_by(just('(').padded(), just(')').padded()) .delimited_by(just('(').padded(), just(')').padded())
.map(|t| dbg!(Term::Force(Box::new(t)))); .map(|t| dbg!(Term::Force(Box::new(t))));
let lambda = keyword("lam") let lambda = keyword("lam")
.ignore_then(name().padded()) .ignore_then(name().padded())
.then(atom()) .then(term.clone())
.delimited_by(just('(').padded(), just(')').padded()) .delimited_by(just('(').padded(), just(')').padded())
.map(|(parameter_name, t)| { .map(|(parameter_name, t)| {
dbg!(Term::Lambda { dbg!(Term::Lambda {
@ -94,15 +179,14 @@ fn term() -> impl Parser<char, Term<Name>, Error = Simple<char>> {
}) })
}); });
let apply = atom() let apply = term
.clone()
.padded() .padded()
.then(atom()) .then(term.clone().padded().repeated())
.delimited_by(just('[').padded(), just(']').padded()) .delimited_by(just('[').padded(), just(']').padded())
.map(|(function, argument)| { .foldl(|lhs, rhs| Term::Apply {
dbg!(Term::Apply { function: Box::new(lhs),
function: Box::new(function), argument: Box::new(rhs),
argument: Box::new(argument),
})
}); });
constant() constant()