fix: moving to a parser that can handle left recursion
This commit is contained in:
parent
86089b4bee
commit
b05f0846a8
|
@ -205,6 +205,33 @@ version = "6.0.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "029d8d0b2f198229de29dca79676f2738ff952edf3fde542eb8bf94d8c21b435"
|
||||
|
||||
[[package]]
|
||||
name = "peg"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "af728fe826811af3b38c37e93de6d104485953ea373d656eebae53d6987fcd2c"
|
||||
dependencies = [
|
||||
"peg-macros",
|
||||
"peg-runtime",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "peg-macros"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4536be147b770b824895cbad934fccce8e49f14b4c4946eaa46a6e4a12fcdc16"
|
||||
dependencies = [
|
||||
"peg-runtime",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "peg-runtime"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f9b0efd3ba03c3a409d44d60425f279ec442bcf0b9e63ff4e410da31c8b0f69f"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro-error"
|
||||
version = "1.0.4"
|
||||
|
@ -352,6 +379,7 @@ dependencies = [
|
|||
"chumsky",
|
||||
"flat",
|
||||
"hex",
|
||||
"peg",
|
||||
"strum",
|
||||
"strum_macros",
|
||||
"thiserror",
|
||||
|
|
|
@ -13,7 +13,7 @@ fn main() -> anyhow::Result<()> {
|
|||
UplcCommand::Flat { input } => {
|
||||
let code = std::fs::read_to_string(&input)?;
|
||||
|
||||
let program = parser::program(&code).unwrap();
|
||||
let program = parser::program(&code)?;
|
||||
|
||||
let program = Program::<DeBruijn>::try_from(program)?;
|
||||
|
||||
|
|
|
@ -9,6 +9,7 @@ edition = "2021"
|
|||
chumsky = "0.8.0"
|
||||
flat = { path = "../flat" }
|
||||
hex = "0.4.3"
|
||||
peg = "0.8.0"
|
||||
strum = "0.24.0"
|
||||
strum_macros = "0.24.0"
|
||||
thiserror = "1.0.31"
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
#![recursion_limit = "10000"]
|
||||
|
||||
pub mod ast;
|
||||
pub mod builtins;
|
||||
mod debruijn;
|
||||
|
|
|
@ -11,6 +11,95 @@ use crate::{
|
|||
builtins::DefaultFunction,
|
||||
};
|
||||
|
||||
peg::parser! {
    // PEG grammar for the textual (s-expression style) program syntax.
    //
    // Every rule reports malformed input as a parse failure rather than
    // panicking, so the generated `parser::program` function always returns a
    // `Result` to its caller.
    grammar parser() for str {
        // Entry point: `(program <major>.<minor>.<patch> <term>)`.
        //
        // Whitespace is tolerated before and after the outer parentheses so
        // that source text read straight from a file (which normally ends
        // with a newline) is accepted.
        pub rule program() -> Program<Name>
          = _* "(" _* "program" _+ v:version() _+ t:term() _* ")" _* {
              Program { version: v, term: t }
          }

        // Three dot-separated decimal numbers.
        rule version() -> (usize, usize, usize)
          = major:number() "." minor:number() "." patch:number() {
              (major, minor, patch)
          }

        // Ordered choice: the first alternative that matches wins.
        rule term() -> Term<Name>
          = constant()
          / builtin()
          / var()
          / lambda()
          / apply()
          / delay()
          / force()
          / error()

        // `(con <type> <value>)`
        rule constant() -> Term<Name>
          = "(" _* "con" _+ con:(
              constant_integer()
              / constant_bytestring()
              / constant_string()
              / constant_unit()
              / constant_bool()
            ) _* ")" {
              Term::Constant(con)
          }

        // `(builtin <name>)`; the bare `(<name>)` form is still accepted.
        //
        // An unknown function name makes this alternative fail (instead of
        // panicking) so that `term` can go on to try e.g. `(error)`.
        rule builtin() -> Term<Name>
          = "(" _* ("builtin" _+)? b:ident() _* ")" {?
              DefaultFunction::from_str(&b)
                  .map(Term::Builtin)
                  .or(Err("builtin function name"))
          }

        rule var() -> Term<Name>
          = n:name() { Term::Var(n) }

        // `(lam <name> <body>)`
        rule lambda() -> Term<Name>
          = "(" _* "lam" _+ parameter_name:name() _+ t:term() _* ")" {
              Term::Lambda { parameter_name, body: Box::new(t) }
          }

        // `[f a b ...]` — application is folded to the left, i.e.
        // `[f a b]` becomes `Apply(Apply(f, a), b)`.
        #[cache_left_rec]
        rule apply() -> Term<Name>
          = "[" _* initial:term() _+ terms:(t:term() _* { t })+ "]" {
              terms
                  .into_iter()
                  .fold(initial, |lhs, rhs| Term::Apply {
                      function: Box::new(lhs),
                      argument: Box::new(rhs),
                  })
          }

        // `(delay <term>)`
        rule delay() -> Term<Name>
          = "(" _* "delay" _+ t:term() _* ")" { Term::Delay(Box::new(t)) }

        // `(force <term>)`
        rule force() -> Term<Name>
          = "(" _* "force" _+ t:term() _* ")" { Term::Force(Box::new(t)) }

        // `(error)`
        rule error() -> Term<Name>
          = "(" _* "error" _* ")" { Term::Error }

        // `integer <n>` with an optional leading minus sign. The digits are
        // parsed directly as `isize`, so out-of-range values are rejected
        // instead of being wrapped by an `as` cast.
        rule constant_integer() -> Constant
          = "integer" _+ n:$("-"? ['0'..='9']+) {?
              n.parse::<isize>().map(Constant::Integer).or(Err("isize"))
          }

        // `bytestring #<hex digits>`; invalid hex is a parse failure.
        rule constant_bytestring() -> Constant
          = "bytestring" _+ "#" i:ident() {?
              hex::decode(i)
                  .map(Constant::ByteString)
                  .or(Err("hex-encoded bytestring"))
          }

        // `string "<text>"`; the text runs up to the next double quote
        // (no escape sequences are recognised).
        rule constant_string() -> Constant
          = "string" _+ "\"" s:$([^ '"']*) "\"" { Constant::String(s.to_owned()) }

        // `bool True` / `bool False`
        rule constant_bool() -> Constant
          = "bool" _+ b:$("True" / "False") { Constant::Bool(b == "True") }

        // `unit ()`
        rule constant_unit() -> Constant
          = "unit" _+ "()" { Constant::Unit }

        // Unsigned decimal number; fails if it does not fit in `usize`.
        rule number() -> usize
          = n:$(['0'..='9']+) {? n.parse().or(Err("usize")) }

        // Every parsed name gets the placeholder unique `0`.
        rule name() -> Name
          = text:ident() { Name { text, unique: 0.into() } }

        rule ident() -> String
          = i:$(['a'..='z' | 'A'..='Z' | '0'..='9' | '_']+) { i.to_owned() }

        // A single whitespace character (space, newline, carriage return, tab).
        rule _ = [' ' | '\n' | '\r' | '\t']
    }
}
|
||||
|
||||
struct ParserState {
|
||||
identifiers: HashMap<String, Unique>,
|
||||
current: Unique,
|
||||
|
@ -39,10 +128,8 @@ impl ParserState {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn program(src: &str) -> Result<Program<Name>, Vec<Simple<char>>> {
|
||||
let parser = program_();
|
||||
|
||||
parser.parse(src)
|
||||
pub fn program(src: &str) -> Result<Program<Name>, peg::error::ParseError<peg::str::LineCol>> {
|
||||
parser::program(src)
|
||||
}
|
||||
|
||||
fn program_() -> impl Parser<char, Program<Name>, Error = Simple<char>> {
|
||||
|
@ -71,21 +158,19 @@ fn version() -> impl Parser<char, (usize, usize, usize), Error = Simple<char>> {
|
|||
|
||||
fn term() -> impl Parser<char, Term<Name>, Error = Simple<char>> {
|
||||
recursive(|term| {
|
||||
let atom = || var().or(term.clone());
|
||||
|
||||
let delay = keyword("delay")
|
||||
.ignore_then(atom().padded())
|
||||
.ignore_then(term.clone().padded())
|
||||
.delimited_by(just('(').padded(), just(')').padded())
|
||||
.map(|t| dbg!(Term::Delay(Box::new(t))));
|
||||
|
||||
let force = keyword("force")
|
||||
.ignore_then(atom().padded())
|
||||
.ignore_then(term.clone().padded())
|
||||
.delimited_by(just('(').padded(), just(')').padded())
|
||||
.map(|t| dbg!(Term::Force(Box::new(t))));
|
||||
|
||||
let lambda = keyword("lam")
|
||||
.ignore_then(name().padded())
|
||||
.then(atom())
|
||||
.then(term.clone())
|
||||
.delimited_by(just('(').padded(), just(')').padded())
|
||||
.map(|(parameter_name, t)| {
|
||||
dbg!(Term::Lambda {
|
||||
|
@ -94,15 +179,14 @@ fn term() -> impl Parser<char, Term<Name>, Error = Simple<char>> {
|
|||
})
|
||||
});
|
||||
|
||||
let apply = atom()
|
||||
let apply = term
|
||||
.clone()
|
||||
.padded()
|
||||
.then(atom())
|
||||
.then(term.clone().padded().repeated())
|
||||
.delimited_by(just('[').padded(), just(']').padded())
|
||||
.map(|(function, argument)| {
|
||||
dbg!(Term::Apply {
|
||||
function: Box::new(function),
|
||||
argument: Box::new(argument),
|
||||
})
|
||||
.foldl(|lhs, rhs| Term::Apply {
|
||||
function: Box::new(lhs),
|
||||
argument: Box::new(rhs),
|
||||
});
|
||||
|
||||
constant()
|
||||
|
|
Loading…
Reference in New Issue