fix: moving to a parser that can handle left recursion
This commit is contained in:
parent
86089b4bee
commit
b05f0846a8
|
@ -205,6 +205,33 @@ version = "6.0.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "029d8d0b2f198229de29dca79676f2738ff952edf3fde542eb8bf94d8c21b435"
|
checksum = "029d8d0b2f198229de29dca79676f2738ff952edf3fde542eb8bf94d8c21b435"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "peg"
|
||||||
|
version = "0.8.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "af728fe826811af3b38c37e93de6d104485953ea373d656eebae53d6987fcd2c"
|
||||||
|
dependencies = [
|
||||||
|
"peg-macros",
|
||||||
|
"peg-runtime",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "peg-macros"
|
||||||
|
version = "0.8.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4536be147b770b824895cbad934fccce8e49f14b4c4946eaa46a6e4a12fcdc16"
|
||||||
|
dependencies = [
|
||||||
|
"peg-runtime",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "peg-runtime"
|
||||||
|
version = "0.8.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f9b0efd3ba03c3a409d44d60425f279ec442bcf0b9e63ff4e410da31c8b0f69f"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "proc-macro-error"
|
name = "proc-macro-error"
|
||||||
version = "1.0.4"
|
version = "1.0.4"
|
||||||
|
@ -352,6 +379,7 @@ dependencies = [
|
||||||
"chumsky",
|
"chumsky",
|
||||||
"flat",
|
"flat",
|
||||||
"hex",
|
"hex",
|
||||||
|
"peg",
|
||||||
"strum",
|
"strum",
|
||||||
"strum_macros",
|
"strum_macros",
|
||||||
"thiserror",
|
"thiserror",
|
||||||
|
|
|
@ -13,7 +13,7 @@ fn main() -> anyhow::Result<()> {
|
||||||
UplcCommand::Flat { input } => {
|
UplcCommand::Flat { input } => {
|
||||||
let code = std::fs::read_to_string(&input)?;
|
let code = std::fs::read_to_string(&input)?;
|
||||||
|
|
||||||
let program = parser::program(&code).unwrap();
|
let program = parser::program(&code)?;
|
||||||
|
|
||||||
let program = Program::<DeBruijn>::try_from(program)?;
|
let program = Program::<DeBruijn>::try_from(program)?;
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,7 @@ edition = "2021"
|
||||||
chumsky = "0.8.0"
|
chumsky = "0.8.0"
|
||||||
flat = { path = "../flat" }
|
flat = { path = "../flat" }
|
||||||
hex = "0.4.3"
|
hex = "0.4.3"
|
||||||
|
peg = "0.8.0"
|
||||||
strum = "0.24.0"
|
strum = "0.24.0"
|
||||||
strum_macros = "0.24.0"
|
strum_macros = "0.24.0"
|
||||||
thiserror = "1.0.31"
|
thiserror = "1.0.31"
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
#![recursion_limit = "10000"]
|
||||||
|
|
||||||
pub mod ast;
|
pub mod ast;
|
||||||
pub mod builtins;
|
pub mod builtins;
|
||||||
mod debruijn;
|
mod debruijn;
|
||||||
|
|
|
@ -11,6 +11,95 @@ use crate::{
|
||||||
builtins::DefaultFunction,
|
builtins::DefaultFunction,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
peg::parser! {
    // PEG grammar for the textual form of an untyped Plutus Core program.
    //
    // The only public rule is `program`; every other rule is a private
    // building block. Rules that can fail on otherwise well-shaped input
    // (unknown builtin name, malformed hex, out-of-range integer) use
    // `{? ... }` actions so the failure surfaces as a normal parse error
    // instead of a panic.
    grammar parser() for str {
        // `(program <major>.<minor>.<patch> <term>)`.
        // Whitespace around the outer parentheses is accepted so that a
        // source file ending in a newline still parses.
        pub rule program() -> Program<Name>
            = _* "(" _* "program" _+ v:version() _+ t:term() _* ")" _* {
                Program { version: v, term: t }
            }

        // Three dot-separated decimal numbers.
        rule version() -> (usize, usize, usize)
            = major:number() "." minor:number() "." patch:number() {
                (major, minor, patch)
            }

        // Any term. Alternatives are tried in order; the first match wins.
        rule term() -> Term<Name>
            = constant()
            / builtin()
            / var()
            / lambda()
            / apply()
            / delay()
            / force()
            / error()

        // `(con <type> <value>)`.
        rule constant() -> Term<Name>
            = "(" _* "con" _+ con:(
                constant_integer()
                / constant_bytestring()
                / constant_string()
                / constant_unit()
                / constant_bool()
            ) _* ")" {
                Term::Constant(con)
            }

        // `(builtin <name>)`.
        // The `builtin` keyword is required: without it this rule would match
        // any parenthesised identifier (for example `(error)`) before the
        // later alternatives get a chance. An unrecognised name is reported
        // as a parse failure rather than unwrapped.
        rule builtin() -> Term<Name>
            = "(" _* "builtin" _+ b:ident() _* ")" {?
                DefaultFunction::from_str(&b)
                    .map(Term::Builtin)
                    .or(Err("known builtin function name"))
            }

        // A bare identifier used as a variable.
        rule var() -> Term<Name>
            = n:name() { Term::Var(n) }

        // `(lam <name> <term>)`.
        rule lambda() -> Term<Name>
            = "(" _* "lam" _+ parameter_name:name() _+ t:term() _* ")" {
                Term::Lambda { parameter_name, body: Box::new(t) }
            }

        // `[ <term> <term>+ ]`.
        // Several arguments are folded into left-nested applications:
        // `[f a b]` becomes `Apply(Apply(f, a), b)`.
        #[cache_left_rec]
        rule apply() -> Term<Name>
            = "[" _* initial:term() _+ terms:(t:term() _* { t })+ "]" {
                terms
                    .into_iter()
                    .fold(initial, |lhs, rhs| Term::Apply {
                        function: Box::new(lhs),
                        argument: Box::new(rhs),
                    })
            }

        // `(delay <term>)`.
        rule delay() -> Term<Name>
            = "(" _* "delay" _+ t:term() _* ")" { Term::Delay(Box::new(t)) }

        // `(force <term>)`.
        rule force() -> Term<Name>
            = "(" _* "force" _+ t:term() _* ")" { Term::Force(Box::new(t)) }

        // `(error)`.
        rule error() -> Term<Name>
            = "(" _* "error" _* ")" { Term::Error }

        // `integer <digits>` with an optional sign.
        // The text is parsed directly as `isize`, so a value that does not
        // fit is a parse failure instead of a silently wrapped cast.
        rule constant_integer() -> Constant
            = "integer" _+ i:$(['+' | '-']? ['0'..='9']+) {?
                i.parse::<isize>()
                    .map(Constant::Integer)
                    .or(Err("integer that fits in isize"))
            }

        // `bytestring #<hex digits>`.
        // Zero digits denote the empty bytestring; an odd number of digits
        // is rejected by `hex::decode` and reported as a parse failure.
        rule constant_bytestring() -> Constant
            = "bytestring" _+ "#" digits:$(['0'..='9' | 'a'..='f' | 'A'..='F']*) {?
                hex::decode(digits)
                    .map(Constant::ByteString)
                    .or(Err("even number of hex digits"))
            }

        // `string "<text>"`. No escape sequences: the text runs up to the
        // next double quote.
        rule constant_string() -> Constant
            = "string" _+ "\"" s:$([^ '"']*) "\"" { Constant::String(s.to_string()) }

        // `bool True` or `bool False`.
        rule constant_bool() -> Constant
            = "bool" _+ b:$("True" / "False") { Constant::Bool(b == "True") }

        // `unit ()`.
        rule constant_unit() -> Constant
            = "unit" _+ "()" { Constant::Unit }

        // Unsigned decimal number; fails the parse if it overflows `usize`.
        rule number() -> usize
            = n:$(['0'..='9']+) {? n.parse().or(Err("usize")) }

        // An identifier wrapped as a `Name`; `unique` is always set to 0 here.
        rule name() -> Name
            = text:ident() { Name { text, unique: 0.into() } }

        // One or more ASCII letters, digits or underscores.
        rule ident() -> String
            = i:$(['a'..='z' | 'A'..='Z' | '0'..='9' | '_']+) { i.to_string() }

        // A single whitespace character. Tabs and carriage returns are
        // included so tab-indented files and CRLF line endings parse.
        rule _ = [' ' | '\t' | '\r' | '\n']
    }
}
|
||||||
|
|
||||||
struct ParserState {
|
struct ParserState {
|
||||||
identifiers: HashMap<String, Unique>,
|
identifiers: HashMap<String, Unique>,
|
||||||
current: Unique,
|
current: Unique,
|
||||||
|
@ -39,10 +128,8 @@ impl ParserState {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn program(src: &str) -> Result<Program<Name>, Vec<Simple<char>>> {
|
pub fn program(src: &str) -> Result<Program<Name>, peg::error::ParseError<peg::str::LineCol>> {
|
||||||
let parser = program_();
|
parser::program(src)
|
||||||
|
|
||||||
parser.parse(src)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn program_() -> impl Parser<char, Program<Name>, Error = Simple<char>> {
|
fn program_() -> impl Parser<char, Program<Name>, Error = Simple<char>> {
|
||||||
|
@ -71,21 +158,19 @@ fn version() -> impl Parser<char, (usize, usize, usize), Error = Simple<char>> {
|
||||||
|
|
||||||
fn term() -> impl Parser<char, Term<Name>, Error = Simple<char>> {
|
fn term() -> impl Parser<char, Term<Name>, Error = Simple<char>> {
|
||||||
recursive(|term| {
|
recursive(|term| {
|
||||||
let atom = || var().or(term.clone());
|
|
||||||
|
|
||||||
let delay = keyword("delay")
|
let delay = keyword("delay")
|
||||||
.ignore_then(atom().padded())
|
.ignore_then(term.clone().padded())
|
||||||
.delimited_by(just('(').padded(), just(')').padded())
|
.delimited_by(just('(').padded(), just(')').padded())
|
||||||
.map(|t| dbg!(Term::Delay(Box::new(t))));
|
.map(|t| dbg!(Term::Delay(Box::new(t))));
|
||||||
|
|
||||||
let force = keyword("force")
|
let force = keyword("force")
|
||||||
.ignore_then(atom().padded())
|
.ignore_then(term.clone().padded())
|
||||||
.delimited_by(just('(').padded(), just(')').padded())
|
.delimited_by(just('(').padded(), just(')').padded())
|
||||||
.map(|t| dbg!(Term::Force(Box::new(t))));
|
.map(|t| dbg!(Term::Force(Box::new(t))));
|
||||||
|
|
||||||
let lambda = keyword("lam")
|
let lambda = keyword("lam")
|
||||||
.ignore_then(name().padded())
|
.ignore_then(name().padded())
|
||||||
.then(atom())
|
.then(term.clone())
|
||||||
.delimited_by(just('(').padded(), just(')').padded())
|
.delimited_by(just('(').padded(), just(')').padded())
|
||||||
.map(|(parameter_name, t)| {
|
.map(|(parameter_name, t)| {
|
||||||
dbg!(Term::Lambda {
|
dbg!(Term::Lambda {
|
||||||
|
@ -94,15 +179,14 @@ fn term() -> impl Parser<char, Term<Name>, Error = Simple<char>> {
|
||||||
})
|
})
|
||||||
});
|
});
|
||||||
|
|
||||||
let apply = atom()
|
let apply = term
|
||||||
|
.clone()
|
||||||
.padded()
|
.padded()
|
||||||
.then(atom())
|
.then(term.clone().padded().repeated())
|
||||||
.delimited_by(just('[').padded(), just(']').padded())
|
.delimited_by(just('[').padded(), just(']').padded())
|
||||||
.map(|(function, argument)| {
|
.foldl(|lhs, rhs| Term::Apply {
|
||||||
dbg!(Term::Apply {
|
function: Box::new(lhs),
|
||||||
function: Box::new(function),
|
argument: Box::new(rhs),
|
||||||
argument: Box::new(argument),
|
|
||||||
})
|
|
||||||
});
|
});
|
||||||
|
|
||||||
constant()
|
constant()
|
||||||
|
|
Loading…
Reference in New Issue