feat: new uplc parser

This commit is contained in:
rvcas 2022-06-08 21:51:04 -04:00
parent 33fcb77681
commit 1c8f3a736b
No known key found for this signature in database
GPG Key ID: C09B64E263F7D68C
5 changed files with 204 additions and 265 deletions

92
Cargo.lock generated
View File

@ -2,6 +2,15 @@
# It is not intended for manual editing. # It is not intended for manual editing.
version = 3 version = 3
[[package]]
name = "ahash"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8fd72866655d1904d6b0997d0b07ba561047d070fbe29de039031c641b61217"
dependencies = [
"const-random",
]
[[package]] [[package]]
name = "anyhow" name = "anyhow"
version = "1.0.57" version = "1.0.57"
@ -32,10 +41,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]] [[package]]
name = "bytes" name = "cfg-if"
version = "1.1.0" version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chumsky"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d02796e4586c6c41aeb68eae9bfb4558a522c35f1430c14b40136c3706e09e4"
dependencies = [
"ahash",
]
[[package]] [[package]]
name = "clap" name = "clap"
@ -77,15 +95,33 @@ dependencies = [
] ]
[[package]] [[package]]
name = "combine" name = "const-random"
version = "4.6.4" version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a604e93b79d1808327a6fca85a6f2d69de66461e7620f5a4cbf5fb4d1d7c948" checksum = "f590d95d011aa80b063ffe3253422ed5aa462af4e9867d43ce8337562bac77c4"
dependencies = [ dependencies = [
"bytes", "const-random-macro",
"memchr", "proc-macro-hack",
] ]
[[package]]
name = "const-random-macro"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "615f6e27d000a2bffbc7f2f6a8669179378fa27ee4d0a509e985dfc0a7defb40"
dependencies = [
"getrandom",
"lazy_static",
"proc-macro-hack",
"tiny-keccak",
]
[[package]]
name = "crunchy"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
[[package]] [[package]]
name = "flat" name = "flat"
version = "0.0.0" version = "0.0.0"
@ -94,6 +130,17 @@ dependencies = [
"thiserror", "thiserror",
] ]
[[package]]
name = "getrandom"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9be70c98951c83b8d2f8f60d7065fa6d5146873094452a1008da8c2f1e4205ad"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]] [[package]]
name = "hashbrown" name = "hashbrown"
version = "0.11.2" version = "0.11.2"
@ -143,12 +190,6 @@ version = "0.2.126"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836"
[[package]]
name = "memchr"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]] [[package]]
name = "neptune" name = "neptune"
version = "0.0.0" version = "0.0.0"
@ -188,6 +229,12 @@ dependencies = [
"version_check", "version_check",
] ]
[[package]]
name = "proc-macro-hack"
version = "0.5.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
[[package]] [[package]]
name = "proc-macro2" name = "proc-macro2"
version = "1.0.39" version = "1.0.39"
@ -283,6 +330,15 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "tiny-keccak"
version = "2.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237"
dependencies = [
"crunchy",
]
[[package]] [[package]]
name = "unicode-ident" name = "unicode-ident"
version = "1.0.0" version = "1.0.0"
@ -293,7 +349,7 @@ checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee"
name = "uplc" name = "uplc"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"combine", "chumsky",
"flat", "flat",
"hex", "hex",
"strum", "strum",
@ -307,6 +363,12 @@ version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "wasi"
version = "0.10.2+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"
[[package]] [[package]]
name = "winapi" name = "winapi"
version = "0.3.9" version = "0.3.9"

View File

@ -13,7 +13,7 @@ fn main() -> anyhow::Result<()> {
UplcCommand::Flat { input } => { UplcCommand::Flat { input } => {
let code = std::fs::read_to_string(&input)?; let code = std::fs::read_to_string(&input)?;
let program = parser::program(&code)?; let program = parser::program(&code).unwrap();
let program = Program::<DeBruijn>::try_from(program)?; let program = Program::<DeBruijn>::try_from(program)?;

View File

@ -6,7 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
combine = "4.6.4" chumsky = "0.8.0"
flat = { path = "../flat" } flat = { path = "../flat" }
hex = "0.4.3" hex = "0.4.3"
strum = "0.24.0" strum = "0.24.0"

View File

@ -3,6 +3,3 @@ pub mod builtins;
mod debruijn; mod debruijn;
mod flat; mod flat;
pub mod parser; pub mod parser;
#[macro_use]
extern crate combine;

View File

@ -1,16 +1,9 @@
use std::{collections::HashMap, str::FromStr}; use std::{collections::HashMap, str::FromStr};
use combine::{ use chumsky::{
attempt, between, choice, prelude::{end, filter, just, recursive, Simple},
error::StringStreamError, text::{ident, int, keyword, TextParser},
look_ahead, many1, Parser,
parser::{
char::{alpha_num, digit, hex_digit, letter, space, spaces, string},
combinator::no_partial,
},
skip_many1,
stream::{position, state},
token, ParseError, Parser, Stream,
}; };
use crate::{ use crate::{
@ -23,8 +16,6 @@ struct ParserState {
current: Unique, current: Unique,
} }
type StateStream<Input> = state::Stream<Input, ParserState>;
impl ParserState { impl ParserState {
fn new() -> Self { fn new() -> Self {
ParserState { ParserState {
@ -48,270 +39,159 @@ impl ParserState {
} }
} }
pub fn program(src: &str) -> Result<Program<Name>, StringStreamError> { pub fn program(src: &str) -> Result<Program<Name>, Vec<Simple<char>>> {
let mut parser = program_(); let parser = program_();
let (program, _) = parser.parse(state::Stream { parser.parse(src)
stream: position::Stream::new(src.trim()),
state: ParserState::new(),
})?;
Ok(program)
} }
fn program_<Input>() -> impl Parser<StateStream<Input>, Output = Program<Name>> fn program_() -> impl Parser<char, Program<Name>, Error = Simple<char>> {
where keyword("program")
Input: Stream<Token = char>, .ignore_then(version().padded())
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>, .then(term())
{ .map(|(version, term)| Program { version, term })
let prog = string("program").with(skip_many1(space())).with( .delimited_by(just('(').padded(), just(')').padded())
(version(), skip_many1(space()), term()) .then_ignore(end())
.map(|(version, _, term)| Program { version, term }),
);
between(token('('), token(')'), prog).skip(spaces())
} }
fn version<Input>() -> impl Parser<StateStream<Input>, Output = (usize, usize, usize)> fn version() -> impl Parser<char, (usize, usize, usize), Error = Simple<char>> {
where int(10)
Input: Stream<Token = char>, .then_ignore(just('.'))
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>, .then(int(10))
{ .then_ignore(just('.'))
( .then(int(10))
many1(digit()), .map(|((major, minor), patch)| {
token('.'),
many1(digit()),
token('.'),
many1(digit()),
)
.map(
|(major, _, minor, _, patch): (String, char, String, char, String)| {
( (
major.parse::<usize>().unwrap(), major.parse::<usize>().unwrap(),
minor.parse::<usize>().unwrap(), minor.parse::<usize>().unwrap(),
patch.parse::<usize>().unwrap(), patch.parse::<usize>().unwrap(),
) )
}, })
}
fn term() -> impl Parser<char, Term<Name>, Error = Simple<char>> {
recursive(|term| {
let delay = keyword("delay")
.ignore_then(term.clone().padded())
.delimited_by(just('(').padded(), just(')').padded())
.map(|t| Term::Delay(Box::new(t)));
let force = keyword("force")
.ignore_then(term.clone().padded())
.delimited_by(just('(').padded(), just(')').padded())
.map(|t| Term::Force(Box::new(t)));
let lambda = keyword("lam")
.ignore_then(name().padded())
.then(term.clone())
.delimited_by(just('(').padded(), just(')').padded())
.map(|(parameter_name, t)| Term::Lambda {
parameter_name,
body: Box::new(t),
});
let apply = term
.clone()
.padded()
.then(term)
.delimited_by(just('[').padded(), just(']').padded())
.map(|(function, argument)| Term::Apply {
function: Box::new(function),
argument: Box::new(argument),
});
constant()
.or(builtin())
.or(var())
.or(lambda)
.or(apply)
.or(delay)
.or(force)
.or(error())
})
}
fn constant() -> impl Parser<char, Term<Name>, Error = Simple<char>> {
keyword("con")
.ignore_then(
constant_integer()
.or(constant_bytestring())
.or(constant_string())
.or(constant_unit())
.or(constant_bool()),
) )
.delimited_by(just('(').padded(), just(')').padded())
.map(Term::Constant)
} }
fn term<Input>() -> impl Parser<StateStream<Input>, Output = Term<Name>> fn builtin() -> impl Parser<char, Term<Name>, Error = Simple<char>> {
where keyword("builtin")
Input: Stream<Token = char>, .ignore_then(ident().padded())
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>, .delimited_by(just('(').padded(), just(')').padded())
{ .map(|builtin_name: String| {
opaque!(no_partial( Term::Builtin(DefaultFunction::from_str(&builtin_name).unwrap())
choice(( })
attempt(constant()),
attempt(builtin()),
attempt(var()),
attempt(lambda()),
attempt(apply()),
attempt(delay()),
attempt(force()),
attempt(error()),
))
.skip(spaces())
))
} }
fn var<Input>() -> impl Parser<StateStream<Input>, Output = Term<Name>> fn var() -> impl Parser<char, Term<Name>, Error = Simple<char>> {
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
name().map(Term::Var) name().map(Term::Var)
} }
fn delay<Input>() -> impl Parser<StateStream<Input>, Output = Term<Name>> fn error() -> impl Parser<char, Term<Name>, Error = Simple<char>> {
where keyword("error")
Input: Stream<Token = char>, .ignored()
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>, .delimited_by(just('(').padded(), just(')').padded())
{ .map(|_| Term::Error)
between(
token('('),
token(')'),
(spaces(), string("delay"), skip_many1(space()), term())
.map(|(_, _, _, term)| Term::Delay(Box::new(term))),
)
} }
fn force<Input>() -> impl Parser<StateStream<Input>, Output = Term<Name>> fn name() -> impl Parser<char, Name, Error = Simple<char>> {
where ident().map(|text| Name {
Input: Stream<Token = char>, text,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>, unique: 0.into(),
{ })
between(
token('('),
token(')'),
(spaces(), string("force"), skip_many1(space()), term())
.map(|(_, _, _, term)| Term::Force(Box::new(term))),
)
} }
fn lambda<Input>() -> impl Parser<StateStream<Input>, Output = Term<Name>> fn constant_integer() -> impl Parser<char, Constant, Error = Simple<char>> {
where keyword("integer")
Input: Stream<Token = char>, .padded()
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>, .ignore_then(int(10))
{
between(
token('('),
token(')'),
(
spaces(),
string("lam"),
skip_many1(space()),
name(),
skip_many1(space()),
term(),
)
.map(|(_, _, _, parameter_name, _, term)| Term::Lambda {
parameter_name,
body: Box::new(term),
}),
)
}
fn apply<Input>() -> impl Parser<StateStream<Input>, Output = Term<Name>>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
between(
token('['),
token(']'),
(spaces(), term(), spaces(), term()).map(|(_, function, _, argument)| Term::Apply {
function: Box::new(function),
argument: Box::new(argument),
}),
)
}
fn builtin<Input>() -> impl Parser<StateStream<Input>, Output = Term<Name>>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
between(
token('(').skip(spaces()),
token(')'),
string("builtin")
.with(skip_many1(space()))
.with(many1(alpha_num()))
.map(|builtin_name: String| {
Term::Builtin(DefaultFunction::from_str(&builtin_name).unwrap())
}),
)
}
fn error<Input>() -> impl Parser<StateStream<Input>, Output = Term<Name>>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
between(
token('(').skip(spaces()),
token(')'),
string("error")
.with(skip_many1(space()))
.map(|_| Term::Error),
)
}
fn constant<Input>() -> impl Parser<StateStream<Input>, Output = Term<Name>>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
between(
token('(').skip(spaces()),
token(')'),
(
spaces(),
string("con"),
skip_many1(space()),
choice((
attempt(constant_integer()),
attempt(constant_bytestring()),
attempt(constant_string()),
attempt(constant_unit()),
attempt(constant_bool()),
)),
spaces(),
)
.map(|(_, _, _, con, _)| Term::Constant(con)),
)
}
fn constant_integer<Input>() -> impl Parser<StateStream<Input>, Output = Constant>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
string("integer")
.with(skip_many1(space()))
.with(many1(digit()))
.map(|d: String| Constant::Integer(d.parse::<isize>().unwrap())) .map(|d: String| Constant::Integer(d.parse::<isize>().unwrap()))
} }
fn constant_bytestring<Input>() -> impl Parser<StateStream<Input>, Output = Constant> fn constant_bytestring() -> impl Parser<char, Constant, Error = Simple<char>> {
where keyword("bytestring")
Input: Stream<Token = char>, .padded()
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>, .ignore_then(just('#'))
{ .ignore_then(int(16))
string("bytestring")
.with(skip_many1(space()))
.with(token('#'))
.with(many1(hex_digit()))
.map(|b: String| Constant::ByteString(hex::decode(b).unwrap())) .map(|b: String| Constant::ByteString(hex::decode(b).unwrap()))
} }
fn constant_string<Input>() -> impl Parser<StateStream<Input>, Output = Constant> fn constant_string() -> impl Parser<char, Constant, Error = Simple<char>> {
where keyword("string")
Input: Stream<Token = char>, .padded()
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>, .ignore_then(just('"'))
{ .ignore_then(filter(|c| *c != '"').repeated())
string("string") .then_ignore(just('"'))
.with(skip_many1(space())) .collect::<String>()
.with(between(token('"'), token('"'), many1(alpha_num())))
.map(Constant::String) .map(Constant::String)
} }
fn constant_unit<Input>() -> impl Parser<StateStream<Input>, Output = Constant> fn constant_unit() -> impl Parser<char, Constant, Error = Simple<char>> {
where keyword("unit")
Input: Stream<Token = char>, .padded()
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>, .ignore_then(just('('))
{ .ignore_then(just(')'))
string("unit") .ignored()
.with(skip_many1(space()))
.with(string("()"))
.map(|_| Constant::Unit) .map(|_| Constant::Unit)
} }
fn constant_bool<Input>() -> impl Parser<StateStream<Input>, Output = Constant> fn constant_bool() -> impl Parser<char, Constant, Error = Simple<char>> {
where keyword("bool")
Input: Stream<Token = char>, .padded()
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>, .ignore_then(just("True").or(just("False")))
{
string("bool")
.with(skip_many1(space()))
.with(string("True").or(string("False")))
.map(|b| Constant::Bool(b == "True")) .map(|b| Constant::Bool(b == "True"))
} }
fn name<Input>() -> impl Parser<StateStream<Input>, Output = Name>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
look_ahead(letter())
.with(many1(alpha_num().or(token('_').or(token('\'')))))
.map_input(|text: String, input: &mut StateStream<Input>| Name {
unique: input.state.intern(&text),
text,
})
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
#[test] #[test]