feat: new uplc parser

This commit is contained in:
rvcas 2022-06-08 21:51:04 -04:00
parent 33fcb77681
commit 1c8f3a736b
No known key found for this signature in database
GPG Key ID: C09B64E263F7D68C
5 changed files with 204 additions and 265 deletions

92
Cargo.lock generated
View File

@ -2,6 +2,15 @@
# It is not intended for manual editing.
version = 3
[[package]]
name = "ahash"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8fd72866655d1904d6b0997d0b07ba561047d070fbe29de039031c641b61217"
dependencies = [
"const-random",
]
[[package]]
name = "anyhow"
version = "1.0.57"
@ -32,10 +41,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bytes"
version = "1.1.0"
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chumsky"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d02796e4586c6c41aeb68eae9bfb4558a522c35f1430c14b40136c3706e09e4"
dependencies = [
"ahash",
]
[[package]]
name = "clap"
@ -77,15 +95,33 @@ dependencies = [
]
[[package]]
name = "combine"
version = "4.6.4"
name = "const-random"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a604e93b79d1808327a6fca85a6f2d69de66461e7620f5a4cbf5fb4d1d7c948"
checksum = "f590d95d011aa80b063ffe3253422ed5aa462af4e9867d43ce8337562bac77c4"
dependencies = [
"bytes",
"memchr",
"const-random-macro",
"proc-macro-hack",
]
[[package]]
name = "const-random-macro"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "615f6e27d000a2bffbc7f2f6a8669179378fa27ee4d0a509e985dfc0a7defb40"
dependencies = [
"getrandom",
"lazy_static",
"proc-macro-hack",
"tiny-keccak",
]
[[package]]
name = "crunchy"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
[[package]]
name = "flat"
version = "0.0.0"
@ -94,6 +130,17 @@ dependencies = [
"thiserror",
]
[[package]]
name = "getrandom"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9be70c98951c83b8d2f8f60d7065fa6d5146873094452a1008da8c2f1e4205ad"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "hashbrown"
version = "0.11.2"
@ -143,12 +190,6 @@ version = "0.2.126"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836"
[[package]]
name = "memchr"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]]
name = "neptune"
version = "0.0.0"
@ -188,6 +229,12 @@ dependencies = [
"version_check",
]
[[package]]
name = "proc-macro-hack"
version = "0.5.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
[[package]]
name = "proc-macro2"
version = "1.0.39"
@ -283,6 +330,15 @@ dependencies = [
"syn",
]
[[package]]
name = "tiny-keccak"
version = "2.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237"
dependencies = [
"crunchy",
]
[[package]]
name = "unicode-ident"
version = "1.0.0"
@ -293,7 +349,7 @@ checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee"
name = "uplc"
version = "0.1.0"
dependencies = [
"combine",
"chumsky",
"flat",
"hex",
"strum",
@ -307,6 +363,12 @@ version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "wasi"
version = "0.10.2+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"
[[package]]
name = "winapi"
version = "0.3.9"

View File

@ -13,7 +13,7 @@ fn main() -> anyhow::Result<()> {
UplcCommand::Flat { input } => {
let code = std::fs::read_to_string(&input)?;
let program = parser::program(&code)?;
let program = parser::program(&code).unwrap();
let program = Program::<DeBruijn>::try_from(program)?;

View File

@ -6,7 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
combine = "4.6.4"
chumsky = "0.8.0"
flat = { path = "../flat" }
hex = "0.4.3"
strum = "0.24.0"

View File

@ -3,6 +3,3 @@ pub mod builtins;
mod debruijn;
mod flat;
pub mod parser;
#[macro_use]
extern crate combine;

View File

@ -1,16 +1,9 @@
use std::{collections::HashMap, str::FromStr};
use combine::{
attempt, between, choice,
error::StringStreamError,
look_ahead, many1,
parser::{
char::{alpha_num, digit, hex_digit, letter, space, spaces, string},
combinator::no_partial,
},
skip_many1,
stream::{position, state},
token, ParseError, Parser, Stream,
use chumsky::{
prelude::{end, filter, just, recursive, Simple},
text::{ident, int, keyword, TextParser},
Parser,
};
use crate::{
@ -23,8 +16,6 @@ struct ParserState {
current: Unique,
}
type StateStream<Input> = state::Stream<Input, ParserState>;
impl ParserState {
fn new() -> Self {
ParserState {
@ -48,270 +39,159 @@ impl ParserState {
}
}
pub fn program(src: &str) -> Result<Program<Name>, StringStreamError> {
let mut parser = program_();
/// Parses the textual form of a UPLC program.
///
/// Builds the top-level parser and runs it over `src`. On failure the
/// `Err` variant carries every error the parser reported.
pub fn program(src: &str) -> Result<Program<Name>, Vec<Simple<char>>> {
    program_().parse(src)
}
fn program_<Input>() -> impl Parser<StateStream<Input>, Output = Program<Name>>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
let prog = string("program").with(skip_many1(space())).with(
(version(), skip_many1(space()), term())
.map(|(version, _, term)| Program { version, term }),
);
between(token('('), token(')'), prog).skip(spaces())
/// Parser for a whole program: `(program <version> <term>)` followed by end of input.
///
/// Whitespace is tolerated around the parentheses and around the version.
fn program_() -> impl Parser<char, Program<Name>, Error = Simple<char>> {
    // `program <version>`: the keyword itself produces nothing, only the version is kept.
    let header = keyword("program").ignore_then(version().padded());

    let contents = header.then(term()).map(|(semver, body)| Program {
        version: semver,
        term: body,
    });

    contents
        .delimited_by(just('(').padded(), just(')').padded())
        // Reject anything left over after the closing parenthesis.
        .then_ignore(end())
}
fn version<Input>() -> impl Parser<StateStream<Input>, Output = (usize, usize, usize)>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
(
many1(digit()),
token('.'),
many1(digit()),
token('.'),
many1(digit()),
)
.map(
|(major, _, minor, _, patch): (String, char, String, char, String)| {
fn version() -> impl Parser<char, (usize, usize, usize), Error = Simple<char>> {
int(10)
.then_ignore(just('.'))
.then(int(10))
.then_ignore(just('.'))
.then(int(10))
.map(|((major, minor), patch)| {
(
major.parse::<usize>().unwrap(),
minor.parse::<usize>().unwrap(),
patch.parse::<usize>().unwrap(),
)
},
})
}
/// Parser for a single UPLC term.
///
/// The grammar is recursive: `delay`, `force`, `lam` and application all
/// contain nested terms. Alternatives are tried in this order: constant,
/// builtin, variable, lambda, application, delay, force, error.
fn term() -> impl Parser<char, Term<Name>, Error = Simple<char>> {
    recursive(|expr| {
        // `[ <term> <term> ]`
        let application = expr
            .clone()
            .padded()
            .then(expr.clone())
            .delimited_by(just('[').padded(), just(']').padded())
            .map(|(lhs, rhs)| Term::Apply {
                function: Box::new(lhs),
                argument: Box::new(rhs),
            });

        // `(lam <name> <term>)`
        let abstraction = keyword("lam")
            .ignore_then(name().padded())
            .then(expr.clone())
            .delimited_by(just('(').padded(), just(')').padded())
            .map(|(parameter_name, inner)| Term::Lambda {
                parameter_name,
                body: Box::new(inner),
            });

        // `(force <term>)`
        let forced = keyword("force")
            .ignore_then(expr.clone().padded())
            .delimited_by(just('(').padded(), just(')').padded())
            .map(Box::new)
            .map(Term::Force);

        // `(delay <term>)`
        let delayed = keyword("delay")
            .ignore_then(expr.clone().padded())
            .delimited_by(just('(').padded(), just(')').padded())
            .map(Box::new)
            .map(Term::Delay);

        constant()
            .or(builtin())
            .or(var())
            .or(abstraction)
            .or(application)
            .or(delayed)
            .or(forced)
            .or(error())
    })
}
/// Parser for a constant term: `(con <type> <value>)`.
///
/// The typed value is one of integer, bytestring, string, unit or bool,
/// tried in that order.
fn constant() -> impl Parser<char, Term<Name>, Error = Simple<char>> {
    let typed_value = constant_integer()
        .or(constant_bytestring())
        .or(constant_string())
        .or(constant_unit())
        .or(constant_bool());

    let body = keyword("con").ignore_then(typed_value);

    body.delimited_by(just('(').padded(), just(')').padded())
        .map(Term::Constant)
}
fn term<Input>() -> impl Parser<StateStream<Input>, Output = Term<Name>>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
opaque!(no_partial(
choice((
attempt(constant()),
attempt(builtin()),
attempt(var()),
attempt(lambda()),
attempt(apply()),
attempt(delay()),
attempt(force()),
attempt(error()),
))
.skip(spaces())
))
fn builtin() -> impl Parser<char, Term<Name>, Error = Simple<char>> {
keyword("builtin")
.ignore_then(ident().padded())
.delimited_by(just('(').padded(), just(')').padded())
.map(|builtin_name: String| {
Term::Builtin(DefaultFunction::from_str(&builtin_name).unwrap())
})
}
fn var<Input>() -> impl Parser<StateStream<Input>, Output = Term<Name>>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
/// Parser for a variable reference: a bare name wrapped in `Term::Var`.
fn var() -> impl Parser<char, Term<Name>, Error = Simple<char>> {
    let identifier = name();
    identifier.map(Term::Var)
}
fn delay<Input>() -> impl Parser<StateStream<Input>, Output = Term<Name>>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
between(
token('('),
token(')'),
(spaces(), string("delay"), skip_many1(space()), term())
.map(|(_, _, _, term)| Term::Delay(Box::new(term))),
)
fn error() -> impl Parser<char, Term<Name>, Error = Simple<char>> {
keyword("error")
.ignored()
.delimited_by(just('(').padded(), just(')').padded())
.map(|_| Term::Error)
}
fn force<Input>() -> impl Parser<StateStream<Input>, Output = Term<Name>>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
between(
token('('),
token(')'),
(spaces(), string("force"), skip_many1(space()), term())
.map(|(_, _, _, term)| Term::Force(Box::new(term))),
)
/// Parser for a name: an identifier as accepted by `chumsky::text::ident`.
///
/// NOTE(review): every parsed name is given the unique `0`; `ParserState`
/// is not consulted here, so distinct identifiers are not told apart by
/// `unique`. Confirm that later passes do not rely on distinct uniques.
fn name() -> impl Parser<char, Name, Error = Simple<char>> {
    ident().map(|identifier: String| {
        let unique = 0.into();
        Name {
            text: identifier,
            unique,
        }
    })
}
fn lambda<Input>() -> impl Parser<StateStream<Input>, Output = Term<Name>>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
between(
token('('),
token(')'),
(
spaces(),
string("lam"),
skip_many1(space()),
name(),
skip_many1(space()),
term(),
)
.map(|(_, _, _, parameter_name, _, term)| Term::Lambda {
parameter_name,
body: Box::new(term),
}),
)
}
fn apply<Input>() -> impl Parser<StateStream<Input>, Output = Term<Name>>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
between(
token('['),
token(']'),
(spaces(), term(), spaces(), term()).map(|(_, function, _, argument)| Term::Apply {
function: Box::new(function),
argument: Box::new(argument),
}),
)
}
fn builtin<Input>() -> impl Parser<StateStream<Input>, Output = Term<Name>>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
between(
token('(').skip(spaces()),
token(')'),
string("builtin")
.with(skip_many1(space()))
.with(many1(alpha_num()))
.map(|builtin_name: String| {
Term::Builtin(DefaultFunction::from_str(&builtin_name).unwrap())
}),
)
}
fn error<Input>() -> impl Parser<StateStream<Input>, Output = Term<Name>>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
between(
token('(').skip(spaces()),
token(')'),
string("error")
.with(skip_many1(space()))
.map(|_| Term::Error),
)
}
fn constant<Input>() -> impl Parser<StateStream<Input>, Output = Term<Name>>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
between(
token('(').skip(spaces()),
token(')'),
(
spaces(),
string("con"),
skip_many1(space()),
choice((
attempt(constant_integer()),
attempt(constant_bytestring()),
attempt(constant_string()),
attempt(constant_unit()),
attempt(constant_bool()),
)),
spaces(),
)
.map(|(_, _, _, con, _)| Term::Constant(con)),
)
}
fn constant_integer<Input>() -> impl Parser<StateStream<Input>, Output = Constant>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
string("integer")
.with(skip_many1(space()))
.with(many1(digit()))
fn constant_integer() -> impl Parser<char, Constant, Error = Simple<char>> {
keyword("integer")
.padded()
.ignore_then(int(10))
.map(|d: String| Constant::Integer(d.parse::<isize>().unwrap()))
}
fn constant_bytestring<Input>() -> impl Parser<StateStream<Input>, Output = Constant>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
string("bytestring")
.with(skip_many1(space()))
.with(token('#'))
.with(many1(hex_digit()))
fn constant_bytestring() -> impl Parser<char, Constant, Error = Simple<char>> {
keyword("bytestring")
.padded()
.ignore_then(just('#'))
.ignore_then(int(16))
.map(|b: String| Constant::ByteString(hex::decode(b).unwrap()))
}
fn constant_string<Input>() -> impl Parser<StateStream<Input>, Output = Constant>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
string("string")
.with(skip_many1(space()))
.with(between(token('"'), token('"'), many1(alpha_num())))
/// Parser for a string constant: `string "<characters>"`.
///
/// Everything up to the next `"` is taken verbatim; there is no escape
/// handling, so the contents cannot contain a double quote.
fn constant_string() -> impl Parser<char, Constant, Error = Simple<char>> {
    let contents = filter(|ch: &char| *ch != '"')
        .repeated()
        .collect::<String>();

    let quoted = just('"').ignore_then(contents).then_ignore(just('"'));

    keyword("string")
        .padded()
        .ignore_then(quoted)
        .map(Constant::String)
}
fn constant_unit<Input>() -> impl Parser<StateStream<Input>, Output = Constant>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
string("unit")
.with(skip_many1(space()))
.with(string("()"))
/// Parser for the unit constant: `unit ()`.
///
/// The parentheses must be adjacent; no whitespace is accepted between them.
fn constant_unit() -> impl Parser<char, Constant, Error = Simple<char>> {
    let empty_parens = just('(').then(just(')'));

    keyword("unit")
        .padded()
        .ignore_then(empty_parens)
        .map(|_| Constant::Unit)
}
fn constant_bool<Input>() -> impl Parser<StateStream<Input>, Output = Constant>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
string("bool")
.with(skip_many1(space()))
.with(string("True").or(string("False")))
/// Parser for a boolean constant: `bool True` or `bool False`.
fn constant_bool() -> impl Parser<char, Constant, Error = Simple<char>> {
    let literal = just("True").or(just("False"));

    keyword("bool")
        .padded()
        .ignore_then(literal)
        // Only the two literals above can reach this point.
        .map(|text: &str| Constant::Bool(matches!(text, "True")))
}
fn name<Input>() -> impl Parser<StateStream<Input>, Output = Name>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
look_ahead(letter())
.with(many1(alpha_num().or(token('_').or(token('\'')))))
.map_input(|text: String, input: &mut StateStream<Input>| Name {
unique: input.state.intern(&text),
text,
})
}
#[cfg(test)]
mod test {
#[test]