From 1c8f3a736bcc0aad313ae2b72b8ec7627cbb27c9 Mon Sep 17 00:00:00 2001 From: rvcas Date: Wed, 8 Jun 2022 21:51:04 -0400 Subject: [PATCH] feat: new uplc parser --- Cargo.lock | 92 ++++++++-- crates/cli/src/main.rs | 2 +- crates/uplc/Cargo.toml | 2 +- crates/uplc/src/lib.rs | 3 - crates/uplc/src/parser.rs | 370 +++++++++++++------------------------- 5 files changed, 204 insertions(+), 265 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 60dfd553..37cb592a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "ahash" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8fd72866655d1904d6b0997d0b07ba561047d070fbe29de039031c641b61217" +dependencies = [ + "const-random", +] + [[package]] name = "anyhow" version = "1.0.57" @@ -32,10 +41,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] -name = "bytes" -version = "1.1.0" +name = "cfg-if" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chumsky" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d02796e4586c6c41aeb68eae9bfb4558a522c35f1430c14b40136c3706e09e4" +dependencies = [ + "ahash", +] [[package]] name = "clap" @@ -77,15 +95,33 @@ dependencies = [ ] [[package]] -name = "combine" -version = "4.6.4" +name = "const-random" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a604e93b79d1808327a6fca85a6f2d69de66461e7620f5a4cbf5fb4d1d7c948" +checksum = "f590d95d011aa80b063ffe3253422ed5aa462af4e9867d43ce8337562bac77c4" dependencies = [ - "bytes", - "memchr", + "const-random-macro", + "proc-macro-hack", ] +[[package]] +name = "const-random-macro" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "615f6e27d000a2bffbc7f2f6a8669179378fa27ee4d0a509e985dfc0a7defb40" +dependencies = [ + "getrandom", + "lazy_static", + "proc-macro-hack", + "tiny-keccak", +] + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + [[package]] name = "flat" version = "0.0.0" @@ -94,6 +130,17 @@ dependencies = [ "thiserror", ] +[[package]] +name = "getrandom" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9be70c98951c83b8d2f8f60d7065fa6d5146873094452a1008da8c2f1e4205ad" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + [[package]] name = "hashbrown" version = "0.11.2" @@ -143,12 +190,6 @@ version = "0.2.126" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" -[[package]] -name = "memchr" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" - [[package]] name = "neptune" version = "0.0.0" @@ -188,6 +229,12 @@ dependencies = [ "version_check", ] +[[package]] +name = "proc-macro-hack" +version = "0.5.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" + [[package]] name = "proc-macro2" version = "1.0.39" @@ -283,6 +330,15 @@ dependencies = [ "syn", ] +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + [[package]] name = "unicode-ident" version = "1.0.0" @@ -293,7 +349,7 @@ checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee" name = "uplc" version = "0.1.0" dependencies = [ - "combine", + "chumsky", "flat", "hex", "strum", @@ -307,6 +363,12 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "wasi" +version = "0.10.2+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" + [[package]] name = "winapi" version = "0.3.9" diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs index 634ff773..2a1ee50e 100644 --- a/crates/cli/src/main.rs +++ b/crates/cli/src/main.rs @@ -13,7 +13,7 @@ fn main() -> anyhow::Result<()> { UplcCommand::Flat { input } => { let code = std::fs::read_to_string(&input)?; - let program = parser::program(&code)?; + let program = parser::program(&code).unwrap(); let program = Program::::try_from(program)?; diff --git a/crates/uplc/Cargo.toml b/crates/uplc/Cargo.toml index ef1dcbc6..c989772b 100644 --- a/crates/uplc/Cargo.toml +++ b/crates/uplc/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -combine = "4.6.4" +chumsky = "0.8.0" flat = { path = "../flat" } hex = "0.4.3" strum = "0.24.0" diff --git a/crates/uplc/src/lib.rs b/crates/uplc/src/lib.rs index 1c0fdeb3..e766d546 100644 --- a/crates/uplc/src/lib.rs +++ b/crates/uplc/src/lib.rs @@ -3,6 +3,3 @@ pub mod builtins; mod debruijn; mod flat; pub mod parser; - -#[macro_use] -extern crate combine; diff --git a/crates/uplc/src/parser.rs b/crates/uplc/src/parser.rs index 1fa7cc98..8412c053 100644 --- a/crates/uplc/src/parser.rs +++ b/crates/uplc/src/parser.rs @@ -1,16 +1,9 @@ use std::{collections::HashMap, str::FromStr}; -use combine::{ - attempt, between, choice, - error::StringStreamError, - look_ahead, many1, - parser::{ - char::{alpha_num, digit, hex_digit, letter, space, spaces, string}, - combinator::no_partial, - }, - skip_many1, - stream::{position, state}, - token, ParseError, Parser, Stream, +use chumsky::{ + prelude::{end, filter, just, recursive, Simple}, + text::{ident, int, keyword, TextParser}, + Parser, }; use crate::{ @@ -23,8 +16,6 @@ struct ParserState { current: Unique, } -type StateStream = state::Stream; - impl ParserState { fn new() -> Self { ParserState { @@ -48,270 +39,159 @@ impl ParserState { } } -pub fn program(src: &str) -> Result, StringStreamError> { - let mut parser = program_(); +pub fn program(src: &str) -> Result, Vec>> { + let parser = program_(); - let (program, _) = parser.parse(state::Stream { - stream: position::Stream::new(src.trim()), - state: ParserState::new(), - })?; - - Ok(program) + parser.parse(src) } -fn program_() -> impl Parser, Output = Program> -where - Input: Stream, - Input::Error: ParseError, -{ - let prog = string("program").with(skip_many1(space())).with( - (version(), skip_many1(space()), term()) - .map(|(version, _, term)| Program { version, term }), - ); - - between(token('('), token(')'), prog).skip(spaces()) +fn program_() -> impl Parser, Error = Simple> { + keyword("program") + .ignore_then(version().padded()) + .then(term()) + .map(|(version, term)| Program { version, term }) + .delimited_by(just('(').padded(), just(')').padded()) + .then_ignore(end()) } -fn version() -> impl Parser, Output = (usize, usize, usize)> -where - Input: Stream, - Input::Error: ParseError, -{ - ( - many1(digit()), - token('.'), - many1(digit()), - token('.'), - many1(digit()), - ) - .map( - |(major, _, minor, _, patch): (String, char, String, char, String)| { - ( - major.parse::().unwrap(), - minor.parse::().unwrap(), - patch.parse::().unwrap(), - ) - }, +fn version() -> impl Parser> { + int(10) + .then_ignore(just('.')) + .then(int(10)) + .then_ignore(just('.')) + .then(int(10)) + .map(|((major, minor), patch)| { + ( + major.parse::().unwrap(), + minor.parse::().unwrap(), + patch.parse::().unwrap(), + ) + }) +} + +fn term() -> impl Parser, Error = Simple> { + recursive(|term| { + let delay = keyword("delay") + .ignore_then(term.clone().padded()) + .delimited_by(just('(').padded(), just(')').padded()) + .map(|t| Term::Delay(Box::new(t))); + + let force = keyword("force") + .ignore_then(term.clone().padded()) + .delimited_by(just('(').padded(), just(')').padded()) + .map(|t| Term::Force(Box::new(t))); + + let lambda = keyword("lam") + .ignore_then(name().padded()) + .then(term.clone()) + .delimited_by(just('(').padded(), just(')').padded()) + .map(|(parameter_name, t)| Term::Lambda { + parameter_name, + body: Box::new(t), + }); + + let apply = term + .clone() + .padded() + .then(term) + .delimited_by(just('[').padded(), just(']').padded()) + .map(|(function, argument)| Term::Apply { + function: Box::new(function), + argument: Box::new(argument), + }); + + constant() + .or(builtin()) + .or(var()) + .or(lambda) + .or(apply) + .or(delay) + .or(force) + .or(error()) + }) +} + +fn constant() -> impl Parser, Error = Simple> { + keyword("con") + .ignore_then( + constant_integer() + .or(constant_bytestring()) + .or(constant_string()) + .or(constant_unit()) + .or(constant_bool()), ) + .delimited_by(just('(').padded(), just(')').padded()) + .map(Term::Constant) } -fn term() -> impl Parser, Output = Term> -where - Input: Stream, - Input::Error: ParseError, -{ - opaque!(no_partial( - choice(( - attempt(constant()), - attempt(builtin()), - attempt(var()), - attempt(lambda()), - attempt(apply()), - attempt(delay()), - attempt(force()), - attempt(error()), - )) - .skip(spaces()) - )) +fn builtin() -> impl Parser, Error = Simple> { + keyword("builtin") + .ignore_then(ident().padded()) + .delimited_by(just('(').padded(), just(')').padded()) + .map(|builtin_name: String| { + Term::Builtin(DefaultFunction::from_str(&builtin_name).unwrap()) + }) } -fn var() -> impl Parser, Output = Term> -where - Input: Stream, - Input::Error: ParseError, -{ +fn var() -> impl Parser, Error = Simple> { name().map(Term::Var) } -fn delay() -> impl Parser, Output = Term> -where - Input: Stream, - Input::Error: ParseError, -{ - between( - token('('), - token(')'), - (spaces(), string("delay"), skip_many1(space()), term()) - .map(|(_, _, _, term)| Term::Delay(Box::new(term))), - ) +fn error() -> impl Parser, Error = Simple> { + keyword("error") + .ignored() + .delimited_by(just('(').padded(), just(')').padded()) + .map(|_| Term::Error) } -fn force() -> impl Parser, Output = Term> -where - Input: Stream, - Input::Error: ParseError, -{ - between( - token('('), - token(')'), - (spaces(), string("force"), skip_many1(space()), term()) - .map(|(_, _, _, term)| Term::Force(Box::new(term))), - ) +fn name() -> impl Parser> { + ident().map(|text| Name { + text, + unique: 0.into(), + }) } -fn lambda() -> impl Parser, Output = Term> -where - Input: Stream, - Input::Error: ParseError, -{ - between( - token('('), - token(')'), - ( - spaces(), - string("lam"), - skip_many1(space()), - name(), - skip_many1(space()), - term(), - ) - .map(|(_, _, _, parameter_name, _, term)| Term::Lambda { - parameter_name, - body: Box::new(term), - }), - ) -} - -fn apply() -> impl Parser, Output = Term> -where - Input: Stream, - Input::Error: ParseError, -{ - between( - token('['), - token(']'), - (spaces(), term(), spaces(), term()).map(|(_, function, _, argument)| Term::Apply { - function: Box::new(function), - argument: Box::new(argument), - }), - ) -} - -fn builtin() -> impl Parser, Output = Term> -where - Input: Stream, - Input::Error: ParseError, -{ - between( - token('(').skip(spaces()), - token(')'), - string("builtin") - .with(skip_many1(space())) - .with(many1(alpha_num())) - .map(|builtin_name: String| { - Term::Builtin(DefaultFunction::from_str(&builtin_name).unwrap()) - }), - ) -} - -fn error() -> impl Parser, Output = Term> -where - Input: Stream, - Input::Error: ParseError, -{ - between( - token('(').skip(spaces()), - token(')'), - string("error") - .with(skip_many1(space())) - .map(|_| Term::Error), - ) -} - -fn constant() -> impl Parser, Output = Term> -where - Input: Stream, - Input::Error: ParseError, -{ - between( - token('(').skip(spaces()), - token(')'), - ( - spaces(), - string("con"), - skip_many1(space()), - choice(( - attempt(constant_integer()), - attempt(constant_bytestring()), - attempt(constant_string()), - attempt(constant_unit()), - attempt(constant_bool()), - )), - spaces(), - ) - .map(|(_, _, _, con, _)| Term::Constant(con)), - ) -} - -fn constant_integer() -> impl Parser, Output = Constant> -where - Input: Stream, - Input::Error: ParseError, -{ - string("integer") - .with(skip_many1(space())) - .with(many1(digit())) +fn constant_integer() -> impl Parser> { + keyword("integer") + .padded() + .ignore_then(int(10)) .map(|d: String| Constant::Integer(d.parse::().unwrap())) } -fn constant_bytestring() -> impl Parser, Output = Constant> -where - Input: Stream, - Input::Error: ParseError, -{ - string("bytestring") - .with(skip_many1(space())) - .with(token('#')) - .with(many1(hex_digit())) +fn constant_bytestring() -> impl Parser> { + keyword("bytestring") + .padded() + .ignore_then(just('#')) + .ignore_then(int(16)) .map(|b: String| Constant::ByteString(hex::decode(b).unwrap())) } -fn constant_string() -> impl Parser, Output = Constant> -where - Input: Stream, - Input::Error: ParseError, -{ - string("string") - .with(skip_many1(space())) - .with(between(token('"'), token('"'), many1(alpha_num()))) +fn constant_string() -> impl Parser> { + keyword("string") + .padded() + .ignore_then(just('"')) + .ignore_then(filter(|c| *c != '"').repeated()) + .then_ignore(just('"')) + .collect::() .map(Constant::String) } -fn constant_unit() -> impl Parser, Output = Constant> -where - Input: Stream, - Input::Error: ParseError, -{ - string("unit") - .with(skip_many1(space())) - .with(string("()")) +fn constant_unit() -> impl Parser> { + keyword("unit") + .padded() + .ignore_then(just('(')) + .ignore_then(just(')')) + .ignored() .map(|_| Constant::Unit) } -fn constant_bool() -> impl Parser, Output = Constant> -where - Input: Stream, - Input::Error: ParseError, -{ - string("bool") - .with(skip_many1(space())) - .with(string("True").or(string("False"))) +fn constant_bool() -> impl Parser> { + keyword("bool") + .padded() + .ignore_then(just("True").or(just("False"))) .map(|b| Constant::Bool(b == "True")) } -fn name() -> impl Parser, Output = Name> -where - Input: Stream, - Input::Error: ParseError, -{ - look_ahead(letter()) - .with(many1(alpha_num().or(token('_').or(token('\''))))) - .map_input(|text: String, input: &mut StateStream| Name { - unique: input.state.intern(&text), - text, - }) -} - #[cfg(test)] mod test { #[test]