feat: new uplc parser

This commit is contained in:
rvcas
2022-06-08 21:51:04 -04:00
parent 33fcb77681
commit 1c8f3a736b
5 changed files with 204 additions and 265 deletions

View File

@@ -13,7 +13,7 @@ fn main() -> anyhow::Result<()> {
UplcCommand::Flat { input } => {
let code = std::fs::read_to_string(&input)?;
let program = parser::program(&code)?;
let program = parser::program(&code).unwrap();
let program = Program::<DeBruijn>::try_from(program)?;

View File

@@ -6,7 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
combine = "4.6.4"
chumsky = "0.8.0"
flat = { path = "../flat" }
hex = "0.4.3"
strum = "0.24.0"

View File

@@ -3,6 +3,3 @@ pub mod builtins;
mod debruijn;
mod flat;
pub mod parser;
#[macro_use]
extern crate combine;

View File

@@ -1,16 +1,9 @@
use std::{collections::HashMap, str::FromStr};
use combine::{
attempt, between, choice,
error::StringStreamError,
look_ahead, many1,
parser::{
char::{alpha_num, digit, hex_digit, letter, space, spaces, string},
combinator::no_partial,
},
skip_many1,
stream::{position, state},
token, ParseError, Parser, Stream,
use chumsky::{
prelude::{end, filter, just, recursive, Simple},
text::{ident, int, keyword, TextParser},
Parser,
};
use crate::{
@@ -23,8 +16,6 @@ struct ParserState {
current: Unique,
}
type StateStream<Input> = state::Stream<Input, ParserState>;
impl ParserState {
fn new() -> Self {
ParserState {
@@ -48,270 +39,159 @@ impl ParserState {
}
}
pub fn program(src: &str) -> Result<Program<Name>, StringStreamError> {
let mut parser = program_();
pub fn program(src: &str) -> Result<Program<Name>, Vec<Simple<char>>> {
let parser = program_();
let (program, _) = parser.parse(state::Stream {
stream: position::Stream::new(src.trim()),
state: ParserState::new(),
})?;
Ok(program)
parser.parse(src)
}
fn program_<Input>() -> impl Parser<StateStream<Input>, Output = Program<Name>>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
let prog = string("program").with(skip_many1(space())).with(
(version(), skip_many1(space()), term())
.map(|(version, _, term)| Program { version, term }),
);
between(token('('), token(')'), prog).skip(spaces())
/// Parser for a whole program: `(program <version> <term>)` followed by end of input.
///
/// Whitespace is allowed around both parentheses and around the version.
fn program_() -> impl Parser<char, Program<Name>, Error = Simple<char>> {
    let open = just('(').padded();
    let close = just(')').padded();

    let body = keyword("program")
        .ignore_then(version().padded())
        .then(term())
        .map(|(version, term)| Program { version, term });

    // `end()` rejects any trailing input after the closing parenthesis.
    body.delimited_by(open, close).then_ignore(end())
}
fn version<Input>() -> impl Parser<StateStream<Input>, Output = (usize, usize, usize)>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
(
many1(digit()),
token('.'),
many1(digit()),
token('.'),
many1(digit()),
)
.map(
|(major, _, minor, _, patch): (String, char, String, char, String)| {
(
major.parse::<usize>().unwrap(),
minor.parse::<usize>().unwrap(),
patch.parse::<usize>().unwrap(),
)
},
fn version() -> impl Parser<char, (usize, usize, usize), Error = Simple<char>> {
int(10)
.then_ignore(just('.'))
.then(int(10))
.then_ignore(just('.'))
.then(int(10))
.map(|((major, minor), patch)| {
(
major.parse::<usize>().unwrap(),
minor.parse::<usize>().unwrap(),
patch.parse::<usize>().unwrap(),
)
})
}
/// Recursive parser for a single term.
///
/// Alternatives are tried in this order: constant, builtin, variable,
/// lambda, application, delay, force, error.
fn term() -> impl Parser<char, Term<Name>, Error = Simple<char>> {
    recursive(|inner| {
        // `(lam <name> <term>)`
        let lambda = keyword("lam")
            .ignore_then(name().padded())
            .then(inner.clone())
            .delimited_by(just('(').padded(), just(')').padded())
            .map(|(parameter_name, body)| Term::Lambda {
                parameter_name,
                body: Box::new(body),
            });

        // `[<term> <term>]`
        let apply = inner
            .clone()
            .padded()
            .then(inner.clone())
            .delimited_by(just('[').padded(), just(']').padded())
            .map(|(function, argument)| Term::Apply {
                function: Box::new(function),
                argument: Box::new(argument),
            });

        // `(delay <term>)`
        let delay = keyword("delay")
            .ignore_then(inner.clone().padded())
            .delimited_by(just('(').padded(), just(')').padded())
            .map(Box::new)
            .map(Term::Delay);

        // `(force <term>)`
        let force = keyword("force")
            .ignore_then(inner.clone().padded())
            .delimited_by(just('(').padded(), just(')').padded())
            .map(Box::new)
            .map(Term::Force);

        constant()
            .or(builtin())
            .or(var())
            .or(lambda)
            .or(apply)
            .or(delay)
            .or(force)
            .or(error())
    })
}
/// Parser for a constant term: `(con <type> <value>)`.
///
/// The typed value is one of integer, bytestring, string, unit or bool,
/// tried in that order.
fn constant() -> impl Parser<char, Term<Name>, Error = Simple<char>> {
    let value = constant_integer()
        .or(constant_bytestring())
        .or(constant_string())
        .or(constant_unit())
        .or(constant_bool());

    keyword("con")
        .ignore_then(value)
        .delimited_by(just('(').padded(), just(')').padded())
        .map(Term::Constant)
}
fn term<Input>() -> impl Parser<StateStream<Input>, Output = Term<Name>>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
opaque!(no_partial(
choice((
attempt(constant()),
attempt(builtin()),
attempt(var()),
attempt(lambda()),
attempt(apply()),
attempt(delay()),
attempt(force()),
attempt(error()),
))
.skip(spaces())
))
fn builtin() -> impl Parser<char, Term<Name>, Error = Simple<char>> {
keyword("builtin")
.ignore_then(ident().padded())
.delimited_by(just('(').padded(), just(')').padded())
.map(|builtin_name: String| {
Term::Builtin(DefaultFunction::from_str(&builtin_name).unwrap())
})
}
fn var<Input>() -> impl Parser<StateStream<Input>, Output = Term<Name>>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
/// Parser for a variable term: a bare name wrapped in `Term::Var`.
fn var() -> impl Parser<char, Term<Name>, Error = Simple<char>> {
name().map(Term::Var)
}
fn delay<Input>() -> impl Parser<StateStream<Input>, Output = Term<Name>>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
between(
token('('),
token(')'),
(spaces(), string("delay"), skip_many1(space()), term())
.map(|(_, _, _, term)| Term::Delay(Box::new(term))),
)
/// Parser for the error term: `(error)`, with optional whitespace around
/// the parentheses.
fn error() -> impl Parser<char, Term<Name>, Error = Simple<char>> {
    let open = just('(').padded();
    let close = just(')').padded();

    keyword("error")
        .delimited_by(open, close)
        .map(|_| Term::Error)
}
fn force<Input>() -> impl Parser<StateStream<Input>, Output = Term<Name>>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
between(
token('('),
token(')'),
(spaces(), string("force"), skip_many1(space()), term())
.map(|(_, _, _, term)| Term::Force(Box::new(term))),
)
/// Parser for an identifier, producing a `Name`.
///
/// NOTE(review): every name is given the unique `0`; distinct identifiers
/// are not assigned distinct uniques here. Confirm whether a later pass is
/// expected to do that.
fn name() -> impl Parser<char, Name, Error = Simple<char>> {
    let placeholder_unique = || 0.into();

    ident().map(move |text| Name {
        text,
        unique: placeholder_unique(),
    })
}
fn lambda<Input>() -> impl Parser<StateStream<Input>, Output = Term<Name>>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
between(
token('('),
token(')'),
(
spaces(),
string("lam"),
skip_many1(space()),
name(),
skip_many1(space()),
term(),
)
.map(|(_, _, _, parameter_name, _, term)| Term::Lambda {
parameter_name,
body: Box::new(term),
}),
)
}
fn apply<Input>() -> impl Parser<StateStream<Input>, Output = Term<Name>>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
between(
token('['),
token(']'),
(spaces(), term(), spaces(), term()).map(|(_, function, _, argument)| Term::Apply {
function: Box::new(function),
argument: Box::new(argument),
}),
)
}
fn builtin<Input>() -> impl Parser<StateStream<Input>, Output = Term<Name>>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
between(
token('(').skip(spaces()),
token(')'),
string("builtin")
.with(skip_many1(space()))
.with(many1(alpha_num()))
.map(|builtin_name: String| {
Term::Builtin(DefaultFunction::from_str(&builtin_name).unwrap())
}),
)
}
fn error<Input>() -> impl Parser<StateStream<Input>, Output = Term<Name>>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
between(
token('(').skip(spaces()),
token(')'),
string("error")
.with(skip_many1(space()))
.map(|_| Term::Error),
)
}
fn constant<Input>() -> impl Parser<StateStream<Input>, Output = Term<Name>>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
between(
token('(').skip(spaces()),
token(')'),
(
spaces(),
string("con"),
skip_many1(space()),
choice((
attempt(constant_integer()),
attempt(constant_bytestring()),
attempt(constant_string()),
attempt(constant_unit()),
attempt(constant_bool()),
)),
spaces(),
)
.map(|(_, _, _, con, _)| Term::Constant(con)),
)
}
fn constant_integer<Input>() -> impl Parser<StateStream<Input>, Output = Constant>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
string("integer")
.with(skip_many1(space()))
.with(many1(digit()))
fn constant_integer() -> impl Parser<char, Constant, Error = Simple<char>> {
keyword("integer")
.padded()
.ignore_then(int(10))
.map(|d: String| Constant::Integer(d.parse::<isize>().unwrap()))
}
fn constant_bytestring<Input>() -> impl Parser<StateStream<Input>, Output = Constant>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
string("bytestring")
.with(skip_many1(space()))
.with(token('#'))
.with(many1(hex_digit()))
fn constant_bytestring() -> impl Parser<char, Constant, Error = Simple<char>> {
keyword("bytestring")
.padded()
.ignore_then(just('#'))
.ignore_then(int(16))
.map(|b: String| Constant::ByteString(hex::decode(b).unwrap()))
}
fn constant_string<Input>() -> impl Parser<StateStream<Input>, Output = Constant>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
string("string")
.with(skip_many1(space()))
.with(between(token('"'), token('"'), many1(alpha_num())))
/// Parser for a string constant: `string "<text>"`.
///
/// The text is every character up to the next `"`; there is no escape
/// handling, and the text may be empty.
fn constant_string() -> impl Parser<char, Constant, Error = Simple<char>> {
    let quoted = filter(|c: &char| *c != '"')
        .repeated()
        .delimited_by(just('"'), just('"'))
        .collect::<String>();

    keyword("string")
        .padded()
        .ignore_then(quoted)
        .map(Constant::String)
}
fn constant_unit<Input>() -> impl Parser<StateStream<Input>, Output = Constant>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
string("unit")
.with(skip_many1(space()))
.with(string("()"))
/// Parser for the unit constant: `unit ()`.
///
/// The two parentheses must be adjacent; no whitespace is accepted between them.
fn constant_unit() -> impl Parser<char, Constant, Error = Simple<char>> {
    keyword("unit")
        .padded()
        .ignore_then(just("()"))
        .map(|_| Constant::Unit)
}
fn constant_bool<Input>() -> impl Parser<StateStream<Input>, Output = Constant>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
string("bool")
.with(skip_many1(space()))
.with(string("True").or(string("False")))
/// Parser for a boolean constant: `bool True` or `bool False`.
fn constant_bool() -> impl Parser<char, Constant, Error = Simple<char>> {
    let truthy = just("True").map(|_| true);
    let falsy = just("False").map(|_| false);

    keyword("bool")
        .padded()
        .ignore_then(truthy.or(falsy))
        .map(Constant::Bool)
}
fn name<Input>() -> impl Parser<StateStream<Input>, Output = Name>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
look_ahead(letter())
.with(many1(alpha_num().or(token('_').or(token('\'')))))
.map_input(|text: String, input: &mut StateStream<Input>| Name {
unique: input.state.intern(&text),
text,
})
}
#[cfg(test)]
mod test {
#[test]