feat: lexer

This commit is contained in:
rvcas 2022-08-12 19:44:34 -04:00
parent 208f2e80ea
commit 1d6809661c
No known key found for this signature in database
GPG Key ID: C09B64E263F7D68C
13 changed files with 1414 additions and 37 deletions

7
Cargo.lock generated
View File

@ -59,6 +59,7 @@ dependencies = [
"chumsky", "chumsky",
"internment", "internment",
"miette", "miette",
"vec1",
] ]
[[package]] [[package]]
@ -885,6 +886,12 @@ dependencies = [
"thiserror", "thiserror",
] ]
[[package]]
name = "vec1"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5fc1631c774f0f9570797191e01247cbefde789eebfbf128074cb934115a6133"
[[package]] [[package]]
name = "version_check" name = "version_check"
version = "0.9.4" version = "0.9.4"

View File

@ -7,6 +7,15 @@ use clap::{Parser, Subcommand};
#[clap(version, about, long_about = None)] #[clap(version, about, long_about = None)]
#[clap(propagate_version = true)] #[clap(propagate_version = true)]
pub enum Args { pub enum Args {
/// Build an aiken project
Build,
/// Start a development server
Dev,
/// Create a new aiken project
New {
/// Project name
name: PathBuf,
},
/// A subcommand for working with Untyped Plutus Core /// A subcommand for working with Untyped Plutus Core
#[clap(subcommand)] #[clap(subcommand)]
Uplc(UplcCommand), Uplc(UplcCommand),
@ -15,33 +24,49 @@ pub enum Args {
/// Commands for working with Untyped Plutus Core /// Commands for working with Untyped Plutus Core
#[derive(Subcommand)] #[derive(Subcommand)]
pub enum UplcCommand { pub enum UplcCommand {
/// Evaluate an Untyped Plutus Core program
Eval {
/// Handle input as flat bytes
#[clap(short, long)]
flat: bool,
/// File to load and evaluate
input: PathBuf,
},
/// Encode textual Untyped Plutus Core to flat bytes /// Encode textual Untyped Plutus Core to flat bytes
Flat { Flat {
/// Textual Untyped Plutus Core file
input: PathBuf, input: PathBuf,
#[clap(short, long)]
print: bool, /// Output file name
#[clap(short, long)] #[clap(short, long)]
out: Option<String>, out: Option<String>,
},
/// Decode flat bytes to textual Untyped Plutus Core /// Print output instead of saving to file
Unflat {
input: PathBuf,
#[clap(short, long)] #[clap(short, long)]
print: bool, print: bool,
#[clap(short, long)]
out: Option<String>,
}, },
/// Format an Untyped Plutus Core program /// Format an Untyped Plutus Core program
Fmt { Fmt {
/// Textual Untyped Plutus Core file
input: PathBuf, input: PathBuf,
/// Print output instead of saving to file
#[clap(short, long)] #[clap(short, long)]
print: bool, print: bool,
}, },
/// Evaluate an Untyped Plutus Core program /// Decode flat bytes to textual Untyped Plutus Core
Eval { Unflat {
/// Flat encoded Untyped Plutus Core file
input: PathBuf, input: PathBuf,
/// Output file name
#[clap(short, long)] #[clap(short, long)]
flat: bool, out: Option<String>,
/// Print output instead of saving to file
#[clap(short, long)]
print: bool,
}, },
} }

View File

@ -14,6 +14,31 @@ fn main() -> anyhow::Result<()> {
let args = Args::default(); let args = Args::default();
match args { match args {
Args::Build => {
// 1. load and parse modules
// * lib - contains modules, types, and functions
// * contracts - contains validators
// * scripts - contains native scripts dsl
// 2. type check everything
// 3. generate uplc and policy/address if relevant
todo!()
}
Args::Dev => {
// launch a development server
// this should allow people to test
// their contracts over http
todo!()
}
Args::New { name } => {
if !name.exists() {
fs::create_dir_all(name.join("lib"))?;
fs::create_dir_all(name.join("policies"))?;
fs::create_dir_all(name.join("scripts"))?;
}
}
Args::Uplc(uplc) => match uplc { Args::Uplc(uplc) => match uplc {
UplcCommand::Flat { input, print, out } => { UplcCommand::Flat { input, print, out } => {
let code = std::fs::read_to_string(&input)?; let code = std::fs::read_to_string(&input)?;
@ -48,6 +73,7 @@ fn main() -> anyhow::Result<()> {
fs::write(&out_name, &bytes)?; fs::write(&out_name, &bytes)?;
} }
} }
UplcCommand::Fmt { input, print } => { UplcCommand::Fmt { input, print } => {
let code = std::fs::read_to_string(&input)?; let code = std::fs::read_to_string(&input)?;
@ -61,6 +87,7 @@ fn main() -> anyhow::Result<()> {
fs::write(&input, pretty)?; fs::write(&input, pretty)?;
} }
} }
UplcCommand::Unflat { input, print, out } => { UplcCommand::Unflat { input, print, out } => {
let bytes = std::fs::read(&input)?; let bytes = std::fs::read(&input)?;
@ -82,6 +109,7 @@ fn main() -> anyhow::Result<()> {
fs::write(&out_name, pretty)?; fs::write(&out_name, pretty)?;
} }
} }
UplcCommand::Eval { input, flat } => { UplcCommand::Eval { input, flat } => {
let program = if flat { let program = if flat {
let bytes = std::fs::read(&input)?; let bytes = std::fs::read(&input)?;

View File

@ -9,3 +9,4 @@ edition = "2021"
chumsky = "0.8.0" chumsky = "0.8.0"
internment = "0.7.0" internment = "0.7.0"
miette = "5.2.0" miette = "5.2.0"
vec1 = "1.8.0"

View File

@ -1,47 +1,505 @@
pub struct Module { use std::{collections::HashMap, fmt, ops::Range, sync::Arc};
pub name: Vec<String>,
pub docs: Vec<String>, use internment::Intern;
pub is_script: bool,
pub is_lib: bool, use crate::{
pub is_policy: bool, expr::{TypedExpr, UntypedExpr},
tipo::{self, PatternConstructor, Type, ValueConstructor},
};
pub type TypedModule = Module<tipo::Module, TypedDefinition>;
pub type UntypedModule = Module<(), UntypedDefinition>;
pub enum ModuleKind {
Contract,
Lib,
Script,
} }
pub enum Definition { pub struct Module<Info, Definitions> {
pub name: Vec<String>,
pub docs: Vec<String>,
pub type_info: Info,
pub definitons: Vec<Definitions>,
pub kind: ModuleKind,
}
pub type TypedDefinition = Definition<Arc<Type>, TypedExpr, String, String>;
pub type UntypedDefinition = Definition<(), UntypedExpr, (), ()>;
pub enum Definition<T, Expr, ConstantRecordTag, PackageName> {
Fn { Fn {
arguments: Vec<String>, location: Span,
arguments: Vec<Vec<Arg<T>>>,
body: Expr, body: Expr,
doc: Option<String>, doc: Option<String>,
name: String, name: String,
public: bool, public: bool,
return_annotation: Option<()>, return_annotation: Option<Annotation>,
return_type: (), return_type: T,
}, },
TypeAlias { TypeAlias {
location: Span,
alias: String, alias: String,
annotation: (), annotation: Annotation,
doc: Option<String>, doc: Option<String>,
parameters: Vec<String>, parameters: Vec<String>,
public: bool, public: bool,
tipo: (), tipo: T,
}, },
DataType { DataType {
constructors: Vec<()>, location: Span,
constructors: Vec<RecordConstructor<T>>,
doc: Option<String>, doc: Option<String>,
name: String, name: String,
opaque: bool, opaque: bool,
parameters: Vec<String>, parameters: Vec<String>,
public: bool, public: bool,
typed_parameters: Vec<()>, typed_parameters: Vec<T>,
}, },
Use { Use {
module: Vec<String>, module: Vec<String>,
as_name: Option<String>, as_name: Option<String>,
// unqualified: Vec<UnqualifiedImport>, unqualified: Vec<UnqualifiedImport>,
// package: PackageName, package: PackageName,
},
ModuleConstant {
doc: Option<String>,
location: Span,
public: bool,
name: String,
annotation: Option<Annotation>,
value: Box<Constant<T, ConstantRecordTag>>,
tipo: T,
}, },
} }
pub enum Expr {} pub type TypedConstant = Constant<Arc<Type>, String>;
/// [`Constant`] with both generic slots (type and record tag) filled with `()`.
pub type UntypedConstant = Constant<(), ()>;
/// A constant value in the syntax tree.
///
/// `T` is the type slot carried by the variants that have a `tipo` field and
/// `RecordTag` is the tag stored on [`Constant::Record`]. Every variant records
/// the [`Span`] it was read from.
pub enum Constant<T, RecordTag> {
// Integer literal; the value is kept as text, not parsed into a number.
Int {
location: Span,
value: String,
},
// String literal.
String {
location: Span,
value: String,
},
// NOTE(review): `Pair` holds a `Vec`, so nothing here limits it to two elements.
Pair {
location: Span,
elements: Vec<Self>,
},
List {
location: Span,
elements: Vec<Self>,
tipo: T,
},
// Construction of a record, optionally qualified by a module name.
Record {
location: Span,
module: Option<String>,
name: String,
args: Vec<CallArg<Self>>,
tag: RecordTag,
tipo: T,
field_map: Option<FieldMap>,
},
// NOTE(review): only the location is stored for now; the payload is still commented out.
ByteString {
location: Span,
// segments: Vec<BitStringSegment<Self, T>>,
},
// Reference to a named value, optionally qualified by a module name.
Var {
location: Span,
module: Option<String>,
name: String,
constructor: Option<Box<ValueConstructor>>,
tipo: T,
},
}
/// One argument with an optional label, its source span, and the wrapped
/// value of type `A`.
pub struct CallArg<A> {
pub label: Option<String>,
pub location: Span,
pub value: A,
}
/// An arity together with a map from field name to a `usize`.
///
/// NOTE(review): presumably the `usize` is the positional index of the
/// labelled field — confirm against the code that builds this map.
pub struct FieldMap {
pub arity: usize,
pub fields: HashMap<String, usize>,
}
/// A named constructor with its arguments, optional documentation and
/// source span. Used as the element type of `Definition::DataType::constructors`.
pub struct RecordConstructor<T> {
pub location: Span,
pub name: String,
pub arguments: Vec<RecordConstructorArg<T>>,
pub documentation: Option<String>,
}
/// One argument of a [`RecordConstructor`]: an optional label, the written
/// annotation, a type slot `T`, optional docs and the source span.
pub struct RecordConstructorArg<T> {
pub label: Option<String>,
// The annotation in its syntax-tree (`Annotation`) form.
pub annotation: Annotation,
pub location: Span,
pub tipo: T,
pub doc: Option<String>,
}
/// A function parameter: its name form, source span, optional written
/// annotation and a type slot `T`.
pub struct Arg<T> {
pub names: ArgName,
pub location: Span,
pub annotation: Option<Annotation>,
pub tipo: T,
}
/// The naming forms a parameter can take: discarded or named, each with or
/// without a separate label.
pub enum ArgName {
Discard { name: String },
LabeledDiscard { label: String, name: String },
Named { name: String },
NamedLabeled { name: String, label: String },
}
/// A single name listed in `Definition::Use::unqualified`, with an optional
/// alias and the [`Layer`] it belongs to.
pub struct UnqualifiedImport {
pub location: Span,
pub name: String,
pub as_name: Option<String>,
pub layer: Layer,
}
/// A type annotation as written in source (the syntax-tree form of a type).
pub enum Annotation {
// A named type, optionally module-qualified, applied to zero or more arguments.
Constructor {
location: Span,
module: Option<String>,
name: String,
arguments: Vec<Self>,
},
// A function type: argument annotations and a return annotation.
Fn {
location: Span,
arguments: Vec<Self>,
ret: Box<Self>,
},
Var {
location: Span,
name: String,
},
Tuple {
location: Span,
elems: Vec<Self>,
},
Hole {
location: Span,
name: String,
},
}
/// Distinguishes values from types; stored on [`UnqualifiedImport`].
/// The `Default` implementation yields `Layer::Value`.
pub enum Layer {
Value,
Type,
}
impl Default for Layer {
fn default() -> Self {
Layer::Value
}
}
/// The binary operators that can appear in a `BinOp` expression, grouped by
/// category below.
pub enum BinOp {
// Boolean logic
And,
Or,
// Equality
Eq,
NotEq,
// Order comparison
LtInt,
LtEqInt,
GtEqInt,
GtInt,
// Maths
AddInt,
SubInt,
MultInt,
DivInt,
ModInt,
}
/// A pattern, generic over the constructor information `Constructor` and the
/// type slot `Type` carried by the variants that have a `tipo` field.
pub enum Pattern<Constructor, Type> {
// Literal patterns; the literal text is stored as-is.
Int {
location: Span,
value: String,
},
Float {
location: Span,
value: String,
},
String {
location: Span,
value: String,
},
/// The creation of a variable.
/// e.g. `assert [this_is_a_var, .._] = x`
Var {
location: Span,
name: String,
},
/// A reference to a variable in a bit string. This is always a variable
/// being used rather than a new variable being assigned.
VarUsage {
location: Span,
name: String,
tipo: Type,
},
/// A name given to a sub-pattern using the `as` keyword.
/// e.g. `assert #(1, [_, _] as the_list) = x`
Assign {
name: String,
location: Span,
pattern: Box<Self>,
},
/// A pattern that binds to any value but does not assign a variable.
/// Always starts with an underscore.
Discard {
name: String,
location: Span,
},
/// A list pattern: leading element patterns plus an optional tail pattern.
List {
location: Span,
elements: Vec<Self>,
tail: Option<Box<Self>>,
},
/// The constructor for a custom type. Starts with an uppercase letter.
Constructor {
location: Span,
name: String,
arguments: Vec<CallArg<Self>>,
module: Option<String>,
constructor: Constructor,
with_spread: bool,
tipo: Type,
},
/// A tuple pattern with one sub-pattern per element.
Tuple {
location: Span,
elems: Vec<Self>,
},
}
/// Which kind of assignment an `Assignment` expression is: `Let` or `Assert`.
pub enum AssignmentKind {
Let,
Assert,
}
/// A list of patterns (used by [`Clause`] for its `pattern` and
/// `alternative_patterns` fields).
pub type MultiPattern<PatternConstructor, Type> = Vec<Pattern<PatternConstructor, Type>>;
/// [`MultiPattern`] with both generic slots filled with `()`.
pub type UntypedMultiPattern = MultiPattern<(), ()>;
/// [`MultiPattern`] carrying `PatternConstructor` information and `Arc<Type>` types.
pub type TypedMultiPattern = MultiPattern<PatternConstructor, Arc<Type>>;
/// [`Clause`] over typed expressions, with `String` record tags.
pub type TypedClause = Clause<TypedExpr, PatternConstructor, Arc<Type>, String>;
/// [`Clause`] over untyped expressions, with all other slots filled with `()`.
pub type UntypedClause = Clause<UntypedExpr, (), (), ()>;
/// One clause: a primary multi-pattern, any alternative multi-patterns, an
/// optional guard, and the expression stored in `then`.
pub struct Clause<Expr, PatternConstructor, Type, RecordTag> {
pub location: Span,
pub pattern: MultiPattern<PatternConstructor, Type>,
pub alternative_patterns: Vec<MultiPattern<PatternConstructor, Type>>,
pub guard: Option<ClauseGuard<Type, RecordTag>>,
pub then: Expr,
}
/// A guard expression attached to a [`Clause`].
///
/// The binary variants each hold a boxed `left` and `right` guard; the leaves
/// are `Var`, `TupleIndex` and `Constant`.
pub enum ClauseGuard<Type, RecordTag> {
Equals {
location: Span,
left: Box<Self>,
right: Box<Self>,
},
NotEquals {
location: Span,
left: Box<Self>,
right: Box<Self>,
},
GtInt {
location: Span,
left: Box<Self>,
right: Box<Self>,
},
GtEqInt {
location: Span,
left: Box<Self>,
right: Box<Self>,
},
LtInt {
location: Span,
left: Box<Self>,
right: Box<Self>,
},
LtEqInt {
location: Span,
left: Box<Self>,
right: Box<Self>,
},
Or {
location: Span,
left: Box<Self>,
right: Box<Self>,
},
And {
location: Span,
left: Box<Self>,
right: Box<Self>,
},
// A variable reference by name.
Var {
location: Span,
tipo: Type,
name: String,
},
// Access to element `index` of the guard stored in `tuple`.
TupleIndex {
location: Span,
index: u64,
tipo: Type,
tuple: Box<Self>,
},
// A constant value used inside the guard.
Constant(Constant<Type, RecordTag>),
}
/// A labelled argument of a typed record update, holding the new value and
/// an `index`.
///
/// NOTE(review): `index` is presumably the field's position in the record — confirm.
pub struct TypedRecordUpdateArg {
pub label: String,
pub location: Span,
pub value: TypedExpr,
pub index: usize,
}
/// A labelled argument of an untyped record update.
pub struct UntypedRecordUpdateArg {
pub label: String,
// NOTE(review): unlike `TypedRecordUpdateArg`, no location is stored yet; the field is still commented out.
// pub location: SrcSpan,
pub value: UntypedExpr,
}
/// The `spread` part of an untyped record update: a boxed base expression
/// and its source span.
pub struct RecordUpdateSpread {
pub base: Box<UntypedExpr>,
pub location: Span,
}
/// The kind recorded on `UntypedExpr::Todo`.
///
/// NOTE(review): presumably `Keyword` is an explicit `todo` in source and
/// `EmptyFunction` a synthesized one for an empty body — confirm with the parser.
pub enum TodoKind {
Keyword,
EmptyFunction,
}
/// Identifier of a source, stored as an interned `Vec<String>`.
/// It is the `Context` type of [`Span`]'s `chumsky::Span` implementation.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
pub struct SrcId(Intern<Vec<String>>);
impl SrcId {
    /// Test-only helper: a source id wrapping an empty (interned) path.
    #[cfg(test)]
    pub fn empty() -> Self {
        let no_segments = Vec::new();
        Self(Intern::new(no_segments))
    }
}
/// A region of a source: the source id plus `start` and `end` offsets.
/// The `chumsky::Span` constructor asserts `start <= end`.
#[derive(Copy, Clone, PartialEq, Eq)]
pub struct Span {
pub src: SrcId,
pub start: usize,
pub end: usize,
}
impl Span {
#[cfg(test)]
pub fn empty() -> Self {
use chumsky::Span;
Self::new(SrcId::empty(), 0..0)
}
pub fn src(&self) -> SrcId {
self.src
}
pub fn range(&self) -> Range<usize> {
use chumsky::Span;
self.start()..self.end()
}
pub fn union(self, other: Self) -> Self {
use chumsky::Span;
assert_eq!(
self.src, other.src,
"attempted to union spans with different sources"
);
Self {
start: self.start().min(other.start()),
end: self.end().max(other.end()),
..self
}
}
}
impl fmt::Debug for Span {
    /// Formats the span as `<src>:<range>`, both parts in their `Debug` form.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let source = self.src;
        let offsets = self.range();
        write!(f, "{:?}:{:?}", source, offsets)
    }
}
impl chumsky::Span for Span {
    type Context = SrcId;
    type Offset = usize;

    /// Builds a span over `range` within the source `context`.
    ///
    /// # Panics
    ///
    /// Panics if `range.start > range.end`.
    fn new(context: Self::Context, range: Range<Self::Offset>) -> Self {
        assert!(range.start <= range.end);
        let Range { start, end } = range;
        Self {
            src: context,
            start,
            end,
        }
    }

    /// The source id this span belongs to.
    fn context(&self) -> Self::Context {
        self.src
    }

    /// Offset of the first position covered by the span.
    fn start(&self) -> Self::Offset {
        self.start
    }

    /// Offset of the end of the span.
    fn end(&self) -> Self::Offset {
        self.end
    }
}

4
crates/lang/src/build.rs Normal file
View File

@ -0,0 +1,4 @@
/// Origin of a module; stored on `tipo::Module::origin`.
///
/// NOTE(review): presumably distinguishes regular source modules from test
/// modules — confirm with the build code once it exists.
pub enum Origin {
Src,
Test,
}

109
crates/lang/src/error.rs Normal file
View File

@ -0,0 +1,109 @@
use std::{collections::HashSet, fmt};
use crate::{ast::Span, token::Token};
/// The error type used by the lexer and parser (`chumsky::Error` is
/// implemented for it).
///
/// Equality (`PartialEq`) compares only `kind`, `span` and `label`.
#[derive(Debug)]
pub struct ParseError {
// What went wrong.
kind: ErrorKind,
// Where it went wrong.
span: Span,
// NOTE(review): never set to `Some` in the code shown here.
while_parsing: Option<(Span, &'static str)>,
// The inputs that would have been accepted; grows when errors are merged.
expected: HashSet<Pattern>,
// Label attached through `with_label`; the first label set wins.
label: Option<&'static str>,
}
impl ParseError {
    /// Folds `other` into `self` by adding every pattern `other` expected to
    /// `self`'s expected set. All other fields of `self` are kept and the
    /// rest of `other` is discarded.
    pub fn merge(mut self, other: Self) -> Self {
        self.expected.extend(other.expected);
        self
    }
}
impl PartialEq for ParseError {
    /// Two errors are equal when their kind, span and label match; the
    /// `expected` set and `while_parsing` are deliberately not compared.
    fn eq(&self, other: &Self) -> bool {
        let lhs = (&self.kind, &self.span, &self.label);
        let rhs = (&other.kind, &other.span, &other.label);
        lhs == rhs
    }
}
impl<T: Into<Pattern>> chumsky::Error<T> for ParseError {
    type Span = Span;
    type Label = &'static str;

    /// Builds an error for `found` at `span`.
    ///
    /// A missing `found` becomes `ErrorKind::UnexpectedEnd`; a `None` entry in
    /// `expected` becomes `Pattern::End`.
    fn expected_input_found<Iter: IntoIterator<Item = Option<T>>>(
        span: Self::Span,
        expected: Iter,
        found: Option<T>,
    ) -> Self {
        let kind = match found {
            Some(input) => ErrorKind::Unexpected(input.into()),
            None => ErrorKind::UnexpectedEnd,
        };
        let expected = expected
            .into_iter()
            .map(|candidate| candidate.map_or(Pattern::End, Into::into))
            .collect();

        Self {
            kind,
            span,
            while_parsing: None,
            expected,
            label: None,
        }
    }

    /// Attaches `label` unless a label is already present (first one wins).
    fn with_label(mut self, label: Self::Label) -> Self {
        if self.label.is_none() {
            self.label = Some(label);
        }
        self
    }

    /// Delegates to the inherent [`ParseError::merge`].
    fn merge(self, other: Self) -> Self {
        ParseError::merge(self, other)
    }
}
/// The category of a [`ParseError`].
#[derive(Debug, PartialEq, Eq)]
pub enum ErrorKind {
// Input ended where more was required.
UnexpectedEnd,
// An input that was not expected at this position.
Unexpected(Pattern),
// NOTE(review): not constructed anywhere in the code shown here.
Unclosed {
start: Pattern,
before_span: Span,
before: Option<Pattern>,
},
// NOTE(review): not constructed anywhere in the code shown here.
NoEndBranch,
}
/// Something the parser found or expected, as reported in a [`ParseError`].
/// The `Display` implementation gives the user-facing wording of each variant.
#[derive(Debug, PartialEq, Eq, Hash)]
pub enum Pattern {
// A single character (lexer level).
Char(char),
// A single token (parser level).
Token(Token),
Literal,
TypeIdent,
TermIdent,
// End of input.
End,
}
impl From<char> for Pattern {
fn from(c: char) -> Self {
Self::Char(c)
}
}
impl From<Token> for Pattern {
fn from(tok: Token) -> Self {
Self::Token(tok)
}
}
impl fmt::Display for Pattern {
    /// Writes the user-facing description of the pattern: tokens use their
    /// own `Display`, characters their `Debug` form, and the remaining
    /// variants a fixed phrase.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let phrase = match self {
            Pattern::Token(token) => return write!(f, "{}", token),
            Pattern::Char(c) => return write!(f, "{:?}", c),
            Pattern::Literal => "literal",
            Pattern::TypeIdent => "type name",
            Pattern::TermIdent => "identifier",
            Pattern::End => "end of input",
        };
        f.write_str(phrase)
    }
}

269
crates/lang/src/expr.rs Normal file
View File

@ -0,0 +1,269 @@
use std::sync::Arc;
use vec1::Vec1;
use crate::{
ast::{
Annotation, Arg, AssignmentKind, BinOp, CallArg, Clause, Pattern, RecordUpdateSpread, Span,
TodoKind, TypedRecordUpdateArg, UntypedRecordUpdateArg,
},
tipo::{ModuleValueConstructor, PatternConstructor, Type, ValueConstructor},
};
/// An expression that carries type information.
///
/// Most variants store their type in a `tipo: Arc<Type>` field; `Var` stores
/// a `ValueConstructor` instead, and `Sequence`, `Pipeline` and `Negate`
/// store no type of their own.
pub enum TypedExpr {
// Literals; the literal text is stored as-is.
Int {
location: Span,
tipo: Arc<Type>,
value: String,
},
Float {
location: Span,
tipo: Arc<Type>,
value: String,
},
String {
location: Span,
tipo: Arc<Type>,
value: String,
},
// A series of expressions.
Sequence {
location: Span,
expressions: Vec<Self>,
},
/// A chain of pipe expressions.
/// By this point the type checker has expanded it into a series of
/// assignments and function calls, but we still have a Pipeline AST node as
/// even though it is identical to `Sequence` we want to use different
/// locations when showing it in error messages, etc.
Pipeline {
location: Span,
expressions: Vec<Self>,
},
Var {
location: Span,
constructor: ValueConstructor,
name: String,
},
Fn {
location: Span,
tipo: Arc<Type>,
is_capture: bool,
args: Vec<Arg<Arc<Type>>>,
body: Box<Self>,
return_annotation: Option<Annotation>,
},
List {
location: Span,
tipo: Arc<Type>,
elements: Vec<Self>,
tail: Option<Box<Self>>,
},
Call {
location: Span,
tipo: Arc<Type>,
fun: Box<Self>,
args: Vec<CallArg<Self>>,
},
BinOp {
location: Span,
tipo: Arc<Type>,
name: BinOp,
left: Box<Self>,
right: Box<Self>,
},
Assignment {
location: Span,
tipo: Arc<Type>,
value: Box<Self>,
pattern: Pattern<PatternConstructor, Arc<Type>>,
kind: AssignmentKind,
},
Try {
location: Span,
tipo: Arc<Type>,
value: Box<Self>,
then: Box<Self>,
pattern: Pattern<PatternConstructor, Arc<Type>>,
},
// NOTE(review): the untyped counterpart of this variant is named `Case`.
When {
location: Span,
tipo: Arc<Type>,
subjects: Vec<Self>,
clauses: Vec<Clause<Self, PatternConstructor, Arc<Type>, String>>,
},
// NOTE(review): the untyped counterpart of this variant is named `FieldAccess`.
RecordAccess {
location: Span,
tipo: Arc<Type>,
label: String,
index: u64,
record: Box<Self>,
},
ModuleSelect {
location: Span,
tipo: Arc<Type>,
label: String,
module_name: String,
module_alias: String,
constructor: ModuleValueConstructor,
},
Tuple {
location: Span,
tipo: Arc<Type>,
elems: Vec<Self>,
},
TupleIndex {
location: Span,
tipo: Arc<Type>,
index: u64,
tuple: Box<Self>,
},
Todo {
location: Span,
label: Option<String>,
tipo: Arc<Type>,
},
RecordUpdate {
location: Span,
tipo: Arc<Type>,
spread: Box<Self>,
args: Vec<TypedRecordUpdateArg>,
},
Negate {
location: Span,
value: Box<Self>,
},
}
/// An expression without type information (no variant has a `tipo` field).
pub enum UntypedExpr {
// Literals; the literal text is stored as-is.
Int {
location: Span,
value: String,
},
Float {
location: Span,
value: String,
},
String {
location: Span,
value: String,
},
// A series of expressions.
Sequence {
location: Span,
expressions: Vec<Self>,
},
Var {
location: Span,
name: String,
},
Fn {
location: Span,
is_capture: bool,
arguments: Vec<Arg<()>>,
body: Box<Self>,
return_annotation: Option<Annotation>,
},
List {
location: Span,
elements: Vec<Self>,
tail: Option<Box<Self>>,
},
Call {
location: Span,
fun: Box<Self>,
arguments: Vec<CallArg<Self>>,
},
BinOp {
location: Span,
name: BinOp,
left: Box<Self>,
right: Box<Self>,
},
// A pipe chain; `Vec1` guarantees at least one expression.
// NOTE(review): spelled `PipeLine` here but `Pipeline` in `TypedExpr`, and
// this is the only variant without a `location`.
PipeLine {
expressions: Vec1<Self>,
},
Assignment {
location: Span,
value: Box<Self>,
pattern: Pattern<(), ()>,
kind: AssignmentKind,
annotation: Option<Annotation>,
},
Try {
location: Span,
value: Box<Self>,
pattern: Pattern<(), ()>,
then: Box<Self>,
annotation: Option<Annotation>,
},
// NOTE(review): the typed counterpart of this variant is named `When`.
Case {
location: Span,
subjects: Vec<Self>,
clauses: Vec<Clause<Self, (), (), ()>>,
},
// NOTE(review): the typed counterpart of this variant is named `RecordAccess`.
FieldAccess {
location: Span,
label: String,
container: Box<Self>,
},
Tuple {
location: Span,
elems: Vec<Self>,
},
TupleIndex {
location: Span,
index: u64,
tuple: Box<Self>,
},
Todo {
kind: TodoKind,
location: Span,
label: Option<String>,
},
RecordUpdate {
location: Span,
constructor: Box<Self>,
spread: RecordUpdateSpread,
arguments: Vec<UntypedRecordUpdateArg>,
},
Negate {
location: Span,
value: Box<Self>,
},
}

163
crates/lang/src/lexer.rs Normal file
View File

@ -0,0 +1,163 @@
use chumsky::prelude::*;
use internment::Intern;
use crate::{ast::Span, error::ParseError, token::Token};
/// Builds the lexer: a parser from characters to `(Token, Span)` pairs.
///
/// Recognised input: identifiers and keywords, base-10 integers, the
/// operators and grouping characters listed below, and double-quoted strings
/// with backslash escapes. `//` line comments and whitespace are skipped.
/// Any other character produces a `Token::Error` and an emitted
/// [`ParseError`], after which lexing continues.
///
/// The whole input must be consumed (`end()`).
pub fn lexer() -> impl Parser<char, Vec<(Token, Span)>, Error = ParseError> {
    let int = text::int(10)
        .map(Intern::new)
        .map(|value| Token::Int { value });

    // Order matters: `choice` takes the first alternative that matches, so
    // every multi-character operator is listed before its one-character prefix.
    let op = choice((
        just("==").to(Token::EqualEqual),
        just('=').to(Token::Equal),
        // `..` is its own token; it used to be lexed as a single `Dot`.
        just("..").to(Token::DotDot),
        just('.').to(Token::Dot),
        just("!=").to(Token::NotEqual),
        just('!').to(Token::Bang),
        just("<=").to(Token::LessEqual),
        just('<').to(Token::Less),
        just(">=").to(Token::GreaterEqual),
        just('>').to(Token::Greater),
        just('+').to(Token::Plus),
        just("->").to(Token::RArrow),
        just('-').to(Token::Minus),
        just('*').to(Token::Star),
        just('/').to(Token::Slash),
        just('%').to(Token::Percent),
        just("|>").to(Token::Pipe),
    ));

    let grouping = choice((
        just('(').to(Token::LeftParen),
        just(')').to(Token::RightParen),
        just('[').to(Token::LeftSquare),
        just(']').to(Token::RightSquare),
        just('{').to(Token::LeftBrace),
        just('}').to(Token::RightBrace),
    ));

    // A backslash followed by one of the supported escape characters.
    let escape = just('\\').ignore_then(
        just('\\')
            .or(just('/'))
            .or(just('"'))
            .or(just('b').to('\x08'))
            .or(just('f').to('\x0C'))
            .or(just('n').to('\n'))
            .or(just('r').to('\r'))
            .or(just('t').to('\t')),
    );

    let string = just('"')
        .ignore_then(filter(|c| *c != '\\' && *c != '"').or(escape).repeated())
        .then_ignore(just('"'))
        .collect::<String>()
        .map(Intern::new)
        .map(|value| Token::String { value })
        .labelled("string");

    // Identifiers: reserved words become keyword tokens; everything else is
    // classified by its first character.
    let keyword = text::ident().map(|s: String| match s.as_str() {
        "as" => Token::As,
        "assert" => Token::Assert,
        "const" => Token::Const,
        "fn" => Token::Fn,
        "if" => Token::If,
        "is" => Token::Is,
        "let" => Token::Let,
        "opaque" => Token::Opaque,
        "pub" => Token::Pub,
        "use" => Token::Use,
        "todo" => Token::Todo,
        "try" => Token::Try,
        "type" => Token::Type,
        "when" => Token::When,
        _ => {
            if s.chars().next().map_or(false, |c| c.is_uppercase()) {
                Token::UpName {
                    // TODO: do not allow _ in upname
                    name: Intern::new(s),
                }
            } else if s.starts_with('_') {
                Token::DiscardName {
                    // TODO: do not allow uppercase letters in discard name
                    name: Intern::new(s),
                }
            } else {
                Token::Name {
                    // TODO: do not allow uppercase letters in name
                    name: Intern::new(s),
                }
            }
        }
    });

    // Unknown characters become `Token::Error` and are reported through
    // `emit`, so one bad character does not abort the whole lex.
    let token = choice((keyword, int, op, grouping, string))
        .or(any().map(Token::Error).validate(|t, span, emit| {
            emit(ParseError::expected_input_found(span, None, Some(t)));
            t
        }))
        .map_with_span(move |token, span| (token, span))
        .padded()
        .recover_with(skip_then_retry_until([]));

    // `//` up to (not including) the end of the line. The previous version
    // also treated `//( ... )#` as a block comment, which could swallow code
    // following a line comment that merely started with `(`.
    let comments = just("//")
        .then_ignore(none_of('\n').ignored().repeated().ignored())
        .padded()
        .ignored()
        .repeated();

    token
        .padded_by(comments)
        .repeated()
        .padded()
        .then_ignore(end())
}
#[cfg(test)]
mod tests {
    use chumsky::prelude::*;
    use internment::Intern;

    use crate::{
        ast::{Span, SrcId},
        lexer,
        token::Token,
    };

    /// Lexes a snippet mixing keywords, operators, a brace and the three
    /// identifier kinds, and checks the resulting tokens (spans are dropped).
    #[test]
    fn simple() {
        let code = "pub type |> >=\n{ Thing _na_thing name";
        let len = code.chars().count();

        // One-character span starting at offset `i`.
        let span = |i| Span::new(SrcId::empty(), i..i + 1);

        let input = chumsky::Stream::from_iter(
            span(len),
            code.chars().enumerate().map(|(i, c)| (c, span(i))),
        );
        let lexed = lexer::lexer()
            .parse(input)
            .map(|tokens| tokens.into_iter().map(|(tok, _)| tok).collect::<Vec<_>>());

        let interned = |text: &str| Intern::new(text.to_string());
        let expected = vec![
            Token::Pub,
            Token::Type,
            Token::Pipe,
            Token::GreaterEqual,
            Token::LeftBrace,
            Token::UpName {
                name: interned("Thing"),
            },
            Token::DiscardName {
                name: interned("_na_thing"),
            },
            Token::Name {
                name: interned("name"),
            },
        ];

        assert_eq!(lexed, Ok(expected));
    }
}

View File

@ -1,10 +1,8 @@
pub mod ast; pub mod ast;
pub mod build;
#[cfg(test)] pub mod error;
mod tests { pub mod expr;
#[test] pub mod lexer;
fn it_works() { pub mod parser;
let result = 2 + 2; pub mod tipo;
assert_eq!(result, 4); pub mod token;
}
}

View File

@ -0,0 +1,7 @@
use chumsky::prelude::*;
use crate::{ast, error::ParseError, token::Token};
/// Parser from a token stream to an untyped module.
///
/// NOTE(review): this is an unfinished stub and does not compile as written:
/// `ignore_then()` is called without the parser it should sequence with, and
/// the function body ends in a `let` statement, so nothing is returned.
pub fn module_parser() -> impl Parser<Token, ast::UntypedModule, Error = ParseError> {
// Starts matching a `use` token; what should follow it is not written yet.
let imports = just(Token::Use).ignore_then();
}

159
crates/lang/src/tipo.rs Normal file
View File

@ -0,0 +1,159 @@
use std::{cell::RefCell, collections::HashMap, sync::Arc};
use crate::{
ast::{Constant, FieldMap, Span, TypedConstant},
build::Origin,
};
/// The representation of a type.
pub enum Type {
/// A nominal (named) type such as `Int`, `Float`, or a programmer defined
/// custom type such as `Person`. The type can take other types as
/// arguments (aka "generics" or "parametric polymorphism").
///
/// If the type is defined in the Gleam prelude the `module` field will be
/// empty, otherwise it will contain the name of the module that
/// defines the type.
///
/// NOTE(review): the reference to the Gleam prelude looks inherited from the
/// Gleam compiler — confirm what an empty `module` means in this project.
App {
public: bool,
module: Vec<String>,
name: String,
args: Vec<Arc<Type>>,
},
/// The type of a function. It takes arguments and returns a value.
///
Fn {
args: Vec<Arc<Type>>,
retrn: Arc<Type>,
},
/// A type variable. See the contained `TypeVar` enum for more information.
///
Var { tipo: Arc<RefCell<TypeVar>> },
/// A tuple is an ordered collection of 0 or more values, each of which
/// can have a different type, so the `tuple` type is the product of all the
/// contained types.
///
Tuple { elems: Vec<Arc<Type>> },
}
/// The state of a type variable (see `Type::Var`).
pub enum TypeVar {
/// Unbound is an unbound variable. It is one specific type but we don't
/// know what yet in the inference process. It has a unique id which can be used to
/// identify if two unbound variable Rust values are the same type variable
/// instance or not.
///
Unbound { id: u64 },
/// Link is a type variable that used to be unbound, but we have since worked
/// out that it is some other type, and it now points to that one.
///
Link { tipo: Arc<Type> },
/// A Generic variable stands in for any possible type and cannot be
/// specialised to any one type
///
/// # Example
///
/// ```gleam
/// type Cat(a) {
/// Cat(name: a)
/// }
/// // a is TypeVar::Generic
/// ```
///
Generic { id: u64 },
}
/// Information about a value: whether it is public, which variant of value
/// it is, and its type.
pub struct ValueConstructor {
pub public: bool,
pub variant: ValueConstructorVariant,
pub tipo: Arc<Type>,
}
/// The different kinds of value a [`ValueConstructor`] can describe.
pub enum ValueConstructorVariant {
/// A locally defined variable or function parameter
LocalVariable { location: Span },
/// A module constant
ModuleConstant {
location: Span,
module: String,
literal: Constant<Arc<Type>, String>,
},
/// A function belonging to the module
// NOTE(review): `module` is a `Vec<String>` here but a `String` in the
// `ModuleConstant` and `Record` variants.
ModuleFn {
name: String,
field_map: Option<FieldMap>,
module: Vec<String>,
arity: usize,
location: Span,
},
/// A constructor for a custom type
Record {
name: String,
arity: usize,
field_map: Option<FieldMap>,
location: Span,
module: String,
},
}
/// Type-level information about a module: its name, origin and package, plus
/// several lookup tables keyed by `String`.
///
/// NOTE(review): the keys of the maps are presumably the names of the
/// corresponding types/values — confirm against the type checker.
pub struct Module {
pub name: Vec<String>,
pub origin: Origin,
pub package: String,
pub types: HashMap<String, TypeConstructor>,
pub types_constructors: HashMap<String, Vec<String>>,
pub values: HashMap<String, ValueConstructor>,
pub accessors: HashMap<String, AccessorsMap>,
}
/// Information about a type: visibility, the span stored in `origin`, the
/// module path, its type parameters and the type itself.
pub struct TypeConstructor {
pub public: bool,
pub origin: Span,
pub module: Vec<String>,
pub parameters: Vec<Arc<Type>>,
pub typ: Arc<Type>,
}
/// The accessors of one type: its visibility, the type, and a map from
/// `String` to [`RecordAccessor`].
///
/// NOTE(review): the map key is presumably the field label — confirm.
pub struct AccessorsMap {
pub public: bool,
pub tipo: Arc<Type>,
pub accessors: HashMap<String, RecordAccessor>,
}
/// A single accessor: an index, a label and a type.
pub struct RecordAccessor {
// TODO: smaller int. Doesn't need to be this big
pub index: u64,
pub label: String,
pub tipo: Arc<Type>,
}
/// Constructor information attached to typed `Pattern::Constructor`
/// patterns; currently only records exist.
pub enum PatternConstructor {
Record {
name: String,
field_map: Option<FieldMap>,
},
}
/// Constructor information stored on `TypedExpr::ModuleSelect`: a record
/// constructor, a function, or a constant.
pub enum ModuleValueConstructor {
Record {
name: String,
arity: usize,
type_: Arc<Type>,
field_map: Option<FieldMap>,
location: Span,
},
Fn {
location: Span,
},
Constant {
literal: TypedConstant,
location: Span,
},
}

149
crates/lang/src/token.rs Normal file
View File

@ -0,0 +1,149 @@
use std::fmt;
use internment::Intern;
/// A lexical token.
///
/// Textual payloads are interned (`Intern<String>`), which keeps the type `Copy`.
#[derive(Copy, Clone, Debug, PartialEq, Hash, Eq)]
pub enum Token {
// A character the lexer could not make sense of.
Error(char),
Name { name: Intern<String> },
UpName { name: Intern<String> },
DiscardName { name: Intern<String> },
Int { value: Intern<String> },
String { value: Intern<String> },
// Groupings
LeftParen, // (
RightParen, // )
LeftSquare, // [
RightSquare, // ]
LeftBrace, // {
RightBrace, // }
// Int Operators
Plus,
Minus,
Star,
Slash,
Less,
Greater,
LessEqual,
GreaterEqual,
Percent,
// ByteString Operators
PlusDot, // '+.'
MinusDot, // '-.'
StarDot, // '*.'
SlashDot, // '/.'
LessDot, // '<.'
GreaterDot, // '>.'
LessEqualDot, // '<=.'
GreaterEqualDot, // '>=.'
// Other Punctuation
Colon,
Comma,
Hash, // '#'
Bang, // '!'
Equal,
EqualEqual, // '=='
NotEqual, // '!='
Vbar, // '|'
VbarVbar, // '||'
AmperAmper, // '&&'
Pipe, // '|>'
Dot, // '.'
RArrow, // '->'
DotDot, // '..'
EndOfFile,
// Extra
CommentNormal,
CommentDoc,
CommentModule,
EmptyLine,
// Keywords (alphabetically):
As,
Assert,
Const,
Fn,
If,
Is,
Let,
Opaque,
Pub,
Use,
Todo,
Try,
Type,
When,
}
impl fmt::Display for Token {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let s = match self {
Token::Error(c) => {
write!(f, "\"{}\"", c)?;
return Ok(());
}
Token::Name { name } => &**name,
Token::UpName { name } => &**name,
Token::DiscardName { name } => &**name,
Token::Int { value } => &**value,
Token::String { value } => &**value,
Token::LeftParen => "(",
Token::RightParen => ")",
Token::LeftSquare => "[",
Token::RightSquare => "]",
Token::LeftBrace => "{",
Token::RightBrace => "}",
Token::Plus => "+",
Token::Minus => "-",
Token::Star => "*",
Token::Slash => "/",
Token::Less => "<",
Token::Greater => ">",
Token::LessEqual => "<=",
Token::GreaterEqual => ">=",
Token::Percent => "%",
Token::PlusDot => "+.",
Token::MinusDot => "-.",
Token::StarDot => "*.",
Token::SlashDot => "/.",
Token::LessDot => "<.",
Token::GreaterDot => ">.",
Token::LessEqualDot => "<=.",
Token::GreaterEqualDot => ">=.",
Token::Colon => ":",
Token::Comma => ",",
Token::Hash => "#",
Token::Bang => "!",
Token::Equal => "=",
Token::EqualEqual => "==",
Token::NotEqual => "!=",
Token::Vbar => "|",
Token::VbarVbar => "||",
Token::AmperAmper => "&&",
Token::Pipe => "|>",
Token::Dot => ".",
Token::RArrow => "->",
Token::DotDot => "..",
Token::EndOfFile => "EOF",
Token::CommentNormal => "//",
Token::CommentDoc => "///",
Token::CommentModule => "////",
Token::EmptyLine => "EMPTYLINE",
Token::As => "as",
Token::Assert => "assert",
Token::When => "when",
Token::Is => "is",
Token::Const => "const",
Token::Fn => "fn",
Token::If => "if",
Token::Use => "import",
Token::Let => "let",
Token::Opaque => "opaque",
Token::Pub => "pub",
Token::Todo => "todo",
Token::Try => "try",
Token::Type => "type",
};
write!(f, "\"{}\"", s)
}
}