feat: bring over the formatter from gleam

This commit is contained in:
rvcas
2022-11-01 19:53:19 -04:00
parent 91a131d520
commit cba7a6f46e
32 changed files with 2270 additions and 480 deletions

View File

@@ -1,7 +1,5 @@
use std::{fmt, ops::Range, sync::Arc};
use internment::Intern;
use crate::{
builtins::{self, bool},
expr::{TypedExpr, UntypedExpr},
@@ -80,6 +78,7 @@ pub enum Definition<T, Expr, ConstantRecordTag, PackageName> {
public: bool,
return_annotation: Option<Annotation>,
return_type: T,
end_position: usize,
},
TypeAlias {
@@ -122,6 +121,30 @@ pub enum Definition<T, Expr, ConstantRecordTag, PackageName> {
},
}
impl<A, B, C, E> Definition<A, B, C, E> {
    /// The source span covering this definition.
    pub fn location(&self) -> Span {
        match self {
            Definition::Fn { location, .. }
            | Definition::Use { location, .. }
            | Definition::TypeAlias { location, .. }
            | Definition::DataType { location, .. }
            | Definition::ModuleConstant { location, .. } => *location,
        }
    }

    /// Attach a doc comment to this definition, replacing any existing one.
    ///
    /// `Use` definitions carry no `doc` field, so they are left untouched.
    pub fn put_doc(&mut self, new_doc: String) {
        match self {
            Definition::Use { .. } => (),
            Definition::Fn { doc, .. }
            | Definition::TypeAlias { doc, .. }
            | Definition::DataType { doc, .. }
            | Definition::ModuleConstant { doc, .. } => {
                // Plain assignment: the previous value is discarded, so
                // `mem::replace` added nothing but noise.
                *doc = Some(new_doc);
            }
        }
    }
}
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct DefinitionLocation<'module> {
pub module: Option<&'module str>,
@@ -215,6 +238,15 @@ pub struct CallArg<A> {
pub value: A,
}
impl CallArg<UntypedExpr> {
    /// Whether this argument is the placeholder variable inserted by the
    /// parser for a function capture (a var named `CAPTURE_VARIABLE`).
    pub fn is_capture_hole(&self) -> bool {
        matches!(&self.value, UntypedExpr::Var { name, .. } if name == CAPTURE_VARIABLE)
    }
}
#[derive(Debug, Clone, PartialEq)]
pub struct RecordConstructor<T> {
pub location: Span,
@@ -451,6 +483,26 @@ pub enum BinOp {
ModInt,
}
impl BinOp {
    /// Binding strength of this operator; a higher value binds tighter.
    ///
    /// NOTE: keep this in sync with `ClauseGuard::precedence`.
    /// Precedence 5 is reserved for the pipe operator (handled separately
    /// in `UntypedExpr::binop_precedence`).
    pub fn precedence(&self) -> u8 {
        match self {
            Self::MultInt | Self::DivInt | Self::ModInt => 7,
            Self::AddInt | Self::SubInt => 6,
            Self::LtInt | Self::LtEqInt | Self::GtEqInt | Self::GtInt => 4,
            Self::Eq | Self::NotEq => 3,
            Self::And => 2,
            Self::Or => 1,
        }
    }
}
pub type UntypedPattern = Pattern<(), ()>;
pub type TypedPattern = Pattern<PatternConstructor, Arc<Type>>;
@@ -542,7 +594,7 @@ impl<A, B> Pattern<A, B> {
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
pub enum AssignmentKind {
Let,
Assert,
@@ -568,7 +620,6 @@ pub struct Clause<Expr, PatternConstructor, Type, RecordTag> {
impl TypedClause {
pub fn location(&self) -> Span {
Span {
src: SrcId::empty(),
start: self
.pattern
.get(0)
@@ -663,6 +714,23 @@ impl<A, B> ClauseGuard<A, B> {
| ClauseGuard::LtEqInt { location, .. } => *location,
}
}
/// Binding strength of this guard expression; a higher value binds tighter.
///
/// NOTE: keep this in sync with `BinOp::precedence`.
pub fn precedence(&self) -> u8 {
    match self {
        ClauseGuard::Constant(_) | ClauseGuard::Var { .. } => 5,
        ClauseGuard::GtInt { .. }
        | ClauseGuard::GtEqInt { .. }
        | ClauseGuard::LtInt { .. }
        | ClauseGuard::LtEqInt { .. } => 4,
        ClauseGuard::Equals { .. } | ClauseGuard::NotEquals { .. } => 3,
        ClauseGuard::And { .. } => 2,
        ClauseGuard::Or { .. } => 1,
    }
}
}
impl TypedClauseGuard {
@@ -721,18 +789,8 @@ pub enum TodoKind {
EmptyFunction,
}
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
pub struct SrcId(Intern<Vec<String>>);
impl SrcId {
pub fn empty() -> Self {
SrcId(Intern::new(Vec::new()))
}
}
#[derive(Copy, Clone, PartialEq, Eq)]
pub struct Span {
pub src: SrcId,
pub start: usize,
pub end: usize,
}
@@ -747,11 +805,7 @@ impl Span {
pub fn empty() -> Self {
use chumsky::Span;
Self::new(SrcId::empty(), 0..0)
}
pub fn src(&self) -> SrcId {
self.src
Self::new((), 0..0)
}
pub fn range(&self) -> Range<usize> {
@@ -763,43 +817,34 @@ impl Span {
pub fn union(self, other: Self) -> Self {
use chumsky::Span;
assert_eq!(
self.src, other.src,
"attempted to union spans with different sources"
);
Self {
start: self.start().min(other.start()),
end: self.end().max(other.end()),
..self
}
}
}
impl fmt::Debug for Span {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{:?}:{:?}", self.src, self.range())
write!(f, "{:?}", self.range())
}
}
impl chumsky::Span for Span {
type Context = SrcId;
type Context = ();
type Offset = usize;
fn new(context: Self::Context, range: Range<Self::Offset>) -> Self {
fn new(_context: Self::Context, range: Range<Self::Offset>) -> Self {
assert!(range.start <= range.end);
Self {
src: context,
start: range.start,
end: range.end,
}
}
fn context(&self) -> Self::Context {
self.src
}
fn context(&self) -> Self::Context {}
fn start(&self) -> Self::Offset {
self.start

View File

@@ -435,7 +435,6 @@ impl UntypedExpr {
let location = Span {
start: self.location().start,
end: next.location().end,
..self.location()
};
match (self.clone(), next.clone()) {
@@ -508,4 +507,35 @@ impl UntypedExpr {
} => expressions.last().map(Self::location).unwrap_or(*location),
}
}
/// Byte offset in the source where this expression effectively starts,
/// drilling into the first sub-expression of sequences.
pub fn start_byte_index(&self) -> usize {
    match self {
        Self::Try { location, .. } | Self::Assignment { location, .. } => location.start,
        // `first()` is called without unwrapping here — presumably the
        // pipeline expressions are a non-empty Vec1; TODO confirm.
        Self::PipeLine { expressions, .. } => expressions.first().start_byte_index(),
        Self::Sequence {
            expressions,
            location,
            ..
        } => expressions
            .first()
            .map(Self::start_byte_index)
            .unwrap_or(location.start),
        _ => self.location().start,
    }
}
/// Operator precedence of this expression when it appears as a binary
/// operand: the operator's own precedence for `BinOp`, 5 for pipelines,
/// and the maximum (`u8::MAX`) for every non-operator expression.
pub fn binop_precedence(&self) -> u8 {
    match self {
        Self::BinOp { name, .. } => name.precedence(),
        // 5 sits between comparisons (4) and addition (6); see the gap
        // deliberately left in BinOp::precedence.
        Self::PipeLine { .. } => 5,
        // `u8::MAX` (associated const) replaces the legacy `std::u8::MAX`
        // module path, which is deprecated.
        _ => u8::MAX,
    }
}
/// Whether this expression is a simple literal constant: a string, an
/// int, or a bytearray.
pub fn is_simple_constant(&self) -> bool {
    matches!(
        self,
        Self::ByteArray { .. } | Self::Int { .. } | Self::String { .. }
    )
}
}

1537
crates/lang/src/format.rs Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -5,13 +5,11 @@ use std::sync::{
pub mod ast;
pub mod builtins;
pub mod error;
pub mod expr;
pub mod lexer;
pub mod format;
pub mod parser;
pub mod pretty;
pub mod tipo;
pub mod token;
#[derive(Debug, Default, Clone)]
pub struct IdGenerator {

View File

@@ -1,45 +1,83 @@
use chumsky::prelude::*;
use vec1::Vec1;
pub mod error;
pub mod extra;
pub mod lexer;
pub mod token;
use crate::{
ast::{self, BinOp, Span, SrcId, TodoKind, CAPTURE_VARIABLE},
error::ParseError,
expr, lexer,
token::Token,
ast::{self, BinOp, Span, TodoKind, CAPTURE_VARIABLE},
expr,
};
pub fn script(src: &str) -> Result<ast::UntypedModule, Vec<ParseError>> {
use error::ParseError;
use extra::ModuleExtra;
use token::Token;
/// One item produced by the module parser: either a real top-level
/// definition, or a piece of "extra" information (comment spans, empty
/// lines) that `module` collects into a `ModuleExtra`.
enum DefinitionOrExtra {
// Boxed so the enum stays small; a definition is far larger than a span.
Definition(Box<ast::UntypedDefinition>),
// Source span of a module-level comment.
ModuleComment(Span),
// Source span of a doc comment.
DocComment(Span),
// Source span of an ordinary comment.
Comment(Span),
// Source position of an empty line — presumably a byte offset; TODO confirm.
EmptyLine(usize),
}
pub fn module(
src: &str,
kind: ast::ModuleKind,
) -> Result<(ast::UntypedModule, ModuleExtra), Vec<ParseError>> {
let len = src.chars().count();
let span = |i| Span::new(SrcId::empty(), i..i + 1);
let span = |i| Span::new((), i..i + 1);
let tokens = lexer::lexer().parse(chumsky::Stream::from_iter(
span(len),
src.chars().enumerate().map(|(i, c)| (c, span(i))),
))?;
module_parser(ast::ModuleKind::Script)
.parse(chumsky::Stream::from_iter(span(len), tokens.into_iter()))
}
let module_data =
module_parser().parse(chumsky::Stream::from_iter(span(len), tokens.into_iter()))?;
pub fn module_parser(
kind: ast::ModuleKind,
) -> impl Parser<Token, ast::UntypedModule, Error = ParseError> {
choice((
import_parser(),
data_parser(),
type_alias_parser(),
fn_parser(),
))
.repeated()
.then_ignore(end())
.map(move |definitions| ast::UntypedModule {
let mut definitions = Vec::new();
let mut extra = ModuleExtra::new();
for data in module_data {
match data {
DefinitionOrExtra::Definition(def) => definitions.push(*def),
DefinitionOrExtra::ModuleComment(c) => extra.module_comments.push(c),
DefinitionOrExtra::DocComment(c) => extra.doc_comments.push(c),
DefinitionOrExtra::Comment(c) => extra.comments.push(c),
DefinitionOrExtra::EmptyLine(e) => extra.empty_lines.push(e),
}
}
let module = ast::UntypedModule {
kind,
definitions,
docs: vec![],
name: "".to_string(),
type_info: (),
})
};
Ok((module, extra))
}
/// Parses the body of a module: any number of top-level definitions
/// (imports, data types, type aliases, functions) until end of input,
/// each wrapped as `DefinitionOrExtra::Definition`.
fn module_parser() -> impl Parser<Token, Vec<DefinitionOrExtra>, Error = ParseError> {
    choice((
        import_parser().map(|def| DefinitionOrExtra::Definition(Box::new(def))),
        data_parser().map(|def| DefinitionOrExtra::Definition(Box::new(def))),
        type_alias_parser().map(|def| DefinitionOrExtra::Definition(Box::new(def))),
        fn_parser().map(|def| DefinitionOrExtra::Definition(Box::new(def))),
    ))
    .repeated()
    .then_ignore(end())
}
pub fn import_parser() -> impl Parser<Token, ast::UntypedDefinition, Error = ParseError> {
@@ -188,7 +226,8 @@ pub fn fn_parser() -> impl Parser<Token, ast::UntypedDefinition, Error = ParseEr
.then(
fn_param_parser()
.separated_by(just(Token::Comma))
.delimited_by(just(Token::LeftParen), just(Token::RightParen)),
.delimited_by(just(Token::LeftParen), just(Token::RightParen))
.map_with_span(|arguments, span| (arguments, span)),
)
.then(just(Token::RArrow).ignore_then(type_parser()).or_not())
.then(
@@ -197,7 +236,7 @@ pub fn fn_parser() -> impl Parser<Token, ast::UntypedDefinition, Error = ParseEr
.delimited_by(just(Token::LeftBrace), just(Token::RightBrace)),
)
.map_with_span(
|((((opt_pub, name), arguments), return_annotation), body), span| {
|((((opt_pub, name), (arguments, args_span)), return_annotation), body), span| {
ast::UntypedDefinition::Fn {
arguments,
body: body.unwrap_or(expr::UntypedExpr::Todo {
@@ -206,7 +245,14 @@ pub fn fn_parser() -> impl Parser<Token, ast::UntypedDefinition, Error = ParseEr
label: None,
}),
doc: None,
location: span,
location: Span {
start: span.start,
end: return_annotation
.as_ref()
.map(|l| l.location().end)
.unwrap_or_else(|| args_span.end),
},
end_position: span.end - 1,
name,
public: opt_pub.is_some(),
return_annotation,

View File

@@ -2,7 +2,7 @@ use std::{collections::HashSet, fmt};
use miette::Diagnostic;
use crate::{ast::Span, token::Token};
use crate::{ast::Span, parser::token::Token};
#[derive(Debug, Diagnostic, thiserror::Error)]
#[error("{}", .kind)]

View File

@@ -0,0 +1,35 @@
use crate::ast::Span;
/// Non-definition information gathered while parsing a module — comment
/// spans and empty-line positions — returned by `parser::module` alongside
/// the AST (presumably for the formatter to preserve layout; TODO confirm).
#[derive(Debug, PartialEq, Eq, Default)]
pub struct ModuleExtra {
// Spans of module-level comments.
pub module_comments: Vec<Span>,
// Spans of doc comments.
pub doc_comments: Vec<Span>,
// Spans of ordinary comments.
pub comments: Vec<Span>,
// Source positions of empty lines.
pub empty_lines: Vec<usize>,
}
impl ModuleExtra {
    /// An empty `ModuleExtra` with no recorded comments or empty lines.
    pub fn new() -> Self {
        Self::default()
    }
}
/// A single comment sliced out of the source text: its start offset and
/// the comment content, borrowed from the source string.
#[derive(Debug, PartialEq, Eq)]
pub struct Comment<'a> {
pub start: usize,
pub content: &'a str,
}
impl<'a> From<(&Span, &'a str)> for Comment<'a> {
    /// Slice the comment text out of the full source using the span's
    /// byte range.
    ///
    /// Panics if the span does not fall on valid bounds of the source —
    /// the parser guarantees it does.
    fn from((span, src): (&Span, &'a str)) -> Comment<'a> {
        // `Span.start`/`Span.end` are already `usize`; the previous
        // `as usize` casts were redundant no-ops.
        Comment {
            start: span.start,
            content: src
                .get(span.start..span.end)
                .expect("From span to comment"),
        }
    }
}

View File

@@ -1,6 +1,8 @@
use chumsky::prelude::*;
use crate::{ast::Span, error::ParseError, token::Token};
use crate::ast::Span;
use super::{error::ParseError, token::Token};
pub fn lexer() -> impl Parser<char, Vec<(Token, Span)>, Error = ParseError> {
let int = text::int(10).map(|value| Token::Int { value });

View File

@@ -9,13 +9,10 @@
//!
//! ## Extensions
//!
//! - `ForceBreak` from Prettier.
//! - `ForcedBreak` from Elixir.
//! - `FlexBreak` from Elixir.
#![allow(clippy::wrong_self_convention)]
// #[cfg(test)]
// mod tests;
use std::collections::VecDeque;
use itertools::Itertools;
@@ -31,9 +28,6 @@ macro_rules! docvec {
};
}
#[derive(Debug)]
pub enum Error {}
/// Coerce a value into a Document.
/// Note we do not implement this for String as a slight pressure to favour str
/// over String.
@@ -136,7 +130,7 @@ pub enum Document<'a> {
Line(usize),
/// Forces contained groups to break
ForceBreak,
ForceBroken(Box<Self>),
/// May break contained document based on best fit, thus flex break
FlexBreak(Box<Self>),
@@ -154,9 +148,6 @@ pub enum Document<'a> {
/// Nests the given document by the given indent
Nest(isize, Box<Self>),
/// Nests the given document to the current cursor position
NestCurrent(Box<Self>),
/// Nests the given document to the current cursor position
Group(Box<Self>),
@@ -167,10 +158,24 @@ pub enum Document<'a> {
Str(&'a str),
}
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Mode {
Broken,
Unbroken,
//
// These are used for the Fits variant, taken from Elixir's
// Inspect.Algebra's `fits` extension.
//
/// Broken and forced to remain broken
ForcedBroken,
// ForcedUnbroken, // Used for next_break_fits. Not yet implemented.
}
impl Mode {
    /// Whether this mode forces nested groups to remain broken.
    fn is_forced(&self) -> bool {
        // Mode derives PartialEq + Copy, so a direct comparison reads
        // just as well as a `matches!`.
        *self == Mode::ForcedBroken
    }
}
fn fits(
@@ -189,14 +194,15 @@ fn fits(
};
match document {
Document::Line(_) => return true,
Document::ForceBroken(_) => {
return false;
}
Document::ForceBreak => return false,
Document::Line(_) => return true,
Document::Nest(i, doc) => docs.push_front((i + indent, mode, doc)),
// TODO: Remove
Document::NestCurrent(doc) => docs.push_front((indent, mode, doc)),
Document::Group(doc) if mode.is_forced() => docs.push_front((indent, mode, doc)),
Document::Group(doc) => docs.push_front((indent, Mode::Unbroken, doc)),
@@ -204,7 +210,7 @@ fn fits(
Document::String(s) => limit -= s.len() as isize,
Document::Break { unbroken, .. } => match mode {
Mode::Broken => return true,
Mode::Broken | Mode::ForcedBroken => return true,
Mode::Unbroken => current_width += unbroken.len() as isize,
},
@@ -212,7 +218,7 @@ fn fits(
Document::Vec(vec) => {
for doc in vec.iter().rev() {
docs.push_front((indent, mode.clone(), doc));
docs.push_front((indent, mode, doc));
}
}
}
@@ -230,11 +236,9 @@ fn format(
limit: isize,
mut width: isize,
mut docs: VecDeque<(isize, Mode, &Document<'_>)>,
) -> Result<(), Error> {
) {
while let Some((indent, mode, document)) = docs.pop_front() {
match document {
Document::ForceBreak => (),
Document::Line(i) => {
for _ in 0..*i {
writer.push('\n');
@@ -267,6 +271,7 @@ fn format(
for _ in 0..indent {
writer.push(' ');
}
width = indent;
}
}
@@ -283,7 +288,8 @@ fn format(
width + unbroken.len() as isize
}
Mode::Broken => {
Mode::Broken | Mode::ForcedBroken => {
writer.push_str(broken);
writer.push('\n');
@@ -311,7 +317,7 @@ fn format(
Document::Vec(vec) => {
for doc in vec.iter().rev() {
docs.push_front((indent, mode.clone(), doc));
docs.push_front((indent, mode, doc));
}
}
@@ -319,15 +325,10 @@ fn format(
docs.push_front((indent + i, mode, doc));
}
Document::NestCurrent(doc) => {
docs.push_front((width, mode, doc));
}
Document::Group(doc) | Document::FlexBreak(doc) => {
// TODO: don't clone the doc
let mut group_docs = VecDeque::new();
group_docs.push_back((indent, Mode::Unbroken, doc.as_ref()));
group_docs.push_front((indent, Mode::Unbroken, doc.as_ref()));
if fits(limit, width, group_docs) {
docs.push_front((indent, Mode::Unbroken, doc));
@@ -335,9 +336,12 @@ fn format(
docs.push_front((indent, Mode::Broken, doc));
}
}
Document::ForceBroken(document) => {
docs.push_front((indent, Mode::ForcedBroken, document));
}
}
}
Ok(())
}
pub fn nil<'a>() -> Document<'a> {
@@ -352,10 +356,6 @@ pub fn lines<'a>(i: usize) -> Document<'a> {
Document::Line(i)
}
pub fn force_break<'a>() -> Document<'a> {
Document::ForceBreak
}
pub fn break_<'a>(broken: &'a str, unbroken: &'a str) -> Document<'a> {
Document::Break {
broken,
@@ -381,8 +381,8 @@ impl<'a> Document<'a> {
Self::Nest(indent, Box::new(self))
}
pub fn nest_current(self) -> Self {
Self::NestCurrent(Box::new(self))
pub fn force_break(self) -> Self {
Self::ForceBroken(Box::new(self))
}
pub fn append(self, second: impl Documentable<'a>) -> Self {
@@ -398,8 +398,7 @@ impl<'a> Document<'a> {
pub fn to_pretty_string(self, limit: isize) -> String {
let mut buffer = String::new();
self.pretty_print(limit, &mut buffer)
.expect("Writing to string buffer failed");
self.pretty_print(limit, &mut buffer);
buffer
}
@@ -408,14 +407,12 @@ impl<'a> Document<'a> {
open.to_doc().append(self).append(closed)
}
pub fn pretty_print(&self, limit: isize, writer: &mut String) -> Result<(), Error> {
pub fn pretty_print(&self, limit: isize, writer: &mut String) {
let mut docs = VecDeque::new();
docs.push_back((0, Mode::Unbroken, self));
docs.push_front((0, Mode::Unbroken, self));
format(writer, limit, 0, docs)?;
Ok(())
format(writer, limit, 0, docs);
}
/// Returns true when the document contains no printable characters
@@ -424,12 +421,11 @@ impl<'a> Document<'a> {
use Document::*;
match self {
Line(n) => *n == 0,
ForceBreak => true,
String(s) => s.is_empty(),
Str(s) => s.is_empty(),
// assuming `broken` and `unbroken` are equivalent
Break { broken, .. } => broken.is_empty(),
FlexBreak(d) | Nest(_, d) | NestCurrent(d) | Group(d) => d.is_empty(),
ForceBroken(d) | FlexBreak(d) | Nest(_, d) | Group(d) => d.is_empty(),
Vec(docs) => docs.iter().all(|d| d.is_empty()),
}
}

View File

@@ -1,17 +1,13 @@
use chumsky::prelude::*;
use crate::{
ast::{Span, SrcId},
lexer,
token::Token,
};
use crate::{ast::Span, parser::lexer, parser::token::Token};
#[test]
fn tokens() {
let code = "pub type |> >=\n{ Thing _na_thing name";
let len = code.chars().count();
let span = |i| Span::new(SrcId::empty(), i..i + 1);
let span = |i| Span::new((), i..i + 1);
assert_eq!(
lexer::lexer()

File diff suppressed because it is too large Load Diff

View File

@@ -17,7 +17,7 @@ mod hydrator;
mod infer;
mod pattern;
mod pipe;
mod pretty;
pub mod pretty;
#[derive(Debug, Clone, PartialEq)]
pub enum Type {

View File

@@ -195,6 +195,7 @@ impl<'a> Environment<'a> {
body,
return_annotation,
return_type,
end_position,
} => {
// Lookup the inferred function information
let function = self
@@ -238,6 +239,7 @@ impl<'a> Environment<'a> {
return_annotation,
return_type,
body,
end_position,
}
}

View File

@@ -5,10 +5,10 @@ use vec1::Vec1;
use crate::{
ast::{
Annotation, Arg, ArgName, AssignmentKind, BinOp, CallArg, Clause, ClauseGuard, Constant,
RecordUpdateSpread, Span, SrcId, TodoKind, TypedArg, TypedCallArg, TypedClause,
TypedClauseGuard, TypedConstant, TypedIfBranch, TypedMultiPattern, TypedRecordUpdateArg,
UntypedArg, UntypedClause, UntypedClauseGuard, UntypedConstant, UntypedIfBranch,
UntypedMultiPattern, UntypedPattern, UntypedRecordUpdateArg,
RecordUpdateSpread, Span, TodoKind, TypedArg, TypedCallArg, TypedClause, TypedClauseGuard,
TypedConstant, TypedIfBranch, TypedMultiPattern, TypedRecordUpdateArg, UntypedArg,
UntypedClause, UntypedClauseGuard, UntypedConstant, UntypedIfBranch, UntypedMultiPattern,
UntypedPattern, UntypedRecordUpdateArg,
},
builtins::{bool, byte_array, function, int, list, result, string},
expr::{TypedExpr, UntypedExpr},
@@ -605,7 +605,6 @@ impl<'a, 'b> ExprTyper<'a, 'b> {
.ok_or_else(|| Error::UnknownModuleValue {
name: label.clone(),
location: Span {
src: SrcId::empty(),
start: module_location.end,
end: select_location.end,
},

View File

@@ -6,7 +6,7 @@ use crate::{
TypedModule, UntypedDefinition, UntypedModule,
},
builtins::function,
token::Token,
parser::token::Token,
IdGenerator,
};
@@ -150,6 +150,7 @@ fn infer_definition(
arguments: args,
body,
return_annotation,
end_position,
..
} => {
let preregistered_fn = environment
@@ -227,6 +228,7 @@ fn infer_definition(
.return_type()
.expect("Could not find return type for fn"),
body,
end_position,
})
}

View File

@@ -15,7 +15,7 @@ use super::{
PatternConstructor, Type, ValueConstructor, ValueConstructorVariant,
};
use crate::{
ast::{CallArg, Pattern, Span, SrcId, TypedPattern, UntypedMultiPattern, UntypedPattern},
ast::{CallArg, Pattern, Span, TypedPattern, UntypedMultiPattern, UntypedPattern},
builtins::{int, list, string},
};
@@ -427,7 +427,6 @@ impl<'a, 'b> PatternTyper<'a, 'b> {
if pattern_args.len() == field_map.arity as usize {
return Err(Error::UnnecessarySpreadOperator {
location: Span {
src: SrcId::empty(),
start: location.end - 3,
end: location.end - 1,
},
@@ -437,7 +436,6 @@ impl<'a, 'b> PatternTyper<'a, 'b> {
// The location of the spread operator itself
let spread_location = Span {
src: SrcId::empty(),
start: location.end - 3,
end: location.end - 1,
};

View File

@@ -3,7 +3,7 @@ use std::sync::Arc;
use vec1::Vec1;
use crate::{
ast::{AssignmentKind, CallArg, Pattern, Span, SrcId, PIPE_VARIABLE},
ast::{AssignmentKind, CallArg, Pattern, Span, PIPE_VARIABLE},
builtins::function,
expr::{TypedExpr, UntypedExpr},
};
@@ -48,7 +48,6 @@ impl<'a, 'b, 'c> PipeTyper<'a, 'b, 'c> {
argument_type: first.tipo(),
argument_location: first.location(),
location: Span {
src: SrcId::empty(),
start: first.location().start,
end: *end,
},