Support some single-character escape sequences in UPLC
Fixes #472. This also partially addresses #195. However, as pointed out in one of the comments, there is no 'official rule' when it comes to what should be considered a valid escape sequence. Haskell relies mostly on the AttoParsec library, and Rust also has its own set of rules. This is particularly true for Unicode escape sequences, but there is a common middle ground for some usual single-character escapes such as \n or \\. So we now at least support these. For more complicated escape sequences, please refer to #195 for now and keep the discussion going there.
This commit is contained in:
parent
9a8e17020b
commit
1d3315005c
|
@ -2720,6 +2720,7 @@ dependencies = [
|
|||
"flat-rs",
|
||||
"hex",
|
||||
"indexmap",
|
||||
"indoc",
|
||||
"itertools",
|
||||
"k256",
|
||||
"miette",
|
||||
|
|
|
@ -41,6 +41,7 @@ thiserror = "1.0.39"
|
|||
|
||||
[dev-dependencies]
|
||||
hex = "0.4.3"
|
||||
indoc = "2.0.1"
|
||||
proptest = "1.1.0"
|
||||
|
||||
[features]
|
||||
|
|
|
@ -1,5 +1,9 @@
|
|||
use std::{collections::VecDeque, fmt::Debug, rc::Rc};
|
||||
|
||||
use crate::{
|
||||
ast::{
|
||||
Constant, DeBruijn, FakeNamedDeBruijn, Name, NamedDeBruijn, Program, Term, Type, Unique,
|
||||
},
|
||||
builtins::DefaultFunction,
|
||||
};
|
||||
use anyhow::anyhow;
|
||||
use flat_rs::{
|
||||
de::{self, Decode, Decoder},
|
||||
|
@ -7,13 +11,7 @@ use flat_rs::{
|
|||
Flat,
|
||||
};
|
||||
use pallas_primitives::{babbage::PlutusData, Fragment};
|
||||
|
||||
use crate::{
|
||||
ast::{
|
||||
Constant, DeBruijn, FakeNamedDeBruijn, Name, NamedDeBruijn, Program, Term, Type, Unique,
|
||||
},
|
||||
builtins::DefaultFunction,
|
||||
};
|
||||
use std::{collections::VecDeque, fmt::Debug, rc::Rc};
|
||||
|
||||
const BUILTIN_TAG_WIDTH: u32 = 7;
|
||||
const CONST_TAG_WIDTH: u32 = 4;
|
||||
|
@ -810,11 +808,13 @@ pub fn decode_constant_tag(d: &mut Decoder) -> Result<u8, de::Error> {
|
|||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use flat_rs::Flat;
|
||||
|
||||
use crate::ast::{Name, Type};
|
||||
|
||||
use super::{Constant, Program, Term};
|
||||
use crate::{
|
||||
ast::{DeBruijn, Name, Type},
|
||||
parser,
|
||||
};
|
||||
use flat_rs::Flat;
|
||||
use indoc::indoc;
|
||||
|
||||
#[test]
|
||||
fn flat_encode_integer() {
|
||||
|
@ -961,4 +961,34 @@ mod test {
|
|||
|
||||
assert_eq!(actual_program, expected_program)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unflat_string_escape() {
|
||||
let cbor = "490000004901015c0001";
|
||||
|
||||
let program =
|
||||
Program::<DeBruijn>::from_hex(cbor, &mut Vec::new(), &mut Vec::new()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
program.to_pretty().as_str(),
|
||||
indoc! { r#"
|
||||
(program
|
||||
0.0.0
|
||||
(con string "\\")
|
||||
)"#}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn uplc_parser_string_escape() {
|
||||
let source = indoc! { r#"
|
||||
(program
|
||||
0.0.0
|
||||
(con string "\n\t\\\"\'\r")
|
||||
)"#};
|
||||
|
||||
let program = parser::program(source).unwrap();
|
||||
|
||||
assert_eq!(program.to_pretty(), source);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -168,7 +168,17 @@ peg::parser! {
|
|||
= "#" i:ident()* { hex::decode(String::from_iter(i)).unwrap() }
|
||||
|
||||
rule string() -> String
|
||||
= "\"" s:[^ '"']* "\"" { String::from_iter(s) }
|
||||
= "\"" s:character()* "\"" { String::from_iter(s) }
|
||||
|
||||
rule character() -> char
|
||||
= "\\n" { '\n' } // newline (line feed)
|
||||
/ "\\r" { '\r' } // carriage return
|
||||
/ "\\t" { '\t' } // horizontal tab
|
||||
/ "\\\"" { '\"' } // double quote
|
||||
/ "\\'" { '\'' } // single quote
|
||||
/ "\\\\" { '\\' } // backslash
|
||||
/ [ ^ '"' ]
|
||||
/ expected!("or any valid ascii character")
|
||||
|
||||
rule data() -> PlutusData
|
||||
= "#" i:ident()* {
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
use pretty::RcDoc;
|
||||
|
||||
use crate::{
|
||||
ast::{Constant, Program, Term, Type},
|
||||
flat::Binder,
|
||||
plutus_data_to_bytes,
|
||||
};
|
||||
use pretty::RcDoc;
|
||||
use std::ascii::escape_default;
|
||||
|
||||
impl<'a, T> Program<T>
|
||||
where
|
||||
|
@ -185,7 +185,15 @@ impl Constant {
|
|||
Constant::String(s) => RcDoc::text("string")
|
||||
.append(RcDoc::line())
|
||||
.append(RcDoc::text("\""))
|
||||
.append(RcDoc::text(s))
|
||||
.append(RcDoc::text(
|
||||
String::from_utf8(
|
||||
s.as_bytes()
|
||||
.iter()
|
||||
.flat_map(|c| escape_default(*c).collect::<Vec<u8>>())
|
||||
.collect(),
|
||||
)
|
||||
.unwrap(),
|
||||
))
|
||||
.append(RcDoc::text("\"")),
|
||||
Constant::Unit => RcDoc::text("unit")
|
||||
.append(RcDoc::line())
|
||||
|
|
Loading…
Reference in New Issue