Support some single-character escape sequences in UPLC

Fixes #472.

  This also partially addresses #195. However, as pointed out in one of
  the comments, there's no 'official rule' when it comes to what should
  be considered valid escape sequences. Haskell relies mostly on the
  AttoParsec library and Rust also has its own set of rules.

  This is particularly true for unicode escape sequences, but there is
  a common middle ground for some usual single-character escapes such as
  \n or \\. So we now at least support these.

  For more complicated escape sequences, please refer to #195 for now and
  keep the discussion going there.
This commit is contained in:
KtorZ 2023-03-30 11:46:44 +02:00
parent 9a8e17020b
commit 1d3315005c
No known key found for this signature in database
GPG Key ID: 33173CB6F77F4277
5 changed files with 67 additions and 17 deletions

1
Cargo.lock generated vendored
View File

@ -2720,6 +2720,7 @@ dependencies = [
"flat-rs",
"hex",
"indexmap",
"indoc",
"itertools",
"k256",
"miette",

View File

@ -41,6 +41,7 @@ thiserror = "1.0.39"
[dev-dependencies]
hex = "0.4.3"
indoc = "2.0.1"
proptest = "1.1.0"
[features]

View File

@ -1,5 +1,9 @@
use std::{collections::VecDeque, fmt::Debug, rc::Rc};
use crate::{
ast::{
Constant, DeBruijn, FakeNamedDeBruijn, Name, NamedDeBruijn, Program, Term, Type, Unique,
},
builtins::DefaultFunction,
};
use anyhow::anyhow;
use flat_rs::{
de::{self, Decode, Decoder},
@ -7,13 +11,7 @@ use flat_rs::{
Flat,
};
use pallas_primitives::{babbage::PlutusData, Fragment};
use crate::{
ast::{
Constant, DeBruijn, FakeNamedDeBruijn, Name, NamedDeBruijn, Program, Term, Type, Unique,
},
builtins::DefaultFunction,
};
use std::{collections::VecDeque, fmt::Debug, rc::Rc};
const BUILTIN_TAG_WIDTH: u32 = 7;
const CONST_TAG_WIDTH: u32 = 4;
@ -810,11 +808,13 @@ pub fn decode_constant_tag(d: &mut Decoder) -> Result<u8, de::Error> {
#[cfg(test)]
mod test {
use flat_rs::Flat;
use crate::ast::{Name, Type};
use super::{Constant, Program, Term};
use crate::{
ast::{DeBruijn, Name, Type},
parser,
};
use flat_rs::Flat;
use indoc::indoc;
#[test]
fn flat_encode_integer() {
@ -961,4 +961,34 @@ mod test {
assert_eq!(actual_program, expected_program)
}
// Decodes a hex-encoded program with `Program::<DeBruijn>::from_hex` and
// checks how the pretty-printer renders its string constant.
//
// The expected text is a raw string, so `"\\"` below is literally two
// backslash characters between quotes: the printer is expected to emit the
// escaped form rather than a bare backslash.
// NOTE(review): the payload contains the byte 0x5c (ASCII backslash), so the
// decoded constant is presumably the one-character string `\` — confirm.
#[test]
fn unflat_string_escape() {
let cbor = "490000004901015c0001";
let program =
Program::<DeBruijn>::from_hex(cbor, &mut Vec::new(), &mut Vec::new()).unwrap();
assert_eq!(
program.to_pretty().as_str(),
indoc! { r#"
(program
0.0.0
(con string "\\")
)"#}
);
}
// Round-trip check for escape sequences in string constants.
//
// The source text holds a string literal written with the escapes `\n`,
// `\t`, `\\`, `\"`, `\'` and `\r` (raw string, so they appear verbatim in
// the input). The program is parsed with `parser::program` and the
// pretty-printed result must be identical to the original source text.
#[test]
fn uplc_parser_string_escape() {
let source = indoc! { r#"
(program
0.0.0
(con string "\n\t\\\"\'\r")
)"#};
let program = parser::program(source).unwrap();
assert_eq!(program.to_pretty(), source);
}
}

View File

@ -168,7 +168,17 @@ peg::parser! {
= "#" i:ident()* { hex::decode(String::from_iter(i)).unwrap() }
rule string() -> String
= "\"" s:[^ '"']* "\"" { String::from_iter(s) }
= "\"" s:character()* "\"" { String::from_iter(s) }
// Parses one character of a string literal's body and returns the `char`
// it denotes.
//
// The two-character escape alternatives come first and the catch-all class
// last; `/` is ordered choice, so a recognized escape is consumed as a unit
// and mapped to the single character it stands for before the catch-all is
// ever tried.
//
// NOTE(review): the catch-all class excludes only the double quote, so a
// backslash that does not begin one of the escapes listed here is accepted
// as a literal backslash — confirm this is the intended behavior.
rule character() -> char
= "\\n" { '\n' } // newline (line feed)
/ "\\r" { '\r' } // carriage return
/ "\\t" { '\t' } // horizontal tab
/ "\\\"" { '\"' } // double quote
/ "\\'" { '\'' } // single quote
/ "\\\\" { '\\' } // backslash
// Any other character except the closing double quote is taken verbatim.
/ [ ^ '"' ]
// Reached only when every alternative above failed; supplies a description
// for the parser's "expected ..." error reporting.
/ expected!("or any valid ascii character")
rule data() -> PlutusData
= "#" i:ident()* {

View File

@ -1,10 +1,10 @@
use pretty::RcDoc;
use crate::{
ast::{Constant, Program, Term, Type},
flat::Binder,
plutus_data_to_bytes,
};
use pretty::RcDoc;
use std::ascii::escape_default;
impl<'a, T> Program<T>
where
@ -185,7 +185,15 @@ impl Constant {
Constant::String(s) => RcDoc::text("string")
.append(RcDoc::line())
.append(RcDoc::text("\""))
.append(RcDoc::text(s))
.append(RcDoc::text(
String::from_utf8(
s.as_bytes()
.iter()
.flat_map(|c| escape_default(*c).collect::<Vec<u8>>())
.collect(),
)
.unwrap(),
))
.append(RcDoc::text("\"")),
Constant::Unit => RcDoc::text("unit")
.append(RcDoc::line())