Emit a warning when detecting a hex string interpreted as UTF-8 bytes.

This will probably save people minutes/hours of puzzled debugging. This is only a warning because there may be cases where one does actually want to specify a hex-encoded bytearray. In that case, they can get rid of the warning by using the plain bytearray syntax (i.e. as an array of bytes).
This commit is contained in:
KtorZ 2023-02-18 11:36:45 +01:00
parent d72e13c7c8
commit 78770d14b7
No known key found for this signature in database
GPG Key ID: 33173CB6F77F4277
3 changed files with 74 additions and 15 deletions

View File

@ -305,3 +305,17 @@ fn trace_if_false_ko() {
Err((_, Error::CouldNotUnify { .. }))
))
}
/// A double-quoted `ByteArray` literal whose content is itself a valid
/// hexadecimal string must make the type-checker emit
/// `Warning::Utf8ByteArrayIsValidHexString`.
#[test]
fn utf8_hex_literal_warning() {
    // 56 hexadecimal characters written with plain double-quotes, i.e. without
    // the `#` prefix that would decode them into raw bytes.
    let source_code = r#"
        pub const policy_id = "f43a62fdc3965df486de8a0d32fe800963589c41b38946602a0dc535"
    "#;

    let (warnings, _) = check(parse(source_code)).unwrap();

    // `first()` instead of `warnings[0]`: when no warning is produced the test
    // fails with the message below rather than an opaque out-of-bounds panic.
    assert!(
        matches!(
            warnings.first(),
            Some(Warning::Utf8ByteArrayIsValidHexString { .. })
        ),
        "expected a Utf8ByteArrayIsValidHexString warning first, got: {:?}",
        warnings
    )
}

View File

@ -1244,6 +1244,31 @@ pub enum Warning {
#[label("unused")]
location: Span,
},
/// Raised for a double-quoted (UTF-8) `ByteArray` literal that looks like a
/// hex-encoded hash digest, suggesting the `#` prefix was forgotten.
///
/// * `location` - span of the offending literal, used for the label.
/// * `value` - the literal's text, shown in the suggested fix in the help message.
#[error(
    "I noticed a suspicious {type_ByteArray} UTF-8 literal which resembles a hash digest.",
    type_ByteArray = "ByteArray".bold().bright_blue()
)]
#[diagnostic(help("{}", formatdoc! {
    r#"When you specify a {type_ByteArray} literal using plain double-quotes, it's interpreted as an array of UTF-8 bytes. For example, the literal {literal_foo} is interpreted as the byte sequence {foo_bytes}.
       However here, you have specified a literal that resembles a hash digest encoded as an hexadecimal string. This is a common case, but you probably want to capture the raw bytes represented by this sequence, and not the hexadecimal sequence. Fear not! Aiken provides a convenient syntax for that: just prefix the literal with {symbol_hash}. This will decode the hexadecimal string for you and capture the non-encoded bytes as a {type_ByteArray}.
       {symbol_hash}{value}
    "#,
    type_ByteArray = "ByteArray".bold().bright_blue(),
    literal_foo = "\"foo\"".purple(),
    foo_bytes = "#[102, 111, 111]".purple(),
    // `format!` is required here: a plain string literal is not interpolated,
    // so the help text would print the characters `{value}` verbatim instead
    // of the offending literal carried by the `value` field.
    value = format!("\"{value}\"").purple(),
    symbol_hash = "#".purple(),
}))]
#[diagnostic(code("syntax::bytearray_literal_is_hex_string"))]
#[diagnostic(url("https://aiken-lang.org/language-tour/primitive-types#bytearray"))]
Utf8ByteArrayIsValidHexString {
    #[label("missing '#' to decode hex string")]
    location: Span,
    value: String,
},
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]

View File

@ -4,11 +4,11 @@ use vec1::Vec1;
use crate::{
ast::{
Annotation, Arg, ArgName, AssignmentKind, BinOp, CallArg, Clause, ClauseGuard, Constant,
IfBranch, RecordUpdateSpread, Span, TraceKind, Tracing, TypedArg, TypedCallArg,
TypedClause, TypedClauseGuard, TypedIfBranch, TypedMultiPattern, TypedRecordUpdateArg,
UnOp, UntypedArg, UntypedClause, UntypedClauseGuard, UntypedIfBranch, UntypedMultiPattern,
UntypedPattern, UntypedRecordUpdateArg,
Annotation, Arg, ArgName, AssignmentKind, BinOp, ByteArrayFormatPreference, CallArg,
Clause, ClauseGuard, Constant, IfBranch, RecordUpdateSpread, Span, TraceKind, Tracing,
TypedArg, TypedCallArg, TypedClause, TypedClauseGuard, TypedIfBranch, TypedMultiPattern,
TypedRecordUpdateArg, UnOp, UntypedArg, UntypedClause, UntypedClauseGuard, UntypedIfBranch,
UntypedMultiPattern, UntypedPattern, UntypedRecordUpdateArg,
},
builtins::{bool, byte_array, function, int, list, string, tuple},
expr::{TypedExpr, UntypedExpr},
@ -351,8 +351,10 @@ impl<'a, 'b> ExprTyper<'a, 'b> {
} => self.infer_tuple_index(*tuple, index, location),
UntypedExpr::ByteArray {
location, bytes, ..
} => Ok(self.infer_byte_array(bytes, location)),
bytes,
preferred_format,
location,
} => self.infer_bytearray(bytes, preferred_format, location),
UntypedExpr::RecordUpdate {
location,
@ -373,12 +375,27 @@ impl<'a, 'b> ExprTyper<'a, 'b> {
}
}
fn infer_byte_array(&mut self, bytes: Vec<u8>, location: Span) -> TypedExpr {
TypedExpr::ByteArray {
fn infer_bytearray(
&mut self,
bytes: Vec<u8>,
preferred_format: ByteArrayFormatPreference,
location: Span,
) -> Result<TypedExpr, Error> {
if let ByteArrayFormatPreference::Utf8String = preferred_format {
let value = String::from_utf8(bytes.clone()).unwrap();
let is_hex_string = hex::decode(&value).is_ok();
if bytes.len() >= 56 && is_hex_string {
self.environment
.warnings
.push(Warning::Utf8ByteArrayIsValidHexString { location, value });
}
}
Ok(TypedExpr::ByteArray {
location,
bytes,
tipo: byte_array(),
}
})
}
fn infer_trace_if_false(
@ -1357,11 +1374,14 @@ impl<'a, 'b> ExprTyper<'a, 'b> {
location,
bytes,
preferred_format,
} => Ok(Constant::ByteArray {
} => {
let _ = self.infer_bytearray(bytes.clone(), preferred_format, location)?;
Ok(Constant::ByteArray {
location,
bytes,
preferred_format,
}),
})
}
}?;
// Check type annotation is accurate.