Emit a warning when detecting a hex string interpreted as UTF-8 bytes.
This will probably save people minutes or hours of puzzled debugging. It is only a warning because there may be cases where one does actually want to specify a hex-encoded bytearray. In that case, they can get rid of the warning by using the plain bytearray syntax (i.e. an array of bytes).
This commit is contained in:
parent
d72e13c7c8
commit
78770d14b7
|
@ -305,3 +305,17 @@ fn trace_if_false_ko() {
|
|||
Err((_, Error::CouldNotUnify { .. }))
|
||||
))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn utf8_hex_literal_warning() {
|
||||
let source_code = r#"
|
||||
pub const policy_id = "f43a62fdc3965df486de8a0d32fe800963589c41b38946602a0dc535"
|
||||
"#;
|
||||
|
||||
let (warnings, _) = check(parse(source_code)).unwrap();
|
||||
|
||||
assert!(matches!(
|
||||
warnings[0],
|
||||
Warning::Utf8ByteArrayIsValidHexString { .. }
|
||||
))
|
||||
}
|
||||
|
|
|
@ -1244,6 +1244,31 @@ pub enum Warning {
|
|||
#[label("unused")]
|
||||
location: Span,
|
||||
},
|
||||
|
||||
#[error(
|
||||
"I noticed a suspicious {type_ByteArray} UTF-8 literal which resembles a hash digest.",
|
||||
type_ByteArray = "ByteArray".bold().bright_blue()
|
||||
)]
|
||||
#[diagnostic(help("{}", formatdoc! {
|
||||
r#"When you specify a {type_ByteArray} literal using plain double-quotes, it's interpreted as an array of UTF-8 bytes. For example, the literal {literal_foo} is interpreted as the byte sequence {foo_bytes}.
|
||||
|
||||
However here, you have specified a literal that resembles a hash digest encoded as an hexadecimal string. This is a common case, but you probably want to capture the raw bytes represented by this sequence, and not the hexadecimal sequence. Fear not! Aiken provides a convenient syntax for that: just prefix the literal with {symbol_hash}. This will decode the hexadecimal string for you and capture the non-encoded bytes as a {type_ByteArray}.
|
||||
|
||||
╰─▶ {symbol_hash}{value}
|
||||
"#,
|
||||
type_ByteArray = "ByteArray".bold().bright_blue(),
|
||||
literal_foo = "\"foo\"".purple(),
|
||||
foo_bytes = "#[102, 111, 111]".purple(),
|
||||
value = "\"{value}\"".purple(),
|
||||
symbol_hash = "#".purple(),
|
||||
}))]
|
||||
#[diagnostic(code("syntax::bytearray_literal_is_hex_string"))]
|
||||
#[diagnostic(url("https://aiken-lang.org/language-tour/primitive-types#bytearray"))]
|
||||
Utf8ByteArrayIsValidHexString {
|
||||
#[label("missing '#' to decode hex string")]
|
||||
location: Span,
|
||||
value: String,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
|
|
|
@ -4,11 +4,11 @@ use vec1::Vec1;
|
|||
|
||||
use crate::{
|
||||
ast::{
|
||||
Annotation, Arg, ArgName, AssignmentKind, BinOp, CallArg, Clause, ClauseGuard, Constant,
|
||||
IfBranch, RecordUpdateSpread, Span, TraceKind, Tracing, TypedArg, TypedCallArg,
|
||||
TypedClause, TypedClauseGuard, TypedIfBranch, TypedMultiPattern, TypedRecordUpdateArg,
|
||||
UnOp, UntypedArg, UntypedClause, UntypedClauseGuard, UntypedIfBranch, UntypedMultiPattern,
|
||||
UntypedPattern, UntypedRecordUpdateArg,
|
||||
Annotation, Arg, ArgName, AssignmentKind, BinOp, ByteArrayFormatPreference, CallArg,
|
||||
Clause, ClauseGuard, Constant, IfBranch, RecordUpdateSpread, Span, TraceKind, Tracing,
|
||||
TypedArg, TypedCallArg, TypedClause, TypedClauseGuard, TypedIfBranch, TypedMultiPattern,
|
||||
TypedRecordUpdateArg, UnOp, UntypedArg, UntypedClause, UntypedClauseGuard, UntypedIfBranch,
|
||||
UntypedMultiPattern, UntypedPattern, UntypedRecordUpdateArg,
|
||||
},
|
||||
builtins::{bool, byte_array, function, int, list, string, tuple},
|
||||
expr::{TypedExpr, UntypedExpr},
|
||||
|
@ -351,8 +351,10 @@ impl<'a, 'b> ExprTyper<'a, 'b> {
|
|||
} => self.infer_tuple_index(*tuple, index, location),
|
||||
|
||||
UntypedExpr::ByteArray {
|
||||
location, bytes, ..
|
||||
} => Ok(self.infer_byte_array(bytes, location)),
|
||||
bytes,
|
||||
preferred_format,
|
||||
location,
|
||||
} => self.infer_bytearray(bytes, preferred_format, location),
|
||||
|
||||
UntypedExpr::RecordUpdate {
|
||||
location,
|
||||
|
@ -373,12 +375,27 @@ impl<'a, 'b> ExprTyper<'a, 'b> {
|
|||
}
|
||||
}
|
||||
|
||||
fn infer_byte_array(&mut self, bytes: Vec<u8>, location: Span) -> TypedExpr {
|
||||
TypedExpr::ByteArray {
|
||||
fn infer_bytearray(
|
||||
&mut self,
|
||||
bytes: Vec<u8>,
|
||||
preferred_format: ByteArrayFormatPreference,
|
||||
location: Span,
|
||||
) -> Result<TypedExpr, Error> {
|
||||
if let ByteArrayFormatPreference::Utf8String = preferred_format {
|
||||
let value = String::from_utf8(bytes.clone()).unwrap();
|
||||
let is_hex_string = hex::decode(&value).is_ok();
|
||||
if bytes.len() >= 56 && is_hex_string {
|
||||
self.environment
|
||||
.warnings
|
||||
.push(Warning::Utf8ByteArrayIsValidHexString { location, value });
|
||||
}
|
||||
}
|
||||
|
||||
Ok(TypedExpr::ByteArray {
|
||||
location,
|
||||
bytes,
|
||||
tipo: byte_array(),
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn infer_trace_if_false(
|
||||
|
@ -1357,11 +1374,14 @@ impl<'a, 'b> ExprTyper<'a, 'b> {
|
|||
location,
|
||||
bytes,
|
||||
preferred_format,
|
||||
} => Ok(Constant::ByteArray {
|
||||
} => {
|
||||
let _ = self.infer_bytearray(bytes.clone(), preferred_format, location)?;
|
||||
Ok(Constant::ByteArray {
|
||||
location,
|
||||
bytes,
|
||||
preferred_format,
|
||||
}),
|
||||
})
|
||||
}
|
||||
}?;
|
||||
|
||||
// Check type annotation is accurate.
|
||||
|
|
Loading…
Reference in New Issue