From b9acf24dca58751db2726a3620a7ec7316c8c127 Mon Sep 17 00:00:00 2001 From: Hans Svensson Date: Fri, 21 Feb 2020 09:45:11 +0100 Subject: [PATCH 1/4] Make String.aes a stdlib + add more string functions This means moving the FATE operations to StringInternal and adding to/from_list (and Char.to/from_int + Char.to_upper/lower). --- priv/stdlib/String.aes | 86 +++++++++++++++++++++++++++ src/aeso_ast_infer_types.erl | 24 ++++++-- src/aeso_ast_to_fcode.erl | 8 ++- src/aeso_fcode_to_fate.erl | 22 +++++-- test/aeso_calldata_tests.erl | 2 +- test/aeso_compiler_tests.erl | 6 +- test/contracts/bytes_to_x.aes | 2 +- test/contracts/funargs.aes | 2 +- test/contracts/more_strings.aes | 14 +++++ test/contracts/state_handling.aes | 1 + test/contracts/strings.aes | 1 + test/contracts/unapplied_builtins.aes | 1 + 12 files changed, 151 insertions(+), 18 deletions(-) create mode 100644 priv/stdlib/String.aes create mode 100644 test/contracts/more_strings.aes diff --git a/priv/stdlib/String.aes b/priv/stdlib/String.aes new file mode 100644 index 0000000..db5242f --- /dev/null +++ b/priv/stdlib/String.aes @@ -0,0 +1,86 @@ +include "List.aes" +namespace String = + function sha3(s : string) : hash = StringInternal.sha3(s) + function sha256(s : string) : hash = StringInternal.sha256(s) + function blake2b(s : string) : hash = StringInternal.blake2b(s) + + function length(s : string) : int = StringInternal.length(s) + function concat(s1 : string, s2 : string) : string = StringInternal.concat(s1, s2) + + function from_list(cs : list(char)) : string = StringInternal.from_list(cs) + function to_list(s : string) : list(char) = StringInternal.to_list(s) + + function split(i : int, s : string) : string * string = + let cs = StringInternal.to_list(s) + (StringInternal.from_list(List.take(i, cs)), StringInternal.from_list(List.drop(i, cs))) + + function at(ix : int, s : string) = + switch(List.drop(ix - 1, StringInternal.to_list(s))) + [] => None + x :: _ => Some(x) + + function tokens(s : string, pat : string) = + let pat_len = StringInternal.length(pat) + tokens_(StringInternal.to_list(pat), StringInternal.to_list(s), []) + + function to_upper(s : string) = + StringInternal.from_list([ Char.to_upper(c) | c <- StringInternal.to_list(s) ]) + + function to_lower(s : string) = + StringInternal.from_list([ Char.to_lower(c) | c <- StringInternal.to_list(s) ]) + + function contains(str : string, substr : string) : option(int) = + let last_ix = StringInternal.length(str) - (StringInternal.length(substr) - 1) + contains_(1, last_ix, StringInternal.to_list(str), StringInternal.to_list(substr)) + + function + to_int : (string, int) => option(int) + to_int(s, 10) = to_int_(List.reverse(StringInternal.to_list(s)), ch_to_int_10, 0, 1, 10) + to_int(s, 16) = to_int_(List.reverse(StringInternal.to_list(s)), ch_to_int_16, 0, 1, 16) + + function + tokens_(_, [], acc) = [StringInternal.from_list(List.reverse(acc))] + tokens_(pat, str, acc) = + switch(is_prefix(pat, str)) + Some(str') => + StringInternal.from_list(List.reverse(acc)) :: tokens_(pat, str', []) + None => + let c :: cs = str + tokens_(pat, cs, c :: acc) + + function contains_(ix, lix, str, substr) = + if(ix > lix) None + else + switch(is_prefix(substr, str)) + None => + let _ :: str = str + contains_(ix + 1, lix, str, substr) + Some(_) => + Some(ix) + + function + is_prefix([], ys) = Some(ys) + is_prefix(_, []) = None + is_prefix(x :: xs, y :: ys) = + if(x == y) is_prefix(xs, ys) + else None + + function + to_int_([], _, x, _, _) = Some(x) + to_int_(i :: is, c, x, t, f) = + switch(c(i)) + None => None + Some(i) => to_int_(is, c, x + t * i, t * f, f) + + function ch_to_int_10(c) = + let c = Char.to_int(c) + if(c >= 48 && c =< 57) Some(c - 48) + else None + + function ch_to_int_16(c) = + let c = Char.to_int(c) + if(c >= 48 && c =< 57) Some(c - 48) + elif(c >= 65 && c =< 70) Some(c - 55) + elif(c >= 97 && c =< 102) Some(c - 87) + else None + diff --git a/src/aeso_ast_infer_types.erl b/src/aeso_ast_infer_types.erl index c050226..16962d5 100644 --- a/src/aeso_ast_infer_types.erl +++ b/src/aeso_ast_infer_types.erl @@ -364,6 +364,7 @@ is_private(Ann) -> proplists:get_value(private, Ann, false). global_env() -> Ann = [{origin, system}], Int = {id, Ann, "int"}, + Char = {id, Ann, "char"}, Bool = {id, Ann, "bool"}, String = {id, Ann, "string"}, Address = {id, Ann, "address"}, @@ -586,11 +587,21 @@ global_env() -> %% Strings StringScope = #scope { funs = MkDefs( - [{"length", Fun1(String, Int)}, - {"concat", Fun([String, String], String)}, - {"sha3", Fun1(String, Hash)}, - {"sha256", Fun1(String, Hash)}, - {"blake2b", Fun1(String, Hash)}]) }, + [{"length", Fun1(String, Int)}, + {"concat", Fun([String, String], String)}, + {"to_list", Fun1(String, List(Char))}, + {"from_list", Fun1(List(Char), String)}, + {"sha3", Fun1(String, Hash)}, + {"sha256", Fun1(String, Hash)}, + {"blake2b", Fun1(String, Hash)}]) }, + + %% Chars + CharScope = #scope + { funs = MkDefs( + [{"to_int", Fun1(Char, Int)}, + {"from_int", Fun1(Int, Option(Char))}, + {"to_upper", Fun1(Char, Char)}, + {"to_lower", Fun1(Char, Char)}]) }, %% Bits BitsScope = #scope @@ -634,7 +645,8 @@ global_env() -> , ["Auth"] => AuthScope , ["Crypto"] => CryptoScope , ["MCL_BLS12_381"] => MCL_BLS12_381_Scope - , ["String"] => StringScope + , ["StringInternal"] => StringScope + , ["Char"] => CharScope , ["Bits"] => BitsScope , ["Bytes"] => BytesScope , ["Int"] => IntScope diff --git a/src/aeso_ast_to_fcode.erl b/src/aeso_ast_to_fcode.erl index 0abfb49..df6b957 100644 --- a/src/aeso_ast_to_fcode.erl +++ b/src/aeso_ast_to_fcode.erl @@ -246,8 +246,10 @@ builtins() -> {"gt_inv", 1}, {"gt_add", 2}, {"gt_mul", 2}, {"gt_pow", 2}, {"gt_is_one", 1}, {"pairing", 2}, {"miller_loop", 2}, {"final_exp", 1}, {"int_to_fr", 1}, {"int_to_fp", 1}, {"fr_to_int", 1}, {"fp_to_int", 1}]}, + {["StringInternal"], [{"length", 1}, {"concat", 2}, {"to_list", 1}, {"from_list", 1}, + {"sha3", 1}, {"sha256", 1}, {"blake2b", 1}]}, + {["Char"], [{"to_int", 1}, {"from_int", 1}, {"to_lower", 1}, {"to_upper", 1}]}, {["Auth"], [{"tx_hash", none}, {"tx", none}]}, - {["String"], [{"length", 1}, {"concat", 2}, {"sha3", 1}, {"sha256", 1}, {"blake2b", 1}]}, {["Bits"], [{"set", 2}, {"clear", 2}, {"test", 2}, {"sum", 1}, {"intersection", 2}, {"union", 2}, {"difference", 2}, {"none", none}, {"all", none}]}, {["Bytes"], [{"to_int", 1}, {"to_str", 1}, {"concat", 2}, {"split", 1}]}, @@ -1040,7 +1042,9 @@ stmts_to_fcode(Env, [Expr | Stmts]) -> op_builtins() -> [map_from_list, map_to_list, map_delete, map_member, map_size, - string_length, string_concat, string_sha3, string_sha256, string_blake2b, + stringinternal_length, stringinternal_concat, stringinternal_to_list, stringinternal_from_list, + stringinternal_sha3, stringinternal_sha256, stringinternal_blake2b, + char_to_int, char_from_int, char_to_lower, char_to_upper, bits_set, bits_clear, bits_test, bits_sum, bits_intersection, bits_union, bits_difference, int_to_str, address_to_str, crypto_verify_sig, address_to_contract, diff --git a/src/aeso_fcode_to_fate.erl b/src/aeso_fcode_to_fate.erl index a5a5139..8206c3f 100644 --- a/src/aeso_fcode_to_fate.erl +++ b/src/aeso_fcode_to_fate.erl @@ -573,8 +573,14 @@ op_to_scode(map_to_list) -> aeb_fate_ops:map_to_list(?a, ?a); op_to_scode(map_delete) -> aeb_fate_ops:map_delete(?a, ?a, ?a); op_to_scode(map_member) -> aeb_fate_ops:map_member(?a, ?a, ?a); op_to_scode(map_size) -> aeb_fate_ops:map_size_(?a, ?a); -op_to_scode(string_length) -> aeb_fate_ops:str_length(?a, ?a); -op_to_scode(string_concat) -> aeb_fate_ops:str_join(?a, ?a, ?a); +op_to_scode(stringinternal_length) -> aeb_fate_ops:str_length(?a, ?a); +op_to_scode(stringinternal_concat) -> aeb_fate_ops:str_join(?a, ?a, ?a); +op_to_scode(stringinternal_to_list) -> aeb_fate_ops:str_to_list(?a, ?a); +op_to_scode(stringinternal_from_list) -> aeb_fate_ops:str_from_list(?a, ?a); +op_to_scode(char_to_int) -> aeb_fate_ops:char_to_int(?a, ?a); +op_to_scode(char_from_int) -> aeb_fate_ops:char_from_int(?a, ?a); +op_to_scode(char_to_lower) -> aeb_fate_ops:char_to_lower(?a, ?a); +op_to_scode(char_to_upper) -> aeb_fate_ops:char_to_upper(?a, ?a); op_to_scode(bits_set) -> aeb_fate_ops:bits_set(?a, ?a, ?a); op_to_scode(bits_clear) -> aeb_fate_ops:bits_clear(?a, ?a, ?a); op_to_scode(bits_test) -> aeb_fate_ops:bits_test(?a, ?a, ?a); @@ -593,9 +599,9 @@ op_to_scode(crypto_ecrecover_secp256k1) -> aeb_fate_ops:ecrecover_secp256k1(?a, op_to_scode(crypto_sha3) -> aeb_fate_ops:sha3(?a, ?a); op_to_scode(crypto_sha256) -> aeb_fate_ops:sha256(?a, ?a); op_to_scode(crypto_blake2b) -> aeb_fate_ops:blake2b(?a, ?a); -op_to_scode(string_sha3) -> aeb_fate_ops:sha3(?a, ?a); -op_to_scode(string_sha256) -> aeb_fate_ops:sha256(?a, ?a); -op_to_scode(string_blake2b) -> aeb_fate_ops:blake2b(?a, ?a); +op_to_scode(stringinternal_sha3) -> aeb_fate_ops:sha3(?a, ?a); +op_to_scode(stringinternal_sha256) -> aeb_fate_ops:sha256(?a, ?a); +op_to_scode(stringinternal_blake2b) -> aeb_fate_ops:blake2b(?a, ?a); op_to_scode(mcl_bls12_381_g1_neg) -> aeb_fate_ops:bls12_381_g1_neg(?a, ?a); op_to_scode(mcl_bls12_381_g1_norm) -> aeb_fate_ops:bls12_381_g1_norm(?a, ?a); op_to_scode(mcl_bls12_381_g1_valid) -> aeb_fate_ops:bls12_381_g1_valid(?a, ?a); @@ -919,6 +925,12 @@ attributes(I) -> {'BLS12_381_INT_TO_FP', A, B} -> Pure(A, [B]); {'BLS12_381_FR_TO_INT', A, B} -> Pure(A, [B]); {'BLS12_381_FP_TO_INT', A, B} -> Pure(A, [B]); + {'STRING_TO_LIST', A, B} -> Pure(A, [B]); + {'STRING_FROM_LIST', A, B} -> Pure(A, [B]); + {'CHAR_TO_INT', A, B} -> Pure(A, [B]); + {'CHAR_FROM_INT', A, B} -> Pure(A, [B]); + {'CHAR_TO_UPPER', A, B} -> Pure(A, [B]); + {'CHAR_TO_LOWER', A, B} -> Pure(A, [B]); {'ABORT', A} -> Impure(pc, A); {'EXIT', A} -> Impure(pc, A); 'NOP' -> Pure(none, []) diff --git a/test/aeso_calldata_tests.erl b/test/aeso_calldata_tests.erl index 46e0be9..ff14259 100644 --- a/test/aeso_calldata_tests.erl +++ b/test/aeso_calldata_tests.erl @@ -142,4 +142,4 @@ compilable_contracts() -> not_yet_compilable(fate) -> []; not_yet_compilable(aevm) -> - []. + ["funargs", "strings"]. diff --git a/test/aeso_compiler_tests.erl b/test/aeso_compiler_tests.erl index ee5991a..97ad893 100644 --- a/test/aeso_compiler_tests.erl +++ b/test/aeso_compiler_tests.erl @@ -168,11 +168,13 @@ compilable_contracts() -> "pairing_crypto", "qualified_constructor", "let_patterns", - "lhs_matching" + "lhs_matching", + "more_strings" ]. not_yet_compilable(fate) -> []; -not_yet_compilable(aevm) -> ["pairing_crypto", "aens_update", "basic_auth_tx"]. +not_yet_compilable(aevm) -> ["pairing_crypto", "aens_update", "basic_auth_tx", "more_strings", + "unapplied_builtins", "bytes_to_x", "state_handling"]. %% Contracts that should produce type errors diff --git a/test/contracts/bytes_to_x.aes b/test/contracts/bytes_to_x.aes index 6054301..6ab4852 100644 --- a/test/contracts/bytes_to_x.aes +++ b/test/contracts/bytes_to_x.aes @@ -1,4 +1,4 @@ - +include "String.aes" contract BytesToX = entrypoint to_int(b : bytes(42)) : int = Bytes.to_int(b) diff --git a/test/contracts/funargs.aes b/test/contracts/funargs.aes index b63edff..450c158 100644 --- a/test/contracts/funargs.aes +++ b/test/contracts/funargs.aes @@ -1,4 +1,4 @@ - +include "String.aes" contract FunctionArguments = entrypoint sum(n : int, m: int) = diff --git a/test/contracts/more_strings.aes b/test/contracts/more_strings.aes new file mode 100644 index 0000000..9579423 --- /dev/null +++ b/test/contracts/more_strings.aes @@ -0,0 +1,14 @@ +include "String.aes" +contract StringX = + entrypoint test() = + let s1 = "a string" + let s2 = "another string" + let s = String.concat(s1, s2) + String.sha256(s) + String.length(s1) + String.from_list(String.to_list(s)) + String.split(4, s1) + String.at(2, s2) + String.tokens(s, ",") + String.to_upper(s1) + String.to_lower(s2) diff --git a/test/contracts/state_handling.aes b/test/contracts/state_handling.aes index e4311d6..00c2fcb 100644 --- a/test/contracts/state_handling.aes +++ b/test/contracts/state_handling.aes @@ -1,3 +1,4 @@ +include "String.aes" contract Remote = record rstate = { i : int, s : string, m : map(int, int) } diff --git a/test/contracts/strings.aes b/test/contracts/strings.aes index cbde027..752266b 100644 --- a/test/contracts/strings.aes +++ b/test/contracts/strings.aes @@ -1,3 +1,4 @@ +include "String.aes" contract Strings = entrypoint str_len(s) = String.length(s) entrypoint str_concat(s1, s2) = String.concat(s1, s2) diff --git a/test/contracts/unapplied_builtins.aes b/test/contracts/unapplied_builtins.aes index 3d3bb39..678b085 100644 --- a/test/contracts/unapplied_builtins.aes +++ b/test/contracts/unapplied_builtins.aes @@ -7,6 +7,7 @@ // AENS.transfer // AENS.revoke // Oracle.extend +include "String.aes" contract UnappliedBuiltins = entrypoint main() = () type o = oracle(int, int) -- 2.30.2 From 2bad76314f90cb871122b8395feddf79c1de843e Mon Sep 17 00:00:00 2001 From: Hans Svensson Date: Mon, 24 Feb 2020 15:34:23 +0100 Subject: [PATCH 2/4] More efficient implementations in String.aes --- priv/stdlib/String.aes | 63 +++++++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/priv/stdlib/String.aes b/priv/stdlib/String.aes index db5242f..9e9efb2 100644 --- a/priv/stdlib/String.aes +++ b/priv/stdlib/String.aes @@ -15,30 +15,41 @@ namespace String = (StringInternal.from_list(List.take(i, cs)), StringInternal.from_list(List.drop(i, cs))) function at(ix : int, s : string) = - switch(List.drop(ix - 1, StringInternal.to_list(s))) + switch(List.drop(ix, StringInternal.to_list(s))) [] => None x :: _ => Some(x) - function tokens(s : string, pat : string) = - let pat_len = StringInternal.length(pat) - tokens_(StringInternal.to_list(pat), StringInternal.to_list(s), []) - function to_upper(s : string) = - StringInternal.from_list([ Char.to_upper(c) | c <- StringInternal.to_list(s) ]) + StringInternal.from_list(List.map(Char.to_upper, StringInternal.to_list(s))) function to_lower(s : string) = - StringInternal.from_list([ Char.to_lower(c) | c <- StringInternal.to_list(s) ]) + StringInternal.from_list(List.map(Char.to_lower, StringInternal.to_list(s))) function contains(str : string, substr : string) : option(int) = - let last_ix = StringInternal.length(str) - (StringInternal.length(substr) - 1) - contains_(1, last_ix, StringInternal.to_list(str), StringInternal.to_list(substr)) + if(substr == "") Some(0) + else + contains_(0, StringInternal.to_list(str), StringInternal.to_list(substr)) - function - to_int : (string, int) => option(int) - to_int(s, 10) = to_int_(List.reverse(StringInternal.to_list(s)), ch_to_int_10, 0, 1, 10) - to_int(s, 16) = to_int_(List.reverse(StringInternal.to_list(s)), ch_to_int_16, 0, 1, 16) + function tokens(s : string, pat : string) = + require(pat != "", "String.tokens: empty pattern") + tokens_(StringInternal.to_list(pat), StringInternal.to_list(s), []) - function + function to_int(s : string) : option(int) = + let s = StringInternal.to_list(s) + switch(is_prefix(['-'], s)) + None => to_int_pos(s) + Some(s) => switch(to_int_pos(s)) + None => None + Some(x) => Some(-x) + + private function to_int_pos(s : list(char)) = + switch(is_prefix(['0', 'x'], s)) + None => + to_int_(s, ch_to_int_10, 0, 10) + Some(s) => + to_int_(s, ch_to_int_16, 0, 16) + + private function tokens_(_, [], acc) = [StringInternal.from_list(List.reverse(acc))] tokens_(pat, str, acc) = switch(is_prefix(pat, str)) @@ -48,36 +59,36 @@ namespace String = let c :: cs = str tokens_(pat, cs, c :: acc) - function contains_(ix, lix, str, substr) = - if(ix > lix) None - else + private function + contains_(_, [], _) = None + contains_(ix, str, substr) = switch(is_prefix(substr, str)) None => let _ :: str = str - contains_(ix + 1, lix, str, substr) + contains_(ix + 1, str, substr) Some(_) => Some(ix) - function + private function is_prefix([], ys) = Some(ys) is_prefix(_, []) = None is_prefix(x :: xs, y :: ys) = if(x == y) is_prefix(xs, ys) else None - function - to_int_([], _, x, _, _) = Some(x) - to_int_(i :: is, c, x, t, f) = - switch(c(i)) + private function + to_int_([], _, x, _) = Some(x) + to_int_(i :: is, value, x, b) = + switch(value(i)) None => None - Some(i) => to_int_(is, c, x + t * i, t * f, f) + Some(i) => to_int_(is, value, x * b + i, b) - function ch_to_int_10(c) = + private function ch_to_int_10(c) = let c = Char.to_int(c) if(c >= 48 && c =< 57) Some(c - 48) else None - function ch_to_int_16(c) = + private function ch_to_int_16(c) = let c = Char.to_int(c) if(c >= 48 && c =< 57) Some(c - 48) elif(c >= 65 && c =< 70) Some(c - 55) -- 2.30.2 From e98edd4eefc0d2a6f196fb4ce9823837d3691390 Mon Sep 17 00:00:00 2001 From: Hans Svensson Date: Mon, 24 Feb 2020 15:35:54 +0100 Subject: [PATCH 3/4] Handle UTF-8 in character literals Also handle `\x{hhh..}` in strings... Character literals has to be a single character, not composite. + tests (and the corresponding fix to the char literal pretty printer) --- src/aeso_pretty.erl | 9 ++++-- src/aeso_scan.erl | 58 +++++++++++++++++++++----------------- test/aeso_parser_tests.erl | 5 ++-- test/contracts/utf8.aes | 21 ++++++++++++++ 4 files changed, 63 insertions(+), 30 deletions(-) create mode 100644 test/contracts/utf8.aes diff --git a/src/aeso_pretty.erl b/src/aeso_pretty.erl index bf00107..919c9ef 100644 --- a/src/aeso_pretty.erl +++ b/src/aeso_pretty.erl @@ -369,8 +369,13 @@ expr_p(_, {char, _, C}) -> case C of $' -> text("'\\''"); $" -> text("'\"'"); - _ -> S = lists:flatten(io_lib:format("~p", [[C]])), - text("'" ++ tl(lists:droplast(S)) ++ "'") + _ when C < 16#80 -> + S = lists:flatten(io_lib:format("~p", [[C]])), + text("'" ++ tl(lists:droplast(S)) ++ "'"); + _ -> + S = lists:flatten( + io_lib:format("'~ts'", [list_to_binary(aeso_scan:utf8_encode([C]))])), + text(S) end; %% -- Names expr_p(_, E = {id, _, _}) -> name(E); diff --git a/src/aeso_scan.erl b/src/aeso_scan.erl index 1c30016..e81757f 100644 --- a/src/aeso_scan.erl +++ b/src/aeso_scan.erl @@ -7,7 +7,7 @@ %%%------------------------------------------------------------------- -module(aeso_scan). --export([scan/1]). +-export([scan/1, utf8_encode/1]). -import(aeso_scan_lib, [token/1, token/2, symbol/0, skip/0, override/2, push/2, pop/1]). @@ -28,7 +28,13 @@ lexer() -> QID = ["(", CON, "\\.)+", ID], QCON = ["(", CON, "\\.)+", CON], OP = "[=!<>+\\-*/:&|?~@^]+", - CHAR = "'([^'\\\\]|(\\\\.))'", + %% Five cases for a character + %% * 1 7-bit ascii, not \ or ' + %% * 2-4 8-bit values (UTF8) + %% * \ followed by a known modifier [aernrtv] + %% * \xhh + %% * \x{hhh...} + CHAR = "'(([\\x00-\\x26\\x28-\\x5b\\x5d-\\x7f])|([\\x00-\\xff][\\x80-\\xff]{1,3})|(\\\\[befnrtv'\\\\])|(\\\\x[0-9a-fA-F]{2,2})|(\\\\x\\{[0-9a-fA-F]*\\}))'", STRING = "\"([^\"\\\\]|(\\\\.))*\"", CommentStart = {"/\\*", push(comment, skip())}, @@ -77,34 +83,34 @@ scan(String) -> %% -- Helpers ---------------------------------------------------------------- parse_string([$" | Chars]) -> - unescape(Chars). + unicode:characters_to_nfc_binary(unescape(Chars)). -parse_char([$', $\\, Code, $']) -> - case Code of - $' -> $'; - $\\ -> $\\; - $b -> $\b; - $e -> $\e; - $f -> $\f; - $n -> $\n; - $r -> $\r; - $t -> $\t; - $v -> $\v; - _ -> {error, "Bad control sequence: \\" ++ [Code]} - end; -parse_char([$', C, $']) -> C. +parse_char([$' | Chars]) -> + case unicode:characters_to_nfc_list(unescape($', Chars, [])) of + [Char] -> Char; + _Bad -> {error, "Bad character literal: '" ++ Chars} + end. -unescape(Str) -> unescape(Str, []). +utf8_encode(Cs) -> + binary_to_list(unicode:characters_to_binary(Cs)). -unescape([$"], Acc) -> +unescape(Str) -> unescape($", Str, []). + +unescape(Delim, [Delim], Acc) -> list_to_binary(lists:reverse(Acc)); -unescape([$\\, $x, D1, D2 | Chars ], Acc) -> +unescape(Delim, [$\\, $x, ${ | Chars ], Acc) -> + {Ds, [_ | Cs]} = lists:splitwith(fun($}) -> false ; (_) -> true end, Chars), + C = list_to_integer(Ds, 16), + Utf8Cs = binary_to_list(unicode:characters_to_binary([C])), + unescape(Delim, Cs, [Utf8Cs | Acc]); +unescape(Delim, [$\\, $x, D1, D2 | Chars ], Acc) -> C = list_to_integer([D1, D2], 16), - unescape(Chars, [C | Acc]); -unescape([$\\, Code | Chars], Acc) -> - Ok = fun(C) -> unescape(Chars, [C | Acc]) end, + Utf8Cs = binary_to_list(unicode:characters_to_binary([C])), + unescape(Delim, Chars, [Utf8Cs | Acc]); +unescape(Delim, [$\\, Code | Chars], Acc) -> + Ok = fun(C) -> unescape(Delim, Chars, [C | Acc]) end, case Code of - $" -> Ok($"); + Delim -> Ok(Delim); $\\ -> Ok($\\); $b -> Ok($\b); $e -> Ok($\e); @@ -115,8 +121,8 @@ unescape([$\\, Code | Chars], Acc) -> $v -> Ok($\v); _ -> error("Bad control sequence: \\" ++ [Code]) %% TODO end; -unescape([C | Chars], Acc) -> - unescape(Chars, [C | Acc]). +unescape(Delim, [C | Chars], Acc) -> + unescape(Delim, Chars, [C | Acc]). strip_underscores(S) -> lists:filter(fun(C) -> C /= $_ end, S). diff --git a/test/aeso_parser_tests.erl b/test/aeso_parser_tests.erl index 21d7ff7..c978b0a 100644 --- a/test/aeso_parser_tests.erl +++ b/test/aeso_parser_tests.erl @@ -63,7 +63,8 @@ simple_contracts_test_() -> %% Parse tests of example contracts [ {lists:concat(["Parse the ", Contract, " contract."]), fun() -> roundtrip_contract(Contract) end} - || Contract <- [counter, voting, all_syntax, '05_greeter', aeproof, multi_sig, simple_storage, fundme, dutch_auction] ] + || Contract <- [counter, voting, all_syntax, '05_greeter', aeproof, + multi_sig, simple_storage, fundme, dutch_auction, utf8] ] }. parse_contract(Name) -> @@ -85,7 +86,7 @@ parse_expr(Text) -> round_trip(Text) -> Contract = parse_string(Text), Text1 = prettypr:format(aeso_pretty:decls(strip_stdlib(Contract))), - Contract1 = parse_string(Text1), + Contract1 = parse_string(aeso_scan:utf8_encode(Text1)), NoSrcLoc = remove_line_numbers(Contract), NoSrcLoc1 = remove_line_numbers(Contract1), ?assertMatch(NoSrcLoc, diff(NoSrcLoc, NoSrcLoc1)). diff --git a/test/contracts/utf8.aes b/test/contracts/utf8.aes new file mode 100644 index 0000000..3e82b6b --- /dev/null +++ b/test/contracts/utf8.aes @@ -0,0 +1,21 @@ +contract UTF8 = + entrypoint f1() : char = '1' + entrypoint f2() : char = '+' + entrypoint f3() : char = 'd' + entrypoint f4() : char = 'X' + entrypoint f5() : char = 'å' + entrypoint f6() : char = 'Ä' + entrypoint f7() : char = 'æ' + entrypoint f8() : char = 'ë' + entrypoint f9() : char = 'ẻ' + entrypoint f10() : char = '\x27' + entrypoint f11() : char = '\x{2200}' + entrypoint f12() : char = '💩' + entrypoint f13() : char = '\n' + + + + // entrypoint f13() : char = 'e̊' + // entrypoint f14() : char = '\Ì' + + // '💩' vs. map('a,'b) -- 2.30.2 From 3da694e798d1725342168f205319386e530006b9 Mon Sep 17 00:00:00 2001 From: Hans Svensson Date: Mon, 24 Feb 2020 15:36:19 +0100 Subject: [PATCH 4/4] New aebytecode commit hash --- rebar.config | 2 +- rebar.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rebar.config b/rebar.config index 7b515ec..9b46fa8 100644 --- a/rebar.config +++ b/rebar.config @@ -2,7 +2,7 @@ {erl_opts, [debug_info]}. -{deps, [ {aebytecode, {git, "https://github.com/aeternity/aebytecode.git", {ref,"ff5a4c7"}}} +{deps, [ {aebytecode, {git, "https://github.com/aeternity/aebytecode.git", {ref,"94d0b98"}}} , {getopt, "1.0.1"} , {eblake2, "1.0.0"} , {jsx, {git, "https://github.com/talentdeficit/jsx.git", diff --git a/rebar.lock b/rebar.lock index cf6a6be..d9b253e 100644 --- a/rebar.lock +++ b/rebar.lock @@ -1,7 +1,7 @@ {"1.1.0", [{<<"aebytecode">>, {git,"https://github.com/aeternity/aebytecode.git", - {ref,"ff5a4c7dd54ec22d30f16152341fb9ffcd6cc135"}}, + {ref,"94d0b984e98cf2c2477e85f88d52de82e130f06b"}}, 0}, {<<"aeserialization">>, {git,"https://github.com/aeternity/aeserialization.git", -- 2.30.2