From cdccd1ade978b4218b3647372c60c477ff7aefbe Mon Sep 17 00:00:00 2001 From: Erik Stenman Date: Fri, 1 Mar 2019 12:01:58 +0100 Subject: [PATCH] Handle varaint constants and types. --- README.md | 91 +++++++++++++++++++++++-- src/aeb_fate_asm.erl | 118 +++++++++++++++++++++++++-------- src/aeb_fate_asm_scan.template | 33 +++++++-- src/aeb_fate_data.erl | 15 ++--- test/asm_code/immediates.fate | 5 ++ 5 files changed, 217 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index 96be24b..5c3405a 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,92 @@ -aebytecode +# aebytecode ===== -An OTP library +A library and stand-alone assembler for aeternity bytecode. -Build +This version supports Aevm bytecode and Fate bytecode. + +## Build ----- - $ rebar3 compile + $ make + +## Fate Code +--------- + +Fate code exists in 3 formats: + +1. Fate byte code. This format is under consensus. +2. Fate assembler. This is a text representation of fate code. + This is not under consensus and other + implementations and toolchains could have + their own format. +3. Internal. This is an Erlang representation of fate code + Used by this particular engine implementation. + +This library handles all three representations. +The byte code format is described in a separate document. +The internal format is described in a separate document. +The text representation is described below. + +### Fate Assembler Code +------------------- + +Assembler code can be read from a file. +The assembler has the following format: + + Comments start with 2 semicolons and run until the end of the line + `;; This is a comment` + Opcode mnemonics start with an upper case letter. 
+ `DUP` + Identifiers start with a lower case letter + `an_identifier` + References to function arguments start with arg followed by an integer + `arg0` + References to variables/registers start with var followed by an integer + `var0` + References to stack positions are either a (for stack 0) + or start with stack followed by an integer + `stack1` + `a` + + Immediate values can be of 9 types: + 1a. Integers as decimals: {Digits} or -{Digits} + `42` + `-2374683271468723648732648736498712634876147` + 1b. Integers as Hexadecimals: 0x{Hexdigits} + `0x0deadbeef0` + 2. addresses, a base58 encoded string starting with #{base58char} + followed by up to 64 hex chars + `#nv5B93FPzRHrGNmMdTDfGdd5xGZvep3MVSpJqzcQmMp59bBCv` + 3. Boolean true or false + `true` + `false` + 4. Strings "{Characters}" + `"Hello"` + 5. Map { Key => Value } + `{}` + `{ 1 => { "foo" => true, "bar" => false}}` + 6. Lists [ Elements ] + `[]` + `[1, 2]` + 7. Bit field < Bits > or !< Bits > + `<000>` + `<1010 1010>` + `<>` + `!<>` + 8. Tuples ( Elements ) + `()` + `(1, "foo")` + 9. Variants: (| Size | Tag | ( Elements ) |) + `(| 42 | 12 | ( "foo", 12) |)` + + Where Digits: [0123456789] + Hexdigits: [0123456789abcdef] + base58char: [123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz] + Characters any printable ascii character 0..255 (except " no quoting yet) + Key: any value except for a map + Bits: 01 or space + Elements: Nothing or Value , Elements + Size: Digits + Tag: Digits + diff --git a/src/aeb_fate_asm.erl b/src/aeb_fate_asm.erl index 17f8f60..dae9d81 100644 --- a/src/aeb_fate_asm.erl +++ b/src/aeb_fate_asm.erl @@ -34,34 +34,47 @@ %%% stack1 %%% a %%% -%%% Immediates can be of 9 types: -%%% 1. Integers +%%% Immediate values can be of 9 types: +%%% 1a. Integers as decimals: {Digits} or -{Digits} %%% 42 %%% -2374683271468723648732648736498712634876147 -%%% 2. Hexadecimal integers starting with 0x +%%% 1b. Integers as Hexadecimals: 0x{Hexdigits} %%% 0x0deadbeef0 -%%% 3. 
addresses, a 256-bit hash strings starting with # +%%% 2. addresses, a base58 encoded string starting with #{base58char} %%% followed by up to 64 hex chars %%% #00000deadbeef -%%% 4. Boolean +%%% 3. Boolean true or false %%% true %%% false -%%% 5. Strings +%%% 4. Strings "{Characters}" %%% "Hello" -%%% 6. Map +%%% 5. Map { Key => Value } %%% {} %%% { 1 => { "foo" => true, "bar" => false} -%%% 7. Lists +%%% 6. Lists [ Elements ] %%% [] %%% [1, 2] -%%% 8. Bit field +%%% 7. Bit field < Bits > or !< Bits > %%% <000> %%% <1010 1010> %%% <> %%% !<> -%%% 9. Tuples +%%% 8. Tuples ( Elements ) %%% () %%% (1, "foo") +%%% 9. Varaiants: (| Size | Tag | ( Elements ) |) +%%% (| 42 | 12 | ( "foo", 12) |) +%%% +%%% Where Digits: [0123456789] +%%% Hexdigits: [0123456789abcdef] +%%% base58char: [123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz] +%%% Characters any printable ascii character 0..255 (except " no quoting yet) +%%% Key: any value except for a map +%%% Bits: 01 or space +%%% Elements: Nothing or Value , Elements +%%% Size: Digits +%%% Tag: Digits +%%% %%% @end %%% Created : 21 Dec 2017 %%%------------------------------------------------------------------- @@ -100,7 +113,7 @@ parse_function_call([{id,_,Name}, {'(',_}| Rest]) -> to_args([{')', _}]) -> {[], []}; to_args(Tokens) -> - case to_data(Tokens) of + case parse_value(Tokens) of {Arg, [{',', _} | Rest]} -> {More, Rest2} = to_args(Rest), {[Arg|More], Rest2}; @@ -108,13 +121,6 @@ to_args(Tokens) -> {[Arg], Rest} end. -to_data([{int,_line, Int}|Rest]) -> - {Int, Rest}; -to_data([{boolean,_line, Bool}|Rest]) -> - {Bool, Rest}; -to_data([{hash,_line, Hash}|Rest]) -> - {Hash, Rest}. - pp(FateCode) -> Listing = to_asm(FateCode), io_lib:format("~ts~n",[Listing]). @@ -572,7 +578,16 @@ deserialize_type(<<6, Rest/binary>>) -> {V, Rest3} = deserialize_type(Rest2), {{map, K, V}, Rest3}; deserialize_type(<<7, Rest/binary>>) -> - {string, Rest}. 
+ {string, Rest}; +deserialize_type(<<8, Size, Rest/binary>>) -> + {Variants, Rest2} = deserialize_variants(Size, Rest, []), + {{variant, Size, Variants}, Rest2}. + +deserialize_variants(0, Rest, Variants) -> + {lists:reverse(Variants), Rest}; +deserialize_variants(N, Rest, Variants) -> + {T, Rest2} = deserialize_type(Rest), + deserialize_variants(N-1, Rest2, [T|Variants]). @@ -616,8 +631,14 @@ to_bytecode([{int,_line, Int}|Rest], Address, Env, Code, Opts) -> to_bytecode(Rest, Address, Env, [{immediate, Int}|Code], Opts); to_bytecode([{boolean,_line, Bool}|Rest], Address, Env, Code, Opts) -> to_bytecode(Rest, Address, Env, [{immediate, Bool}|Code], Opts); -to_bytecode([{hash,_line, Hash}|Rest], Address, Env, Code, Opts) -> - to_bytecode(Rest, Address, Env, [{immediate, Hash}|Code], Opts); +to_bytecode([{string,_line, String}|Rest], Address, Env, Code, Opts) -> + to_bytecode(Rest, Address, Env, + [{immediate, aeb_fate_data:make_string(String)}|Code], + Opts); +to_bytecode([{address,_line, Value}|Rest], Address, Env, Code, Opts) -> + to_bytecode(Rest, Address, Env, + [{immediate, aeb_fate_data:make_address(Value)}|Code], + Opts); to_bytecode([{id,_line, ID}|Rest], Address, Env, Code, Opts) -> {Hash, Env2} = insert_symbol(ID, Env), to_bytecode(Rest, Address, Env2, [{immediate, Hash}|Code], Opts); @@ -631,6 +652,10 @@ to_bytecode([{'(',_line}|Rest], Address, Env, Code, Opts) -> {Elements, Rest2} = parse_tuple(Rest), Tuple = aeb_fate_data:make_tuple(list_to_tuple(Elements)), to_bytecode(Rest2, Address, Env, [{immediate, Tuple}|Code], Opts); +to_bytecode([{start_variant,_line}|_] = Tokens, Address, Env, Code, Opts) -> + {Size, Tag, Values, Rest} = parse_variant(Tokens), + Variant = aeb_fate_data:make_variant(Size, Tag, Values), + to_bytecode(Rest, Address, Env, [{immediate, Variant}|Code], Opts); to_bytecode([{bits,_line, Bits}|Rest], Address, Env, Code, Opts) -> to_bytecode(Rest, Address, Env, [{immediate, aeb_fate_data:make_bits(Bits)}|Code], Opts); @@ -689,13 +714,35 
@@ parse_tuple(Tokens) -> end. +parse_variant([{start_variant,_line} + , {int,_line, Size} + , {'|',_} + , {int,_line, Tag} + , {'|',_} + , {'(',_} + | Rest]) when (Size > 0), (Tag < Size) -> + {Elements , [{end_variant, _} | Rest2]} = parse_tuple(Rest), + {Size, Tag, list_to_tuple(Elements), Rest2}. + + parse_value([{int,_line, Int} | Rest]) -> {Int, Rest}; parse_value([{boolean,_line, Bool} | Rest]) -> {Bool, Rest}; parse_value([{hash,_line, Hash} | Rest]) -> {Hash, Rest}; parse_value([{'{',_line} | Rest]) -> parse_map(Rest); parse_value([{'[',_line} | Rest]) -> parse_list(Rest); -parse_value([{'(',_line} | Rest]) -> parse_tuple(Rest). - +parse_value([{'(',_line} | Rest]) -> + {T, Rest2} = parse_tuple(Rest), + {aeb_fate_data:make_tuple(list_to_tuple(T)), Rest2}; +parse_value([{bits,_line, Bits} | Rest]) -> + {aeb_fate_data:make_bits(Bits), Rest}; +parse_value([{start_variant,_line}|_] = Tokens) -> + {Size, Tag, Values, Rest} = parse_variant(Tokens), + Variant = aeb_fate_data:make_variant(Size, Tag, Values), + {Variant, Rest}; +parse_value([{string,_line, String} | Rest]) -> + {aeb_fate_data:make_string(String), Rest}; +parse_value([{address,_line, Address} | Rest]) -> + {aeb_fate_data:make_address(Address), Rest}. to_fun_def([{id, _, Name}, {'(', _} | Rest]) -> {ArgsType, [{'to', _} | Rest2]} = to_arg_types(Rest), @@ -732,7 +779,22 @@ to_type([{'{', _}, {id, _, "map"}, {',', _} | Rest]) -> %% TODO: Error handling {KeyType, [{',', _}| Rest2]} = to_type(Rest), {ValueType, [{'}', _}| Rest3]} = to_type(Rest2), - {{map, KeyType, ValueType}, Rest3}. + {{map, KeyType, ValueType}, Rest3}; +to_type([{'{', _} + , {id, _, "variant"} + , {',', _} + , {int, _, Size} + , {',', _} + , {'[', _} + | Rest]) when Size > 0 + , Size < 256 -> + {ElementTypes, [{'}', _}| Rest2]} = to_list_of_types(Rest), + %% TODO: Error handling + if Size =:= length(ElementTypes) -> + {{variant, Size, ElementTypes}, Rest2} + end. 
+ + to_list_of_types([{']', _} | Rest]) -> {[], Rest}; to_list_of_types(Tokens) -> @@ -756,8 +818,12 @@ serialize_type({tuple, Ts}) -> serialize_type(address) -> [4]; serialize_type(bits) -> [5]; serialize_type({map, K, V}) -> [6 | serialize_type(K) ++ serialize_type(V)]; -serialize_type(string) -> [7]. - +serialize_type(string) -> [7]; +serialize_type({variant, Size, ListOfVariants}) + when Size > 0 + , Size < 256 + , Size =:= length(ListOfVariants) -> + [8, Size | [serialize_type(T) || T <- ListOfVariants]]. %% ------------------------------------------------------------------- diff --git a/src/aeb_fate_asm_scan.template b/src/aeb_fate_asm_scan.template index fa4d49d..9c6fd00 100644 --- a/src/aeb_fate_asm_scan.template +++ b/src/aeb_fate_asm_scan.template @@ -12,9 +12,10 @@ DIGIT = [0-9] HEXDIGIT = [0-9a-fA-F] LOWER = [a-z_] UPPER = [A-Z] +BASE58 = [123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz] INT = {DIGIT}+ HEX = 0x{HEXDIGIT}+ -HASH = #{HEXDIGIT}+ +HASH = #{BASE58}+ WS = [\000-\s] ID = {LOWER}[a-zA-Z0-9_]* STRING = "[^"]*" @@ -42,9 +43,9 @@ FUNCTION : {token, {function, TokenLine, 'FUNCTION' }}. -{INT} : {token, {int, TokenLine, parse_int(TokenChars)}}. {HASH} : - {token, {hash, TokenLine, parse_hash(TokenChars)}}. + {token, {address, TokenLine, parse_hash(TokenChars)}}. {STRING} : - {token, {hash, TokenLine, list_to_binary(TokenChars)}}. + {token, {string, TokenLine, list_to_binary(TokenChars)}}. {BITS} : {token, {bits, TokenLine, bits(TokenChars)}}. @@ -54,6 +55,8 @@ FUNCTION : {token, {function, TokenLine, 'FUNCTION' }}. \: : {token, {to, TokenLine}}. \=\> : {token, {arrow, TokenLine}}. +\(\| : {token, {start_variant, TokenLine}}. +\|\) : {token, {end_variant, TokenLine}}. , : {token, {',', TokenLine}}. \( : {token, {'(', TokenLine}}. @@ -62,6 +65,7 @@ FUNCTION : {token, {function, TokenLine, 'FUNCTION' }}. \] : {token, {']', TokenLine}}. \{ : {token, {'{', TokenLine}}. \} : {token, {'}', TokenLine}}. +\| : {token, {'|', TokenLine}}. 
;;.* : {token, {comment, TokenLine, drop_prefix($;, TokenChars)}}. @@ -98,8 +102,7 @@ parse_acc("a" ++ N) -> list_to_integer(N). parse_hash("#" ++ Chars) -> - N = list_to_integer(Chars, 16), - <<N:256>>. + base58_to_address(Chars). scan(S) -> string(S). @@ -117,3 +120,23 @@ bits([$> |_Rest], Acc) -> Acc; bits([$0 | Rest], Acc) -> bits(Rest, Acc bsl 1); bits([$1 | Rest], Acc) -> bits(Rest, (Acc bsl 1) bor 1); bits([$ | Rest], Acc) -> bits(Rest, Acc). + +char_to_base58(C) -> + binary:at(<<0,1,2,3,4,5,6,7,8,0,0,0,0,0,0,0,9,10,11,12,13,14,15,16,0,17, + 18,19,20,21,0,22,23,24,25,26,27,28,29,30,31,32,0,0,0,0,0,0, + 33,34,35,36,37,38,39,40,41,42,43,0,44,45,46,47,48,49,50,51, + 52,53,54,55,56,57>>, C-$1). + +base58_to_integer(C, []) -> C; +base58_to_integer(C, [X | Xs]) -> + base58_to_integer(C * 58 + char_to_base58(X), Xs). + +base58_to_integer([]) -> error; +base58_to_integer([Char]) -> char_to_base58(Char); +base58_to_integer([Char | Str]) -> + base58_to_integer(char_to_base58(Char), Str). + +base58_to_address(Base58) -> + I = base58_to_integer(Base58), + Bin = <<I:256>>, + Bin. \ No newline at end of file diff --git a/src/aeb_fate_data.erl b/src/aeb_fate_data.erl index 5ddab1e..e2308ea 100644 --- a/src/aeb_fate_data.erl +++ b/src/aeb_fate_data.erl @@ -144,7 +144,7 @@ format(?FATE_BITS(B)) when B < 0 -> format(?FATE_VARIANT(Size, Tag, T)) -> ["(| ", lists:join("| ", [integer_to_list(Size), integer_to_list(Tag) | - [format(E) || E <- erlang:tuple_to_list(T)]]), + [format(make_tuple(T))]]), " |)"]; format(M) when ?IS_FATE_MAP(M) -> ["{ ", format_kvs(maps:to_list(?FATE_MAP_VALUE(M))), " }"]; @@ -153,18 +153,13 @@ format(V) -> exit({not_a_fate_type, V}). format_bits(0, Acc) -> Acc; format_bits(N, Acc) -> - case N band 1 of - 1 -> format_bits(N bsr 1, [$1|Acc]); - 0 -> format_bits(N bsr 1, [$0|Acc]) - end. + Bit = $0 + (N band 1), + format_bits(N bsr 1, [Bit|Acc]). 
format_nbits(0, Acc) -> Acc; format_nbits(N, Acc) -> - case N band 1 of - 1 -> format_nbits(N bsr 1, [$0|Acc]); - 0 -> format_nbits(N bsr 1, [$1|Acc]) - end. - + Bit = $1 - (N band 1), + format_nbits(N bsr 1, [Bit|Acc]). format_list(List) -> ["[ ", lists:join(", ", [format(E) || E <- List]), " ]"]. diff --git a/test/asm_code/immediates.fate b/test/asm_code/immediates.fate index d3a0077..ed44029 100644 --- a/test/asm_code/immediates.fate +++ b/test/asm_code/immediates.fate @@ -65,4 +65,9 @@ FUNCTION tuple() : {tuple, [integer, boolean, string, {tuple, [integer, integer] RETURNR (42, true, "FooBar", (1, 2)) +FUNCTION address() : address + RETURNR #deadbeef +;; Option(integer) = NONE | SOME(integer) +FUNCTION varaint() : {variant, 2, [{tuple, []}, {tuple, [integer]}]} + RETURNR #deadbeef