diff --git a/.gitignore b/.gitignore index 318406e..cde56db 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,6 @@ rel/example_project .concrete/DEV_MODE .rebar aeb_asm_scan.erl +aefa_asm_scan.erl _build/ +aefateasm diff --git a/include/aefa_data.hrl b/include/aefa_data.hrl new file mode 100644 index 0000000..424c9b1 --- /dev/null +++ b/include/aefa_data.hrl @@ -0,0 +1,55 @@ +-define(FATE_INTEGER_T, integer()). +-define(FATE_BYTE_T, 0..255). +-define(FATE_BOOLEAN_T, true | false). +-define(FATE_NIL_T, []). +-define(FATE_LIST_T, list()). +-define(FATE_UNIT_T, {tuple, {}}). +-define(FATE_MAP_T, #{ fate_type() => fate_type() }). +-define(FATE_STRING_T, binary()). +-define(FATE_ADDRESS_T, {address, <<_:256>>}). +-define(FATE_VARIANT_T, {variant, ?FATE_BYTE_T, ?FATE_BYTE_T, tuple()}). +-define(FATE_VOID_T, void). +-define(FATE_TUPLE_T, {tuple, tuple()}). +-define(FATE_BITS_T, {bits, integer()}). + +-define(IS_FATE_INTEGER(X), is_integer(X)). +-define(IS_FATE_LIST(X), (is_list(X))). +-define(IS_FATE_STRING(X), (is_binary(X))). +-define(IS_FATE_MAP(X), (is_map(X))). +-define(IS_FATE_TUPLE(X), (is_tuple(X) andalso (tuple == element(1, X) andalso is_tuple(element(2, X))))). +-define(IS_FATE_ADDRESS(X), (is_tuple(X) andalso (address == element(1, X) andalso is_binary(element(2, X))))). +-define(IS_FATE_BITS(X), (is_tuple(X) andalso (bits == element(1, X) andalso is_integer(element(2, X))))). +-define(IS_FATE_VARIANT(X), (is_tuple(X) + andalso + (variant == element(1, X) + andalso is_integer(element(2, X)) + andalso is_integer(element(3, X)) + andalso is_tuple(element(4, X)) + ))). +-define(IS_FATE_BOOLEAN(X), is_boolean(X)). + +-define(FATE_UNIT, {tuple, {}}). +-define(FATE_TUPLE(T), {tuple, T}). +-define(FATE_ADDRESS(A), {address, A}). +-define(FATE_BITS(B), {bits, B}). + + +-define(FATE_INTEGER_VALUE(X), (X)). +-define(FATE_LIST_VALUE(X), (X)). +-define(FATE_STRING_VALUE(X), (X)). +-define(FATE_ADDRESS_VALUE(X), (element(2, X))). +-define(FATE_MAP_VALUE(X), (X)). 
+-define(FATE_MAP_SIZE(X), (map_size(X))). +-define(FATE_STRING_SIZE(X), (byte_size(X))). +-define(FATE_TRUE, true). +-define(FATE_FALSE, false). +-define(FATE_NIL, []). +-define(FATE_VOID, void). +-define(FATE_EMPTY_STRING, <<>>). +-define(FATE_STRING(S), S). +-define(FATE_VARIANT(Size, Tag,T), {variant, Size, Tag, T}). + +-define(MAKE_FATE_INTEGER(X), X). +-define(MAKE_FATE_LIST(X), X). +-define(MAKE_FATE_MAP(X), X). +-define(MAKE_FATE_STRING(X), X). diff --git a/include/aefa_opcodes.hrl b/include/aefa_opcodes.hrl new file mode 100644 index 0000000..e1f107c --- /dev/null +++ b/include/aefa_opcodes.hrl @@ -0,0 +1,110 @@ + +%% FATE opcodes +-define('NOP' , 16#00). +-define('RETURN' , 16#01). +-define('CALL' , 16#02). +-define('CALL_R' , 16#03). +-define('CALL_T' , 16#04). +-define('CALL_TR' , 16#05). +-define('JUMP' , 16#06). +-define('JUMPIF' , 16#07). +-define('SWITCH' , 16#08). +-define('PUSH' , 16#09). +-define('DUP' , 16#0a). +-define('POP' , 16#0b). +-define('STORE' , 16#10). +-define('ADD' , 16#11). +-define('MUL' , 16#12). +-define('SUB' , 16#13). +-define('DIV' , 16#14). +-define('MOD' , 16#15). +-define('POW' , 16#16). +-define('LT' , 16#17). +-define('GT' , 16#18). +-define('EQ' , 16#19). +-define('ELT' , 16#1a). +-define('EGT' , 16#1b). +-define('NEQ' , 16#1c). +-define('AND' , 16#1d). +-define('OR' , 16#1e). +-define('NOT' , 16#1f). +-define('TUPLE' , 16#20). +-define('ELEMENT' , 16#21). +-define('MAP_EMPTY' , 16#22). +-define('MAP_LOOKUP' , 16#23). +-define('MAP_UPDATE' , 16#24). +-define('MAP_DELETE' , 16#25). +-define('MAP_MEMBER' , 16#26). +-define('MAP_FROM_LIST' , 16#27). +-define('NIL' , 16#28). +-define('IS_NIL' , 16#29). +-define('CONS' , 16#2a). +-define('HD' , 16#2b). +-define('TL' , 16#2c). +-define('LENGTH' , 16#2d). +-define('STR_EQ' , 16#2e). +-define('STR_JOIN' , 16#2f). +-define('ADDR_TO_STR' , 16#30). +-define('STR_REVERSE' , 16#31). +-define('INT_TO_ADDR' , 16#32). +-define('VARIANT' , 16#33). +-define('VARIANT_TEST' , 16#34). 
+-define('VARIANT_ELEMENT', 16#35). +-define('BITS_NONE' , 16#36). +-define('BITS_ALL' , 16#37). +-define('BITS_SET' , 16#38). +-define('BITS_CLEAR' , 16#39). +-define('BITS_TEST' , 16#3a). +-define('BITS_SUM' , 16#3b). +-define('BITS_OR' , 16#3c). +-define('BITS_AND' , 16#3d). +-define('BITS_DIFF' , 16#3e). +-define('ADDRESS' , 16#3f). +-define('BALANCE' , 16#40). +-define('ORIGIN' , 16#41). +-define('CALLER' , 16#42). +-define('GASPRICE' , 16#43). +-define('BLOCKHASH' , 16#44). +-define('BENEFICIARY' , 16#45). +-define('TIMESTAMP' , 16#46). +-define('NUMBER' , 16#47). +-define('DIFFICULTY' , 16#48). +-define('GASLIMIT' , 16#49). +-define('GAS' , 16#4a). +-define('LOG0' , 16#4b). +-define('LOG1' , 16#4c). +-define('LOG2' , 16#4d). +-define('LOG3' , 16#4e). +-define('LOG4' , 16#4f). +-define('ABORT' , 16#50). +-define('EXIT' , 16#51). +-define('DEACTIVATE' , 16#52). +-define('INC' , 16#53). +-define('DEC' , 16#54). +-define('INT_TO_STR' , 16#55). +-define('SPEND' , 16#56). +-define('ORACLE_REGISTER', 16#57). +-define('ORACLE_QUERY' , 16#58). +-define('ORACLE_RESPOND' , 16#59). +-define('ORACLE_EXTEND' , 16#5a). +-define('ORACLE_GET_ANSWER', 16#5b). +-define('ORACLE_GET_QUESTION', 16#5c). +-define('ORACLE_QUERY_FEE', 16#5d). +-define('AENS_RESOLVE' , 16#5e). +-define('AENS_PRECLAIM' , 16#5f). +-define('AENS_CLAIM' , 16#60). +-define('AENS_UPDATE' , 16#61). +-define('AENS_TRANSFER' , 16#62). +-define('AENS_REVOKE' , 16#63). +-define('ECVERIFY' , 16#64). +-define('SHA3' , 16#65). +-define('SHA256' , 16#66). +-define('BLAKE2B' , 16#67). +-define('RETURNR' , 16#68). +-define('MAP_LOOKUPD' , 16#69). + +-define('FUNCTION' , 16#fe). +-define('EXTEND' , 16#ff). + +-define( COMMENT(X), {comment, X}). + diff --git a/rebar.config b/rebar.config index cfd8b45..aae208e 100644 --- a/rebar.config +++ b/rebar.config @@ -2,10 +2,36 @@ {erl_opts, [debug_info]}. -{deps, []}. +{deps, [ {getopt, "1.0.1"} + ]}. + + +{escript_incl_apps, [aebytecode, getopt]}. 
+{escript_main_app, aebytecode}. +{escript_name, aefateasm}. +{escript_emu_args, "%%! +sbtu +A0\n"}. +{provider_hooks, [{post, [{compile, escriptize}]}]}. + +{post_hooks, [{"(linux|darwin|solaris|freebsd|netbsd|openbsd)", + escriptize, + "cp \"$REBAR_BUILD_DIR/bin/aefateasm\" ./aefateasm"}, + {"win32", + escriptize, + "robocopy \"%REBAR_BUILD_DIR%/bin/\" ./ aefateasm* " + "/njs /njh /nfl /ndl & exit /b 0"} % silence things + ]}. {dialyzer, [ {warnings, [unknown]}, {plt_apps, all_deps}, - {base_plt_apps, [erts, kernel, stdlib]} + {base_plt_apps, [erts, kernel, stdlib, crypto]} ]}. + + +{relx, [{release, {aessembler, "0.0.1"}, + [aebytecode, getopt]}, + + {dev_mode, true}, + {include_erts, false}, + + {extended_start_script, true}]}. diff --git a/rebar.lock b/rebar.lock index 57afcca..3c625aa 100644 --- a/rebar.lock +++ b/rebar.lock @@ -1 +1,10 @@ -[]. +{"1.1.0", +[{<<"enacl">>, + {git,"https://github.com/aeternity/enacl.git", + {ref,"26180f42c0b3a450905d2efd8bc7fd5fd9cece75"}}, + 0}, + {<<"getopt">>,{pkg,<<"getopt">>,<<"1.0.1">>},0}]}. +[ +{pkg_hash,[ + {<<"getopt">>, <<"C73A9FA687B217F2FF79F68A3B637711BB1936E712B521D8CE466B29CBF7808A">>}]} +]. diff --git a/src/ae_rlp.erl b/src/ae_rlp.erl new file mode 100644 index 0000000..6e537b4 --- /dev/null +++ b/src/ae_rlp.erl @@ -0,0 +1,91 @@ +%%%------------------------------------------------------------------- +%%% @copyright (C) 2017, Aeternity Anstalt +%%% @doc +%%% Implementation of the Recursive Length Prefix. +%%% +%%% https://github.com/ethereum/wiki/wiki/RLP +%%% +%%% @end +%%%------------------------------------------------------------------- + +-module(ae_rlp). +-export([ decode/1 + , decode_one/1 + , encode/1 + ]). + +-export_type([ encodable/0 + , encoded/0 + ]). + +-type encodable() :: [encodable()] | binary(). +-type encoded() :: <<_:8, _:_*8>>. + +-define(UNTAGGED_SIZE_LIMIT , 55). +-define(UNTAGGED_LIMIT , 127). +-define(BYTE_ARRAY_OFFSET , 128). +-define(LIST_OFFSET , 192). 
+ + +-spec encode(encodable()) -> encoded(). +encode(X) -> + encode(X, []). + +encode(<> = X,_Opts) when B =< ?UNTAGGED_LIMIT -> + %% An untagged value + X; +encode(X,_Opts) when is_binary(X) -> + %% Byte array + add_size(?BYTE_ARRAY_OFFSET, X); +encode(L, Opts) when is_list(L) -> + %% Lists items are encoded and concatenated + ByteArray = << << (encode(X, Opts))/binary >> || X <- L >>, + add_size(?LIST_OFFSET, ByteArray). + +add_size(Offset, X) when byte_size(X) =< ?UNTAGGED_SIZE_LIMIT -> + %% The size fits in one tagged byte + <<(Offset + byte_size(X)), X/binary>>; +add_size(Offset, X) when is_binary(X) -> + %% The size itself needs to be encoded as a byte array + %% Add the tagged size of the size byte array + SizeBin = binary:encode_unsigned(byte_size(X)), + TaggedSize = ?UNTAGGED_SIZE_LIMIT + Offset + byte_size(SizeBin), + true = (TaggedSize < 256 ), %% Assert + <>. + +-spec decode(encoded()) -> encodable(). +decode(Bin) when is_binary(Bin), byte_size(Bin) > 0 -> + case decode_one(Bin) of + {X, <<>>} -> X; + {X, Left} -> error({trailing, X, Bin, Left}) + end. + +decode_one(<>) when X =< ?UNTAGGED_LIMIT -> + %% Untagged value + {<>, B}; +decode_one(<> = B) when L < ?LIST_OFFSET -> + %% Byte array + {Size, Rest} = decode_size(B, ?BYTE_ARRAY_OFFSET), + <> = Rest, + {X, Tail}; +decode_one(<<_/binary>> = B) -> + %% List + {Size, Rest} = decode_size(B, ?LIST_OFFSET), + <> = Rest, + {decode_list(X), Tail}. + +decode_size(<>, Offset) when L =< Offset + ?UNTAGGED_SIZE_LIMIT-> + %% One byte tagged size. + {L - Offset, B}; +decode_size(<<_, 0, _/binary>>,_Offset) -> + error(leading_zeroes_in_size); +decode_size(<>, Offset) -> + %% Actual size is in a byte array. + BinSize = L - Offset - ?UNTAGGED_SIZE_LIMIT, + <> = B, + {Size, Rest}. + +decode_list(<<>>) -> []; +decode_list(B) -> + {Element, Rest} = decode_one(B), + [Element|decode_list(Rest)]. 
diff --git a/src/aeblake2.erl b/src/aeblake2.erl new file mode 100644 index 0000000..0519440 --- /dev/null +++ b/src/aeblake2.erl @@ -0,0 +1,148 @@ +%%%============================================================================= +%%% @copyright (C) 2019, Aeternity Anstalt +%%% @doc +%%% BLAKE2b implementation in Erlang - for details see: https://blake2.net +%%% @end +%%%============================================================================= + +-module(aeblake2). + +-export([ blake2b/2 + , blake2b/3 + ]). + +-define(MAX_64BIT, 16#ffffffffffffffff). + +-spec blake2b(HashLen :: integer(), Msg :: binary()) -> {ok, binary()}. +blake2b(HashLen, Msg) -> + blake2b(HashLen, Msg, <<>>). + +-spec blake2b(HashLen :: integer(), Msg :: binary(), Key :: binary()) -> {ok, binary()}. +blake2b(HashLen, Msg0, Key) -> + %% If message should be keyed, prepend message with padded key. + Msg = <<(pad(128, Key))/binary, Msg0/binary>>, + + %% Set up the initial state + Init = (16#01010000 + (byte_size(Key) bsl 8) + HashLen), + <> = blake_iv(), + H = <<(H0 bxor Init):64, H1_7/binary>>, + + %% Perform the compression - message will be chopped into 128-byte chunks. + State = blake2b_compress(H, Msg, 0), + + %% Just return the requested part of the hash + {ok, binary_part(to_little_endian(State), {0, HashLen})}. + +blake2b_compress(H, <>, BCompr) when Rest /= <<>> -> + H1 = blake2b_compress(H, <>, BCompr + 128, false), + blake2b_compress(H1, Rest, BCompr + 128); +blake2b_compress(H, SmallChunk, BCompr) -> + Size = byte_size(SmallChunk), + FillSize = (128 - Size) * 8, + blake2b_compress(H, <>, BCompr + Size, true). 
+ +blake2b_compress(H, Chunk0, BCompr, Last) -> + Chunk = to_big_endian(Chunk0), + <> = <>, + V12_ = V12 bxor (BCompr band ?MAX_64BIT), + V13_ = V13 bxor ((BCompr bsr 64) band ?MAX_64BIT), + V14_ = case Last of + false -> V14; + true -> V14 bxor ?MAX_64BIT + end, + V = <>, + + <> = + lists:foldl(fun(Round, Vx) -> blake2b_mix(Round, Chunk, Vx) end, V, lists:seq(0, 11)), + + <> = H, + <<((HInt bxor VLow) bxor VHigh):(8*64)>>. + +blake2b_mix(Rnd, Chunk, V) -> + <> = V, + <> = Chunk, + Ms = {M0, M1, M2, M3, M4, M5, M6, M7, M8, M9, M10, M11, M12, M13, M14, M15}, + M = fun(Ix) -> element(Ix+1, Ms) end, + + [S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15] = sigma(Rnd rem 10), + + {Vx0, Vx4, Vx8, Vx12} = blake2b_mix(V0, V4, V8, V12, M(S0), M(S1)), + {Vx1, Vx5, Vx9, Vx13} = blake2b_mix(V1, V5, V9, V13, M(S2), M(S3)), + {Vx2, Vx6, Vx10, Vx14} = blake2b_mix(V2, V6, V10, V14, M(S4), M(S5)), + {Vx3, Vx7, Vx11, Vx15} = blake2b_mix(V3, V7, V11, V15, M(S6), M(S7)), + + {Vy0, Vy5, Vy10, Vy15} = blake2b_mix(Vx0, Vx5, Vx10, Vx15, M(S8), M(S9)), + {Vy1, Vy6, Vy11, Vy12} = blake2b_mix(Vx1, Vx6, Vx11, Vx12, M(S10), M(S11)), + {Vy2, Vy7, Vy8, Vy13} = blake2b_mix(Vx2, Vx7, Vx8, Vx13, M(S12), M(S13)), + {Vy3, Vy4, Vy9, Vy14} = blake2b_mix(Vx3, Vx4, Vx9, Vx14, M(S14), M(S15)), + + <>. + +blake2b_mix(Va, Vb, Vc, Vd, X, Y) -> + Va1 = (Va + Vb + X) band ?MAX_64BIT, + Vd1 = rotr64(32, Vd bxor Va1), + + Vc1 = (Vc + Vd1) band ?MAX_64BIT, + Vb1 = rotr64(24, Vb bxor Vc1), + + Va2 = (Va1 + Vb1 + Y) band ?MAX_64BIT, + Vd2 = rotr64(16, Va2 bxor Vd1), + + Vc2 = (Vc1 + Vd2) band ?MAX_64BIT, + Vb2 = rotr64(63, Vb1 bxor Vc2), + + {Va2, Vb2, Vc2, Vd2}. + +blake_iv() -> + IV0 = 16#6A09E667F3BCC908, + IV1 = 16#BB67AE8584CAA73B, + IV2 = 16#3C6EF372FE94F82B, + IV3 = 16#A54FF53A5F1D36F1, + IV4 = 16#510E527FADE682D1, + IV5 = 16#9B05688C2B3E6C1F, + IV6 = 16#1F83D9ABFB41BD6B, + IV7 = 16#5BE0CD19137E2179, + <>. + +sigma(N) -> + {_, Row} = lists:keyfind(N, 1, sigma()), Row. 
+ +sigma() -> + [{0, [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]}, + {1, [14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3]}, + {2, [11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4]}, + {3, [ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8]}, + {4, [ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13]}, + {5, [ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9]}, + {6, [12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11]}, + {7, [13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10]}, + {8, [ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5]}, + {9, [10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0]}]. + +rotr64(N, I64) -> + <> = rotr641(N, <>), + I64rot. + +rotr641(16, <>) -> <>; +rotr641(24, <>) -> <>; +rotr641(32, <>) -> <>; +rotr641(63, <>) -> <>. + +pad(N, Bin) -> + case (N - (byte_size(Bin) rem N)) rem N of + 0 -> Bin; + Pad -> <> + end. + +to_big_endian(Bin) -> to_big_endian(Bin, <<>>). +to_big_endian(<<>>, Acc) -> Acc; +to_big_endian(<>, Acc) -> + to_big_endian(Rest, <>). + +to_little_endian(Bin) -> to_little_endian(Bin, <<>>). +to_little_endian(<<>>, Acc) -> Acc; +to_little_endian(<>, Acc) -> + to_little_endian(Rest, <>). diff --git a/src/aefa_asm.erl b/src/aefa_asm.erl new file mode 100644 index 0000000..0b9d420 --- /dev/null +++ b/src/aefa_asm.erl @@ -0,0 +1,484 @@ +%%%------------------------------------------------------------------- +%%% @copyright (C) 2019, Aeternity Anstalt +%%% @doc Assembler for Fate machine code. +%%% +%%% Assembler code can be read from a file. +%%% The assembler has the following format +%%% Comments start with 2 semicolons and runs till end of line +%%% ;; This is a comment +%%% Opcode mnemonics start with an upper case letter. +%%% DUP +%%% Identifiers start with a lower case letter +%%% an_identifier +%%% Immediates can be of 9 types: +%%% 1. Integers +%%% 42 +%%% -2374683271468723648732648736498712634876147 +%%% 2. Hexadecimal integers starting with 0x +%%% 0x0deadbeef0 +%%% 3. 
addresses, a 256-bit hash string starting with #
+%%%     followed by up to 64 hex chars
+%%%       #00000deadbeef
+%%%  4. Boolean
+%%%       true
+%%%       false
+%%%  5. Strings
+%%%       "Hello"
+%%%  6. Empty map
+%%%       {}
+%%%  7. Lists
+%%%       []
+%%%       [1, 2]
+%%%  8. Bit field
+%%%       <000>
+%%%       <1010>
+%%%       <>
+%%%       !<>
+%%%  9. Tuples
+%%%       ()
+%%%       (1, "foo")
+%%% @end
+%%% Created : 21 Dec 2017
+%%%-------------------------------------------------------------------
+
+-module(aefa_asm).
+
+-export([ assemble_file/3
+        , asm_to_bytecode/2
+        , bytecode_to_fate_code/2
+        , pp/1
+        , read_file/1
+        , to_hexstring/1
+        ]).
+
+-include_lib("aebytecode/include/aefa_opcodes.hrl").
+-define(HASH_BYTES, 32).
+%% Assemble the source text in InFile and write the resulting bytecode to OutFile.
+assemble_file(InFile, OutFile, Options) ->
+    Asm = read_file(InFile),
+    {_Env, BC} = aefa_asm:asm_to_bytecode(Asm, Options), %% the environment is not needed here
+    ok = file:write_file(OutFile, BC).
+%% Print a listing of a list of mnemonics and {comment, Text} entries.
+pp(Asm) ->
+    Listing = format(Asm),
+    io:format("~s~n", [Listing]).
+
+format(Asm) -> format(Asm, 0).
+
+format([{comment, Comment} | Rest], Address) ->
+    ";; " ++ Comment ++ "\n" ++ format(Rest, Address);
+format([Mnemonic | Rest], Address) ->
+    _Op = aefa_opcodes:m_to_op(Mnemonic), %% result unused; the call only checks the mnemonic
+    " " ++ atom_to_list(Mnemonic) ++ "\n"
+        ++ format(Rest, Address + 1);
+format([],_) -> [].
+
+%% Read Filename and return its contents as a string; crashes if the file cannot be read.
+read_file(Filename) ->
+    {ok, File} = file:read_file(Filename),
+    binary_to_list(File).
+%% Scan, assemble and serialize AssemblerCode; returns {Env, BytecodeBinary}.
+asm_to_bytecode(AssemblerCode, Options) ->
+    {ok, Tokens, _} = aefa_asm_scan:scan(AssemblerCode),
+
+    case proplists:get_bool(pp_tokens, Options) of %% absent or non-true option means "off"
+        true ->
+            io:format("Tokens ~p~n",[Tokens]);
+        false ->
+            ok
+    end,
+
+    Env = to_bytecode(Tokens, none, #{ functions => #{}
+                                     , symbols => #{}
+                                     }, [], Options),
+
+    ByteList = serialize(Env),
+
+    case proplists:get_bool(pp_hex_string, Options) of %% absent or non-true option means "off"
+        true ->
+            io:format("Code: ~s~n",[to_hexstring(ByteList)]);
+        false ->
+            ok
+    end,
+
+    {Env, list_to_binary(ByteList)}.
+ +bytecode_to_fate_code(ByteCode,_Options) -> + deserialize(ByteCode, #{ function => none + , bb => 0 + , current_bb_code => [] + , functions => #{} + , code => #{} + }). + +deserialize(<>, + #{ function := none + , bb := 0 + , current_bb_code := [] + } = Env) -> + {Sig, Rest2} = deserialize_signature(Rest), + Env2 = Env#{function => {<>, Sig}}, + deserialize(Rest2, Env2); +deserialize(<>, + #{ function := F + , bb := BB + , current_bb_code := Code + , code := Program + , functions := Funs} = Env) -> + {Sig, Rest2} = deserialize_signature(Rest), + case Code of + [] -> + Env2 = Env#{ bb => 0 + , current_bb_code => [] + , function => {<>, Sig} + , code => #{} + , functions => Funs#{F => Program}}, + deserialize(Rest2, Env2); + _ -> + Env2 = Env#{ bb => 0 + , current_bb_code => [] + , function => {<>, Sig} + , code => #{} + , functions => + Funs#{F => Program#{ BB => lists:reverse(Code)}}}, + deserialize(Rest2, Env2) + end; +deserialize(<>, + #{ bb := BB + , current_bb_code := Code + , code := Program} = Env) -> + {Rest2, OpCode} = deserialize_op(Op, Rest, Code), + case aefa_opcodes:end_bb(Op) of + true -> + deserialize(Rest2, Env#{ bb => BB+1 + , current_bb_code => [] + , code => Program#{BB => + lists:reverse(OpCode)}}); + false -> + deserialize(Rest2, Env#{ current_bb_code => OpCode}) + end; +deserialize(<<>>, #{ function := F + , bb := BB + , current_bb_code := Code + , code := Program + , functions := Funs} = Env) -> + FunctionCode = + case Code of + [] -> Program; + _ -> Program#{ BB => lists:reverse(Code)} + end, + Env#{ bb => 0 + , current_bb_code => [] + , function => none + , code => #{} + , functions => Funs#{F => FunctionCode}}. 
+ +deserialize_op(?ELEMENT, Rest, Code) -> + {Type, Rest2} = deserialize_type(Rest), + <> = Rest2, + {Arg0, Rest4} = aefa_encoding:deserialize_one(Rest3), + {Arg1, Rest5} = aefa_encoding:deserialize_one(Rest4), + {Arg2, Rest6} = aefa_encoding:deserialize_one(Rest5), + Modifier0 = bits_to_modifier(ArgType band 2#11), + Modifier1 = bits_to_modifier((ArgType bsr 2) band 2#11), + Modifier2 = bits_to_modifier((ArgType bsr 4) band 2#11), + {Rest6, [{ aefa_opcodes:mnemonic(?ELEMENT) + , Type + , {Modifier0, Arg0} + , {Modifier1, Arg1} + , {Modifier2, Arg2}} + | Code]}; +deserialize_op(Op, Rest, Code) -> + OpName = aefa_opcodes:mnemonic(Op), + case aefa_opcodes:args(Op) of + 0 -> {Rest, [OpName | Code]}; + 1 -> + <> = Rest, + {Arg, Rest3} = aefa_encoding:deserialize_one(Rest2), + Modifier = bits_to_modifier(ArgType), + {Rest3, [{OpName, {Modifier, Arg}} | Code]}; + 2 -> + <> = Rest, + {Arg0, Rest3} = aefa_encoding:deserialize_one(Rest2), + {Arg1, Rest4} = aefa_encoding:deserialize_one(Rest3), + Modifier0 = bits_to_modifier(ArgType band 2#11), + Modifier1 = bits_to_modifier((ArgType bsr 2) band 2#11), + {Rest4, [{OpName, {Modifier0, Arg0}, + {Modifier1, Arg1}} | Code]}; + 3 -> + <> = Rest, + {Arg0, Rest3} = aefa_encoding:deserialize_one(Rest2), + {Arg1, Rest4} = aefa_encoding:deserialize_one(Rest3), + {Arg2, Rest5} = aefa_encoding:deserialize_one(Rest4), + Modifier0 = bits_to_modifier(ArgType band 2#11), + Modifier1 = bits_to_modifier((ArgType bsr 2) band 2#11), + Modifier2 = bits_to_modifier((ArgType bsr 4) band 2#11), + {Rest5, [{ OpName + , {Modifier0, Arg0} + , {Modifier1, Arg1} + , {Modifier2, Arg2}} + | Code]}; + 4 -> + <> = Rest, + {Arg0, Rest3} = aefa_encoding:deserialize_one(Rest2), + {Arg1, Rest4} = aefa_encoding:deserialize_one(Rest3), + {Arg2, Rest5} = aefa_encoding:deserialize_one(Rest4), + {Arg3, Rest6} = aefa_encoding:deserialize_one(Rest5), + Modifier0 = bits_to_modifier(ArgType band 2#11), + Modifier1 = bits_to_modifier((ArgType bsr 2) band 2#11), + 
Modifier2 = bits_to_modifier((ArgType bsr 4) band 2#11), + Modifier3 = bits_to_modifier((ArgType bsr 6) band 2#11), + {Rest6, [{ OpName + , {Modifier0, Arg0} + , {Modifier1, Arg1} + , {Modifier2, Arg2} + , {Modifier3, Arg3}} + | Code]} + end. + + +serialize(#{functions := Functions} =_Env) -> + Code = [[?FUNCTION, Name, serialize_signature(Sig), C] || + {Name, {Sig, C}} <- maps:to_list(Functions)], + serialize_code(lists:flatten(Code)). + + +%% Argument encoding +%% Agument Specification Byte +%% bitpos: 6 4 2 0 +%% xx xx xx xx +%% Arg3 Arg2 Arg1 Arg0 +%% Bit pattern +%% 00 : stack/unused (depending on instruction) +%% 01 : argN +%% 10 : varN +%% 11 : immediate + +serialize_code([ {Arg0Type, Arg0} + , {Arg1Type, Arg1} + , {Arg2Type, Arg2} + , {Arg3Type, Arg3}| Rest]) -> + ArgSpec = + modifier_bits(Arg0Type) bor + (modifier_bits(Arg1Type) bsl 2) bor + (modifier_bits(Arg2Type) bsl 4) bor + (modifier_bits(Arg3Type) bsl 6), + [ ArgSpec + , serialize_data(Arg0Type, Arg0) + , serialize_data(Arg1Type, Arg1) + , serialize_data(Arg2Type, Arg2) + , serialize_data(Arg3Type, Arg3) + | serialize_code(Rest)]; +serialize_code([ {Arg0Type, Arg0} + , {Arg1Type, Arg1} + , {Arg2Type, Arg2} + | Rest]) -> + ArgSpec = + modifier_bits(Arg0Type) bor + (modifier_bits(Arg1Type) bsl 2) bor + (modifier_bits(Arg2Type) bsl 4), + [ArgSpec + , serialize_data(Arg0Type, Arg0) + , serialize_data(Arg1Type, Arg1) + , serialize_data(Arg2Type, Arg2) + | serialize_code(Rest)]; +serialize_code([ {Arg0Type, Arg0} + , {Arg1Type, Arg1} + | Rest]) -> + ArgSpec = + modifier_bits(Arg0Type) bor + (modifier_bits(Arg1Type) bsl 2), + [ArgSpec + , serialize_data(Arg0Type, Arg0) + , serialize_data(Arg1Type, Arg1) + | serialize_code(Rest)]; +serialize_code([ {Arg0Type, Arg0} | Rest]) -> + ArgSpec = + modifier_bits(Arg0Type), + [ArgSpec + , serialize_data(Arg0Type, Arg0) + | serialize_code(Rest)]; +serialize_code([ ?ELEMENT + , ResType + | Rest]) -> + [?ELEMENT, + serialize_type(ResType) + | serialize_code(Rest)]; 
+serialize_code([B|Rest]) ->
+    [B | serialize_code(Rest)];
+serialize_code([]) -> [].
+
+%% 00 : stack/unused (depending on instruction)
+%% 01 : argN
+%% 10 : varN
+%% 11 : immediate
+modifier_bits(immediate) -> 2#11;
+modifier_bits(var) -> 2#10;
+modifier_bits(arg) -> 2#01;
+modifier_bits(stack) -> 2#00.
+%% Inverse of modifier_bits/1.
+bits_to_modifier(2#11) -> immediate;
+bits_to_modifier(2#10) -> var;
+bits_to_modifier(2#01) -> arg;
+bits_to_modifier(2#00) -> stack.
+%% The modifier is ignored here; only the value itself is serialized.
+serialize_data(_, Data) ->
+    aefa_encoding:serialize(Data).
+%% A signature is the argument types (encoded as one tuple type) followed by the return type.
+serialize_signature({Args, RetType}) ->
+    [serialize_type({tuple, Args}) |
+     serialize_type(RetType)].
+%% Type tags: 0 integer, 1 boolean, 2 list, 3 tuple (+ arity byte), 4 address, 5 bits, 6 map.
+serialize_type(integer) -> [0];
+serialize_type(boolean) -> [1];
+serialize_type({list, T}) -> [2 | serialize_type(T)];
+serialize_type({tuple, Ts}) ->
+    case length(Ts) of
+        N when N =< 255 -> %% the arity must fit in one byte; larger tuples crash here
+            [3, N | [serialize_type(T) || T <- Ts]]
+    end;
+serialize_type(address) -> [4]; %% a list like every other clause, so callers can cons/append it
+serialize_type(bits) -> [5]; %% (a bare integer made [2 | ...] improper and broke ++ below)
+serialize_type({map, K, V}) -> [6 | serialize_type(K) ++ serialize_type(V)].
+
+%% Read a signature written by serialize_signature/1; returns {{ArgTypes, RetType}, RestBinary}.
+deserialize_signature(Binary) ->
+    {{tuple, Args}, Rest} = deserialize_type(Binary),
+    {RetType, Rest2} = deserialize_type(Rest),
+    {{Args, RetType}, Rest2}.
+%% Decode one type from the front of a binary; returns {Type, RestBinary}.
+deserialize_type(<<0, Rest/binary>>) -> {integer, Rest};
+deserialize_type(<<1, Rest/binary>>) -> {boolean, Rest};
+deserialize_type(<<2, Rest/binary>>) ->
+    {T, Rest2} = deserialize_type(Rest),
+    {{list, T}, Rest2};
+deserialize_type(<<3, N, Rest/binary>>) ->
+    {Ts, Rest2} = deserialize_types(N, Rest, []),
+    {{tuple, Ts}, Rest2};
+deserialize_type(<<4, Rest/binary>>) -> {address, Rest};
+deserialize_type(<<5, Rest/binary>>) -> {bits, Rest};
+deserialize_type(<<6, Rest/binary>>) ->
+    {K, Rest2} = deserialize_type(Rest),
+    {V, Rest3} = deserialize_type(Rest2),
+    {{map, K, V}, Rest3}.
+%% Decode N consecutive types, keeping their order.
+deserialize_types(0, Binary, Acc) ->
+    {lists:reverse(Acc), Binary};
+deserialize_types(N, Binary, Acc) ->
+    {T, Rest} = deserialize_type(Binary),
+    deserialize_types(N-1, Rest, [T | Acc]).
+
+%% Render bytes as a "0x"-prefixed lowercase hex string; accepts any iolist of bytes.
+to_hexstring(ByteList) ->
+    "0x" ++ lists:flatten(
+              [io_lib:format("~2.16.0b", [X])
+               || <<X>> <= iolist_to_binary(ByteList)]).
+%% Fold the token list into Env. Address is the function being assembled, Code its reversed body.
+to_bytecode([{function,_line, 'FUNCTION'}|Rest], Address, Env, Code, Opts) ->
+    Env2 = insert_fun(Address, Code, Env), %% close the previous function, if any
+    {Fun, Rest2} = to_fun_def(Rest),
+    to_bytecode(Rest2, Fun, Env2, [], Opts);
+to_bytecode([{mnemonic,_line, 'ELEMENT'}|Rest], Address, Env, Code, Opts) ->
+    OpCode = aefa_opcodes:m_to_op('ELEMENT'),
+    {RetType, Rest2} = to_type(Rest), %% ELEMENT is followed by its result type
+    to_bytecode(Rest2, Address, Env, [RetType, OpCode|Code], Opts);
+to_bytecode([{mnemonic,_line, Op}|Rest], Address, Env, Code, Opts) ->
+    OpCode = aefa_opcodes:m_to_op(Op),
+    to_bytecode(Rest, Address, Env, [OpCode|Code], Opts);
+to_bytecode([{arg,_line, N}|Rest], Address, Env, Code, Opts) ->
+    to_bytecode(Rest, Address, Env, [{arg, N}|Code], Opts);
+to_bytecode([{var,_line, N}|Rest], Address, Env, Code, Opts) ->
+    to_bytecode(Rest, Address, Env, [{var, N}|Code], Opts);
+to_bytecode([{stack,_line, N}|Rest], Address, Env, Code, Opts) ->
+    to_bytecode(Rest, Address, Env, [{stack, N}|Code], Opts);
+to_bytecode([{int,_line, Int}|Rest], Address, Env, Code, Opts) ->
+    to_bytecode(Rest, Address, Env, [{immediate, Int}|Code], Opts);
+to_bytecode([{boolean,_line, Bool}|Rest], Address, Env, Code, Opts) ->
+    to_bytecode(Rest, Address, Env, [{immediate, Bool}|Code], Opts);
+to_bytecode([{hash,_line, Hash}|Rest], Address, Env, Code, Opts) ->
+    to_bytecode(Rest, Address, Env, [{immediate, Hash}|Code], Opts);
+to_bytecode([{id,_line, ID}|Rest], Address, Env, Code, Opts) ->
+    {Hash, Env2} = insert_symbol(ID, Env), %% identifiers are emitted as their symbol hash
+    to_bytecode(Rest, Address, Env2, [{immediate, Hash}|Code], Opts);
+to_bytecode([], Address, Env, Code, Opts) ->
+    Env2 = insert_fun(Address, Code, Env),
+    case proplists:get_bool(pp_opcodes, Opts) of %% absent or non-true option means "off"
+        true ->
+            Ops = [C || {_Name, {_Sig, C}} <- maps:to_list(maps:get(functions, Env2))],
+            io:format("opcodes ~p~n", [Ops]);
+        false ->
+            ok
+    end,
+    Env2.
+ + +to_fun_def([{id, _, Name}, {'(', _} | Rest]) -> + {ArgsType, [{'to', _} | Rest2]} = to_arg_types(Rest), + {RetType, Rest3} = to_type(Rest2), + {{Name, ArgsType, RetType}, Rest3}. + +to_arg_types([{')', _} | Rest]) -> {[], Rest}; +to_arg_types(Tokens) -> + case to_type(Tokens) of + {Type, [{',', _} | Rest]} -> + {MoreTypes, Rest2} = to_arg_types(Rest), + {[Type|MoreTypes], Rest2}; + {Type, [{')', _} | Rest]} -> + {[Type], Rest} + end. + +to_type([{id, _, "integer"} | Rest]) -> {integer, Rest}; +to_type([{id, _, "boolean"} | Rest]) -> {boolean, Rest}; +to_type([{id, _, "string"} | Rest]) -> {string, Rest}; +to_type([{id, _, "address"} | Rest]) -> {address, Rest}; +to_type([{id, _, "bits"} | Rest]) -> {bits, Rest}; +to_type([{'{', _}, {id, _, "list"}, {',', _} | Rest]) -> + %% TODO: Error handling + {ListType, [{'}', _}| Rest2]} = to_type(Rest), + {{list, ListType}, Rest2}; +to_type([{'{', _}, {id, _, "tuple"}, {',', _}, {'[', _} | Rest]) -> + %% TODO: Error handling + {ElementTypes, [{'}', _}| Rest2]} = to_list_of_types(Rest), + {{tuple, ElementTypes}, Rest2}; +to_type([{'{', _}, {id, _, "map"}, {',', _} | Rest]) -> + %% TODO: Error handling + {KeyType, [{',', _}| Rest2]} = to_type(Rest), + {ValueType, [{'}', _}| Rest3]} = to_type(Rest2), + {{map, KeyType, ValueType}, Rest3}. + +to_list_of_types([{']', _} | Rest]) -> {[], Rest}; +to_list_of_types(Tokens) -> + case to_type(Tokens) of + {Type, [{',', _} | Rest]} -> + {MoreTypes, Rest2} = to_list_of_types(Rest), + {[Type|MoreTypes], Rest2}; + {Type, [{']', _} | Rest]} -> + {[Type], Rest} + end. + + + +insert_fun(none, [], Env) -> Env; +insert_fun({Name, Type, RetType}, Code, #{functions := Functions} = Env) -> + {Hash, Env2} = insert_symbol(Name, Env), + Env2#{ + functions => Functions#{Hash => {{Type, RetType}, lists:reverse(Code)}} + }. + +insert_symbol(Id, Env) -> + %% Use first 4 bytes of blake hash + {ok, <> } = aeblake2:blake2b(?HASH_BYTES, list_to_binary(Id)), + insert_symbol(Id, <>, Env). 
+ +insert_symbol(Id, Hash, #{symbols := Symbols} = Env) -> + case maps:find(Hash, Symbols) of + {ok, Id} -> {Hash, Env}; + {ok, Id2} -> + %% Very unlikely... + exit({two_symbols_with_same_hash, Id, Id2}); + error -> + {Hash, Env#{symbols => Symbols#{ Id => Hash + , Hash => Id}}} + end. +lookup_symbol(Id, #{symbols := Symbols} = Env) -> + maps:find(Id, Symbols). diff --git a/src/aefa_asm_scan.xrl b/src/aefa_asm_scan.xrl new file mode 100644 index 0000000..b61d240 --- /dev/null +++ b/src/aefa_asm_scan.xrl @@ -0,0 +1,191 @@ +%%% -*- erlang-indent-level:4; indent-tabs-mode: nil -*- +%%%------------------------------------------------------------------- +%%% @copyright (C) 2019, aeternity Anstalt +%%% @doc +%%% Handling FATE code. +%%% @end +%%% Created : 9 Jan 2019 +%%%------------------------------------------------------------------- + +Definitions. +DIGIT = [0-9] +HEXDIGIT = [0-9a-fA-F] +LOWER = [a-z_] +UPPER = [A-Z] +INT = {DIGIT}+ +HEX = 0x{HEXDIGIT}+ +HASH = #{HEXDIGIT}+ +WS = [\000-\s] +ID = {LOWER}[a-zA-Z0-9_]* + + +Rules. +arg{INT} : {token, {arg, TokenLine, parse_arg(TokenChars)}}. +var{INT} : {token, {var, TokenLine, parse_var(TokenChars)}}. +a : {token, {stack, TokenLine, 0}}. +a{INT} : {token, {stack, TokenLine, parse_acc(TokenChars)}}. + +true : {token, {boolean, TokenLine, true}}. +false : {token, {boolean, TokenLine, false}}. + +RETURN : {token, {mnemonic, TokenLine, 'RETURN'}}. +RETURNR : {token, {mnemonic, TokenLine, 'RETURNR'}}. +CALL : {token, {mnemonic, TokenLine, 'CALL'}}. +FUNCTION : {token, {function, TokenLine, 'FUNCTION' }}. +NOP : {token, {mnemonic, TokenLine, 'NOP'}}. + +CALL_R : {token, {mnemonic, TokenLine, 'CALL_R'}}. +CALL_T : {token, {mnemonic, TokenLine, 'CALL_T'}}. +CALL_TR : {token, {mnemonic, TokenLine, 'CALL_TR'}}. +JUMP : {token, {mnemonic, TokenLine, 'JUMP'}}. +JUMPIF : {token, {mnemonic, TokenLine, 'JUMPIF'}}. +SWITCH : {token, {mnemonic, TokenLine, 'SWITCH'}}. + +PUSH : {token, {mnemonic, TokenLine, 'PUSH'}}. 
+DUP : {token, {mnemonic, TokenLine, 'DUP'}}. +POP : {token, {mnemonic, TokenLine, 'POP'}}. + +STORE : {token, {mnemonic, TokenLine, 'STORE'}}. + +ADD : {token, {mnemonic, TokenLine, 'ADD'}}. +MUL : {token, {mnemonic, TokenLine, 'MUL'}}. +SUB : {token, {mnemonic, TokenLine, 'SUB'}}. +DIV : {token, {mnemonic, TokenLine, 'DIV'}}. +MOD : {token, {mnemonic, TokenLine, 'MOD'}}. +POW : {token, {mnemonic, TokenLine, 'POW'}}. + +INC : {token, {mnemonic, TokenLine, 'INC'}}. +DEC : {token, {mnemonic, TokenLine, 'DEC'}}. + +LT : {token, {mnemonic, TokenLine, 'LT'}}. +GT : {token, {mnemonic, TokenLine, 'GT'}}. +EQ : {token, {mnemonic, TokenLine, 'EQ'}}. +ELT : {token, {mnemonic, TokenLine, 'ELT'}}. +EGT : {token, {mnemonic, TokenLine, 'EGT'}}. +NEQ : {token, {mnemonic, TokenLine, 'NEQ'}}. + +AND : {token, {mnemonic, TokenLine, 'AND'}}. +OR : {token, {mnemonic, TokenLine, 'OR'}}. +NOT : {token, {mnemonic, TokenLine, 'NOT'}}. + +TUPLE : {token, {mnemonic, TokenLine, 'TUPLE'}}. +ELEMENT : {token, {mnemonic, TokenLine, 'ELEMENT'}}. + +MAP_EMPTY : {token, {mnemonic, TokenLine, 'MAP_EMPTY'}}. +MAP_LOOKUP : {token, {mnemonic, TokenLine, 'MAP_LOOKUP'}}. +MAP_LOOKUPD : {token, {mnemonic, TokenLine, 'MAP_LOOKUPD'}}. +MAP_UPDATE : {token, {mnemonic, TokenLine, 'MAP_UPDATE'}}. +MAP_MEMBER : {token, {mnemonic, TokenLine, 'MAP_MEMBER'}}. +MAP_DELETE : {token, {mnemonic, TokenLine, 'MAP_DELETE'}}. +MAP_FROM_LIST : {token, {mnemonic, TokenLine, 'MAP_FROM_LIST'}}. + +NIL : {token, {mnemonic, TokenLine, 'NIL'}}. +IS_NIL : {token, {mnemonic, TokenLine, 'IS_NIL'}}. +CONS : {token, {mnemonic, TokenLine, 'CONS'}}. +HD : {token, {mnemonic, TokenLine, 'HD'}}. +TL : {token, {mnemonic, TokenLine, 'TL'}}. +LENGTH : {token, {mnemonic, TokenLine, 'LENGTH'}}. + +STR_EQ : {token, {mnemonic, TokenLine, 'STR_EQ'}}. +STR_JOIN : {token, {mnemonic, TokenLine, 'STR_JOIN'}}. +INT_TO_STR : {token, {mnemonic, TokenLine, 'INT_TO_STR'}}. +ADDR_TO_STR : {token, {mnemonic, TokenLine, 'ADDR_TO_STR'}}. 
+STR_REVERSE : {token, {mnemonic, TokenLine, 'STR_REVERSE'}}. + +INT_TO_ADDR : {token, {mnemonic, TokenLine, 'INT_TO_ADDR'}}. + +VARIANT : {token, {mnemonic, TokenLine, 'VARIANT'}}. +VARIANT_TEST : {token, {mnemonic, TokenLine, 'VARIANT_TEST'}}. +VARIANT_ELEMENT : {token, {mnemonic, TokenLine, 'VARIANT_ELEMENT'}}. + +BITS_NONE : {token, {mnemonic, TokenLine, 'BITS_NONE'}}. +BITS_ALL : {token, {mnemonic, TokenLine, 'BITS_ALL'}}. +BITS_SET : {token, {mnemonic, TokenLine, 'BITS_SET'}}. +BITS_CLEAR : {token, {mnemonic, TokenLine, 'BITS_CLEAR'}}. +BITS_TEST : {token, {mnemonic, TokenLine, 'BITS_TEST'}}. +BITS_SUM : {token, {mnemonic, TokenLine, 'BITS_SUM'}}. +BITS_OR : {token, {mnemonic, TokenLine, 'BITS_OR'}}. +BITS_AND : {token, {mnemonic, TokenLine, 'BITS_AND'}}. +BITS_DIFF : {token, {mnemonic, TokenLine, 'BITS_DIFF'}}. + + +ADDRESS : {token, {mnemonic, TokenLine, 'ADDRESS'}}. +BALANCE : {token, {mnemonic, TokenLine, 'BALANCE'}}. +ORIGIN : {token, {mnemonic, TokenLine, 'ORIGIN'}}. +CALLER : {token, {mnemonic, TokenLine, 'CALLER'}}. +GASPRICE : {token, {mnemonic, TokenLine, 'GASPRICE'}}. +BLOCKHASH : {token, {mnemonic, TokenLine, 'BLOCKHASH'}}. +BENEFICIARY : {token, {mnemonic, TokenLine, 'BENEFICIARY'}}. +TIMESTAMP : {token, {mnemonic, TokenLine, 'TIMESTAMP'}}. +NUMBER : {token, {mnemonic, TokenLine, 'NUMBER'}}. +DIFFICULTY : {token, {mnemonic, TokenLine, 'DIFFICULTY'}}. +GASLIMIT : {token, {mnemonic, TokenLine, 'GASLIMIT'}}. +GAS : {token, {mnemonic, TokenLine, 'GAS'}}. +LOG0 : {token, {mnemonic, TokenLine, 'LOG0'}}. +LOG1 : {token, {mnemonic, TokenLine, 'LOG1'}}. +LOG2 : {token, {mnemonic, TokenLine, 'LOG2'}}. +LOG3 : {token, {mnemonic, TokenLine, 'LOG3'}}. +LOG4 : {token, {mnemonic, TokenLine, 'LOG4'}}. +ABORT : {token, {mnemonic, TokenLine, 'ABORT'}}. +EXIT : {token, {mnemonic, TokenLine, 'EXIT'}}. +DEACTIVATE : {token, {mnemonic, TokenLine, 'DEACTIVATE'}}. +COMMENT : {token, {mnemonic, TokenLine, 'COMMENT'}}. +{ID} : + {token, {id, TokenLine, TokenChars}}. 
+{HEX} :
+ {token, {int, TokenLine, parse_hex(TokenChars)}}.
+{INT} :
+ {token, {int, TokenLine, parse_int(TokenChars)}}.
+{HASH} :
+ {token, {hash, TokenLine, parse_hash(TokenChars)}}.
+
+
+%% Symbols
+\-\> : {token, {'to', TokenLine}}.
+\:   : {token, {'to', TokenLine}}.
+,    : {token, {',', TokenLine}}.
+\(   : {token, {'(', TokenLine}}.
+\)   : {token, {')', TokenLine}}.
+\[   : {token, {'[', TokenLine}}.
+\]   : {token, {']', TokenLine}}.
+\{   : {token, {'{', TokenLine}}.
+\}   : {token, {'}', TokenLine}}.
+
+\.   : skip_token.
+
+
+%% Whitespace ignore
+{WS} : skip_token.
+
+%% Comments (TODO: nested comments)
+;;.* : skip_token.
+
+. : {error, "Unexpected token: " ++ TokenChars}.
+
+Erlang code.
+
+-export([scan/1]).
+
+-dialyzer({nowarn_function, yyrev/2}).
+
+-ignore_xref([format_error/1, string/2, token/2, token/3, tokens/2, tokens/3]).
+
+-include_lib("aebytecode/include/aefa_opcodes.hrl").
+
+
+%% "0x1f" -> 31: drop the prefix and read the rest as base 16.
+parse_hex("0x" ++ Chars) -> list_to_integer(Chars, 16).
+
+parse_int(Chars) -> list_to_integer(Chars).
+
+%% "arg3" / "var3" / "a3" -> 3: the numeric suffix of an operand token.
+parse_arg("arg" ++ N) -> list_to_integer(N).
+parse_var("var" ++ N) -> list_to_integer(N).
+parse_acc("a" ++ N) -> list_to_integer(N).
+
+
+%% "#ab12..." -> the hex digits read as an integer and packed into a
+%% 256-bit (32-byte) big-endian binary.
+parse_hash("#" ++ Chars) ->
+    N = list_to_integer(Chars, 16),
+    <<N:256>>.
+
+%% Tokenize the string S with the leex-generated string/1.
+scan(S) ->
+    string(S).
+
diff --git a/src/aefa_data.erl b/src/aefa_data.erl
new file mode 100644
index 0000000..568dbd7
--- /dev/null
+++ b/src/aefa_data.erl
@@ -0,0 +1,180 @@
+%% First draft of FATE data representation.
+%% Very likely to change.
+%%
+-include("aefa_data.hrl").
+
+-module(aefa_data).
+
+-type fate_integer() :: ?FATE_INTEGER_T.
+-type fate_boolean() :: ?FATE_BOOLEAN_T.
+-type fate_nil() :: ?FATE_NIL_T.
+-type fate_list() :: ?FATE_LIST_T.
+-type fate_unit() :: ?FATE_UNIT_T.
+-type fate_map() :: ?FATE_MAP_T.
+-type fate_string() :: ?FATE_STRING_T.
+-type fate_address() :: ?FATE_ADDRESS_T.
+
+-type fate_variant() :: ?FATE_VARIANT_T.
+
+-type fate_void() :: ?FATE_VOID_T.
+
+-type fate_tuple() :: ?FATE_TUPLE_T.
+
+%% Bit fields are built by make_bits/1 and are Fate values like any other,
+%% so they are part of fate_type().
+-type fate_bits() :: ?FATE_BITS_T.
+
+-type fate_type() ::
+        fate_boolean()
+      | fate_integer()
+      | fate_nil()
+      | fate_list()
+      | fate_unit()
+      | fate_tuple()
+      | fate_string()
+      | fate_address()
+      | fate_bits()
+      | fate_variant()
+      | fate_map()
+      | fate_void(). %% Not sure we need this.
+
+-export_type([fate_type/0]).
+
+-export([ make_integer/1
+        , make_boolean/1
+        , make_list/1
+        , make_variant/3
+        , make_tuple/1
+        , make_string/1
+        , make_map/1
+        , make_address/1
+        , make_bits/1
+        , make_unit/0
+        , tuple_to_list/1
+        , decode/1
+        , encode/1
+        ]).
+-export([format/1]).
+
+
+%% Constructors: wrap an Erlang term in its Fate representation.
+make_integer(I) when is_integer(I) -> ?MAKE_FATE_INTEGER(I).
+make_boolean(true)  -> ?FATE_TRUE;
+make_boolean(false) -> ?FATE_FALSE.
+make_list([]) -> ?FATE_NIL;
+make_list(L)  -> ?MAKE_FATE_LIST(L).
+%% Strings are stored as binaries; (deep) character lists are flattened.
+make_string(S) when is_list(S) ->
+    ?FATE_STRING(list_to_binary(lists:flatten(S)));
+make_string(S) when is_binary(S) -> ?FATE_STRING(S).
+make_unit() -> ?FATE_UNIT.
+make_tuple(T) -> ?FATE_TUPLE(T).
+make_map(M) -> ?MAKE_FATE_MAP(M).
+make_address(A) -> ?FATE_ADDRESS(A).
+make_bits(I) when is_integer(I) -> ?FATE_BITS(I).
+
+%% A variant is {variant, Size, Tag, Values}; Tag must satisfy
+%% 0 =< Tag < Size and Values must be an Erlang tuple.
+make_variant(Size, Tag, Values) when is_integer(Size), is_integer(Tag)
+                                     , 0 =< Size
+                                     , 0 =< Tag
+                                     , Tag < Size
+                                     , is_tuple(Values) ->
+    ?FATE_VARIANT(Size, Tag, Values).
+
+%% Elements of a Fate tuple as an Erlang list.
+tuple_to_list(?FATE_TUPLE(T)) -> erlang:tuple_to_list(T).
+
+%% Encode is a convenience function for testing, encoding an Erlang term
+%% to a Fate term, but it can not distinguish between e.g. 32-byte strings
+%% and addresses. Therefore an extra tuple layer on the Erlang side for
+%% addresses and bits.
+encode({bits, Term}) when is_integer(Term) -> make_bits(Term);
+%% TODO: check that each byte is in base58
+encode({address, B}) when is_binary(B)  -> make_address(B);
+%% An integer address is stored as a 256-bit big-endian binary.
+encode({address, I}) when is_integer(I) -> B = <<I:256>>, make_address(B);
+encode({address, S}) when is_list(S) -> make_address(base58_to_address(S));
+encode({variant, Size, Tag, Values}) -> make_variant(Size, Tag, Values);
+encode(Term) when is_integer(Term) -> make_integer(Term);
+encode(Term) when is_boolean(Term) -> make_boolean(Term);
+encode(Term) when is_list(Term) -> make_list([encode(E) || E <- Term]);
+encode(Term) when is_tuple(Term) ->
+    make_tuple(list_to_tuple([encode(E) || E <- erlang:tuple_to_list(Term)]));
+encode(Term) when is_map(Term) ->
+    make_map(maps:from_list([{encode(K), encode(V)} || {K,V} <- maps:to_list(Term)]));
+encode(Term) when is_binary(Term) -> make_string(Term).
+
+%% Decode turns a Fate term back into a plain Erlang term.
+%% Addresses come back as {address, Integer}, strings as character lists,
+%% and the payload tuple of a variant is returned as it is (not decoded).
+decode(I) when ?IS_FATE_INTEGER(I) -> I;
+decode(?FATE_TRUE)  -> true;
+decode(?FATE_FALSE) -> false;
+decode(L) when ?IS_FATE_LIST(L) -> [decode(E) || E <- L];
+decode(?FATE_ADDRESS(<<Address:256>>)) -> {address, Address};
+decode(?FATE_BITS(Bits)) -> {bits, Bits};
+%% T is an Erlang tuple, so it must be turned into a list before it can be
+%% used as a generator.
+decode(?FATE_TUPLE(T)) ->
+    erlang:list_to_tuple([decode(E) || E <- erlang:tuple_to_list(T)]);
+decode(?FATE_VARIANT(Size, Tag, Values)) -> {variant, Size, Tag, Values};
+decode(S) when ?IS_FATE_STRING(S) -> binary_to_list(S);
+decode(M) when ?IS_FATE_MAP(M) ->
+    maps:from_list([{decode(K), decode(V)} || {K, V} <- maps:to_list(M)]).
+
+-spec format(fate_type()) -> iolist().
+%% Render a Fate value as human readable text.
+%% Exits with {not_a_fate_type, V} for anything no clause recognises.
+format(I) when ?IS_FATE_INTEGER(I) -> integer_to_list(?MAKE_FATE_INTEGER(I));
+format(?FATE_VOID) -> "void";
+format(?FATE_TRUE) -> "true";
+format(?FATE_FALSE) -> "false";
+format(?FATE_NIL) -> "[]";
+%% format_list/1 only emits the elements and the closing bracket, so the
+%% opening bracket is added here.
+format(L) when ?IS_FATE_LIST(L) -> "[ " ++ format_list(?FATE_LIST_VALUE(L));
+format(?FATE_UNIT) -> "unit";
+format(?FATE_TUPLE(T)) ->
+    "{ " ++ [format(E) ++ " " || E <- erlang:tuple_to_list(T)] ++ "}";
+format(S) when ?IS_FATE_STRING(S) -> [S];
+format(?FATE_VARIANT(Size, Tag, T)) ->
+    "( " ++ integer_to_list(Size) ++ ", "
+        ++ integer_to_list(Tag) ++ ", "
+        ++ [format(E) ++ " " || E <- erlang:tuple_to_list(T)]
+        ++ " )";
+format(M) when ?IS_FATE_MAP(M) ->
+    "#{ "
+        ++ format_kvs(maps:to_list(?FATE_MAP_VALUE(M)))
+        ++" }";
+format(?FATE_ADDRESS(Address)) -> base58:binary_to_base58(Address);
+format(V) -> exit({not_a_fate_type, V}).
+
+%% Comma separated elements followed by the closing " ]".
+format_list([]) -> " ]";
+format_list([E]) -> format(E) ++ " ]";
+format_list([H|T]) -> format(H) ++ ", " ++ format_list(T).
+
+%% Comma separated "( Key => Value )" pairs.
+format_kvs([]) -> "";
+format_kvs([{K,V}]) -> "( " ++ format(K) ++ " => " ++ format(V) ++ " )";
+format_kvs([{K,V} | Rest]) ->
+    "( " ++ format(K) ++ " => " ++ format(V) ++ " ), " ++ format_kvs(Rest).
+
+
+%% -- Local base 58 library
+
+%% Digit value (0..57) -> character of the base58 alphabet.
+base58char(Char) ->
+    binary:at(<<"123456789ABCDEFGHJKLMNPQRSTUVWXYZ"
+                "abcdefghijkmnopqrstuvwxyz">>, Char).
+%% Character -> digit value, via a table indexed by (C - $1).
+%% Characters outside the alphabet but inside the table map to 0.
+char_to_base58(C) ->
+    binary:at(<<0,1,2,3,4,5,6,7,8,0,0,0,0,0,0,0,9,10,11,12,13,14,15,16,0,17,
+                18,19,20,21,0,22,23,24,25,26,27,28,29,30,31,32,0,0,0,0,0,0,
+                33,34,35,36,37,38,39,40,41,42,43,0,44,45,46,47,48,49,50,51,
+                52,53,54,55,56,57>>, C-$1).
+
+%% Accumulate the remaining characters, most significant digit first.
+base58_to_integer(C, []) -> C;
+base58_to_integer(C, [X | Xs]) ->
+    base58_to_integer(C * 58 + char_to_base58(X), Xs).
+
+base58_to_integer([]) -> error;
+base58_to_integer([Char]) -> char_to_base58(Char);
+base58_to_integer([Char | Str]) ->
+    base58_to_integer(char_to_base58(Char), Str).
+
+%% Base58 string -> 256-bit big-endian binary.
+base58_to_address(Base58) ->
+    I = base58_to_integer(Base58),
+    Bin = <<I:256>>,
+    Bin.
+
+%% Integer -> base58 text as a binary; 0 is encoded as <<"1">>.
+integer_to_base58(0) -> <<"1">>;
+integer_to_base58(Integer) ->
+    Base58String = integer_to_base58(Integer, []),
+    list_to_binary(Base58String).
+
+%% Peel off base58 digits from the least significant end; prepending to Acc
+%% leaves the most significant digit first.
+integer_to_base58(0, Acc) -> Acc;
+integer_to_base58(Integer, Acc) ->
+    Quot = Integer div 58,
+    Rem = Integer rem 58,
+    integer_to_base58(Quot, [base58char(Rem)|Acc]).
diff --git a/src/aefa_encoding.erl b/src/aefa_encoding.erl
new file mode 100644
index 0000000..dd83e64
--- /dev/null
+++ b/src/aefa_encoding.erl
@@ -0,0 +1,262 @@
+%% Fate data (and instruction) serialization.
+%%
+%% The FATE serialization has to fulfill the following properties:
+%% * There has to be 1 and only 1 byte sequence
+%%     representing each unique value in FATE.
+%% * A valid byte sequence has to be deserializable to a FATE value.
+%% * A valid byte sequence must not contain any trailing bytes.
+%% * A serialization is a sequence of 8-bit bytes.
+%%
+%% The serialization function should fulfill the following:
+%% * A valid FATE value should be serialized to a byte sequence.
+%% * Any other argument, not representing a valid FATE value should
+%%     throw an exception
+%%
+%% The deserialization function should fulfill the following:
+%% * A valid byte sequence should be deserialized to a valid FATE value.
+%% * Any other argument, not representing a valid byte sequence should
+%%     throw an exception
+%%
+%% History
+%% * First draft of FATE serialization encoding/decoding.
+%%     Initial experiment with tags
+%% * Second draft
+%%    * FATE data is now defined in aefa_data.erl
+%% * Third draft
+%%    * Added Bit strings
+%%
+%% TODO:
+%%   * Make the code production ready.
+%%       (add tests, document exported functions).
+%%   * Handle Variant types better.
+%%   * Handle type representations.
+%%   * Handle instructions.
+%%
+%% ------------------------------------------------------------------------
+-module(aefa_encoding).
+
+-export([ deserialize/1
+        , deserialize_one/1
+        , serialize/1
+        ]).
+
+-include("aefa_data.hrl").
+
+%% Definition of tag scheme.
+%% This has to follow the protocol specification.
+%% The low bits of the first byte select the kind of value; the binary
+%% patterns in the comments show the whole first byte, high bits first.
+
+-define(SMALL_INT    ,        2#0). %% sxxxxxx 0 - 6 bit integer with sign bit
+%%                                             1 Set below
+-define(LONG_STRING  , 2#00000001). %% 000000 01 - RLP encoded array, size >= 64
+-define(SHORT_STRING ,       2#01). %% xxxxxx 01 - [bytes], 0 < xxxxxx:size < 64
+%%                                            11  Set below
+-define(SHORT_LIST   ,     2#0011). %% xxxx 0011 - [encoded elements], 0 < length < 16
+%%                                     xxxx 0111 - FREE (For typedefs in future)
+-define(LONG_TUPLE   , 2#00001011). %% 0000 1011 - RLP encoded (size - 16) + [encoded elements],
+-define(SHORT_TUPLE  ,     2#1011). %% xxxx 1011 - [encoded elements], 0 < size < 16
+%%                                          1111 Set below
+-define(LONG_LIST    , 2#00011111). %% 0001 1111 - RLP encoded (length - 16) + [Elements]
+-define(MAP          , 2#00101111). %% 0010 1111 - RLP encoded size + [encoded key, encoded value]
+-define(EMPTY_TUPLE  , 2#00111111). %% 0011 1111
+-define(POS_BITS     , 2#01001111). %% 0100 1111 - RLP encoded integer (to be interpreted as bitfield)
+-define(EMPTY_STRING , 2#01011111). %% 0101 1111
+-define(POS_BIG_INT  , 2#01101111). %% 0110 1111 - RLP encoded (integer - 64)
+-define(FALSE        , 2#01111111). %% 0111 1111
+%%                                  %% 1000 1111 - FREE (Possibly for bytecode in the future.)
+-define(ADDRESS      , 2#10011111). %% 1001 1111 - RLP encoded [32 bytes] (the decoder reads one RLP item)
+-define(VARIANT      , 2#10101111). %% 1010 1111 - encoded size + encoded tag + encoded values
+-define(NIL          , 2#10111111). %% 1011 1111 - Empty list
+-define(NEG_BITS     , 2#11001111). %% 1100 1111 - RLP encoded integer (infinite 1:s bitfield)
+-define(EMPTY_MAP    , 2#11011111). %% 1101 1111
+-define(NEG_BIG_INT  , 2#11101111). %% 1110 1111 - RLP encoded (abs(integer) - 64)
+-define(TRUE         , 2#11111111). %% 1111 1111
+
+%% Thresholds at which the short (length in the tag byte) forms stop
+%% applying and the long (RLP encoded length) forms take over.
+-define(SHORT_TUPLE_SIZE, 16).
+-define(SHORT_LIST_SIZE , 16).
+-define(SMALL_INT_SIZE  , 64).
+-define(SHORT_STRING_SIZE, 64).
+
+%% Value of the sign bit of a small integer.
+-define(POS_SIGN, 0).
+-define(NEG_SIGN, 1).
+
+
+%% --------------------------------------------------
+%% Serialize
+%% Serializes a Fate data value into a sequence of bytes
+%% according to the Fate serialization specification.
+%% Fails with function_clause for anything that is not a Fate value.
+%% TODO: The type Fate Data is not final yet.
+-spec serialize(aefa_data:fate_type()) -> binary().
+serialize(?FATE_TRUE)        -> <<?TRUE>>;
+serialize(?FATE_FALSE)       -> <<?FALSE>>;
+serialize(?FATE_NIL)         -> <<?NIL>>;          %% ! Untyped
+serialize(?FATE_UNIT)        -> <<?EMPTY_TUPLE>>;  %% ! Untyped
+serialize(M) when ?IS_FATE_MAP(M), ?FATE_MAP_SIZE(M) =:= 0 -> <<?EMPTY_MAP>>;  %% ! Untyped
+serialize(?FATE_EMPTY_STRING) -> <<?EMPTY_STRING>>;
+serialize(I) when ?IS_FATE_INTEGER(I) -> serialize_integer(I);
+serialize(?FATE_BITS(Bits)) when is_integer(Bits) -> serialize_bits(Bits);
+%% Short string: the length (1..63) lives in the upper six bits of the tag.
+serialize(String) when ?IS_FATE_STRING(String),
+                       ?FATE_STRING_SIZE(String) > 0,
+                       ?FATE_STRING_SIZE(String) < ?SHORT_STRING_SIZE ->
+    Size = ?FATE_STRING_SIZE(String),
+    Bytes = ?FATE_STRING_VALUE(String),
+    <<Size:6, ?SHORT_STRING:2, Bytes/binary>>;
+%% Long string: tag byte followed by the RLP encoding of the bytes.
+serialize(String) when ?IS_FATE_STRING(String),
+                       ?FATE_STRING_SIZE(String) > 0,
+                       ?FATE_STRING_SIZE(String) >= ?SHORT_STRING_SIZE ->
+    Bytes = ?FATE_STRING_VALUE(String),
+    <<?LONG_STRING, (ae_rlp:encode(Bytes))/binary>>;
+serialize(?FATE_ADDRESS(Address)) when is_binary(Address) ->
+    <<?ADDRESS, (ae_rlp:encode(Address))/binary>>;
+serialize(?FATE_TUPLE(T)) when size(T) > 0 ->
+    S = size(T),
+    L = tuple_to_list(T),
+    Rest = << <<(serialize(E))/binary>> || E <- L >>,
+    if S < ?SHORT_TUPLE_SIZE ->
+            <<S:4, ?SHORT_TUPLE:4, Rest/binary>>;
+       true ->
+            Size = rlp_integer(S - ?SHORT_TUPLE_SIZE),
+            <<?LONG_TUPLE:8, Size/binary, Rest/binary>>
+    end;
+serialize(L) when ?IS_FATE_LIST(L) ->
+    %% The empty list was handled by the ?FATE_NIL clause above.
+    [_E|_] = List = ?FATE_LIST_VALUE(L),
+    S = length(List),
+    Rest = << <<(serialize(El))/binary>> || El <- List >>,
+    if S < ?SHORT_LIST_SIZE ->
+            <<S:4, ?SHORT_LIST:4, Rest/binary>>;
+       true ->
+            Val = rlp_integer(S - ?SHORT_LIST_SIZE),
+            <<?LONG_LIST, Val/binary, Rest/binary>>
+    end;
+serialize(Map) when ?IS_FATE_MAP(Map) ->
+    %% The empty map was handled above.
+    L = [{_K,_V}|_] = maps:to_list(?FATE_MAP_VALUE(Map)),
+    Size = length(L),
+    %% TODO:  check all K same type, and all V same type
+    %%        check K =/= map
+    Elements = << <<(serialize(K1))/binary, (serialize(V1))/binary>> || {K1,V1} <- L >>,
+    <<?MAP, (rlp_integer(Size))/binary, (Elements)/binary>>;
+%% Variant: size and tag take one byte each, the values are serialized as
+%% a Fate tuple.
+serialize(?FATE_VARIANT(Size, Tag, Values)) when 0 =< Size
+                                                 , Size < 256
+                                                 , 0 =< Tag
+                                                 , Tag < Size ->
+    <<?VARIANT, Size:8, Tag:8, (serialize(?FATE_TUPLE(Values)))/binary>>.
+
+
+%% -----------------------------------------------------
+
+%% RLP encoding of a non-negative integer (as its unsigned big-endian bytes).
+rlp_integer(S) when S >= 0 ->
+    ae_rlp:encode(binary:encode_unsigned(S)).
+
+%% Integers with magnitude below 64 fit in one byte (sign bit, six value
+%% bits, tag bit 0). Larger ones get a sign specific tag followed by the
+%% RLP encoding of (magnitude - 64).
+serialize_integer(I) when ?IS_FATE_INTEGER(I) ->
+    V = ?FATE_INTEGER_VALUE(I),
+    Abs = abs(V),
+    Sign = case V < 0 of
+               true  -> ?NEG_SIGN;
+               false -> ?POS_SIGN
+           end,
+    if Abs < ?SMALL_INT_SIZE -> <<Sign:1, Abs:6, ?SMALL_INT:1>>;
+       Sign =:= ?NEG_SIGN -> <<?NEG_BIG_INT,
+                               (rlp_integer(Abs - ?SMALL_INT_SIZE))/binary>>;
+       Sign =:= ?POS_SIGN -> <<?POS_BIG_INT,
+                               (rlp_integer(Abs - ?SMALL_INT_SIZE))/binary>>
+    end.
+
+%% Bit fields: a sign specific tag followed by the RLP encoded magnitude.
+serialize_bits(B) when is_integer(B) ->
+    Abs = abs(B),
+    Sign = case B < 0 of
+               true  -> ?NEG_SIGN;
+               false -> ?POS_SIGN
+           end,
+    if
+        Sign =:= ?NEG_SIGN -> <<?NEG_BITS, (rlp_integer(Abs))/binary>>;
+        Sign =:= ?POS_SIGN -> <<?POS_BITS, (rlp_integer(Abs))/binary>>
+    end.
+
+%% Deserialize exactly one value; trailing bytes make the match fail.
+-spec deserialize(binary()) -> aefa_data:fate_type().
+deserialize(B) ->
+    {T, <<>>} = deserialize2(B),
+    T.
+
+%% Deserialize the first value of B; returns {Value, RemainingBytes}.
+deserialize_one(B) -> deserialize2(B).
+
+%% Clause order matters: each fixed "long" tag is matched before the
+%% "short" pattern with free upper bits that would also match it.
+deserialize2(<<?POS_SIGN:1, I:6, ?SMALL_INT:1, Rest/binary>>) ->
+    {?MAKE_FATE_INTEGER(I), Rest};
+deserialize2(<<?NEG_SIGN:1, I:6, ?SMALL_INT:1, Rest/binary>>) ->
+    {?MAKE_FATE_INTEGER(-I), Rest};
+deserialize2(<<?NEG_BIG_INT, Rest/binary>>) ->
+    {Bint, Rest2} = ae_rlp:decode_one(Rest),
+    {?MAKE_FATE_INTEGER(-binary:decode_unsigned(Bint) - ?SMALL_INT_SIZE),
+     Rest2};
+deserialize2(<<?POS_BIG_INT, Rest/binary>>) ->
+    {Bint, Rest2} = ae_rlp:decode_one(Rest),
+    {?MAKE_FATE_INTEGER(binary:decode_unsigned(Bint) + ?SMALL_INT_SIZE),
+     Rest2};
+deserialize2(<<?NEG_BITS, Rest/binary>>) ->
+    {Bint, Rest2} = ae_rlp:decode_one(Rest),
+    {?FATE_BITS(-binary:decode_unsigned(Bint)), Rest2};
+deserialize2(<<?POS_BITS, Rest/binary>>) ->
+    {Bint, Rest2} = ae_rlp:decode_one(Rest),
+    {?FATE_BITS(binary:decode_unsigned(Bint)), Rest2};
+deserialize2(<<?LONG_STRING, Rest/binary>>) ->
+    {String, Rest2} = ae_rlp:decode_one(Rest),
+    {?MAKE_FATE_STRING(String), Rest2};
+deserialize2(<<S:6, ?SHORT_STRING:2, Rest/binary>>) ->
+    String = binary:part(Rest, 0, S),
+    %% Negative length: take the last (byte_size(Rest) - S) bytes.
+    Rest2 = binary:part(Rest, byte_size(Rest), - (byte_size(Rest) - S)),
+    {?MAKE_FATE_STRING(String), Rest2};
+deserialize2(<<?ADDRESS, Rest/binary>>) ->
+    {A, Rest2} = ae_rlp:decode_one(Rest),
+    {?FATE_ADDRESS(A), Rest2};
+deserialize2(<<?TRUE, Rest/binary>>) ->
+    {?FATE_TRUE, Rest};
+deserialize2(<<?FALSE, Rest/binary>>) ->
+    {?FATE_FALSE, Rest};
+deserialize2(<<?NIL, Rest/binary>>) ->
+    {?FATE_NIL, Rest};
+deserialize2(<<?EMPTY_TUPLE, Rest/binary>>) ->
+    {?FATE_UNIT, Rest};
+deserialize2(<<?EMPTY_MAP, Rest/binary>>) ->
+    {?MAKE_FATE_MAP(#{}), Rest};
+deserialize2(<<?EMPTY_STRING, Rest/binary>>) ->
+    {?FATE_EMPTY_STRING, Rest};
+deserialize2(<<?LONG_TUPLE, Rest/binary>>) ->
+    {BSize, Rest1} = ae_rlp:decode_one(Rest),
+    N = binary:decode_unsigned(BSize) + ?SHORT_TUPLE_SIZE,
+    {List, Rest2} = deserialize_elements(N, Rest1),
+    {?FATE_TUPLE(list_to_tuple(List)), Rest2};
+deserialize2(<<S:4, ?SHORT_TUPLE:4, Rest/binary>>) ->
+    {List, Rest1} = deserialize_elements(S, Rest),
+    {?FATE_TUPLE(list_to_tuple(List)), Rest1};
+deserialize2(<<?LONG_LIST, Rest/binary>>) ->
+    {BLength, Rest1} = ae_rlp:decode_one(Rest),
+    Length = binary:decode_unsigned(BLength) + ?SHORT_LIST_SIZE,
+    {List, Rest2} = deserialize_elements(Length, Rest1),
+    {?MAKE_FATE_LIST(List), Rest2};
+deserialize2(<<S:4, ?SHORT_LIST:4, Rest/binary>>) ->
+    {List, Rest1} = deserialize_elements(S, Rest),
+    {?MAKE_FATE_LIST(List), Rest1};
+deserialize2(<<?MAP, Rest/binary>>) ->
+    {BSize, Rest1} = ae_rlp:decode_one(Rest),
+    Size = binary:decode_unsigned(BSize),
+    %% Keys and values are stored interleaved: K1, V1, K2, V2, ...
+    {List, Rest2} = deserialize_elements(2*Size, Rest1),
+    Map = insert_kv(List, #{}),
+    {?MAKE_FATE_MAP(Map), Rest2};
+deserialize2(<<?VARIANT, Size:8, Tag:8, Rest/binary>>) ->
+    %% The serializer only emits 0 =< Tag < Size, so Tag =:= Size is
+    %% invalid as well.
+    if Tag >= Size -> exit({too_large_tag_in_variant, Tag, Size});
+       true ->
+            {?FATE_TUPLE(T), Rest2} = deserialize2(Rest),
+            {?FATE_VARIANT(Size, Tag, T), Rest2}
+    end.
+
+%% Fold a flat [K1, V1, K2, V2, ...] list into the map M.
+insert_kv([], M) -> M;
+insert_kv([K,V|R], M) -> insert_kv(R, maps:put(K, V, M)).
+
+%% Deserialize N consecutive values; returns {Values, RemainingBytes}.
+deserialize_elements(0, Rest) ->
+    {[], Rest};
+deserialize_elements(N, Es) ->
+    {E, Rest} = deserialize2(Es),
+    {Tail, Rest2} = deserialize_elements(N-1, Rest),
+    {[E|Tail], Rest2}.
diff --git a/src/aefa_opcodes.erl b/src/aefa_opcodes.erl
new file mode 100644
index 0000000..20116e5
--- /dev/null
+++ b/src/aefa_opcodes.erl
@@ -0,0 +1,156 @@
+%%%-------------------------------------------------------------------
+%%% @copyright (C) 2019, Aeternity Anstalt
+%%% @doc
+%%%     Opcodes
+%%% @end
+%%%-------------------------------------------------------------------
+
+-module(aefa_opcodes).
+
+-export([ args/1
+        , end_bb/1
+        , mnemonic/1
+        , m_to_op/1
+        , opcode/1
+        ]).
+
+-include_lib("aebytecode/include/aefa_opcodes.hrl").
+
+
+%%====================================================================
+%% API
+%%====================================================================
+
+%% opcode(X): values in the range 0..255 are returned unchanged;
+%% {comment, X} is expanded through the ?COMMENT macro.
+opcode(X) when X >= 0, X =< 255 -> X;
+opcode({comment,X}) -> ?COMMENT(X).
+
+%% mnemonic(Op): the mnemonic atom for an opcode value.
+%% Opcodes without a clause here yield {Op, nothandled} instead of failing.
+mnemonic(?NOP)         -> 'NOP'         ;
+mnemonic(?RETURN)      -> 'RETURN'      ;
+mnemonic(?RETURNR)     -> 'RETURNR'     ;
+mnemonic(?PUSH)        -> 'PUSH'        ;
+mnemonic(?JUMP)        -> 'JUMP'        ;
+mnemonic(?JUMPIF)      -> 'JUMPIF'      ;
+mnemonic(?INC)         -> 'INC'         ;
+mnemonic(?CALL)        -> 'CALL'        ;
+mnemonic(?CALL_T)      -> 'CALL_T'      ;
+mnemonic(?CALL_R)      -> 'CALL_R'      ;
+mnemonic(?CALL_TR)     -> 'CALL_TR'     ;
+mnemonic(?ADD)         -> 'ADD'         ;
+mnemonic(?SUB)         -> 'SUB'         ;
+mnemonic(?MUL)         -> 'MUL'         ;
+mnemonic(?DIV)         -> 'DIV'         ;
+mnemonic(?MOD)         -> 'MOD'         ;
+mnemonic(?POW)         -> 'POW'         ;
+mnemonic(?AND)         -> 'AND'         ;
+mnemonic(?OR)          -> 'OR'          ;
+mnemonic(?NOT)         -> 'NOT'         ;
+mnemonic(?LT)          -> 'LT'          ;
+mnemonic(?GT)          -> 'GT'          ;
+mnemonic(?EGT)         -> 'EGT'         ;
+mnemonic(?ELT)         -> 'ELT'         ;
+mnemonic(?EQ)          -> 'EQ'          ;
+mnemonic(?NEQ)         -> 'NEQ'         ;
+mnemonic(?STORE)       -> 'STORE'       ;
+mnemonic(?TUPLE)       -> 'TUPLE'       ;
+mnemonic(?ELEMENT)     -> 'ELEMENT'     ;
+mnemonic(?MAP_EMPTY)   -> 'MAP_EMPTY'   ;
+mnemonic(?MAP_UPDATE)  -> 'MAP_UPDATE'  ;
+mnemonic(?MAP_DELETE)  -> 'MAP_DELETE'  ;
+mnemonic(?MAP_MEMBER)  -> 'MAP_MEMBER'  ;
+mnemonic(?MAP_LOOKUP)  -> 'MAP_LOOKUP'  ;
+mnemonic(?MAP_LOOKUPD) -> 'MAP_LOOKUPD';
+mnemonic(?MAP_FROM_LIST)->'MAP_FROM_LIST' ;
+
+mnemonic(OP)           -> {OP, nothandled}.
+
+%% m_to_op(Mnemonic): the opcode value for a mnemonic atom.
+%% There is no catch-all clause, so an atom not listed here fails with
+%% function_clause.
+m_to_op('NOP')         -> ?NOP         ;
+m_to_op('COMMENT')     -> ?COMMENT("") ;
+m_to_op('RETURN')      -> ?RETURN      ;
+m_to_op('RETURNR')     -> ?RETURNR     ;
+m_to_op('PUSH')        -> ?PUSH        ;
+m_to_op('JUMP')        -> ?JUMP        ;
+m_to_op('JUMPIF')      -> ?JUMPIF      ;
+m_to_op('INC')         -> ?INC         ;
+m_to_op('ADD')         -> ?ADD         ;
+m_to_op('SUB')         -> ?SUB         ;
+m_to_op('MUL')         -> ?MUL         ;
+m_to_op('DIV')         -> ?DIV         ;
+m_to_op('MOD')         -> ?MOD         ;
+m_to_op('POW')         -> ?POW         ;
+m_to_op('AND')         -> ?AND         ;
+m_to_op('OR')          -> ?OR          ;
+m_to_op('NOT')         -> ?NOT         ;
+m_to_op('LT')          -> ?LT          ;
+m_to_op('GT')          -> ?GT          ;
+m_to_op('ELT')         -> ?ELT         ;
+m_to_op('EGT')         -> ?EGT         ;
+m_to_op('EQ')          -> ?EQ          ;
+m_to_op('NEQ')         -> ?NEQ         ;
+m_to_op('STORE')       -> ?STORE       ;
+m_to_op('TUPLE')       -> ?TUPLE       ;
+m_to_op('ELEMENT')     -> ?ELEMENT     ;
+m_to_op('MAP_EMPTY')   -> ?MAP_EMPTY   ;
+m_to_op('MAP_UPDATE')  -> ?MAP_UPDATE  ;
+m_to_op('MAP_DELETE')  -> ?MAP_DELETE  ;
+m_to_op('MAP_MEMBER')  -> ?MAP_MEMBER  ;
+m_to_op('MAP_LOOKUP')  -> ?MAP_LOOKUP  ;
+m_to_op('MAP_LOOKUPD') -> ?MAP_LOOKUPD ;
+m_to_op('MAP_FROM_LIST')->?MAP_FROM_LIST ;
+m_to_op('CALL')        -> ?CALL        ;
+m_to_op('CALL_T')      -> ?CALL_T      ;
+m_to_op('CALL_R')      -> ?CALL_R      ;
+m_to_op('CALL_TR')     -> ?CALL_TR     .
+
+%% args(Op): the number of operands that follow the opcode, grouped by
+%% count below. Opcodes without a clause fall through to 0.
+args(?NOP)           -> 0;
+args(?RETURN)        -> 0;
+args(?INC)           -> 0;
+
+args(?RETURNR)       -> 1;
+args(?PUSH)          -> 1;
+args(?JUMP)          -> 1;
+args(?CALL)          -> 1;
+args(?CALL_T)        -> 1;
+args(?TUPLE)         -> 1;
+args(?MAP_EMPTY)     -> 1;
+
+args(?JUMPIF)        -> 2;
+args(?CALL_R)        -> 2;
+args(?CALL_TR)       -> 2;
+args(?NOT)           -> 2;
+args(?STORE)         -> 2;
+args(?MAP_FROM_LIST) -> 2;
+
+args(?ADD)           -> 3;
+args(?SUB)           -> 3;
+args(?MUL)           -> 3;
+args(?DIV)           -> 3;
+args(?MOD)           -> 3;
+args(?POW)           -> 3;
+args(?AND)           -> 3;
+args(?OR)            -> 3;
+args(?LT)            -> 3;
+args(?GT)            -> 3;
+args(?EGT)           -> 3;
+args(?ELT)           -> 3;
+args(?EQ)            -> 3;
+args(?NEQ)           -> 3;
+args(?MAP_MEMBER)    -> 3;
+args(?MAP_LOOKUP)    -> 3;
+args(?MAP_DELETE)    -> 3;
+
+args(?ELEMENT)       -> 4;
+args(?MAP_UPDATE)    -> 4;
+args(?MAP_LOOKUPD)   -> 4;
+
+args(_) -> 0.
%% TODO do not allow this + +end_bb(?RETURN) -> true; +end_bb(?RETURNR)-> true; +end_bb(?JUMP) -> true; +end_bb(?JUMPIF) -> true; +end_bb(?CALL) -> true; +end_bb(?CALL_T) -> true; +end_bb(?CALL_R) -> true; +end_bb(?CALL_TR)-> true; +end_bb(_) -> false. + diff --git a/src/aefateasm.erl b/src/aefateasm.erl new file mode 100644 index 0000000..2958808 --- /dev/null +++ b/src/aefateasm.erl @@ -0,0 +1,58 @@ +-module(aefateasm). + +-export([main/1]). + +-define(OPT_SPEC, + [ {src_file, undefined, undefined, string, "Fate assembler code file"} + , {verbose, $v, "verbose", undefined, "Verbose output"} + , {help, $h, "help", undefined, "Show this message"} + , {outfile, $o, "out", string, "Output file (experimental)"} ]). + +usage() -> + getopt:usage(?OPT_SPEC, "aefateasm"). + +main(Args) -> + case getopt:parse(?OPT_SPEC, Args) of + {ok, {Opts, []}} -> + case proplists:get_value(help, Opts, false) of + false -> + assemble(Opts); + true -> + usage() + end; + + {ok, {_, NonOpts}} -> + io:format("Can't understand ~p\n\n", [NonOpts]), + usage(); + + {error, {Reason, Data}} -> + io:format("Error: ~s ~p\n\n", [Reason, Data]), + usage() + end. + +assemble(Opts) -> + case proplists:get_value(src_file, Opts, undefined) of + undefined -> + io:format("Error: no input source file\n\n"), + usage(); + File -> + assemble(File, Opts) + end. + +assemble(File, Opts) -> + Verbose = proplists:get_value(verbose, Opts, false), + case proplists:get_value(outfile, Opts, undefined) of + undefined -> + Asm = aefa_asm:read_file(File), + {Env, BC} = aefa_asm:asm_to_bytecode(Asm, Opts), + case Verbose of + true -> + io:format("Env: ~0p~n", [Env]); + false -> ok + end, + io:format("Code: ~0p~n", [BC]); + OutFile -> + aefa_asm:assemble_file(File, OutFile, Opts) + end. 
+ + diff --git a/test/asm_code/arith.fate b/test/asm_code/arith.fate new file mode 100644 index 0000000..ffd5311 --- /dev/null +++ b/test/asm_code/arith.fate @@ -0,0 +1,26 @@ +;; CONTRACT arith + +FUNCTION add (integer, integer) : integer + ADD a arg0 arg1 + RETURN + +FUNCTION sub (integer, integer) : integer + SUB a arg0 arg1 + RETURN + +FUNCTION mul (integer, integer) : integer + MUL a arg0 arg1 + RETURN + +FUNCTION div (integer, integer) : integer + DIV a arg0 arg1 + RETURN + +FUNCTION mod (integer, integer) : integer + MOD a arg0 arg1 + RETURN + +FUNCTION pow (integer, integer) : integer + POW a arg0 arg1 + RETURN + diff --git a/test/asm_code/bool.fate b/test/asm_code/bool.fate new file mode 100644 index 0000000..d12d881 --- /dev/null +++ b/test/asm_code/bool.fate @@ -0,0 +1,14 @@ +;; CONTRACT bool + +FUNCTION and(boolean, boolean) : boolean + AND a arg0 arg1 + RETURN + +FUNCTION or(boolean, boolean) : boolean + OR a arg0 arg1 + RETURN + +FUNCTION not(boolean) : boolean + NOT a arg0 + RETURN + diff --git a/test/asm_code/comp.fate b/test/asm_code/comp.fate new file mode 100644 index 0000000..b3f8345 --- /dev/null +++ b/test/asm_code/comp.fate @@ -0,0 +1,26 @@ +;; CONTRACT comp + +FUNCTION lt(integer, integer) : boolean + LT a arg0 arg1 + RETURN + +FUNCTION gt(integer, integer) : boolean + GT a arg0 arg1 + RETURN + +FUNCTION egt(integer, integer) : boolean + EGT a arg0 arg1 + RETURN + +FUNCTION elt(integer, integer) : boolean + ELT a arg0 arg1 + RETURN + +FUNCTION eq(integer, integer) : boolean + EQ a arg0 arg1 + RETURN + +FUNCTION neq(integer, integer) : boolean + NEQ a arg0 arg1 + RETURN + diff --git a/test/asm_code/identity.fate b/test/asm_code/identity.fate new file mode 100644 index 0000000..a2350dc --- /dev/null +++ b/test/asm_code/identity.fate @@ -0,0 +1,8 @@ +;; CONTRACT: Identity +FUNCTION id(integer) -> integer + RETURN + +;; Test the code from the shell +;; _build/default/rel/aessembler/bin/aessembler console + +;; 
aeb_aefa:file("../../../../test/asm_code/identity.fate", []). diff --git a/test/asm_code/jumpif.fate b/test/asm_code/jumpif.fate new file mode 100644 index 0000000..05855eb --- /dev/null +++ b/test/asm_code/jumpif.fate @@ -0,0 +1,9 @@ +;; CONTRACT jumpif +FUNCTION skip(integer, integer) : integer + PUSH arg1 + PUSH 0 + EQ a a arg0 + JUMPIF a 2 + INC + JUMP 2 + RETURN diff --git a/test/asm_code/map.fate b/test/asm_code/map.fate new file mode 100644 index 0000000..beb1589 --- /dev/null +++ b/test/asm_code/map.fate @@ -0,0 +1,34 @@ +;; CONTRACT map +FUNCTION make_empty_map():{map, integer, boolean} + MAP_EMPTY a + RETURN + +FUNCTION map_update({map, integer, boolean}, integer, boolean):{map, integer, boolean} + MAP_UPDATE a arg0 arg1 arg2 + RETURN + +FUNCTION map_lookup({map, integer, boolean}, integer):boolean + MAP_LOOKUP a arg0 arg1 + RETURN + +FUNCTION map_lookup_default({map, integer, boolean}, integer): boolean + MAP_LOOKUPD a arg0 arg1 false + RETURN + +FUNCTION map_member({map, integer, boolean}, integer):boolean + MAP_MEMBER a arg0 arg1 + RETURN + +FUNCTION map_delete({map, integer, boolean}, integer):{map, integer, boolean} + MAP_DELETE a arg0 arg1 + RETURN + + +FUNCTION map_member({map, integer, boolean}, integer) : boolean + MAP_MEMBER a arg0 arg1 + RETURN + +FUNCTION map_from_list({list, {tuple, [integer, boolean]}}) : {map, integer, boolean} + MAP_FROM_LIST a arg0 + RETURN + diff --git a/test/asm_code/memory.fate b/test/asm_code/memory.fate new file mode 100644 index 0000000..e162daa --- /dev/null +++ b/test/asm_code/memory.fate @@ -0,0 +1,31 @@ +;; CONTRACT memory +FUNCTION call(integer):integer + STORE var1 arg0 + PUSH 0 + CALL write + PUSH var1 + RETURN + +FUNCTION write(integer):integer + STORE var1 arg0 + RETURNR var1 + +FUNCTION dest_add(integer, integer): integer + STORE var1 arg0 + STORE var2 arg1 + ADD var3 var1 var2 + PUSH var3 + RETURN + +FUNCTION dest_add_imm(integer):integer + STORE var1 arg0 + ADD var3 var1 2 + PUSH var3 + RETURN + 
+FUNCTION dest_add_stack(integer, integer): integer + STORE var1 arg0 + PUSH arg1 + ADD var3 var1 a + PUSH var3 + RETURN diff --git a/test/asm_code/remote.fate b/test/asm_code/remote.fate new file mode 100644 index 0000000..5ad09ea --- /dev/null +++ b/test/asm_code/remote.fate @@ -0,0 +1,4 @@ +;; CONTRACT remote +FUNCTION add_five(integer):integer + ADD a 5 arg0 + RETURN diff --git a/test/asm_code/test.fate b/test/asm_code/test.fate new file mode 100644 index 0000000..294e0c8 --- /dev/null +++ b/test/asm_code/test.fate @@ -0,0 +1,45 @@ +;; CONTRACT: Test +FUNCTION id(integer) -> integer + RETURN + +FUNCTION jumps() -> integer + PUSH 0 + JUMP 3 + NOP + JUMP 2 + NOP + RETURN + NOP + JUMP 1 + +FUNCTION inc(integer) -> integer + INC + INC + RETURN + +FUNCTION call(integer) -> integer + INC + CALL inc + INC + RETURN + + +FUNCTION tailcall(integer) -> integer + INC + CALL_T inc + +FUNCTION remote_call(integer) : integer + PUSH arg0 + CALL_R remote.add_five + INC + RETURN + +FUNCTION remote_tailcall(integer) : integer + PUSH arg0 + CALL_TR remote add_five + +;; Test the code from the shell +;; _build/default/rel/aessembler/bin/aessembler console + +;; aeb_aefa:file("../../../../test/asm_code/test.fate", []). +;; f(Asm), f(Env), f(BC), Asm = aefa_asm:read_file("../../../../test/asm_code/test.fate"), {Env, BC} = aefa_asm:asm_to_bytecode(Asm, []), aefa_asm:bytecode_to_fate_code(BC, []). 
\ No newline at end of file diff --git a/test/asm_code/tuple.fate b/test/asm_code/tuple.fate new file mode 100644 index 0000000..573166b --- /dev/null +++ b/test/asm_code/tuple.fate @@ -0,0 +1,31 @@ +;;CONTRACT tuple +FUNCTION make_0tuple():{tuple, []} + TUPLE 0 + RETURN + +FUNCTION make_2tuple(integer, integer):{tuple, [integer, integer]} + PUSH arg0 + PUSH arg1 + TUPLE 2 + RETURN + +FUNCTION make_5tuple(integer, integer, integer, integer, integer): + {tuple, [integer, integer, integer, integer, integer]} + PUSH arg0 + PUSH arg1 + PUSH arg2 + PUSH arg3 + PUSH arg4 + TUPLE 5 + RETURN + +FUNCTION element1(integer, integer): integer + PUSH arg0 + PUSH arg1 + TUPLE 2 + ELEMENT integer a 1 a + RETURN + +FUNCTION element({tuple, [integer, integer]}, integer): integer + ELEMENT integer a arg1 arg0 + RETURN