From 286a8c19137138aecd2d776e3216ca77bb2ea809 Mon Sep 17 00:00:00 2001 From: Erik Stenman Date: Wed, 13 Feb 2019 15:35:48 +0100 Subject: [PATCH] Add serialization/deserialization. --- .gitignore | 1 + include/aefa_opcodes.hrl | 7 +- rebar.config | 9 +- rebar.lock | 5 +- src/aefa_asm.erl | 186 ++++++++++++++++++++-- src/aefa_asm_scan.xrl | 3 + src/{aefa_opcode.erl => aefa_opcodes.erl} | 27 +++- test/asm_code/test.fate | 9 +- 8 files changed, 223 insertions(+), 24 deletions(-) rename src/{aefa_opcode.erl => aefa_opcodes.erl} (57%) diff --git a/.gitignore b/.gitignore index db21748..d18405b 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ rel/example_project .concrete/DEV_MODE .rebar aeb_asm_scan.erl +aefa_asm_scan.erl \ No newline at end of file diff --git a/include/aefa_opcodes.hrl b/include/aefa_opcodes.hrl index 6e9a2b6..c52016e 100644 --- a/include/aefa_opcodes.hrl +++ b/include/aefa_opcodes.hrl @@ -79,9 +79,9 @@ -define('ABORT' , 16#50). -define('EXIT' , 16#51). -define('DEACTIVATE' , 16#52). --define('INT_TO_STR' , 16#53). - - +-define('INC' , 16#53). +-define('DEC' , 16#54). +-define('INT_TO_STR' , 16#55). -define('SPEND' , 16#56). -define('ORACLE_REGISTER', 16#57). -define('ORACLE_QUERY' , 16#58). @@ -101,6 +101,7 @@ -define('SHA256' , 16#66). -define('BLAKE2B' , 16#67). +-define('FUNCTION' , 16#fe). -define('EXTEND' , 16#ff). -define( COMMENT(X), {comment, X}). diff --git a/rebar.config b/rebar.config index db69d34..557a749 100644 --- a/rebar.config +++ b/rebar.config @@ -2,7 +2,10 @@ {erl_opts, [debug_info]}. -{deps, []}. +{deps, [ + {enacl, {git, "https://github.com/aeternity/enacl.git", + {ref, "26180f4"}}} + ]}. {dialyzer, [ {warnings, [unknown]}, @@ -11,9 +14,9 @@ ]}. {relx, [{release, {aessembler, "0.0.1"}, - [aebytecode]}, + [aebytecode, enacl]}, {dev_mode, true}, {include_erts, false}, - {extended_start_script, true}]}. \ No newline at end of file + {extended_start_script, true}]}. 
diff --git a/rebar.lock b/rebar.lock index 57afcca..6880fc5 100644 --- a/rebar.lock +++ b/rebar.lock @@ -1 +1,4 @@ -[]. +[{<<"enacl">>, + {git,"https://github.com/aeternity/enacl.git", + {ref,"26180f42c0b3a450905d2efd8bc7fd5fd9cece75"}}, + 0}]. diff --git a/src/aefa_asm.erl b/src/aefa_asm.erl index a671e66..430761e 100644 --- a/src/aefa_asm.erl +++ b/src/aefa_asm.erl @@ -43,12 +43,15 @@ -module(aefa_asm). --export([ file/2 +-export([ asm_to_bytecode/2 + , bytecode_to_fate_code/2 , pp/1 + , read_file/1 , to_hexstring/1 ]). -include_lib("aebytecode/include/aefa_opcodes.hrl"). +-define(HASH_BYTES, 32). pp(Asm) -> @@ -60,17 +63,18 @@ format(Asm) -> format(Asm, 0). format([{comment, Comment} | Rest], Address) -> ";; " ++ Comment ++ "\n" ++ format(Rest, Address); format([Mnemonic | Rest], Address) -> - _Op = aefa_opcode:m_to_op(Mnemonic), + _Op = aefa_opcodes:m_to_op(Mnemonic), " " ++ atom_to_list(Mnemonic) ++ "\n" ++ format(Rest, Address + 1); format([],_) -> []. - - -file(Filename, Options) -> +read_file(Filename) -> {ok, File} = file:read_file(Filename), - {ok, Tokens, _} = aefa_asm_scan:scan(binary_to_list(File)), + binary_to_list(File). + +asm_to_bytecode(AssemblerCode, Options) -> + {ok, Tokens, _} = aefa_asm_scan:scan(AssemblerCode), case proplists:lookup(pp_tokens, Options) of {pp_tokens, true} -> @@ -79,7 +83,9 @@ file(Filename, Options) -> ok end, - Env = to_bytecode(Tokens, none, #{}, [], Options), + Env = to_bytecode(Tokens, none, #{ functions => #{} + , symbols => #{} + }, [], Options), ByteList = serialize(Env), @@ -92,11 +98,141 @@ file(Filename, Options) -> {Env, list_to_binary(ByteList)}. -serialize(Env) -> +bytecode_to_fate_code(ByteCode,_Options) -> + deserialize(ByteCode, #{ function => none + , bb => 0 + , current_bb_code => [] + , functions => #{} + , code => #{} + }). 
+%% deserialize(Bytes, Env): decode serialized functions into Env's `functions' map, one basic block at a time.
+deserialize(<<?FUNCTION:8, A, B, C, D, Rest/binary>>, % very first function header in the stream
+            #{ function := none
+             , bb := 0
+             , current_bb_code := []
+             } = Env) ->
+    {Sig, Rest2} = deserialize_signature(Rest),
+    Env2 = Env#{function => {<<A,B,C,D>>, Sig}}, % function key = {4-byte name hash, signature}
+    deserialize(Rest2, Env2);
+deserialize(<<?FUNCTION:8, A, B, C, D, Rest/binary>>, % next header: store the function decoded so far
+            #{ function := F
+             , bb := BB
+             , current_bb_code := Code
+             , code := Program
+             , functions := Funs} = Env) ->
+    {Sig, Rest2} = deserialize_signature(Rest),
+    case Code of
+        [] -> % no open block: the last one was already closed by an end_bb op
+            Env2 = Env#{ bb => 0
+                       , current_bb_code => []
+                       , function => {<<A,B,C,D>>, Sig}
+                       , code => #{}
+                       , functions => Funs#{F => Program}},
+            deserialize(Rest2, Env2);
+        _ -> % flush the still-open block under its index BB
+            Env2 = Env#{ bb => 0
+                       , current_bb_code => []
+                       , function => {<<A,B,C,D>>, Sig}
+                       , code => #{}
+                       , functions =>
+                             Funs#{F => Program#{ BB => lists:reverse(Code)}}},
+            deserialize(Rest2, Env2)
+    end;
+deserialize(<<Op:8, Rest/binary>>, % one instruction: opcode byte followed by its arguments
+            #{ bb := BB
+             , current_bb_code := Code
+             , code := Program} = Env) ->
+    {Rest2, OpCode} = deserialize_op(Op, Rest, Code),
+    case aefa_opcodes:end_bb(Op) of
+        true -> % Op terminates the block: store it and continue with block BB+1
+            deserialize(Rest2, Env#{ bb => BB+1
+                                   , current_bb_code => []
+                                   , code => Program#{BB =>
+                                                          lists:reverse(OpCode)}});
+        false -> % keep accumulating (ops are kept newest-first until the block is stored)
+            deserialize(Rest2, Env#{ current_bb_code => OpCode})
+    end;
+deserialize(<<>>, #{ function := F % end of input: flush the last function
+                   , bb := BB
+                   , current_bb_code := Code
+                   , code := Program
+                   , functions := Funs} = Env) ->
+    FunctionCode =
+        case Code of
+            [] -> Program;
+            _ -> Program#{ BB => lists:reverse(Code)}
+        end,
+    Env#{ bb => 0
+        , current_bb_code => []
+        , function => none
+        , code => #{}
+        , functions => Funs#{F => FunctionCode}}. % NOTE: F is still `none' when the input was empty
+%% deserialize_op(Op, Rest, Code) -> {Rest2, Code2}: push the mnemonic of Op and its argument onto Code.
+deserialize_op(Op, Rest, Code) ->
+    OpName = aefa_opcodes:mnemonic(Op),
+    case aefa_opcodes:args(Op) of
+        0 -> {Rest, [OpName | Code]};
+        1 -> %% TODO: use rlp encoded int.
+            <<Arg:8, Rest2/binary>> = Rest, % single-byte immediate
+            {Rest2, [Arg, OpName | Code]};
+        hash ->
+            <<A, B, C, D, Rest2/binary>> = Rest, % 4-byte symbol hash
+            Code2 = [<<A,B,C,D>>, OpName | Code],
+            {Rest2, Code2}
+    end.
+
+%% serialize(Env): flattened list holding, per entry of Env's `functions' map, the ?FUNCTION
+%% marker, the entry's key (Name), its serialized signature and its code.
+serialize(#{functions := Functions}) ->
     %% TODO: add serialization of immediates
     %% TODO: add serialization of function definitions
-    Code = [C || {_Name, {_Sig, C}} <- maps:to_list(Env)],
-    Code.
+    Code = [[?FUNCTION, Name, serialize_signature(Sig), C] ||
+               {Name, {Sig, C}} <- maps:to_list(Functions)],
+    lists:flatten(Code).
+%% serialize_signature({Args, RetType}): the argument types as one tuple type, then the return type.
+serialize_signature({Args, RetType}) ->
+    [serialize_type({tuple, Args}) |
+     serialize_type(RetType)].
+%% serialize_type(Type): type tag (0..6) followed by any component types; every clause returns a list.
+serialize_type(integer) -> [0];
+serialize_type(boolean) -> [1];
+serialize_type({list, T}) -> [2 | serialize_type(T)];
+serialize_type({tuple, Ts}) ->
+    case length(Ts) of
+        N when N =< 255 -> % deserialize_type/1 reads the arity as one byte; larger tuples crash here
+            [3, N | [serialize_type(T) || T <- Ts]]
+    end;
+serialize_type(address) -> [4]; % must be a list: a bare 4 makes `[2 | 4]' improper and `4 ++ _' a badarg
+serialize_type(bits) -> [5]; % same reason as for address
+serialize_type({map, K, V}) -> [6 | serialize_type(K) ++ serialize_type(V)].
+
+%% deserialize_signature(Bin) -> {{Args, RetType}, Rest}: inverse of serialize_signature/1.
+deserialize_signature(Binary) ->
+    {{tuple, Args}, Rest} = deserialize_type(Binary),
+    {RetType, Rest2} = deserialize_type(Rest),
+    {{Args, RetType}, Rest2}.
+%% deserialize_type(Bin) -> {Type, Rest}: decode one tagged type from the front of Bin.
+deserialize_type(<<0, Rest/binary>>) -> {integer, Rest};
+deserialize_type(<<1, Rest/binary>>) -> {boolean, Rest};
+deserialize_type(<<2, Rest/binary>>) ->
+    {T, Rest2} = deserialize_type(Rest),
+    {{list, T}, Rest2};
+deserialize_type(<<3, N, Rest/binary>>) ->
+    {Ts, Rest2} = deserialize_types(N, Rest, []),
+    {{tuple, Ts}, Rest2};
+deserialize_type(<<4, Rest/binary>>) -> {address, Rest};
+deserialize_type(<<5, Rest/binary>>) -> {bits, Rest};
+deserialize_type(<<6, Rest/binary>>) ->
+    {K, Rest2} = deserialize_type(Rest),
+    {V, Rest3} = deserialize_type(Rest2),
+    {{map, K, V}, Rest3}.
+%% deserialize_types(N, Bin, Acc) -> {Types, Rest}: decode N consecutive types, kept in stream order.
+deserialize_types(0, Binary, Acc) ->
+    {lists:reverse(Acc), Binary};
+deserialize_types(N, Binary, Acc) ->
+    {T, Rest} = deserialize_type(Binary),
+    deserialize_types(N-1, Rest, [T | Acc]).
+ to_hexstring(ByteList) -> "0x" ++ lists:flatten( @@ -108,7 +244,7 @@ to_bytecode([{function,_line, 'FUNCTION'}|Rest], Address, Env, Code, Opts) -> {Fun, Rest2} = to_fun_def(Rest), to_bytecode(Rest2, Fun, Env2, [], Opts); to_bytecode([{mnemonic,_line, Op}|Rest], Address, Env, Code, Opts) -> - OpCode = aefa_opcode:m_to_op(Op), + OpCode = aefa_opcodes:m_to_op(Op), %% TODO: arguments to_bytecode(Rest, Address, Env, [OpCode|Code], Opts); to_bytecode([{int,_line, Int}|Rest], Address, Env, Code, Opts) -> @@ -116,7 +252,8 @@ to_bytecode([{int,_line, Int}|Rest], Address, Env, Code, Opts) -> to_bytecode([{hash,_line, Hash}|Rest], Address, Env, Code, Opts) -> to_bytecode(Rest, Address, Env, [Hash|Code], Opts); to_bytecode([{id,_line, ID}|Rest], Address, Env, Code, Opts) -> - to_bytecode(Rest, Address, Env, [{ref, ID}|Code], Opts); + {ok, Hash} = lookup_symbol(ID, Env), + to_bytecode(Rest, Address, Env, [Hash|Code], Opts); to_bytecode([{label,_line, Label}|Rest], Address, Env, Code, Opts) -> to_bytecode(Rest, Address, Env#{Label => Address}, Code, Opts); to_bytecode([], Address, Env, Code, Opts) -> @@ -190,5 +327,26 @@ expand_args([OP | Rest]) -> expand_args([]) -> []. insert_fun(none, [], Env) -> Env; -insert_fun({Name, Type, RetType}, Code, Env) -> - Env#{Name => {{Type, RetType}, lists:reverse(Code)}}. +insert_fun({Name, Type, RetType}, Code, #{functions := Functions} = Env) -> + {Hash, Env2} = insert_symbol(Name, Env), + Env2#{ + functions => Functions#{Hash => {{Type, RetType}, lists:reverse(Code)}} + }. + +insert_symbol(Id, Env) -> + %% Use first 4 bytes of blake hash + {ok, <> } = enacl:generichash(?HASH_BYTES, list_to_binary(Id)), + insert_symbol(Id, <>, Env). + +insert_symbol(Id, Hash, #{symbols := Symbols} = Env) -> + case maps:find(Hash, Symbols) of + {ok, Id} -> {Hash, Env}; + {ok, Id2} -> + %% Very unlikely... + exit({two_symbols_with_same_hash, Id, Id2}); + error -> + {Hash, Env#{symbols => Symbols#{ Id => Hash + , Hash => Id}}} + end. 
+lookup_symbol(Id, #{symbols := Symbols} = Env) -> + maps:find(Id, Symbols). diff --git a/src/aefa_asm_scan.xrl b/src/aefa_asm_scan.xrl index 3473418..bbc7290 100644 --- a/src/aefa_asm_scan.xrl +++ b/src/aefa_asm_scan.xrl @@ -47,6 +47,9 @@ DIV : {token, {mnemonic, TokenLine, 'DIV'}}. MOD : {token, {mnemonic, TokenLine, 'MOD'}}. POW : {token, {mnemonic, TokenLine, 'POW'}}. +INC : {token, {mnemonic, TokenLine, 'INC'}}. +DEC : {token, {mnemonic, TokenLine, 'DEC'}}. + LT : {token, {mnemonic, TokenLine, 'LT'}}. GT : {token, {mnemonic, TokenLine, 'GT'}}. EQ : {token, {mnemonic, TokenLine, 'EQ'}}. diff --git a/src/aefa_opcode.erl b/src/aefa_opcodes.erl similarity index 57% rename from src/aefa_opcode.erl rename to src/aefa_opcodes.erl index 2c726ff..809601f 100644 --- a/src/aefa_opcode.erl +++ b/src/aefa_opcodes.erl @@ -5,9 +5,11 @@ %%% @end %%%------------------------------------------------------------------- --module(aefa_opcode). +-module(aefa_opcodes). --export([ mnemonic/1 +-export([ args/1 + , end_bb/1 + , mnemonic/1 , m_to_op/1 , opcode/1 ]). @@ -23,6 +25,12 @@ opcode(X) when X >= 0, X =< 255 -> X; opcode({comment,X}) -> ?COMMENT(X). mnemonic(?NOP) -> 'NOP' ; +mnemonic(?RETURN) -> 'RETURN' ; +mnemonic(?PUSH) -> 'PUSH' ; +mnemonic(?JUMP) -> 'JUMP' ; +mnemonic(?INC) -> 'INC' ; +mnemonic(?CALL) -> 'CALL' ; +mnemonic(OP) -> {OP, nothandled} ; mnemonic({comment,_}) -> 'COMMENT' . m_to_op('NOP') -> ?NOP ; @@ -30,5 +38,20 @@ m_to_op('COMMENT') -> ?COMMENT("") ; m_to_op('RETURN') -> ?RETURN ; m_to_op('PUSH') -> ?PUSH ; m_to_op('JUMP') -> ?JUMP ; +m_to_op('INC') -> ?INC ; +m_to_op('CALL') -> ?CALL ; m_to_op(Data) when 0= Data. +args(?NOP) -> 0; +args(?RETURN) -> 0; +args(?PUSH) -> 1; +args(?JUMP) -> 1; +args(?INC) -> 0; +args(?CALL) -> hash; +args(_) -> 0. %% TODO do not allow this + +end_bb(?RETURN) -> true; +end_bb(?JUMP) -> true; +end_bb(?CALL) -> true; +end_bb(_) -> false. 
+ diff --git a/test/asm_code/test.fate b/test/asm_code/test.fate index d4bf55b..1afaa70 100644 --- a/test/asm_code/test.fate +++ b/test/asm_code/test.fate @@ -13,7 +13,13 @@ FUNCTION jumps() -> integer JUMP 1 FUNCTION inc(integer) -> integer - INC arg0 + INC + INC + RETURN + +FUNCTION call(integer) -> integer + INC + CALL inc INC RETURN @@ -21,3 +27,4 @@ FUNCTION inc(integer) -> integer ;; _build/default/rel/aessembler/bin/aessembler console ;; aeb_aefa:file("../../../../test/asm_code/test.fate", []). +;; f(Asm), f(Env), f(BC), Asm = aefa_asm:read_file("../../../../test/asm_code/test.fate"), {Env, BC} = aefa_asm:asm_to_bytecode(Asm, []), aefa_asm:bytecode_to_fate_code(BC, []). \ No newline at end of file