From 5878ef3e9b27f40d1bd8f3409df28617c2f3ffd2 Mon Sep 17 00:00:00 2001
From: Erik Stenman
Date: Mon, 11 Feb 2019 18:14:45 +0100
Subject: [PATCH] Initial handling of Fate opcodes.

---
 include/aefa_opcodes.hrl | 107 +++++++++++++++++++++++++
 src/aefa_asm.erl | 140 ++++++++++++++++++++++++++++++++
 src/aefa_asm_scan.xrl | 169 +++++++++++++++++++++++++++++++++++++++
 src/aefa_opcode.erl | 32 ++++++++
 4 files changed, 448 insertions(+)
 create mode 100644 include/aefa_opcodes.hrl
 create mode 100644 src/aefa_asm.erl
 create mode 100644 src/aefa_asm_scan.xrl
 create mode 100644 src/aefa_opcode.erl

diff --git a/include/aefa_opcodes.hrl b/include/aefa_opcodes.hrl
new file mode 100644
index 0000000..6e9a2b6
--- /dev/null
+++ b/include/aefa_opcodes.hrl
@@ -0,0 +1,107 @@
+
+%% FATE opcodes
+%%
+%% Each macro binds an opcode mnemonic (a quoted atom) to its numeric
+%% value, written as a two-digit base-16 literal.
+%% NOTE: the table goes from 'POP' (16#0b) straight to 'STORE' (16#10);
+%% the values 16#0c-16#0f are not assigned here.
+-define('NOP' , 16#00).
+-define('RETURN' , 16#01).
+-define('CALL' , 16#02).
+-define('CALL_R' , 16#03).
+-define('CALL_T' , 16#04).
+-define('CALL_TR' , 16#05).
+-define('JUMP' , 16#06).
+-define('JUMPI' , 16#07).
+-define('SWITCH' , 16#08).
+-define('PUSH' , 16#09).
+-define('DUP' , 16#0a).
+-define('POP' , 16#0b).
+-define('STORE' , 16#10).
+-define('ADD' , 16#11).
+-define('MUL' , 16#12).
+-define('SUB' , 16#13).
+-define('DIV' , 16#14).
+-define('MOD' , 16#15).
+-define('POW' , 16#16).
+-define('LT' , 16#17).
+-define('GT' , 16#18).
+-define('EQ' , 16#19).
+-define('ELT' , 16#1a).
+-define('EGT' , 16#1b).
+-define('NEQ' , 16#1c).
+-define('AND' , 16#1d).
+-define('OR' , 16#1e).
+-define('NOT' , 16#1f).
+-define('TUPLE' , 16#20).
+-define('ELEMENT' , 16#21).
+-define('MAP_EMPTY' , 16#22).
+-define('MAP_LOOKUP' , 16#23).
+-define('MAP_UPDATE' , 16#24).
+-define('MAP_DELETE' , 16#25).
+-define('MAP_MEMBER' , 16#26).
+-define('MAP_FROM_LIST' , 16#27).
+-define('NIL' , 16#28).
+-define('IS_NIL' , 16#29).
+-define('CONS' , 16#2a).
+-define('HD' , 16#2b).
+-define('TL' , 16#2c).
+-define('LENGTH' , 16#2d).
+-define('STR_EQ' , 16#2e).
+-define('STR_JOIN' , 16#2f).
+-define('ADDR_TO_STR' , 16#30).
+-define('STR_REVERSE' , 16#31).
+-define('INT_TO_ADDR' , 16#32).
+-define('VARIANT' , 16#33).
+-define('VARIANT_TEST' , 16#34).
+-define('VARIANT_ELEMENT', 16#35).
+-define('BITS_NONE' , 16#36).
+-define('BITS_ALL' , 16#37).
+-define('BITS_SET' , 16#38).
+-define('BITS_CLEAR' , 16#39).
+-define('BITS_TEST' , 16#3a).
+-define('BITS_SUM' , 16#3b).
+-define('BITS_OR' , 16#3c).
+-define('BITS_AND' , 16#3d).
+-define('BITS_DIFF' , 16#3e).
+-define('ADDRESS' , 16#3f).
+-define('BALANCE' , 16#40).
+-define('ORIGIN' , 16#41).
+-define('CALLER' , 16#42).
+-define('GASPRICE' , 16#43).
+-define('BLOCKHASH' , 16#44).
+-define('BENEFICIARY' , 16#45).
+-define('TIMESTAMP' , 16#46).
+-define('NUMBER' , 16#47).
+-define('DIFFICULTY' , 16#48).
+-define('GASLIMIT' , 16#49).
+-define('GAS' , 16#4a).
+-define('LOG0' , 16#4b).
+-define('LOG1' , 16#4c).
+-define('LOG2' , 16#4d).
+-define('LOG3' , 16#4e).
+-define('LOG4' , 16#4f).
+-define('ABORT' , 16#50).
+-define('EXIT' , 16#51).
+-define('DEACTIVATE' , 16#52).
+-define('INT_TO_STR' , 16#53).
+
+%% NOTE: 16#54 and 16#55 are not assigned; the table resumes at 16#56.
+
+-define('SPEND' , 16#56).
+-define('ORACLE_REGISTER', 16#57).
+-define('ORACLE_QUERY' , 16#58).
+-define('ORACLE_RESPOND' , 16#59).
+-define('ORACLE_EXTEND' , 16#5a).
+-define('ORACLE_GET_ANSWER', 16#5b).
+-define('ORACLE_GET_QUESTION', 16#5c).
+-define('ORACLE_QUERY_FEE', 16#5d).
+-define('AENS_RESOLVE' , 16#5e).
+-define('AENS_PRECLAIM' , 16#5f).
+-define('AENS_CLAIM' , 16#60).
+-define('AENS_UPDATE' , 16#61).
+-define('AENS_TRANSFER' , 16#62).
+-define('AENS_REVOKE' , 16#63).
+-define('ECVERIFY' , 16#64).
+-define('SHA3' , 16#65).
+-define('SHA256' , 16#66).
+-define('BLAKE2B' , 16#67).
+
+%% The table jumps from 16#67 to 16#ff for 'EXTEND'.
+-define('EXTEND' , 16#ff).
+
+%% Unlike the macros above, COMMENT(X) does not expand to a number:
+%% it builds the tagged tuple {comment, X}.
+-define( COMMENT(X), {comment, X}).
+
diff --git a/src/aefa_asm.erl b/src/aefa_asm.erl
new file mode 100644
index 0000000..6bf6d9e
--- /dev/null
+++ b/src/aefa_asm.erl
@@ -0,0 +1,140 @@
+%%%-------------------------------------------------------------------
+%%% @copyright (C) 2019, Aeternity Anstalt
+%%% @doc Assembler for Fate machine code.
+%%%
+%%% Assembler code can be read from a file.
+%%% The assembler has the following format
+%%% Comments start with 2 semicolons and run until the end of the line
+%%%   ;; This is a comment
+%%% Opcode mnemonics start with an upper case letter.
+%%%   DUP
+%%% Identifiers start with a lower case letter
+%%%   an_identifier
+%%% Immediates can be of 9 types:
+%%%  1. Integers
+%%%       42
+%%%       -2374683271468723648732648736498712634876147
+%%%  2. Hexadecimal integers starting with 0x
+%%%       0x0deadbeef0
+%%%  3. Addresses, 256-bit hash strings starting with #
+%%%     followed by up to 64 hex chars
+%%%       #00000deadbeef
+%%%  4. Boolean
+%%%       true
+%%%       false
+%%%  5. Strings
+%%%       "Hello"
+%%%  6. Empty map
+%%%       {}
+%%%  7. Lists
+%%%       []
+%%%       [1, 2]
+%%%  8. Bit field
+%%%       <000>
+%%%       <1010>
+%%%       <>
+%%%       !<>
+%%%  9. Tuples
+%%%       ()
+%%%       (1, "foo")
+%%% @end
+%%% Created : 21 Dec 2017
+%%%-------------------------------------------------------------------
+
+-module(aefa_asm).
+
+-export([ file/2
+        , pp/1
+        , to_hexstring/1
+        ]).
+
+-include_lib("aebytecode/include/aefa_opcodes.hrl").
+
+
+%% @doc Print a listing of Asm on standard output.
+%% Asm is a list whose elements are either mnemonic atoms or
+%% {comment, String} tuples (see format/2).
+pp(Asm) ->
+    Listing = format(Asm),
+    io:format("~s~n", [Listing]).
+
+%% Render Asm as a flat string, one line per element.
+format(Asm) -> format(Asm, 0).
+
+%% Address counts the mnemonics rendered so far; comments do not advance
+%% it. It is threaded through the recursion but not printed.
+format([{comment, Comment} | Rest], Address) ->
+    ";; " ++ Comment ++ "\n" ++ format(Rest, Address);
+format([Mnemonic | Rest], Address) ->
+    %% The result is discarded; the call is kept so that an element
+    %% m_to_op/1 does not accept makes the listing fail. The opcode
+    %% module is aefa_opcode (there is no aefa_opcodes module, only the
+    %% aefa_opcodes.hrl header).
+    _Op = aefa_opcode:m_to_op(Mnemonic),
+    " " ++ atom_to_list(Mnemonic) ++ "\n"
+        ++ format(Rest, Address + 1);
+format([],_) -> [].
+
+
+
+
+%% @doc Assemble the Fate assembler source file Filename into a binary.
+%%
+%% Options is a property list of debug switches. Each switch that is
+%% set to true prints an intermediate result on standard output:
+%%   pp_tokens       - the token list returned by the scanner
+%%   pp_opcodes      - the code list before references are resolved
+%%   pp_patched_code - the code list after references are resolved
+%%   pp_hex_string   - the final byte list as a hex string
+%% A switch that is absent, or present with any value other than true,
+%% is off.
+%%
+%% Fails with badmatch if the file cannot be read or scanned.
+file(Filename, Options) ->
+    {ok, File} = file:read_file(Filename),
+    {ok, Tokens, _} = aefa_asm_scan:scan(binary_to_list(File)),
+
+    case proplists:get_bool(pp_tokens, Options) of
+        true  -> io:format("Tokens ~p~n",[Tokens]);
+        false -> ok
+    end,
+
+    ByteList = to_bytecode(Tokens, 0, #{}, [], Options),
+
+    %% to_hexstring/1 is only evaluated when the listing is requested.
+    case proplists:get_bool(pp_hex_string, Options) of
+        true  -> io:format("Code: ~s~n",[to_hexstring(ByteList)]);
+        false -> ok
+    end,
+
+    list_to_binary(ByteList).
+
+%% @doc Render a list of integers as a string: "0x" followed by each
+%% element formatted as a two-character, zero-padded, lower-case
+%% base-16 number.
+to_hexstring(ByteList) ->
+    "0x" ++ lists:flatten(
+              [io_lib:format("~2.16.0b", [X])
+               || X <- ByteList]).
+
+
+%% Translate the token list into a code list.
+%%   Address - running address, advanced only by mnemonics
+%%   Env     - map from label to the address at which it was seen
+%%   Code    - code emitted so far, in reverse order
+%%   Opts    - the debug switches passed to file/2
+%% Tokens of any other kind have no clause and fail with
+%% function_clause.
+to_bytecode([{mnemonic,_line, Op}|Rest], Address, Env, Code, Opts) ->
+    %% The opcode module is aefa_opcode (there is no aefa_opcodes
+    %% module, only the aefa_opcodes.hrl header).
+    OpCode = aefa_opcode:m_to_op(Op),
+    %% NOTE(review): aefa_opcode exports m_to_op/1 but, as far as this
+    %% patch shows, no op_size/1 - this call fails with undef until
+    %% op_size/1 is provided. Confirm where it is meant to live.
+    OpSize = aefa_opcode:op_size(OpCode),
+    to_bytecode(Rest, Address + OpSize, Env, [OpCode|Code], Opts);
+to_bytecode([{int,_line, Int}|Rest], Address, Env, Code, Opts) ->
+    %% Immediates are emitted as-is and do not advance Address.
+    to_bytecode(Rest, Address, Env, [Int|Code], Opts);
+to_bytecode([{hash,_line, Hash}|Rest], Address, Env, Code, Opts) ->
+    to_bytecode(Rest, Address, Env, [Hash|Code], Opts);
+to_bytecode([{id,_line, ID}|Rest], Address, Env, Code, Opts) ->
+    %% Identifiers become {ref, ID} placeholders, patched at the end.
+    to_bytecode(Rest, Address, Env, [{ref, ID}|Code], Opts);
+to_bytecode([{label,_line, Label}|Rest], Address, Env, Code, Opts) ->
+    %% A label emits no code; it only records the current address.
+    to_bytecode(Rest, Address, Env#{Label => Address}, Code, Opts);
+to_bytecode([], _Address, Env, Code, Opts) ->
+    case proplists:get_bool(pp_opcodes, Opts) of
+        true  -> io:format("opcodes ~p~n", [lists:reverse(Code)]);
+        false -> ok
+    end,
+
+    PatchedCode = resolve_refs(Code, Env, []),
+    case proplists:get_bool(pp_patched_code, Opts) of
+        true  -> io:format("Patched Code: ~p~n", [PatchedCode]);
+        false -> ok
+    end,
+
+    expand_args(PatchedCode).
+
+%% Also reverses the code (back to unreversed state).
+%% resolve_refs(Code, Env, Acc): walks Code and pushes every element
+%% onto Acc, replacing each {ref, ID} placeholder with the value bound
+%% to ID in the map Env. Because elements are prepended, the result is
+%% in the opposite order to Code. maps:get/2 raises {badkey, ID} when
+%% ID has no binding in Env.
+resolve_refs([{ref, ID} | Rest], Env, Code) ->
+    Address = maps:get(ID, Env),
+    resolve_refs(Rest, Env, [Address | Code]);
+resolve_refs([Op | Rest], Env, Code) ->
+    resolve_refs(Rest, Env, [Op | Code]);
+resolve_refs([],_Env, Code) -> Code.
+
+%% expand_args(Code): rebuilds the list element by element. As written
+%% it returns a list equal to its argument; no element is changed.
+expand_args([OP | Rest]) ->
+    [OP | expand_args(Rest)];
+expand_args([]) -> [].
diff --git a/src/aefa_asm_scan.xrl b/src/aefa_asm_scan.xrl
new file mode 100644
index 0000000..af3b7c6
--- /dev/null
+++ b/src/aefa_asm_scan.xrl
@@ -0,0 +1,169 @@
+%%% -*- erlang-indent-level:4; indent-tabs-mode: nil -*-
+%%%-------------------------------------------------------------------
+%%% @copyright (C) 2019, aeternity Anstalt
+%%% @doc
+%%% Handling FATE code.
+%%% @end
+%%% Created : 9 Jan 2019
+%%%-------------------------------------------------------------------
+%%% Macros defined below:
+%%%   INT  - one or more decimal digits (no sign)
+%%%   HEX  - "0x" followed by one or more hex digits
+%%%   HASH - "#" followed by one or more hex digits
+%%%   WS   - any character from \000 up to and including space
+%%%   ID   - a lower-case letter or underscore, followed by letters,
+%%%          digits or underscores
+%%% NOTE(review): UPPER is defined but no rule in this patch appears
+%%% to reference it - confirm whether it is still needed.
+
+Definitions.
+DIGIT = [0-9]
+HEXDIGIT = [0-9a-fA-F]
+LOWER = [a-z_]
+UPPER = [A-Z]
+INT = {DIGIT}+
+HEX = 0x{HEXDIGIT}+
+HASH = #{HEXDIGIT}+
+WS = [\000-\s]
+ID = {LOWER}[a-zA-Z0-9_]*
+
+Rules.
+%% Identifiers yield {id, Line, Chars} with the matched text as a string.
+{ID} : {token, {id, TokenLine, TokenChars }}.
+
+%% Each mnemonic rule yields {mnemonic, Line, Atom}.
+NOP : {token, {mnemonic, TokenLine, 'NOP'}}.
+
+RETURN : {token, {mnemonic, TokenLine, 'RETURN'}}.
+CALL : {token, {mnemonic, TokenLine, 'CALL'}}.
+CALL_R : {token, {mnemonic, TokenLine, 'CALL_R'}}.
+CALL_T : {token, {mnemonic, TokenLine, 'CALL_T'}}.
+CALL_TR : {token, {mnemonic, TokenLine, 'CALL_TR'}}.
+JUMP : {token, {mnemonic, TokenLine, 'JUMP'}}.
+JUMPI : {token, {mnemonic, TokenLine, 'JUMPI'}}.
+SWITCH : {token, {mnemonic, TokenLine, 'SWITCH'}}.
+
+PUSH : {token, {mnemonic, TokenLine, 'PUSH'}}.
+DUP : {token, {mnemonic, TokenLine, 'DUP'}}.
+POP : {token, {mnemonic, TokenLine, 'POP'}}.
+
+STORE : {token, {mnemonic, TokenLine, 'STORE'}}.
+
+ADD : {token, {mnemonic, TokenLine, 'ADD'}}.
+MUL : {token, {mnemonic, TokenLine, 'MUL'}}.
+SUB : {token, {mnemonic, TokenLine, 'SUB'}}.
+DIV : {token, {mnemonic, TokenLine, 'DIV'}}.
+MOD : {token, {mnemonic, TokenLine, 'MOD'}}.
+POW : {token, {mnemonic, TokenLine, 'POW'}}.
+
+%% Comparison mnemonics.
+LT : {token, {mnemonic, TokenLine, 'LT'}}.
+GT : {token, {mnemonic, TokenLine, 'GT'}}.
+EQ : {token, {mnemonic, TokenLine, 'EQ'}}.
+ELT : {token, {mnemonic, TokenLine, 'ELT'}}.
+EGT : {token, {mnemonic, TokenLine, 'EGT'}}.
+NEQ : {token, {mnemonic, TokenLine, 'NEQ'}}.
+
+AND : {token, {mnemonic, TokenLine, 'AND'}}.
+OR : {token, {mnemonic, TokenLine, 'OR'}}.
+NOT : {token, {mnemonic, TokenLine, 'NOT'}}.
+
+TUPLE : {token, {mnemonic, TokenLine, 'TUPLE'}}.
+ELEMENT : {token, {mnemonic, TokenLine, 'ELEMENT'}}.
+
+%% Map mnemonics. MAP_DELETE is defined in aefa_opcodes.hrl (16#25)
+%% together with the other map opcodes, so it gets a rule here too;
+%% without one the input "MAP_DELETE" falls through to the error rule.
+MAP_EMPTY : {token, {mnemonic, TokenLine, 'MAP_EMPTY'}}.
+MAP_LOOKUP : {token, {mnemonic, TokenLine, 'MAP_LOOKUP'}}.
+MAP_UPDATE : {token, {mnemonic, TokenLine, 'MAP_UPDATE'}}.
+MAP_DELETE : {token, {mnemonic, TokenLine, 'MAP_DELETE'}}.
+MAP_MEMBER : {token, {mnemonic, TokenLine, 'MAP_MEMBER'}}.
+MAP_FROM_LIST : {token, {mnemonic, TokenLine, 'MAP_FROM_LIST'}}.
+
+NIL : {token, {mnemonic, TokenLine, 'NIL'}}.
+IS_NIL : {token, {mnemonic, TokenLine, 'IS_NIL'}}.
+CONS : {token, {mnemonic, TokenLine, 'CONS'}}.
+HD : {token, {mnemonic, TokenLine, 'HD'}}.
+TL : {token, {mnemonic, TokenLine, 'TL'}}.
+LENGTH : {token, {mnemonic, TokenLine, 'LENGTH'}}.
+
+STR_EQ : {token, {mnemonic, TokenLine, 'STR_EQ'}}.
+STR_JOIN : {token, {mnemonic, TokenLine, 'STR_JOIN'}}.
+INT_TO_STR : {token, {mnemonic, TokenLine, 'INT_TO_STR'}}.
+ADDR_TO_STR : {token, {mnemonic, TokenLine, 'ADDR_TO_STR'}}.
+STR_REVERSE : {token, {mnemonic, TokenLine, 'STR_REVERSE'}}.
+
+INT_TO_ADDR : {token, {mnemonic, TokenLine, 'INT_TO_ADDR'}}.
+
+VARIANT : {token, {mnemonic, TokenLine, 'VARIANT'}}.
+VARIANT_TEST : {token, {mnemonic, TokenLine, 'VARIANT_TEST'}}.
+VARIANT_ELEMENT : {token, {mnemonic, TokenLine, 'VARIANT_ELEMENT'}}.
+
+BITS_NONE : {token, {mnemonic, TokenLine, 'BITS_NONE'}}.
+BITS_ALL : {token, {mnemonic, TokenLine, 'BITS_ALL'}}.
+BITS_SET : {token, {mnemonic, TokenLine, 'BITS_SET'}}.
+BITS_CLEAR : {token, {mnemonic, TokenLine, 'BITS_CLEAR'}}.
+BITS_TEST : {token, {mnemonic, TokenLine, 'BITS_TEST'}}.
+BITS_SUM : {token, {mnemonic, TokenLine, 'BITS_SUM'}}.
+BITS_OR : {token, {mnemonic, TokenLine, 'BITS_OR'}}.
+BITS_AND : {token, {mnemonic, TokenLine, 'BITS_AND'}}.
+BITS_DIFF : {token, {mnemonic, TokenLine, 'BITS_DIFF'}}.
+
+
+ADDRESS : {token, {mnemonic, TokenLine, 'ADDRESS'}}.
+BALANCE : {token, {mnemonic, TokenLine, 'BALANCE'}}.
+ORIGIN : {token, {mnemonic, TokenLine, 'ORIGIN'}}.
+CALLER : {token, {mnemonic, TokenLine, 'CALLER'}}.
+GASPRICE : {token, {mnemonic, TokenLine, 'GASPRICE'}}.
+BLOCKHASH : {token, {mnemonic, TokenLine, 'BLOCKHASH'}}.
+BENEFICIARY : {token, {mnemonic, TokenLine, 'BENEFICIARY'}}.
+TIMESTAMP : {token, {mnemonic, TokenLine, 'TIMESTAMP'}}.
+NUMBER : {token, {mnemonic, TokenLine, 'NUMBER'}}.
+DIFFICULTY : {token, {mnemonic, TokenLine, 'DIFFICULTY'}}.
+GASLIMIT : {token, {mnemonic, TokenLine, 'GASLIMIT'}}.
+GAS : {token, {mnemonic, TokenLine, 'GAS'}}.
+LOG0 : {token, {mnemonic, TokenLine, 'LOG0'}}.
+LOG1 : {token, {mnemonic, TokenLine, 'LOG1'}}.
+LOG2 : {token, {mnemonic, TokenLine, 'LOG2'}}.
+LOG3 : {token, {mnemonic, TokenLine, 'LOG3'}}.
+LOG4 : {token, {mnemonic, TokenLine, 'LOG4'}}.
+ABORT : {token, {mnemonic, TokenLine, 'ABORT'}}.
+EXIT : {token, {mnemonic, TokenLine, 'EXIT'}}.
+DEACTIVATE : {token, {mnemonic, TokenLine, 'DEACTIVATE'}}.
+COMMENT : {token, {mnemonic, TokenLine, 'COMMENT'}}.
+%% The {ID} rule is not repeated here: the identical rule at the top of
+%% the Rules section already matches every identifier.
+{HEX} :
+ {token, {int, TokenLine, parse_hex(TokenChars)}}.
+{INT} :
+ {token, {int, TokenLine, parse_int(TokenChars)}}.
+{HASH} :
+ {token, {hash, TokenLine, parse_hash(TokenChars)}}.
+
+
+%% Symbols
+, : {token, {',', TokenLine}}.
+\. : {token, {'.', TokenLine}}.
+\( : {token, {'(', TokenLine}}.
+\) : {token, {')', TokenLine}}.
+\[ : {token, {'[', TokenLine}}.
+\] : {token, {']', TokenLine}}.
+{ : {token, {'{', TokenLine}}.
+} : {token, {'}', TokenLine}}.
+
+
+%% Whitespace ignore
+{WS} : skip_token.
+
+%% Comments (TODO: nested comments)
+;;.* : skip_token.
+
+%% Any other single character is a scan error.
+. : {error, "Unexpected token: " ++ TokenChars}.
+
+Erlang code.
+
+-export([scan/1]).
+
+-dialyzer({nowarn_function, yyrev/2}).
+
+-ignore_xref([format_error/1, string/2, token/2, token/3, tokens/2, tokens/3]).
+
+-include_lib("aebytecode/include/aeb_opcodes.hrl").
+
+
+%% Convert the text of a HEX token ("0x" plus hex digits) to an integer.
+parse_hex("0x" ++ Chars) -> list_to_integer(Chars, 16).
+
+%% Convert the text of an INT token (decimal digits) to an integer.
+parse_int(Chars) -> list_to_integer(Chars).
+
+%% Convert the text of a HASH token ("#" plus hex digits) to a 256-bit
+%% (32-byte) big-endian binary. Short inputs are zero-extended on the
+%% left; if the number needs more than 256 bits only its low 256 bits
+%% are kept, which is how the bit syntax sizes an integer segment.
+parse_hash("#" ++ Chars) ->
+    N = list_to_integer(Chars, 16),
+    <<N:256>>.
+
+%% Tokenise the string S with string/1, a function this module does
+%% not define itself (presumably generated by leex - confirm).
+scan(S) ->
+    string(S).
+
diff --git a/src/aefa_opcode.erl b/src/aefa_opcode.erl
new file mode 100644
index 0000000..4563f81
--- /dev/null
+++ b/src/aefa_opcode.erl
@@ -0,0 +1,32 @@
+%%%-------------------------------------------------------------------
+%%% @copyright (C) 2019, Aeternity Anstalt
+%%% @doc
+%%% Opcodes
+%%% @end
+%%%-------------------------------------------------------------------
+
+-module(aefa_opcode).
+
+-export([ mnemonic/1
+        , m_to_op/1
+        , opcode/1
+        ]).
+
+-include_lib("aebytecode/include/aefa_opcodes.hrl").
+
+
+%%====================================================================
+%% API
+%%====================================================================
+
+%% Pass through a value in the range 0..255, or rebuild a comment tuple.
+opcode(X) when X >= 0, X =< 255 -> X;
+opcode({comment,X}) -> ?COMMENT(X).
+
+%% Opcode value -> mnemonic atom. Only NOP and comments are handled so
+%% far; any other argument fails with function_clause.
+mnemonic(?NOP) -> 'NOP' ;
+mnemonic({comment,_}) -> 'COMMENT' .
+
+%% Mnemonic atom -> opcode value. A value already in the range 0..255
+%% is returned unchanged, mirroring opcode/1; any other argument fails
+%% with function_clause.
+m_to_op('NOP') -> ?NOP ;
+m_to_op('COMMENT') -> ?COMMENT("") ;
+m_to_op(Data) when 0=<Data, Data =< 255 -> Data .
+