From 0aa1c89556e5c58c54b11956b7f1a6a18347550b Mon Sep 17 00:00:00 2001 From: Ulf Norell Date: Tue, 7 May 2019 15:48:47 +0200 Subject: [PATCH] Fate compiler (#62) * Update to changes in icode format * Start on new intermediate code for FATE * Compile `let` to FATE * Fix and improve broken bytecode optimisations * Basic tuple patterns * Compile shallow matching on tuples * Liveness analysis for local variables * Fix minor bug * Use RETURNR when possible * Nicer debug printing * Refactor optimization rules * Compile tuple construction * Improve instruction analysis and generalize some optimizations * Compile nested pattern matching to case trees (Only tuple and variable patterns so far) * Reannotate and repeat optimization pass once it done Could try hard to keep annotations more precise, but would be more error prone * Get rid of unnecessary STORE instructions * Keep better track of liveness annotations when swapping instructions * Limit the number of iterations for the optimization loop Should finish in one iteration, but we shouldn't loop if there are bugs or corner cases where it doesn't. * Pattern matching on booleans * wip: rewrite case tree compiler to handle catch-alls still with debug printing, and can't compile it yet * Add missing case in renaming * Compile case trees all the way to Fate assembly * Simplify variables bindings in environment * Shortcut let x = y in ... * compile list literals * Fix various bugs in pattern match compilation * Pretty printer for fcode * Fix renaming bug * Another renaming bug * Handle switch_body in optimizations * Remove optimization for if-then-else * Tag instructions in annotated scode * Remove 'if' from fcode * Fix dialyzer things * Remove unused argument * Compile pattern matching on integer literals * Compile list patterns * Use op_view in more places * allow leaving out fields from record patterns * compile records (patterns and construction) * Compile record update * Use SETELEMENT instruction * Compile variants * Remove incorrect push for tuple switches * Optimize matching on single constructors datatypes * Use the fact that SWITCH and JUMPIF can use args and vars * string literals and pattern matching on the same * Compile character literals * Minor refactoring of op instruction handling * compile address literals * Get rid of unit in AST * Unary operators * Compile function calls (to fully saturated top-level functions only) * fix breakage after unary operators * variables are now lists of names in fcode * pretty printing for function calls * use STORE ?a instead of PUSH during optimizations * no-op fcode optimization pass * some constant propagation optimizations * Case on constructor optimization * fix minor bugs * Compile all the operators * Compile maps * Simplify JUMPIF on true/false * Fixed left-over reference to STR_EQ * Add compile-time evaluation for more operators * Distinguish local vars and top-level names already in fcode * Compile builtins * Compile bytes(N) Compile to FATE strings for now * Improve inlining of PUSH * Fix name resolution bug * Change map_get/set to operators in fcode * Compile lambdas and higher-order functions * Optimize single variable closure envs * Handle unapplied builtins and top-level functions * Missing case in fcode pretty printer * Fix variable binding bug in fcode compiler * Compiler side of state updates No support in FATE yet though * Compile statements * Compile events But no FATE support for events yet * Compile remote calls * Clearer distinction between applied and unapplied top-level things (def/builtin) in fcode * Tag for literals in fcode to make code cleaner * We now have block hash at height in FATE * Update aebytecode commit * Get rid of catchall todos * Jump some hoops to please Dialyzer --- rebar.config | 2 +- rebar.lock | 4 +- src/aeso_ast_infer_types.erl | 25 +- src/aeso_ast_to_fcode.erl | 1294 ++++++++++++++++++++++++++++++++ src/aeso_fcode_to_fate.erl | 1360 ++++++++++++++++++++++++++++++++++ src/aeso_icode_to_fate.erl | 299 -------- src/aeso_parser.erl | 4 +- src/aeso_pretty.erl | 1 - src/aeso_syntax.erl | 1 - 9 files changed, 2672 insertions(+), 318 deletions(-) create mode 100644 src/aeso_ast_to_fcode.erl create mode 100644 src/aeso_fcode_to_fate.erl delete mode 100644 src/aeso_icode_to_fate.erl diff --git a/rebar.config b/rebar.config index a61cafd..bd0dda0 100644 --- a/rebar.config +++ b/rebar.config @@ -3,7 +3,7 @@ {erl_opts, [debug_info]}. {deps, [ {aebytecode, {git, "https://github.com/aeternity/aebytecode.git", - {ref, "1526ad3"}}} + {ref, "2555868"}}} , {getopt, "1.0.1"} , {jsx, {git, "https://github.com/talentdeficit/jsx.git", {tag, "2.8.0"}}} diff --git a/rebar.lock b/rebar.lock index 75cbf1e..a2d120e 100644 --- a/rebar.lock +++ b/rebar.lock @@ -1,11 +1,11 @@ {"1.1.0", [{<<"aebytecode">>, {git,"https://github.com/aeternity/aebytecode.git", - {ref,"1526ad3bf057e72a1714aea0430b001bd1d576c9"}}, + {ref,"2555868990ac2a08876e86b1b798b4750273591f"}}, 0}, {<<"aeserialization">>, {git,"https://github.com/aeternity/aeserialization.git", - {ref,"6dce265753af4e651f77746e77ea125145c85dd3"}}, + {ref,"816bf994ffb5cee218c3f22dc5fea296c9e0882e"}}, 1}, {<<"base58">>, {git,"https://github.com/aeternity/erl-base58.git", diff --git a/src/aeso_ast_infer_types.erl b/src/aeso_ast_infer_types.erl index ed9083b..a24aaea 100644 --- a/src/aeso_ast_infer_types.erl +++ b/src/aeso_ast_infer_types.erl @@ -98,11 +98,12 @@ -type scope() :: #scope{}. -record(env, - { scopes = #{ [] => #scope{}} :: #{ qname() => scope() } - , vars = [] :: [{name(), var_info()}] - , typevars = unrestricted :: unrestricted | [name()] - , fields = #{} :: #{ name() => [field_info()] } %% fields are global - , namespace = [] :: qname() + { scopes = #{ [] => #scope{}} :: #{ qname() => scope() } + , vars = [] :: [{name(), var_info()}] + , typevars = unrestricted :: unrestricted | [name()] + , fields = #{} :: #{ name() => [field_info()] } %% fields are global + , namespace = [] :: qname() + , in_pattern = false :: boolean() }). -type env() :: #env{}. @@ -358,7 +359,7 @@ global_env() -> Fun1 = fun(S, T) -> Fun([S], T) end, TVar = fun(X) -> {tvar, Ann, "'" ++ X} end, SignId = {id, Ann, "signature"}, - SignDef = {tuple, Ann, [{int, Ann, 0}, {int, Ann, 0}]}, + SignDef = {bytes, Ann, <<0:64/unit:8>>}, Signature = {named_arg_t, Ann, SignId, SignId, {typed, Ann, SignDef, SignId}}, SignFun = fun(Ts, T) -> {type_sig, Ann, [Signature], Ts, T} end, TTL = {qid, Ann, ["Chain", "ttl"]}, @@ -378,7 +379,7 @@ global_env() -> %% Abort {"abort", Fun1(String, A)}]) , types = MkDefs( - [{"int", 0}, {"bool", 0}, {"string", 0}, {"address", 0}, + [{"int", 0}, {"bool", 0}, {"char", 0}, {"string", 0}, {"address", 0}, {"hash", {[], {alias_t, Bytes(32)}}}, {"signature", {[], {alias_t, Bytes(64)}}}, {"bits", 0}, @@ -908,6 +909,8 @@ infer_expr(_Env, Body={bool, As, _}) -> {typed, As, Body, {id, As, "bool"}}; infer_expr(_Env, Body={int, As, _}) -> {typed, As, Body, {id, As, "int"}}; +infer_expr(_Env, Body={char, As, _}) -> + {typed, As, Body, {id, As, "char"}}; infer_expr(_Env, Body={string, As, _}) -> {typed, As, Body, {id, As, "string"}}; infer_expr(_Env, Body={bytes, As, Bin}) -> @@ -935,8 +938,6 @@ infer_expr(Env, Id = {Tag, As, _}) when Tag == id; Tag == qid -> infer_expr(Env, Id = {Tag, As, _}) when Tag == con; Tag == qcon -> {QName, Type} = lookup_name(Env, As, Id, [freshen]), {typed, As, QName, Type}; -infer_expr(Env, {unit, As}) -> - infer_expr(Env, {tuple, As, []}); infer_expr(Env, {tuple, As, Cpts}) -> NewCpts = [infer_expr(Env, C) || C <- Cpts], CptTypes = [T || {typed, _, _, T} <- NewCpts], @@ -990,7 +991,7 @@ infer_expr(Env, {record, Attrs, Fields}) -> constrain([ #record_create_constraint{ record_t = RecordType1, fields = [ FieldName || {field, _, [{proj, _, FieldName}], _} <- Fields ], - context = Attrs } ] ++ + context = Attrs } || not Env#env.in_pattern ] ++ [begin [{proj, _, FieldName}] = LV, #field_constraint{ @@ -1119,7 +1120,7 @@ infer_case(Env, Attrs, Pattern, ExprType, Branch, SwitchType) -> [] -> ok; Nonlinear -> type_error({non_linear_pattern, Pattern, lists:usort(Nonlinear)}) end, - NewEnv = bind_vars([{Var, fresh_uvar(Ann)} || Var = {id, Ann, _} <- Vars], Env), + NewEnv = bind_vars([{Var, fresh_uvar(Ann)} || Var = {id, Ann, _} <- Vars], Env#env{ in_pattern = true }), NewPattern = {typed, _, _, PatType} = infer_expr(NewEnv, Pattern), NewBranch = check_expr(NewEnv, Branch, SwitchType), unify(Env, PatType, ExprType, {case_pat, Pattern, PatType, ExprType}), @@ -1178,6 +1179,8 @@ infer_prefix({IntOp,As}) when IntOp =:= '-' -> free_vars({int, _, _}) -> []; +free_vars({char, _, _}) -> + []; free_vars({string, _, _}) -> []; free_vars({bool, _, _}) -> diff --git a/src/aeso_ast_to_fcode.erl b/src/aeso_ast_to_fcode.erl new file mode 100644 index 0000000..0e06cac --- /dev/null +++ b/src/aeso_ast_to_fcode.erl @@ -0,0 +1,1294 @@ +%%%------------------------------------------------------------------- +%%% @author Ulf Norell +%%% @copyright (C) 2019, Aeternity Anstalt +%%% @doc +%%% Compiler from Aeterinty Sophia language to Fate intermediate code. +%%% @end +%%% Created : 26 Mar 2019 +%%% +%%%------------------------------------------------------------------- +-module(aeso_ast_to_fcode). + +-export([ast_to_fcode/2, format_fexpr/1]). +-export_type([fcode/0, fexpr/0, fun_def/0]). + +%% -- Type definitions ------------------------------------------------------- + +-type option() :: term(). + +-type attribute() :: stateful | pure. + +-type fun_name() :: {entrypoint, binary()} + | {local_fun, [string()]} + | init. +-type var_name() :: string(). +-type sophia_name() :: [string()]. + +-type builtin() :: atom(). + +-type op() :: '+' | '-' | '*' | '/' | mod | '^' | '++' | '::' | + '<' | '>' | '=<' | '>=' | '==' | '!=' | '!' | + map_get | map_get_d | map_set | map_from_list | map_to_list | + map_delete | map_member | map_size | string_length | + string_concat | bits_set | bits_clear | bits_test | bits_sum | + bits_intersection | bits_union | bits_difference. + +-type flit() :: {int, integer()} + | {string, binary()} + | {account_pubkey, binary()} + | {contract_pubkey, binary()} + | {oracle_pubkey, binary()} + | {oracle_query_id, binary()} + | {bool, false | true}. + +-type fexpr() :: {lit, flit()} + | nil + | {var, var_name()} + | {def, fun_name(), [fexpr()]} + | {remote, fexpr(), fun_name(), [fexpr()]} + | {builtin, builtin(), [fexpr()]} + | {con, arities(), tag(), [fexpr()]} + | {tuple, [fexpr()]} + | {proj, fexpr(), integer()} + | {set_proj, fexpr(), integer(), fexpr()} %% tuple, field, new_value + | {op, op(), [fexpr()]} + | {'let', var_name(), fexpr(), fexpr()} + | {funcall, fexpr(), [fexpr()]} %% Call to unknown function + | {closure, fun_name(), fexpr()} + | {switch, fsplit()} + %% The following (unapplied top-level functions/builtins and + %% lambdas) are generated by the fcode compiler, but translated + %% to closures by the lambda lifter. + | {def_u, fun_name(), arity()} + | {remote_u, fexpr(), fun_name(), arity()} + | {builtin_u, builtin(), arity()} + | {lam, [var_name()], fexpr()}. + +-type fsplit() :: {split, ftype(), var_name(), [fcase()]} + | {nosplit, fexpr()}. + +-type fcase() :: {'case', fsplit_pat(), fsplit()}. + +-type fsplit_pat() :: {var, var_name()} + | {bool, false | true} + | {int, integer()} + | {string, binary()} + | nil + | {'::', var_name(), var_name()} + | {con, arities(), tag(), [var_name()]} + | {tuple, [var_name()]}. + +-type ftype() :: integer + | boolean + | string + | {list, ftype()} + | {map, ftype(), ftype()} + | {tuple, [ftype()]} + | address + | hash + | signature + | contract + | oracle + | oracle_query + | name + | channel + | bits + | {variant, [[ftype()]]} + | {function, [ftype()], ftype()} + | any. + +-type fun_def() :: #{ attrs := [attribute()], + args := [{var_name(), ftype()}], + return := ftype(), + body := fexpr() }. + +-type fcode() :: #{ contract_name := string(), + state_type := ftype(), + event_type := ftype() | none, + functions := #{ fun_name() => fun_def() } }. + +-type type_def() :: fun(([ftype()]) -> ftype()). + +-type tag() :: non_neg_integer(). +-type arities() :: [arity()]. + +-record(con_tag, { tag :: tag(), arities :: arities() }). +-type con_tag() :: #con_tag{}. + +-type type_env() :: #{ sophia_name() => type_def() }. +-type fun_env() :: #{ sophia_name() => {fun_name(), non_neg_integer()} }. +-type con_env() :: #{ sophia_name() => con_tag() }. +-type builtins() :: #{ sophia_name() => {builtin(), non_neg_integer() | none} }. + +-type context() :: {main_contract, string()} + | {namespace, string()} + | {abstract_contract, string()}. + +-type env() :: #{ type_env := type_env(), + fun_env := fun_env(), + con_env := con_env(), + builtins := builtins(), + options := [option()], + context => context(), + vars => [var_name()], + functions := #{ fun_name() => fun_def() } }. + +%% -- Entrypoint ------------------------------------------------------------- + +%% Main entrypoint. Takes typed syntax produced by aeso_ast_infer_types:infer/1,2 +%% and produces Fate intermediate code. +-spec ast_to_fcode(aeso_syntax:ast(), [option()]) -> fcode(). +ast_to_fcode(Code, Options) -> + Verbose = lists:member(pp_fcode, Options), + FCode1 = to_fcode(init_env(Options), Code), + [io:format("-- Before lambda lifting --\n~s\n\n", [format_fcode(FCode1)]) || Verbose], + FCode2 = lambda_lift(FCode1), + [ io:format("-- After lambda lifting --\n~s\n\n", [format_fcode(FCode2)]) || Verbose, FCode2 /= FCode1 ], + FCode3 = optimize_fcode(FCode2), + [ io:format("-- After optimization --\n~s\n\n", [format_fcode(FCode3)]) || Verbose, FCode3 /= FCode2 ], + FCode3. + +%% -- Environment ------------------------------------------------------------ + +-spec init_env([option()]) -> env(). +init_env(Options) -> + #{ type_env => init_type_env(), + fun_env => #{}, + builtins => builtins(), + con_env => #{["None"] => #con_tag{ tag = 0, arities = [0, 1] }, + ["Some"] => #con_tag{ tag = 1, arities = [0, 1] }, + ["RelativeTTL"] => #con_tag{ tag = 0, arities = [1, 1] }, + ["FixedTTL"] => #con_tag{ tag = 1, arities = [1, 1] } + }, + options => Options, + functions => #{} }. + +-spec builtins() -> builtins(). +builtins() -> + MkName = fun(NS, Fun) -> + list_to_atom(string:to_lower(string:join(NS ++ [Fun], "_"))) + end, + Scopes = [{[], [{"abort", 1}]}, + {["Chain"], [{"spend", 2}, {"balance", 1}, {"block_hash", 1}, {"coinbase", none}, + {"timestamp", none}, {"block_height", none}, {"difficulty", none}, + {"gas_limit", none}]}, + {["Contract"], [{"address", none}, {"balance", none}]}, + {["Call"], [{"origin", none}, {"caller", none}, {"value", none}, {"gas_price", none}, + {"gas_left", 0}]}, + {["Oracle"], [{"register", 4}, {"query_fee", 1}, {"query", 5}, {"get_question", 2}, + {"respond", 4}, {"extend", 3}, {"get_answer", 2}]}, + {["AENS"], [{"resolve", 2}, {"preclaim", 3}, {"claim", 4}, {"transfer", 4}, + {"revoke", 3}]}, + {["Map"], [{"from_list", 1}, {"to_list", 1}, {"lookup", 2}, + {"lookup_default", 3}, {"delete", 2}, {"member", 2}, {"size", 1}]}, + {["Crypto"], [{"ecverify", 3}, {"ecverify_secp256k1", 3}, {"sha3", 1}, + {"sha256", 1}, {"blake2b", 1}]}, + {["Auth"], [{"tx_hash", none}]}, + {["String"], [{"length", 1}, {"concat", 2}, {"sha3", 1}, {"sha256", 1}, {"blake2b", 1}]}, + {["Bits"], [{"set", 2}, {"clear", 2}, {"test", 2}, {"sum", 1}, {"intersection", 2}, + {"union", 2}, {"difference", 2}, {"none", none}, {"all", none}]}, + {["Int"], [{"to_str", 1}]}, + {["Address"], [{"to_str", 1}]} + ], + maps:from_list([ {NS ++ [Fun], {MkName(NS, Fun), Arity}} + || {NS, Funs} <- Scopes, + {Fun, Arity} <- Funs ]). + +-define(type(T), fun([]) -> T end). +-define(type(X, T), fun([X]) -> T end). +-define(type(X, Y, T), fun([X, Y]) -> T end). + +-spec init_type_env() -> type_env(). +init_type_env() -> + #{ ["int"] => ?type(integer), + ["bool"] => ?type(boolean), + ["bits"] => ?type(bits), + ["char"] => ?type(integer), + ["string"] => ?type(string), + ["address"] => ?type(address), + ["hash"] => ?type(hash), + ["signature"] => ?type(signature), + ["oracle"] => ?type(_, _, oracle), + ["oracle_query"] => ?type(_, _, oracle_query), %% TODO: not in Fate + ["list"] => ?type(T, {list, T}), + ["map"] => ?type(K, V, {map, K, V}), + ["option"] => ?type(T, {variant, [[], [T]]}), + ["Chain", "ttl"] => ?type({variant, [[integer], [integer]]}) + }. + +%% -- Compilation ------------------------------------------------------------ + +-spec to_fcode(env(), aeso_syntax:ast()) -> fcode(). +to_fcode(Env, [{contract, _, {con, _, Main}, Decls}]) -> + #{ builtins := Builtins } = Env, + MainEnv = Env#{ context => {main_contract, Main}, + builtins => Builtins#{[Main, "state"] => {get_state, none}, + [Main, "put"] => {set_state, 1}, + [Main, "Chain", "event"] => {event, 1}} }, + #{ functions := Funs } = Env1 = + decls_to_fcode(MainEnv, Decls), + StateType = lookup_type(Env1, [Main, "state"], [], {tuple, []}), + EventType = lookup_type(Env1, [Main, "event"], [], none), + #{ contract_name => Main, + state_type => StateType, + event_type => EventType, + functions => Funs }; +to_fcode(Env, [{contract, _, {con, _, Con}, Decls} | Code]) -> + Env1 = decls_to_fcode(Env#{ context => {abstract_contract, Con} }, Decls), + to_fcode(Env1, Code); +to_fcode(Env, [{namespace, _, {con, _, Con}, Decls} | Code]) -> + Env1 = decls_to_fcode(Env#{ context => {namespace, Con} }, Decls), + to_fcode(Env1, Code). + +-spec decls_to_fcode(env(), [aeso_syntax:decl()]) -> env(). +decls_to_fcode(Env, Decls) -> + %% First compute mapping from Sophia names to fun_names and add it to the + %% environment. + Env1 = add_fun_env(Env, Decls), + lists:foldl(fun(D, E) -> + init_fresh_names(), + R = decl_to_fcode(E, D), + clear_fresh_names(), + R + end, Env1, Decls). + +-spec decl_to_fcode(env(), aeso_syntax:decl()) -> env(). +decl_to_fcode(Env, {type_decl, _, _, _}) -> Env; +decl_to_fcode(Env, {fun_decl, _, _, _}) -> Env; +decl_to_fcode(Env, {type_def, _Ann, Name, Args, Def}) -> + typedef_to_fcode(Env, Name, Args, Def); +decl_to_fcode(Env = #{ functions := Funs }, {letfun, Ann, {id, _, Name}, Args, Ret, Body}) -> + Attrs = get_attributes(Ann), + FName = lookup_fun(Env, qname(Env, Name)), + FArgs = args_to_fcode(Env, Args), + FBody = expr_to_fcode(Env#{ vars => [X || {X, _} <- FArgs] }, Body), + Def = #{ attrs => Attrs, + args => FArgs, + return => type_to_fcode(Env, Ret), + body => FBody }, + NewFuns = Funs#{ FName => Def }, + Env#{ functions := NewFuns }. + +-spec typedef_to_fcode(env(), aeso_syntax:id(), [aeso_syntax:tvar()], aeso_syntax:typedef()) -> env(). +typedef_to_fcode(Env, {id, _, Name}, Xs, Def) -> + Q = qname(Env, Name), + FDef = fun(Args) -> + Sub = maps:from_list(lists:zip([X || {tvar, _, X} <- Xs], Args)), + case Def of + {record_t, Fields} -> {todo, Xs, Args, record_t, Fields}; + {variant_t, Cons} -> + FCons = [ begin + {constr_t, _, _, Ts} = Con, + [type_to_fcode(Env, Sub, T) || T <- Ts] + end || Con <- Cons ], + {variant, FCons}; + {alias_t, Type} -> {todo, Xs, Args, alias_t, Type} + end end, + Constructors = + case Def of + {variant_t, Cons} -> + Arities = [ begin + {constr_t, _, _, Args} = Con, + length(Args) + end || Con <- Cons ], + Tags = [ #con_tag{ tag = I, arities = Arities } || I <- lists:seq(0, length(Cons) - 1) ], + GetName = fun({constr_t, _, {con, _, C}, _}) -> C end, + QName = fun(Con) -> qname(Env, GetName(Con)) end, + maps:from_list([ {QName(Con), Tag} || {Tag, Con} <- lists:zip(Tags, Cons) ]); + _ -> #{} + end, + Env1 = bind_constructors(Env, Constructors), + bind_type(Env1, Q, FDef). + +-spec type_to_fcode(env(), aeso_syntax:type()) -> ftype(). +type_to_fcode(Env, Type) -> + type_to_fcode(Env, #{}, Type). + +-spec type_to_fcode(env(), #{var_name() => ftype()}, aeso_syntax:type()) -> ftype(). +type_to_fcode(_Env, _Sub, {con, _, _}) -> contract; +type_to_fcode(Env, Sub, {app_t, _, T = {Id, _, _}, Types}) when Id == id; Id == qid -> + lookup_type(Env, T, [type_to_fcode(Env, Sub, Type) || Type <- Types]); +type_to_fcode(Env, _Sub, T = {Id, _, _}) when Id == id; Id == qid -> + lookup_type(Env, T, []); +type_to_fcode(Env, Sub, {tuple_t, _, Types}) -> + {tuple, [type_to_fcode(Env, Sub, T) || T <- Types]}; +type_to_fcode(Env, Sub, {record_t, Fields}) -> + FieldType = fun({field_t, _, _, Ty}) -> Ty end, + type_to_fcode(Env, Sub, {tuple_t, [], lists:map(FieldType, Fields)}); +type_to_fcode(_Env, _Sub, {bytes_t, _, _N}) -> + string; %% TODO: add bytes type to FATE? +type_to_fcode(_Env, Sub, {tvar, _, X}) -> + maps:get(X, Sub, any); +type_to_fcode(Env, Sub, {fun_t, _, Named, Args, Res}) -> + FNamed = [type_to_fcode(Env, Sub, Arg) || {named_arg_t, _, _, Arg, _} <- Named], + FArgs = [type_to_fcode(Env, Sub, Arg) || Arg <- Args], + {function, FNamed ++ FArgs, type_to_fcode(Env, Sub, Res)}; +type_to_fcode(_Env, _Sub, Type) -> + error({todo, Type}). + +-spec args_to_fcode(env(), [aeso_syntax:arg()]) -> [{var_name(), ftype()}]. +args_to_fcode(Env, Args) -> + [ {Name, type_to_fcode(Env, Type)} || {arg, _, {id, _, Name}, Type} <- Args ]. + +-spec expr_to_fcode(env(), aeso_syntax:expr()) -> fexpr(). +expr_to_fcode(Env, {typed, _, Expr, Type}) -> + expr_to_fcode(Env, Type, Expr); +expr_to_fcode(Env, Expr) -> + expr_to_fcode(Env, no_type, Expr). + +-spec expr_to_fcode(env(), aeso_syntax:type() | no_type, aeso_syntax:expr()) -> fexpr(). + +%% Literals +expr_to_fcode(_Env, _Type, {int, _, N}) -> {lit, {int, N}}; +expr_to_fcode(_Env, _Type, {char, _, N}) -> {lit, {int, N}}; +expr_to_fcode(_Env, _Type, {bool, _, B}) -> {lit, {bool, B}}; +expr_to_fcode(_Env, _Type, {string, _, S}) -> {lit, {string, S}}; +expr_to_fcode(_Env, _Type, {account_pubkey, _, K}) -> {lit, {account_pubkey, K}}; +expr_to_fcode(_Env, _Type, {contract_pubkey, _, K}) -> {lit, {contract_pubkey, K}}; +expr_to_fcode(_Env, _Type, {oracle_pubkey, _, K}) -> {lit, {oracle_pubkey, K}}; +expr_to_fcode(_Env, _Type, {oracle_query_id, _, K}) -> {lit, {oracle_query_id, K}}; + +expr_to_fcode(_Env, _Type, {bytes, _, Bin}) -> {lit, {string, Bin}}; + +%% Variables +expr_to_fcode(Env, _Type, {id, _, X}) -> resolve_var(Env, [X]); +expr_to_fcode(Env, _Type, {qid, _, X}) -> resolve_var(Env, X); + +%% Constructors +expr_to_fcode(Env, Type, {C, _, _} = Con) when C == con; C == qcon -> + expr_to_fcode(Env, Type, {app, [], {typed, [], Con, Type}, []}); +expr_to_fcode(Env, _Type, {app, _, {typed, _, {C, _, _} = Con, _}, Args}) when C == con; C == qcon -> + #con_tag{ tag = I, arities = Arities } = lookup_con(Env, Con), + Arity = lists:nth(I + 1, Arities), + case length(Args) == Arity of + true -> {con, Arities, I, [expr_to_fcode(Env, Arg) || Arg <- Args]}; + false -> fcode_error({constructor_arity_mismatch, Con, length(Args), Arity}) + end; + +%% Tuples +expr_to_fcode(Env, _Type, {tuple, _, Es}) -> + {tuple, [expr_to_fcode(Env, E) || E <- Es]}; + +%% Records +expr_to_fcode(Env, Type, {proj, _Ann, Rec = {typed, _, _, RecType}, {id, _, X}}) -> + case RecType of + {con, _, _} -> + {fun_t, _, Named, Args, _} = Type, + Arity = length(Named) + length(Args), + {remote_u, expr_to_fcode(Env, Rec), {entrypoint, list_to_binary(X)}, Arity}; + {record_t, _} -> + {proj, expr_to_fcode(Env, Rec), field_index(Rec, X)} + end; + +expr_to_fcode(Env, {record_t, FieldTypes}, {record, _Ann, Fields}) -> + FVal = fun(F) -> + %% All fields are present and no updates + {set, E} = field_value(F, Fields), + expr_to_fcode(Env, E) + end, + {tuple, lists:map(FVal, FieldTypes)}; + +expr_to_fcode(Env, {record_t, FieldTypes}, {record, _Ann, Rec, Fields}) -> + X = fresh_name(), + Proj = fun(I) -> {proj, {var, X}, I - 1} end, + Comp = fun({I, false}) -> Proj(I); + ({_, {set, E}}) -> expr_to_fcode(Env, E); + ({I, {upd, Z, E}}) -> {'let', Z, Proj(I), expr_to_fcode(bind_var(Env, Z), E)} + end, + Set = fun({_, false}, R) -> R; + ({I, {set, E}}, R) -> {set_proj, R, I - 1, expr_to_fcode(Env, E)}; + ({I, {upd, Z, E}}, R) -> {set_proj, R, I - 1, + {'let', Z, Proj(I), expr_to_fcode(bind_var(Env, Z), E)}} + end, + Expand = length(Fields) == length(FieldTypes), + Updates = [ {I, field_value(FT, Fields)} || {I, FT} <- indexed(FieldTypes) ], + Body = case Expand of + true -> {tuple, lists:map(Comp, Updates)}; + false -> lists:foldr(Set, {var, X}, Updates) + end, + {'let', X, expr_to_fcode(Env, Rec), Body}; + +%% Lists +expr_to_fcode(Env, _Type, {list, _, Es}) -> + lists:foldr(fun(E, L) -> {op, '::', [expr_to_fcode(Env, E), L]} end, + nil, Es); + +%% Conditionals +expr_to_fcode(Env, _Type, {'if', _, Cond, Then, Else}) -> + Switch = fun(X) -> + {switch, {split, boolean, X, + [{'case', {bool, false}, {nosplit, expr_to_fcode(Env, Else)}}, + {'case', {bool, true}, {nosplit, expr_to_fcode(Env, Then)}}]}} + end, + case Cond of + {var, X} -> Switch(X); + _ -> + X = fresh_name(), + {'let', X, expr_to_fcode(Env, Cond), Switch(X)} + end; + +%% Switch +expr_to_fcode(Env, _, {switch, _, Expr = {typed, _, E, Type}, Alts}) -> + Switch = fun(X) -> + {switch, alts_to_fcode(Env, type_to_fcode(Env, Type), X, Alts)} + end, + case E of + {id, _, X} -> Switch(X); + _ -> + X = fresh_name(), + {'let', X, expr_to_fcode(Env, Expr), + Switch(X)} + end; + +%% Blocks +expr_to_fcode(Env, _Type, {block, _, Stmts}) -> + stmts_to_fcode(Env, Stmts); + +%% Binary operator +expr_to_fcode(Env, _Type, Expr = {app, _, {Op, _}, [_, _]}) when Op == '&&'; Op == '||' -> + Tree = expr_to_decision_tree(Env, Expr), + decision_tree_to_fcode(Tree); +expr_to_fcode(Env, _Type, {app, _Ann, {Op, _}, [A, B]}) when is_atom(Op) -> + {op, Op, [expr_to_fcode(Env, A), expr_to_fcode(Env, B)]}; +expr_to_fcode(Env, _Type, {app, _Ann, {Op, _}, [A]}) when is_atom(Op) -> + case Op of + '-' -> {op, '-', [{lit, {int, 0}}, expr_to_fcode(Env, A)]}; + '!' -> {op, '!', [expr_to_fcode(Env, A)]} + end; + +%% Function calls +expr_to_fcode(Env, _Type, {app, _Ann, Fun = {typed, _, _, {fun_t, _, NamedArgsT, _, _}}, Args}) -> + Args1 = get_named_args(NamedArgsT, Args), + FArgs = [expr_to_fcode(Env, Arg) || Arg <- Args1], + case expr_to_fcode(Env, Fun) of + {builtin_u, B, _Ar} -> builtin_to_fcode(B, FArgs); + {def_u, F, _Ar} -> {def, F, FArgs}; + {remote_u, Ct, RFun, _Ar} -> {remote, Ct, RFun, FArgs}; + FFun -> + %% FFun is a closure, with first component the function name and + %% second component the environment + Call = fun(X) -> {funcall, {proj, {var, X}, 0}, [{proj, {var, X}, 1} | FArgs]} end, + case FFun of + {var, X} -> Call(X); + _ -> X = fresh_name(), + {'let', X, FFun, Call(X)} + end + end; + +%% Maps +expr_to_fcode(_Env, _Type, {map, _, []}) -> + {builtin, map_empty, []}; +expr_to_fcode(Env, Type, {map, Ann, KVs}) -> + %% Cheaper to do incremental map_update than building the list and doing + %% map_from_list (I think). + Fields = [{field, Ann, [{map_get, Ann, K}], V} || {K, V} <- KVs], + expr_to_fcode(Env, Type, {map, Ann, {map, Ann, []}, Fields}); +expr_to_fcode(Env, _Type, {map, _, Map, KVs}) -> + X = fresh_name(), + Map1 = {var, X}, + {'let', X, expr_to_fcode(Env, Map), + lists:foldr(fun(Fld, M) -> + case Fld of + {field, _, [{map_get, _, K}], V} -> + {op, map_set, [M, expr_to_fcode(Env, K), expr_to_fcode(Env, V)]}; + {field_upd, _, [{map_get, _, K}], {typed, _, {lam, _, [{arg, _, {id, _, Z}, _}], V}, _}} -> + Y = fresh_name(), + {'let', Y, expr_to_fcode(Env, K), + {'let', Z, {op, map_get, [Map1, {var, Y}]}, + {op, map_set, [M, {var, Y}, expr_to_fcode(bind_var(Env, Z), V)]}}} + end end, Map1, KVs)}; +expr_to_fcode(Env, _Type, {map_get, _, Map, Key}) -> + {op, map_get, [expr_to_fcode(Env, Map), expr_to_fcode(Env, Key)]}; +expr_to_fcode(Env, _Type, {map_get, _, Map, Key, Def}) -> + {op, map_get_d, [expr_to_fcode(Env, Map), expr_to_fcode(Env, Key), expr_to_fcode(Env, Def)]}; + +expr_to_fcode(Env, _Type, {lam, _, Args, Body}) -> + GetArg = fun({arg, _, {id, _, X}, _}) -> X end, + Xs = lists:map(GetArg, Args), + {lam, Xs, expr_to_fcode(bind_vars(Env, Xs), Body)}; + +expr_to_fcode(_Env, Type, Expr) -> + error({todo, {Expr, ':', Type}}). + +%% -- Pattern matching -- + +-spec alts_to_fcode(env(), ftype(), var_name(), [aeso_syntax:alt()]) -> fsplit(). +alts_to_fcode(Env, Type, X, Alts) -> + FAlts = [alt_to_fcode(Env, Alt) || Alt <- Alts], + split_tree(Env, [{X, Type}], FAlts). + +%% Intermediate format before case trees (fcase() and fsplit()). +-type falt() :: {'case', [fpat()], fexpr()}. +-type fpat() :: {var, var_name()} + | {bool, false | true} + | {int, integer()} + | {string, binary()} + | nil | {'::', fpat(), fpat()} + | {tuple, [fpat()]} + | {con, arities(), tag(), [fpat()]}. + +%% %% Invariant: the number of variables matches the number of patterns in each falt. +-spec split_tree(env(), [{var_name(), ftype()}], [falt()]) -> fsplit(). +split_tree(_Env, _Vars, []) -> + error(non_exhaustive_patterns); %% TODO: nice error +split_tree(Env, Vars, Alts = [{'case', Pats, Body} | _]) -> + case next_split(Pats) of + false -> + Xs = [ X || {X, _} <- Vars ], + Ys = [ Y || {var, Y} <- Pats ], + Ren = [ {Y, X} || {Y, X} <- lists:zip(Ys, Xs), X /= Y, Y /= "_" ], + %% TODO: Unreachable clauses error + {nosplit, rename(Ren, Body)}; + I when is_integer(I) -> + {Vars0, [{X, Type} | Vars1]} = lists:split(I - 1, Vars), + SAlts = merge_alts(I, X, [ split_alt(I, A) || A <- Alts ]), + Cases = [ {'case', SPat, split_tree(Env, Vars0 ++ split_vars(SPat, Type) ++ Vars1, FAlts)} + || {SPat, FAlts} <- SAlts ], + {split, Type, X, Cases} + end. + +-spec merge_alts(integer(), var_name(), [{fsplit_pat(), falt()}]) -> [{fsplit_pat(), [falt()]}]. +merge_alts(I, X, Alts) -> + merge_alts(I, X, Alts, []). + +merge_alts(I, X, Alts, Alts1) -> + lists:foldr(fun(A, As) -> merge_alt(I, X, A, As) end, + Alts1, Alts). + +-spec merge_alt(integer(), var_name(), {fsplit_pat(), falt()}, Alts) -> Alts + when Alts :: [{fsplit_pat(), [falt()]}]. +merge_alt(_, _, {P, A}, []) -> [{P, [A]}]; +merge_alt(I, X, {P, A}, [{Q, As} | Rest]) -> + Match = fun({var, _}, {var, _}) -> match; + ({tuple, _}, {tuple, _}) -> match; + ({bool, B}, {bool, B}) -> match; + ({int, N}, {int, N}) -> match; + ({string, S}, {string, S}) -> match; + (nil, nil) -> match; + ({'::', _, _}, {'::', _, _}) -> match; + ({con, _, C, _}, {con, _, C, _}) -> match; + ({con, _, _, _}, {con, _, _, _}) -> mismatch; + ({var, _}, _) -> expand; + (_, {var, _}) -> insert; + (_, _) -> mismatch + end, + case Match(P, Q) of + match -> [{Q, [A | As]} | Rest]; + mismatch -> [{Q, As} | merge_alt(I, X, {P, A}, Rest)]; + expand -> + {Before, After} = expand(I, X, P, Q, A), + merge_alts(I, X, Before, [{Q, As} | merge_alts(I, X, After, Rest)]); + insert -> [{P, [A]}, {Q, As} | Rest] + end. + +expand(I, X, P, Q, Case = {'case', Ps, E}) -> + {Ps0, [{var, Y} | Ps1]} = lists:split(I - 1, Ps), + {Ps0r, Ren1} = rename_fpats([{Y, X} || Y /= X], Ps0), + {Ps1r, Ren2} = rename_fpats(Ren1, Ps1), + E1 = rename(Ren2, E), + Splice = fun(N) -> Ps0r ++ lists:duplicate(N, {var, "_"}) ++ Ps1r end, + Type = fun({tuple, Xs}) -> {tuple, length(Xs)}; + ({bool, _}) -> bool; + ({int, _}) -> int; + ({string, _}) -> string; + (nil) -> list; + ({'::', _, _}) -> list; + ({con, As, _, _}) -> {variant, As} + end, + MkCase = fun(Pat, Vars) -> {Pat, {'case', Splice(Vars), E1}} end, + case Type(Q) of + {tuple, N} -> {[MkCase(Q, N)], []}; + bool -> {[MkCase({bool, B}, 0) || B <- [false, true]], []}; + int -> {[MkCase(Q, 0)], [{P, Case}]}; + string -> {[MkCase(Q, 0)], [{P, Case}]}; + list -> {[MkCase(nil, 0), MkCase({'::', fresh_name(), fresh_name()}, 2)], []}; + {variant, As} -> {[MkCase({con, As, C - 1, [fresh_name() || _ <- lists:seq(1, Ar)]}, Ar) + || {C, Ar} <- indexed(As)], []} + end. + +-spec split_alt(integer(), falt()) -> {fsplit_pat(), falt()}. +split_alt(I, {'case', Pats, Body}) -> + {Pats0, [Pat | Pats1]} = lists:split(I - 1, Pats), + {SPat, InnerPats} = split_pat(Pat), + {SPat, {'case', Pats0 ++ InnerPats ++ Pats1, Body}}. + +-spec split_pat(fpat()) -> {fsplit_pat(), [fpat()]}. +split_pat(P = {var, _}) -> {{var, fresh_name()}, [P]}; +split_pat({bool, B}) -> {{bool, B}, []}; +split_pat({int, N}) -> {{int, N}, []}; +split_pat({string, N}) -> {{string, N}, []}; +split_pat(nil) -> {nil, []}; +split_pat({'::', P, Q}) -> {{'::', fresh_name(), fresh_name()}, [P, Q]}; +split_pat({con, As, I, Pats}) -> + Xs = [fresh_name() || _ <- Pats], + {{con, As, I, Xs}, Pats}; +split_pat({tuple, Pats}) -> + Xs = [fresh_name() || _ <- Pats], + {{tuple, Xs}, Pats}. + +-spec split_vars(fsplit_pat(), ftype()) -> [{var_name(), ftype()}]. +split_vars({bool, _}, boolean) -> []; +split_vars({int, _}, integer) -> []; +split_vars({string, _}, string) -> []; +split_vars(nil, {list, _}) -> []; +split_vars({'::', X, Xs}, {list, T}) -> [{X, T}, {Xs, {list, T}}]; +split_vars({con, _, I, Xs}, {variant, Cons}) -> + lists:zip(Xs, lists:nth(I + 1, Cons)); +split_vars({tuple, Xs}, {tuple, Ts}) -> + lists:zip(Xs, Ts); +split_vars({var, X}, T) -> [{X, T}]. + +-spec next_split([fpat()]) -> integer() | false. +next_split(Pats) -> + IsVar = fun({var, _}) -> true; (_) -> false end, + case [ I || {I, P} <- indexed(Pats), not IsVar(P) ] of + [] -> false; + [I | _] -> I + end. + +-spec alt_to_fcode(env(), aeso_syntax:alt()) -> falt(). +alt_to_fcode(Env, {'case', _, Pat, Expr}) -> + FPat = pat_to_fcode(Env, Pat), + FExpr = expr_to_fcode(bind_vars(Env, pat_vars(FPat)), Expr), + {'case', [FPat], FExpr}. + +-spec pat_to_fcode(env(), aeso_syntax:pat()) -> fpat(). +pat_to_fcode(Env, {typed, _, Pat, Type}) -> + pat_to_fcode(Env, Type, Pat); +pat_to_fcode(Env, Pat) -> + pat_to_fcode(Env, no_type, Pat). + +-spec pat_to_fcode(env(), aeso_syntax:type() | no_type, aeso_syntax:pat()) -> fpat(). +pat_to_fcode(_Env, _Type, {id, _, X}) -> {var, X}; +pat_to_fcode(Env, _Type, {C, _, _} = Con) when C == con; C == qcon -> + #con_tag{tag = I, arities = As} = lookup_con(Env, Con), + {con, As, I, []}; +pat_to_fcode(Env, _Type, {app, _, {typed, _, {C, _, _} = Con, _}, Pats}) when C == con; C == qcon -> + #con_tag{tag = I, arities = As} = lookup_con(Env, Con), + {con, As, I, [pat_to_fcode(Env, Pat) || Pat <- Pats]}; +pat_to_fcode(Env, _Type, {tuple, _, Pats}) -> + {tuple, [ pat_to_fcode(Env, Pat) || Pat <- Pats ]}; +pat_to_fcode(_Env, _Type, {bool, _, B}) -> {bool, B}; +pat_to_fcode(_Env, _Type, {int, _, N}) -> {int, N}; +pat_to_fcode(_Env, _Type, {char, _, N}) -> {int, N}; +pat_to_fcode(_Env, _Type, {string, _, N}) -> {string, N}; +pat_to_fcode(Env, _Type, {list, _, Ps}) -> + lists:foldr(fun(P, Qs) -> + {'::', pat_to_fcode(Env, P), Qs} + end, nil, Ps); +pat_to_fcode(Env, _Type, {app, _, {'::', _}, [P, Q]}) -> + {'::', pat_to_fcode(Env, P), pat_to_fcode(Env, Q)}; +pat_to_fcode(Env, {record_t, Fields}, {record, _, FieldPats}) -> + FieldPat = fun(F) -> + case field_value(F, FieldPats) of + false -> {id, [], "_"}; + {set, Pat} -> Pat + %% {upd, _, _} is impossible in patterns + end end, + {tuple, [pat_to_fcode(Env, FieldPat(Field)) + || Field <- Fields]}; + +pat_to_fcode(_Env, Type, Pat) -> + error({todo, Pat, ':', Type}). + +%% -- Decision trees for boolean operators -- + +decision_op('&&', {atom, A}, B) -> {'if', A, B, false}; +decision_op('&&', false, _) -> false; +decision_op('&&', true, B) -> B; +decision_op('||', {atom, A}, B) -> {'if', A, true, B}; +decision_op('||', false, B) -> B; +decision_op('||', true, _) -> true; +decision_op(Op, {'if', A, Then, Else}, B) -> + {'if', A, decision_op(Op, Then, B), decision_op(Op, Else, B)}. + +expr_to_decision_tree(Env, {app, _Ann, {Op, _}, [A, B]}) when Op == '&&'; Op == '||' -> + decision_op(Op, expr_to_decision_tree(Env, A), expr_to_decision_tree(Env, B)); +expr_to_decision_tree(Env, {typed, _, Expr, _}) -> expr_to_decision_tree(Env, Expr); +expr_to_decision_tree(Env, Expr) -> + {atom, expr_to_fcode(Env, Expr)}. + +decision_tree_to_fcode(false) -> {lit, {bool, false}}; +decision_tree_to_fcode(true) -> {lit, {bool, true}}; +decision_tree_to_fcode({atom, B}) -> B; +decision_tree_to_fcode({'if', A, Then, Else}) -> + X = fresh_name(), + {'let', X, A, + {switch, {split, boolean, X, [{'case', {bool, false}, {nosplit, decision_tree_to_fcode(Else)}}, + {'case', {bool, true}, {nosplit, decision_tree_to_fcode(Then)}}]}}}. + +%% -- Statements -- + +-spec stmts_to_fcode(env(), [aeso_syntax:stmt()]) -> fexpr(). +stmts_to_fcode(Env, [{letval, _, {typed, _, {id, _, X}, _}, _, Expr} | Stmts]) -> + {'let', X, expr_to_fcode(Env, Expr), stmts_to_fcode(bind_var(Env, X), Stmts)}; +stmts_to_fcode(Env, [Expr]) -> + expr_to_fcode(Env, Expr); +stmts_to_fcode(Env, [Expr | Stmts]) -> + {'let', "_", expr_to_fcode(Env, Expr), stmts_to_fcode(Env, Stmts)}. + +%% -- Builtins -- + +op_builtins() -> + [map_from_list, map_to_list, map_delete, map_member, map_size, + string_length, string_concat, string_sha3, string_sha256, string_blake2b, + bits_set, bits_clear, bits_test, bits_sum, bits_intersection, bits_union, + bits_difference, int_to_str, address_to_str]. + +builtin_to_fcode(map_lookup, [Key, Map]) -> + {op, map_get, [Map, Key]}; +builtin_to_fcode(map_lookup_default, [Key, Map, Def]) -> + {op, map_get_d, [Map, Key, Def]}; +builtin_to_fcode(Builtin, Args) -> + case lists:member(Builtin, op_builtins()) of + true -> {op, Builtin, Args}; + false -> {builtin, Builtin, Args} + end. + +%% -- Lambda lifting --------------------------------------------------------- +%% The expr_to_fcode compiler lambda expressions to {lam, Xs, Body}, but in +%% FATE we can only call top-level functions, so we need to lift the lambda to +%% the top-level and replace it with a closure. + +-spec lambda_lift(fcode()) -> fcode(). +lambda_lift(FCode = #{ functions := Funs }) -> + init_fresh_names(), + init_lambda_funs(), + Funs1 = maps:map(fun lambda_lift_fun/2, Funs), + NewFuns = get_lambda_funs(), + clear_fresh_names(), + FCode#{ functions := maps:merge(Funs1, NewFuns) }. + +-define(lambda_key, '%lambdalifted'). +init_lambda_funs() -> put(?lambda_key, #{}). +get_lambda_funs() -> erase(?lambda_key). + +add_lambda_fun(Def) -> + Name = fresh_fun(), + Funs = get(?lambda_key), + put(?lambda_key, Funs#{ Name => Def }), + Name. + +lambda_lift_fun(_, Def = #{ body := Body }) -> + Def#{ body := lambda_lift_expr(Body) }. + +lifted_fun([Z], Xs, Body) -> + #{ attrs => [private], + args => [{Z, any} | [{X, any} || X <- Xs]], + return => any, + body => Body }; +lifted_fun(FVs, Xs, Body) -> + Z = "%env", + Proj = fun({I, Y}, E) -> {'let', Y, {proj, {var, Z}, I - 1}, E} end, + #{ attrs => [private], + args => [{Z, any} | [{X, any} || X <- Xs]], + return => any, + body => lists:foldr(Proj, Body, indexed(FVs)) + }. + +make_closure(FVs, Xs, Body) -> + Fun = add_lambda_fun(lifted_fun(FVs, Xs, Body)), + Tup = fun([Y]) -> Y; (Ys) -> {tuple, Ys} end, + {closure, Fun, Tup([{var, Y} || Y <- FVs])}. + +lambda_lift_expr({lam, Xs, Body}) -> + FVs = free_vars({lam, Xs, Body}), + make_closure(FVs, Xs, lambda_lift_expr(Body)); +lambda_lift_expr({Tag, F, Ar}) when Tag == def_u; Tag == builtin_u -> + Xs = [ lists:concat(["arg", I]) || I <- lists:seq(1, Ar) ], + Args = [{var, X} || X <- Xs], + Body = case Tag of + builtin_u -> builtin_to_fcode(F, Args); + def_u -> {def, F, Args} + end, + make_closure([], Xs, Body); +lambda_lift_expr({remote_u, Ct, F, Ar}) -> + FVs = free_vars(Ct), + Ct1 = lambda_lift_expr(Ct), + Xs = [ lists:concat(["arg", I]) || I <- lists:seq(1, Ar) ], + Args = [{var, X} || X <- Xs], + make_closure(FVs, Xs, {remote, Ct1, F, Args}); +lambda_lift_expr(Expr) -> + case Expr of + {lit, _} -> Expr; + nil -> Expr; + {var, _} -> Expr; + {closure, _, _} -> Expr; + {def, D, As} -> {def, D, lambda_lift_exprs(As)}; + {builtin, B, As} -> {builtin, B, lambda_lift_exprs(As)}; + {remote, Ct, F, As} -> {remote, lambda_lift_expr(Ct), F, lambda_lift_exprs(As)}; + {con, Ar, C, As} -> {con, Ar, C, lambda_lift_exprs(As)}; + {tuple, As} -> {tuple, lambda_lift_exprs(As)}; + {proj, A, I} -> {proj, lambda_lift_expr(A), I}; + {set_proj, A, I, B} -> {set_proj, lambda_lift_expr(A), I, lambda_lift_expr(B)}; + {op, Op, As} -> {op, Op, lambda_lift_exprs(As)}; + {'let', X, A, B} -> {'let', X, lambda_lift_expr(A), lambda_lift_expr(B)}; + {funcall, A, Bs} -> {funcall, lambda_lift_expr(A), lambda_lift_exprs(Bs)}; + {switch, S} -> {switch, lambda_lift_expr(S)}; + {split, Type, X, Alts} -> {split, Type, X, lambda_lift_exprs(Alts)}; + {nosplit, A} -> {nosplit, lambda_lift_expr(A)}; + {'case', P, S} -> {'case', P, lambda_lift_expr(S)} + end. + +lambda_lift_exprs(As) -> [lambda_lift_expr(A) || A <- As]. + +%% -- Optimisations ---------------------------------------------------------- + +%% - Deadcode elimination +%% - Unused variable analysis (replace by _) +%% - Case specialization +%% - Constant propagation +%% - Inlining + +-spec optimize_fcode(fcode()) -> fcode(). +optimize_fcode(Code = #{ functions := Funs }) -> + Code#{ functions := maps:map(fun(Name, Def) -> optimize_fun(Code, Name, Def) end, Funs) }. + +-spec optimize_fun(fcode(), fun_name(), fun_def()) -> fun_def(). +optimize_fun(_Fcode, _Fun, Def = #{ body := _Body }) -> + %% io:format("Optimizing ~p =\n~s\n", [_Fun, prettypr:format(pp_fexpr(_Body))]), + Def. + +%% -- Helper functions ------------------------------------------------------- + +%% -- Types -- + +-spec lookup_type(env(), aeso_syntax:id() | aeso_syntax:qid() | sophia_name(), [ftype()]) -> ftype(). +lookup_type(Env, {id, _, Name}, Args) -> + lookup_type(Env, [Name], Args); +lookup_type(Env, {qid, _, Name}, Args) -> + lookup_type(Env, Name, Args); +lookup_type(Env, Name, Args) -> + case lookup_type(Env, Name, Args, not_found) of + not_found -> error({unknown_type, Name}); + Type -> Type + end. + +-spec lookup_type(env(), sophia_name(), [ftype()], ftype() | A) -> ftype() | A. +lookup_type(#{ type_env := TypeEnv }, Name, Args, Default) -> + case maps:get(Name, TypeEnv, false) of + false -> Default; + Fun -> Fun(Args) + end. + +-spec bind_type(env(), sophia_name(), type_def()) -> env(). +bind_type(Env = #{type_env := TEnv}, Q, FDef) -> + Env#{ type_env := TEnv#{ Q => FDef } }. + +-spec bind_constructors(env(), con_env()) -> env(). +bind_constructors(Env = #{ con_env := ConEnv }, NewCons) -> + Env#{ con_env := maps:merge(ConEnv, NewCons) }. + +%% -- Names -- + +-spec add_fun_env(env(), [aeso_syntax:decl()]) -> env(). +add_fun_env(Env = #{ context := {abstract_contract, _} }, _) -> Env; %% no functions from abstract contracts +add_fun_env(Env = #{ fun_env := FunEnv }, Decls) -> + Entry = fun({letfun, Ann, {id, _, Name}, Args, _, _}) -> + [{qname(Env, Name), {make_fun_name(Env, Ann, Name), length(Args)}}]; + (_) -> [] end, + FunEnv1 = maps:from_list(lists:flatmap(Entry, Decls)), + Env#{ fun_env := maps:merge(FunEnv, FunEnv1) }. + +make_fun_name(#{ context := Context }, Ann, Name) -> + Private = proplists:get_value(private, Ann, false) orelse + proplists:get_value(internal, Ann, false), + case Context of + {main_contract, Main} -> + if Private -> {local_fun, [Main, Name]}; + Name == "init" -> init; + true -> {entrypoint, list_to_binary(Name)} + end; + {namespace, Lib} -> + {local_fun, [Lib, Name]} + end. + +-spec current_namespace(env()) -> string(). +current_namespace(#{ context := Cxt }) -> + case Cxt of + {abstract_contract, Con} -> Con; + {main_contract, Con} -> Con; + {namespace, NS} -> NS + end. + +-spec qname(env(), string()) -> sophia_name(). +qname(Env, Name) -> + [current_namespace(Env), Name]. + +-spec lookup_fun(env(), sophia_name()) -> fun_name(). +lookup_fun(#{ fun_env := FunEnv }, Name) -> + case maps:get(Name, FunEnv, false) of + false -> error({unbound_name, Name}); + {FName, _} -> FName + end. + +-spec lookup_con(env(), aeso_syntax:con() | aeso_syntax:qcon() | sophia_name()) -> con_tag(). +lookup_con(Env, {con, _, Con}) -> lookup_con(Env, [Con]); +lookup_con(Env, {qcon, _, Con}) -> lookup_con(Env, Con); +lookup_con(#{ con_env := ConEnv }, Con) -> + case maps:get(Con, ConEnv, false) of + false -> error({unbound_constructor, Con}); + Tag -> Tag + end. + +bind_vars(Env, Xs) -> + lists:foldl(fun(X, E) -> bind_var(E, X) end, Env, Xs). + +bind_var(Env = #{ vars := Vars }, X) -> Env#{ vars := [X | Vars] }. + +resolve_var(#{ vars := Vars } = Env, [X]) -> + case lists:member(X, Vars) of + true -> {var, X}; + false -> resolve_fun(Env, [X]) + end; +resolve_var(Env, Q) -> resolve_fun(Env, Q). + +resolve_fun(#{ fun_env := Funs, builtins := Builtin }, Q) -> + case {maps:get(Q, Funs, not_found), maps:get(Q, Builtin, not_found)} of + {not_found, not_found} -> fcode_error({unbound_variable, Q}); + {_, {B, none}} -> {builtin, B, []}; + {_, {B, Ar}} -> {builtin_u, B, Ar}; + {{Fun, Ar}, _} -> {def_u, Fun, Ar} + end. + +init_fresh_names() -> + put('%fresh', 0). + +clear_fresh_names() -> + erase('%fresh'). + +-spec fresh_name() -> var_name(). +fresh_name() -> fresh_name("%"). + +-spec fresh_fun() -> fun_name(). +fresh_fun() -> {local_fun, [fresh_name("^")]}. + +-spec fresh_name(string()) -> var_name(). +fresh_name(Prefix) -> + N = get('%fresh'), + put('%fresh', N + 1), + lists:concat([Prefix, N]). + +-spec pat_vars(fpat()) -> [var_name()]. +pat_vars({var, X}) -> [X || X /= "_"]; +pat_vars({bool, _}) -> []; +pat_vars({int, _}) -> []; +pat_vars({string, _}) -> []; +pat_vars(nil) -> []; +pat_vars({'::', P, Q}) -> pat_vars(P) ++ pat_vars(Q); +pat_vars({tuple, Ps}) -> pat_vars(Ps); +pat_vars({con, _, _, Ps}) -> pat_vars(Ps); +pat_vars(Ps) when is_list(Ps) -> [X || P <- Ps, X <- pat_vars(P)]. + +free_vars(Xs) when is_list(Xs) -> + lists:umerge([ free_vars(X) || X <- Xs ]); +free_vars(Expr) -> + case Expr of + {var, X} -> [X]; + {lit, _} -> []; + nil -> []; + {def, _, As} -> free_vars(As); + {def_u, _, _} -> []; + {remote, Ct, _, As} -> free_vars([Ct | As]); + {remote_u, Ct, _, _} -> free_vars(Ct); + {builtin, _, As} -> free_vars(As); + {builtin_u, _, _} -> []; + {con, _, _, As} -> free_vars(As); + {tuple, As} -> free_vars(As); + {proj, A, _} -> free_vars(A); + {set_proj, A, _, B} -> free_vars([A, B]); + {op, _, As} -> free_vars(As); + {'let', X, A, B} -> free_vars([A, {lam, [X], B}]); + {funcall, A, Bs} -> free_vars([A | Bs]); + {lam, Xs, B} -> free_vars(B) -- lists:sort(Xs); + {closure, _, A} -> free_vars(A); + {switch, A} -> free_vars(A); + {split, _, X, As} -> free_vars([{var, X} | As]); + {nosplit, A} -> free_vars(A); + {'case', P, A} -> free_vars(A) -- lists:sort(pat_vars(P)) + end. + +get_named_args(NamedArgsT, Args) -> + IsNamed = fun({named_arg, _, _, _}) -> true; + (_) -> false end, + {Named, NotNamed} = lists:partition(IsNamed, Args), + NamedArgs = [get_named_arg(NamedArg, Named) || NamedArg <- NamedArgsT], + NamedArgs ++ NotNamed. + +get_named_arg({named_arg_t, _, {id, _, Name}, _, Default}, Args) -> + case [ Val || {named_arg, _, {id, _, X}, Val} <- Args, X == Name ] of + [Val] -> Val; + [] -> Default + end. + +%% -- Renaming -- + +-spec rename([{var_name(), var_name()}], fexpr()) -> fexpr(). +rename(Ren, Expr) -> + case Expr of + {lit, _} -> Expr; + nil -> nil; + {var, X} -> {var, rename_var(Ren, X)}; + {def, D, Es} -> {def, D, [rename(Ren, E) || E <- Es]}; + {def_u, _, _} -> Expr; + {builtin, B, Es} -> {builtin, B, [rename(Ren, E) || E <- Es]}; + {builtin_u, _, _} -> Expr; + {remote, Ct, F, Es} -> {remote, rename(Ren, Ct), F, [rename(Ren, E) || E <- Es]}; + {remote_u, Ct, F, Ar} -> {remote_u, rename(Ren, Ct), F, Ar}; + {con, Ar, I, Es} -> {con, Ar, I, [rename(Ren, E) || E <- Es]}; + {tuple, Es} -> {tuple, [rename(Ren, E) || E <- Es]}; + {proj, E, I} -> {proj, rename(Ren, E), I}; + {set_proj, R, I, E} -> {set_proj, rename(Ren, R), I, rename(Ren, E)}; + {op, Op, Es} -> {op, Op, [rename(Ren, E) || E <- Es]}; + {funcall, Fun, Es} -> {funcall, rename(Ren, Fun), [rename(Ren, E) || E <- Es]}; + {closure, F, Env} -> {closure, F, rename(Ren, Env)}; + {switch, Split} -> {switch, rename_split(Ren, Split)}; + {lam, Xs, B} -> + {Zs, Ren1} = rename_bindings(Ren, Xs), + {lam, Zs, rename(Ren1, B)}; + {'let', X, E, Body} -> + {Z, Ren1} = rename_binding(Ren, X), + {'let', Z, rename(Ren, E), rename(Ren1, Body)} + end. + +rename_var(Ren, X) -> proplists:get_value(X, Ren, X). +rename_binding(Ren, X) -> + Ren1 = lists:keydelete(X, 1, Ren), + case lists:keymember(X, 2, Ren) of + false -> {X, Ren1}; + true -> + Z = fresh_name(), + {Z, [{X, Z} | Ren1]} + end. + +rename_bindings(Ren, []) -> {[], Ren}; +rename_bindings(Ren, [X | Xs]) -> + {Z, Ren1} = rename_binding(Ren, X), + {Zs, Ren2} = rename_bindings(Ren1, Xs), + {[Z | Zs], Ren2}. + +rename_fpats(Ren, []) -> {[], Ren}; +rename_fpats(Ren, [P | Ps]) -> + {Q, Ren1} = rename_fpat(Ren, P), + {Qs, Ren2} = rename_fpats(Ren1, Ps), + {[Q | Qs], Ren2}. + +rename_fpat(Ren, P = {bool, _}) -> {P, Ren}; +rename_fpat(Ren, P = {int, _}) -> {P, Ren}; +rename_fpat(Ren, P = {string, _}) -> {P, Ren}; +rename_fpat(Ren, P = nil) -> {P, Ren}; +rename_fpat(Ren, {'::', P, Q}) -> + {P1, Ren1} = rename_fpat(Ren, P), + {Q1, Ren2} = rename_fpat(Ren1, Q), + {{'::', P1, Q1}, Ren2}; +rename_fpat(Ren, {var, X}) -> + {Z, Ren1} = rename_binding(Ren, X), + {{var, Z}, Ren1}; +rename_fpat(Ren, {con, Ar, C, Ps}) -> + {Ps1, Ren1} = rename_fpats(Ren, Ps), + {{con, Ar, C, Ps1}, Ren1}; +rename_fpat(Ren, {tuple, Ps}) -> + {Ps1, Ren1} = rename_fpats(Ren, Ps), + {{tuple, Ps1}, Ren1}. + +rename_spat(Ren, P = {bool, _}) -> {P, Ren}; +rename_spat(Ren, P = {int, _}) -> {P, Ren}; +rename_spat(Ren, P = {string, _}) -> {P, Ren}; +rename_spat(Ren, P = nil) -> {P, Ren}; +rename_spat(Ren, {'::', X, Y}) -> + {X1, Ren1} = rename_binding(Ren, X), + {Y1, Ren2} = rename_binding(Ren1, Y), + {{'::', X1, Y1}, Ren2}; +rename_spat(Ren, {var, X}) -> + {Z, Ren1} = rename_binding(Ren, X), + {{var, Z}, Ren1}; +rename_spat(Ren, {con, Ar, C, Xs}) -> + {Zs, Ren1} = rename_bindings(Ren, Xs), + {{con, Ar, C, Zs}, Ren1}; +rename_spat(Ren, {tuple, Xs}) -> + {Zs, Ren1} = rename_bindings(Ren, Xs), + {{tuple, Zs}, Ren1}. + +rename_split(Ren, {split, Type, X, Cases}) -> + {split, Type, rename_var(Ren, X), [rename_case(Ren, C) || C <- Cases]}; +rename_split(Ren, {nosplit, E}) -> {nosplit, rename(Ren, E)}. + +rename_case(Ren, {'case', Pat, Split}) -> + {Pat1, Ren1} = rename_spat(Ren, Pat), + {'case', Pat1, rename_split(Ren1, Split)}. + +%% -- Records -- + +field_index({typed, _, _, RecTy}, X) -> + field_index(RecTy, X); +field_index({record_t, Fields}, X) -> + IsX = fun({field_t, _, {id, _, Y}, _}) -> X == Y end, + [I] = [ I || {I, Field} <- indexed(Fields), IsX(Field) ], + I - 1. %% Tuples are 0-indexed + +field_value({field_t, _, {id, _, X}, _}, Fields) -> + View = fun({field, _, [{proj, _, {id, _, Y}}], E}) -> {Y, {set, E}}; + ({field_upd, _, [{proj, _, {id, _, Y}}], + {typed, _, {lam, _, [{arg, _, {id, _, Z}, _}], E}, _}}) -> {Y, {upd, Z, E}} end, + case [Upd || {Y, Upd} <- lists:map(View, Fields), X == Y] of + [Upd] -> Upd; + [] -> false + end. + +%% -- Attributes -- + +get_attributes(Ann) -> + [stateful || proplists:get_value(stateful, Ann, false)]. + +%% -- Basic utilities -- + +indexed(Xs) -> + lists:zip(lists:seq(1, length(Xs)), Xs). + +fcode_error(Err) -> + error(Err). + +%% -- Pretty printing -------------------------------------------------------- + +format_fcode(#{ functions := Funs }) -> + prettypr:format(pp_above( + [ pp_fun(Name, Def) || {Name, Def} <- maps:to_list(Funs) ])). + +format_fexpr(E) -> + prettypr:format(pp_fexpr(E)). + +pp_fun(Name, #{ args := Args, return := Return, body := Body }) -> + PPArg = fun({X, T}) -> pp_beside([pp_text(X), pp_text(" : "), pp_ftype(T)]) end, + pp_above(pp_beside([pp_text("function "), pp_fun_name(Name), + pp_parens(pp_par(pp_punctuate(pp_text(","), [PPArg(Arg) || Arg <- Args]))), + pp_text(" : "), pp_ftype(Return), pp_text(" =")]), + prettypr:nest(2, pp_fexpr(Body))). + +pp_fun_name(init) -> pp_text("init"); +pp_fun_name({entrypoint, E}) -> pp_text(binary_to_list(E)); +pp_fun_name({local_fun, Q}) -> pp_text(string:join(Q, ".")). + +pp_text(<<>>) -> prettypr:text("\"\""); +pp_text(Bin) when is_binary(Bin) -> prettypr:text(lists:flatten(io_lib:format("~p", [binary_to_list(Bin)]))); +pp_text(S) -> prettypr:text(lists:concat([S])). + +pp_beside([]) -> prettypr:empty(); +pp_beside([X]) -> X; +pp_beside([X | Xs]) -> pp_beside(X, pp_beside(Xs)). + +pp_beside(A, B) -> prettypr:beside(A, B). + +pp_above([]) -> prettypr:empty(); +pp_above([X]) -> X; +pp_above([X | Xs]) -> pp_above(X, pp_above(Xs)). + +pp_above(A, B) -> prettypr:above(A, B). + +pp_parens(Doc) -> pp_beside([pp_text("("), Doc, pp_text(")")]). +pp_braces(Doc) -> pp_beside([pp_text("{"), Doc, pp_text("}")]). + +pp_punctuate(_Sep, []) -> []; +pp_punctuate(_Sep, [X]) -> [X]; +pp_punctuate(Sep, [X | Xs]) -> [pp_beside(X, Sep) | pp_punctuate(Sep, Xs)]. + +pp_par([]) -> prettypr:empty(); +pp_par(Xs) -> prettypr:par(Xs). +pp_fexpr({lit, {Tag, Lit}}) -> + aeso_pretty:expr({Tag, [], Lit}); +pp_fexpr(nil) -> + pp_text("[]"); +pp_fexpr({var, X}) -> pp_text(X); +pp_fexpr({def, Fun}) -> pp_fun_name(Fun); +pp_fexpr({def_u, Fun, Ar}) -> + pp_beside([pp_fun_name(Fun), pp_text("/"), pp_text(Ar)]); +pp_fexpr({def, Fun, Args}) -> + pp_call(pp_fun_name(Fun), Args); +pp_fexpr({con, _, I, []}) -> + pp_beside(pp_text("C"), pp_text(I)); +pp_fexpr({con, _, I, Es}) -> + pp_beside(pp_fexpr({con, [], I, []}), + pp_fexpr({tuple, Es})); +pp_fexpr({tuple, Es}) -> + pp_parens(pp_par(pp_punctuate(pp_text(","), [pp_fexpr(E) || E <- Es]))); +pp_fexpr({proj, E, I}) -> + pp_beside([pp_fexpr(E), pp_text("."), pp_text(I)]); +pp_fexpr({lam, Xs, A}) -> + pp_par([pp_fexpr({tuple, [{var, X} || X <- Xs]}), pp_text("=>"), + prettypr:nest(2, pp_fexpr(A))]); +pp_fexpr({closure, Fun, ClEnv}) -> + FVs = case ClEnv of + {tuple, Xs} -> Xs; + {var, _} -> [ClEnv] + end, + pp_call(pp_text("__CLOSURE__"), [{def, Fun} | FVs]); +pp_fexpr({set_proj, E, I, A}) -> + pp_beside(pp_fexpr(E), pp_braces(pp_beside([pp_text(I), pp_text(" = "), pp_fexpr(A)]))); +pp_fexpr({op, Op, [A, B] = Args}) -> + case is_infix(Op) of + false -> pp_call(pp_text(Op), Args); + true -> pp_parens(pp_par([pp_fexpr(A), pp_text(Op), pp_fexpr(B)])) + end; +pp_fexpr({op, Op, [A] = Args}) -> + case is_infix(Op) of + false -> pp_call(pp_text(Op), Args); + true -> pp_parens(pp_par([pp_text(Op), pp_fexpr(A)])) + end; +pp_fexpr({op, Op, As}) -> + pp_beside(pp_text(Op), pp_fexpr({tuple, As})); +pp_fexpr({'let', X, A, B}) -> + pp_par([pp_beside([pp_text("let "), pp_text(X), pp_text(" = "), pp_fexpr(A), pp_text(" in")]), + pp_fexpr(B)]); +pp_fexpr({builtin_u, B, N}) -> + pp_beside([pp_text(B), pp_text("/"), pp_text(N)]); +pp_fexpr({builtin, B, As}) -> + pp_call(pp_text(B), As); +pp_fexpr({remote_u, Ct, Fun, Ar}) -> + pp_beside([pp_fexpr(Ct), pp_text("."), pp_fun_name(Fun), pp_text("/"), pp_text(Ar)]); +pp_fexpr({remote, Ct, Fun, As}) -> + pp_call(pp_beside([pp_fexpr(Ct), pp_text("."), pp_fun_name(Fun)]), As); +pp_fexpr({funcall, Fun, As}) -> + pp_call(pp_fexpr(Fun), As); +pp_fexpr({switch, Split}) -> pp_split(Split). + +pp_call(Fun, Args) -> + pp_beside(Fun, pp_fexpr({tuple, Args})). + +pp_ftype(T) when is_atom(T) -> pp_text(T); +pp_ftype(any) -> pp_text("_"); +pp_ftype({tuple, Ts}) -> + pp_parens(pp_par(pp_punctuate(pp_text(","), [pp_ftype(T) || T <- Ts]))); +pp_ftype({list, T}) -> + pp_beside([pp_text("list("), pp_ftype(T), pp_text(")")]); +pp_ftype({function, Args, Res}) -> + pp_par([pp_ftype({tuple, Args}), pp_text("=>"), pp_ftype(Res)]); +pp_ftype({map, Key, Val}) -> + pp_beside([pp_text("map"), pp_ftype({tuple, [Key, Val]})]); +pp_ftype({variant, Cons}) -> + pp_par( + pp_punctuate(pp_text(" |"), + [ case Args of + [] -> pp_fexpr({con, [], I - 1, []}); + _ -> pp_beside(pp_fexpr({con, [], I - 1, []}), pp_ftype({tuple, Args})) + end || {I, Args} <- indexed(Cons)])). + +pp_split({nosplit, E}) -> pp_fexpr(E); +pp_split({split, Type, X, Alts}) -> + pp_above([pp_beside([pp_text("switch("), pp_text(X), pp_text(" : "), pp_ftype(Type), pp_text(")")])] ++ + [prettypr:nest(2, pp_case(Alt)) || Alt <- Alts]). + +pp_case({'case', Pat, Split}) -> + prettypr:sep([pp_beside(pp_pat(Pat), pp_text(" =>")), + prettypr:nest(2, pp_split(Split))]). + +pp_pat({tuple, Xs}) -> pp_fexpr({tuple, [{var, X} || X <- Xs]}); +pp_pat({'::', X, Xs}) -> pp_fexpr({op, '::', [{var, X}, {var, Xs}]}); +pp_pat({con, As, I, Xs}) -> pp_fexpr({con, As, I, [{var, X} || X <- Xs]}); +pp_pat({var, X}) -> pp_fexpr({var, X}); +pp_pat(P = {Tag, _}) when Tag == bool; Tag == int; Tag == string + -> pp_fexpr({lit, P}); +pp_pat(Pat) -> pp_fexpr(Pat). + +is_infix(Op) -> + C = hd(atom_to_list(Op)), + C < $a orelse C > $z. + diff --git a/src/aeso_fcode_to_fate.erl b/src/aeso_fcode_to_fate.erl new file mode 100644 index 0000000..d70f2cb --- /dev/null +++ b/src/aeso_fcode_to_fate.erl @@ -0,0 +1,1360 @@ +%%%------------------------------------------------------------------- +%%% @author Ulf Norell +%%% @copyright (C) 2019, Aeternity Anstalt +%%% @doc +%%% Fate backend for Sophia compiler +%%% @end +%%% Created : 11 Jan 2019 +%%% +%%%------------------------------------------------------------------- +-module(aeso_fcode_to_fate). + +-export([compile/2]). + +%% -- Preamble --------------------------------------------------------------- + +-type scode() :: [sinstr()]. +-type sinstr() :: {switch, arg(), stype(), [maybe_scode()], maybe_scode()} %% last arg is catch-all + | switch_body + | tuple(). %% FATE instruction + +-type arg() :: tuple(). %% Not exported: aeb_fate_code:fate_arg(). + +%% Annotated scode +-type scode_a() :: [sinstr_a()]. +-type sinstr_a() :: {switch, arg(), stype(), [maybe_scode_a()], maybe_scode_a()} %% last arg is catch-all + | switch_body + | {i, ann(), tuple()}. %% FATE instruction + +-type ann() :: #{ live_in := vars(), live_out := vars() }. +-type var() :: {var, integer()}. +-type vars() :: ordsets:ordset(var()). + +-type stype() :: tuple | boolean | {variant, [non_neg_integer()]}. +-type maybe_scode() :: missing | scode(). +-type maybe_scode_a() :: missing | scode_a(). + +-define(TODO(What), error({todo, ?FILE, ?LINE, ?FUNCTION_NAME, What})). + +-define(i(X), {immediate, X}). +-define(a, {stack, 0}). +-define(s, {var, -1}). %% TODO: until we have state support in FATE + +-define(IsOp(Op), ( + Op =:= 'STORE' orelse + Op =:= 'ADD' orelse + Op =:= 'SUB' orelse + Op =:= 'MUL' orelse + Op =:= 'DIV' orelse + Op =:= 'MOD' orelse + Op =:= 'POW' orelse + Op =:= 'LT' orelse + Op =:= 'GT' orelse + Op =:= 'EQ' orelse + Op =:= 'ELT' orelse + Op =:= 'EGT' orelse + Op =:= 'NEQ' orelse + Op =:= 'AND' orelse + Op =:= 'OR' orelse + Op =:= 'NOT' orelse + Op =:= 'ELEMENT' orelse + Op =:= 'MAP_EMPTY' orelse + Op =:= 'MAP_LOOKUP' orelse + Op =:= 'MAP_LOOKUPD' orelse + Op =:= 'MAP_UPDATE' orelse + Op =:= 'MAP_DELETE' orelse + Op =:= 'MAP_MEMBER' orelse + Op =:= 'MAP_FROM_LIST' orelse + Op =:= 'NIL' orelse + Op =:= 'IS_NIL' orelse + Op =:= 'CONS' orelse + Op =:= 'HD' orelse + Op =:= 'TL' orelse + Op =:= 'LENGTH' orelse + Op =:= 'APPEND' orelse + Op =:= 'STR_JOIN' orelse + Op =:= 'INT_TO_STR' orelse + Op =:= 'ADDR_TO_STR' orelse + Op =:= 'STR_REVERSE' orelse + Op =:= 'INT_TO_ADDR' orelse + Op =:= 'VARIANT_TEST' orelse + Op =:= 'VARIANT_ELEMENT' orelse + Op =:= 'BITS_NONE' orelse + Op =:= 'BITS_ALL' orelse + Op =:= 'BITS_ALL_N' orelse + Op =:= 'BITS_SET' orelse + Op =:= 'BITS_CLEAR' orelse + Op =:= 'BITS_TEST' orelse + Op =:= 'BITS_SUM' orelse + Op =:= 'BITS_OR' orelse + Op =:= 'BITS_AND' orelse + Op =:= 'BITS_DIFF' orelse + false)). + +-record(env, { contract, vars = [], locals = [], tailpos = true }). + +%% -- Debugging -------------------------------------------------------------- + +debug(Tag, Options, Fmt, Args) -> + Tags = proplists:get_value(debug, Options, []), + case Tags == all orelse lists:member(Tag, Tags) of + true -> io:format(Fmt, Args); + false -> ok + end. + +%% -- Main ------------------------------------------------------------------- + +%% @doc Main entry point. +compile(FCode, Options) -> + #{ contract_name := ContractName, + state_type := _StateType, + functions := Functions } = FCode, + SFuns = functions_to_scode(ContractName, Functions, Options), + SFuns1 = optimize_scode(SFuns, Options), + BBFuns = to_basic_blocks(SFuns1, Options), + FateCode = #{ functions => BBFuns, + symbols => #{}, + annotations => #{} }, + debug(compile, Options, "~s\n", [aeb_fate_asm:pp(FateCode)]), + FateCode. + +make_function_name(init) -> <<"init">>; +make_function_name({entrypoint, Name}) -> Name; +make_function_name({local_fun, Xs}) -> list_to_binary("." ++ string:join(Xs, ".")). + +functions_to_scode(ContractName, Functions, Options) -> + FunNames = maps:keys(Functions), + maps:from_list( + [ {make_function_name(Name), function_to_scode(ContractName, FunNames, Name, Args, Body, Type, Options)} + || {Name, #{args := Args, + body := Body, + return := Type}} <- maps:to_list(Functions), + Name /= init ]). %% TODO: skip init for now + +function_to_scode(ContractName, Functions, _Name, Args, Body, ResType, _Options) -> + ArgTypes = [ type_to_scode(T) || {_, T} <- Args ], + SCode = to_scode(init_env(ContractName, Functions, Args), Body), + {{ArgTypes, type_to_scode(ResType)}, SCode}. + +type_to_scode({variant, Cons}) -> {variant, lists:map(fun length/1, Cons)}; +type_to_scode({list, Type}) -> {list, type_to_scode(Type)}; +type_to_scode({tuple, Types}) -> {tuple, lists:map(fun type_to_scode/1, Types)}; +type_to_scode({map, Key, Val}) -> {map, type_to_scode(Key), type_to_scode(Val)}; +type_to_scode({function, _Args, _Res}) -> {tuple, [string, any]}; +type_to_scode(contract) -> address; +type_to_scode(T) -> T. + +%% -- Phase I ---------------------------------------------------------------- +%% Icode to structured assembly + +%% -- Environment functions -- + +init_env(ContractName, FunNames, Args) -> + #env{ vars = [ {X, {arg, I}} || {I, {X, _}} <- with_ixs(Args) ], + contract = ContractName, + locals = FunNames, + tailpos = true }. + +next_var(#env{ vars = Vars }) -> + 1 + lists:max([-1 | [J || {_, {var, J}} <- Vars]]). + +bind_var(Name, Var, Env = #env{ vars = Vars }) -> + Env#env{ vars = [{Name, Var} | Vars] }. + +bind_local(Name, Env) -> + I = next_var(Env), + {I, bind_var(Name, {var, I}, Env)}. + +notail(Env) -> Env#env{ tailpos = false }. + +code_error(Err) -> error(Err). + +lookup_var(#env{vars = Vars}, X) -> + case lists:keyfind(X, 1, Vars) of + {_, Var} -> Var; + false -> code_error({unbound_variable, X, Vars}) + end. + +%% -- The compiler -- + +to_scode(_Env, {lit, L}) -> + case L of + {int, N} -> [push(?i(N))]; + {string, S} -> [push(?i(aeb_fate_data:make_string(S)))]; + {bool, B} -> [push(?i(B))]; + {account_pubkey, K} -> [push(?i(aeb_fate_data:make_address(K)))]; + {contract_pubkey, K} -> [push(?i(aeb_fate_data:make_address(K)))]; + {oracle_pubkey, K} -> [push(?i(aeb_fate_data:make_oracle(K)))]; + {oracle_query_id, _} -> ?TODO(fate_oracle_query_id_value) + end; + +to_scode(_Env, nil) -> + [aeb_fate_code:nil(?a)]; + +to_scode(Env, {var, X}) -> + [push(lookup_var(Env, X))]; + +to_scode(Env, {con, Ar, I, As}) -> + N = length(As), + [[to_scode(notail(Env), A) || A <- As], + aeb_fate_code:variant(?a, ?i(Ar), ?i(I), ?i(N))]; + +to_scode(Env, {tuple, As}) -> + N = length(As), + [[ to_scode(notail(Env), A) || A <- As ], + aeb_fate_code:tuple(N)]; + +to_scode(Env, {proj, E, I}) -> + [to_scode(notail(Env), E), + aeb_fate_code:element_op(?a, ?i(I), ?a)]; + +to_scode(Env, {set_proj, R, I, E}) -> + [to_scode(notail(Env), E), + to_scode(notail(Env), R), + aeb_fate_code:setelement(?a, ?i(I), ?a, ?a)]; + +to_scode(Env, {op, Op, Args}) -> + call_to_scode(Env, op_to_scode(Op), Args); + +to_scode(Env, {'let', X, {var, Y}, Body}) -> + Env1 = bind_var(X, lookup_var(Env, Y), Env), + to_scode(Env1, Body); +to_scode(Env, {'let', X, Expr, Body}) -> + {I, Env1} = bind_local(X, Env), + [ to_scode(notail(Env), Expr), + aeb_fate_code:store({var, I}, {stack, 0}), + to_scode(Env1, Body) ]; + +to_scode(Env, {def, Fun, Args}) -> + FName = make_function_name(Fun), + Lbl = aeb_fate_data:make_string(FName), + call_to_scode(Env, local_call(Env, ?i(Lbl)), Args); +to_scode(Env, {funcall, Fun, Args}) -> + call_to_scode(Env, [to_scode(Env, Fun), local_call(Env, ?a)], Args); + +to_scode(Env, {builtin, B, Args}) -> + builtin_to_scode(Env, B, Args); + +to_scode(Env, {remote, Ct, Fun, [_Gas, _Value | Args]}) -> + %% TODO: FATE doesn't support value and gas arguments yet + Lbl = make_function_name(Fun), + Call = if Env#env.tailpos -> aeb_fate_code:call_tr(?a, Lbl); + true -> aeb_fate_code:call_r(?a, Lbl) end, + call_to_scode(Env, [to_scode(Env, Ct), Call], Args); + +to_scode(Env, {closure, Fun, FVs}) -> + to_scode(Env, {tuple, [{lit, {string, make_function_name(Fun)}}, FVs]}); + +to_scode(Env, {switch, Case}) -> + split_to_scode(Env, Case). + +local_call( Env, Fun) when Env#env.tailpos -> aeb_fate_code:call_t(Fun); +local_call(_Env, Fun) -> aeb_fate_code:call(Fun). + +split_to_scode(Env, {nosplit, Expr}) -> + [switch_body, to_scode(Env, Expr)]; +split_to_scode(Env, {split, {tuple, _}, X, Alts}) -> + {Def, Alts1} = catchall_to_scode(Env, X, Alts), + Arg = lookup_var(Env, X), + Alt = case [ {Xs, Split} || {'case', {tuple, Xs}, Split} <- Alts1 ] of + [] -> missing; + [{Xs, S} | _] -> + {Code, Env1} = match_tuple(Env, Arg, Xs), + [Code, split_to_scode(Env1, S)] + end, + case Def == missing andalso Alt /= missing of + true -> Alt; % skip the switch if single tuple pattern + false -> [{switch, Arg, tuple, [Alt], Def}] + end; +split_to_scode(Env, {split, boolean, X, Alts}) -> + {Def, Alts1} = catchall_to_scode(Env, X, Alts), + GetAlt = fun(B) -> + case lists:keyfind({bool, B}, 2, Alts1) of + false -> missing; + {'case', _, S} -> split_to_scode(Env, S) + end + end, + SAlts = [GetAlt(false), GetAlt(true)], + Arg = lookup_var(Env, X), + [{switch, Arg, boolean, SAlts, Def}]; +split_to_scode(Env, {split, {list, _}, X, Alts}) -> + {Def, Alts1} = catchall_to_scode(Env, X, Alts), + Arg = lookup_var(Env, X), + GetAlt = fun(P) -> + case [C || C = {'case', Pat, _} <- Alts1, Pat == P orelse is_tuple(Pat) andalso element(1, Pat) == P] of + [] -> missing; + [{'case', nil, S} | _] -> split_to_scode(Env, S); + [{'case', {'::', Y, Z}, S} | _] -> + {I, Env1} = bind_local(Y, Env), + {J, Env2} = bind_local(Z, Env1), + [aeb_fate_code:hd({var, I}, Arg), + aeb_fate_code:tl({var, J}, Arg), + split_to_scode(Env2, S)] + end + end, + SAlts = [GetAlt('::'), GetAlt(nil)], + [aeb_fate_code:is_nil(?a, Arg), + {switch, ?a, boolean, SAlts, Def}]; +split_to_scode(Env, {split, Type, X, Alts}) when Type == integer; Type == string -> + {Def, Alts1} = catchall_to_scode(Env, X, Alts), + literal_split_to_scode(Env, Type, lookup_var(Env, X), Alts1, Def); +split_to_scode(Env, {split, {variant, Cons}, X, Alts}) -> + {Def, Alts1} = catchall_to_scode(Env, X, Alts), + Arg = lookup_var(Env, X), + GetAlt = fun(I) -> + case [{Xs, S} || {'case', {con, _, J, Xs}, S} <- Alts1, I == J] of + [] -> missing; + [{Xs, S} | _] -> + {Code, Env1} = match_variant(Env, Arg, Xs), + [Code, split_to_scode(Env1, S)] + end + end, + SType = {variant, [length(Args) || Args <- Cons]}, + case {[GetAlt(I) || I <- lists:seq(0, length(Cons) - 1)], Def} of + %% Skip the switch for single constructor datatypes (with no catchall) + {[SAlt], missing} when SAlt /= missing -> SAlt; + {SAlts, _} -> [{switch, Arg, SType, SAlts, Def}] + end. + +literal_split_to_scode(_Env, _Type, Arg, [], Def) -> + {switch, Arg, boolean, [missing, missing], Def}; +literal_split_to_scode(Env, Type, Arg, [{'case', Lit, Body} | Alts], Def) when Type == integer; Type == string -> + True = split_to_scode(Env, Body), + False = + case Alts of + [] -> missing; + _ -> literal_split_to_scode(Env, Type, Arg, Alts, missing) + end, + SLit = case Lit of + {int, N} -> N; + {string, S} -> aeb_fate_data:make_string(S) + end, + [aeb_fate_code:eq(?a, Arg, ?i(SLit)), + {switch, ?a, boolean, [False, True], Def}]. + +catchall_to_scode(Env, X, Alts) -> catchall_to_scode(Env, X, Alts, []). + +catchall_to_scode(Env, X, [{'case', {var, Y}, Split} | _], Acc) -> + Env1 = bind_var(Y, lookup_var(Env, X), Env), + {split_to_scode(Env1, Split), lists:reverse(Acc)}; +catchall_to_scode(Env, X, [Alt | Alts], Acc) -> + catchall_to_scode(Env, X, Alts, [Alt | Acc]); +catchall_to_scode(_, _, [], Acc) -> {missing, lists:reverse(Acc)}. + +%% Tuple is in the accumulator. Arguments are the variable names. +match_tuple(Env, Arg, Xs) -> + match_tuple(Env, 0, fun aeb_fate_code:element_op/3, Arg, Xs). + +match_variant(Env, Arg, Xs) -> + Elem = fun(Dst, I, Val) -> aeb_fate_code:variant_element(Dst, Val, I) end, + match_tuple(Env, 0, Elem, Arg, Xs). + +match_tuple(Env, I, Elem, Arg, ["_" | Xs]) -> + match_tuple(Env, I + 1, Elem, Arg, Xs); +match_tuple(Env, I, Elem, Arg, [X | Xs]) -> + {J, Env1} = bind_local(X, Env), + {Code, Env2} = match_tuple(Env1, I + 1, Elem, Arg, Xs), + {[Elem({var, J}, ?i(I), Arg), Code], Env2}; +match_tuple(Env, _, _, _, []) -> + {[], Env}. + +%% -- Builtins -- + +call_to_scode(Env, CallCode, Args) -> + [[to_scode(notail(Env), A) || A <- lists:reverse(Args)], + CallCode]. + +builtin_to_scode(_Env, get_state, []) -> + [push(?s)]; +builtin_to_scode(Env, set_state, [_] = Args) -> + call_to_scode(Env, [aeb_fate_code:store(?s, ?a), + aeb_fate_code:tuple(0)], Args); +builtin_to_scode(_Env, event, [_] = _Args) -> + ?TODO(fate_event_instruction); +builtin_to_scode(_Env, map_empty, []) -> + [aeb_fate_code:map_empty(?a)]; +builtin_to_scode(_Env, bits_none, []) -> + [aeb_fate_code:bits_none(?a)]; +builtin_to_scode(_Env, bits_all, []) -> + [aeb_fate_code:bits_all(?a)]; +builtin_to_scode(Env, abort, [_] = Args) -> + call_to_scode(Env, aeb_fate_code:abort(?a), Args); +builtin_to_scode(Env, chain_spend, [_, _] = Args) -> + call_to_scode(Env, [aeb_fate_code:spend(?a, ?a), + aeb_fate_code:tuple(0)], Args); +builtin_to_scode(Env, chain_balance, [_] = Args) -> + call_to_scode(Env, aeb_fate_code:balance_other(?a, ?a), Args); +builtin_to_scode(Env, chain_block_hash, [_] = Args) -> + call_to_scode(Env, aeb_fate_code:blockhash(?a, ?a), Args); +builtin_to_scode(_Env, chain_coinbase, []) -> + [aeb_fate_code:beneficiary(?a)]; +builtin_to_scode(_Env, chain_timestamp, []) -> + [aeb_fate_code:timestamp(?a)]; +builtin_to_scode(_Env, chain_block_height, []) -> + [aeb_fate_code:generation(?a)]; +builtin_to_scode(_Env, chain_difficulty, []) -> + [aeb_fate_code:difficulty(?a)]; +builtin_to_scode(_Env, chain_gas_limit, []) -> + [aeb_fate_code:gaslimit(?a)]; +builtin_to_scode(_Env, contract_balance, []) -> + [aeb_fate_code:balance(?a)]; +builtin_to_scode(_Env, contract_address, []) -> + [aeb_fate_code:address(?a)]; +builtin_to_scode(_Env, call_origin, []) -> + [aeb_fate_code:origin(?a)]; +builtin_to_scode(_Env, call_caller, []) -> + [aeb_fate_code:caller(?a)]; +builtin_to_scode(_Env, call_value, []) -> + ?TODO(fate_call_value_instruction); +builtin_to_scode(_Env, call_gas_price, []) -> + [aeb_fate_code:gasprice(?a)]; +builtin_to_scode(_Env, call_gas_left, []) -> + [aeb_fate_code:gas(?a)]; +builtin_to_scode(_Env, oracle_register, [_, _, _, _] = _Args) -> + ?TODO(fate_oracle_register_instruction); +builtin_to_scode(_Env, oracle_query_fee, [_] = _Args) -> + ?TODO(fate_oracle_query_fee_instruction); +builtin_to_scode(_Env, oracle_query, [_, _, _, _, _] = _Args) -> + ?TODO(fate_oracle_query_instruction); +builtin_to_scode(_Env, oracle_get_question, [_, _] = _Args) -> + ?TODO(fate_oracle_get_question_instruction); +builtin_to_scode(_Env, oracle_respond, [_, _, _, _] = _Args) -> + ?TODO(fate_oracle_respond_instruction); +builtin_to_scode(_Env, oracle_extend, [_, _, _] = _Args) -> + ?TODO(fate_oracle_extend_instruction); +builtin_to_scode(_Env, oracle_get_answer, [_, _] = _Args) -> + ?TODO(fate_oracle_get_answer_instruction); +builtin_to_scode(_Env, aens_resolve, [_, _] = _Args) -> + ?TODO(fate_aens_resolve_instruction); +builtin_to_scode(_Env, aens_preclaim, [_, _, _] = _Args) -> + ?TODO(fate_aens_preclaim_instruction); +builtin_to_scode(_Env, aens_claim, [_, _, _, _] = _Args) -> + ?TODO(fate_aens_claim_instruction); +builtin_to_scode(_Env, aens_transfer, [_, _, _, _] = _Args) -> + ?TODO(fate_aens_transfer_instruction); +builtin_to_scode(_Env, aens_revoke, [_, _, _] = _Args) -> + ?TODO(fate_aens_revoke_instruction); +builtin_to_scode(_Env, crypto_ecverify, [_, _, _] = _Args) -> + ?TODO(fate_crypto_ecverify_instruction); +builtin_to_scode(_Env, crypto_ecverify_secp256k1, [_, _, _] = _Args) -> + ?TODO(fate_crypto_ecverify_secp256k1_instruction); +builtin_to_scode(_Env, crypto_sha3, [_] = _Args) -> + ?TODO(fate_crypto_sha3_instruction); +builtin_to_scode(_Env, crypto_sha256, [_] = _Args) -> + ?TODO(fate_crypto_sha256_instruction); +builtin_to_scode(_Env, crypto_blake2b, [_] = _Args) -> + ?TODO(fate_crypto_blake2b_instruction); +builtin_to_scode(_Env, auth_tx_hash, []) -> + ?TODO(fate_auth_tx_hash_instruction). + +%% -- Operators -- + +op_to_scode('+') -> aeb_fate_code:add(?a, ?a, ?a); +op_to_scode('-') -> aeb_fate_code:sub(?a, ?a, ?a); +op_to_scode('*') -> aeb_fate_code:mul(?a, ?a, ?a); +op_to_scode('/') -> aeb_fate_code:divide(?a, ?a, ?a); +op_to_scode(mod) -> aeb_fate_code:modulo(?a, ?a, ?a); +op_to_scode('^') -> aeb_fate_code:pow(?a, ?a, ?a); +op_to_scode('++') -> aeb_fate_code:append(?a, ?a, ?a); +op_to_scode('::') -> aeb_fate_code:cons(?a, ?a, ?a); +op_to_scode('<') -> aeb_fate_code:lt(?a, ?a, ?a); +op_to_scode('>') -> aeb_fate_code:gt(?a, ?a, ?a); +op_to_scode('=<') -> aeb_fate_code:elt(?a, ?a, ?a); +op_to_scode('>=') -> aeb_fate_code:egt(?a, ?a, ?a); +op_to_scode('==') -> aeb_fate_code:eq(?a, ?a, ?a); +op_to_scode('!=') -> aeb_fate_code:neq(?a, ?a, ?a); +op_to_scode('!') -> aeb_fate_code:not_op(?a, ?a); +op_to_scode(map_get) -> aeb_fate_code:map_lookup(?a, ?a, ?a); +op_to_scode(map_get_d) -> aeb_fate_code:map_lookup(?a, ?a, ?a, ?a); +op_to_scode(map_set) -> aeb_fate_code:map_update(?a, ?a, ?a, ?a); +op_to_scode(map_from_list) -> aeb_fate_code:map_from_list(?a, ?a); +op_to_scode(map_to_list) -> ?TODO(fate_map_to_list_instruction); +op_to_scode(map_delete) -> aeb_fate_code:map_delete(?a, ?a, ?a); +op_to_scode(map_member) -> aeb_fate_code:map_member(?a, ?a, ?a); +op_to_scode(map_size) -> ?TODO(fate_map_size_instruction); +op_to_scode(string_length) -> ?TODO(fate_string_length_instruction); +op_to_scode(string_concat) -> aeb_fate_code:str_join(?a, ?a, ?a); +op_to_scode(bits_set) -> aeb_fate_code:bits_set(?a, ?a, ?a); +op_to_scode(bits_clear) -> aeb_fate_code:bits_clear(?a, ?a, ?a); +op_to_scode(bits_test) -> aeb_fate_code:bits_test(?a, ?a, ?a); +op_to_scode(bits_sum) -> aeb_fate_code:bits_sum(?a, ?a); +op_to_scode(bits_intersection) -> aeb_fate_code:bits_and(?a, ?a, ?a); +op_to_scode(bits_union) -> aeb_fate_code:bits_or(?a, ?a, ?a); +op_to_scode(bits_difference) -> aeb_fate_code:bits_diff(?a, ?a, ?a); +op_to_scode(address_to_str) -> aeb_fate_code:addr_to_str(?a, ?a); +op_to_scode(int_to_str) -> aeb_fate_code:int_to_str(?a, ?a). + +%% PUSH and STORE ?a are the same, so we use STORE to make optimizations +%% easier, and specialize to PUSH (which is cheaper) at the end. +push(A) -> aeb_fate_code:store(?a, A). + +%% -- Phase II --------------------------------------------------------------- +%% Optimize + +optimize_scode(Funs, Options) -> + maps:map(fun(Name, Def) -> optimize_fun(Funs, Name, Def, Options) end, + Funs). + +flatten(missing) -> missing; +flatten(Code) -> lists:map(fun flatten_s/1, lists:flatten(Code)). + +flatten_s({switch, Arg, Type, Alts, Catch}) -> + {switch, Arg, Type, [flatten(Alt) || Alt <- Alts], flatten(Catch)}; +flatten_s(I) -> I. + +-define(MAX_SIMPL_ITERATIONS, 10). + +optimize_fun(_Funs, Name, {{Args, Res}, Code}, Options) -> + Code0 = flatten(Code), + debug(opt, Options, "Optimizing ~s\n", [Name]), + Code1 = simpl_loop(0, Code0, Options), + Code2 = desugar(Code1), + {{Args, Res}, Code2}. + +simpl_loop(N, Code, Options) when N >= ?MAX_SIMPL_ITERATIONS -> + debug(opt, Options, " No simpl_loop fixed_point after ~p iterations.\n\n", [N]), + Code; +simpl_loop(N, Code, Options) -> + ACode = annotate_code(Code), + [ debug(opt, Options, " annotated:\n~s\n", [pp_ann(" ", ACode)]) || N == 0 ], + Code1 = simplify(ACode, Options), + [ debug(opt, Options, " optimized:\n~s\n", [pp_ann(" ", Code1)]) || Code1 /= ACode ], + Code2 = unannotate(Code1), + case Code == Code2 of + true -> + debug(opt, Options, " Reached simpl_loop fixed point after ~p iteration~s.\n\n", + [N, if N /= 1 -> "s"; true -> "" end]), + Code2; + false -> simpl_loop(N + 1, Code2, Options) + end. + +pp_ann(Ind, [{switch, Arg, Type, Alts, Def} | Code]) -> + Tags = + case Type of + boolean -> ["FALSE", "TRUE"]; + tuple -> ["(_)"]; + {variant, Ar} -> ["C" ++ integer_to_list(I) || I <- lists:seq(0, length(Ar) - 1)] + end, + Ind1 = " " ++ Ind, + Ind2 = " " ++ Ind1, + [Ind, "SWITCH ", pp_arg(Arg), "\n", + [[Ind1, Tag, " =>\n", pp_ann(Ind2, Alt)] + || {Tag, Alt} <- lists:zip(Tags, Alts), Alt /= missing], + [[Ind1, "_ =>\n", pp_ann(Ind2, Def)] || Def /= missing], + pp_ann(Ind, Code)]; +pp_ann(Ind, [switch_body | Code]) -> + [Ind, "SWITCH-BODY\n", pp_ann(Ind, Code)]; +pp_ann(Ind, [{i, #{ live_in := In, live_out := Out }, I} | Code]) -> + Fmt = fun([]) -> "()"; + (Xs) -> string:join([lists:concat(["var", N]) || {var, N} <- Xs], " ") + end, + Op = [Ind, aeb_fate_pp:format_op(I, #{})], + Ann = [[" % ", Fmt(In), " -> ", Fmt(Out)] || In ++ Out /= []], + [io_lib:format("~-40s~s\n", [Op, Ann]), + pp_ann(Ind, Code)]; +pp_ann(_, []) -> []. + + +pp_arg(?i(I)) -> io_lib:format("~w", [I]); +pp_arg({arg, N}) -> io_lib:format("arg~p", [N]); +pp_arg({var, N}) -> io_lib:format("var~p", [N]); +pp_arg(?a) -> "a". + +%% -- Analysis -- + +annotate_code(Code) -> + {WCode, _} = ann_writes(Code, ordsets:new(), []), + {RCode, _} = ann_reads(WCode, ordsets:new(), []), + RCode. + +%% Reverses the code +ann_writes(missing, Writes, []) -> {missing, Writes}; +ann_writes([switch_body | Code], Writes, Acc) -> + ann_writes(Code, Writes, [switch_body | Acc]); +ann_writes([{switch, Arg, Type, Alts, Def} | Code], Writes, Acc) -> + {Alts1, WritesAlts} = lists:unzip([ ann_writes(Alt, Writes, []) || Alt <- Alts ]), + {Def1, WritesDef} = ann_writes(Def, Writes, []), + Writes1 = ordsets:union(Writes, ordsets:intersection([WritesDef | WritesAlts])), + ann_writes(Code, Writes1, [{switch, Arg, Type, Alts1, Def1} | Acc]); +ann_writes([I | Code], Writes, Acc) -> + Ws = var_writes(I), + Writes1 = ordsets:union(Writes, Ws), + Ann = #{ writes_in => Writes, writes_out => Writes1 }, + ann_writes(Code, Writes1, [{i, Ann, I} | Acc]); +ann_writes([], Writes, Acc) -> + {Acc, Writes}. + +%% Takes reversed code and unreverses it. +ann_reads(missing, Reads, []) -> {missing, Reads}; +ann_reads([switch_body | Code], Reads, Acc) -> + ann_reads(Code, Reads, [switch_body | Acc]); +ann_reads([{switch, Arg, Type, Alts, Def} | Code], Reads, Acc) -> + {Alts1, ReadsAlts} = lists:unzip([ ann_reads(Alt, Reads, []) || Alt <- Alts ]), + {Def1, ReadsDef} = ann_reads(Def, Reads, []), + Reads1 = ordsets:union([[Arg], Reads, ReadsDef | ReadsAlts]), + ann_reads(Code, Reads1, [{switch, Arg, Type, Alts1, Def1} | Acc]); +ann_reads([{i, Ann, I} | Code], Reads, Acc) -> + #{ writes_in := WritesIn, writes_out := WritesOut } = Ann, + #{ read := Rs, write := W, pure := Pure } = attributes(I), + Reads1 = + case {W, Pure andalso not ordsets:is_element(W, Reads)} of + %% This is a little bit dangerous: if writing to a dead variable, we ignore + %% the reads. Relies on dead writes to be removed by the + %% optimisations below (r_write_to_dead_var). + {{var, _}, true} -> Reads; + _ -> ordsets:union(Reads, Rs) + end, + LiveIn = ordsets:intersection(Reads1, WritesIn), + LiveOut = ordsets:intersection(Reads, WritesOut), + Ann1 = #{ live_in => LiveIn, live_out => LiveOut }, + ann_reads(Code, Reads1, [{i, Ann1, I} | Acc]); +ann_reads([], Reads, Acc) -> {Acc, Reads}. + +%% Instruction attributes: reads, writes and purity (pure means no side-effects +%% aside from the reads and writes). +attributes(I) -> + Set = fun(L) when is_list(L) -> ordsets:from_list(L); + (X) -> ordsets:from_list([X]) end, + Attr = fun(W, R, P) -> #{read => Set(R), write => W, pure => P} end, + Pure = fun(W, R) -> Attr(W, R, true) end, + Impure = fun(W, R) -> Attr(W, R, false) end, + case I of + 'RETURN' -> Impure(pc, []); + {'RETURNR', A} -> Impure(pc, A); + {'CALL', _} -> Impure(?a, []); + {'CALL_R', A, _} -> Impure(?a, A); + {'CALL_T', _} -> Impure(pc, []); + {'CALL_TR', A, _} -> Impure(pc, A); + {'JUMP', _} -> Impure(pc, []); + {'JUMPIF', A, _} -> Impure(pc, A); + {'SWITCH_V2', A, _, _} -> Impure(pc, A); + {'SWITCH_V3', A, _, _, _} -> Impure(pc, A); + {'SWITCH_VN', A, _} -> Impure(pc, A); + {'PUSH', A} -> Pure(?a, A); + 'DUPA' -> Pure(?a, []); + {'DUP', A} -> Pure(?a, A); + {'POP', A} -> Pure(A, ?a); + {'STORE', A, B} -> Pure(A, B); + 'INCA' -> Pure(?a, ?a); + {'INC', A} -> Pure(A, A); + 'DECA' -> Pure(?a, []); + {'DEC', A} -> Pure(A, A); + {'ADD', A, B, C} -> Pure(A, [B, C]); + {'SUB', A, B, C} -> Pure(A, [B, C]); + {'MUL', A, B, C} -> Pure(A, [B, C]); + {'DIV', A, B, C} -> Pure(A, [B, C]); + {'MOD', A, B, C} -> Pure(A, [B, C]); + {'POW', A, B, C} -> Pure(A, [B, C]); + {'LT', A, B, C} -> Pure(A, [B, C]); + {'GT', A, B, C} -> Pure(A, [B, C]); + {'EQ', A, B, C} -> Pure(A, [B, C]); + {'ELT', A, B, C} -> Pure(A, [B, C]); + {'EGT', A, B, C} -> Pure(A, [B, C]); + {'NEQ', A, B, C} -> Pure(A, [B, C]); + {'AND', A, B, C} -> Pure(A, [B, C]); + {'OR', A, B, C} -> Pure(A, [B, C]); + {'NOT', A, B} -> Pure(A, B); + {'TUPLE', _} -> Pure(?a, []); + {'ELEMENT', A, B, C} -> Pure(A, [B, C]); + {'SETELEMENT', A, B, C, D} -> Pure(A, [B, C, D]); + {'MAP_EMPTY', A} -> Pure(A, []); + {'MAP_LOOKUP', A, B, C} -> Pure(A, [B, C]); + {'MAP_LOOKUPD', A, B, C, D} -> Pure(A, [B, C, D]); + {'MAP_UPDATE', A, B, C, D} -> Pure(A, [B, C, D]); + {'MAP_DELETE', A, B, C} -> Pure(A, [B, C]); + {'MAP_MEMBER', A, B, C} -> Pure(A, [B, C]); + {'MAP_FROM_LIST', A, B} -> Pure(A, B); + {'NIL', A} -> Pure(A, []); + {'IS_NIL', A, B} -> Pure(A, B); + {'CONS', A, B, C} -> Pure(A, [B, C]); + {'HD', A, B} -> Pure(A, B); + {'TL', A, B} -> Pure(A, B); + {'LENGTH', A, B} -> Pure(A, B); + {'APPEND', A, B, C} -> Pure(A, [B, C]); + {'STR_JOIN', A, B, C} -> Pure(A, [B, C]); + {'INT_TO_STR', A, B} -> Pure(A, B); + {'ADDR_TO_STR', A, B} -> Pure(A, B); + {'STR_REVERSE', A, B} -> Pure(A, B); + {'INT_TO_ADDR', A, B} -> Pure(A, B); + {'VARIANT', A, B, C, D} -> Pure(A, [?a, B, C, D]); + {'VARIANT_TEST', A, B, C} -> Pure(A, [B, C]); + {'VARIANT_ELEMENT', A, B, C} -> Pure(A, [B, C]); + 'BITS_NONEA' -> Pure(?a, []); + {'BITS_NONE', A} -> Pure(A, []); + 'BITS_ALLA' -> Pure(?a, []); + {'BITS_ALL', A} -> Pure(A, []); + {'BITS_ALL_N', A, B} -> Pure(A, B); + {'BITS_SET', A, B, C} -> Pure(A, [B, C]); + {'BITS_CLEAR', A, B, C} -> Pure(A, [B, C]); + {'BITS_TEST', A, B, C} -> Pure(A, [B, C]); + {'BITS_SUM', A, B} -> Pure(A, B); + {'BITS_OR', A, B, C} -> Pure(A, [B, C]); + {'BITS_AND', A, B, C} -> Pure(A, [B, C]); + {'BITS_DIFF', A, B, C} -> Pure(A, [B, C]); + {'ADDRESS', A} -> Pure(A, []); + {'BALANCE', A} -> Impure(A, []); + {'BALANCE_OTHER', A, B} -> Impure(A, [B]); + {'ORIGIN', A} -> Pure(A, []); + {'CALLER', A} -> Pure(A, []); + {'GASPRICE', A} -> Pure(A, []); + {'BLOCKHASH', A} -> Pure(A, []); + {'BENEFICIARY', A} -> Pure(A, []); + {'TIMESTAMP', A} -> Pure(A, []); + {'GENERATION', A} -> Pure(A, []); + {'MICROBLOCK', A} -> Pure(A, []); + {'DIFFICULTY', A} -> Pure(A, []); + {'GASLIMIT', A} -> Pure(A, []); + {'GAS', A} -> Impure(?a, A); + {'LOG0', A, B} -> Impure(none, [A, B]); + {'LOG1', A, B, C} -> Impure(none, [A, B, C]); + {'LOG2', A, B, C, D} -> Impure(none, [A, B, C, D]); + {'LOG3', A, B, C, D, E} -> Impure(none, [A, B, C, D, E]); + {'LOG4', A, B, C, D, E, F} -> Impure(none, [A, B, C, D, E, F]); + 'DEACTIVATE' -> Impure(none, []); + {'SPEND', A, B} -> Impure(none, [A, B]); + + {'ORACLE_REGISTER', A, B, C, D, E, F} -> Impure(A, [B, C, D, E, F]); + 'ORACLE_QUERY' -> Impure(?a, []); %% TODO + 'ORACLE_RESPOND' -> Impure(?a, []); %% TODO + 'ORACLE_EXTEND' -> Impure(?a, []); %% TODO + 'ORACLE_GET_ANSWER' -> Impure(?a, []); %% TODO + 'ORACLE_GET_QUESTION' -> Impure(?a, []); %% TODO + 'ORACLE_QUERY_FEE' -> Impure(?a, []); %% TODO + 'AENS_RESOLVE' -> Impure(?a, []); %% TODO + 'AENS_PRECLAIM' -> Impure(?a, []); %% TODO + 'AENS_CLAIM' -> Impure(?a, []); %% TODO + 'AENS_UPDATE' -> Impure(?a, []); %% TODO + 'AENS_TRANSFER' -> Impure(?a, []); %% TODO + 'AENS_REVOKE' -> Impure(?a, []); %% TODO + 'ECVERIFY' -> Pure(?a, []); %% TODO + 'SHA3' -> Pure(?a, []); %% TODO + 'SHA256' -> Pure(?a, []); %% TODO + 'BLAKE2B' -> Pure(?a, []); %% TODO + {'ABORT', A} -> Impure(pc, A); + {'EXIT', A} -> Impure(pc, A); + 'NOP' -> Pure(none, []) + end. + +var_writes({i, _, I}) -> var_writes(I); +var_writes(I) -> + #{ write := W } = attributes(I), + case W of + {var, _} -> [W]; + _ -> [] + end. + +-spec independent(sinstr_a(), sinstr_a()) -> boolean(). +%% independent({switch, _, _, _, _}, _) -> false; %% Commented due to Dialyzer whinging +independent(_, {switch, _, _, _, _}) -> false; +%% independent(switch_body, _) -> true; +independent(_, switch_body) -> true; +independent({i, _, I}, {i, _, J}) -> + #{ write := WI, read := RI, pure := PureI } = attributes(I), + #{ write := WJ, read := RJ, pure := PureJ } = attributes(J), + + StackI = lists:member(?a, [WI | RI]), + StackJ = lists:member(?a, [WJ | RJ]), + + if WI == pc; WJ == pc -> false; %% no jumps + not (PureI or PureJ) -> false; %% at least one is pure + StackI and StackJ -> false; %% cannot both use the stack + true -> + %% and cannot write to each other's inputs + not lists:member(WI, RJ) andalso + not lists:member(WJ, RI) + end. + +merge_ann(#{ live_in := LiveIn }, #{ live_out := LiveOut }) -> + #{ live_in => LiveIn, live_out => LiveOut }. + +%% Swap two instructions. Precondition: the instructions are independent/2. +swap_instrs(I, switch_body) -> {switch_body, I}; +%% swap_instrs(switch_body, I) -> {I, switch_body}; %% Commented due to Dialyzer whinging +swap_instrs({i, #{ live_in := Live1 }, I}, {i, #{ live_in := Live2, live_out := Live3 }, J}) -> + %% Since I and J are independent the J can't read or write anything in + %% that I writes. + WritesI = ordsets:subtract(Live2, Live1), + %% Any final reads by J, that I does not read should be removed from Live2. + #{ read := ReadsI } = attributes(I), + ReadsJ = ordsets:subtract(Live2, ordsets:union(Live3, ReadsI)), + Live2_ = ordsets:subtract(ordsets:union([Live1, Live2, Live3]), ordsets:union(WritesI, ReadsJ)), + {{i, #{ live_in => Live1, live_out => Live2_ }, J}, + {i, #{ live_in => Live2_, live_out => Live3 }, I}}. + +live_in(R, #{ live_in := LiveIn }) -> ordsets:is_element(R, LiveIn); +live_in(R, {i, Ann, _}) -> live_in(R, Ann); +live_in(R, [I = {i, _, _} | _]) -> live_in(R, I); +live_in(R, [switch_body | Code]) -> live_in(R, Code); +live_in(R, [{switch, A, _, Alts, Def} | _]) -> + R == A orelse lists:any(fun(Code) -> live_in(R, Code) end, [Def | Alts]); +live_in(_, missing) -> false; +live_in(_, []) -> false. + +live_out(R, #{ live_out := LiveOut }) -> ordsets:is_element(R, LiveOut). + +%% -- Optimizations -- + +simplify([], _) -> []; +simplify(missing, _) -> missing; +simplify([I | Code], Options) -> + simpl_top(simpl_s(I, Options), simplify(Code, Options), Options). + +simpl_s({switch, Arg, Type, Alts, Def}, Options) -> + {switch, Arg, Type, [simplify(A, Options) || A <- Alts], simplify(Def, Options)}; +simpl_s(I, _) -> I. + +simpl_top(I, Code, Options) -> + apply_rules(rules(), I, Code, Options). + +apply_rules(Rules, I, Code, Options) -> + Cons = fun(X, Xs) -> simpl_top(X, Xs, Options) end, + case apply_rules_once(Rules, I, Code) of + false -> [I | Code]; + {RName, New, Rest} -> + debug(opt_rules, Options, " Applied ~p:\n~s ==>\n~s\n", [RName, pp_ann(" ", [I | Code]), pp_ann(" ", New ++ Rest)]), + lists:foldr(Cons, Rest, New) + end. + +apply_rules_once([], _, _) -> + false; +apply_rules_once([{RName, Rule} | Rules], I, Code) -> + case Rule(I, Code) of + false -> apply_rules_once(Rules, I, Code); + {New, Rest} -> {RName, New, Rest} + end. + +-define(RULE(Name), {Name, fun Name/2}). + +merge_rules() -> + [?RULE(r_push_consume), + ?RULE(r_one_shot_var), + ?RULE(r_write_to_dead_var), + ?RULE(r_inline_switch_target) + ]. + +rules() -> + merge_rules() ++ + [?RULE(r_dup_to_push), + ?RULE(r_swap_push), + ?RULE(r_swap_write), + ?RULE(r_constant_propagation), + ?RULE(r_prune_impossible_branches), + ?RULE(r_inline_store), + ?RULE(r_float_switch_body) + ]. + +%% Removing pushes that are immediately consumed. +r_push_consume({i, Ann1, {'STORE', ?a, A}}, [{i, Ann2, {'POP', B}} | Code]) -> + case live_out(B, Ann2) of + true -> {[{i, merge_ann(Ann1, Ann2), {'STORE', B, A}}], Code}; + false -> {[], Code} + end; +r_push_consume({i, Ann1, {'STORE', ?a, A}}, Code) -> + inline_push(Ann1, A, 0, Code, []); +%% Writing directly to memory instead of going through the accumulator. +r_push_consume({i, Ann1, I}, [{i, Ann2, {'STORE', R, ?a}} | Code]) -> + IsPush = + case op_view(I) of + {_, ?a, _} -> true; + _ -> false + end orelse + case I of + {'VARIANT', ?a, _, _, _} -> true; + _ -> false + end, + if IsPush -> {[{i, merge_ann(Ann1, Ann2), setelement(2, I, R)}], Code}; + true -> false end; +r_push_consume(_, _) -> false. + +inline_push(Ann, Arg, Stack, [switch_body | Code], Acc) -> + inline_push(Ann, Arg, Stack, Code, [switch_body | Acc]); +inline_push(Ann1, Arg, Stack, [{i, Ann2, I} = AI | Code], Acc) -> + case op_view(I) of + {Op, R, As} -> + Consumes = length([ ?a || ?a <- As ]), + Produces = length([ ?a || ?a == R ]), + case Consumes > Stack of + true -> + {As0, As1} = split_stack_arg(Stack, As), + Acc1 = [{i, merge_ann(Ann1, Ann2), from_op_view(Op, R, As0 ++ [Arg] ++ As1)} | Acc], + {lists:reverse(Acc1), Code}; + false -> + {AI1, {i, Ann1b, _}} = swap_instrs({i, Ann1, {'STORE', ?a, Arg}}, AI), + inline_push(Ann1b, Arg, Stack + Produces - Consumes, Code, [AI1 | Acc]) + end; + false -> false + end; +inline_push(_, _, _, _, _) -> false. + +split_stack_arg(N, As) -> split_stack_arg(N, As, []). +split_stack_arg(0, [?a | As], Acc) -> + {lists:reverse(Acc), As}; +split_stack_arg(N, [A | As], Acc) -> + N1 = if A == ?a -> N - 1; + true -> N end, + split_stack_arg(N1, As, [A | Acc]). + +%% Changing PUSH A, DUPA to PUSH A, PUSH A enables further optimisations +r_dup_to_push({i, Ann1, Push={'STORE', ?a, _}}, [{i, Ann2, 'DUPA'} | Code]) -> + #{ live_in := LiveIn } = Ann1, + Ann1_ = Ann1#{ live_out => LiveIn }, + Ann2_ = Ann2#{ live_in => LiveIn }, + {[{i, Ann1_, Push}, {i, Ann2_, Push}], Code}; +r_dup_to_push(_, _) -> false. + +%% Move PUSH A past non-stack instructions. +r_swap_push(Push = {i, _, {'STORE', ?a, _}}, [I | Code]) -> + case independent(Push, I) of + true -> + {I1, Push1} = swap_instrs(Push, I), + {[I1, Push1], Code}; + false -> false + end; +r_swap_push(_, _) -> false. + +%% Match up writes to variables with instructions further down. +r_swap_write(I = {i, _, _}, [J | Code]) -> + case {var_writes(I), independent(I, J)} of + {[_], true} -> + {J1, I1} = swap_instrs(I, J), + r_swap_write([J1], I1, Code); + _ -> false + end; +r_swap_write(_, _) -> false. + +r_swap_write(Pre, I, [switch_body | Code]) -> + r_swap_write([switch_body | Pre], I, Code); +r_swap_write(Pre, I, Code0 = [J | Code]) -> + case apply_rules_once(merge_rules(), I, Code0) of + {_Rule, New, Rest} -> + {lists:reverse(Pre) ++ New, Rest}; + false -> + case independent(I, J) of + false -> false; + true -> + {J1, I1} = swap_instrs(I, J), + r_swap_write([J1 | Pre], I1, Code) + end + end; +r_swap_write(_, _, _) -> false. + +%% Precompute instructions with known values +r_constant_propagation(Cons = {i, _, {'CONS', R, _, _}}, [{i, Ann, {'IS_NIL', S, R}} | Code]) -> + Store = {i, Ann, {'STORE', S, ?i(false)}}, + case R of + ?a -> {[Store], Code}; + _ -> {[Cons, Store], Code} + end; +r_constant_propagation(Cons = {i, _, {'NIL', R}}, [{i, Ann, {'IS_NIL', S, R}} | Code]) -> + Store = {i, Ann, {'STORE', S, ?i(true)}}, + case R of + ?a -> {[Store], Code}; + _ -> {[Cons, Store], Code} + end; +r_constant_propagation({i, Ann, I}, Code) -> + case op_view(I) of + false -> false; + {Op, R, As} -> + Vs = [V || ?i(V) <- As], + case length(Vs) == length(As) of + false -> false; + true -> + case eval_op(Op, Vs) of + no_eval -> false; + V -> {[{i, Ann, {'STORE', R, ?i(V)}}], Code} + end + end + end; +r_constant_propagation(_, _) -> false. + +eval_op('ADD', [X, Y]) -> X + Y; +eval_op('SUB', [X, Y]) -> X - Y; +eval_op('MUL', [X, Y]) -> X * Y; +eval_op('DIV', [X, Y]) when Y /= 0 -> X div Y; +eval_op('MOD', [X, Y]) when Y /= 0 -> X rem Y; +eval_op('POW', [_, _]) -> no_eval; +eval_op('LT', [X, Y]) -> X < Y; +eval_op('GT', [X, Y]) -> X > Y; +eval_op('EQ', [X, Y]) -> X =:= Y; +eval_op('ELT', [X, Y]) -> X =< Y; +eval_op('EGT', [X, Y]) -> X >= Y; +eval_op('NEQ', [X, Y]) -> X =/= Y; +eval_op('NOT', [X]) -> not X; +eval_op(_, _) -> no_eval. %% TODO: bits? + +%% Prune impossible branches from switches +r_prune_impossible_branches({switch, ?i(V), Type, Alts, missing}, Code) -> + case pick_branch(Type, V, Alts) of + false -> false; + Alt -> {Alt, Code} + end; +r_prune_impossible_branches({switch, ?i(V), boolean, [False, True] = Alts, Def}, Code) -> + Alts1 = [if V -> missing; true -> False end, + if V -> True; true -> missing end], + case Alts == Alts1 of + true -> false; + false -> + case Alts1 of + [missing, missing] -> {Def, Code}; + _ -> {[{switch, ?i(V), boolean, Alts1, Def}], Code} + end + end; +r_prune_impossible_branches(Variant = {i, _, {'VARIANT', R, ?i(_), ?i(Tag), ?i(_)}}, + [{switch, R, Type, Alts, missing} | Code]) -> + case {R, lists:nth(Tag + 1, Alts)} of + {_, missing} -> + Alts1 = [missing || _ <- Alts], + case Alts == Alts1 of + true -> false; + false -> {[Variant, {switch, R, Type, Alts1, missing}], Code} + end; + {?a, Alt} -> {Alt, Code}; + {_, Alt} -> + case live_in(R, Alt) of + true -> {[Variant | Alt], Code}; + false -> {Alt, Code} + end + end; +r_prune_impossible_branches(_, _) -> false. + +pick_branch(boolean, V, [False, True]) -> + Alt = if V -> True; true -> False end, + case Alt of + missing -> false; + _ -> Alt + end; +pick_branch(_Type, _V, _Alts) -> + false. + +%% STORE R A, SWITCH R --> SWITCH A +r_inline_switch_target(Store = {i, _, {'STORE', R, A}}, [{switch, R, Type, Alts, Def} | Code]) -> + Switch = {switch, A, Type, Alts, Def}, + case R of + ?a -> {[Switch], Code}; + {var, _} -> + case lists:any(fun(Alt) -> live_in(R, Alt) end, [Def | Alts]) of + false -> {[Switch], Code}; + true when A /= ?a -> {[Store, Switch], Code}; + true -> false + end; + _ -> false %% impossible + end; +r_inline_switch_target(_, _) -> false. + +%% Float switch-body to closest switch +r_float_switch_body(I = {i, _, _}, [switch_body | Code]) -> + {[], [switch_body, I | Code]}; +r_float_switch_body(_, _) -> false. + +%% Inline stores +r_inline_store(I = {i, _, {'STORE', R = {var, _}, A}}, Code) -> + %% Not when A is var unless updating the annotations properly. + Inline = case A of + {arg, _} -> true; + ?i(_) -> true; + _ -> false + end, + if Inline -> r_inline_store([I], R, A, Code); + true -> false end; +r_inline_store(_, _) -> false. + +r_inline_store(Acc, R, A, [switch_body | Code]) -> + r_inline_store([switch_body | Acc], R, A, Code); +r_inline_store(Acc, R, A, [{i, Ann, I} | Code]) -> + #{ write := W, pure := Pure } = attributes(I), + Inl = fun(X) when X == R -> A; (X) -> X end, + case not live_in(R, Ann) orelse not Pure orelse lists:member(W, [R, A]) of + true -> false; + false -> + case op_view(I) of + {Op, S, As} -> + case lists:member(R, As) of + true -> + Acc1 = [{i, Ann, from_op_view(Op, S, lists:map(Inl, As))} | Acc], + case r_inline_store(Acc1, R, A, Code) of + false -> {lists:reverse(Acc1), Code}; + {_, _} = Res -> Res + end; + false -> + r_inline_store([{i, Ann, I} | Acc], R, A, Code) + end; + _ -> r_inline_store([{i, Ann, I} | Acc], R, A, Code) + end + end; +r_inline_store(_Acc, _, _, _) -> false. + +%% Shortcut write followed by final read +r_one_shot_var({i, Ann1, I}, [{i, Ann2, J} | Code]) -> + case op_view(I) of + {Op, R, As} -> + Copy = case J of + {'STORE', S, R} -> {write_to, S}; + _ -> false + end, + case {live_out(R, Ann2), Copy} of + {false, {write_to, X}} -> + {[{i, merge_ann(Ann1, Ann2), from_op_view(Op, X, As)}], Code}; + _ -> false + end; + _ -> false + end; +r_one_shot_var(_, _) -> false. + +%% Remove writes to dead variables +r_write_to_dead_var({i, Ann, I}, Code) -> + case op_view(I) of + {_Op, R = {var, _}, As} -> + case live_out(R, Ann) of + false -> + %% Subtle: we still have to pop the stack if any of the arguments + %% came from there. In this case we pop to R, which we know is + %% unused. + {[{i, Ann, {'POP', R}} || X <- As, X == ?a], Code}; + true -> false + end; + _ -> false + end; +r_write_to_dead_var(_, _) -> false. + +op_view(T) when is_tuple(T) -> + case tuple_to_list(T) of + [Op, R | As] when ?IsOp(Op) -> + {Op, R, As}; + _ -> false + end; +op_view(_) -> false. + +from_op_view(Op, R, As) -> list_to_tuple([Op, R | As]). + +%% Desugar and specialize and remove annotations +-spec unannotate(scode_a()) -> scode(); + (sinstr_a()) -> sinstr(); + (missing) -> missing. +unannotate(switch_body) -> [switch_body]; +unannotate({switch, Arg, Type, Alts, Def}) -> + [{switch, Arg, Type, [unannotate(A) || A <- Alts], unannotate(Def)}]; +unannotate(missing) -> missing; +unannotate(Code) when is_list(Code) -> + lists:flatmap(fun unannotate/1, Code); +unannotate({i, _Ann, I}) -> [I]. + +%% Desugar and specialize +desugar({'ADD', ?a, ?i(1), ?a}) -> [aeb_fate_code:inc()]; +desugar({'SUB', ?a, ?a, ?i(1)}) -> [aeb_fate_code:dec()]; +desugar({'STORE', ?a, A}) -> [aeb_fate_code:push(A)]; +desugar({switch, Arg, Type, Alts, Def}) -> + [{switch, Arg, Type, [desugar(A) || A <- Alts], desugar(Def)}]; +desugar(missing) -> missing; +desugar(Code) when is_list(Code) -> + lists:flatmap(fun desugar/1, Code); +desugar(I) -> [I]. + +%% -- Phase III -------------------------------------------------------------- +%% Constructing basic blocks + +to_basic_blocks(Funs, Options) -> + maps:from_list([ {Name, {{Args, Res}, + bb(Name, Code ++ [aeb_fate_code:return()], Options)}} + || {Name, {{Args, Res}, Code}} <- maps:to_list(Funs) ]). + +bb(_Name, Code, _Options) -> + Blocks0 = blocks(Code), + Blocks1 = optimize_blocks(Blocks0), + Blocks = lists:flatmap(fun split_calls/1, Blocks1), + Labels = maps:from_list([ {Ref, I} || {I, {Ref, _}} <- with_ixs(Blocks) ]), + BBs = [ set_labels(Labels, B) || B <- Blocks ], + maps:from_list(BBs). + +%% -- Break up scode into basic blocks -- + +-type bbref() :: reference(). + +%% Code to be turned into blocks. +-record(blk, { ref :: bbref(), %% block id + code :: scode(), + catchall = none :: bbref() | none %% closest catchall + }). + +-type bb() :: {bbref(), bcode()}. +-type bcode() :: [binstr()]. +-type binstr() :: {jump, bbref()} + | {jumpif, bbref()} + | tuple(). %% FATE instruction + +-spec blocks(scode()) -> [bb()]. +blocks(Code) -> + Top = make_ref(), + blocks([#blk{ref = Top, code = Code}], []). + +-spec blocks([#blk{}], [bb()]) -> [bb()]. +blocks([], Acc) -> + lists:reverse(Acc); +blocks([Blk | Blocks], Acc) -> + block(Blk, [], Blocks, Acc). + +-spec block(#blk{}, bcode(), [#blk{}], [bb()]) -> [bb()]. +block(#blk{ref = Ref, code = []}, CodeAcc, Blocks, BlockAcc) -> + blocks(Blocks, [{Ref, lists:reverse(CodeAcc)} | BlockAcc]); +block(Blk = #blk{code = [switch_body | Code]}, Acc, Blocks, BlockAcc) -> + %% Reached the body of a switch. Clear catchall ref. + block(Blk#blk{code = Code, catchall = none}, Acc, Blocks, BlockAcc); +block(Blk = #blk{code = [{switch, Arg, Type, Alts, Default} | Code], + catchall = Catchall}, Acc, Blocks, BlockAcc) -> + FreshBlk = fun(C, Ca) -> + R = make_ref(), + {R, [#blk{ref = R, code = C, catchall = Ca}]} + end, + {RestRef, RestBlk} = FreshBlk(Code, Catchall), + {DefRef, DefBlk} = + case Default of + missing when Catchall == none -> + FreshBlk([aeb_fate_code:abort(?i(<<"Incomplete patterns">>))], none); + missing -> {Catchall, []}; + _ -> FreshBlk(Default ++ [{jump, RestRef}], Catchall) + %% ^ fall-through to the outer catchall + end, + {Blk1, Code1, AltBlks} = + case Type of + boolean -> + [FalseCode, TrueCode] = Alts, + {ThenRef, ThenBlk} = + case TrueCode of + missing -> {DefRef, []}; + _ -> FreshBlk(TrueCode ++ [{jump, RestRef}], DefRef) + end, + ElseCode = + case FalseCode of + missing -> [{jump, DefRef}]; + _ -> FalseCode ++ [{jump, RestRef}] + end, + case lists:usort(Alts) == [missing] of + true -> {Blk#blk{code = [{jump, DefRef}]}, [], []}; + false -> + case Arg of + ?i(false) -> {Blk#blk{code = ElseCode}, [], ThenBlk}; + ?i(true) -> {Blk#blk{code = []}, [{jump, ThenRef}], ThenBlk}; + _ -> {Blk#blk{code = ElseCode}, [{jumpif, Arg, ThenRef}], ThenBlk} + end + end; + tuple -> + [TCode] = Alts, + {Blk#blk{code = TCode ++ [{jump, RestRef}]}, [], []}; + {variant, [_]} -> + %% [SINGLE_CON_SWITCH] Single constructor switches don't need a + %% switch instruction. + [AltCode] = Alts, + {Blk#blk{code = AltCode ++ [{jump, RestRef}]}, [], []}; + {variant, _Ar} -> + MkBlk = fun(missing) -> {DefRef, []}; + (ACode) -> FreshBlk(ACode ++ [{jump, RestRef}], DefRef) + end, + {AltRefs, AltBs} = lists:unzip(lists:map(MkBlk, Alts)), + {Blk#blk{code = []}, [{switch, Arg, AltRefs}], lists:append(AltBs)} + end, + Blk2 = Blk1#blk{catchall = DefRef}, %% Update catchall ref + block(Blk2, Code1 ++ Acc, DefBlk ++ RestBlk ++ AltBlks ++ Blocks, BlockAcc); +block(Blk = #blk{code = [I | Code]}, Acc, Blocks, BlockAcc) -> + block(Blk#blk{code = Code}, [I | Acc], Blocks, BlockAcc). + +%% -- Reorder, inline, and remove dead blocks -- + +optimize_blocks(Blocks) -> + %% We need to look at the last instruction a lot, so reverse all blocks. + Rev = fun(Bs) -> [ {Ref, lists:reverse(Code)} || {Ref, Code} <- Bs ] end, + RBlocks = Rev(Blocks), + RBlockMap = maps:from_list(RBlocks), + RBlocks1 = reorder_blocks(RBlocks, []), + RBlocks2 = [ {Ref, inline_block(RBlockMap, Ref, Code)} || {Ref, Code} <- RBlocks1 ], + RBlocks3 = remove_dead_blocks(RBlocks2), + RBlocks4 = [ {Ref, tweak_returns(Code)} || {Ref, Code} <- RBlocks3 ], + Rev(RBlocks4). + +%% Choose the next block based on the final jump. +reorder_blocks([], Acc) -> + lists:reverse(Acc); +reorder_blocks([{Ref, Code} | Blocks], Acc) -> + reorder_blocks(Ref, Code, Blocks, Acc). + +reorder_blocks(Ref, Code, Blocks, Acc) -> + Acc1 = [{Ref, Code} | Acc], + case Code of + ['RETURN'|_] -> reorder_blocks(Blocks, Acc1); + [{'RETURNR', _}|_] -> reorder_blocks(Blocks, Acc1); + [{'CALL_T', _}|_] -> reorder_blocks(Blocks, Acc1); + [{'CALL_TR', _, _}|_] -> reorder_blocks(Blocks, Acc1); + [{'ABORT', _}|_] -> reorder_blocks(Blocks, Acc1); + [{switch, _, _}|_] -> reorder_blocks(Blocks, Acc1); + [{jump, L}|_] -> + NotL = fun({L1, _}) -> L1 /= L end, + case lists:splitwith(NotL, Blocks) of + {Blocks1, [{L, Code1} | Blocks2]} -> + reorder_blocks(L, Code1, Blocks1 ++ Blocks2, Acc1); + {_, []} -> reorder_blocks(Blocks, Acc1) + end + end. + +%% Inline short blocks (≤ 2 instructions) +inline_block(BlockMap, Ref, [{jump, L} | Code] = Code0) when L /= Ref -> + case maps:get(L, BlockMap, nocode) of + Dest when length(Dest) < 3 -> + %% Remove Ref to avoid infinite loops + inline_block(maps:remove(Ref, BlockMap), L, Dest) ++ Code; + _ -> Code0 + end; +inline_block(_, _, Code) -> Code. + +%% Remove unused blocks +remove_dead_blocks(Blocks = [{Top, _} | _]) -> + BlockMap = maps:from_list(Blocks), + LiveBlocks = chase_labels([Top], BlockMap, #{}), + [ B || B = {L, _} <- Blocks, maps:is_key(L, LiveBlocks) ]. + +chase_labels([], _, Live) -> Live; +chase_labels([L | Ls], Map, Live) -> + Code = maps:get(L, Map), + Jump = fun({jump, A}) -> [A || not maps:is_key(A, Live)]; + ({jumpif, _, A}) -> [A || not maps:is_key(A, Live)]; + ({switch, _, As}) -> [A || A <- As, not maps:is_key(A, Live)]; + (_) -> [] end, + New = lists:flatmap(Jump, Code), + chase_labels(New ++ Ls, Map, Live#{ L => true }). + +%% Replace PUSH, RETURN by RETURNR, drop returns after tail calls. +tweak_returns(['RETURN', {'PUSH', A} | Code]) -> [{'RETURNR', A} | Code]; +tweak_returns(['RETURN' | Code = [{'CALL_T', _} | _]]) -> Code; +tweak_returns(['RETURN' | Code = [{'CALL_TR', _, _} | _]]) -> Code; +tweak_returns(['RETURN' | Code = [{'ABORT', _} | _]]) -> Code; +tweak_returns(Code) -> Code. + +%% -- Split basic blocks at CALL instructions -- +%% Calls can only return to a new basic block. + +split_calls({Ref, Code}) -> + split_calls(Ref, Code, [], []). + +split_calls(Ref, [], Acc, Blocks) -> + lists:reverse([{Ref, lists:reverse(Acc)} | Blocks]); +split_calls(Ref, [I | Code], Acc, Blocks) when element(1, I) == 'CALL'; element(1, I) == 'CALL_R' -> + split_calls(make_ref(), Code, [], [{Ref, lists:reverse([I | Acc])} | Blocks]); +split_calls(Ref, [I | Code], Acc, Blocks) -> + split_calls(Ref, Code, [I | Acc], Blocks). + +%% -- Translate label refs to indices -- + +set_labels(Labels, {Ref, Code}) when is_reference(Ref) -> + {maps:get(Ref, Labels), [ set_labels(Labels, I) || I <- Code ]}; +set_labels(Labels, {jump, Ref}) -> aeb_fate_code:jump(maps:get(Ref, Labels)); +set_labels(Labels, {jumpif, Arg, Ref}) -> aeb_fate_code:jumpif(Arg, maps:get(Ref, Labels)); +set_labels(Labels, {switch, Arg, Refs}) -> + case [ maps:get(Ref, Labels) || Ref <- Refs ] of + [R1, R2] -> aeb_fate_code:switch(Arg, R1, R2); + [R1, R2, R3] -> aeb_fate_code:switch(Arg, R1, R2, R3); + Rs -> aeb_fate_code:switch(Arg, Rs) + end; +set_labels(_, I) -> I. + +%% -- Helpers ---------------------------------------------------------------- + +with_ixs(Xs) -> + lists:zip(lists:seq(0, length(Xs) - 1), Xs). + diff --git a/src/aeso_icode_to_fate.erl b/src/aeso_icode_to_fate.erl deleted file mode 100644 index 245d8d7..0000000 --- a/src/aeso_icode_to_fate.erl +++ /dev/null @@ -1,299 +0,0 @@ -%%%------------------------------------------------------------------- -%%% @author Ulf Norell -%%% @copyright (C) 2019, Aeternity Anstalt -%%% @doc -%%% Fate backend for Sophia compiler -%%% @end -%%% Created : 11 Jan 2019 -%%% -%%%------------------------------------------------------------------- --module(aeso_icode_to_fate). - --include("aeso_icode.hrl"). - --export([compile/2]). - -%% -- Preamble --------------------------------------------------------------- - --define(TODO(What), error({todo, ?FILE, ?LINE, ?FUNCTION_NAME, What})). - --define(i(__X__), {immediate, __X__}). --define(a, {stack, 0}). - --record(env, { args = [], stack = [], tailpos = true }). - -%% -- Debugging -------------------------------------------------------------- - -%% debug(Options, Fmt) -> debug(Options, Fmt, []). -debug(Options, Fmt, Args) -> - case proplists:get_value(debug, Options, true) of - true -> io:format(Fmt, Args); - false -> ok - end. - -%% -- Main ------------------------------------------------------------------- - -%% @doc Main entry point. -compile(ICode, Options) -> - #{ contract_name := _ContractName, - state_type := _StateType, - functions := Functions } = ICode, - SFuns = functions_to_scode(Functions, Options), - SFuns1 = optimize_scode(SFuns, Options), - to_basic_blocks(SFuns1, Options). - -functions_to_scode(Functions, Options) -> - maps:from_list( - [ {list_to_binary(Name), function_to_scode(Name, Args, Body, Type, Options)} - || {Name, _Ann, Args, Body, Type} <- Functions, Name /= "init" ]). %% TODO: skip init for now - -function_to_scode(Name, Args, Body, Type, Options) -> - debug(Options, "Compiling ~p ~p : ~p ->\n ~p\n", [Name, Args, Type, Body]), - ArgTypes = [ icode_type_to_fate(T) || {_, T} <- Args ], - ResType = icode_type_to_fate(Type), - SCode = to_scode(init_env(Args), Body), - debug(Options, " scode: ~p\n", [SCode]), - {{ArgTypes, ResType}, SCode}. - -%% -- Types ------------------------------------------------------------------ - -%% TODO: the Fate types don't seem to be specified anywhere... -icode_type_to_fate(word) -> integer; -icode_type_to_fate(string) -> string; -icode_type_to_fate({tuple, Types}) -> - {tuple, lists:map(fun icode_type_to_fate/1, Types)}; -icode_type_to_fate({list, Type}) -> - {list, icode_type_to_fate(Type)}; -icode_type_to_fate(typerep) -> typerep; -icode_type_to_fate(Type) -> ?TODO(Type). - -%% -- Phase I ---------------------------------------------------------------- -%% Icode to structured assembly - -%% -- Environment functions -- - -init_env(Args) -> - #env{ args = Args, stack = [], tailpos = true }. - -push_env(Type, Env) -> - Env#env{ stack = [{"_", Type} | Env#env.stack] }. - -notail(Env) -> Env#env{ tailpos = false }. - -lookup_var(#env{ args = Args, stack = S }, X) -> - case {keyfind_index(X, 1, S), keyfind_index(X, 1, Args)} of - {false, false} -> false; - {false, Arg} -> {arg, Arg}; - {Local, _} -> {stack, Local} - end. - -%% -- The compiler -- - -to_scode(_Env, #integer{ value = N }) -> - [aeb_fate_code:push(?i(N))]; %% Doesn't exist (yet), translated by desugaring - -to_scode(Env, #var_ref{name = X}) -> - case lookup_var(Env, X) of - false -> error({unbound_variable, X, Env}); - {stack, N} -> [aeb_fate_code:dup(?i(N))]; - {arg, N} -> [aeb_fate_code:push({arg, N})] - end; -to_scode(Env, #binop{ op = Op, left = A, right = B }) -> - [ to_scode(notail(Env), B) - , to_scode(push_env(binop_type_r(Op), Env), A) - , binop_to_scode(Op) ]; - -to_scode(Env, #ifte{decision = Dec, then = Then, else = Else}) -> - [ to_scode(notail(Env), Dec) - , {ifte, to_scode(Env, Then), to_scode(Env, Else)} ]; - -to_scode(_Env, Icode) -> ?TODO(Icode). - -%% -- Operators -- - -binop_types('+') -> {word, word}; -binop_types('-') -> {word, word}; -binop_types('==') -> {word, word}; -binop_types(Op) -> ?TODO(Op). - -%% binop_type_l(Op) -> element(1, binop_types(Op)). -binop_type_r(Op) -> element(2, binop_types(Op)). - -binop_to_scode('+') -> add_a_a_a(); %% Optimization introduces other variants -binop_to_scode('-') -> sub_a_a_a(); -binop_to_scode('==') -> eq_a_a_a(). -% binop_to_scode(Op) -> ?TODO(Op). - -add_a_a_a() -> aeb_fate_code:add(?a, ?a, ?a). -sub_a_a_a() -> aeb_fate_code:sub(?a, ?a, ?a). -eq_a_a_a() -> aeb_fate_code:eq(?a, ?a, ?a). - -%% -- Phase II --------------------------------------------------------------- -%% Optimize - -optimize_scode(Funs, Options) -> - maps:map(fun(Name, Def) -> optimize_fun(Funs, Name, Def, Options) end, - Funs). - -flatten(Code) -> lists:map(fun flatten_s/1, lists:flatten(Code)). - -flatten_s({ifte, Then, Else}) -> {ifte, flatten(Then), flatten(Else)}; -flatten_s(I) -> I. - -optimize_fun(_Funs, Name, {{Args, Res}, Code}, Options) -> - Code0 = flatten(Code), - debug(Options, "Optimizing ~s\n", [Name]), - debug(Options, " original : ~p\n", [Code0]), - Code1 = simplify(Code0), - debug(Options, " simplified: ~p\n", [Code1]), - Code2 = desugar(Code1), - debug(Options, " desugared : ~p\n", [Code2]), - {{Args, Res}, Code2}. - -simplify([]) -> []; -simplify([I | Code]) -> - simpl_top(simpl_s(I), simplify(Code)). - -simpl_s({ifte, Then, Else}) -> - {ifte, simplify(Then), simplify(Else)}; -simpl_s(I) -> I. - -%% add_i 0 --> nop -simpl_top({'ADD', _, ?i(0), _}, Code) -> Code; -%% push n, add_a --> add_i n -simpl_top({'PUSH', ?a, ?i(N)}, - [{'ADD', ?a, ?a, ?a} | Code]) -> - simpl_top( aeb_fate_code:add(?a, ?i(N), ?a), Code); -%% push n, add_i m --> add_i (n + m) -simpl_top({'PUSH', ?a, ?i(N)}, [{'ADD', ?a, ?i(M), ?a} | Code]) -> - simpl_top(aeb_fate_code:push(?i(N + M)), Code); -%% add_i n, add_i m --> add_i (n + m) -simpl_top({'ADD', ?a, ?i(N), ?a}, [{'ADD', ?a, ?i(M), ?a} | Code]) -> - simpl_top({'ADD', ?a, ?i(N + M), ?a}, Code); - -simpl_top(I, Code) -> [I | Code]. - -%% Desugar and specialize -desugar({'ADD', ?a, ?i(1), ?a}) -> [aeb_fate_code:inc()]; -desugar({ifte, Then, Else}) -> [{ifte, desugar(Then), desugar(Else)}]; -desugar(Code) when is_list(Code) -> - lists:flatmap(fun desugar/1, Code); -desugar(I) -> [I]. - -%% -- Phase III -------------------------------------------------------------- -%% Constructing basic blocks - -to_basic_blocks(Funs, Options) -> - maps:from_list([ {Name, {{Args, Res}, - bb(Name, Code ++ [aeb_fate_code:return()], Options)}} - || {Name, {{Args, Res}, Code}} <- maps:to_list(Funs) ]). - -bb(Name, Code, Options) -> - Blocks0 = blocks(Code), - Blocks = optimize_blocks(Blocks0), - Labels = maps:from_list([ {Ref, I} || {I, {Ref, _}} <- with_ixs(Blocks) ]), - BBs = [ set_labels(Labels, B) || B <- Blocks ], - debug(Options, "Final code for ~s:\n ~p\n", [Name, BBs]), - maps:from_list(BBs). - -%% -- Break up scode into basic blocks -- - -blocks(Code) -> - Top = make_ref(), - blocks([{Top, Code}], []). - -blocks([], Acc) -> - lists:reverse(Acc); -blocks([{Ref, Code} | Blocks], Acc) -> - block(Ref, Code, [], Blocks, Acc). - -block(Ref, [], CodeAcc, Blocks, BlockAcc) -> - blocks(Blocks, [{Ref, lists:reverse(CodeAcc)} | BlockAcc]); -block(Ref, [{ifte, Then, Else} | Code], Acc, Blocks, BlockAcc) -> - ThenLbl = make_ref(), - RestLbl = make_ref(), - block(Ref, Else ++ [{jump, RestLbl}], - [{jumpif, ThenLbl} | Acc], - [{ThenLbl, Then ++ [{jump, RestLbl}]}, - {RestLbl, Code} | Blocks], - BlockAcc); -block(Ref, [I | Code], Acc, Blocks, BlockAcc) -> - block(Ref, Code, [I | Acc], Blocks, BlockAcc). - -%% -- Reorder, inline, and remove dead blocks -- - -optimize_blocks(Blocks) -> - %% We need to look at the last instruction a lot, so reverse all blocks. - Rev = fun(Bs) -> [ {Ref, lists:reverse(Code)} || {Ref, Code} <- Bs ] end, - RBlocks = Rev(Blocks), - RBlockMap = maps:from_list(RBlocks), - RBlocks1 = reorder_blocks(RBlocks, []), - RBlocks2 = [ {Ref, inline_block(RBlockMap, Ref, Code)} || {Ref, Code} <- RBlocks1 ], - RBlocks3 = remove_dead_blocks(RBlocks2), - Rev(RBlocks3). - -%% Choose the next block based on the final jump. -reorder_blocks([], Acc) -> - lists:reverse(Acc); -reorder_blocks([{Ref, Code} | Blocks], Acc) -> - reorder_blocks(Ref, Code, Blocks, Acc). - -reorder_blocks(Ref, Code, Blocks, Acc) -> - Acc1 = [{Ref, Code} | Acc], - case Code of - ['RETURN'|_] -> reorder_blocks(Blocks, Acc1); - [{'RETURNR', _}|_] -> reorder_blocks(Blocks, Acc1); - [{jump, L}|_] -> - NotL = fun({L1, _}) -> L1 /= L end, - case lists:splitwith(NotL, Blocks) of - {Blocks1, [{L, Code1} | Blocks2]} -> - reorder_blocks(L, Code1, Blocks1 ++ Blocks2, Acc1); - {_, []} -> reorder_blocks(Blocks, Acc1) - end - end. - -%% Inline short blocks (≤ 2 instructions) -inline_block(BlockMap, Ref, [{jump, L} | Code] = Code0) when L /= Ref -> - case maps:get(L, BlockMap, nocode) of - Dest when length(Dest) < 3 -> - %% Remove Ref to avoid infinite loops - inline_block(maps:remove(Ref, BlockMap), L, Dest) ++ Code; - _ -> Code0 - end; -inline_block(_, _, Code) -> Code. - -%% Remove unused blocks -remove_dead_blocks(Blocks = [{Top, _} | _]) -> - BlockMap = maps:from_list(Blocks), - LiveBlocks = chase_labels([Top], BlockMap, #{}), - [ B || B = {L, _} <- Blocks, maps:is_key(L, LiveBlocks) ]. - -chase_labels([], _, Live) -> Live; -chase_labels([L | Ls], Map, Live) -> - Code = maps:get(L, Map), - Jump = fun({jump, A}) -> [A || not maps:is_key(A, Live)]; - ({jumpif, A}) -> [A || not maps:is_key(A, Live)]; - (_) -> [] end, - New = lists:flatmap(Jump, Code), - chase_labels(New ++ Ls, Map, Live#{ L => true }). - - -%% -- Translate label refs to indices -- - -set_labels(Labels, {Ref, Code}) when is_reference(Ref) -> - {maps:get(Ref, Labels), [ set_labels(Labels, I) || I <- Code ]}; -set_labels(Labels, {jump, Ref}) -> aeb_fate_code:jump(maps:get(Ref, Labels)); -set_labels(Labels, {jumpif, Ref}) -> aeb_fate_code:jumpif(?a, maps:get(Ref, Labels)); -set_labels(_, I) -> I. - -%% -- Helpers ---------------------------------------------------------------- - -with_ixs(Xs) -> - lists:zip(lists:seq(0, length(Xs) - 1), Xs). - -keyfind_index(X, J, Xs) -> - case [ I || {I, E} <- with_ixs(Xs), X == element(J, E) ] of - [I | _] -> I; - [] -> false - end. - diff --git a/src/aeso_parser.erl b/src/aeso_parser.erl index c174a6b..ba9e8b9 100644 --- a/src/aeso_parser.erl +++ b/src/aeso_parser.erl @@ -441,7 +441,7 @@ build_if(Ann, Cond, Then, [{elif, Ann1, Cond1, Then1} | Elses]) -> build_if(Ann, Cond, Then, [{else, _Ann, Else}]) -> {'if', Ann, Cond, Then, Else}; build_if(Ann, Cond, Then, []) -> - {'if', Ann, Cond, Then, {unit, [{origin, system}]}}. + {'if', Ann, Cond, Then, {tuple, [{origin, system}], []}}. else_branches([Elif = {elif, _, _, _} | Stmts], Acc) -> else_branches(Stmts, [Elif | Acc]); @@ -457,7 +457,6 @@ fun_t(Domains, Type) -> lists:foldr(fun({Dom, Ann}, T) -> {fun_t, Ann, [], Dom, T} end, Type, Domains). -tuple_e(Ann, []) -> {unit, Ann}; tuple_e(_Ann, [Expr]) -> Expr; %% Not a tuple tuple_e(Ann, Exprs) -> {tuple, Ann, Exprs}. @@ -478,7 +477,6 @@ parse_pattern({record, Ann, Fs}) -> {record, Ann, lists:map(fun parse_field_pattern/1, Fs)}; parse_pattern(E = {con, _, _}) -> E; parse_pattern(E = {id, _, _}) -> E; -parse_pattern(E = {unit, _}) -> E; parse_pattern(E = {int, _, _}) -> E; parse_pattern(E = {bool, _, _}) -> E; parse_pattern(E = {bytes, _, _}) -> E; diff --git a/src/aeso_pretty.erl b/src/aeso_pretty.erl index 8a09fbc..3177e60 100644 --- a/src/aeso_pretty.erl +++ b/src/aeso_pretty.erl @@ -332,7 +332,6 @@ expr_p(_, {Type, _, Bin}) Type == oracle_pubkey; Type == oracle_query_id -> text(binary_to_list(aeser_api_encoder:encode(Type, Bin))); -expr_p(_, {unit, _}) -> text("()"); expr_p(_, {string, _, S}) -> term(binary_to_list(S)); expr_p(_, {char, _, C}) -> case C of diff --git a/src/aeso_syntax.erl b/src/aeso_syntax.erl index 25f0963..63f192d 100644 --- a/src/aeso_syntax.erl +++ b/src/aeso_syntax.erl @@ -75,7 +75,6 @@ | {contract_pubkey, binary()} | {oracle_pubkey, binary()} | {oracle_query_id, binary()} - | {unit, ann()} | {string, ann(), binary()} | {char, ann(), integer()}.