diff --git a/Home.md b/Home.md index 888d214..6ce17c8 100644 --- a/Home.md +++ b/Home.md @@ -34,7 +34,8 @@ Title | Brief Description ## Aspirational Principles -- **JUST WRITE** - imperfect content is infinitely better than nonexistent content, all else notwithstanding +- **JUST WRITE** - imperfect content is infinitely better than nonexistent + content, all else notwithstanding - **PICTURES:** ~Every article should have a "YouTube Thumbnail" diagram at the top that visually illustrates whatever is explained in the page - **QUICKREF:** Every page should have a "I just need to get a thing to work diff --git a/Sophia-FAQ.md b/Sophia-FAQ.md index d03df35..14a2088 100644 --- a/Sophia-FAQ.md +++ b/Sophia-FAQ.md @@ -2,10 +2,256 @@ - Created: 2026-03-30 - Authors: Peter Harpending `` -- Last Modified: 2026-03-30 +- Last Modified: 2026-04-07 -## Defining Events in interfaces +# References -## Can there be the same function name with different arities? +- [Sophia docs](https://git.qpq.swiss/QPQ-AG/sophia/src/branch/master/docs) +- [Protocol docs](https://git.qpq.swiss/QPQ-AG/protocol) -## What happens if you delete a non-existent key from a map? +# Defining Events in interfaces + +Apparently this is legal syntax, but the point of this is unclear. + +# Can there be the same function name with different arities? + +# What happens if you delete a non-existent key from a map? + +# How does sophia compilation work + + +From commit `dbab49936daad7d82bae7cf7336b1ce82e7ab779` + +```erlang +% so_compiler.erl:84 +-spec file(string()) -> {ok, map()} | {error, [so_errors:error()]}. +file(Filename) -> + file(Filename, []). + +-spec file(string(), options()) -> {ok, map()} | {error, [so_errors:error()]}. 
+file(File, Options0) -> + Options = add_include_path(File, Options0), + case read_contract(File) of + {ok, Bin} -> + SrcDir = so_utils:canonical_dir(filename:dirname(File)), + from_string(Bin, [{src_file, File}, {src_dir, SrcDir} | Options]); + {error, Error} -> + Msg = lists:flatten([File,": ",file:format_error(Error)]), + {error, [so_errors:new(file_error, Msg)]} + end. + +-spec from_string(binary() | string(), options()) -> {ok, map()} | {error, [so_errors:error()]}. +from_string(ContractBin, Options) when is_binary(ContractBin) -> + from_string(binary_to_list(ContractBin), Options); +from_string(ContractString, Options) -> + try + from_string1(ContractString, Options) + catch + throw:{error, Errors} -> {error, Errors} + end. + +from_string1(ContractString, Options) -> + #{ fcode := FCode + , fcode_env := FCodeEnv + , folded_typed_ast := FoldedTypedAst + , warnings := Warnings } = string_to_code(ContractString, Options), + #{ child_con_env := ChildContracts } = FCodeEnv, + SavedFreshNames = maps:get(saved_fresh_names, FCodeEnv, #{}), + FateCode = so_fcode_to_fate:compile(ChildContracts, FCode, SavedFreshNames, Options), + pp_assembler(FateCode, Options), + ByteCode = gmb_fate_code:serialize(FateCode, []), + {ok, Version} = version(), + Res = #{byte_code => ByteCode, + compiler_version => Version, + contract_source => ContractString, + type_info => [], + fate_code => FateCode, + abi_version => gmb_fate_abi:abi_version(), + payable => maps:get(payable, FCode), + warnings => Warnings + }, + {ok, maybe_generate_aci(Res, FoldedTypedAst, Options)}. + +``` + +So a lot is going on in `string_to_code/2` + +```erlang +-spec string_to_code(string(), options()) -> map(). 
+string_to_code(ContractString, Options) -> + Ast = parse(ContractString, Options), + pp_sophia_code(Ast, Options), + pp_ast(Ast, Options), + {TypeEnv, FoldedTypedAst, UnfoldedTypedAst, Warnings} = so_ast_infer_types:infer(Ast, [return_env | Options]), + pp_typed_ast(UnfoldedTypedAst, Options), + {Env, Fcode} = so_ast_to_fcode:ast_to_fcode(UnfoldedTypedAst, [{original_src, ContractString}|Options]), + #{ fcode => Fcode + , fcode_env => Env + , unfolded_typed_ast => UnfoldedTypedAst + , folded_typed_ast => FoldedTypedAst + , type_env => TypeEnv + , ast => Ast + , warnings => Warnings }. + + +-spec parse(string(), so_compiler:options()) -> none() | so_syntax:ast(). +parse(Text, Options) -> + parse(Text, sets:new(), Options). + +-spec parse(string(), sets:set(), so_compiler:options()) -> none() | so_syntax:ast(). +parse(Text, Included, Options) -> + so_parser:string(Text, Included, Options). +``` + +So we get an AST from `so_parser:string/3` + +```erlang +%% so_parser.erl +-spec string(string(), sets:set(include_hash()), so_compiler:options()) -> parse_result(). +string(String, Included, Opts) -> + AST = run_parser(file(), String, Opts), + case expand_includes(AST, Included, Opts) of + {ok, AST1} -> AST1; + {error, Err} -> parse_error(Err) + end. + + +run_parser(P, Inp) -> + escape_errors(parse_and_scan(P, Inp, [])). +run_parser(P, Inp, Opts) -> + escape_errors(parse_and_scan(P, Inp, Opts)). + +parse_and_scan(P, S, Opts) -> + set_current_file(proplists:get_value(src_file, Opts, no_file)), + set_current_dir(proplists:get_value(src_dir, Opts, no_file)), + set_current_include_type(proplists:get_value(include_type, Opts, none)), + case so_scan:scan(S) of + {ok, Tokens} -> so_parse_lib:parse(P, Tokens); + {error, {{Input, Pos}, _}} -> + {error, {Pos, scan_error, Input}} + end. 
+ +``` + +So there's a lot of metadata being kept, but the key part is the call to +`so_scan:scan/1` + +```erl +lexer() -> + Number = fun(Digit) -> [Digit, "+(_", Digit, "+)*"] end, + DIGIT = "[0-9]", + HEXDIGIT = "[0-9a-fA-F]", + LOWER = "[a-z_]", + UPPER = "[A-Z]", + CON = [UPPER, "[a-zA-Z0-9_]*"], + INT = Number(DIGIT), + HEX = ["0x", Number(HEXDIGIT)], + BYTES = ["#", Number(HEXDIGIT)], + WS = "[\\000-\\ ]+", + ID = [LOWER, "[a-zA-Z0-9_']*"], + TVAR = ["'", ID], + QID = ["(", CON, "\\.)+", ID], + QCON = ["(", CON, "\\.)+", CON], + OP = "[=!<>+\\-*/:&|?~@^]+", + %% Five cases for a character + %% * 1 7-bit ascii, not \ or ' + %% * 2-4 8-bit values (UTF8) + %% * \ followed by a known modifier [aernrtv] + %% * \xhh + %% * \x{hhh...} + CHAR = "'(([\\x00-\\x26\\x28-\\x5b\\x5d-\\x7f])|([\\x00-\\xff][\\x80-\\xff]{1,3})|(\\\\[befnrtv'\\\\])|(\\\\x[0-9a-fA-F]{2,2})|(\\\\x\\{[0-9a-fA-F]*\\}))'", + STRING = "\"([^\"\\\\]|(\\\\.))*\"", + + CommentStart = {"/\\*", push(comment, skip())}, + CommentRules = + [ CommentStart + , {"\\*/", pop(skip())} + , {"[^/*]+|[/*]", skip()} ], + + Keywords = ["contract", "include", "let", "switch", "type", "record", "datatype", "if", "elif", "else", "function", + "stateful", "payable", "true", "false", "mod", "public", "entrypoint", "private", "indexed", "namespace", + "interface", "main", "using", "as", "for", "hiding", "band", "bor", "bxor", "bnot" + ], + KW = string:join(Keywords, "|"), + + Rules = + %% Comments and whitespace + [ CommentStart + , {"//.*", skip()} + , {WS, skip()} + + %% Special characters + , {"\\.\\.|[,.;()\\[\\]{}]", symbol()} + + %% Literals + , {CHAR, token(char, fun parse_char/1)} + , {STRING, token(string, fun parse_string/1)} + , {HEX, token(hex, fun parse_hex/1)} + , {INT, token(int, fun parse_int/1)} + , {BYTES, token(bytes, fun parse_bytes/1)} + + %% Identifiers (qualified first!) 
+ , {QID, token(qid, fun(S) -> string:tokens(S, ".") end)} + , {QCON, token(qcon, fun(S) -> string:tokens(S, ".") end)} + , {TVAR, token(tvar)} + , override({ID, token(id)}, {KW, symbol()}) %% Keywords override identifiers. Need to + , {CON, token(con)} %% use override to avoid lexing "lettuce" + %% as ['let', {id, "tuce"}]. + %% Operators + , {OP, symbol()} + ], + + [{code, Rules}, {comment, CommentRules}]. + +scan(String) -> + Lexer = so_scan_lib:compile(lexer()), + so_scan_lib:string(Lexer, code, String). +``` + +OK. let's look at `so_scan_lib` + +```erl +-type regex() :: iodata() | unicode:charlist(). +-type pos() :: {integer(), integer()}. +-type lex_state() :: atom(). +-type token() :: {atom(), pos(), term()} | {atom(), pos()}. + +-type token_spec() :: {regex(), token_action()}. +-opaque token_action() :: fun((string(), pos()) -> {tok_result(), state_change()}). + +-opaque lexer() :: [{lex_state(), + fun((string(), pos()) -> {ok, tok_result(), string(), pos()} + | end_of_file | error)}]. + +%% -- Internal types -- +-type tok_result() :: {token, token()} | skip. +-type state_change() :: none | pop | {push, lex_state()}. + +%% @doc Compile a lexer specification. Takes the regexps for each state and +%% combines them into a single big regexp that is then compiled with re:compile/1. +%% Note: contrary to lexer generators like leex, we don't have longest match +%% semantics (since this isn't supported by re). Use override/2 instead. +-spec compile([{lex_state(), [token_spec()]}]) -> lexer(). +compile(TokenSpecs) -> + [{S, compile_spec(Spec)} || {S, Spec} <- TokenSpecs]. 
+ +compile_spec(TokenSpecs) -> + WithIxs = lists:zip(lists:seq(1, length(TokenSpecs)), TokenSpecs), + {ok, Regex} = re:compile(["^(", name(0), string:join([ ["(", name(I), R, ")"] || {I, {R, _}} <- WithIxs ], "|"),")"]), + Actions = [ Fun || {_, Fun} <- TokenSpecs ], + fun ("", _Pos) -> end_of_file; + (S, Pos) -> + case re:run(S, Regex, [{capture, all_names}]) of + {match, [{0, N} | Capture]} -> + Index = 1 + length(lists:takewhile(fun({P, _}) -> P == -1 end, Capture)), + Action = lists:nth(Index, Actions), + {TokS, Rest} = lists:split(N, S), + Tok = Action(TokS, Pos), + {ok, Tok, Rest, next_pos(TokS, Pos)}; + nomatch -> + error + end + end. +``` + +# How does sophia compilation work