2
Sophia FAQ
Peter Harpending edited this page 2026-04-07 13:28:26 -07:00

Sophia FAQ

  • Created: 2026-03-30
  • Authors: Peter Harpending <peterharpending@qpq.swiss>
  • Last Modified: 2026-04-07

References

Defining Events in interfaces

Apparently this is legal syntax, but the point of it is unclear.

Can there be the same function name with different arities?

What happens if you delete a non-existent key from a map?

How does Sophia compilation work?

From commit dbab49936daad7d82bae7cf7336b1ce82e7ab779

% so_compiler.erl:84
%% @doc Compile a Sophia contract file with default (empty) options.
%% Convenience wrapper around file/2.
-spec file(string()) -> {ok, map()} | {error, [so_errors:error()]}.
file(Filename) ->
    file(Filename, []).

%% @doc Compile a Sophia contract file with the given options.
%% Reads the file, records src_file/src_dir metadata in the options, and
%% hands the source off to from_string/2. A read failure is returned as a
%% single file_error whose message is the filename plus the formatted
%% file:format_error/1 reason.
-spec file(string(), options()) -> {ok, map()} | {error, [so_errors:error()]}.
file(File, Options0) ->
    %% Make includes next to the contract file resolvable
    %% (add_include_path/2 not visible here — presumably prepends an
    %% include-path option; confirm in so_compiler.erl).
    Options = add_include_path(File, Options0),
    case read_contract(File) of
        {ok, Bin} ->
            SrcDir = so_utils:canonical_dir(filename:dirname(File)),
            from_string(Bin, [{src_file, File}, {src_dir, SrcDir} | Options]);
        {error, Error} ->
            %% Human-readable message: "<file>: <posix reason text>".
            Msg = lists:flatten([File,": ",file:format_error(Error)]),
            {error, [so_errors:new(file_error, Msg)]}
    end.

%% @doc Compile Sophia source given as a binary or a string.
%% Binaries are converted to char lists first. Errors thrown deeper in the
%% pipeline as {error, Errors} are caught here and returned as a value, so
%% callers always see {ok, _} | {error, _}.
-spec from_string(binary() | string(), options()) -> {ok, map()} | {error, [so_errors:error()]}.
from_string(ContractBin, Options) when is_binary(ContractBin) ->
    from_string(ContractBin == ContractBin andalso binary_to_list(ContractBin), Options);
from_string(ContractString, Options) ->
    try
        from_string1(ContractString, Options)
    catch
        %% Pipeline stages signal failure with throw({error, Errors}).
        throw:{error, Errors} -> {error, Errors}
    end.

%% Main compilation pipeline: source text -> typed AST -> fcode -> FATE
%% code -> serialized bytecode. Returns {ok, ResultMap}; failures are
%% thrown as {error, Errors} and caught in from_string/2.
from_string1(ContractString, Options) ->
    %% Front end: parse, type-check and lower to functional code (fcode).
    #{ fcode := FCode
     , fcode_env := FCodeEnv
     , folded_typed_ast := FoldedTypedAst
     , warnings := Warnings } = string_to_code(ContractString, Options),
    #{ child_con_env := ChildContracts } = FCodeEnv,
    SavedFreshNames = maps:get(saved_fresh_names, FCodeEnv, #{}),
    %% Back end: fcode -> FATE code, then serialize to bytecode.
    FateCode = so_fcode_to_fate:compile(ChildContracts, FCode, SavedFreshNames, Options),
    %% NOTE(review): presumably a debug pretty-printer gated on Options;
    %% pp_assembler/2 is not visible in this excerpt.
    pp_assembler(FateCode, Options),
    ByteCode = gmb_fate_code:serialize(FateCode, []),
    {ok, Version} = version(),
    Res = #{byte_code => ByteCode,
            compiler_version => Version,
            contract_source => ContractString,
            type_info => [],
            fate_code => FateCode,
            abi_version => gmb_fate_abi:abi_version(),
            payable => maps:get(payable, FCode),
            warnings => Warnings
           },
    %% Optionally extends Res — NOTE(review): presumably attaches a
    %% generated ACI when requested in Options; confirm.
    {ok, maybe_generate_aci(Res, FoldedTypedAst, Options)}.

So a lot is going on in string_to_code/2.

%% @doc Front-end pipeline: parse, infer types, and lower to fcode.
%% Returns a map carrying every intermediate representation (AST, folded
%% and unfolded typed ASTs, type env, fcode + fcode env) plus warnings.
%% The pp_* calls are pretty-printing hooks; presumably no-ops unless
%% enabled via Options (their definitions are not visible here).
-spec string_to_code(string(), options()) -> map().
string_to_code(ContractString, Options) ->
    Ast = parse(ContractString, Options),
    pp_sophia_code(Ast, Options),
    pp_ast(Ast, Options),
    %% return_env — presumably asks infer/2 to also return TypeEnv;
    %% consistent with the 4-tuple matched here.
    {TypeEnv, FoldedTypedAst, UnfoldedTypedAst, Warnings} = so_ast_infer_types:infer(Ast, [return_env | Options]),
    pp_typed_ast(UnfoldedTypedAst, Options),
    %% original_src is threaded through so later stages can refer back to
    %% the source text.
    {Env, Fcode} = so_ast_to_fcode:ast_to_fcode(UnfoldedTypedAst, [{original_src, ContractString}|Options]),
    #{ fcode => Fcode
    ,  fcode_env => Env
    ,  unfolded_typed_ast => UnfoldedTypedAst
    ,  folded_typed_ast => FoldedTypedAst
    ,  type_env  => TypeEnv
    ,  ast => Ast
    ,  warnings => Warnings }.


%% @doc Parse Sophia source with an empty set of already-seen includes.
%% none() in the spec reflects that parse errors are raised, not returned.
-spec parse(string(), so_compiler:options()) -> none() | so_syntax:ast().
parse(Text, Options) ->
    parse(Text, sets:new(), Options).

%% @doc Parse Sophia source; Included carries the includes seen so far so
%% that so_parser:string/3 can avoid re-expanding them.
-spec parse(string(), sets:set(), so_compiler:options()) -> none() | so_syntax:ast().
parse(Text, Included, Options) ->
    so_parser:string(Text, Included, Options).

So we get an AST from so_parser:string/3.

%% so_parser.erl
%% @doc Tokenize and parse a whole file's worth of source, then expand its
%% include directives. Included is the set of include hashes already seen
%% — NOTE(review): presumably a duplicate/cycle guard; inferred from the
%% sets:set(include_hash()) type, confirm in expand_includes/3.
%% Failed include expansion is delegated to parse_error/1.
-spec string(string(), sets:set(include_hash()), so_compiler:options()) -> parse_result().
string(String, Included, Opts) ->
    AST = run_parser(file(), String, Opts),
    case expand_includes(AST, Included, Opts) of
        {ok, AST1}   -> AST1;
        {error, Err} -> parse_error(Err)
    end.


%% Run grammar P over input, mapping failures via escape_errors/1
%% (not visible here). run_parser/2 defaults to empty options; it is not
%% called anywhere in this excerpt.
run_parser(P, Inp) ->
    escape_errors(parse_and_scan(P, Inp, [])).
run_parser(P, Inp, Opts) ->
    escape_errors(parse_and_scan(P, Inp, Opts)).

%% Scan the source into tokens and run grammar P over them.
%% The set_current_* calls first stash per-run metadata (source file,
%% directory, include type) taken from Opts — NOTE(review): the storage
%% mechanism (likely the process dictionary) is not visible here; confirm.
%% Scanner failures are normalized to {error, {Pos, scan_error, Input}}.
parse_and_scan(P, S, Opts) ->
    set_current_file(proplists:get_value(src_file, Opts, no_file)),
    set_current_dir(proplists:get_value(src_dir, Opts, no_file)),
    set_current_include_type(proplists:get_value(include_type, Opts, none)),
    case so_scan:scan(S) of
        {ok, Tokens} -> so_parse_lib:parse(P, Tokens);
        {error, {{Input, Pos}, _}} ->
            {error, {Pos, scan_error, Input}}
    end.

So there's a lot of metadata being kept, but the key part is the call to so_scan:scan/1.

%% @doc Build the lexer specification consumed by so_scan_lib:compile/1:
%% a list of {State, Rules} pairs — here the code state and the comment
%% state. Each rule pairs a regexp fragment (as iodata) with an action.
lexer() ->
    %% Digit runs may be grouped with underscores, e.g. 1_000_000:
    %% the pattern is digits followed by zero or more "_digits" groups.
    Number   = fun(Digit) -> [Digit, "+(_", Digit, "+)*"] end,
    DIGIT    = "[0-9]",
    HEXDIGIT = "[0-9a-fA-F]",
    LOWER    = "[a-z_]",
    UPPER    = "[A-Z]",
    CON      = [UPPER, "[a-zA-Z0-9_]*"],
    INT      = Number(DIGIT),
    HEX      = ["0x", Number(HEXDIGIT)],
    BYTES    = ["#", Number(HEXDIGIT)],
    %% Whitespace: every byte from NUL up to and including the space
    %% character (i.e. all ASCII control characters plus blank).
    WS       = "[\\000-\\ ]+",
    ID       = [LOWER, "[a-zA-Z0-9_']*"],
    TVAR     = ["'", ID],
    %% Qualified names: one or more "Con." prefixes before an id/con.
    QID      = ["(", CON, "\\.)+", ID],
    QCON     = ["(", CON, "\\.)+", CON],
    OP       = "[=!<>+\\-*/:&|?~@^]+",
    %% Five cases for a character
    %%  * 1 7-bit ascii, not \ or '
    %%  * 2-4 8-bit values (UTF8)
    %%  * \ followed by a known modifier [aernrtv]
    %%  * \xhh
    %%  * \x{hhh...}
    CHAR     = "'(([\\x00-\\x26\\x28-\\x5b\\x5d-\\x7f])|([\\x00-\\xff][\\x80-\\xff]{1,3})|(\\\\[befnrtv'\\\\])|(\\\\x[0-9a-fA-F]{2,2})|(\\\\x\\{[0-9a-fA-F]*\\}))'",
    STRING   = "\"([^\"\\\\]|(\\\\.))*\"",

    %% CommentStart is included in CommentRules as well, so /* ... */
    %% block comments nest (each /* pushes the comment state again).
    CommentStart = {"/\\*", push(comment, skip())},
    CommentRules =
        [ CommentStart
        , {"\\*/",        pop(skip())}
        , {"[^/*]+|[/*]", skip()} ],

    Keywords = ["contract", "include", "let", "switch", "type", "record", "datatype", "if", "elif", "else", "function",
                "stateful", "payable", "true", "false", "mod", "public", "entrypoint", "private", "indexed", "namespace",
                "interface", "main", "using", "as", "for", "hiding", "band", "bor", "bxor", "bnot"
               ],
    KW = string:join(Keywords, "|"),

    Rules =
          %% Comments and whitespace
        [ CommentStart
        , {"//.*", skip()}
        , {WS,     skip()}

          %% Special characters
        , {"\\.\\.|[,.;()\\[\\]{}]", symbol()}

          %% Literals
        , {CHAR,   token(char,   fun parse_char/1)}
        , {STRING, token(string, fun parse_string/1)}
        , {HEX,    token(hex,    fun parse_hex/1)}
        , {INT,    token(int,    fun parse_int/1)}
        , {BYTES,  token(bytes,  fun parse_bytes/1)}

          %% Identifiers (qualified first!)
          %% Qualified names are split on "." into their path components.
        , {QID,   token(qid,  fun(S) -> string:tokens(S, ".") end)}
        , {QCON,  token(qcon, fun(S) -> string:tokens(S, ".") end)}
        , {TVAR,  token(tvar)}
        , override({ID, token(id)}, {KW, symbol()})    %% Keywords override identifiers. Need to
        , {CON, token(con)}                            %% use override to avoid lexing "lettuce"
                                                       %% as ['let', {id, "tuce"}].
          %% Operators
        , {OP, symbol()}
        ],

    [{code, Rules}, {comment, CommentRules}].

%% @doc Tokenize a Sophia source string: compile the lexer spec and run it
%% starting in the code state. Note the lexer is recompiled on every call.
scan(String) ->
    Lexer = so_scan_lib:compile(lexer()),
    so_scan_lib:string(Lexer, code, String).

OK, let's look at so_scan_lib.

%% A regexp fragment, as iodata or a Unicode charlist.
-type regex()     :: iodata() | unicode:charlist().
%% Source position — NOTE(review): presumably {Line, Column}; the order is
%% not confirmed by this excerpt.
-type pos()       :: {integer(), integer()}.
%% Lexer state name, e.g. code | comment (see lexer/0).
-type lex_state() :: atom().
%% A produced token: {Category, Pos, Value} or bare {Category, Pos}.
-type token()     :: {atom(), pos(), term()} | {atom(), pos()}.

%% One lexer rule: a regexp plus the action run when it matches.
-type token_spec()     :: {regex(), token_action()}.
%% NOTE(review): compile_spec/1 returns the action's result inside
%% {ok, Tok, ...}, which lexer() below types as tok_result() — but this
%% type says actions return {tok_result(), state_change()}. Possible spec
%% mismatch; confirm against so_scan_lib:string/3.
-opaque token_action() :: fun((string(), pos()) -> {tok_result(), state_change()}).

%% A compiled lexer: one matcher fun per state; each fun consumes one
%% token from the head of the input, or reports end_of_file / error.
-opaque lexer() :: [{lex_state(),
                     fun((string(), pos()) -> {ok, tok_result(), string(), pos()}
                                            | end_of_file | error)}].

%% -- Internal types --
-type tok_result()   :: {token, token()} | skip.
-type state_change() :: none | pop | {push, lex_state()}.

%% @doc Compile a lexer specification. Takes the regexps for each state and
%% combines them into a single big regexp that is then compiled with re:compile/1.
%% Note: contrary to lexer generators like leex, we don't have longest match
%% semantics (since this isn't supported by re). Use override/2 instead.
%% Returns one {State, MatcherFun} pair per input state; the matcher fun is
%% built by compile_spec/1 below.
-spec compile([{lex_state(), [token_spec()]}]) -> lexer().
compile(TokenSpecs) ->
    [{S, compile_spec(Spec)} || {S, Spec} <- TokenSpecs].

%% Compile one state's rule list into a matcher fun.
%% All rules are joined into a single anchored alternation regexp whose
%% groups are named via name/0,1 — NOTE(review): name/I is not visible
%% here; presumably it yields per-alternative group names so that
%% {capture, all_names} returns captures in a known order. On a match,
%% the first alternative whose named capture is not {-1, _} identifies
%% which rule fired, and its action is applied to the matched prefix.
compile_spec(TokenSpecs) ->
    %% Pair each rule with its 1-based index so captures map back to actions.
    WithIxs     = lists:zip(lists:seq(1, length(TokenSpecs)), TokenSpecs),
    {ok, Regex} = re:compile(["^(", name(0), string:join([ ["(", name(I), R, ")"] || {I, {R, _}} <- WithIxs ], "|"),")"]),
    Actions     = [ Fun || {_, Fun} <- TokenSpecs ],
    %% The returned fun consumes exactly one token from the head of S.
    fun ("", _Pos) -> end_of_file;
        (S, Pos)  ->
            case re:run(S, Regex, [{capture, all_names}]) of
                {match, [{0, N} | Capture]} ->
                    %% Count the leading non-matching groups ({-1, _}) to
                    %% find the winning alternative; 1-based into Actions.
                    Index        = 1 + length(lists:takewhile(fun({P, _}) -> P == -1 end, Capture)),
                    Action       = lists:nth(Index, Actions),
                    %% Split off the N matched characters and advance Pos.
                    {TokS, Rest} = lists:split(N, S),
                    Tok          = Action(TokS, Pos),
                    {ok, Tok, Rest, next_pos(TokS, Pos)};
                nomatch ->
                    error
            end
    end.

How does Sophia compilation work?