Page:
Sophia FAQ
Pages
API Encoding
BaseN
Blockchain Primer
Blockchain Trilemma
Blockchain
Consensus
DLTs
Finality
Flation
Gajumaru
Home
Install Erlang and zx
Leader Selection
Mempool
Mining
Mint
Monetary Mechanics
Money
Proof of Work
RLP
Serialization
Smart Contracts
Sophia FAQ
Sophia
State Channels
Testnet Node Setup
Transaction
Clone
2
Sophia FAQ
Peter Harpending edited this page 2026-04-07 13:28:26 -07:00
Sophia FAQ
- Created: 2026-03-30
- Authors: Peter Harpending
<peterharpending@qpq.swiss> - Last Modified: 2026-04-07
References
Defining Events in interfaces
apparently this is legal syntax but the point of this is unclear.
Can there be the same function name with different arities?
What happens if you delete a non-existent key from a map?
How does sophia compilation work
From commit dbab49936daad7d82bae7cf7336b1ce82e7ab779
% so_compiler.erl:84
-spec file(string()) -> {ok, map()} | {error, [so_errors:error()]}.
file(Filename) ->
file(Filename, []).
-spec file(string(), options()) -> {ok, map()} | {error, [so_errors:error()]}.
file(File, Options0) ->
Options = add_include_path(File, Options0),
case read_contract(File) of
{ok, Bin} ->
SrcDir = so_utils:canonical_dir(filename:dirname(File)),
from_string(Bin, [{src_file, File}, {src_dir, SrcDir} | Options]);
{error, Error} ->
Msg = lists:flatten([File,": ",file:format_error(Error)]),
{error, [so_errors:new(file_error, Msg)]}
end.
-spec from_string(binary() | string(), options()) -> {ok, map()} | {error, [so_errors:error()]}.
from_string(ContractBin, Options) when is_binary(ContractBin) ->
from_string(binary_to_list(ContractBin), Options);
from_string(ContractString, Options) ->
try
from_string1(ContractString, Options)
catch
throw:{error, Errors} -> {error, Errors}
end.
from_string1(ContractString, Options) ->
#{ fcode := FCode
, fcode_env := FCodeEnv
, folded_typed_ast := FoldedTypedAst
, warnings := Warnings } = string_to_code(ContractString, Options),
#{ child_con_env := ChildContracts } = FCodeEnv,
SavedFreshNames = maps:get(saved_fresh_names, FCodeEnv, #{}),
FateCode = so_fcode_to_fate:compile(ChildContracts, FCode, SavedFreshNames, Options),
pp_assembler(FateCode, Options),
ByteCode = gmb_fate_code:serialize(FateCode, []),
{ok, Version} = version(),
Res = #{byte_code => ByteCode,
compiler_version => Version,
contract_source => ContractString,
type_info => [],
fate_code => FateCode,
abi_version => gmb_fate_abi:abi_version(),
payable => maps:get(payable, FCode),
warnings => Warnings
},
{ok, maybe_generate_aci(Res, FoldedTypedAst, Options)}.
So a lot is going on in string_to_code/2
-spec string_to_code(string(), options()) -> map().
string_to_code(ContractString, Options) ->
Ast = parse(ContractString, Options),
pp_sophia_code(Ast, Options),
pp_ast(Ast, Options),
{TypeEnv, FoldedTypedAst, UnfoldedTypedAst, Warnings} = so_ast_infer_types:infer(Ast, [return_env | Options]),
pp_typed_ast(UnfoldedTypedAst, Options),
{Env, Fcode} = so_ast_to_fcode:ast_to_fcode(UnfoldedTypedAst, [{original_src, ContractString}|Options]),
#{ fcode => Fcode
, fcode_env => Env
, unfolded_typed_ast => UnfoldedTypedAst
, folded_typed_ast => FoldedTypedAst
, type_env => TypeEnv
, ast => Ast
, warnings => Warnings }.
-spec parse(string(), so_compiler:options()) -> none() | so_syntax:ast().
parse(Text, Options) ->
parse(Text, sets:new(), Options).
-spec parse(string(), sets:set(), so_compiler:options()) -> none() | so_syntax:ast().
parse(Text, Included, Options) ->
so_parser:string(Text, Included, Options).
So we get an AST from so_parser:string/3
%% so_parser.erl
-spec string(string(), sets:set(include_hash()), so_compiler:options()) -> parse_result().
string(String, Included, Opts) ->
AST = run_parser(file(), String, Opts),
case expand_includes(AST, Included, Opts) of
{ok, AST1} -> AST1;
{error, Err} -> parse_error(Err)
end.
run_parser(P, Inp) ->
escape_errors(parse_and_scan(P, Inp, [])).
run_parser(P, Inp, Opts) ->
escape_errors(parse_and_scan(P, Inp, Opts)).
parse_and_scan(P, S, Opts) ->
set_current_file(proplists:get_value(src_file, Opts, no_file)),
set_current_dir(proplists:get_value(src_dir, Opts, no_file)),
set_current_include_type(proplists:get_value(include_type, Opts, none)),
case so_scan:scan(S) of
{ok, Tokens} -> so_parse_lib:parse(P, Tokens);
{error, {{Input, Pos}, _}} ->
{error, {Pos, scan_error, Input}}
end.
So there's a lot of metadata being kept, but the key part is the call to
so_scan:scan/1
lexer() ->
Number = fun(Digit) -> [Digit, "+(_", Digit, "+)*"] end,
DIGIT = "[0-9]",
HEXDIGIT = "[0-9a-fA-F]",
LOWER = "[a-z_]",
UPPER = "[A-Z]",
CON = [UPPER, "[a-zA-Z0-9_]*"],
INT = Number(DIGIT),
HEX = ["0x", Number(HEXDIGIT)],
BYTES = ["#", Number(HEXDIGIT)],
WS = "[\\000-\\ ]+",
ID = [LOWER, "[a-zA-Z0-9_']*"],
TVAR = ["'", ID],
QID = ["(", CON, "\\.)+", ID],
QCON = ["(", CON, "\\.)+", CON],
OP = "[=!<>+\\-*/:&|?~@^]+",
%% Five cases for a character
%% * 1 7-bit ascii, not \ or '
%% * 2-4 8-bit values (UTF8)
%% * \ followed by a known modifier [aernrtv]
%% * \xhh
%% * \x{hhh...}
CHAR = "'(([\\x00-\\x26\\x28-\\x5b\\x5d-\\x7f])|([\\x00-\\xff][\\x80-\\xff]{1,3})|(\\\\[befnrtv'\\\\])|(\\\\x[0-9a-fA-F]{2,2})|(\\\\x\\{[0-9a-fA-F]*\\}))'",
STRING = "\"([^\"\\\\]|(\\\\.))*\"",
CommentStart = {"/\\*", push(comment, skip())},
CommentRules =
[ CommentStart
, {"\\*/", pop(skip())}
, {"[^/*]+|[/*]", skip()} ],
Keywords = ["contract", "include", "let", "switch", "type", "record", "datatype", "if", "elif", "else", "function",
"stateful", "payable", "true", "false", "mod", "public", "entrypoint", "private", "indexed", "namespace",
"interface", "main", "using", "as", "for", "hiding", "band", "bor", "bxor", "bnot"
],
KW = string:join(Keywords, "|"),
Rules =
%% Comments and whitespace
[ CommentStart
, {"//.*", skip()}
, {WS, skip()}
%% Special characters
, {"\\.\\.|[,.;()\\[\\]{}]", symbol()}
%% Literals
, {CHAR, token(char, fun parse_char/1)}
, {STRING, token(string, fun parse_string/1)}
, {HEX, token(hex, fun parse_hex/1)}
, {INT, token(int, fun parse_int/1)}
, {BYTES, token(bytes, fun parse_bytes/1)}
%% Identifiers (qualified first!)
, {QID, token(qid, fun(S) -> string:tokens(S, ".") end)}
, {QCON, token(qcon, fun(S) -> string:tokens(S, ".") end)}
, {TVAR, token(tvar)}
, override({ID, token(id)}, {KW, symbol()}) %% Keywords override identifiers. Need to
, {CON, token(con)} %% use override to avoid lexing "lettuce"
%% as ['let', {id, "tuce"}].
%% Operators
, {OP, symbol()}
],
[{code, Rules}, {comment, CommentRules}].
scan(String) ->
Lexer = so_scan_lib:compile(lexer()),
so_scan_lib:string(Lexer, code, String).
OK. let's look at so_scan_lib
-type regex() :: iodata() | unicode:charlist().
-type pos() :: {integer(), integer()}.
-type lex_state() :: atom().
-type token() :: {atom(), pos(), term()} | {atom(), pos()}.
-type token_spec() :: {regex(), token_action()}.
-opaque token_action() :: fun((string(), pos()) -> {tok_result(), state_change()}).
-opaque lexer() :: [{lex_state(),
fun((string(), pos()) -> {ok, tok_result(), string(), pos()}
| end_of_file | error)}].
%% -- Internal types --
-type tok_result() :: {token, token()} | skip.
-type state_change() :: none | pop | {push, lex_state()}.
%% @doc Compile a lexer specification. Takes the regexps for each state and
%% combines them into a single big regexp that is then compiled with re:compile/1.
%% Note: contrary to lexer generators like leex, we don't have longest match
%% semantics (since this isn't supported by re). Use override/2 instead.
-spec compile([{lex_state(), [token_spec()]}]) -> lexer().
compile(TokenSpecs) ->
[{S, compile_spec(Spec)} || {S, Spec} <- TokenSpecs].
compile_spec(TokenSpecs) ->
WithIxs = lists:zip(lists:seq(1, length(TokenSpecs)), TokenSpecs),
{ok, Regex} = re:compile(["^(", name(0), string:join([ ["(", name(I), R, ")"] || {I, {R, _}} <- WithIxs ], "|"),")"]),
Actions = [ Fun || {_, Fun} <- TokenSpecs ],
fun ("", _Pos) -> end_of_file;
(S, Pos) ->
case re:run(S, Regex, [{capture, all_names}]) of
{match, [{0, N} | Capture]} ->
Index = 1 + length(lists:takewhile(fun({P, _}) -> P == -1 end, Capture)),
Action = lists:nth(Index, Actions),
{TokS, Rest} = lists:split(N, S),
Tok = Action(TokS, Pos),
{ok, Tok, Rest, next_pos(TokS, Pos)};
nomatch ->
error
end
end.