From d2163c1ff870521439c392e5d6df3c11fd9fa553 Mon Sep 17 00:00:00 2001 From: Jarvis Carroll Date: Wed, 7 Jan 2026 09:40:55 +0000 Subject: [PATCH 01/42] split AACI out of hz.erl So far the interface to hz.erl is mostly unchanged, apart from prepare_aaci/1 Maybe prepare_aaci should be re-exported, but using it is exactly in line with the 'inconvenient but more flexible primitives' that hz_aaci.erl is meant to represent, so, maybe that is a fine place to have to go for it, dunno. --- src/hz.erl | 1092 +------------------------------------------- src/hz_aaci.erl | 1146 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1153 insertions(+), 1085 deletions(-) create mode 100644 src/hz_aaci.erl diff --git a/src/hz.erl b/src/hz.erl index 2d909e1..277f122 100644 --- a/src/hz.erl +++ b/src/hz.erl @@ -65,7 +65,6 @@ contract_create_built/8, contract_create/8, prepare_contract/1, - prepare_aaci/1, cache_aaci/2, lookup_aaci/1, aaci_lookup_spec/2, @@ -717,7 +716,7 @@ decode_bytearray_fate(EncodedStr) -> decode_bytearray(Type, EncodedStr) -> case decode_bytearray_fate(EncodedStr) of {ok, none} -> {ok, none}; - {ok, Object} -> coerce(Type, Object, from_fate); + {ok, Object} -> hz_aaci:coerce(Type, Object, from_fate); {error, Reason} -> {error, Reason} end. @@ -1099,7 +1098,7 @@ contract_create_built(CreatorID, Compiled, InitArgs) -> contract_create_built(CreatorID, Nonce, Amount, TTL, Gas, GasPrice, Compiled, InitArgs) -> - AACI = prepare_aaci(maps:get(aci, Compiled)), + AACI = hz_aaci:prepare_aaci(maps:get(aci, Compiled)), case encode_call_data(AACI, "init", InitArgs) of {ok, CallData} -> assemble_calldata(CreatorID, Nonce, Amount, TTL, Gas, GasPrice, @@ -1438,779 +1437,10 @@ contract_call4(PK, Nonce, Gas, GasPrice, Amount, TTL, CK, CallData) -> prepare_contract(File) -> case so_compiler:file(File, [{aci, json}]) of - {ok, #{aci := ACI}} -> {ok, prepare_aaci(ACI)}; + {ok, #{aci := ACI}} -> {ok, hz_aaci:prepare_aaci(ACI)}; Error -> Error end. -prepare_aaci(ACI) -> - % We want to take the types represented by the ACI, things like N1.T(N2.T), - % and dereference them down to concrete types like - % {tuple, [integer, string]}. Our type dereferencing algorithms - % shouldn't act directly on the JSON-based structures that the compiler - % gives us, though, though, so before we do the analysis, we should strip - % the ACI down to a list of 'opaque' type defintions and function specs. - {Name, OpaqueSpecs, TypeDefs} = convert_aci_types(ACI), - - % Now that we have the opaque types, we can dereference the function specs - % down to the concrete types they actually represent. We annotate each - % subexpression of this concrete type with other info too, in case it helps - % make error messages easier to understand. - InternalTypeDefs = maps:merge(builtin_typedefs(), TypeDefs), - Specs = annotate_function_specs(OpaqueSpecs, InternalTypeDefs, #{}), - - {aaci, Name, Specs, TypeDefs}. - -convert_aci_types(ACI) -> - % Find the main contract, so we can get the specifications of its - % entrypoints. - [{NameBin, SpecDefs}] = - [{N, F} - || #{contract := #{kind := contract_main, - functions := F, - name := N}} <- ACI], - Name = binary_to_list(NameBin), - % Turn these specifications into opaque types that we can reason about. - Specs = lists:map(fun convert_function_spec/1, SpecDefs), - - % These specifications can reference other type definitions from the main - % contract and any other namespaces, so extract these types and convert - % them too. - TypeDefTree = lists:map(fun convert_namespace_typedefs/1, ACI), - % The tree structure of the ACI naturally leads to a tree of opaque types, - % but we want a map, so flatten it out before we continue. - TypeDefMap = collect_opaque_types(TypeDefTree, #{}), - - % This is all the information we actually need from the ACI, the rest is - % just pre-compute and acceleration. - {Name, Specs, TypeDefMap}. - -convert_function_spec(#{name := NameBin, arguments := Args, returns := Result}) -> - Name = binary_to_list(NameBin), - ArgTypes = lists:map(fun convert_arg/1, Args), - ResultType = opaque_type([], Result), - {Name, ArgTypes, ResultType}. - -convert_arg(#{name := NameBin, type := TypeDef}) -> - Name = binary_to_list(NameBin), - Type = opaque_type([], TypeDef), - {Name, Type}. - -convert_namespace_typedefs(#{namespace := NS}) -> - Name = namespace_name(NS), - convert_typedefs(NS, Name); -convert_namespace_typedefs(#{contract := NS}) -> - Name = namespace_name(NS), - ImplicitTypes = convert_implicit_types(NS, Name), - ExplicitTypes = convert_typedefs(NS, Name), - [ImplicitTypes, ExplicitTypes]. - -namespace_name(#{name := NameBin}) -> - binary_to_list(NameBin). - -convert_implicit_types(#{state := StateDefACI}, Name) -> - StateDefOpaque = opaque_type([], StateDefACI), - [{Name, [], contract}, - {Name ++ ".state", [], StateDefOpaque}]; -convert_implicit_types(_, Name) -> - [{Name, [], contract}]. - -convert_typedefs(#{typedefs := TypeDefs}, Name) -> - convert_typedefs_loop(TypeDefs, Name ++ ".", []). - -% Take a namespace that has already had a period appended, and use that as a -% prefix to convert and annotate a list of types. -convert_typedefs_loop([], _NamePrefix, Converted) -> - Converted; -convert_typedefs_loop([Next | Rest], NamePrefix, Converted) -> - #{name := NameBin, vars := ParamDefs, typedef := DefACI} = Next, - Name = NamePrefix ++ binary_to_list(NameBin), - Params = [binary_to_list(Param) || #{name := Param} <- ParamDefs], - Def = opaque_type(Params, DefACI), - convert_typedefs_loop(Rest, NamePrefix, [Converted, {Name, Params, Def}]). - -collect_opaque_types([], Types) -> - Types; -collect_opaque_types([L | R], Types) -> - NewTypes = collect_opaque_types(L, Types), - collect_opaque_types(R, NewTypes); -collect_opaque_types({Name, Params, Def}, Types) -> - maps:put(Name, {Params, Def}, Types). - -% Convert an ACI type defintion/spec into the 'opaque type' representation that -% our dereferencing algorithms can reason about. -opaque_type(Params, NameBin) when is_binary(NameBin) -> - Name = opaque_type_name(NameBin), - case not is_atom(Name) and lists:member(Name, Params) of - false -> Name; - true -> {var, Name} - end; -opaque_type(Params, #{record := FieldDefs}) -> - Fields = [{binary_to_list(Name), opaque_type(Params, Type)} - || #{name := Name, type := Type} <- FieldDefs], - {record, Fields}; -opaque_type(Params, #{variant := VariantDefs}) -> - ConvertVariant = fun(Pair) -> - [{Name, Types}] = maps:to_list(Pair), - {binary_to_list(Name), [opaque_type(Params, Type) || Type <- Types]} - end, - Variants = lists:map(ConvertVariant, VariantDefs), - {variant, Variants}; -opaque_type(Params, #{tuple := TypeDefs}) -> - {tuple, [opaque_type(Params, Type) || Type <- TypeDefs]}; -opaque_type(_, #{bytes := Count}) -> - {bytes, [Count]}; -opaque_type(Params, Pair) when is_map(Pair) -> - [{Name, TypeArgs}] = maps:to_list(Pair), - {opaque_type_name(Name), [opaque_type(Params, Arg) || Arg <- TypeArgs]}. - -% Atoms for any builtins that aren't qualified by a namespace in Sophia. -% Everything else stays as a string, user-defined or not. -opaque_type_name(<<"int">>) -> integer; -opaque_type_name(<<"bool">>) -> boolean; -opaque_type_name(<<"bits">>) -> bits; -opaque_type_name(<<"char">>) -> char; -opaque_type_name(<<"string">>) -> string; -opaque_type_name(<<"address">>) -> address; -opaque_type_name(<<"signature">>) -> signature; -opaque_type_name(<<"contract">>) -> contract; -opaque_type_name(<<"list">>) -> list; -opaque_type_name(<<"map">>) -> map; -% I'm not sure how to produce channels in Sophia source, but they seem to exist -% in gmb still. -opaque_type_name(<<"channel">>) -> channel; -opaque_type_name(Name) -> binary_to_list(Name). - -builtin_typedefs() -> - #{"unit" => {[], {tuple, []}}, - "void" => {[], {variant, []}}, - "hash" => {[], {bytes, [32]}}, - "option" => {["'T"], {variant, [{"None", []}, - {"Some", [{var, "'T"}]}]}}, - "Chain.ttl" => {[], {variant, [{"FixedTTL", [integer]}, - {"RelativeTTL", [integer]}]}}, - "AENS.pointee" => {[], {variant, [{"AccountPt", [address]}, - {"OraclePt", [address]}, - {"ContractPt", [address]}, - {"ChannelPt", [address]}]}}, - "AENS.name" => {[], {variant, [{"Name", [address, - "Chain.ttl", - {map, [string, "AENS.pointee"]}]}]}}, - "AENSv2.pointee" => {[], {variant, [{"AccountPt", [address]}, - {"OraclePt", [address]}, - {"ContractPt", [address]}, - {"ChannelPt", [address]}, - {"DataPt", [{bytes, [any]}]}]}}, - "AENSv2.name" => {[], {variant, [{"Name", [address, - "Chain.ttl", - {map, [string, "AENSv2.pointee"]}]}]}}, - "Chain.ga_meta_tx" => {[], {variant, [{"GAMetaTx", [address, integer]}]}}, - "Chain.paying_for_tx" => {[], {variant, [{"PayingForTx", [address, integer]}]}}, - "Chain.base_tx" => {[], {variant, [{"SpendTx", [address, integer, string]}, - {"OracleRegisterTx", []}, - {"OracleQueryTx", []}, - {"OracleResponseTx", []}, - {"OracleExtendTx", []}, - {"NamePreclaimTx", []}, - {"NameClaimTx", ["hash"]}, - {"NameUpdateTx", [string]}, - {"NameRevokeTx", ["hash"]}, - {"NameTransferTx", [address, string]}, - {"ChannelCreateTx", [address]}, - {"ChannelDepositTx", [address, integer]}, - {"ChannelWithdrawTx", [address, integer]}, - {"ChannelForceProgressTx", [address]}, - {"ChannelCloseMutualTx", [address]}, - {"ChannelCloseSoloTx", [address]}, - {"ChannelSlashTx", [address]}, - {"ChannelSettleTx", [address]}, - {"ChannelSnapshotSoloTx", [address]}, - {"ContractCreateTx", [integer]}, - {"ContractCallTx", [address, integer]}, - {"GAAttachTx", []}]}}, - "Chain.tx" => {[], {record, [{"paying_for", {"option", ["Chain.paying_for_tx"]}}, - {"ga_metas", {list, ["Chain.ga_meta_tx"]}}, - {"actor", address}, - {"fee", integer}, - {"ttl", integer}, - {"tx", "Chain.base_tx"}]}}, - "MCL_BLS12_381.fr" => {[], {bytes, [32]}}, - "MCL_BLS12_381.fp" => {[], {bytes, [48]}} - }. - -% Type preparation has two goals. First, we need a data structure that can be -% traversed quickly, to take sophia-esque erlang expressions and turn them into -% fate-esque erlang expressions that gmbytecode can serialize. Second, we need -% partially substituted names, so that error messages can be generated for why -% "foobar" is not valid as the third field of a `bazquux`, because the third -% field is supposed to be `option(integer)`, not `string`. -% -% To achieve this we need three representations of each type expression, which -% together form an 'annotated type'. First, we need the fully opaque name, -% "bazquux", then we need the normalized name, which is an opaque name with the -% bare-minimum substitution needed to make the outer-most type-constructor an -% identifiable built-in, ADT, or record type, and then we need the dereferenced -% type, which is the raw {variant, [{Name, Fields}, ...]} or -% {record, [{Name, Type}]} expression that can be used in actual Sophia->FATE -% coercion. The type sub-expressions in these dereferenced types will each be -% fully annotated as well, i.e. they will each contain *all three* of the above -% representations, so that coercion of subexpressions remains fast AND -% informative. -% -% In a lot of cases the opaque type given will already be normalized, in which -% case either the normalized field or the non-normalized field of an annotated -% type can simple be the atom `already_normalized`, which means error messages -% can simply render the normalized type expression and know that the error will -% make sense. - -annotate_function_specs([], _Types, Specs) -> - Specs; -annotate_function_specs([{Name, ArgsOpaque, ResultOpaque} | Rest], Types, Specs) -> - {ok, Args} = annotate_bindings(ArgsOpaque, Types, []), - {ok, Result} = annotate_type(ResultOpaque, Types), - NewSpecs = maps:put(Name, {Args, Result}, Specs), - annotate_function_specs(Rest, Types, NewSpecs). - -annotate_type(T, Types) -> - case normalize_opaque_type(T, Types) of - {ok, AlreadyNormalized, NOpaque, NExpanded} -> - annotate_type2(T, AlreadyNormalized, NOpaque, NExpanded, Types); - Error -> - Error - end. - -annotate_type2(T, _, _, unknown_type, _) -> - % If a type is unknown, then it should not be reported as the normalized - % name. - {ok, {T, unknown_type, unknown_type}}; -annotate_type2(T, AlreadyNormalized, NOpaque, NExpanded, Types) -> - case annotate_type_subexpressions(NExpanded, Types) of - {ok, Flat} -> - case AlreadyNormalized of - true -> {ok, {T, already_normalized, Flat}}; - false -> {ok, {T, NOpaque, Flat}} - end; - Error -> - Error - end. - -annotate_types([T | Rest], Types, Acc) -> - case annotate_type(T, Types) of - {ok, Type} -> annotate_types(Rest, Types, [Type | Acc]); - Error -> Error - end; -annotate_types([], _Types, Acc) -> - {ok, lists:reverse(Acc)}. - -annotate_type_subexpressions(PrimitiveType, _Types) when is_atom(PrimitiveType) -> - {ok, PrimitiveType}; -annotate_type_subexpressions({bytes, [Count]}, _Types) -> - % bytes is weird, because it has an argument, but that argument isn't an - % opaque type. - {ok, {bytes, [Count]}}; -annotate_type_subexpressions({variant, VariantsOpaque}, Types) -> - case annotate_variants(VariantsOpaque, Types, []) of - {ok, Variants} -> {ok, {variant, Variants}}; - Error -> Error - end; -annotate_type_subexpressions({record, FieldsOpaque}, Types) -> - case annotate_bindings(FieldsOpaque, Types, []) of - {ok, Fields} -> {ok, {record, Fields}}; - Error -> Error - end; -annotate_type_subexpressions({T, ElemsOpaque}, Types) -> - case annotate_types(ElemsOpaque, Types, []) of - {ok, Elems} -> {ok, {T, Elems}}; - Error -> Error - end. - -annotate_bindings([{Name, T} | Rest], Types, Acc) -> - case annotate_type(T, Types) of - {ok, Type} -> annotate_bindings(Rest, Types, [{Name, Type} | Acc]); - Error -> Error - end; -annotate_bindings([], _Types, Acc) -> - {ok, lists:reverse(Acc)}. - -annotate_variants([{Name, Elems} | Rest], Types, Acc) -> - case annotate_types(Elems, Types, []) of - {ok, ElemsFlat} -> annotate_variants(Rest, Types, [{Name, ElemsFlat} | Acc]); - Error -> Error - end; -annotate_variants([], _Types, Acc) -> - {ok, lists:reverse(Acc)}. - -% This function evaluates type aliases in a loop, until eventually a usable -% definition is found. -normalize_opaque_type(T, Types) -> normalize_opaque_type(T, Types, true). - -% FIXME detect infinite loops -% FIXME detect builtins with the wrong number of arguments -% FIXME should nullary types have an empty list of arguments added before now? -normalize_opaque_type(T, _Types, IsFirst) when is_atom(T) -> - % Once we have eliminated the above rewrite cases, all other cases are - % handled explicitly by the coerce logic, and so are considered normalized. - {ok, IsFirst, T, T}; -normalize_opaque_type(Type = {T, _}, _Types, IsFirst) when is_atom(T) -> - % Once we have eliminated the above rewrite cases, all other cases are - % handled explicitly by the coerce logic, and so are considered normalized. - {ok, IsFirst, Type, Type}; -normalize_opaque_type(T, Types, IsFirst) when is_list(T) -> - % Lists/strings indicate userspace types, which may require arg - % substitutions. Convert to an explicit but empty arg list, for uniformity. - normalize_opaque_type({T, []}, Types, IsFirst); -normalize_opaque_type({T, TypeArgs}, Types, IsFirst) when is_list(T) -> - case maps:find(T, Types) of - error -> - % We couldn't find this named type... Keep building the AACI, but - % mark this type expression as unknown, so that FATE coercions - % aren't attempted. - {ok, IsFirst, {T, TypeArgs}, unknown_type}; - {ok, {TypeParamNames, Definition}} -> - % We have a definition for this type, including names for whatever - % args we have been given. Subtitute our args into this. - NewType = substitute_opaque_type(TypeParamNames, Definition, TypeArgs), - % Now continue on to see if we need to restart the loop or not. - normalize_opaque_type2(IsFirst, {T, TypeArgs}, NewType, Types) - end. - -normalize_opaque_type2(IsFirst, PrevType, NextType = {variant, _}, _) -> - % We have reduced to a variant. Report the type name as the normalized - % type, but also provide the variant definition itself as the candidate - % flattened type for further annotation. - {ok, IsFirst, PrevType, NextType}; -normalize_opaque_type2(IsFirst, PrevType, NextType = {record, _}, _) -> - % We have reduced to a record. Report the type name as the normalized - % type, but also provide the record definition itself as the candidate - % flattened type for further annotation. - {ok, IsFirst, PrevType, NextType}; -normalize_opaque_type2(_, _, NextType, Types) -> - % Not a variant or record yet, so go back to the start of the loop. - % It will no longer be the first iteration. - normalize_opaque_type(NextType, Types, false). - -% Perform a beta-reduction on a type expression. -substitute_opaque_type([], Definition, _) -> - % There are no parameters to substitute. This is the simplest way of - % defining type aliases, records, and variants, so we should make sure to - % short circuit all the recursive descent logic, since it won't actually - % do anything. - Definition; -substitute_opaque_type(TypeParamNames, Definition, TypeArgs) -> - % Bundle the param names alongside the args that we want to substitute, so - % that we can keyfind the one list. - Bindings = lists:zip(TypeParamNames, TypeArgs), - substitute_opaque_type(Bindings, Definition). - -substitute_opaque_type(Bindings, {var, VarName}) -> - case lists:keyfind(VarName, 1, Bindings) of - {_, TypeArg} -> TypeArg; - % No valid ACI will create this case. Regardless, the user should - % still be able to specify arbitrary gmb FATE terms for whatever this - % is meant to be. - false -> unknown_type - end; -substitute_opaque_type(Bindings, {variant, Variants}) -> - Each = fun({VariantName, Elements}) -> - NewElements = substitute_opaque_types(Bindings, Elements), - {VariantName, NewElements} - end, - NewVariants = lists:map(Each, Variants), - {variant, NewVariants}; -substitute_opaque_type(Bindings, {record, Fields}) -> - Each = fun({FieldName, FieldType}) -> - NewType = substitute_opaque_type(Bindings, FieldType), - {FieldName, NewType} - end, - NewFields = lists:map(Each, Fields), - {record, NewFields}; -substitute_opaque_type(Bindings, {Connective, Args}) -> - NewArgs = substitute_opaque_types(Bindings, Args), - {Connective, NewArgs}; -substitute_opaque_type(_Bindings, Type) -> - Type. - -substitute_opaque_types(Bindings, Types) -> - Each = fun(Type) -> substitute_opaque_type(Bindings, Type) end, - lists:map(Each, Types). - -coerce_bindings(VarTypes, Terms, Direction) -> - DefLength = length(VarTypes), - ArgLength = length(Terms), - if - DefLength =:= ArgLength -> coerce_zipped_bindings(lists:zip(VarTypes, Terms), Direction, arg); - DefLength > ArgLength -> {error, too_few_args}; - DefLength < ArgLength -> {error, too_many_args} - end. - -coerce_zipped_bindings(Bindings, Direction, Tag) -> - coerce_zipped_bindings(Bindings, Direction, Tag, [], []). - -coerce_zipped_bindings([Next | Rest], Direction, Tag, Good, Broken) -> - {{ArgName, Type}, Term} = Next, - case coerce(Type, Term, Direction) of - {ok, NewTerm} -> - coerce_zipped_bindings(Rest, Direction, Tag, [NewTerm | Good], Broken); - {error, Errors} -> - Wrapped = wrap_errors({Tag, ArgName}, Errors), - coerce_zipped_bindings(Rest, Direction, Tag, Good, [Wrapped | Broken]) - end; -coerce_zipped_bindings([], _, _, Good, []) -> - {ok, lists:reverse(Good)}; -coerce_zipped_bindings([], _, _, _, Broken) -> - {error, combine_errors(Broken)}. - -wrap_errors(Location, Errors) -> - F = fun({Error, Path}) -> - {Error, [Location | Path]} - end, - lists:map(F, Errors). - -combine_errors(Broken) -> - F = fun(NextErrors, Acc) -> - NextErrors ++ Acc - end, - lists:foldl(F, [], Broken). - -coerce({_, _, integer}, S, _) when is_integer(S) -> - {ok, S}; -coerce({O, N, integer}, S, to_fate) when is_list(S) -> - try - Val = list_to_integer(S), - {ok, Val} - catch - error:badarg -> single_error({invalid, O, N, S}) - end; -coerce({O, N, address}, S, to_fate) -> - coerce_chain_object(O, N, address, account_pubkey, S); -coerce({_, _, address}, {address, Bin}, from_fate) -> - Address = gmser_api_encoder:encode(account_pubkey, Bin), - {ok, unicode:characters_to_list(Address)}; -coerce({O, N, contract}, S, to_fate) -> - coerce_chain_object(O, N, contract, contract_pubkey, S); -coerce({_, _, contract}, {contract, Bin}, from_fate) -> - Address = gmser_api_encoder:encode(contract_pubkey, Bin), - {ok, unicode:characters_to_list(Address)}; -coerce({_, _, signature}, S, to_fate) when is_binary(S) andalso (byte_size(S) =:= 64) -> - % Usually to pass a binary in, you need to wrap it as {raw, Binary}, but - % since sg_... strings OR hex blobs can be used as signatures in Sophia, we - % special case this case based on the length. Even if a binary starts with - % "sg_", 64 characters is not enough to represent a 64 byte signature, so - % the most optimistic interpretation is to use the binary directly. - {ok, S}; -coerce({O, N, signature}, S, to_fate) -> - coerce_chain_object(O, N, signature, signature, S); -coerce({_, _, signature}, Bin, from_fate) -> - Address = gmser_api_encoder:encode(signature, Bin), - {ok, unicode:characters_to_list(Address)}; -%coerce({_, _, channel}, S, to_fate) when is_binary(S) -> - %{ok, {channel, S}}; -%coerce({_, _, channel}, {channel, S}, from_fate) when is_binary(S) -> - %{ok, S}; -coerce({_, _, boolean}, true, _) -> - {ok, true}; -coerce({_, _, boolean}, "true", _) -> - {ok, true}; -coerce({_, _, boolean}, false, _) -> - {ok, false}; -coerce({_, _, boolean}, "false", _) -> - {ok, false}; -coerce({O, N, boolean}, S, _) -> - single_error({invalid, O, N, S}); -coerce({O, N, string}, Str, Direction) -> - Result = case Direction of - to_fate -> unicode:characters_to_binary(Str); - from_fate -> unicode:characters_to_list(Str) - end, - case Result of - {error, _, _} -> - single_error({invalid, O, N, Str}); - {incomplete, _, _} -> - single_error({invalid, O, N, Str}); - StrBin -> - {ok, StrBin} - end; -coerce({_, _, char}, Val, _Direction) when is_integer(Val) -> - {ok, Val}; -coerce({O, N, char}, Str, to_fate) -> - Result = unicode:characters_to_list(Str), - case Result of - {error, _, _} -> - single_error({invalid, O, N, Str}); - {incomplete, _, _} -> - single_error({invalid, O, N, Str}); - [C] -> - {ok, C}; - _ -> - single_error({invalid, O, N, Str}) - end; -coerce({O, N, {bytes, [Count]}}, Bytes, _Direction) when is_bitstring(Bytes) -> - coerce_bytes(O, N, Count, Bytes); -coerce({_, _, bits}, {bits, Num}, from_fate) -> - {ok, Num}; -coerce({_, _, bits}, Num, to_fate) when is_integer(Num) -> - {ok, {bits, Num}}; -coerce({_, _, bits}, Bits, to_fate) when is_bitstring(Bits) -> - Size = bit_size(Bits), - <> = Bits, - {ok, {bits, IntValue}}; -coerce({_, _, {list, [Type]}}, Data, Direction) when is_list(Data) -> - coerce_list(Type, Data, Direction); -coerce({_, _, {map, [KeyType, ValType]}}, Data, Direction) when is_map(Data) -> - coerce_map(KeyType, ValType, Data, Direction); -coerce({O, N, {tuple, ElementTypes}}, Data, to_fate) when is_tuple(Data) -> - ElementList = tuple_to_list(Data), - coerce_tuple(O, N, ElementTypes, ElementList, to_fate); -coerce({O, N, {tuple, ElementTypes}}, {tuple, Data}, from_fate) -> - ElementList = tuple_to_list(Data), - coerce_tuple(O, N, ElementTypes, ElementList, from_fate); -coerce({O, N, {variant, Variants}}, Data, to_fate) when is_tuple(Data), tuple_size(Data) > 0 -> - [Name | Terms] = tuple_to_list(Data), - case lookup_variant(Name, Variants) of - {Tag, TermTypes} -> - coerce_variant2(O, N, Variants, Name, Tag, TermTypes, Terms, to_fate); - not_found -> - ValidNames = [Valid || {Valid, _} <- Variants], - single_error({invalid_variant, O, N, Name, ValidNames}) - end; -coerce({O, N, {variant, Variants}}, Name, to_fate) when is_list(Name) -> - coerce({O, N, {variant, Variants}}, {Name}, to_fate); -coerce({O, N, {variant, Variants}}, {variant, _, Tag, Tuple}, from_fate) -> - Terms = tuple_to_list(Tuple), - {Name, TermTypes} = lists:nth(Tag + 1, Variants), - coerce_variant2(O, N, Variants, Name, Tag, TermTypes, Terms, from_fate); -coerce({O, N, {record, MemberTypes}}, Map, to_fate) when is_map(Map) -> - coerce_map_to_record(O, N, MemberTypes, Map); -coerce({O, N, {record, MemberTypes}}, {tuple, Tuple}, from_fate) -> - coerce_record_to_map(O, N, MemberTypes, Tuple); -coerce({O, N, {unknown_type, _}}, Data, _) -> - case N of - already_normalized -> - Message = "Warning: Unknown type ~p. Using term ~p as is.~n", - io:format(Message, [O, Data]); - _ -> - Message = "Warning: Unknown type ~p (i.e. ~p). Using term ~p as is.~n", - io:format(Message, [O, N, Data]) - end, - {ok, Data}; -coerce({O, N, _}, Data, from_fate) -> - case N of - already_normalized -> - io:format("Warning: Unimplemented type ~p.~nUsing term as is:~n~p~n", [O, Data]); - _ -> - io:format("Warning: Unimplemented type ~p (i.e. ~p).~nUsing term as is:~n~p~n", [O, N, Data]) - end, - {ok, Data}; -coerce({O, N, _}, Data, _) -> single_error({invalid, O, N, Data}). - -coerce_bytes(O, N, _, Bytes) when bit_size(Bytes) rem 8 /= 0 -> - single_error({partial_bytes, O, N, bit_size(Bytes)}); -coerce_bytes(_, _, any, Bytes) -> - {ok, Bytes}; -coerce_bytes(O, N, Count, Bytes) when byte_size(Bytes) /= Count -> - single_error({incorrect_size, O, N, Bytes}); -coerce_bytes(_, _, _, Bytes) -> - {ok, Bytes}. - -coerce_chain_object(_, _, _, _, {raw, Binary}) -> - {ok, Binary}; -coerce_chain_object(O, N, T, Tag, S) -> - case decode_chain_object(Tag, S) of - {ok, Data} -> {ok, coerce_chain_object2(T, Data)}; - {error, Reason} -> single_error({Reason, O, N, S}) - end. - -coerce_chain_object2(address, Data) -> {address, Data}; -coerce_chain_object2(contract, Data) -> {contract, Data}; -coerce_chain_object2(signature, Data) -> Data. - -decode_chain_object(Tag, S) -> - try - case gmser_api_encoder:decode(unicode:characters_to_binary(S)) of - {Tag, Data} -> {ok, Data}; - {_, _} -> {error, wrong_prefix} - end - catch - error:missing_prefix -> {error, missing_prefix}; - error:incorrect_size -> {error, incorrect_size} - end. - -coerce_list(Type, Elements, Direction) -> - % 0 index since it represents a sophia list - coerce_list(Type, Elements, Direction, 0, [], []). - -coerce_list(Type, [Next | Rest], Direction, Index, Good, Broken) -> - case coerce(Type, Next, Direction) of - {ok, Coerced} -> coerce_list(Type, Rest, Direction, Index + 1, [Coerced | Good], Broken); - {error, Errors} -> - Wrapped = wrap_errors({index, Index}, Errors), - coerce_list(Type, Rest, Direction, Index + 1, Good, [Wrapped | Broken]) - end; -coerce_list(_Type, [], _, _, Good, []) -> - {ok, lists:reverse(Good)}; -coerce_list(_, [], _, _, _, Broken) -> - {error, combine_errors(Broken)}. - -coerce_map(KeyType, ValType, Data, Direction) -> - coerce_map(KeyType, ValType, maps:iterator(Data), Direction, #{}, []). - -coerce_map(KeyType, ValType, Remaining, Direction, Good, Broken) -> - case maps:next(Remaining) of - {K, V, RemainingAfter} -> - coerce_map2(KeyType, ValType, RemainingAfter, Direction, Good, Broken, K, V); - none -> - coerce_map_finish(Good, Broken) - end. - -coerce_map2(KeyType, ValType, Remaining, Direction, Good, Broken, K, V) -> - case coerce(KeyType, K, Direction) of - {ok, KFATE} -> - coerce_map3(KeyType, ValType, Remaining, Direction, Good, Broken, K, V, KFATE); - {error, Errors} -> - Wrapped = wrap_errors(map_key, Errors), - % Continue as if the key coerced successfully, so that we can give - % errors for both the key and the value. - coerce_map3(KeyType, ValType, Remaining, Direction, Good, [Wrapped | Broken], K, V, error) - end. - -coerce_map3(KeyType, ValType, Remaining, Direction, Good, Broken, K, V, KFATE) -> - case coerce(ValType, V, Direction) of - {ok, VFATE} -> - NewGood = Good#{KFATE => VFATE}, - coerce_map(KeyType, ValType, Remaining, Direction, NewGood, Broken); - {error, Errors} -> - Wrapped = wrap_errors({map_value, K}, Errors), - coerce_map(KeyType, ValType, Remaining, Direction, Good, [Wrapped | Broken]) - end. - -coerce_map_finish(Good, []) -> - {ok, Good}; -coerce_map_finish(_, Broken) -> - {error, combine_errors(Broken)}. - -lookup_variant(Name, Variants) -> lookup_variant(Name, Variants, 0). - -lookup_variant(Name, [{Name, Terms} | _], Tag) -> - {Tag, Terms}; -lookup_variant(Name, [_ | Rest], Tag) -> - lookup_variant(Name, Rest, Tag + 1); -lookup_variant(_Name, [], _Tag) -> - not_found. - -coerce_tuple(O, N, TermTypes, Terms, Direction) -> - case coerce_tuple_elements(TermTypes, Terms, Direction, tuple_element) of - {ok, Converted} -> - case Direction of - to_fate -> {ok, {tuple, list_to_tuple(Converted)}}; - from_fate -> {ok, list_to_tuple(Converted)} - end; - {error, too_few_terms} -> - single_error({tuple_too_few_terms, O, N, list_to_tuple(Terms)}); - {error, too_many_terms} -> - single_error({tuple_too_many_terms, O, N, list_to_tuple(Terms)}); - Errors -> Errors - end. - -% Wraps a single error in a list, along with an empty path, so that other -% accumulating error handlers can work with it. -single_error(Reason) -> - {error, [{Reason, []}]}. - -coerce_variant2(O, N, Variants, Name, Tag, TermTypes, Terms, Direction) -> - % FIXME: we could go through and add the variant tag to the adt_element - % paths? - case coerce_tuple_elements(TermTypes, Terms, Direction, adt_element) of - {ok, Converted} -> - case Direction of - to_fate -> - Arities = [length(VariantTerms) - || {_, VariantTerms} <- Variants], - {ok, {variant, Arities, Tag, list_to_tuple(Converted)}}; - from_fate -> - {ok, list_to_tuple([Name | Converted])} - end; - {error, too_few_terms} -> - single_error({adt_too_few_terms, O, N, Name, TermTypes, Terms}); - {error, too_many_terms} -> - single_error({adt_too_many_terms, O, N, Name, TermTypes, Terms}); - Errors -> Errors - end. - -coerce_tuple_elements(Types, Terms, Direction, Tag) -> - % The sophia standard library uses 0 indexing for lists, and fst/snd/thd - % for tuples... Not sure how we should report errors in tuples, then. - coerce_tuple_elements(Types, Terms, Direction, Tag, 0, [], []). - -coerce_tuple_elements([Type | Types], [Term | Terms], Direction, Tag, Index, Good, Broken) -> - case coerce(Type, Term, Direction) of - {ok, Value} -> - coerce_tuple_elements(Types, Terms, Direction, Tag, Index + 1, [Value | Good], Broken); - {error, Errors} -> - Wrapped = wrap_errors({Tag, Index}, Errors), - coerce_tuple_elements(Types, Terms, Direction, Tag, Index + 1, Good, [Wrapped | Broken]) - end; -coerce_tuple_elements([], [], _, _, _, Good, []) -> - {ok, lists:reverse(Good)}; -coerce_tuple_elements([], [], _, _, _, _, Broken) -> - {error, combine_errors(Broken)}; -coerce_tuple_elements(_, [], _, _, _, _, _) -> - {error, too_few_terms}; -coerce_tuple_elements([], _, _, _, _, _, _) -> - {error, too_many_terms}. - -coerce_map_to_record(O, N, MemberTypes, Map) -> - case zip_record_fields(MemberTypes, Map) of - {ok, Zipped} -> - case coerce_zipped_bindings(Zipped, to_fate, field) of - {ok, Converted} -> - {ok, {tuple, list_to_tuple(Converted)}}; - Errors -> - Errors - end; - {error, {missing_fields, Missing}} -> - single_error({missing_fields, O, N, Missing}); - {error, {unexpected_fields, Unexpected}} -> - Names = [Name || {Name, _} <- maps:to_list(Unexpected)], - single_error({unexpected_fields, O, N, Names}) - end. - -coerce_record_to_map(O, N, MemberTypes, Tuple) -> - Names = [Name || {Name, _} <- MemberTypes], - Types = [Type || {_, Type} <- MemberTypes], - Terms = tuple_to_list(Tuple), - % FIXME: We could go through and change the record_element paths into field - % paths? - case coerce_tuple_elements(Types, Terms, from_fate, record_element) of - {ok, Converted} -> - Map = maps:from_list(lists:zip(Names, Converted)), - {ok, Map}; - {error, too_few_terms} -> - single_error({record_too_few_terms, O, N, Tuple}); - {error, too_many_terms} -> - single_error({record_too_many_terms, O, N, Tuple}); - Errors -> - Errors - end. - -zip_record_fields(Fields, Map) -> - case lists:mapfoldl(fun zip_record_field/2, {Map, []}, Fields) of - {_, {_, Missing = [_|_]}} -> - {error, {missing_fields, lists:reverse(Missing)}}; - {_, {Remaining, _}} when map_size(Remaining) > 0 -> - {error, {unexpected_fields, Remaining}}; - {Zipped, _} -> - {ok, Zipped} - end. - -zip_record_field({Name, Type}, {Remaining, Missing}) -> - case maps:take(Name, Remaining) of - {Term, RemainingAfter} -> - ZippedTerm = {{Name, Type}, Term}, - {ZippedTerm, {RemainingAfter, Missing}}; - error -> - {missing, {Remaining, [Name | Missing]}} - end. - - -spec cache_aaci(Label, AACI) -> ok when Label :: term(), AACI :: aaci(). @@ -2244,14 +1474,11 @@ lookup_aaci(Label) -> %% prepare_contract/1. This type information, particularly the return type, is %% useful for calling decode_bytearray/2. -aaci_lookup_spec({aaci, _, FunDefs, _}, Fun) -> - case maps:find(Fun, FunDefs) of - A = {ok, _} -> A; - error -> {error, bad_fun_name} - end; +aaci_lookup_spec(AACI = {aaci, _, _, _}, Fun) -> + hz_aaci:aaci_get_function_signature(AACI, Fun); aaci_lookup_spec({aaci, Label}, Fun) -> case hz_man:lookup_aaci(Label) of - {ok, AACI} -> aaci_lookup_spec(AACI, Fun); + {ok, AACI} -> hz_aaci:aaci_get_function_signature(AACI, Fun); error -> {error, aaci_not_found} end. @@ -2291,7 +1518,7 @@ encode_call_data({aaci, Label}, Fun, Args) -> end. encode_call_data2(ArgDef, Fun, Args) -> - case coerce_bindings(ArgDef, Args, to_fate) of + case hz_aaci:coerce_bindings(ArgDef, Args, to_fate) of {ok, Coerced} -> gmb_fate_abi:create_calldata(Fun, Coerced); Errors -> Errors end. @@ -2612,308 +1839,3 @@ binary_sig_prefix() -> <<"Gajumaru Signed Binary:">>. % /v3/debug/token-supply/height/{height} % /v3/debug/crash - -%%% Simple coerce/3 tests - -% Round trip coerce run for the eunit tests below. If these results don't match -% then the test should fail. -try_coerce(Type, Sophia, Fate) -> - % Run both first, to see if they fail to produce any result. - {ok, FateActual} = coerce(Type, Sophia, to_fate), - {ok, SophiaActual} = coerce(Type, Fate, from_fate), - % Now check that the results were what we expected. - case FateActual of - Fate -> - ok; - _ -> - erlang:error({to_fate_failed, Fate, FateActual}) - end, - case SophiaActual of - Sophia -> - ok; - _ -> - erlang:error({from_fate_failed, Sophia, SophiaActual}) - end, - % Finally, check that the FATE result is something that gmb understands. - gmb_fate_encoding:serialize(Fate), - ok. - -coerce_int_test() -> - {ok, Type} = annotate_type(integer, #{}), - try_coerce(Type, 123, 123). - -coerce_address_test() -> - {ok, Type} = annotate_type(address, #{}), - try_coerce(Type, - "ak_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx", - {address, <<164,136,155,90,124,22,40,206,255,76,213,56,238,123, - 167,208,53,78,40,235,2,163,132,36,47,183,228,151,9, - 210,39,214>>}). - -coerce_contract_test() -> - {ok, Type} = annotate_type(contract, #{}), - try_coerce(Type, - "ct_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx", - {contract, <<164,136,155,90,124,22,40,206,255,76,213,56,238,123, - 167,208,53,78,40,235,2,163,132,36,47,183,228,151,9, - 210,39,214>>}). - -coerce_signature_test() -> - {ok, Type} = annotate_type(signature, #{}), - try_coerce(Type, - "sg_XDyF8LJC4tpMyAySvpaG1f5V9F2XxAbRx9iuVjvvdNMwVracLhzAuXhRM5kXAFtpwW1DCHuz5jGehUayCah4jub32Ti2n", - <<231,4,97,129,16,173,37,42,194,249,28,94,134,163,208,84,22,135, - 169,85,212,142,14,12,233,252,97,50,193,158,229,51,123,206,222, - 249,2,3,85,173,106,150,243,253,89,128,248,52,195,140,95,114, - 233,110,119,143,206,137,124,36,63,154,85,7>>). - -coerce_signature_binary_test() -> - {ok, Type} = annotate_type(signature, #{}), - Binary = <<231,4,97,129,16,173,37,42,194,249,28,94,134,163,208,84,22,135, - 169,85,212,142,14,12,233,252,97,50,193,158,229,51,123,206,222, - 249,2,3,85,173,106,150,243,253,89,128,248,52,195,140,95,114, - 233,110,119,143,206,137,124,36,63,154,85,7>>, - {ok, Binary} = coerce(Type, {raw, Binary}, to_fate), - {ok, Binary} = coerce(Type, Binary, to_fate), - ok. - -coerce_bool_test() -> - {ok, Type} = annotate_type(boolean, #{}), - try_coerce(Type, true, true), - try_coerce(Type, false, false). - -coerce_string_test() -> - {ok, Type} = annotate_type(string, #{}), - try_coerce(Type, "hello world", <<"hello world">>). - -coerce_list_test() -> - {ok, Type} = annotate_type({list, [string]}, #{}), - try_coerce(Type, ["hello world", [65, 32, 65]], [<<"hello world">>, <<65, 32, 65>>]). - -coerce_map_test() -> - {ok, Type} = annotate_type({map, [string, {list, [integer]}]}, #{}), - try_coerce(Type, #{"a" => "a", "b" => "b"}, #{<<"a">> => "a", <<"b">> => "b"}). - -coerce_tuple_test() -> - {ok, Type} = annotate_type({tuple, [integer, string]}, #{}), - try_coerce(Type, {123, "456"}, {tuple, {123, <<"456">>}}). - -coerce_variant_test() -> - {ok, Type} = annotate_type({variant, [{"A", [integer]}, - {"B", [integer, integer]}]}, - #{}), - try_coerce(Type, {"A", 123}, {variant, [1, 2], 0, {123}}), - try_coerce(Type, {"B", 456, 789}, {variant, [1, 2], 1, {456, 789}}). - -coerce_option_test() -> - {ok, Type} = annotate_type({"option", [integer]}, builtin_typedefs()), - try_coerce(Type, {"None"}, {variant, [0, 1], 0, {}}), - try_coerce(Type, {"Some", 1}, {variant, [0, 1], 1, {1}}). - -coerce_record_test() -> - {ok, Type} = annotate_type({record, [{"a", integer}, {"b", integer}]}, #{}), - try_coerce(Type, #{"a" => 123, "b" => 456}, {tuple, {123, 456}}). - -coerce_bytes_test() -> - {ok, Type} = annotate_type({tuple, [{bytes, [4]}, {bytes, [any]}]}, #{}), - try_coerce(Type, {<<"abcd">>, <<"efghi">>}, {tuple, {<<"abcd">>, <<"efghi">>}}). - -coerce_bits_test() -> - {ok, Type} = annotate_type(bits, #{}), - try_coerce(Type, 5, {bits, 5}). - -coerce_char_test() -> - {ok, Type} = annotate_type(char, #{}), - try_coerce(Type, $?, $?). - -coerce_unicode_test() -> - {ok, Type} = annotate_type(char, #{}), - % Latin Small Letter C with cedilla and acute - {ok, $ḉ} = coerce(Type, <<"ḉ"/utf8>>, to_fate), - ok. - -coerce_hash_test() -> - {ok, Type} = annotate_type("hash", builtin_typedefs()), - Hash = list_to_binary(lists:seq(1,32)), - try_coerce(Type, Hash, Hash), - ok. - - - -%%% Complex AACI paramter and namespace tests - -aaci_from_string(String) -> - case so_compiler:from_string(String, [{aci, json}]) of - {ok, #{aci := ACI}} -> {ok, prepare_aaci(ACI)}; - Error -> Error - end. - -namespace_coerce_test() -> - Contract = " - namespace N = - record pair = { a : int, b : int } - - contract C = - entrypoint f(): N.pair = { a = 1, b = 2 } - ", - {ok, AACI} = aaci_from_string(Contract), - {ok, {[], Output}} = aaci_lookup_spec(AACI, "f"), - try_coerce(Output, #{"a" => 123, "b" => 456}, {tuple, {123, 456}}). - -record_substitution_test() -> - Contract = " - contract C = - record pair('t) = { a : 't, b : 't } - entrypoint f(): pair(int) = { a = 1, b = 2 } - ", - {ok, AACI} = aaci_from_string(Contract), - {ok, {[], Output}} = aaci_lookup_spec(AACI, "f"), - try_coerce(Output, #{"a" => 123, "b" => 456}, {tuple, {123, 456}}). - -tuple_substitution_test() -> - Contract = " - contract C = - type triple('t1, 't2) = int * 't1 * 't2 - entrypoint f(): triple(int, string) = (1, 2, \"hello\") - ", - {ok, AACI} = aaci_from_string(Contract), - {ok, {[], Output}} = aaci_lookup_spec(AACI, "f"), - try_coerce(Output, {1, 2, "hello"}, {tuple, {1, 2, <<"hello">>}}). - -variant_substitution_test() -> - Contract = " - contract C = - datatype adt('a, 'b) = Left('a, 'b) | Right('b, int) - entrypoint f(): adt(string, int) = Left(\"hi\", 1) - ", - {ok, AACI} = aaci_from_string(Contract), - {ok, {[], Output}} = aaci_lookup_spec(AACI, "f"), - try_coerce(Output, {"Left", "hi", 1}, {variant, [2, 2], 0, {<<"hi">>, 1}}), - try_coerce(Output, {"Right", 2, 3}, {variant, [2, 2], 1, {2, 3}}). - -nested_coerce_test() -> - Contract = " - contract C = - type pair('t) = 't * 't - record r = { f1 : pair(int), f2: pair(string) } - entrypoint f(): r = { f1 = (1, 2), f2 = (\"a\", \"b\") } - ", - {ok, AACI} = aaci_from_string(Contract), - {ok, {[], Output}} = aaci_lookup_spec(AACI, "f"), - try_coerce(Output, - #{ "f1" => {1, 2}, "f2" => {"a", "b"}}, - {tuple, {{tuple, {1, 2}}, {tuple, {<<"a">>, <<"b">>}}}}). - -state_coerce_test() -> - Contract = " - contract C = - type state = int - entrypoint init(): state = 0 - ", - {ok, AACI} = aaci_from_string(Contract), - {ok, {[], Output}} = aaci_lookup_spec(AACI, "init"), - try_coerce(Output, 0, 0). - -param_test() -> - Contract = " - contract C = - type state = int - entrypoint init(x): state = x - ", - {ok, AACI} = aaci_from_string(Contract), - {ok, {[{"x", Input}], Output}} = aaci_lookup_spec(AACI, "init"), - try_coerce(Input, 0, 0), - try_coerce(Output, 0, 0). - -%%% Obscure Sophia types where we should check the AACI as well - -obscure_aaci_test() -> - Contract = " - include \"Set.aes\" - contract C = - entrypoint options(): option(int) = None - entrypoint fixed_bytes(): bytes(4) = #DEADBEEF - entrypoint any_bytes(): bytes() = Bytes.to_any_size(#112233) - entrypoint bits(): bits = Bits.all - entrypoint character(): char = 'a' - entrypoint hash(): hash = #00112233445566778899AABBCCDDEEFF00112233445566778899AABBCCDDEEFF - entrypoint unit(): unit = () - - entrypoint ttl(x): Chain.ttl = FixedTTL(x) - entrypoint paying_for(x, y): Chain.paying_for_tx = Chain.PayingForTx(x, y) - entrypoint ga_meta_tx(x, y): Chain.ga_meta_tx = Chain.GAMetaTx(x, y) - entrypoint base_tx(x, y, z): Chain.base_tx = Chain.SpendTx(x, y, z) - entrypoint tx(a, b, c, d, e, f): Chain.tx = - {paying_for = a, - ga_metas = b, - actor = c, - fee = d, - ttl = e, - tx = f} - - entrypoint pointee(x): AENS.pointee = AENS.AccountPt(x) - entrypoint name(x, y, z): AENS.name = AENS.Name(x, y, z) - entrypoint pointee2(x): AENSv2.pointee = AENSv2.DataPt(x) - entrypoint name2(x, y, z): AENSv2.name = AENSv2.Name(x, y, z) - - entrypoint fr(x): MCL_BLS12_381.fr = x - entrypoint fp(x): MCL_BLS12_381.fp = x - - entrypoint set(): Set.set(int) = Set.new() - - ", - {ok, AACI} = aaci_from_string(Contract), - - {ok, {[], {{bytes, [4]}, _, _}}} = aaci_lookup_spec(AACI, "fixed_bytes"), - {ok, {[], {{bytes, [any]}, _, _}}} = aaci_lookup_spec(AACI, "any_bytes"), - {ok, {[], {bits, _, _}}} = aaci_lookup_spec(AACI, "bits"), - {ok, {[], {char, _, _}}} = aaci_lookup_spec(AACI, "character"), - - {ok, {[], {{"option", [integer]}, _, {variant, [{"None", []}, {"Some", [_]}]}}}} = aaci_lookup_spec(AACI, "options"), - {ok, {[], {"hash", _, {bytes, [32]}}}} = aaci_lookup_spec(AACI, "hash"), - {ok, {[], {"unit", _, {tuple, []}}}} = aaci_lookup_spec(AACI, "unit"), - - {ok, {_, {"Chain.ttl", _, {variant, _}}}} = aaci_lookup_spec(AACI, "ttl"), - {ok, {_, {"Chain.paying_for_tx", _, {variant, _}}}} = aaci_lookup_spec(AACI, "paying_for"), - {ok, {_, {"Chain.ga_meta_tx", _, {variant, _}}}} = aaci_lookup_spec(AACI, "ga_meta_tx"), - {ok, {_, {"Chain.base_tx", _, {variant, _}}}} = aaci_lookup_spec(AACI, "base_tx"), - {ok, {_, {"Chain.tx", _, {record, _}}}} = aaci_lookup_spec(AACI, "tx"), - - {ok, {_, {"AENS.pointee", _, {variant, _}}}} = aaci_lookup_spec(AACI, "pointee"), - {ok, {_, {"AENS.name", _, {variant, _}}}} = aaci_lookup_spec(AACI, "name"), - {ok, {_, {"AENSv2.pointee", _, {variant, _}}}} = aaci_lookup_spec(AACI, "pointee2"), - {ok, {_, {"AENSv2.name", _, {variant, _}}}} = aaci_lookup_spec(AACI, "name2"), - - {ok, {_, {"MCL_BLS12_381.fr", _, {bytes, [32]}}}} = aaci_lookup_spec(AACI, "fr"), - {ok, {_, {"MCL_BLS12_381.fp", _, {bytes, [48]}}}} = aaci_lookup_spec(AACI, "fp"), - - {ok, {[], {{"Set.set", [integer]}, _, {record, [{"to_map", _}]}}}} = aaci_lookup_spec(AACI, "set"), - - ok. - -name_coerce_test() -> - AddrSoph = "ak_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx", - AddrFate = {address, <<164,136,155,90,124,22,40,206,255,76,213,56,238,123, - 167,208,53,78,40,235,2,163,132,36,47,183,228,151,9, - 210,39,214>>}, - {ok, TTL} = annotate_type("Chain.ttl", builtin_typedefs()), - TTLSoph = {"FixedTTL", 0}, - TTLFate = {variant, [1, 1], 0, {0}}, - try_coerce(TTL, TTLSoph, TTLFate), - {ok, Pointee} = annotate_type("AENS.pointee", builtin_typedefs()), - PointeeSoph = {"AccountPt", AddrSoph}, - PointeeFate = {variant, [1, 1, 1, 1], 0, {AddrFate}}, - try_coerce(Pointee, PointeeSoph, PointeeFate), - {ok, Name} = annotate_type("AENS.name", builtin_typedefs()), - NameSoph = {"Name", AddrSoph, TTLSoph, #{"myname" => PointeeSoph}}, - NameFate = {variant, [3], 0, {AddrFate, TTLFate, #{<<"myname">> => PointeeFate}}}, - try_coerce(Name, NameSoph, NameFate). - -void_coerce_test() -> - % Void itself can't be represented, but other types built out of void are - % valid. - {ok, NonOption} = annotate_type({"option", ["void"]}, builtin_typedefs()), - try_coerce(NonOption, {"None"}, {variant, [0, 1], 0, {}}), - {ok, NonList} = annotate_type({list, ["void"]}, builtin_typedefs()), - try_coerce(NonList, [], []). - diff --git a/src/hz_aaci.erl b/src/hz_aaci.erl new file mode 100644 index 0000000..0d56ef7 --- /dev/null +++ b/src/hz_aaci.erl @@ -0,0 +1,1146 @@ +%%% @doc +%%% Sophia datatype manipulations for Hakuzaru +%%% +%%% Sophia and FATE are two subtly different machine languages with two +%%% different type systems. Application developers probably think about their +%%% smart contracts in terms of the Sophia types, but the node will only accept +%%% the corresponding FATE types, and both of these type systems result in +%%% different erlang terms for representing the same thing. This module defines +%%% the conversion between these different representations of the same data. +%%% @end + +-module(hz_aaci). +-vsn("0.8.2"). +-author("Jarvis Carroll "). +-copyright("Craig Everett "). +-license("GPL-3.0-or-later"). + +% Contract call and serialization interface functions +-export([prepare_contract/1, + prepare_aaci/1, + coerce/3, + coerce_bindings/3, + aaci_get_function_signature/2]). + +%%% Types + +-export_type([aaci/0]). + +-include_lib("eunit/include/eunit.hrl"). + +-type aaci() :: {aaci, term(), term(), term()}. + +%%% ACI/AACI + +-spec prepare_contract(File) -> {ok, AACI} | {error, Reason} + when File :: file:filename(), + AACI :: aaci(), + Reason :: term(). +%% @doc +%% Compile a contract and extract the function spec meta for use in future formation +%% of calldata + +prepare_contract(File) -> + case so_compiler:file(File, [{aci, json}]) of + {ok, #{aci := ACI}} -> {ok, prepare_aaci(ACI)}; + Error -> Error + end. + +prepare_aaci(ACI) -> + % We want to take the types represented by the ACI, things like N1.T(N2.T), + % and dereference them down to concrete types like + % {tuple, [integer, string]}. Our type dereferencing algorithms + % shouldn't act directly on the JSON-based structures that the compiler + % gives us, though, though, so before we do the analysis, we should strip + % the ACI down to a list of 'opaque' type defintions and function specs. + {Name, OpaqueSpecs, TypeDefs} = convert_aci_types(ACI), + + % Now that we have the opaque types, we can dereference the function specs + % down to the concrete types they actually represent. We annotate each + % subexpression of this concrete type with other info too, in case it helps + % make error messages easier to understand. + InternalTypeDefs = maps:merge(builtin_typedefs(), TypeDefs), + Specs = annotate_function_specs(OpaqueSpecs, InternalTypeDefs, #{}), + + {aaci, Name, Specs, TypeDefs}. + +convert_aci_types(ACI) -> + % Find the main contract, so we can get the specifications of its + % entrypoints. + [{NameBin, SpecDefs}] = + [{N, F} + || #{contract := #{kind := contract_main, + functions := F, + name := N}} <- ACI], + Name = binary_to_list(NameBin), + % Turn these specifications into opaque types that we can reason about. + Specs = lists:map(fun convert_function_spec/1, SpecDefs), + + % These specifications can reference other type definitions from the main + % contract and any other namespaces, so extract these types and convert + % them too. + TypeDefTree = lists:map(fun convert_namespace_typedefs/1, ACI), + % The tree structure of the ACI naturally leads to a tree of opaque types, + % but we want a map, so flatten it out before we continue. + TypeDefMap = collect_opaque_types(TypeDefTree, #{}), + + % This is all the information we actually need from the ACI, the rest is + % just pre-compute and acceleration. + {Name, Specs, TypeDefMap}. + +convert_function_spec(#{name := NameBin, arguments := Args, returns := Result}) -> + Name = binary_to_list(NameBin), + ArgTypes = lists:map(fun convert_arg/1, Args), + ResultType = opaque_type([], Result), + {Name, ArgTypes, ResultType}. + +convert_arg(#{name := NameBin, type := TypeDef}) -> + Name = binary_to_list(NameBin), + Type = opaque_type([], TypeDef), + {Name, Type}. + +convert_namespace_typedefs(#{namespace := NS}) -> + Name = namespace_name(NS), + convert_typedefs(NS, Name); +convert_namespace_typedefs(#{contract := NS}) -> + Name = namespace_name(NS), + ImplicitTypes = convert_implicit_types(NS, Name), + ExplicitTypes = convert_typedefs(NS, Name), + [ImplicitTypes, ExplicitTypes]. + +namespace_name(#{name := NameBin}) -> + binary_to_list(NameBin). + +convert_implicit_types(#{state := StateDefACI}, Name) -> + StateDefOpaque = opaque_type([], StateDefACI), + [{Name, [], contract}, + {Name ++ ".state", [], StateDefOpaque}]; +convert_implicit_types(_, Name) -> + [{Name, [], contract}]. + +convert_typedefs(#{typedefs := TypeDefs}, Name) -> + convert_typedefs_loop(TypeDefs, Name ++ ".", []). + +% Take a namespace that has already had a period appended, and use that as a +% prefix to convert and annotate a list of types. +convert_typedefs_loop([], _NamePrefix, Converted) -> + Converted; +convert_typedefs_loop([Next | Rest], NamePrefix, Converted) -> + #{name := NameBin, vars := ParamDefs, typedef := DefACI} = Next, + Name = NamePrefix ++ binary_to_list(NameBin), + Params = [binary_to_list(Param) || #{name := Param} <- ParamDefs], + Def = opaque_type(Params, DefACI), + convert_typedefs_loop(Rest, NamePrefix, [Converted, {Name, Params, Def}]). + +collect_opaque_types([], Types) -> + Types; +collect_opaque_types([L | R], Types) -> + NewTypes = collect_opaque_types(L, Types), + collect_opaque_types(R, NewTypes); +collect_opaque_types({Name, Params, Def}, Types) -> + maps:put(Name, {Params, Def}, Types). + +%%% ACI Type -> Opaque Type + +% Convert an ACI type defintion/spec into the 'opaque type' representation that +% our dereferencing algorithms can reason about. +opaque_type(Params, NameBin) when is_binary(NameBin) -> + Name = opaque_type_name(NameBin), + case not is_atom(Name) and lists:member(Name, Params) of + false -> Name; + true -> {var, Name} + end; +opaque_type(Params, #{record := FieldDefs}) -> + Fields = [{binary_to_list(Name), opaque_type(Params, Type)} + || #{name := Name, type := Type} <- FieldDefs], + {record, Fields}; +opaque_type(Params, #{variant := VariantDefs}) -> + ConvertVariant = fun(Pair) -> + [{Name, Types}] = maps:to_list(Pair), + {binary_to_list(Name), [opaque_type(Params, Type) || Type <- Types]} + end, + Variants = lists:map(ConvertVariant, VariantDefs), + {variant, Variants}; +opaque_type(Params, #{tuple := TypeDefs}) -> + {tuple, [opaque_type(Params, Type) || Type <- TypeDefs]}; +opaque_type(_, #{bytes := Count}) -> + {bytes, [Count]}; +opaque_type(Params, Pair) when is_map(Pair) -> + [{Name, TypeArgs}] = maps:to_list(Pair), + {opaque_type_name(Name), [opaque_type(Params, Arg) || Arg <- TypeArgs]}. + +% Atoms for any builtins that aren't qualified by a namespace in Sophia. +% Everything else stays as a string, user-defined or not. +opaque_type_name(<<"int">>) -> integer; +opaque_type_name(<<"bool">>) -> boolean; +opaque_type_name(<<"bits">>) -> bits; +opaque_type_name(<<"char">>) -> char; +opaque_type_name(<<"string">>) -> string; +opaque_type_name(<<"address">>) -> address; +opaque_type_name(<<"signature">>) -> signature; +opaque_type_name(<<"contract">>) -> contract; +opaque_type_name(<<"list">>) -> list; +opaque_type_name(<<"map">>) -> map; +% I'm not sure how to produce channels in Sophia source, but they seem to exist +% in gmb still. +opaque_type_name(<<"channel">>) -> channel; +opaque_type_name(Name) -> binary_to_list(Name). + +builtin_typedefs() -> + #{"unit" => {[], {tuple, []}}, + "void" => {[], {variant, []}}, + "hash" => {[], {bytes, [32]}}, + "option" => {["'T"], {variant, [{"None", []}, + {"Some", [{var, "'T"}]}]}}, + "Chain.ttl" => {[], {variant, [{"FixedTTL", [integer]}, + {"RelativeTTL", [integer]}]}}, + "AENS.pointee" => {[], {variant, [{"AccountPt", [address]}, + {"OraclePt", [address]}, + {"ContractPt", [address]}, + {"ChannelPt", [address]}]}}, + "AENS.name" => {[], {variant, [{"Name", [address, + "Chain.ttl", + {map, [string, "AENS.pointee"]}]}]}}, + "AENSv2.pointee" => {[], {variant, [{"AccountPt", [address]}, + {"OraclePt", [address]}, + {"ContractPt", [address]}, + {"ChannelPt", [address]}, + {"DataPt", [{bytes, [any]}]}]}}, + "AENSv2.name" => {[], {variant, [{"Name", [address, + "Chain.ttl", + {map, [string, "AENSv2.pointee"]}]}]}}, + "Chain.ga_meta_tx" => {[], {variant, [{"GAMetaTx", [address, integer]}]}}, + "Chain.paying_for_tx" => {[], {variant, [{"PayingForTx", [address, integer]}]}}, + "Chain.base_tx" => {[], {variant, [{"SpendTx", [address, integer, string]}, + {"OracleRegisterTx", []}, + {"OracleQueryTx", []}, + {"OracleResponseTx", []}, + {"OracleExtendTx", []}, + {"NamePreclaimTx", []}, + {"NameClaimTx", ["hash"]}, + {"NameUpdateTx", [string]}, + {"NameRevokeTx", ["hash"]}, + {"NameTransferTx", [address, string]}, + {"ChannelCreateTx", [address]}, + {"ChannelDepositTx", [address, integer]}, + {"ChannelWithdrawTx", [address, integer]}, + {"ChannelForceProgressTx", [address]}, + {"ChannelCloseMutualTx", [address]}, + {"ChannelCloseSoloTx", [address]}, + {"ChannelSlashTx", [address]}, + {"ChannelSettleTx", [address]}, + {"ChannelSnapshotSoloTx", [address]}, + {"ContractCreateTx", [integer]}, + {"ContractCallTx", [address, integer]}, + {"GAAttachTx", []}]}}, + "Chain.tx" => {[], {record, [{"paying_for", {"option", ["Chain.paying_for_tx"]}}, + {"ga_metas", {list, ["Chain.ga_meta_tx"]}}, + {"actor", address}, + {"fee", integer}, + {"ttl", integer}, + {"tx", "Chain.base_tx"}]}}, + "MCL_BLS12_381.fr" => {[], {bytes, [32]}}, + "MCL_BLS12_381.fp" => {[], {bytes, [48]}} + }. + +%%% Opaque Type -> Accelerated 'Annotated' Type + +% Type preparation has two goals. First, we need a data structure that can be +% traversed quickly, to take sophia-esque erlang expressions and turn them into +% fate-esque erlang expressions that gmbytecode can serialize. Second, we need +% partially substituted names, so that error messages can be generated for why +% "foobar" is not valid as the third field of a `bazquux`, because the third +% field is supposed to be `option(integer)`, not `string`. +% +% To achieve this we need three representations of each type expression, which +% together form an 'annotated type'. First, we need the fully opaque name, +% "bazquux", then we need the normalized name, which is an opaque name with the +% bare-minimum substitution needed to make the outer-most type-constructor an +% identifiable built-in, ADT, or record type, and then we need the dereferenced +% type, which is the raw {variant, [{Name, Fields}, ...]} or +% {record, [{Name, Type}]} expression that can be used in actual Sophia->FATE +% coercion. The type sub-expressions in these dereferenced types will each be +% fully annotated as well, i.e. they will each contain *all three* of the above +% representations, so that coercion of subexpressions remains fast AND +% informative. +% +% In a lot of cases the opaque type given will already be normalized, in which +% case either the normalized field or the non-normalized field of an annotated +% type can simple be the atom `already_normalized`, which means error messages +% can simply render the normalized type expression and know that the error will +% make sense. + +annotate_function_specs([], _Types, Specs) -> + Specs; +annotate_function_specs([{Name, ArgsOpaque, ResultOpaque} | Rest], Types, Specs) -> + {ok, Args} = annotate_bindings(ArgsOpaque, Types, []), + {ok, Result} = annotate_type(ResultOpaque, Types), + NewSpecs = maps:put(Name, {Args, Result}, Specs), + annotate_function_specs(Rest, Types, NewSpecs). + +annotate_type(T, Types) -> + case normalize_opaque_type(T, Types) of + {ok, AlreadyNormalized, NOpaque, NExpanded} -> + annotate_type2(T, AlreadyNormalized, NOpaque, NExpanded, Types); + Error -> + Error + end. + +annotate_type2(T, _, _, unknown_type, _) -> + % If a type is unknown, then it should not be reported as the normalized + % name. + {ok, {T, unknown_type, unknown_type}}; +annotate_type2(T, AlreadyNormalized, NOpaque, NExpanded, Types) -> + case annotate_type_subexpressions(NExpanded, Types) of + {ok, Flat} -> + case AlreadyNormalized of + true -> {ok, {T, already_normalized, Flat}}; + false -> {ok, {T, NOpaque, Flat}} + end; + Error -> + Error + end. + +annotate_types([T | Rest], Types, Acc) -> + case annotate_type(T, Types) of + {ok, Type} -> annotate_types(Rest, Types, [Type | Acc]); + Error -> Error + end; +annotate_types([], _Types, Acc) -> + {ok, lists:reverse(Acc)}. + +annotate_type_subexpressions(PrimitiveType, _Types) when is_atom(PrimitiveType) -> + {ok, PrimitiveType}; +annotate_type_subexpressions({bytes, [Count]}, _Types) -> + % bytes is weird, because it has an argument, but that argument isn't an + % opaque type. + {ok, {bytes, [Count]}}; +annotate_type_subexpressions({variant, VariantsOpaque}, Types) -> + case annotate_variants(VariantsOpaque, Types, []) of + {ok, Variants} -> {ok, {variant, Variants}}; + Error -> Error + end; +annotate_type_subexpressions({record, FieldsOpaque}, Types) -> + case annotate_bindings(FieldsOpaque, Types, []) of + {ok, Fields} -> {ok, {record, Fields}}; + Error -> Error + end; +annotate_type_subexpressions({T, ElemsOpaque}, Types) -> + case annotate_types(ElemsOpaque, Types, []) of + {ok, Elems} -> {ok, {T, Elems}}; + Error -> Error + end. + +annotate_bindings([{Name, T} | Rest], Types, Acc) -> + case annotate_type(T, Types) of + {ok, Type} -> annotate_bindings(Rest, Types, [{Name, Type} | Acc]); + Error -> Error + end; +annotate_bindings([], _Types, Acc) -> + {ok, lists:reverse(Acc)}. + +annotate_variants([{Name, Elems} | Rest], Types, Acc) -> + case annotate_types(Elems, Types, []) of + {ok, ElemsFlat} -> annotate_variants(Rest, Types, [{Name, ElemsFlat} | Acc]); + Error -> Error + end; +annotate_variants([], _Types, Acc) -> + {ok, lists:reverse(Acc)}. + +% This function evaluates type aliases in a loop, until eventually a usable +% definition is found. +normalize_opaque_type(T, Types) -> normalize_opaque_type(T, Types, true). + +% FIXME detect infinite loops +% FIXME detect builtins with the wrong number of arguments +% FIXME should nullary types have an empty list of arguments added before now? +normalize_opaque_type(T, _Types, IsFirst) when is_atom(T) -> + % Once we have eliminated the above rewrite cases, all other cases are + % handled explicitly by the coerce logic, and so are considered normalized. + {ok, IsFirst, T, T}; +normalize_opaque_type(Type = {T, _}, _Types, IsFirst) when is_atom(T) -> + % Once we have eliminated the above rewrite cases, all other cases are + % handled explicitly by the coerce logic, and so are considered normalized. + {ok, IsFirst, Type, Type}; +normalize_opaque_type(T, Types, IsFirst) when is_list(T) -> + % Lists/strings indicate userspace types, which may require arg + % substitutions. Convert to an explicit but empty arg list, for uniformity. + normalize_opaque_type({T, []}, Types, IsFirst); +normalize_opaque_type({T, TypeArgs}, Types, IsFirst) when is_list(T) -> + case maps:find(T, Types) of + error -> + % We couldn't find this named type... Keep building the AACI, but + % mark this type expression as unknown, so that FATE coercions + % aren't attempted. + {ok, IsFirst, {T, TypeArgs}, unknown_type}; + {ok, {TypeParamNames, Definition}} -> + % We have a definition for this type, including names for whatever + % args we have been given. Subtitute our args into this. + NewType = substitute_opaque_type(TypeParamNames, Definition, TypeArgs), + % Now continue on to see if we need to restart the loop or not. + normalize_opaque_type2(IsFirst, {T, TypeArgs}, NewType, Types) + end. + +normalize_opaque_type2(IsFirst, PrevType, NextType = {variant, _}, _) -> + % We have reduced to a variant. Report the type name as the normalized + % type, but also provide the variant definition itself as the candidate + % flattened type for further annotation. + {ok, IsFirst, PrevType, NextType}; +normalize_opaque_type2(IsFirst, PrevType, NextType = {record, _}, _) -> + % We have reduced to a record. Report the type name as the normalized + % type, but also provide the record definition itself as the candidate + % flattened type for further annotation. + {ok, IsFirst, PrevType, NextType}; +normalize_opaque_type2(_, _, NextType, Types) -> + % Not a variant or record yet, so go back to the start of the loop. + % It will no longer be the first iteration. + normalize_opaque_type(NextType, Types, false). + +% Perform a beta-reduction on a type expression. +substitute_opaque_type([], Definition, _) -> + % There are no parameters to substitute. This is the simplest way of + % defining type aliases, records, and variants, so we should make sure to + % short circuit all the recursive descent logic, since it won't actually + % do anything. + Definition; +substitute_opaque_type(TypeParamNames, Definition, TypeArgs) -> + % Bundle the param names alongside the args that we want to substitute, so + % that we can keyfind the one list. + Bindings = lists:zip(TypeParamNames, TypeArgs), + substitute_opaque_type(Bindings, Definition). + +substitute_opaque_type(Bindings, {var, VarName}) -> + case lists:keyfind(VarName, 1, Bindings) of + {_, TypeArg} -> TypeArg; + % No valid ACI will create this case. Regardless, the user should + % still be able to specify arbitrary gmb FATE terms for whatever this + % is meant to be. + false -> unknown_type + end; +substitute_opaque_type(Bindings, {variant, Variants}) -> + Each = fun({VariantName, Elements}) -> + NewElements = substitute_opaque_types(Bindings, Elements), + {VariantName, NewElements} + end, + NewVariants = lists:map(Each, Variants), + {variant, NewVariants}; +substitute_opaque_type(Bindings, {record, Fields}) -> + Each = fun({FieldName, FieldType}) -> + NewType = substitute_opaque_type(Bindings, FieldType), + {FieldName, NewType} + end, + NewFields = lists:map(Each, Fields), + {record, NewFields}; +substitute_opaque_type(Bindings, {Connective, Args}) -> + NewArgs = substitute_opaque_types(Bindings, Args), + {Connective, NewArgs}; +substitute_opaque_type(_Bindings, Type) -> + Type. + +substitute_opaque_types(Bindings, Types) -> + Each = fun(Type) -> substitute_opaque_type(Bindings, Type) end, + lists:map(Each, Types). + +%%% Coerce + +coerce_bindings(VarTypes, Terms, Direction) -> + DefLength = length(VarTypes), + ArgLength = length(Terms), + if + DefLength =:= ArgLength -> coerce_zipped_bindings(lists:zip(VarTypes, Terms), Direction, arg); + DefLength > ArgLength -> {error, too_few_args}; + DefLength < ArgLength -> {error, too_many_args} + end. + +coerce_zipped_bindings(Bindings, Direction, Tag) -> + coerce_zipped_bindings(Bindings, Direction, Tag, [], []). + +coerce_zipped_bindings([Next | Rest], Direction, Tag, Good, Broken) -> + {{ArgName, Type}, Term} = Next, + case coerce(Type, Term, Direction) of + {ok, NewTerm} -> + coerce_zipped_bindings(Rest, Direction, Tag, [NewTerm | Good], Broken); + {error, Errors} -> + Wrapped = wrap_errors({Tag, ArgName}, Errors), + coerce_zipped_bindings(Rest, Direction, Tag, Good, [Wrapped | Broken]) + end; +coerce_zipped_bindings([], _, _, Good, []) -> + {ok, lists:reverse(Good)}; +coerce_zipped_bindings([], _, _, _, Broken) -> + {error, combine_errors(Broken)}. + +wrap_errors(Location, Errors) -> + F = fun({Error, Path}) -> + {Error, [Location | Path]} + end, + lists:map(F, Errors). + +combine_errors(Broken) -> + F = fun(NextErrors, Acc) -> + NextErrors ++ Acc + end, + lists:foldl(F, [], Broken). + +coerce({_, _, integer}, S, _) when is_integer(S) -> + {ok, S}; +coerce({O, N, integer}, S, to_fate) when is_list(S) -> + try + Val = list_to_integer(S), + {ok, Val} + catch + error:badarg -> single_error({invalid, O, N, S}) + end; +coerce({O, N, address}, S, to_fate) -> + coerce_chain_object(O, N, address, account_pubkey, S); +coerce({_, _, address}, {address, Bin}, from_fate) -> + Address = gmser_api_encoder:encode(account_pubkey, Bin), + {ok, unicode:characters_to_list(Address)}; +coerce({O, N, contract}, S, to_fate) -> + coerce_chain_object(O, N, contract, contract_pubkey, S); +coerce({_, _, contract}, {contract, Bin}, from_fate) -> + Address = gmser_api_encoder:encode(contract_pubkey, Bin), + {ok, unicode:characters_to_list(Address)}; +coerce({_, _, signature}, S, to_fate) when is_binary(S) andalso (byte_size(S) =:= 64) -> + % Usually to pass a binary in, you need to wrap it as {raw, Binary}, but + % since sg_... strings OR hex blobs can be used as signatures in Sophia, we + % special case this case based on the length. Even if a binary starts with + % "sg_", 64 characters is not enough to represent a 64 byte signature, so + % the most optimistic interpretation is to use the binary directly. + {ok, S}; +coerce({O, N, signature}, S, to_fate) -> + coerce_chain_object(O, N, signature, signature, S); +coerce({_, _, signature}, Bin, from_fate) -> + Address = gmser_api_encoder:encode(signature, Bin), + {ok, unicode:characters_to_list(Address)}; +%coerce({_, _, channel}, S, to_fate) when is_binary(S) -> + %{ok, {channel, S}}; +%coerce({_, _, channel}, {channel, S}, from_fate) when is_binary(S) -> + %{ok, S}; +coerce({_, _, boolean}, true, _) -> + {ok, true}; +coerce({_, _, boolean}, "true", _) -> + {ok, true}; +coerce({_, _, boolean}, false, _) -> + {ok, false}; +coerce({_, _, boolean}, "false", _) -> + {ok, false}; +coerce({O, N, boolean}, S, _) -> + single_error({invalid, O, N, S}); +coerce({O, N, string}, Str, Direction) -> + Result = case Direction of + to_fate -> unicode:characters_to_binary(Str); + from_fate -> unicode:characters_to_list(Str) + end, + case Result of + {error, _, _} -> + single_error({invalid, O, N, Str}); + {incomplete, _, _} -> + single_error({invalid, O, N, Str}); + StrBin -> + {ok, StrBin} + end; +coerce({_, _, char}, Val, _Direction) when is_integer(Val) -> + {ok, Val}; +coerce({O, N, char}, Str, to_fate) -> + Result = unicode:characters_to_list(Str), + case Result of + {error, _, _} -> + single_error({invalid, O, N, Str}); + {incomplete, _, _} -> + single_error({invalid, O, N, Str}); + [C] -> + {ok, C}; + _ -> + single_error({invalid, O, N, Str}) + end; +coerce({O, N, {bytes, [Count]}}, Bytes, _Direction) when is_bitstring(Bytes) -> + coerce_bytes(O, N, Count, Bytes); +coerce({_, _, bits}, {bits, Num}, from_fate) -> + {ok, Num}; +coerce({_, _, bits}, Num, to_fate) when is_integer(Num) -> + {ok, {bits, Num}}; +coerce({_, _, bits}, Bits, to_fate) when is_bitstring(Bits) -> + Size = bit_size(Bits), + <> = Bits, + {ok, {bits, IntValue}}; +coerce({_, _, {list, [Type]}}, Data, Direction) when is_list(Data) -> + coerce_list(Type, Data, Direction); +coerce({_, _, {map, [KeyType, ValType]}}, Data, Direction) when is_map(Data) -> + coerce_map(KeyType, ValType, Data, Direction); +coerce({O, N, {tuple, ElementTypes}}, Data, to_fate) when is_tuple(Data) -> + ElementList = tuple_to_list(Data), + coerce_tuple(O, N, ElementTypes, ElementList, to_fate); +coerce({O, N, {tuple, ElementTypes}}, {tuple, Data}, from_fate) -> + ElementList = tuple_to_list(Data), + coerce_tuple(O, N, ElementTypes, ElementList, from_fate); +coerce({O, N, {variant, Variants}}, Data, to_fate) when is_tuple(Data), tuple_size(Data) > 0 -> + [Name | Terms] = tuple_to_list(Data), + case lookup_variant(Name, Variants) of + {Tag, TermTypes} -> + coerce_variant2(O, N, Variants, Name, Tag, TermTypes, Terms, to_fate); + not_found -> + ValidNames = [Valid || {Valid, _} <- Variants], + single_error({invalid_variant, O, N, Name, ValidNames}) + end; +coerce({O, N, {variant, Variants}}, Name, to_fate) when is_list(Name) -> + coerce({O, N, {variant, Variants}}, {Name}, to_fate); +coerce({O, N, {variant, Variants}}, {variant, _, Tag, Tuple}, from_fate) -> + Terms = tuple_to_list(Tuple), + {Name, TermTypes} = lists:nth(Tag + 1, Variants), + coerce_variant2(O, N, Variants, Name, Tag, TermTypes, Terms, from_fate); +coerce({O, N, {record, MemberTypes}}, Map, to_fate) when is_map(Map) -> + coerce_map_to_record(O, N, MemberTypes, Map); +coerce({O, N, {record, MemberTypes}}, {tuple, Tuple}, from_fate) -> + coerce_record_to_map(O, N, MemberTypes, Tuple); +coerce({O, N, {unknown_type, _}}, Data, _) -> + case N of + already_normalized -> + Message = "Warning: Unknown type ~p. Using term ~p as is.~n", + io:format(Message, [O, Data]); + _ -> + Message = "Warning: Unknown type ~p (i.e. ~p). Using term ~p as is.~n", + io:format(Message, [O, N, Data]) + end, + {ok, Data}; +coerce({O, N, _}, Data, from_fate) -> + case N of + already_normalized -> + io:format("Warning: Unimplemented type ~p.~nUsing term as is:~n~p~n", [O, Data]); + _ -> + io:format("Warning: Unimplemented type ~p (i.e. ~p).~nUsing term as is:~n~p~n", [O, N, Data]) + end, + {ok, Data}; +coerce({O, N, _}, Data, _) -> single_error({invalid, O, N, Data}). + +coerce_bytes(O, N, _, Bytes) when bit_size(Bytes) rem 8 /= 0 -> + single_error({partial_bytes, O, N, bit_size(Bytes)}); +coerce_bytes(_, _, any, Bytes) -> + {ok, Bytes}; +coerce_bytes(O, N, Count, Bytes) when byte_size(Bytes) /= Count -> + single_error({incorrect_size, O, N, Bytes}); +coerce_bytes(_, _, _, Bytes) -> + {ok, Bytes}. + +coerce_chain_object(_, _, _, _, {raw, Binary}) -> + {ok, Binary}; +coerce_chain_object(O, N, T, Tag, S) -> + case decode_chain_object(Tag, S) of + {ok, Data} -> {ok, coerce_chain_object2(T, Data)}; + {error, Reason} -> single_error({Reason, O, N, S}) + end. + +coerce_chain_object2(address, Data) -> {address, Data}; +coerce_chain_object2(contract, Data) -> {contract, Data}; +coerce_chain_object2(signature, Data) -> Data. + +decode_chain_object(Tag, S) -> + try + case gmser_api_encoder:decode(unicode:characters_to_binary(S)) of + {Tag, Data} -> {ok, Data}; + {_, _} -> {error, wrong_prefix} + end + catch + error:missing_prefix -> {error, missing_prefix}; + error:incorrect_size -> {error, incorrect_size} + end. + +coerce_list(Type, Elements, Direction) -> + % 0 index since it represents a sophia list + coerce_list(Type, Elements, Direction, 0, [], []). + +coerce_list(Type, [Next | Rest], Direction, Index, Good, Broken) -> + case coerce(Type, Next, Direction) of + {ok, Coerced} -> coerce_list(Type, Rest, Direction, Index + 1, [Coerced | Good], Broken); + {error, Errors} -> + Wrapped = wrap_errors({index, Index}, Errors), + coerce_list(Type, Rest, Direction, Index + 1, Good, [Wrapped | Broken]) + end; +coerce_list(_Type, [], _, _, Good, []) -> + {ok, lists:reverse(Good)}; +coerce_list(_, [], _, _, _, Broken) -> + {error, combine_errors(Broken)}. + +coerce_map(KeyType, ValType, Data, Direction) -> + coerce_map(KeyType, ValType, maps:iterator(Data), Direction, #{}, []). + +coerce_map(KeyType, ValType, Remaining, Direction, Good, Broken) -> + case maps:next(Remaining) of + {K, V, RemainingAfter} -> + coerce_map2(KeyType, ValType, RemainingAfter, Direction, Good, Broken, K, V); + none -> + coerce_map_finish(Good, Broken) + end. + +coerce_map2(KeyType, ValType, Remaining, Direction, Good, Broken, K, V) -> + case coerce(KeyType, K, Direction) of + {ok, KFATE} -> + coerce_map3(KeyType, ValType, Remaining, Direction, Good, Broken, K, V, KFATE); + {error, Errors} -> + Wrapped = wrap_errors(map_key, Errors), + % Continue as if the key coerced successfully, so that we can give + % errors for both the key and the value. + coerce_map3(KeyType, ValType, Remaining, Direction, Good, [Wrapped | Broken], K, V, error) + end. + +coerce_map3(KeyType, ValType, Remaining, Direction, Good, Broken, K, V, KFATE) -> + case coerce(ValType, V, Direction) of + {ok, VFATE} -> + NewGood = Good#{KFATE => VFATE}, + coerce_map(KeyType, ValType, Remaining, Direction, NewGood, Broken); + {error, Errors} -> + Wrapped = wrap_errors({map_value, K}, Errors), + coerce_map(KeyType, ValType, Remaining, Direction, Good, [Wrapped | Broken]) + end. + +coerce_map_finish(Good, []) -> + {ok, Good}; +coerce_map_finish(_, Broken) -> + {error, combine_errors(Broken)}. + +lookup_variant(Name, Variants) -> lookup_variant(Name, Variants, 0). + +lookup_variant(Name, [{Name, Terms} | _], Tag) -> + {Tag, Terms}; +lookup_variant(Name, [_ | Rest], Tag) -> + lookup_variant(Name, Rest, Tag + 1); +lookup_variant(_Name, [], _Tag) -> + not_found. + +coerce_tuple(O, N, TermTypes, Terms, Direction) -> + case coerce_tuple_elements(TermTypes, Terms, Direction, tuple_element) of + {ok, Converted} -> + case Direction of + to_fate -> {ok, {tuple, list_to_tuple(Converted)}}; + from_fate -> {ok, list_to_tuple(Converted)} + end; + {error, too_few_terms} -> + single_error({tuple_too_few_terms, O, N, list_to_tuple(Terms)}); + {error, too_many_terms} -> + single_error({tuple_too_many_terms, O, N, list_to_tuple(Terms)}); + Errors -> Errors + end. + +% Wraps a single error in a list, along with an empty path, so that other +% accumulating error handlers can work with it. +single_error(Reason) -> + {error, [{Reason, []}]}. + +coerce_variant2(O, N, Variants, Name, Tag, TermTypes, Terms, Direction) -> + % FIXME: we could go through and add the variant tag to the adt_element + % paths? + case coerce_tuple_elements(TermTypes, Terms, Direction, adt_element) of + {ok, Converted} -> + case Direction of + to_fate -> + Arities = [length(VariantTerms) + || {_, VariantTerms} <- Variants], + {ok, {variant, Arities, Tag, list_to_tuple(Converted)}}; + from_fate -> + {ok, list_to_tuple([Name | Converted])} + end; + {error, too_few_terms} -> + single_error({adt_too_few_terms, O, N, Name, TermTypes, Terms}); + {error, too_many_terms} -> + single_error({adt_too_many_terms, O, N, Name, TermTypes, Terms}); + Errors -> Errors + end. + +coerce_tuple_elements(Types, Terms, Direction, Tag) -> + % The sophia standard library uses 0 indexing for lists, and fst/snd/thd + % for tuples... Not sure how we should report errors in tuples, then. + coerce_tuple_elements(Types, Terms, Direction, Tag, 0, [], []). + +coerce_tuple_elements([Type | Types], [Term | Terms], Direction, Tag, Index, Good, Broken) -> + case coerce(Type, Term, Direction) of + {ok, Value} -> + coerce_tuple_elements(Types, Terms, Direction, Tag, Index + 1, [Value | Good], Broken); + {error, Errors} -> + Wrapped = wrap_errors({Tag, Index}, Errors), + coerce_tuple_elements(Types, Terms, Direction, Tag, Index + 1, Good, [Wrapped | Broken]) + end; +coerce_tuple_elements([], [], _, _, _, Good, []) -> + {ok, lists:reverse(Good)}; +coerce_tuple_elements([], [], _, _, _, _, Broken) -> + {error, combine_errors(Broken)}; +coerce_tuple_elements(_, [], _, _, _, _, _) -> + {error, too_few_terms}; +coerce_tuple_elements([], _, _, _, _, _, _) -> + {error, too_many_terms}. + +coerce_map_to_record(O, N, MemberTypes, Map) -> + case zip_record_fields(MemberTypes, Map) of + {ok, Zipped} -> + case coerce_zipped_bindings(Zipped, to_fate, field) of + {ok, Converted} -> + {ok, {tuple, list_to_tuple(Converted)}}; + Errors -> + Errors + end; + {error, {missing_fields, Missing}} -> + single_error({missing_fields, O, N, Missing}); + {error, {unexpected_fields, Unexpected}} -> + Names = [Name || {Name, _} <- maps:to_list(Unexpected)], + single_error({unexpected_fields, O, N, Names}) + end. + +coerce_record_to_map(O, N, MemberTypes, Tuple) -> + Names = [Name || {Name, _} <- MemberTypes], + Types = [Type || {_, Type} <- MemberTypes], + Terms = tuple_to_list(Tuple), + % FIXME: We could go through and change the record_element paths into field + % paths? + case coerce_tuple_elements(Types, Terms, from_fate, record_element) of + {ok, Converted} -> + Map = maps:from_list(lists:zip(Names, Converted)), + {ok, Map}; + {error, too_few_terms} -> + single_error({record_too_few_terms, O, N, Tuple}); + {error, too_many_terms} -> + single_error({record_too_many_terms, O, N, Tuple}); + Errors -> + Errors + end. + +zip_record_fields(Fields, Map) -> + case lists:mapfoldl(fun zip_record_field/2, {Map, []}, Fields) of + {_, {_, Missing = [_|_]}} -> + {error, {missing_fields, lists:reverse(Missing)}}; + {_, {Remaining, _}} when map_size(Remaining) > 0 -> + {error, {unexpected_fields, Remaining}}; + {Zipped, _} -> + {ok, Zipped} + end. + +zip_record_field({Name, Type}, {Remaining, Missing}) -> + case maps:take(Name, Remaining) of + {Term, RemainingAfter} -> + ZippedTerm = {{Name, Type}, Term}, + {ZippedTerm, {RemainingAfter, Missing}}; + error -> + {missing, {Remaining, [Name | Missing]}} + end. + + +%%% AACI Getters + +-spec aaci_get_function_signature(AACI, Fun) -> {ok, Type} | {error, Reason} + when AACI :: aaci(), + Fun :: binary() | string(), + Type :: {term(), term()}, % FIXME + Reason :: bad_fun_name. + +%% @doc +%% Look up the type information of a given function, in the AACI provided by +%% prepare_contract/1. This type information, particularly the return type, is +%% useful for calling decode_bytearray/2. + +aaci_get_function_signature({aaci, _, FunDefs, _}, Fun) -> + case maps:find(Fun, FunDefs) of + {ok, A} -> {ok, A}; + error -> {error, bad_fun_name} + end. + + +%%% Simple coerce/3 tests + +% Round trip coerce run for the eunit tests below. If these results don't match +% then the test should fail. +try_coerce(Type, Sophia, Fate) -> + % Run both first, to see if they fail to produce any result. + {ok, FateActual} = coerce(Type, Sophia, to_fate), + {ok, SophiaActual} = coerce(Type, Fate, from_fate), + % Now check that the results were what we expected. + case FateActual of + Fate -> + ok; + _ -> + erlang:error({to_fate_failed, Fate, FateActual}) + end, + case SophiaActual of + Sophia -> + ok; + _ -> + erlang:error({from_fate_failed, Sophia, SophiaActual}) + end, + % Finally, check that the FATE result is something that gmb understands. + gmb_fate_encoding:serialize(Fate), + ok. + +coerce_int_test() -> + {ok, Type} = annotate_type(integer, #{}), + try_coerce(Type, 123, 123). + +coerce_address_test() -> + {ok, Type} = annotate_type(address, #{}), + try_coerce(Type, + "ak_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx", + {address, <<164,136,155,90,124,22,40,206,255,76,213,56,238,123, + 167,208,53,78,40,235,2,163,132,36,47,183,228,151,9, + 210,39,214>>}). + +coerce_contract_test() -> + {ok, Type} = annotate_type(contract, #{}), + try_coerce(Type, + "ct_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx", + {contract, <<164,136,155,90,124,22,40,206,255,76,213,56,238,123, + 167,208,53,78,40,235,2,163,132,36,47,183,228,151,9, + 210,39,214>>}). + +coerce_signature_test() -> + {ok, Type} = annotate_type(signature, #{}), + try_coerce(Type, + "sg_XDyF8LJC4tpMyAySvpaG1f5V9F2XxAbRx9iuVjvvdNMwVracLhzAuXhRM5kXAFtpwW1DCHuz5jGehUayCah4jub32Ti2n", + <<231,4,97,129,16,173,37,42,194,249,28,94,134,163,208,84,22,135, + 169,85,212,142,14,12,233,252,97,50,193,158,229,51,123,206,222, + 249,2,3,85,173,106,150,243,253,89,128,248,52,195,140,95,114, + 233,110,119,143,206,137,124,36,63,154,85,7>>). + +coerce_signature_binary_test() -> + {ok, Type} = annotate_type(signature, #{}), + Binary = <<231,4,97,129,16,173,37,42,194,249,28,94,134,163,208,84,22,135, + 169,85,212,142,14,12,233,252,97,50,193,158,229,51,123,206,222, + 249,2,3,85,173,106,150,243,253,89,128,248,52,195,140,95,114, + 233,110,119,143,206,137,124,36,63,154,85,7>>, + {ok, Binary} = coerce(Type, {raw, Binary}, to_fate), + {ok, Binary} = coerce(Type, Binary, to_fate), + ok. + +coerce_bool_test() -> + {ok, Type} = annotate_type(boolean, #{}), + try_coerce(Type, true, true), + try_coerce(Type, false, false). + +coerce_string_test() -> + {ok, Type} = annotate_type(string, #{}), + try_coerce(Type, "hello world", <<"hello world">>). + +coerce_list_test() -> + {ok, Type} = annotate_type({list, [string]}, #{}), + try_coerce(Type, ["hello world", [65, 32, 65]], [<<"hello world">>, <<65, 32, 65>>]). + +coerce_map_test() -> + {ok, Type} = annotate_type({map, [string, {list, [integer]}]}, #{}), + try_coerce(Type, #{"a" => "a", "b" => "b"}, #{<<"a">> => "a", <<"b">> => "b"}). + +coerce_tuple_test() -> + {ok, Type} = annotate_type({tuple, [integer, string]}, #{}), + try_coerce(Type, {123, "456"}, {tuple, {123, <<"456">>}}). + +coerce_variant_test() -> + {ok, Type} = annotate_type({variant, [{"A", [integer]}, + {"B", [integer, integer]}]}, + #{}), + try_coerce(Type, {"A", 123}, {variant, [1, 2], 0, {123}}), + try_coerce(Type, {"B", 456, 789}, {variant, [1, 2], 1, {456, 789}}). + +coerce_option_test() -> + {ok, Type} = annotate_type({"option", [integer]}, builtin_typedefs()), + try_coerce(Type, {"None"}, {variant, [0, 1], 0, {}}), + try_coerce(Type, {"Some", 1}, {variant, [0, 1], 1, {1}}). + +coerce_record_test() -> + {ok, Type} = annotate_type({record, [{"a", integer}, {"b", integer}]}, #{}), + try_coerce(Type, #{"a" => 123, "b" => 456}, {tuple, {123, 456}}). + +coerce_bytes_test() -> + {ok, Type} = annotate_type({tuple, [{bytes, [4]}, {bytes, [any]}]}, #{}), + try_coerce(Type, {<<"abcd">>, <<"efghi">>}, {tuple, {<<"abcd">>, <<"efghi">>}}). + +coerce_bits_test() -> + {ok, Type} = annotate_type(bits, #{}), + try_coerce(Type, 5, {bits, 5}). + +coerce_char_test() -> + {ok, Type} = annotate_type(char, #{}), + try_coerce(Type, $?, $?). + +coerce_unicode_test() -> + {ok, Type} = annotate_type(char, #{}), + % Latin Small Letter C with cedilla and acute + {ok, $ḉ} = coerce(Type, <<"ḉ"/utf8>>, to_fate), + ok. + +coerce_hash_test() -> + {ok, Type} = annotate_type("hash", builtin_typedefs()), + Hash = list_to_binary(lists:seq(1,32)), + try_coerce(Type, Hash, Hash), + ok. + + +%%% Complex AACI paramter and namespace tests + +aaci_from_string(String) -> + case so_compiler:from_string(String, [{aci, json}]) of + {ok, #{aci := ACI}} -> {ok, prepare_aaci(ACI)}; + Error -> Error + end. + +namespace_coerce_test() -> + Contract = " + namespace N = + record pair = { a : int, b : int } + + contract C = + entrypoint f(): N.pair = { a = 1, b = 2 } + ", + {ok, AACI} = aaci_from_string(Contract), + {ok, {[], Output}} = aaci_get_function_signature(AACI, "f"), + try_coerce(Output, #{"a" => 123, "b" => 456}, {tuple, {123, 456}}). + +record_substitution_test() -> + Contract = " + contract C = + record pair('t) = { a : 't, b : 't } + entrypoint f(): pair(int) = { a = 1, b = 2 } + ", + {ok, AACI} = aaci_from_string(Contract), + {ok, {[], Output}} = aaci_get_function_signature(AACI, "f"), + try_coerce(Output, #{"a" => 123, "b" => 456}, {tuple, {123, 456}}). + +tuple_substitution_test() -> + Contract = " + contract C = + type triple('t1, 't2) = int * 't1 * 't2 + entrypoint f(): triple(int, string) = (1, 2, \"hello\") + ", + {ok, AACI} = aaci_from_string(Contract), + {ok, {[], Output}} = aaci_get_function_signature(AACI, "f"), + try_coerce(Output, {1, 2, "hello"}, {tuple, {1, 2, <<"hello">>}}). + +variant_substitution_test() -> + Contract = " + contract C = + datatype adt('a, 'b) = Left('a, 'b) | Right('b, int) + entrypoint f(): adt(string, int) = Left(\"hi\", 1) + ", + {ok, AACI} = aaci_from_string(Contract), + {ok, {[], Output}} = aaci_get_function_signature(AACI, "f"), + try_coerce(Output, {"Left", "hi", 1}, {variant, [2, 2], 0, {<<"hi">>, 1}}), + try_coerce(Output, {"Right", 2, 3}, {variant, [2, 2], 1, {2, 3}}). + +nested_coerce_test() -> + Contract = " + contract C = + type pair('t) = 't * 't + record r = { f1 : pair(int), f2: pair(string) } + entrypoint f(): r = { f1 = (1, 2), f2 = (\"a\", \"b\") } + ", + {ok, AACI} = aaci_from_string(Contract), + {ok, {[], Output}} = aaci_get_function_signature(AACI, "f"), + try_coerce(Output, + #{ "f1" => {1, 2}, "f2" => {"a", "b"}}, + {tuple, {{tuple, {1, 2}}, {tuple, {<<"a">>, <<"b">>}}}}). + +state_coerce_test() -> + Contract = " + contract C = + type state = int + entrypoint init(): state = 0 + ", + {ok, AACI} = aaci_from_string(Contract), + {ok, {[], Output}} = aaci_get_function_signature(AACI, "init"), + try_coerce(Output, 0, 0). + +param_test() -> + Contract = " + contract C = + type state = int + entrypoint init(x): state = x + ", + {ok, AACI} = aaci_from_string(Contract), + {ok, {[{"x", Input}], Output}} = aaci_get_function_signature(AACI, "init"), + try_coerce(Input, 0, 0), + try_coerce(Output, 0, 0). + +%%% Obscure Sophia types where we should check the AACI as well + +obscure_aaci_test() -> + Contract = " + include \"Set.aes\" + contract C = + entrypoint options(): option(int) = None + entrypoint fixed_bytes(): bytes(4) = #DEADBEEF + entrypoint any_bytes(): bytes() = Bytes.to_any_size(#112233) + entrypoint bits(): bits = Bits.all + entrypoint character(): char = 'a' + entrypoint hash(): hash = #00112233445566778899AABBCCDDEEFF00112233445566778899AABBCCDDEEFF + entrypoint unit(): unit = () + + entrypoint ttl(x): Chain.ttl = FixedTTL(x) + entrypoint paying_for(x, y): Chain.paying_for_tx = Chain.PayingForTx(x, y) + entrypoint ga_meta_tx(x, y): Chain.ga_meta_tx = Chain.GAMetaTx(x, y) + entrypoint base_tx(x, y, z): Chain.base_tx = Chain.SpendTx(x, y, z) + entrypoint tx(a, b, c, d, e, f): Chain.tx = + {paying_for = a, + ga_metas = b, + actor = c, + fee = d, + ttl = e, + tx = f} + + entrypoint pointee(x): AENS.pointee = AENS.AccountPt(x) + entrypoint name(x, y, z): AENS.name = AENS.Name(x, y, z) + entrypoint pointee2(x): AENSv2.pointee = AENSv2.DataPt(x) + entrypoint name2(x, y, z): AENSv2.name = AENSv2.Name(x, y, z) + + entrypoint fr(x): MCL_BLS12_381.fr = x + entrypoint fp(x): MCL_BLS12_381.fp = x + + entrypoint set(): Set.set(int) = Set.new() + + ", + {ok, AACI} = aaci_from_string(Contract), + + {ok, {[], {{bytes, [4]}, _, _}}} = aaci_get_function_signature(AACI, "fixed_bytes"), + {ok, {[], {{bytes, [any]}, _, _}}} = aaci_get_function_signature(AACI, "any_bytes"), + {ok, {[], {bits, _, _}}} = aaci_get_function_signature(AACI, "bits"), + {ok, {[], {char, _, _}}} = aaci_get_function_signature(AACI, "character"), + + {ok, {[], {{"option", [integer]}, _, {variant, [{"None", []}, {"Some", [_]}]}}}} = aaci_get_function_signature(AACI, "options"), + {ok, {[], {"hash", _, {bytes, [32]}}}} = aaci_get_function_signature(AACI, "hash"), + {ok, {[], {"unit", _, {tuple, []}}}} = aaci_get_function_signature(AACI, "unit"), + + {ok, {_, {"Chain.ttl", _, {variant, _}}}} = aaci_get_function_signature(AACI, "ttl"), + {ok, {_, {"Chain.paying_for_tx", _, {variant, _}}}} = aaci_get_function_signature(AACI, "paying_for"), + {ok, {_, {"Chain.ga_meta_tx", _, {variant, _}}}} = aaci_get_function_signature(AACI, "ga_meta_tx"), + {ok, {_, {"Chain.base_tx", _, {variant, _}}}} = aaci_get_function_signature(AACI, "base_tx"), + {ok, {_, {"Chain.tx", _, {record, _}}}} = aaci_get_function_signature(AACI, "tx"), + + {ok, {_, {"AENS.pointee", _, {variant, _}}}} = aaci_get_function_signature(AACI, "pointee"), + {ok, {_, {"AENS.name", _, {variant, _}}}} = aaci_get_function_signature(AACI, "name"), + {ok, {_, {"AENSv2.pointee", _, {variant, _}}}} = aaci_get_function_signature(AACI, "pointee2"), + {ok, {_, {"AENSv2.name", _, {variant, _}}}} = aaci_get_function_signature(AACI, "name2"), + + {ok, {_, {"MCL_BLS12_381.fr", _, {bytes, [32]}}}} = aaci_get_function_signature(AACI, "fr"), + {ok, {_, {"MCL_BLS12_381.fp", _, {bytes, [48]}}}} = aaci_get_function_signature(AACI, "fp"), + + {ok, {[], {{"Set.set", [integer]}, _, {record, [{"to_map", _}]}}}} = aaci_get_function_signature(AACI, "set"), + + ok. + +name_coerce_test() -> + AddrSoph = "ak_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx", + AddrFate = {address, <<164,136,155,90,124,22,40,206,255,76,213,56,238,123, + 167,208,53,78,40,235,2,163,132,36,47,183,228,151,9, + 210,39,214>>}, + {ok, TTL} = annotate_type("Chain.ttl", builtin_typedefs()), + TTLSoph = {"FixedTTL", 0}, + TTLFate = {variant, [1, 1], 0, {0}}, + try_coerce(TTL, TTLSoph, TTLFate), + {ok, Pointee} = annotate_type("AENS.pointee", builtin_typedefs()), + PointeeSoph = {"AccountPt", AddrSoph}, + PointeeFate = {variant, [1, 1, 1, 1], 0, {AddrFate}}, + try_coerce(Pointee, PointeeSoph, PointeeFate), + {ok, Name} = annotate_type("AENS.name", builtin_typedefs()), + NameSoph = {"Name", AddrSoph, TTLSoph, #{"myname" => PointeeSoph}}, + NameFate = {variant, [3], 0, {AddrFate, TTLFate, #{<<"myname">> => PointeeFate}}}, + try_coerce(Name, NameSoph, NameFate). + +void_coerce_test() -> + % Void itself can't be represented, but other types built out of void are + % valid. + {ok, NonOption} = annotate_type({"option", ["void"]}, builtin_typedefs()), + try_coerce(NonOption, {"None"}, {variant, [0, 1], 0, {}}), + {ok, NonList} = annotate_type({list, ["void"]}, builtin_typedefs()), + try_coerce(NonList, [], []). + From 3da9bd570babc6fcb77325bd58e4f1ecc9761755 Mon Sep 17 00:00:00 2001 From: Jarvis Carroll Date: Fri, 9 Jan 2026 04:39:58 +0000 Subject: [PATCH 02/42] split coerce/3 into two functions Also renamed coerce_bindings to erlang_args_to_fate, to match. --- src/hz.erl | 4 +- src/hz_aaci.erl | 365 +++++++++++++++++++++++++++--------------------- 2 files changed, 204 insertions(+), 165 deletions(-) diff --git a/src/hz.erl b/src/hz.erl index 277f122..c36dbc3 100644 --- a/src/hz.erl +++ b/src/hz.erl @@ -716,7 +716,7 @@ decode_bytearray_fate(EncodedStr) -> decode_bytearray(Type, EncodedStr) -> case decode_bytearray_fate(EncodedStr) of {ok, none} -> {ok, none}; - {ok, Object} -> hz_aaci:coerce(Type, Object, from_fate); + {ok, Object} -> hz_aaci:fate_to_erlang(Type, Object); {error, Reason} -> {error, Reason} end. @@ -1518,7 +1518,7 @@ encode_call_data({aaci, Label}, Fun, Args) -> end. encode_call_data2(ArgDef, Fun, Args) -> - case hz_aaci:coerce_bindings(ArgDef, Args, to_fate) of + case hz_aaci:erlang_args_to_fate(ArgDef, Args) of {ok, Coerced} -> gmb_fate_abi:create_calldata(Fun, Coerced); Errors -> Errors end. diff --git a/src/hz_aaci.erl b/src/hz_aaci.erl index 0d56ef7..2d21846 100644 --- a/src/hz_aaci.erl +++ b/src/hz_aaci.erl @@ -18,8 +18,9 @@ % Contract call and serialization interface functions -export([prepare_contract/1, prepare_aaci/1, - coerce/3, - coerce_bindings/3, + erlang_to_fate/2, + fate_to_erlang/2, + erlang_args_to_fate/2, aaci_get_function_signature/2]). %%% Types @@ -441,97 +442,51 @@ substitute_opaque_types(Bindings, Types) -> Each = fun(Type) -> substitute_opaque_type(Bindings, Type) end, lists:map(Each, Types). -%%% Coerce +%%% Erlang to FATE -coerce_bindings(VarTypes, Terms, Direction) -> +erlang_args_to_fate(VarTypes, Terms) -> DefLength = length(VarTypes), ArgLength = length(Terms), if - DefLength =:= ArgLength -> coerce_zipped_bindings(lists:zip(VarTypes, Terms), Direction, arg); + DefLength =:= ArgLength -> coerce_zipped_bindings(lists:zip(VarTypes, Terms), to_fate, arg); DefLength > ArgLength -> {error, too_few_args}; DefLength < ArgLength -> {error, too_many_args} end. -coerce_zipped_bindings(Bindings, Direction, Tag) -> - coerce_zipped_bindings(Bindings, Direction, Tag, [], []). - -coerce_zipped_bindings([Next | Rest], Direction, Tag, Good, Broken) -> - {{ArgName, Type}, Term} = Next, - case coerce(Type, Term, Direction) of - {ok, NewTerm} -> - coerce_zipped_bindings(Rest, Direction, Tag, [NewTerm | Good], Broken); - {error, Errors} -> - Wrapped = wrap_errors({Tag, ArgName}, Errors), - coerce_zipped_bindings(Rest, Direction, Tag, Good, [Wrapped | Broken]) - end; -coerce_zipped_bindings([], _, _, Good, []) -> - {ok, lists:reverse(Good)}; -coerce_zipped_bindings([], _, _, _, Broken) -> - {error, combine_errors(Broken)}. - -wrap_errors(Location, Errors) -> - F = fun({Error, Path}) -> - {Error, [Location | Path]} - end, - lists:map(F, Errors). - -combine_errors(Broken) -> - F = fun(NextErrors, Acc) -> - NextErrors ++ Acc - end, - lists:foldl(F, [], Broken). - -coerce({_, _, integer}, S, _) when is_integer(S) -> +erlang_to_fate({_, _, integer}, S) when is_integer(S) -> {ok, S}; -coerce({O, N, integer}, S, to_fate) when is_list(S) -> +erlang_to_fate({O, N, integer}, S) when is_list(S) -> try Val = list_to_integer(S), {ok, Val} catch error:badarg -> single_error({invalid, O, N, S}) end; -coerce({O, N, address}, S, to_fate) -> +erlang_to_fate({O, N, address}, S) -> coerce_chain_object(O, N, address, account_pubkey, S); -coerce({_, _, address}, {address, Bin}, from_fate) -> - Address = gmser_api_encoder:encode(account_pubkey, Bin), - {ok, unicode:characters_to_list(Address)}; -coerce({O, N, contract}, S, to_fate) -> +erlang_to_fate({O, N, contract}, S) -> coerce_chain_object(O, N, contract, contract_pubkey, S); -coerce({_, _, contract}, {contract, Bin}, from_fate) -> - Address = gmser_api_encoder:encode(contract_pubkey, Bin), - {ok, unicode:characters_to_list(Address)}; -coerce({_, _, signature}, S, to_fate) when is_binary(S) andalso (byte_size(S) =:= 64) -> +erlang_to_fate({_, _, signature}, S) when is_binary(S) andalso (byte_size(S) =:= 64) -> % Usually to pass a binary in, you need to wrap it as {raw, Binary}, but % since sg_... strings OR hex blobs can be used as signatures in Sophia, we % special case this case based on the length. Even if a binary starts with % "sg_", 64 characters is not enough to represent a 64 byte signature, so % the most optimistic interpretation is to use the binary directly. {ok, S}; -coerce({O, N, signature}, S, to_fate) -> +erlang_to_fate({O, N, signature}, S) -> coerce_chain_object(O, N, signature, signature, S); -coerce({_, _, signature}, Bin, from_fate) -> - Address = gmser_api_encoder:encode(signature, Bin), - {ok, unicode:characters_to_list(Address)}; -%coerce({_, _, channel}, S, to_fate) when is_binary(S) -> +%erlang_to_fate({_, _, channel}, S) when is_binary(S) -> %{ok, {channel, S}}; -%coerce({_, _, channel}, {channel, S}, from_fate) when is_binary(S) -> - %{ok, S}; -coerce({_, _, boolean}, true, _) -> +erlang_to_fate({_, _, boolean}, true) -> {ok, true}; -coerce({_, _, boolean}, "true", _) -> +erlang_to_fate({_, _, boolean}, "true") -> {ok, true}; -coerce({_, _, boolean}, false, _) -> +erlang_to_fate({_, _, boolean}, false) -> {ok, false}; -coerce({_, _, boolean}, "false", _) -> +erlang_to_fate({_, _, boolean}, "false") -> {ok, false}; -coerce({O, N, boolean}, S, _) -> - single_error({invalid, O, N, S}); -coerce({O, N, string}, Str, Direction) -> - Result = case Direction of - to_fate -> unicode:characters_to_binary(Str); - from_fate -> unicode:characters_to_list(Str) - end, - case Result of +erlang_to_fate({O, N, string}, Str) -> + case unicode:characters_to_binary(Str) of {error, _, _} -> single_error({invalid, O, N, Str}); {incomplete, _, _} -> @@ -539,9 +494,9 @@ coerce({O, N, string}, Str, Direction) -> StrBin -> {ok, StrBin} end; -coerce({_, _, char}, Val, _Direction) when is_integer(Val) -> +erlang_to_fate({_, _, char}, Val) when is_integer(Val) -> {ok, Val}; -coerce({O, N, char}, Str, to_fate) -> +erlang_to_fate({O, N, char}, Str) -> Result = unicode:characters_to_list(Str), case Result of {error, _, _} -> @@ -553,27 +508,24 @@ coerce({O, N, char}, Str, to_fate) -> _ -> single_error({invalid, O, N, Str}) end; -coerce({O, N, {bytes, [Count]}}, Bytes, _Direction) when is_bitstring(Bytes) -> +erlang_to_fate({O, N, {bytes, [Count]}}, Bytes) when is_bitstring(Bytes) -> coerce_bytes(O, N, Count, Bytes); -coerce({_, _, bits}, {bits, Num}, from_fate) -> - {ok, Num}; -coerce({_, _, bits}, Num, to_fate) when is_integer(Num) -> +erlang_to_fate({_, _, bits}, Num) when is_integer(Num) -> {ok, {bits, Num}}; -coerce({_, _, bits}, Bits, to_fate) when is_bitstring(Bits) -> +erlang_to_fate({_, _, bits}, Bits) when is_bitstring(Bits) -> Size = bit_size(Bits), <> = Bits, {ok, {bits, IntValue}}; -coerce({_, _, {list, [Type]}}, Data, Direction) when is_list(Data) -> - coerce_list(Type, Data, Direction); -coerce({_, _, {map, [KeyType, ValType]}}, Data, Direction) when is_map(Data) -> - coerce_map(KeyType, ValType, Data, Direction); -coerce({O, N, {tuple, ElementTypes}}, Data, to_fate) when is_tuple(Data) -> +erlang_to_fate({_, _, {list, [Type]}}, Data) when is_list(Data) -> + coerce_list(Type, Data, to_fate); +erlang_to_fate({_, _, {map, [KeyType, ValType]}}, Data) when is_map(Data) -> + coerce_map(KeyType, ValType, Data, to_fate); +erlang_to_fate({O, N, {tuple, ElementTypes}}, Data) when is_tuple(Data) -> ElementList = tuple_to_list(Data), coerce_tuple(O, N, ElementTypes, ElementList, to_fate); -coerce({O, N, {tuple, ElementTypes}}, {tuple, Data}, from_fate) -> - ElementList = tuple_to_list(Data), - coerce_tuple(O, N, ElementTypes, ElementList, from_fate); -coerce({O, N, {variant, Variants}}, Data, to_fate) when is_tuple(Data), tuple_size(Data) > 0 -> +erlang_to_fate({O, N, {variant, Variants}}, Name) when is_list(Name) -> + erlang_to_fate({O, N, {variant, Variants}}, {Name}); +erlang_to_fate({O, N, {variant, Variants}}, Data) when is_tuple(Data), tuple_size(Data) > 0 -> [Name | Terms] = tuple_to_list(Data), case lookup_variant(Name, Variants) of {Tag, TermTypes} -> @@ -582,17 +534,9 @@ coerce({O, N, {variant, Variants}}, Data, to_fate) when is_tuple(Data), tuple_si ValidNames = [Valid || {Valid, _} <- Variants], single_error({invalid_variant, O, N, Name, ValidNames}) end; -coerce({O, N, {variant, Variants}}, Name, to_fate) when is_list(Name) -> - coerce({O, N, {variant, Variants}}, {Name}, to_fate); -coerce({O, N, {variant, Variants}}, {variant, _, Tag, Tuple}, from_fate) -> - Terms = tuple_to_list(Tuple), - {Name, TermTypes} = lists:nth(Tag + 1, Variants), - coerce_variant2(O, N, Variants, Name, Tag, TermTypes, Terms, from_fate); -coerce({O, N, {record, MemberTypes}}, Map, to_fate) when is_map(Map) -> +erlang_to_fate({O, N, {record, MemberTypes}}, Map) when is_map(Map) -> coerce_map_to_record(O, N, MemberTypes, Map); -coerce({O, N, {record, MemberTypes}}, {tuple, Tuple}, from_fate) -> - coerce_record_to_map(O, N, MemberTypes, Tuple); -coerce({O, N, {unknown_type, _}}, Data, _) -> +erlang_to_fate({O, N, {unknown_type, _}}, Data) -> case N of already_normalized -> Message = "Warning: Unknown type ~p. Using term ~p as is.~n", @@ -602,24 +546,7 @@ coerce({O, N, {unknown_type, _}}, Data, _) -> io:format(Message, [O, N, Data]) end, {ok, Data}; -coerce({O, N, _}, Data, from_fate) -> - case N of - already_normalized -> - io:format("Warning: Unimplemented type ~p.~nUsing term as is:~n~p~n", [O, Data]); - _ -> - io:format("Warning: Unimplemented type ~p (i.e. ~p).~nUsing term as is:~n~p~n", [O, N, Data]) - end, - {ok, Data}; -coerce({O, N, _}, Data, _) -> single_error({invalid, O, N, Data}). - -coerce_bytes(O, N, _, Bytes) when bit_size(Bytes) rem 8 /= 0 -> - single_error({partial_bytes, O, N, bit_size(Bytes)}); -coerce_bytes(_, _, any, Bytes) -> - {ok, Bytes}; -coerce_bytes(O, N, Count, Bytes) when byte_size(Bytes) /= Count -> - single_error({incorrect_size, O, N, Bytes}); -coerce_bytes(_, _, _, Bytes) -> - {ok, Bytes}. +erlang_to_fate({O, N, _}, Data) -> single_error({invalid, O, N, Data}). coerce_chain_object(_, _, _, _, {raw, Binary}) -> {ok, Binary}; @@ -644,12 +571,38 @@ decode_chain_object(Tag, S) -> error:incorrect_size -> {error, incorrect_size} end. +coerce_bytes(O, N, _, Bytes) when bit_size(Bytes) rem 8 /= 0 -> + single_error({partial_bytes, O, N, bit_size(Bytes)}); +coerce_bytes(_, _, any, Bytes) -> + {ok, Bytes}; +coerce_bytes(O, N, Count, Bytes) when byte_size(Bytes) /= Count -> + single_error({incorrect_size, O, N, Bytes}); +coerce_bytes(_, _, _, Bytes) -> + {ok, Bytes}. + +coerce_zipped_bindings(Bindings, Direction, Tag) -> + coerce_zipped_bindings(Bindings, Direction, Tag, [], []). + +coerce_zipped_bindings([Next | Rest], Direction, Tag, Good, Broken) -> + {{ArgName, Type}, Term} = Next, + case coerce_direction(Type, Term, Direction) of + {ok, NewTerm} -> + coerce_zipped_bindings(Rest, Direction, Tag, [NewTerm | Good], Broken); + {error, Errors} -> + Wrapped = wrap_errors({Tag, ArgName}, Errors), + coerce_zipped_bindings(Rest, Direction, Tag, Good, [Wrapped | Broken]) + end; +coerce_zipped_bindings([], _, _, Good, []) -> + {ok, lists:reverse(Good)}; +coerce_zipped_bindings([], _, _, _, Broken) -> + {error, combine_errors(Broken)}. + coerce_list(Type, Elements, Direction) -> % 0 index since it represents a sophia list coerce_list(Type, Elements, Direction, 0, [], []). coerce_list(Type, [Next | Rest], Direction, Index, Good, Broken) -> - case coerce(Type, Next, Direction) of + case coerce_direction(Type, Next, Direction) of {ok, Coerced} -> coerce_list(Type, Rest, Direction, Index + 1, [Coerced | Good], Broken); {error, Errors} -> Wrapped = wrap_errors({index, Index}, Errors), @@ -672,7 +625,7 @@ coerce_map(KeyType, ValType, Remaining, Direction, Good, Broken) -> end. coerce_map2(KeyType, ValType, Remaining, Direction, Good, Broken, K, V) -> - case coerce(KeyType, K, Direction) of + case coerce_direction(KeyType, K, Direction) of {ok, KFATE} -> coerce_map3(KeyType, ValType, Remaining, Direction, Good, Broken, K, V, KFATE); {error, Errors} -> @@ -683,7 +636,7 @@ coerce_map2(KeyType, ValType, Remaining, Direction, Good, Broken, K, V) -> end. coerce_map3(KeyType, ValType, Remaining, Direction, Good, Broken, K, V, KFATE) -> - case coerce(ValType, V, Direction) of + case coerce_direction(ValType, V, Direction) of {ok, VFATE} -> NewGood = Good#{KFATE => VFATE}, coerce_map(KeyType, ValType, Remaining, Direction, NewGood, Broken); @@ -720,11 +673,6 @@ coerce_tuple(O, N, TermTypes, Terms, Direction) -> Errors -> Errors end. -% Wraps a single error in a list, along with an empty path, so that other -% accumulating error handlers can work with it. -single_error(Reason) -> - {error, [{Reason, []}]}. - coerce_variant2(O, N, Variants, Name, Tag, TermTypes, Terms, Direction) -> % FIXME: we could go through and add the variant tag to the adt_element % paths? @@ -751,7 +699,7 @@ coerce_tuple_elements(Types, Terms, Direction, Tag) -> coerce_tuple_elements(Types, Terms, Direction, Tag, 0, [], []). coerce_tuple_elements([Type | Types], [Term | Terms], Direction, Tag, Index, Good, Broken) -> - case coerce(Type, Term, Direction) of + case coerce_direction(Type, Term, Direction) of {ok, Value} -> coerce_tuple_elements(Types, Terms, Direction, Tag, Index + 1, [Value | Good], Broken); {error, Errors} -> @@ -820,6 +768,91 @@ zip_record_field({Name, Type}, {Remaining, Missing}) -> {missing, {Remaining, [Name | Missing]}} end. +% Wraps a single error in a list, along with an empty path, so that other +% accumulating error handlers can work with it. +single_error(Reason) -> + {error, [{Reason, []}]}. + +wrap_errors(Location, Errors) -> + F = fun({Error, Path}) -> + {Error, [Location | Path]} + end, + lists:map(F, Errors). + +combine_errors(Broken) -> + F = fun(NextErrors, Acc) -> + NextErrors ++ Acc + end, + lists:foldl(F, [], Broken). + + +%%% FATE to Erlang + +% Not sure if this is needed... fate_to_erlang shouldn't fail. +coerce_direction(Type, Term, to_fate) -> + erlang_to_fate(Type, Term); +coerce_direction(Type, Term, from_fate) -> + fate_to_erlang(Type, Term). + +fate_to_erlang({_, _, integer}, S) when is_integer(S) -> + {ok, S}; +fate_to_erlang({_, _, address}, {address, Bin}) -> + Address = gmser_api_encoder:encode(account_pubkey, Bin), + {ok, unicode:characters_to_list(Address)}; +fate_to_erlang({_, _, contract}, {contract, Bin}) -> + Address = gmser_api_encoder:encode(contract_pubkey, Bin), + {ok, unicode:characters_to_list(Address)}; +fate_to_erlang({_, _, signature}, Bin) -> + Address = gmser_api_encoder:encode(signature, Bin), + {ok, unicode:characters_to_list(Address)}; +%fate_to_erlang({_, _, channel}, {channel, S}) when is_binary(S) -> + %{ok, S}; +fate_to_erlang({_, _, boolean}, true) -> + {ok, true}; +fate_to_erlang({_, _, boolean}, false) -> + {ok, false}; +fate_to_erlang({_, _, string}, Bin) -> + Str = binary_to_list(Bin), + {ok, Str}; +fate_to_erlang({_, _, char}, Val) -> + {ok, Val}; +fate_to_erlang({O, N, {bytes, [Count]}}, Bytes) when is_bitstring(Bytes) -> + coerce_bytes(O, N, Count, Bytes); +fate_to_erlang({_, _, bits}, {bits, Num}) -> + {ok, Num}; +fate_to_erlang({_, _, {list, [Type]}}, Data) when is_list(Data) -> + coerce_list(Type, Data, from_fate); +fate_to_erlang({_, _, {map, [KeyType, ValType]}}, Data) when is_map(Data) -> + coerce_map(KeyType, ValType, Data, from_fate); +fate_to_erlang({O, N, {tuple, ElementTypes}}, {tuple, Data}) -> + ElementList = tuple_to_list(Data), + coerce_tuple(O, N, ElementTypes, ElementList, from_fate); +fate_to_erlang({O, N, {variant, Variants}}, {variant, _, Tag, Tuple}) -> + Terms = tuple_to_list(Tuple), + {Name, TermTypes} = lists:nth(Tag + 1, Variants), + coerce_variant2(O, N, Variants, Name, Tag, TermTypes, Terms, from_fate); +fate_to_erlang({O, N, {record, MemberTypes}}, {tuple, Tuple}) -> + coerce_record_to_map(O, N, MemberTypes, Tuple); +fate_to_erlang({O, N, {unknown_type, _}}, Data) -> + case N of + already_normalized -> + Message = "Warning: Unknown type ~p. Using term ~p as is.~n", + io:format(Message, [O, Data]); + _ -> + Message = "Warning: Unknown type ~p (i.e. ~p). Using term ~p as is.~n", + io:format(Message, [O, N, Data]) + end, + {ok, Data}; +fate_to_erlang({O, N, _}, Data) -> + case N of + already_normalized -> + io:format("Warning: Unimplemented type ~p.~nUsing term as is:~n~p~n", [O, Data]); + _ -> + io:format("Warning: Unimplemented type ~p (i.e. ~p).~nUsing term as is:~n~p~n", [O, N, Data]) + end, + {ok, Data}. + + %%% AACI Getters @@ -841,38 +874,44 @@ aaci_get_function_signature({aaci, _, FunDefs, _}, Fun) -> end. -%%% Simple coerce/3 tests +%%% Simple FATE/erlang tests -% Round trip coerce run for the eunit tests below. If these results don't match -% then the test should fail. -try_coerce(Type, Sophia, Fate) -> - % Run both first, to see if they fail to produce any result. - {ok, FateActual} = coerce(Type, Sophia, to_fate), - {ok, SophiaActual} = coerce(Type, Fate, from_fate), - % Now check that the results were what we expected. +check_erlang_to_fate(Type, Sophia, Fate) -> + {ok, FateActual} = erlang_to_fate(Type, Sophia), case FateActual of Fate -> ok; _ -> erlang:error({to_fate_failed, Fate, FateActual}) - end, + end. + +check_fate_to_erlang(Type, Fate, Sophia) -> + {ok, SophiaActual} = fate_to_erlang(Type, Fate), + % Now check that the results were what we expected. case SophiaActual of Sophia -> ok; _ -> erlang:error({from_fate_failed, Sophia, SophiaActual}) - end, + end. + +% Round trip coerce run for the eunit tests below. If these results don't match +% then the test should fail. +check_roundtrip(Type, Sophia, Fate) -> + check_erlang_to_fate(Type, Sophia, Fate), + check_fate_to_erlang(Type, Fate, Sophia), + % Finally, check that the FATE result is something that gmb understands. gmb_fate_encoding:serialize(Fate), ok. coerce_int_test() -> {ok, Type} = annotate_type(integer, #{}), - try_coerce(Type, 123, 123). + check_roundtrip(Type, 123, 123). coerce_address_test() -> {ok, Type} = annotate_type(address, #{}), - try_coerce(Type, + check_roundtrip(Type, "ak_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx", {address, <<164,136,155,90,124,22,40,206,255,76,213,56,238,123, 167,208,53,78,40,235,2,163,132,36,47,183,228,151,9, @@ -880,7 +919,7 @@ coerce_address_test() -> coerce_contract_test() -> {ok, Type} = annotate_type(contract, #{}), - try_coerce(Type, + check_roundtrip(Type, "ct_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx", {contract, <<164,136,155,90,124,22,40,206,255,76,213,56,238,123, 167,208,53,78,40,235,2,163,132,36,47,183,228,151,9, @@ -888,7 +927,7 @@ coerce_contract_test() -> coerce_signature_test() -> {ok, Type} = annotate_type(signature, #{}), - try_coerce(Type, + check_roundtrip(Type, "sg_XDyF8LJC4tpMyAySvpaG1f5V9F2XxAbRx9iuVjvvdNMwVracLhzAuXhRM5kXAFtpwW1DCHuz5jGehUayCah4jub32Ti2n", <<231,4,97,129,16,173,37,42,194,249,28,94,134,163,208,84,22,135, 169,85,212,142,14,12,233,252,97,50,193,158,229,51,123,206,222, @@ -901,69 +940,69 @@ coerce_signature_binary_test() -> 169,85,212,142,14,12,233,252,97,50,193,158,229,51,123,206,222, 249,2,3,85,173,106,150,243,253,89,128,248,52,195,140,95,114, 233,110,119,143,206,137,124,36,63,154,85,7>>, - {ok, Binary} = coerce(Type, {raw, Binary}, to_fate), - {ok, Binary} = coerce(Type, Binary, to_fate), + {ok, Binary} = erlang_to_fate(Type, {raw, Binary}), + {ok, Binary} = erlang_to_fate(Type, Binary), ok. coerce_bool_test() -> {ok, Type} = annotate_type(boolean, #{}), - try_coerce(Type, true, true), - try_coerce(Type, false, false). + check_roundtrip(Type, true, true), + check_roundtrip(Type, false, false). coerce_string_test() -> {ok, Type} = annotate_type(string, #{}), - try_coerce(Type, "hello world", <<"hello world">>). + check_roundtrip(Type, "hello world", <<"hello world">>). coerce_list_test() -> {ok, Type} = annotate_type({list, [string]}, #{}), - try_coerce(Type, ["hello world", [65, 32, 65]], [<<"hello world">>, <<65, 32, 65>>]). + check_roundtrip(Type, ["hello world", [65, 32, 65]], [<<"hello world">>, <<65, 32, 65>>]). coerce_map_test() -> {ok, Type} = annotate_type({map, [string, {list, [integer]}]}, #{}), - try_coerce(Type, #{"a" => "a", "b" => "b"}, #{<<"a">> => "a", <<"b">> => "b"}). + check_roundtrip(Type, #{"a" => "a", "b" => "b"}, #{<<"a">> => "a", <<"b">> => "b"}). coerce_tuple_test() -> {ok, Type} = annotate_type({tuple, [integer, string]}, #{}), - try_coerce(Type, {123, "456"}, {tuple, {123, <<"456">>}}). + check_roundtrip(Type, {123, "456"}, {tuple, {123, <<"456">>}}). coerce_variant_test() -> {ok, Type} = annotate_type({variant, [{"A", [integer]}, {"B", [integer, integer]}]}, #{}), - try_coerce(Type, {"A", 123}, {variant, [1, 2], 0, {123}}), - try_coerce(Type, {"B", 456, 789}, {variant, [1, 2], 1, {456, 789}}). + check_roundtrip(Type, {"A", 123}, {variant, [1, 2], 0, {123}}), + check_roundtrip(Type, {"B", 456, 789}, {variant, [1, 2], 1, {456, 789}}). coerce_option_test() -> {ok, Type} = annotate_type({"option", [integer]}, builtin_typedefs()), - try_coerce(Type, {"None"}, {variant, [0, 1], 0, {}}), - try_coerce(Type, {"Some", 1}, {variant, [0, 1], 1, {1}}). + check_roundtrip(Type, {"None"}, {variant, [0, 1], 0, {}}), + check_roundtrip(Type, {"Some", 1}, {variant, [0, 1], 1, {1}}). coerce_record_test() -> {ok, Type} = annotate_type({record, [{"a", integer}, {"b", integer}]}, #{}), - try_coerce(Type, #{"a" => 123, "b" => 456}, {tuple, {123, 456}}). + check_roundtrip(Type, #{"a" => 123, "b" => 456}, {tuple, {123, 456}}). coerce_bytes_test() -> {ok, Type} = annotate_type({tuple, [{bytes, [4]}, {bytes, [any]}]}, #{}), - try_coerce(Type, {<<"abcd">>, <<"efghi">>}, {tuple, {<<"abcd">>, <<"efghi">>}}). + check_roundtrip(Type, {<<"abcd">>, <<"efghi">>}, {tuple, {<<"abcd">>, <<"efghi">>}}). coerce_bits_test() -> {ok, Type} = annotate_type(bits, #{}), - try_coerce(Type, 5, {bits, 5}). + check_roundtrip(Type, 5, {bits, 5}). coerce_char_test() -> {ok, Type} = annotate_type(char, #{}), - try_coerce(Type, $?, $?). + check_roundtrip(Type, $?, $?). coerce_unicode_test() -> {ok, Type} = annotate_type(char, #{}), % Latin Small Letter C with cedilla and acute - {ok, $ḉ} = coerce(Type, <<"ḉ"/utf8>>, to_fate), + {ok, $ḉ} = erlang_to_fate(Type, <<"ḉ"/utf8>>), ok. coerce_hash_test() -> {ok, Type} = annotate_type("hash", builtin_typedefs()), Hash = list_to_binary(lists:seq(1,32)), - try_coerce(Type, Hash, Hash), + check_roundtrip(Type, Hash, Hash), ok. @@ -985,7 +1024,7 @@ namespace_coerce_test() -> ", {ok, AACI} = aaci_from_string(Contract), {ok, {[], Output}} = aaci_get_function_signature(AACI, "f"), - try_coerce(Output, #{"a" => 123, "b" => 456}, {tuple, {123, 456}}). + check_roundtrip(Output, #{"a" => 123, "b" => 456}, {tuple, {123, 456}}). record_substitution_test() -> Contract = " @@ -995,7 +1034,7 @@ record_substitution_test() -> ", {ok, AACI} = aaci_from_string(Contract), {ok, {[], Output}} = aaci_get_function_signature(AACI, "f"), - try_coerce(Output, #{"a" => 123, "b" => 456}, {tuple, {123, 456}}). + check_roundtrip(Output, #{"a" => 123, "b" => 456}, {tuple, {123, 456}}). tuple_substitution_test() -> Contract = " @@ -1005,7 +1044,7 @@ tuple_substitution_test() -> ", {ok, AACI} = aaci_from_string(Contract), {ok, {[], Output}} = aaci_get_function_signature(AACI, "f"), - try_coerce(Output, {1, 2, "hello"}, {tuple, {1, 2, <<"hello">>}}). + check_roundtrip(Output, {1, 2, "hello"}, {tuple, {1, 2, <<"hello">>}}). variant_substitution_test() -> Contract = " @@ -1015,8 +1054,8 @@ variant_substitution_test() -> ", {ok, AACI} = aaci_from_string(Contract), {ok, {[], Output}} = aaci_get_function_signature(AACI, "f"), - try_coerce(Output, {"Left", "hi", 1}, {variant, [2, 2], 0, {<<"hi">>, 1}}), - try_coerce(Output, {"Right", 2, 3}, {variant, [2, 2], 1, {2, 3}}). + check_roundtrip(Output, {"Left", "hi", 1}, {variant, [2, 2], 0, {<<"hi">>, 1}}), + check_roundtrip(Output, {"Right", 2, 3}, {variant, [2, 2], 1, {2, 3}}). nested_coerce_test() -> Contract = " @@ -1027,7 +1066,7 @@ nested_coerce_test() -> ", {ok, AACI} = aaci_from_string(Contract), {ok, {[], Output}} = aaci_get_function_signature(AACI, "f"), - try_coerce(Output, + check_roundtrip(Output, #{ "f1" => {1, 2}, "f2" => {"a", "b"}}, {tuple, {{tuple, {1, 2}}, {tuple, {<<"a">>, <<"b">>}}}}). @@ -1039,7 +1078,7 @@ state_coerce_test() -> ", {ok, AACI} = aaci_from_string(Contract), {ok, {[], Output}} = aaci_get_function_signature(AACI, "init"), - try_coerce(Output, 0, 0). + check_roundtrip(Output, 0, 0). param_test() -> Contract = " @@ -1049,8 +1088,8 @@ param_test() -> ", {ok, AACI} = aaci_from_string(Contract), {ok, {[{"x", Input}], Output}} = aaci_get_function_signature(AACI, "init"), - try_coerce(Input, 0, 0), - try_coerce(Output, 0, 0). + check_roundtrip(Input, 0, 0), + check_roundtrip(Output, 0, 0). %%% Obscure Sophia types where we should check the AACI as well @@ -1126,21 +1165,21 @@ name_coerce_test() -> {ok, TTL} = annotate_type("Chain.ttl", builtin_typedefs()), TTLSoph = {"FixedTTL", 0}, TTLFate = {variant, [1, 1], 0, {0}}, - try_coerce(TTL, TTLSoph, TTLFate), + check_roundtrip(TTL, TTLSoph, TTLFate), {ok, Pointee} = annotate_type("AENS.pointee", builtin_typedefs()), PointeeSoph = {"AccountPt", AddrSoph}, PointeeFate = {variant, [1, 1, 1, 1], 0, {AddrFate}}, - try_coerce(Pointee, PointeeSoph, PointeeFate), + check_roundtrip(Pointee, PointeeSoph, PointeeFate), {ok, Name} = annotate_type("AENS.name", builtin_typedefs()), NameSoph = {"Name", AddrSoph, TTLSoph, #{"myname" => PointeeSoph}}, NameFate = {variant, [3], 0, {AddrFate, TTLFate, #{<<"myname">> => PointeeFate}}}, - try_coerce(Name, NameSoph, NameFate). + check_roundtrip(Name, NameSoph, NameFate). void_coerce_test() -> % Void itself can't be represented, but other types built out of void are % valid. {ok, NonOption} = annotate_type({"option", ["void"]}, builtin_typedefs()), - try_coerce(NonOption, {"None"}, {variant, [0, 1], 0, {}}), + check_roundtrip(NonOption, {"None"}, {variant, [0, 1], 0, {}}), {ok, NonList} = annotate_type({list, ["void"]}, builtin_typedefs()), - try_coerce(NonList, [], []). + check_roundtrip(NonList, [], []). From 4f1958b21046595a49f60e95793f6adca4f8b444 Mon Sep 17 00:00:00 2001 From: Jarvis Carroll Date: Tue, 13 Jan 2026 01:19:29 +0000 Subject: [PATCH 03/42] use lists:unzip/1 Just a little thing I noticed could be improved. --- src/hz_aaci.erl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/hz_aaci.erl b/src/hz_aaci.erl index 2d21846..8e10795 100644 --- a/src/hz_aaci.erl +++ b/src/hz_aaci.erl @@ -732,8 +732,7 @@ coerce_map_to_record(O, N, MemberTypes, Map) -> end. coerce_record_to_map(O, N, MemberTypes, Tuple) -> - Names = [Name || {Name, _} <- MemberTypes], - Types = [Type || {_, Type} <- MemberTypes], + {Names, Types} = lists:unzip(MemberTypes), Terms = tuple_to_list(Tuple), % FIXME: We could go through and change the record_element paths into field % paths? From 6f5525afcf19b2c260603768a39bedb0fc7da799 Mon Sep 17 00:00:00 2001 From: Jarvis Carroll Date: Thu, 15 Jan 2026 01:50:50 +0000 Subject: [PATCH 04/42] Rename get_function_signature hz_aaci:aaci_get_function_signature is a bit redundant. --- src/hz.erl | 4 ++-- src/hz_aaci.erl | 58 ++++++++++++++++++++++++------------------------- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/src/hz.erl b/src/hz.erl index c36dbc3..4734e74 100644 --- a/src/hz.erl +++ b/src/hz.erl @@ -1475,10 +1475,10 @@ lookup_aaci(Label) -> %% useful for calling decode_bytearray/2. aaci_lookup_spec(AACI = {aaci, _, _, _}, Fun) -> - hz_aaci:aaci_get_function_signature(AACI, Fun); + hz_aaci:get_function_signature(AACI, Fun); aaci_lookup_spec({aaci, Label}, Fun) -> case hz_man:lookup_aaci(Label) of - {ok, AACI} -> hz_aaci:aaci_get_function_signature(AACI, Fun); + {ok, AACI} -> hz_aaci:get_function_signature(AACI, Fun); error -> {error, aaci_not_found} end. diff --git a/src/hz_aaci.erl b/src/hz_aaci.erl index 8e10795..f9da33f 100644 --- a/src/hz_aaci.erl +++ b/src/hz_aaci.erl @@ -21,7 +21,7 @@ erlang_to_fate/2, fate_to_erlang/2, erlang_args_to_fate/2, - aaci_get_function_signature/2]). + get_function_signature/2]). %%% Types @@ -855,7 +855,7 @@ fate_to_erlang({O, N, _}, Data) -> %%% AACI Getters --spec aaci_get_function_signature(AACI, Fun) -> {ok, Type} | {error, Reason} +-spec get_function_signature(AACI, Fun) -> {ok, Type} | {error, Reason} when AACI :: aaci(), Fun :: binary() | string(), Type :: {term(), term()}, % FIXME @@ -866,7 +866,7 @@ fate_to_erlang({O, N, _}, Data) -> %% prepare_contract/1. This type information, particularly the return type, is %% useful for calling decode_bytearray/2. -aaci_get_function_signature({aaci, _, FunDefs, _}, Fun) -> +get_function_signature({aaci, _, FunDefs, _}, Fun) -> case maps:find(Fun, FunDefs) of {ok, A} -> {ok, A}; error -> {error, bad_fun_name} @@ -1022,7 +1022,7 @@ namespace_coerce_test() -> entrypoint f(): N.pair = { a = 1, b = 2 } ", {ok, AACI} = aaci_from_string(Contract), - {ok, {[], Output}} = aaci_get_function_signature(AACI, "f"), + {ok, {[], Output}} = get_function_signature(AACI, "f"), check_roundtrip(Output, #{"a" => 123, "b" => 456}, {tuple, {123, 456}}). record_substitution_test() -> @@ -1032,7 +1032,7 @@ record_substitution_test() -> entrypoint f(): pair(int) = { a = 1, b = 2 } ", {ok, AACI} = aaci_from_string(Contract), - {ok, {[], Output}} = aaci_get_function_signature(AACI, "f"), + {ok, {[], Output}} = get_function_signature(AACI, "f"), check_roundtrip(Output, #{"a" => 123, "b" => 456}, {tuple, {123, 456}}). tuple_substitution_test() -> @@ -1042,7 +1042,7 @@ tuple_substitution_test() -> entrypoint f(): triple(int, string) = (1, 2, \"hello\") ", {ok, AACI} = aaci_from_string(Contract), - {ok, {[], Output}} = aaci_get_function_signature(AACI, "f"), + {ok, {[], Output}} = get_function_signature(AACI, "f"), check_roundtrip(Output, {1, 2, "hello"}, {tuple, {1, 2, <<"hello">>}}). variant_substitution_test() -> @@ -1052,7 +1052,7 @@ variant_substitution_test() -> entrypoint f(): adt(string, int) = Left(\"hi\", 1) ", {ok, AACI} = aaci_from_string(Contract), - {ok, {[], Output}} = aaci_get_function_signature(AACI, "f"), + {ok, {[], Output}} = get_function_signature(AACI, "f"), check_roundtrip(Output, {"Left", "hi", 1}, {variant, [2, 2], 0, {<<"hi">>, 1}}), check_roundtrip(Output, {"Right", 2, 3}, {variant, [2, 2], 1, {2, 3}}). @@ -1064,7 +1064,7 @@ nested_coerce_test() -> entrypoint f(): r = { f1 = (1, 2), f2 = (\"a\", \"b\") } ", {ok, AACI} = aaci_from_string(Contract), - {ok, {[], Output}} = aaci_get_function_signature(AACI, "f"), + {ok, {[], Output}} = get_function_signature(AACI, "f"), check_roundtrip(Output, #{ "f1" => {1, 2}, "f2" => {"a", "b"}}, {tuple, {{tuple, {1, 2}}, {tuple, {<<"a">>, <<"b">>}}}}). @@ -1076,7 +1076,7 @@ state_coerce_test() -> entrypoint init(): state = 0 ", {ok, AACI} = aaci_from_string(Contract), - {ok, {[], Output}} = aaci_get_function_signature(AACI, "init"), + {ok, {[], Output}} = get_function_signature(AACI, "init"), check_roundtrip(Output, 0, 0). param_test() -> @@ -1086,7 +1086,7 @@ param_test() -> entrypoint init(x): state = x ", {ok, AACI} = aaci_from_string(Contract), - {ok, {[{"x", Input}], Output}} = aaci_get_function_signature(AACI, "init"), + {ok, {[{"x", Input}], Output}} = get_function_signature(AACI, "init"), check_roundtrip(Input, 0, 0), check_roundtrip(Output, 0, 0). @@ -1129,30 +1129,30 @@ obscure_aaci_test() -> ", {ok, AACI} = aaci_from_string(Contract), - {ok, {[], {{bytes, [4]}, _, _}}} = aaci_get_function_signature(AACI, "fixed_bytes"), - {ok, {[], {{bytes, [any]}, _, _}}} = aaci_get_function_signature(AACI, "any_bytes"), - {ok, {[], {bits, _, _}}} = aaci_get_function_signature(AACI, "bits"), - {ok, {[], {char, _, _}}} = aaci_get_function_signature(AACI, "character"), + {ok, {[], {{bytes, [4]}, _, _}}} = get_function_signature(AACI, "fixed_bytes"), + {ok, {[], {{bytes, [any]}, _, _}}} = get_function_signature(AACI, "any_bytes"), + {ok, {[], {bits, _, _}}} = get_function_signature(AACI, "bits"), + {ok, {[], {char, _, _}}} = get_function_signature(AACI, "character"), - {ok, {[], {{"option", [integer]}, _, {variant, [{"None", []}, {"Some", [_]}]}}}} = aaci_get_function_signature(AACI, "options"), - {ok, {[], {"hash", _, {bytes, [32]}}}} = aaci_get_function_signature(AACI, "hash"), - {ok, {[], {"unit", _, {tuple, []}}}} = aaci_get_function_signature(AACI, "unit"), + {ok, {[], {{"option", [integer]}, _, {variant, [{"None", []}, {"Some", [_]}]}}}} = get_function_signature(AACI, "options"), + {ok, {[], {"hash", _, {bytes, [32]}}}} = get_function_signature(AACI, "hash"), + {ok, {[], {"unit", _, {tuple, []}}}} = get_function_signature(AACI, "unit"), - {ok, {_, {"Chain.ttl", _, {variant, _}}}} = aaci_get_function_signature(AACI, "ttl"), - {ok, {_, {"Chain.paying_for_tx", _, {variant, _}}}} = aaci_get_function_signature(AACI, "paying_for"), - {ok, {_, {"Chain.ga_meta_tx", _, {variant, _}}}} = aaci_get_function_signature(AACI, "ga_meta_tx"), - {ok, {_, {"Chain.base_tx", _, {variant, _}}}} = aaci_get_function_signature(AACI, "base_tx"), - {ok, {_, {"Chain.tx", _, {record, _}}}} = aaci_get_function_signature(AACI, "tx"), + {ok, {_, {"Chain.ttl", _, {variant, _}}}} = get_function_signature(AACI, "ttl"), + {ok, {_, {"Chain.paying_for_tx", _, {variant, _}}}} = get_function_signature(AACI, "paying_for"), + {ok, {_, {"Chain.ga_meta_tx", _, {variant, _}}}} = get_function_signature(AACI, "ga_meta_tx"), + {ok, {_, {"Chain.base_tx", _, {variant, _}}}} = get_function_signature(AACI, "base_tx"), + {ok, {_, {"Chain.tx", _, {record, _}}}} = get_function_signature(AACI, "tx"), - {ok, {_, {"AENS.pointee", _, {variant, _}}}} = aaci_get_function_signature(AACI, "pointee"), - {ok, {_, {"AENS.name", _, {variant, _}}}} = aaci_get_function_signature(AACI, "name"), - {ok, {_, {"AENSv2.pointee", _, {variant, _}}}} = aaci_get_function_signature(AACI, "pointee2"), - {ok, {_, {"AENSv2.name", _, {variant, _}}}} = aaci_get_function_signature(AACI, "name2"), + {ok, {_, {"AENS.pointee", _, {variant, _}}}} = get_function_signature(AACI, "pointee"), + {ok, {_, {"AENS.name", _, {variant, _}}}} = get_function_signature(AACI, "name"), + {ok, {_, {"AENSv2.pointee", _, {variant, _}}}} = get_function_signature(AACI, "pointee2"), + {ok, {_, {"AENSv2.name", _, {variant, _}}}} = get_function_signature(AACI, "name2"), - {ok, {_, {"MCL_BLS12_381.fr", _, {bytes, [32]}}}} = aaci_get_function_signature(AACI, "fr"), - {ok, {_, {"MCL_BLS12_381.fp", _, {bytes, [48]}}}} = aaci_get_function_signature(AACI, "fp"), + {ok, {_, {"MCL_BLS12_381.fr", _, {bytes, [32]}}}} = get_function_signature(AACI, "fr"), + {ok, {_, {"MCL_BLS12_381.fp", _, {bytes, [48]}}}} = get_function_signature(AACI, "fp"), - {ok, {[], {{"Set.set", [integer]}, _, {record, [{"to_map", _}]}}}} = aaci_get_function_signature(AACI, "set"), + {ok, {[], {{"Set.set", [integer]}, _, {record, [{"to_map", _}]}}}} = get_function_signature(AACI, "set"), ok. From 97e32574c4251209f91da8028bc40b9cf5fa2367 Mon Sep 17 00:00:00 2001 From: Jarvis Carroll Date: Thu, 15 Jan 2026 00:57:25 +0000 Subject: [PATCH 05/42] set up parsing structure We tokenize, and then do the simplest possible recursive descent. We don't want to evaluate anything, so infix operators are out, meaning no shunting yard or tree rearranging or LR(1) shenanigans are necessary, just write the code. If we want to 'peek', just take the next token, and pass it around from that point on, until it can actually be consumed. --- src/hz_aaci.erl | 2 + src/hz_sophia.erl | 130 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+) create mode 100644 src/hz_sophia.erl diff --git a/src/hz_aaci.erl b/src/hz_aaci.erl index f9da33f..8fda712 100644 --- a/src/hz_aaci.erl +++ b/src/hz_aaci.erl @@ -22,6 +22,8 @@ fate_to_erlang/2, erlang_args_to_fate/2, get_function_signature/2]). +% Internal stuff that is useful for writing AACI unit tests. +-export([annotate_type/2]). %%% Types diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl new file mode 100644 index 0000000..b428616 --- /dev/null +++ b/src/hz_sophia.erl @@ -0,0 +1,130 @@ +-module(hz_sophia). +-vsn("0.8.2"). +-author("Jarvis Carroll "). +-copyright("Jarvis Carroll "). +-license("GPL-3.0-or-later"). + +-include_lib("eunit/include/eunit.hrl"). + +parse_literal(Type, String) -> + case parse_expression(Type, {tk, 1, 1}, String) of + {ok, {Result, NewTk, NewString}} -> + parse_literal2(Result, NewTk, NewString); + {error, Reason} -> + {error, Reason} + end. + +parse_literal2(Result, Tk, String) -> + % We have parsed a valid expression. Now check that the string ends. + case next_token(Tk, String) of + {ok, {{eof, _, _, _, _}, _, _}} -> + {ok, Result}; + {ok, {{_, S, Row, Start, End}, _, _}} -> + {error, {unexpected_token, S, Row, Start, End}}; + {error, Reason} -> + {error, Reason} + end. + +%%% Tokenizer + +next_token({tk, Row, Col}, []) -> + {ok, {{eof, "", Row, Col, Col}, {tk, Row, Col}, []}}; +next_token({tk, Row, Col}, " " ++ Rest) -> + next_token({tk, Row + 1, Col}, Rest); +next_token({tk, Row, Col}, "\t" ++ Rest) -> + next_token({tk, Row + 1, Col}, Rest); +next_token(Tk, [N | _] = String) when N >= $0, N =< $9 -> + num_token(Tk, Tk, String, []); +next_token(Tk, [N | _] = String) when N >= $A, N =< $Z -> + alphanum_token(Tk, Tk, String, []); +next_token(Tk, [N | _] = String) when N >= $a, N =< $z -> + alphanum_token(Tk, Tk, String, []); +next_token(Tk, [$_ | _] = String) -> + alphanum_token(Tk, Tk, String, []); +next_token({tk, Row, Col}, [Char | _]) -> + {error, {unknown_char, Row, Col, [Char]}}. + +num_token(Start, {tk, Row, Col}, [N | Rest], Acc) when N >= $0, N =< $9 -> + num_token(Start, {tk, Row + 1, Col}, Rest, [N | Acc]); +num_token({tk, _, Start}, {tk, Row, End}, String, Acc) -> + NumString = lists:reverse(Acc), + Token = {integer, NumString, Row, Start, End}, + {ok, {Token, {tk, Row, End}, String}}. + +alphanum_token(Start, {tk, Row, Col}, [C | Rest], Acc) when C >= $A, C =< $Z -> + alphanum_token(Start, {tk, Row, Col}, Rest, [C | Acc]); +alphanum_token(Start, {tk, Row, Col}, [C | Rest], Acc) when C >= $a, C =< $z -> + alphanum_token(Start, {tk, Row, Col}, Rest, [C | Acc]); +alphanum_token(Start, {tk, Row, Col}, [C | Rest], Acc) when C >= $0, C =< $9 -> + alphanum_token(Start, {tk, Row, Col}, Rest, [C | Acc]); +alphanum_token(Start, {tk, Row, Col}, [$_ | Rest], Acc) -> + alphanum_token(Start, {tk, Row, Col}, Rest, [$_ | Acc]); +alphanum_token({tk, _, Start}, {tk, Row, End}, String, Acc) -> + AlphaString = lists:reverse(Acc), + Token = {alphanum, AlphaString, Row, Start, End}, + {ok, {Token, {tk, Row, End}, String}}. + + +%%% Sophia Literal Parser + +%%% This parser is a simple recursive descent parser, written explicitly in +%%% erlang. +%%% +%%% There are no infix operators in the subset we want to parse, so recursive +%%% descent is fine with no special tricks, no shunting yard algorithm, no +%%% parser generators, etc. +%%% +%%% If we were writing this in C then we might want to work iteratively with an +%%% array of finite state machines, i.e. with a pushdown automaton, instead of +%%% using recursion. This is a tried and true method of making fast parsers. +%%% Recall, however, that the BEAM *is* a stack machine, written in C, so +%%% rather than writing confusing iterative code in Erlang, to simulate a +%%% pushdown automaton inside another simulated stack machine... we should just +%%% write the recursive code, thus programming the BEAM to implement the +%%% pushdown automaton that we want. + +parse_expression(Type, Tk, String) -> + {ok, {Token, NewTk, NewString}} = next_token(Tk, String), + parse_expression2(Type, NewTk, NewString, Token). + +parse_expression2(Type, Tk, String, {integer, S, Row, Start, End}) -> + Value = list_to_integer(S), + check_type(integer, Type, Row, Start, End, {Value, Tk, String}); +parse_expression2(_, _, _, {_, S, Row, Start, End}) -> + {error, {unexpected_token, S, Row, Start, End}}. + +check_type(Expected, {_, _, Expected}, _, _, _, Result) -> + {ok, Result}; +check_type(_, {_, _, unknown_type}, _, _, _, Result) -> + % We want it to be possible to opt out of type-checking, since FATE is + % dynamically typed anyway. + {ok, Result}; +check_type(Expected, {O, N, _}, Row, Start, End, _) -> + {error, {wrong_type, O, N, Expected, Row, Start, End}}. + + +%%% Tests + +check_sophia_to_fate(Type, Sophia, Fate) -> + {ok, FateActual} = parse_literal(Type, Sophia), + case FateActual of + Fate -> + ok; + _ -> + erlang:error({to_fate_failed, Fate, FateActual}) + end. + +check_parser(Type, Sophia, Fate) -> + UnknownType = setelement(3, Type, unknown_type), + check_sophia_to_fate(Type, Sophia, Fate), + check_sophia_to_fate(UnknownType, Sophia, Fate), + + % Finally, check that the FATE result is something that gmb understands. + gmb_fate_encoding:serialize(Fate), + + ok. + +int_test() -> + {ok, Type} = hz_aaci:annotate_type(integer, #{}), + check_parser(Type, "123", 123). + From 3f1c9bd626e3e7be45d6532dffebbb798eac9069 Mon Sep 17 00:00:00 2001 From: Jarvis Carroll Date: Thu, 15 Jan 2026 09:38:04 +0000 Subject: [PATCH 06/42] List parsing Slowly chipping away at cases... --- src/hz_aaci.erl | 2 +- src/hz_sophia.erl | 79 +++++++++++++++++++++++++++++++++++++---------- 2 files changed, 64 insertions(+), 17 deletions(-) diff --git a/src/hz_aaci.erl b/src/hz_aaci.erl index 8fda712..dfd158c 100644 --- a/src/hz_aaci.erl +++ b/src/hz_aaci.erl @@ -23,7 +23,7 @@ erlang_args_to_fate/2, get_function_signature/2]). % Internal stuff that is useful for writing AACI unit tests. --export([annotate_type/2]). +-export([aaci_from_string/1, annotate_type/2]). %%% Types diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl index b428616..5b7c944 100644 --- a/src/hz_sophia.erl +++ b/src/hz_sophia.erl @@ -41,8 +41,9 @@ next_token(Tk, [N | _] = String) when N >= $a, N =< $z -> alphanum_token(Tk, Tk, String, []); next_token(Tk, [$_ | _] = String) -> alphanum_token(Tk, Tk, String, []); -next_token({tk, Row, Col}, [Char | _]) -> - {error, {unknown_char, Row, Col, [Char]}}. +next_token({tk, Row, Col}, [Char | Rest]) -> + Token = {character, [Char], Row, Col, Col}, + {ok, {Token, {tk, Row + 1, Col}, Rest}}. num_token(Start, {tk, Row, Col}, [N | Rest], Acc) when N >= $0, N =< $9 -> num_token(Start, {tk, Row + 1, Col}, Rest, [N | Acc]); @@ -89,19 +90,58 @@ parse_expression(Type, Tk, String) -> parse_expression2(Type, Tk, String, {integer, S, Row, Start, End}) -> Value = list_to_integer(S), - check_type(integer, Type, Row, Start, End, {Value, Tk, String}); + case Type of + {_, _, integer} -> + {ok, {Value, Tk, String}}; + {_, _, unknown_type} -> + {ok, {Value, Tk, String}}; + {O, N, _} -> + {error, {wrong_type, O, N, integer, Row, Start, End}} + end; +parse_expression2(Type, Tk, String, {character, "[", Row, Start, _}) -> + parse_list(Type, Tk, String, Row, Start); parse_expression2(_, _, _, {_, S, Row, Start, End}) -> {error, {unexpected_token, S, Row, Start, End}}. -check_type(Expected, {_, _, Expected}, _, _, _, Result) -> - {ok, Result}; -check_type(_, {_, _, unknown_type}, _, _, _, Result) -> - % We want it to be possible to opt out of type-checking, since FATE is - % dynamically typed anyway. - {ok, Result}; -check_type(Expected, {O, N, _}, Row, Start, End, _) -> - {error, {wrong_type, O, N, Expected, Row, Start, End}}. +parse_list({_, _, {list, [Inner]}}, Tk, String, Row, Start) -> + parse_list_loop(Inner, Tk, String, Row, Start, []); +parse_list({_, _, unknown_type}, Tk, String, Row, Start) -> + parse_list_loop(unknown_type(), Tk, String, Row, Start, []); +parse_list({O, N, _}, _, _, Row, Start) -> + {error, {wrong_type, O, N, list, Row, Start, Start}}. +parse_list_loop(Inner, Tk, String, Row, Start, Acc) -> + case next_token(Tk, String) of + {ok, {{character, "]", _, _, _}, NewTk, NewString}} -> + {ok, {lists:reverse(Acc), NewTk, NewString}}; + {ok, {Token, NewTk, NewString}} -> + parse_list_loop2(Inner, NewTk, NewString, Row, Start, Acc, Token) + end. + +parse_list_loop2(Inner, Tk, String, Row, Start, Acc, Token) -> + case parse_expression2(Inner, Tk, String, Token) of + {ok, {Value, NewTk, NewString}} -> + parse_list_loop3(Inner, NewTk, NewString, Row, Start, [Value | Acc]); + {error, Reason} -> + Wrapped = wrap_error(Reason, {list_element, length(Acc)}), + {error, Wrapped} + end. + +parse_list_loop3(Inner, Tk, String, Row, Start, Acc) -> + case next_token(Tk, String) of + {ok, {{character, "]", _, _, _}, NewTk, NewString}} -> + {ok, {lists:reverse(Acc), NewTk, NewString}}; + {ok, {{character, ",", _, _, _}, NewTk, NewString}} -> + parse_list_loop(Inner, NewTk, NewString, Row, Start, Acc); + {error, Reason} -> + {error, Reason} + end. + +unknown_type() -> + {unknown_type, already_normalized, unknown_type}. + +% TODO +wrap_error(Reason, _) -> Reason. %%% Tests @@ -115,16 +155,23 @@ check_sophia_to_fate(Type, Sophia, Fate) -> end. check_parser(Type, Sophia, Fate) -> - UnknownType = setelement(3, Type, unknown_type), check_sophia_to_fate(Type, Sophia, Fate), - check_sophia_to_fate(UnknownType, Sophia, Fate), + check_sophia_to_fate(unknown_type(), Sophia, Fate), % Finally, check that the FATE result is something that gmb understands. gmb_fate_encoding:serialize(Fate), ok. -int_test() -> - {ok, Type} = hz_aaci:annotate_type(integer, #{}), - check_parser(Type, "123", 123). +check_parser(Sophia, Fate) -> + Source = "contract C = entrypoint f() = " ++ Sophia, + {ok, AACI} = hz_aaci:aaci_from_string(Source), + {ok, {_, Type}} = hz_aaci:get_function_signature(AACI, "f"), + check_parser(Type, Sophia, Fate). + +int_test() -> + check_parser("123", 123). + +list_test() -> + check_parser("[1, 2, 3]", [1, 2, 3]). From 56e63051bc8cf5c90f101d53241da6b5a6df97b6 Mon Sep 17 00:00:00 2001 From: Jarvis Carroll Date: Fri, 16 Jan 2026 05:46:27 +0000 Subject: [PATCH 07/42] Map parsing --- src/hz_sophia.erl | 94 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 92 insertions(+), 2 deletions(-) diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl index 5b7c944..927fe3c 100644 --- a/src/hz_sophia.erl +++ b/src/hz_sophia.erl @@ -100,9 +100,26 @@ parse_expression2(Type, Tk, String, {integer, S, Row, Start, End}) -> end; parse_expression2(Type, Tk, String, {character, "[", Row, Start, _}) -> parse_list(Type, Tk, String, Row, Start); +parse_expression2(Type, Tk, String, {character, "{", Row, Start, _}) -> + parse_record_or_map(Type, Tk, String, Row, Start); parse_expression2(_, _, _, {_, S, Row, Start, End}) -> {error, {unexpected_token, S, Row, Start, End}}. +unknown_type() -> + {unknown_type, already_normalized, unknown_type}. + +expect_tokens([], Tk, String) -> + {ok, {Tk, String}}; +expect_tokens([Str | Rest], Tk, String) -> + case next_token(Tk, String) of + {ok, {{_, Str, _, _, _}, NewTk, NewString}} -> + expect_tokens(Rest, NewTk, NewString); + {ok, {{_, Actual, Row, Start, End}}} -> + {error, {unexpected_token, Actual, Row, Start, End}} + end. + +%%% List Parsing + parse_list({_, _, {list, [Inner]}}, Tk, String, Row, Start) -> parse_list_loop(Inner, Tk, String, Row, Start, []); parse_list({_, _, unknown_type}, Tk, String, Row, Start) -> @@ -137,8 +154,75 @@ parse_list_loop3(Inner, Tk, String, Row, Start, Acc) -> {error, Reason} end. -unknown_type() -> - {unknown_type, already_normalized, unknown_type}. +%%% Record parsing + +parse_record_or_map({_, _, {map, [KeyType, ValueType]}}, Tk, String, _, _) -> + parse_map(KeyType, ValueType, Tk, String, #{}); +parse_record_or_map({_, _, {record, Fields}}, Tk, String, _, _) -> + parse_record(Fields, Tk, String); +parse_record_or_map({_, _, unknown_type}, Tk, String, _, _) -> + case next_token(Tk, String) of + {ok, {{character, "}", _, _, _}, NewTk, NewString}} -> + {ok, {#{}, NewTk, NewString}}; + {ok, {{character, "[", _, _, _}, NewTk, NewString}} -> + parse_map2(unknown_type(), unknown_type(), NewTk, NewString, #{}); + {ok, {{alphanum, _, Row, Start, End}, _, _}} -> + {error, {unresolved_record, Row, Start, End}}; + {ok, {{_, S, Row, Start, End}, _, _}} -> + {error, {unexpected_token, S, Row, Start, End}} + end; +parse_record_or_map({O, N, _}, _, _, Row, Start) -> + {error, {wrong_type, O, N, map, Row, Start, Start}}. + +parse_record(Fields, Tk, String) -> + {error, not_yet_implemented}. + +%%% Map Parsing + +parse_map(KeyType, ValueType, Tk, String, Acc) -> + case next_token(Tk, String) of + {ok, {{character, "[", _, _, _}, NewTk, NewString}} -> + parse_map2(KeyType, ValueType, NewTk, NewString, Acc); + {ok, {{character, "}", _, _, _}, NewTk, NewString}} -> + {ok, {Acc, NewTk, NewString}}; + {ok, {{_, S, Row, Start, End}}} -> + {error, {unexpected_token, S, Row, Start, End}} + end. + +parse_map2(KeyType, ValueType, Tk, String, Acc) -> + case parse_expression(KeyType, Tk, String) of + {ok, {Result, NewTk, NewString}} -> + parse_map3(KeyType, ValueType, NewTk, NewString, Acc, Result); + {error, Reason} -> + wrap_error(Reason, {map_key, maps:size(Acc)}) + end. + +parse_map3(KeyType, ValueType, Tk, String, Acc, Key) -> + case expect_tokens(["]", "="], Tk, String) of + {ok, {NewTk, NewString}} -> + parse_map4(KeyType, ValueType, NewTk, NewString, Acc, Key); + {error, Reason} -> + {error, Reason} + end. + +parse_map4(KeyType, ValueType, Tk, String, Acc, Key) -> + case parse_expression(ValueType, Tk, String) of + {ok, {Result, NewTk, NewString}} -> + NewAcc = maps:put(Key, Result, Acc), + parse_map5(KeyType, ValueType, NewTk, NewString, NewAcc); + {error, Reason} -> + {error, Reason} + end. + +parse_map5(KeyType, ValueType, Tk, String, Acc) -> + case next_token(Tk, String) of + {ok, {{character, ",", _, _, _}, NewTk, NewString}} -> + parse_map(KeyType, ValueType, NewTk, NewString, Acc); + {ok, {{character, "}", _, _, _}, NewTk, NewString}} -> + {ok, {Acc, NewTk, NewString}}; + {ok, {{_, S, Row, Start, End}}} -> + {error, {unexpected_token, S, Row, Start, End}} + end. % TODO wrap_error(Reason, _) -> Reason. @@ -175,3 +259,9 @@ int_test() -> list_test() -> check_parser("[1, 2, 3]", [1, 2, 3]). +list_of_lists_test() -> + check_parser("[[], [1], [2, 3]]", [[], [1], [2, 3]]). + +maps_test() -> + check_parser("{[1] = 2, [3] = 4}", #{1 => 2, 3 => 4}). + From 6f02d4c4e6618ff139d7febf9eaab86aea39ce08 Mon Sep 17 00:00:00 2001 From: Jarvis Carroll Date: Fri, 23 Jan 2026 00:48:06 +0000 Subject: [PATCH 08/42] Record parsing --- src/hz_sophia.erl | 123 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 110 insertions(+), 13 deletions(-) diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl index 927fe3c..c9f106c 100644 --- a/src/hz_sophia.erl +++ b/src/hz_sophia.erl @@ -159,7 +159,7 @@ parse_list_loop3(Inner, Tk, String, Row, Start, Acc) -> parse_record_or_map({_, _, {map, [KeyType, ValueType]}}, Tk, String, _, _) -> parse_map(KeyType, ValueType, Tk, String, #{}); parse_record_or_map({_, _, {record, Fields}}, Tk, String, _, _) -> - parse_record(Fields, Tk, String); + parse_record(Fields, Tk, String, #{}); parse_record_or_map({_, _, unknown_type}, Tk, String, _, _) -> case next_token(Tk, String) of {ok, {{character, "}", _, _, _}, NewTk, NewString}} -> @@ -174,8 +174,83 @@ parse_record_or_map({_, _, unknown_type}, Tk, String, _, _) -> parse_record_or_map({O, N, _}, _, _, Row, Start) -> {error, {wrong_type, O, N, map, Row, Start, Start}}. -parse_record(Fields, Tk, String) -> - {error, not_yet_implemented}. +parse_record(Fields, Tk, String, Acc) -> + case next_token(Tk, String) of + {ok, {{alphanum, Ident, Row, Start, End}, NewTk, NewString}} -> + parse_record2(Fields, NewTk, NewString, Acc, Ident, Row, Start, End); + {ok, {{character, "}", Row, Start, End}, NewTk, NewString}} -> + parse_record_end(Fields, NewTk, NewString, Acc, Row, Start, End); + {ok, {{_, S, Row, Start, End}, _, _}} -> + {error, {unexpected_token, S, Row, Start, End}}; + {error, Reason} -> + {error, Reason} + end. + +parse_record2(Fields, Tk, String, Acc, Ident, Row, Start, End) -> + case lists:keyfind(Ident, 1, Fields) of + {_, Type} -> + parse_record3(Fields, Tk, String, Acc, Ident, Row, Start, End, Type); + false -> + {error, {invalid_field, Ident, Row, Start, End}} + end. + +parse_record3(Fields, Tk, String, Acc, Ident, Row, Start, End, Type) -> + case maps:is_key(Ident, Acc) of + false -> + parse_record4(Fields, Tk, String, Acc, Ident, Type); + true -> + {error, {field_already_present, Ident, Row, Start, End}} + end. + +parse_record4(Fields, Tk, String, Acc, Ident, Type) -> + case expect_tokens(["="], Tk, String) of + {ok, {NewTk, NewString}} -> + parse_record5(Fields, NewTk, NewString, Acc, Ident, Type); + {error, Reason} -> + {error, Reason} + end. + +parse_record5(Fields, Tk, String, Acc, Ident, Type) -> + case parse_expression(Type, Tk, String) of + {ok, {Result, NewTk, NewString}} -> + NewAcc = maps:put(Ident, Result, Acc), + parse_record6(Fields, NewTk, NewString, NewAcc); + {error, Reason} -> + wrap_error(Reason, {record_field, Ident}) + end. + +parse_record6(Fields, Tk, String, Acc) -> + case next_token(Tk, String) of + {ok, {{character, ",", _, _, _}, NewTk, NewString}} -> + parse_record(Fields, NewTk, NewString, Acc); + {ok, {{character, "}", Row, Start, End}, NewTk, NewString}} -> + parse_record_end(Fields, NewTk, NewString, Acc, Row, Start, End); + {ok, {{_, S, Row, Start, End}, _, _}} -> + {error, {unexpected_token, S, Row, Start, End}}; + {error, Reason} -> + {error, Reason} + end. + +parse_record_end(Fields, Tk, String, FieldValues, Row, Start, End) -> + case parse_record_final_loop(Fields, FieldValues, []) of + {ok, Result} -> + {ok, {Result, Tk, String}}; + {error, {missing_field, Name}} -> + {error, {missing_field, Name, Row, Start, End}} + end. + +parse_record_final_loop([{Name, _} | Rest], FieldValues, Acc) -> + case maps:find(Name, FieldValues) of + {ok, Value} -> + parse_record_final_loop(Rest, FieldValues, [Value | Acc]); + error -> + {error, {missing_field, Name}} + end; +parse_record_final_loop([], _, FieldsReverse) -> + Fields = lists:reverse(FieldsReverse), + Tuple = list_to_tuple(Fields), + {ok, {tuple, Tuple}}. + %%% Map Parsing @@ -238,20 +313,33 @@ check_sophia_to_fate(Type, Sophia, Fate) -> erlang:error({to_fate_failed, Fate, FateActual}) end. -check_parser(Type, Sophia, Fate) -> - check_sophia_to_fate(Type, Sophia, Fate), - check_sophia_to_fate(unknown_type(), Sophia, Fate), - - % Finally, check that the FATE result is something that gmb understands. - gmb_fate_encoding:serialize(Fate), - - ok. - check_parser(Sophia, Fate) -> + % Compile the literal using the compiler, to check that it is valid Sophia + % syntax, and to get an AACI object to pass to the parser. Source = "contract C = entrypoint f() = " ++ Sophia, {ok, AACI} = hz_aaci:aaci_from_string(Source), {ok, {_, Type}} = hz_aaci:get_function_signature(AACI, "f"), - check_parser(Type, Sophia, Fate). + + % Also check that the FATE term is valid, by running it through gmb. + gmb_fate_encoding:serialize(Fate), + + % Now check that our parser produces that output. + check_sophia_to_fate(Type, Sophia, Fate), + % Also check that it can be parsed without type information. + check_sophia_to_fate(unknown_type(), Sophia, Fate). + +check_parser_with_typedef(Typedef, Sophia, Fate) -> + % Compile the type definitions alongside the usual literal expression. + Source = "contract C =\n " ++ Typedef ++ "\n entrypoint f() = " ++ Sophia, + {ok, AACI} = hz_aaci:aaci_from_string(Source), + {ok, {_, Type}} = hz_aaci:get_function_signature(AACI, "f"), + + % Check the FATE term as usual. + gmb_fate_encoding:serialize(Fate), + + % Do a typed parse, as usual, but there are probably record/variant + % definitions in the AACI, so untyped parses probably don't work. + check_sophia_to_fate(Type, Sophia, Fate). int_test() -> check_parser("123", 123). @@ -265,3 +353,12 @@ list_of_lists_test() -> maps_test() -> check_parser("{[1] = 2, [3] = 4}", #{1 => 2, 3 => 4}). +records_test() -> + TypeDef = "record pair = {x: int, y: int}", + Sophia = "{x = 1, y = 2}", + Fate = {tuple, {1, 2}}, + check_parser_with_typedef(TypeDef, Sophia, Fate), + % The above won't run an untyped parse on the expression, but we can. It + % will error, though. + {error, {unresolved_record, _, _, _}} = parse_literal(unknown_type(), Sophia). + From 7df04a81be859a2b9a060a19ea646469ef3753a1 Mon Sep 17 00:00:00 2001 From: Jarvis Carroll Date: Fri, 23 Jan 2026 02:45:23 +0000 Subject: [PATCH 09/42] Tuple parsing --- src/hz_sophia.erl | 96 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 85 insertions(+), 11 deletions(-) diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl index c9f106c..0e491a5 100644 --- a/src/hz_sophia.erl +++ b/src/hz_sophia.erl @@ -100,6 +100,8 @@ parse_expression2(Type, Tk, String, {integer, S, Row, Start, End}) -> end; parse_expression2(Type, Tk, String, {character, "[", Row, Start, _}) -> parse_list(Type, Tk, String, Row, Start); +parse_expression2(Type, Tk, String, {character, "(", Row, Start, _}) -> + parse_tuple(Type, Tk, String, Row, Start); parse_expression2(Type, Tk, String, {character, "{", Row, Start, _}) -> parse_record_or_map(Type, Tk, String, Row, Start); parse_expression2(_, _, _, {_, S, Row, Start, End}) -> @@ -121,39 +123,108 @@ expect_tokens([Str | Rest], Tk, String) -> %%% List Parsing parse_list({_, _, {list, [Inner]}}, Tk, String, Row, Start) -> - parse_list_loop(Inner, Tk, String, Row, Start, []); + parse_list_loop(Inner, Tk, String, "]", Row, Start, []); parse_list({_, _, unknown_type}, Tk, String, Row, Start) -> - parse_list_loop(unknown_type(), Tk, String, Row, Start, []); + parse_list_loop(unknown_type(), Tk, String, "]", Row, Start, []); parse_list({O, N, _}, _, _, Row, Start) -> {error, {wrong_type, O, N, list, Row, Start, Start}}. -parse_list_loop(Inner, Tk, String, Row, Start, Acc) -> +parse_list_loop(Inner, Tk, String, CloseChar, Row, Start, Acc) -> case next_token(Tk, String) of - {ok, {{character, "]", _, _, _}, NewTk, NewString}} -> + {ok, {{character, CloseChar, _, _, _}, NewTk, NewString}} -> {ok, {lists:reverse(Acc), NewTk, NewString}}; {ok, {Token, NewTk, NewString}} -> - parse_list_loop2(Inner, NewTk, NewString, Row, Start, Acc, Token) + parse_list_loop2(Inner, NewTk, NewString, CloseChar, Row, Start, Acc, Token) end. -parse_list_loop2(Inner, Tk, String, Row, Start, Acc, Token) -> +parse_list_loop2(Inner, Tk, String, CloseChar, Row, Start, Acc, Token) -> case parse_expression2(Inner, Tk, String, Token) of {ok, {Value, NewTk, NewString}} -> - parse_list_loop3(Inner, NewTk, NewString, Row, Start, [Value | Acc]); + parse_list_loop3(Inner, NewTk, NewString, CloseChar, Row, Start, [Value | Acc]); {error, Reason} -> - Wrapped = wrap_error(Reason, {list_element, length(Acc)}), + Wrapper = choose_list_error_wrapper(CloseChar), + % TODO: Are tuple indices off by one from list indices? + Wrapped = wrap_error(Reason, {Wrapper, length(Acc)}), {error, Wrapped} end. -parse_list_loop3(Inner, Tk, String, Row, Start, Acc) -> +parse_list_loop3(Inner, Tk, String, CloseChar, Row, Start, Acc) -> case next_token(Tk, String) of - {ok, {{character, "]", _, _, _}, NewTk, NewString}} -> + {ok, {{character, CloseChar, _, _, _}, NewTk, NewString}} -> {ok, {lists:reverse(Acc), NewTk, NewString}}; {ok, {{character, ",", _, _, _}, NewTk, NewString}} -> - parse_list_loop(Inner, NewTk, NewString, Row, Start, Acc); + parse_list_loop(Inner, NewTk, NewString, CloseChar, Row, Start, Acc); {error, Reason} -> {error, Reason} end. +choose_list_error_wrapper("]") -> list_element; +choose_list_error_wrapper(")") -> tuple_element. + +%%% Tuple Parsing + +parse_tuple({_, _, {tuple, Types}}, Tk, String, Row, Start) -> + case parse_multivalue(Types, Tk, String, Row, Start, []) of + {ok, {TermList, NewTk, NewString}} -> + Result = {tuple, list_to_tuple(TermList)}, + {ok, {Result, NewTk, NewString}}; + {error, Reason} -> + {error, Reason} + end; +parse_tuple({_, _, unknown_type}, Tk, String, Row, Start) -> + % An untyped tuple is a list of untyped terms, and weirdly our list parser + % works perfectly for that, as long as we change the closing character to + % be ")" instead of "]". + case parse_list_loop(unknown_type(), Tk, String, ")", Row, Start, []) of + {ok, {TermList, NewTk, NewString}} -> + Result = {tuple, list_to_tuple(TermList)}, + {ok, {Result, NewTk, NewString}}; + {error, Reason} -> + {error, Reason} + end; +parse_tuple({O, N, _}, _, _, Row, Start) -> + {error, {wrong_type, O, N, tuple, Row, Start, Start}}. + +parse_multivalue(ElemTypes, Tk, String, Row, Start, Acc) -> + case next_token(Tk, String) of + {ok, {{character, ")", Row2, Start2, _}, NewTk, NewString}} -> + check_multivalue_long_enough(ElemTypes, NewTk, NewString, Row2, Start2, Acc); + {ok, {Token, NewTk, NewString}} -> + parse_multivalue2(ElemTypes, NewTk, NewString, Row, Start, Acc, Token) + end. + +parse_multivalue2([Next | Rest], Tk, String, Row, Start, Acc, Token) -> + case parse_expression2(Next, Tk, String, Token) of + {ok, {Value, NewTk, NewString}} -> + parse_multivalue3(Rest, NewTk, NewString, Row, Start, [Value | Acc]); + {error, Reason} -> + Wrapper = choose_list_error_wrapper(")"), + % TODO: Are tuple indices off by one from list indices? + Wrapped = wrap_error(Reason, {Wrapper, length(Acc)}), + {error, Wrapped} + end; +parse_multivalue2([], Tk, String, _, _, Acc, {character, ")", _, _, _}) -> + {ok, {lists:reverse(Acc), Tk, String}}; +parse_multivalue2([], _, _, _, _, _, {_, S, Row, Start, End}) -> + {error, {unexpected_token, S, Row, Start, End}}. + +parse_multivalue3(ElemTypes, Tk, String, Row, Start, Acc) -> + case next_token(Tk, String) of + {ok, {{character, ")", Row2, Start2, _}, NewTk, NewString}} -> + check_multivalue_long_enough(ElemTypes, NewTk, NewString, Row2, Start2, Acc); + {ok, {{character, ",", _, _, _}, NewTk, NewString}} -> + parse_multivalue(ElemTypes, NewTk, NewString, Row, Start, Acc); + {error, Reason} -> + {error, Reason} + end. + +check_multivalue_long_enough([], Tk, String, _, _, Acc) -> + {ok, {lists:reverse(Acc), Tk, String}}; +check_multivalue_long_enough(Remaining, _, _, Row, Col, Got) -> + GotCount = length(Got), + ExpectCount = length(Remaining) + GotCount, + {error, {not_enough_elements, ExpectCount, GotCount, Row, Col}}. + %%% Record parsing parse_record_or_map({_, _, {map, [KeyType, ValueType]}}, Tk, String, _, _) -> @@ -350,6 +421,9 @@ list_test() -> list_of_lists_test() -> check_parser("[[], [1], [2, 3]]", [[], [1], [2, 3]]). +tuple_test() -> + check_parser("(1, [2, 3], (4, 5))", {tuple, {1, [2, 3], {tuple, {4, 5}}}}). + maps_test() -> check_parser("{[1] = 2, [3] = 4}", #{1 => 2, 3 => 4}). From 4f2a3c6c6f85422e03c3c0737b2d0a46afe55945 Mon Sep 17 00:00:00 2001 From: Jarvis Carroll Date: Fri, 23 Jan 2026 06:18:39 +0000 Subject: [PATCH 10/42] Variant parsing --- src/hz_sophia.erl | 68 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl index 0e491a5..d63d1be 100644 --- a/src/hz_sophia.erl +++ b/src/hz_sophia.erl @@ -104,6 +104,8 @@ parse_expression2(Type, Tk, String, {character, "(", Row, Start, _}) -> parse_tuple(Type, Tk, String, Row, Start); parse_expression2(Type, Tk, String, {character, "{", Row, Start, _}) -> parse_record_or_map(Type, Tk, String, Row, Start); +parse_expression2(Type, Tk, String, {alphanum, Ident, Row, Start, End}) -> + parse_variant(Type, Tk, String, Ident, Row, Start, End); parse_expression2(_, _, _, {_, S, Row, Start, End}) -> {error, {unexpected_token, S, Row, Start, End}}. @@ -225,6 +227,58 @@ check_multivalue_long_enough(Remaining, _, _, Row, Col, Got) -> ExpectCount = length(Remaining) + GotCount, {error, {not_enough_elements, ExpectCount, GotCount, Row, Col}}. +%%% Variant parsing + +parse_variant({_, _, {variant, Variants}}, Tk, String, Ident, Row, Start, End) -> + parse_variant2(Variants, Tk, String, Ident, Row, Start, End); +parse_variant({_, _, unknown_type}, _, _, _, Row, Start, End) -> + {error, {unresolved_variant, Row, Start, End}}; +parse_variant({O, N, _}, _, _, _, Row, Start, End) -> + % In normal code, identifiers can have many meanings, which can result in + % lots of different errors. In this Sophia 'object notation', identifiers + % can only ever be variant constructors, (sort of like the Sophia version + % of atoms,) and so immediately lead to a type error if we aren't expecting + % a variant. + {error, {wrong_type, O, N, variant, Row, Start, End}}. + +parse_variant2(Variants, Tk, String, Ident, Row, Start, End) -> + case lookup_variant(Ident, Variants, 0) of + {ok, {Tag, ElemTypes}} -> + GetArity = fun({_, OtherElemTypes}) -> length(OtherElemTypes) end, + Arities = lists:map(GetArity, Variants), + parse_variant3(Arities, Tag, ElemTypes, Tk, String); + error -> + {error, {invalid_constructor, Ident, Row, Start, End}} + end. + +parse_variant3(Arities, Tag, [], Tk, String) -> + % Parsing of 0-arity variants is different. + Result = {variant, Arities, Tag, {}}, + {ok, {Result, Tk, String}}; +parse_variant3(Arities, Tag, ElemTypes, Tk, String) -> + case next_token(Tk, String) of + {ok, {{character, "(", Row, Start, _}, NewTk, NewString}} -> + parse_variant4(Arities, Tag, ElemTypes, NewTk, NewString, Row, Start); + {ok, {{_, Actual, Row, Start, End}}} -> + {error, {unexpected_token, Actual, Row, Start, End}} + end. + +parse_variant4(Arities, Tag, ElemTypes, Tk, String, Row, Start) -> + case parse_multivalue(ElemTypes, Tk, String, Row, Start, []) of + {ok, {Terms, NewTk, NewString}} -> + Result = {variant, Arities, Tag, list_to_tuple(Terms)}, + {ok, {Result, NewTk, NewString}}; + {error, Reason} -> + {error, Reason} + end. + +lookup_variant(_, [], _) -> + error; +lookup_variant(Ident, [{Ident, ElemTypes} | _], Tag) -> + {ok, {Tag, ElemTypes}}; +lookup_variant(Ident, [_ | Rest], Tag) -> + lookup_variant(Ident, Rest, Tag + 1). + %%% Record parsing parse_record_or_map({_, _, {map, [KeyType, ValueType]}}, Tk, String, _, _) -> @@ -436,3 +490,17 @@ records_test() -> % will error, though. {error, {unresolved_record, _, _, _}} = parse_literal(unknown_type(), Sophia). +variant_test() -> + TypeDef = "datatype multi('a) = Zero | One('a) | Two('a, 'a)", + TestFn = fun(Sophia, Fate) -> + check_parser_with_typedef(TypeDef, Sophia, Fate), + {error, {unresolved_variant, _, _, _}} = parse_literal(unknown_type(), Sophia) + end, + + TestFn("Zero", {variant, [0, 1, 2], 0, {}}), + TestFn("One(0)", {variant, [0, 1, 2], 1, {0}}), + TestFn("Two(0, 1)", {variant, [0, 1, 2], 2, {0, 1}}), + TestFn("Two([], [1, 2, 3])", {variant, [0, 1, 2], 2, {[], [1, 2, 3]}}), + ok. + + From 2bf384ca826fba47f689764da22bb1c875c9462d Mon Sep 17 00:00:00 2001 From: Jarvis Carroll Date: Tue, 27 Jan 2026 06:42:55 +0000 Subject: [PATCH 11/42] Infer correct values for tests automatically Now tests compare the literal parser against the output of the compiler. The little example contracts we are compiling for the AACI already had the FATE value in them, in the form of the instruction {'RETURNR', {immediate, FateValue}} so we just extract that and use it for the tests. --- src/hz_aaci.erl | 2 -- src/hz_sophia.erl | 61 +++++++++++++++++++++++++++++++---------------- 2 files changed, 40 insertions(+), 23 deletions(-) diff --git a/src/hz_aaci.erl b/src/hz_aaci.erl index dfd158c..f9da33f 100644 --- a/src/hz_aaci.erl +++ b/src/hz_aaci.erl @@ -22,8 +22,6 @@ fate_to_erlang/2, erlang_args_to_fate/2, get_function_signature/2]). -% Internal stuff that is useful for writing AACI unit tests. --export([aaci_from_string/1, annotate_type/2]). %%% Types diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl index d63d1be..a158ba8 100644 --- a/src/hz_sophia.erl +++ b/src/hz_sophia.erl @@ -438,12 +438,33 @@ check_sophia_to_fate(Type, Sophia, Fate) -> erlang:error({to_fate_failed, Fate, FateActual}) end. -check_parser(Sophia, Fate) -> +compile_entrypoint_code_and_type(Source, Entrypoint) -> + {ok, #{fate_code := FateCode, aci := ACI}} = so_compiler:from_string(Source, [{aci, json}]), + + % Find the fcode for the correct entrypoint. + {fcode, Bodies, NamesMap, _} = FateCode, + Names = maps:to_list(NamesMap), + Name = unicode:characters_to_binary(Entrypoint), + {Hash, Name} = lists:keyfind(Name, 2, Names), + {_, _, Code} = maps:get(Hash, Bodies), + + % Generate the AACI, and get the AACI type info for the correct entrypoint. + AACI = hz_aaci:prepare_aaci(ACI), + {ok, {_, Type}} = hz_aaci:get_function_signature(AACI, "f"), + + {Code, Type}. + +extract_return_value(#{0 := [{'RETURNR', {immediate, FATE}}]}) -> + FATE; +extract_return_value(Code) -> + erlang:exit({invalid_literal_fcode, Code}). + +check_parser(Sophia) -> % Compile the literal using the compiler, to check that it is valid Sophia % syntax, and to get an AACI object to pass to the parser. Source = "contract C = entrypoint f() = " ++ Sophia, - {ok, AACI} = hz_aaci:aaci_from_string(Source), - {ok, {_, Type}} = hz_aaci:get_function_signature(AACI, "f"), + {Code, Type} = compile_entrypoint_code_and_type(Source, "f"), + Fate = extract_return_value(Code), % Also check that the FATE term is valid, by running it through gmb. gmb_fate_encoding:serialize(Fate), @@ -453,11 +474,11 @@ check_parser(Sophia, Fate) -> % Also check that it can be parsed without type information. check_sophia_to_fate(unknown_type(), Sophia, Fate). -check_parser_with_typedef(Typedef, Sophia, Fate) -> +check_parser_with_typedef(Typedef, Sophia) -> % Compile the type definitions alongside the usual literal expression. Source = "contract C =\n " ++ Typedef ++ "\n entrypoint f() = " ++ Sophia, - {ok, AACI} = hz_aaci:aaci_from_string(Source), - {ok, {_, Type}} = hz_aaci:get_function_signature(AACI, "f"), + {Code, Type} = compile_entrypoint_code_and_type(Source, "f"), + Fate = extract_return_value(Code), % Check the FATE term as usual. gmb_fate_encoding:serialize(Fate), @@ -467,40 +488,38 @@ check_parser_with_typedef(Typedef, Sophia, Fate) -> check_sophia_to_fate(Type, Sophia, Fate). int_test() -> - check_parser("123", 123). + check_parser("123"). list_test() -> - check_parser("[1, 2, 3]", [1, 2, 3]). + check_parser("[1, 2, 3]"). list_of_lists_test() -> - check_parser("[[], [1], [2, 3]]", [[], [1], [2, 3]]). + check_parser("[[], [1], [2, 3]]"). tuple_test() -> - check_parser("(1, [2, 3], (4, 5))", {tuple, {1, [2, 3], {tuple, {4, 5}}}}). + check_parser("(1, [2, 3], (4, 5))"). maps_test() -> - check_parser("{[1] = 2, [3] = 4}", #{1 => 2, 3 => 4}). + check_parser("{[1] = 2, [3] = 4}"). records_test() -> TypeDef = "record pair = {x: int, y: int}", Sophia = "{x = 1, y = 2}", - Fate = {tuple, {1, 2}}, - check_parser_with_typedef(TypeDef, Sophia, Fate), + check_parser_with_typedef(TypeDef, Sophia), % The above won't run an untyped parse on the expression, but we can. It % will error, though. {error, {unresolved_record, _, _, _}} = parse_literal(unknown_type(), Sophia). variant_test() -> TypeDef = "datatype multi('a) = Zero | One('a) | Two('a, 'a)", - TestFn = fun(Sophia, Fate) -> - check_parser_with_typedef(TypeDef, Sophia, Fate), - {error, {unresolved_variant, _, _, _}} = parse_literal(unknown_type(), Sophia) - end, - TestFn("Zero", {variant, [0, 1, 2], 0, {}}), - TestFn("One(0)", {variant, [0, 1, 2], 1, {0}}), - TestFn("Two(0, 1)", {variant, [0, 1, 2], 2, {0, 1}}), - TestFn("Two([], [1, 2, 3])", {variant, [0, 1, 2], 2, {[], [1, 2, 3]}}), + check_parser_with_typedef(TypeDef, "Zero"), + check_parser_with_typedef(TypeDef, "One(0)"), + check_parser_with_typedef(TypeDef, "Two(0, 1)"), + check_parser_with_typedef(TypeDef, "Two([], [1, 2, 3])"), + + {error, {unresolved_variant, _, _, _}} = parse_literal(unknown_type(), "Zero"), + ok. From f1696e2b9ef76300c8bf7cfa4bd64118f0811d64 Mon Sep 17 00:00:00 2001 From: Jarvis Carroll Date: Thu, 29 Jan 2026 02:01:16 +0000 Subject: [PATCH 12/42] Bytes lexing I don't handle underscores in bytes correctly... Nor in integers, for that matter. --- src/hz_sophia.erl | 90 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 75 insertions(+), 15 deletions(-) diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl index a158ba8..e84e7a7 100644 --- a/src/hz_sophia.erl +++ b/src/hz_sophia.erl @@ -4,6 +4,8 @@ -copyright("Jarvis Carroll "). -license("GPL-3.0-or-later"). +-export([check_parser/1]). + -include_lib("eunit/include/eunit.hrl"). parse_literal(Type, String) -> @@ -33,25 +35,26 @@ next_token({tk, Row, Col}, " " ++ Rest) -> next_token({tk, Row + 1, Col}, Rest); next_token({tk, Row, Col}, "\t" ++ Rest) -> next_token({tk, Row + 1, Col}, Rest); -next_token(Tk, [N | _] = String) when N >= $0, N =< $9 -> - num_token(Tk, Tk, String, []); +next_token({tk, _, Col}, "\r\n" ++ Rest) -> + next_token({tk, 1, Col + 1}, Rest); +next_token({tk, _, Col}, "\r" ++ Rest) -> + next_token({tk, 1, Col + 1}, Rest); +next_token({tk, _, Col}, "\n" ++ Rest) -> + next_token({tk, 1, Col + 1}, Rest); next_token(Tk, [N | _] = String) when N >= $A, N =< $Z -> alphanum_token(Tk, Tk, String, []); next_token(Tk, [N | _] = String) when N >= $a, N =< $z -> alphanum_token(Tk, Tk, String, []); -next_token(Tk, [$_ | _] = String) -> +next_token(Tk, "_" ++ _ = String) -> alphanum_token(Tk, Tk, String, []); +next_token(Tk, [N | _] = String) when N >= $0, N =< $9 -> + num_token(Tk, Tk, String, []); +next_token({tk, Row, Col}, "#" ++ Rest) -> + bytes_token({tk, Row, Col}, {tk, Row + 1, col}, Rest, "#"); next_token({tk, Row, Col}, [Char | Rest]) -> Token = {character, [Char], Row, Col, Col}, {ok, {Token, {tk, Row + 1, Col}, Rest}}. -num_token(Start, {tk, Row, Col}, [N | Rest], Acc) when N >= $0, N =< $9 -> - num_token(Start, {tk, Row + 1, Col}, Rest, [N | Acc]); -num_token({tk, _, Start}, {tk, Row, End}, String, Acc) -> - NumString = lists:reverse(Acc), - Token = {integer, NumString, Row, Start, End}, - {ok, {Token, {tk, Row, End}, String}}. - alphanum_token(Start, {tk, Row, Col}, [C | Rest], Acc) when C >= $A, C =< $Z -> alphanum_token(Start, {tk, Row, Col}, Rest, [C | Acc]); alphanum_token(Start, {tk, Row, Col}, [C | Rest], Acc) when C >= $a, C =< $z -> @@ -65,6 +68,24 @@ alphanum_token({tk, _, Start}, {tk, Row, End}, String, Acc) -> Token = {alphanum, AlphaString, Row, Start, End}, {ok, {Token, {tk, Row, End}, String}}. +num_token(Start, {tk, Row, Col}, [N | Rest], Acc) when N >= $0, N =< $9 -> + num_token(Start, {tk, Row + 1, Col}, Rest, [N | Acc]); +num_token({tk, _, Start}, {tk, Row, End}, String, Acc) -> + NumString = lists:reverse(Acc), + Token = {integer, NumString, Row, Start, End}, + {ok, {Token, {tk, Row, End}, String}}. + +bytes_token(Start, {tk, Row, Col}, [N | Rest], Acc) when N >= $0, N =< $9 -> + bytes_token(Start, {tk, Row + 1, Col}, Rest, [N | Acc]); +bytes_token(Start, {tk, Row, Col}, [N | Rest], Acc) when N >= $A, N =< $F -> + bytes_token(Start, {tk, Row + 1, Col}, Rest, [N | Acc]); +bytes_token(Start, {tk, Row, Col}, [N | Rest], Acc) when N >= $a, N =< $f -> + bytes_token(Start, {tk, Row + 1, Col}, Rest, [N | Acc]); +bytes_token({tk, _, Start}, {tk, Row, End}, String, Acc) -> + BytesString = lists:reverse(Acc), + Token = {bytes, BytesString, Row, Start, End}, + {ok, {Token, {tk, Row, End}, String}}. + %%% Sophia Literal Parser @@ -98,6 +119,22 @@ parse_expression2(Type, Tk, String, {integer, S, Row, Start, End}) -> {O, N, _} -> {error, {wrong_type, O, N, integer, Row, Start, End}} end; +parse_expression2(Type, Tk, String, {bytes, "#" ++ S, Row, Start, End}) -> + Value = convert_bytes(S), + Len = byte_size(Value), + Result = {bytes, Value}, + case Type of + {_, _, {bytes, [any]}} -> + {ok, {Result, Tk, String}}; + {_, _, {bytes, [Len]}} -> + {ok, {Result, Tk, String}}; + {_, _, {bytes, [ExpectedLen]}} -> + {error, {bytes_wrong_size, ExpectedLen, Len, Row, Start, End}}; + {_, _, unknown_type} -> + {ok, {Result, Tk, String}}; + {O, N, _} -> + {error, {wrong_type, O, N, integer, Row, Start, End}} + end; parse_expression2(Type, Tk, String, {character, "[", Row, Start, _}) -> parse_list(Type, Tk, String, Row, Start); parse_expression2(Type, Tk, String, {character, "(", Row, Start, _}) -> @@ -122,6 +159,25 @@ expect_tokens([Str | Rest], Tk, String) -> {error, {unexpected_token, Actual, Row, Start, End}} end. +convert_bytes(Chars) -> + Digits = lists:foldl(fun(C, Acc) -> [convert_nibble(C) | Acc] end, [], Chars), + reverse_combine_nibbles(Digits, <<>>). + +convert_nibble(C) when C >= $0, C =< $9 -> + C - $0; +convert_nibble(C) when C >= $A, C =< $Z -> + C - $A + 10; +convert_nibble(C) when C >= $a, C =< $z -> + C - $a + 10. + +reverse_combine_nibbles([D1, D2 | Rest], Acc) -> + NewAcc = <>, + reverse_combine_nibbles(Rest, NewAcc); +reverse_combine_nibbles([D1], Acc) -> + <<0:4, D1:4, Acc/binary>>; +reverse_combine_nibbles([], Acc) -> + Acc. + %%% List Parsing parse_list({_, _, {list, [Inner]}}, Tk, String, Row, Start) -> @@ -430,12 +486,13 @@ wrap_error(Reason, _) -> Reason. %%% Tests check_sophia_to_fate(Type, Sophia, Fate) -> - {ok, FateActual} = parse_literal(Type, Sophia), - case FateActual of - Fate -> + case parse_literal(Type, Sophia) of + {ok, Fate} -> ok; - _ -> - erlang:error({to_fate_failed, Fate, FateActual}) + {ok, FateActual} -> + erlang:error({to_fate_failed, Sophia, Fate, {ok, FateActual}}); + {error, Reason} -> + erlang:error({to_fate_failed, Sophia, Fate, {error, Reason}}) end. compile_entrypoint_code_and_type(Source, Entrypoint) -> @@ -502,6 +559,9 @@ tuple_test() -> maps_test() -> check_parser("{[1] = 2, [3] = 4}"). +bytes_test() -> + check_parser("#DEAD000BEEF"). + records_test() -> TypeDef = "record pair = {x: int, y: int}", Sophia = "{x = 1, y = 2}", From fe182a52337c9c5670dffacf2e164b82ceddc3d4 Mon Sep 17 00:00:00 2001 From: Jarvis Carroll Date: Thu, 29 Jan 2026 03:03:11 +0000 Subject: [PATCH 13/42] Handle underscores in integers/bytes This forces us to test for alpha/num/hex enough times that it's now worth making macros for these things. --- src/hz_sophia.erl | 106 +++++++++++++++++++++++++++------------------- 1 file changed, 62 insertions(+), 44 deletions(-) diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl index e84e7a7..c73ef06 100644 --- a/src/hz_sophia.erl +++ b/src/hz_sophia.erl @@ -29,6 +29,11 @@ parse_literal2(Result, Tk, String) -> %%% Tokenizer +-define(IS_ALPHA(C), ((((C) >= $A) and ((C) =< $Z)) or (((C) >= $a) and ((C) =< $z)) or ((C) == $_))). +-define(IS_NUM(C), (((C) >= $0) and ((C) =< $9))). +-define(IS_ALPHANUM(C), (?IS_ALPHA(C) or ?IS_NUM(C))). +-define(IS_HEX(C), (?IS_NUM(C) or (((C) >= $A) and ((C) =< $F)) or (((C) >= $a) and ((C) =< $f)))). + next_token({tk, Row, Col}, []) -> {ok, {{eof, "", Row, Col, Col}, {tk, Row, Col}, []}}; next_token({tk, Row, Col}, " " ++ Rest) -> @@ -41,46 +46,36 @@ next_token({tk, _, Col}, "\r" ++ Rest) -> next_token({tk, 1, Col + 1}, Rest); next_token({tk, _, Col}, "\n" ++ Rest) -> next_token({tk, 1, Col + 1}, Rest); -next_token(Tk, [N | _] = String) when N >= $A, N =< $Z -> +next_token(Tk, [C | _] = String) when ?IS_ALPHA(C) -> alphanum_token(Tk, Tk, String, []); -next_token(Tk, [N | _] = String) when N >= $a, N =< $z -> - alphanum_token(Tk, Tk, String, []); -next_token(Tk, "_" ++ _ = String) -> - alphanum_token(Tk, Tk, String, []); -next_token(Tk, [N | _] = String) when N >= $0, N =< $9 -> +next_token(Tk, [C | _] = String) when ?IS_NUM(C) -> num_token(Tk, Tk, String, []); -next_token({tk, Row, Col}, "#" ++ Rest) -> - bytes_token({tk, Row, Col}, {tk, Row + 1, col}, Rest, "#"); +next_token({tk, Row, Col}, [$#, C | Rest]) when ?IS_HEX(C) -> + bytes_token({tk, Row, Col}, {tk, Row + 2, Col}, Rest, [C, $#]); next_token({tk, Row, Col}, [Char | Rest]) -> Token = {character, [Char], Row, Col, Col}, {ok, {Token, {tk, Row + 1, Col}, Rest}}. -alphanum_token(Start, {tk, Row, Col}, [C | Rest], Acc) when C >= $A, C =< $Z -> +alphanum_token(Start, {tk, Row, Col}, [C | Rest], Acc) when ?IS_ALPHANUM(C) -> alphanum_token(Start, {tk, Row, Col}, Rest, [C | Acc]); -alphanum_token(Start, {tk, Row, Col}, [C | Rest], Acc) when C >= $a, C =< $z -> - alphanum_token(Start, {tk, Row, Col}, Rest, [C | Acc]); -alphanum_token(Start, {tk, Row, Col}, [C | Rest], Acc) when C >= $0, C =< $9 -> - alphanum_token(Start, {tk, Row, Col}, Rest, [C | Acc]); -alphanum_token(Start, {tk, Row, Col}, [$_ | Rest], Acc) -> - alphanum_token(Start, {tk, Row, Col}, Rest, [$_ | Acc]); alphanum_token({tk, _, Start}, {tk, Row, End}, String, Acc) -> AlphaString = lists:reverse(Acc), Token = {alphanum, AlphaString, Row, Start, End}, {ok, {Token, {tk, Row, End}, String}}. -num_token(Start, {tk, Row, Col}, [N | Rest], Acc) when N >= $0, N =< $9 -> - num_token(Start, {tk, Row + 1, Col}, Rest, [N | Acc]); +num_token(Start, {tk, Row, Col}, [C | Rest], Acc) when ?IS_NUM(C) -> + num_token(Start, {tk, Row + 1, Col}, Rest, [C | Acc]); +num_token(Start, {tk, Row, Col}, [$_, C | Rest], Acc) when ?IS_NUM(C) -> + num_token(Start, {tk, Row + 2, Col}, Rest, [C, $_ | Acc]); num_token({tk, _, Start}, {tk, Row, End}, String, Acc) -> NumString = lists:reverse(Acc), Token = {integer, NumString, Row, Start, End}, {ok, {Token, {tk, Row, End}, String}}. -bytes_token(Start, {tk, Row, Col}, [N | Rest], Acc) when N >= $0, N =< $9 -> - bytes_token(Start, {tk, Row + 1, Col}, Rest, [N | Acc]); -bytes_token(Start, {tk, Row, Col}, [N | Rest], Acc) when N >= $A, N =< $F -> - bytes_token(Start, {tk, Row + 1, Col}, Rest, [N | Acc]); -bytes_token(Start, {tk, Row, Col}, [N | Rest], Acc) when N >= $a, N =< $f -> - bytes_token(Start, {tk, Row + 1, Col}, Rest, [N | Acc]); +bytes_token(Start, {tk, Row, Col}, [C | Rest], Acc) when ?IS_HEX(C) -> + bytes_token(Start, {tk, Row + 1, Col}, Rest, [C | Acc]); +bytes_token(Start, {tk, Row, Col}, [$_, C | Rest], Acc) when ?IS_HEX(C) -> + bytes_token(Start, {tk, Row + 1, Col}, Rest, [C, $_ | Acc]); bytes_token({tk, _, Start}, {tk, Row, End}, String, Acc) -> BytesString = lists:reverse(Acc), Token = {bytes, BytesString, Row, Start, End}, @@ -110,7 +105,7 @@ parse_expression(Type, Tk, String) -> parse_expression2(Type, NewTk, NewString, Token). parse_expression2(Type, Tk, String, {integer, S, Row, Start, End}) -> - Value = list_to_integer(S), + Value = convert_int(S), case Type of {_, _, integer} -> {ok, {Value, Tk, String}}; @@ -159,15 +154,38 @@ expect_tokens([Str | Rest], Tk, String) -> {error, {unexpected_token, Actual, Row, Start, End}} end. +convert_int(Chars) -> + convert_int(Chars, 0). + +convert_int("_" ++ Chars, Result) -> + convert_int(Chars, Result); +convert_int([N | Chars], Result) -> + Digit = N - $0, + NewResult = Result * 10 + Digit, + convert_int(Chars, NewResult); +convert_int([], Result) -> + Result. + convert_bytes(Chars) -> - Digits = lists:foldl(fun(C, Acc) -> [convert_nibble(C) | Acc] end, [], Chars), + % We do this as two reversing foldl type loops. One removes underscores and + % converts the ASCII into integers, and the other peels off pairs of + % numbers to form bytes. + Digits = reverse_convert_digits(Chars, []), reverse_combine_nibbles(Digits, <<>>). -convert_nibble(C) when C >= $0, C =< $9 -> +reverse_convert_digits("_" ++ Rest, Acc) -> + reverse_convert_digits(Rest, Acc); +reverse_convert_digits([C | Rest], Acc) -> + Digit = convert_digit(C), + reverse_convert_digits(Rest, [Digit | Acc]); +reverse_convert_digits([], Acc) -> + Acc. + +convert_digit(C) when C >= $0, C =< $9 -> C - $0; -convert_nibble(C) when C >= $A, C =< $Z -> +convert_digit(C) when C >= $A, C =< $Z -> C - $A + 10; -convert_nibble(C) when C >= $a, C =< $z -> +convert_digit(C) when C >= $a, C =< $z -> C - $a + 10. reverse_combine_nibbles([D1, D2 | Rest], Acc) -> @@ -544,23 +562,23 @@ check_parser_with_typedef(Typedef, Sophia) -> % definitions in the AACI, so untyped parses probably don't work. check_sophia_to_fate(Type, Sophia, Fate). -int_test() -> - check_parser("123"). +anon_types_test() -> + % Integers. + check_parser("123"), + check_parser("1_2_3"), + % Bytes. + check_parser("#DEAD000BEEF"), + check_parser("#DE_AD0_00B_EEF"), + % List of integers. + check_parser("[1, 2, 3]"), + % List of lists. + check_parser("[[], [1], [2, 3]]"), + % Tuple. + check_parser("(1, [2, 3], (4, 5))"), + % Map. + check_parser("{[1] = 2, [3] = 4}"), -list_test() -> - check_parser("[1, 2, 3]"). - -list_of_lists_test() -> - check_parser("[[], [1], [2, 3]]"). - -tuple_test() -> - check_parser("(1, [2, 3], (4, 5))"). - -maps_test() -> - check_parser("{[1] = 2, [3] = 4}"). - -bytes_test() -> - check_parser("#DEAD000BEEF"). + ok. records_test() -> TypeDef = "record pair = {x: int, y: int}", From 966b4b274809f943227ab611ee8a629a99c7af88 Mon Sep 17 00:00:00 2001 From: Jarvis Carroll Date: Thu, 29 Jan 2026 04:06:19 +0000 Subject: [PATCH 14/42] Calculate scalar values during lexing This saves some effort and probably some performance for things like integers, but I'm mainly doing this in anticipation of string literals, because it would just be ridiculous to read code that lexes string literals twice. --- src/hz_sophia.erl | 168 ++++++++++++++++++++-------------------------- 1 file changed, 72 insertions(+), 96 deletions(-) diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl index c73ef06..db47d22 100644 --- a/src/hz_sophia.erl +++ b/src/hz_sophia.erl @@ -21,7 +21,7 @@ parse_literal2(Result, Tk, String) -> case next_token(Tk, String) of {ok, {{eof, _, _, _, _}, _, _}} -> {ok, Result}; - {ok, {{_, S, Row, Start, End}, _, _}} -> + {ok, {{_, S, _, Row, Start, End}, _, _}} -> {error, {unexpected_token, S, Row, Start, End}}; {error, Reason} -> {error, Reason} @@ -49,38 +49,58 @@ next_token({tk, _, Col}, "\n" ++ Rest) -> next_token(Tk, [C | _] = String) when ?IS_ALPHA(C) -> alphanum_token(Tk, Tk, String, []); next_token(Tk, [C | _] = String) when ?IS_NUM(C) -> - num_token(Tk, Tk, String, []); + num_token(Tk, Tk, String, [], 0); next_token({tk, Row, Col}, [$#, C | Rest]) when ?IS_HEX(C) -> - bytes_token({tk, Row, Col}, {tk, Row + 2, Col}, Rest, [C, $#]); + bytes_token({tk, Row, Col}, {tk, Row + 1, Col}, [C | Rest], "#", []); next_token({tk, Row, Col}, [Char | Rest]) -> - Token = {character, [Char], Row, Col, Col}, + Token = {character, [Char], Char, Row, Col, Col}, {ok, {Token, {tk, Row + 1, Col}, Rest}}. alphanum_token(Start, {tk, Row, Col}, [C | Rest], Acc) when ?IS_ALPHANUM(C) -> alphanum_token(Start, {tk, Row, Col}, Rest, [C | Acc]); alphanum_token({tk, _, Start}, {tk, Row, End}, String, Acc) -> AlphaString = lists:reverse(Acc), - Token = {alphanum, AlphaString, Row, Start, End}, + Token = {alphanum, AlphaString, AlphaString, Row, Start, End}, {ok, {Token, {tk, Row, End}, String}}. -num_token(Start, {tk, Row, Col}, [C | Rest], Acc) when ?IS_NUM(C) -> - num_token(Start, {tk, Row + 1, Col}, Rest, [C | Acc]); -num_token(Start, {tk, Row, Col}, [$_, C | Rest], Acc) when ?IS_NUM(C) -> - num_token(Start, {tk, Row + 2, Col}, Rest, [C, $_ | Acc]); -num_token({tk, _, Start}, {tk, Row, End}, String, Acc) -> - NumString = lists:reverse(Acc), - Token = {integer, NumString, Row, Start, End}, +num_token(Start, {tk, Row, Col}, [C | Rest], Chars, Value) when ?IS_NUM(C) -> + NewValue = Value * 10 + (C - $0), + num_token(Start, {tk, Row + 1, Col}, Rest, [C | Chars], NewValue); +num_token(Start, {tk, Row, Col}, [$_, C | Rest], Chars, Value) when ?IS_NUM(C) -> + NewValue = Value * 10 + (C - $0), + num_token(Start, {tk, Row + 2, Col}, Rest, [C, $_ | Chars], NewValue); +num_token({tk, _, Start}, {tk, Row, End}, String, Chars, Value) -> + NumString = lists:reverse(Chars), + Token = {integer, NumString, Value, Row, Start, End}, {ok, {Token, {tk, Row, End}, String}}. -bytes_token(Start, {tk, Row, Col}, [C | Rest], Acc) when ?IS_HEX(C) -> - bytes_token(Start, {tk, Row + 1, Col}, Rest, [C | Acc]); -bytes_token(Start, {tk, Row, Col}, [$_, C | Rest], Acc) when ?IS_HEX(C) -> - bytes_token(Start, {tk, Row + 1, Col}, Rest, [C, $_ | Acc]); -bytes_token({tk, _, Start}, {tk, Row, End}, String, Acc) -> - BytesString = lists:reverse(Acc), - Token = {bytes, BytesString, Row, Start, End}, +bytes_token(Start, {tk, Row, Col}, [C | Rest], Chars, Digits) when ?IS_HEX(C) -> + Digit = convert_digit(C), + bytes_token(Start, {tk, Row + 1, Col}, Rest, [C | Chars], [Digit | Digits]); +bytes_token(Start, {tk, Row, Col}, [$_, C | Rest], Chars, Digits) when ?IS_HEX(C) -> + Digit = convert_digit(C), + bytes_token(Start, {tk, Row + 1, Col}, Rest, [C, $_ | Chars], [Digit | Digits]); +bytes_token({tk, _, Start}, {tk, Row, End}, String, Chars, Digits) -> + BytesString = lists:reverse(Chars), + Value = reverse_combine_nibbles(Digits, <<>>), + Token = {bytes, BytesString, Value, Row, Start, End}, {ok, {Token, {tk, Row, End}, String}}. +convert_digit(C) when C >= $0, C =< $9 -> + C - $0; +convert_digit(C) when C >= $A, C =< $Z -> + C - $A + 10; +convert_digit(C) when C >= $a, C =< $z -> + C - $a + 10. + +reverse_combine_nibbles([D1, D2 | Rest], Acc) -> + NewAcc = <>, + reverse_combine_nibbles(Rest, NewAcc); +reverse_combine_nibbles([D1], Acc) -> + <<0:4, D1:4, Acc/binary>>; +reverse_combine_nibbles([], Acc) -> + Acc. + %%% Sophia Literal Parser @@ -104,8 +124,7 @@ parse_expression(Type, Tk, String) -> {ok, {Token, NewTk, NewString}} = next_token(Tk, String), parse_expression2(Type, NewTk, NewString, Token). -parse_expression2(Type, Tk, String, {integer, S, Row, Start, End}) -> - Value = convert_int(S), +parse_expression2(Type, Tk, String, {integer, _, Value, Row, Start, End}) -> case Type of {_, _, integer} -> {ok, {Value, Tk, String}}; @@ -114,8 +133,7 @@ parse_expression2(Type, Tk, String, {integer, S, Row, Start, End}) -> {O, N, _} -> {error, {wrong_type, O, N, integer, Row, Start, End}} end; -parse_expression2(Type, Tk, String, {bytes, "#" ++ S, Row, Start, End}) -> - Value = convert_bytes(S), +parse_expression2(Type, Tk, String, {bytes, _, Value, Row, Start, End}) -> Len = byte_size(Value), Result = {bytes, Value}, case Type of @@ -130,15 +148,15 @@ parse_expression2(Type, Tk, String, {bytes, "#" ++ S, Row, Start, End}) -> {O, N, _} -> {error, {wrong_type, O, N, integer, Row, Start, End}} end; -parse_expression2(Type, Tk, String, {character, "[", Row, Start, _}) -> +parse_expression2(Type, Tk, String, {character, "[", _, Row, Start, _}) -> parse_list(Type, Tk, String, Row, Start); -parse_expression2(Type, Tk, String, {character, "(", Row, Start, _}) -> +parse_expression2(Type, Tk, String, {character, "(", _, Row, Start, _}) -> parse_tuple(Type, Tk, String, Row, Start); -parse_expression2(Type, Tk, String, {character, "{", Row, Start, _}) -> +parse_expression2(Type, Tk, String, {character, "{", _, Row, Start, _}) -> parse_record_or_map(Type, Tk, String, Row, Start); -parse_expression2(Type, Tk, String, {alphanum, Ident, Row, Start, End}) -> +parse_expression2(Type, Tk, String, {alphanum, Ident, _, Row, Start, End}) -> parse_variant(Type, Tk, String, Ident, Row, Start, End); -parse_expression2(_, _, _, {_, S, Row, Start, End}) -> +parse_expression2(_, _, _, {_, S, _, Row, Start, End}) -> {error, {unexpected_token, S, Row, Start, End}}. unknown_type() -> @@ -148,54 +166,12 @@ expect_tokens([], Tk, String) -> {ok, {Tk, String}}; expect_tokens([Str | Rest], Tk, String) -> case next_token(Tk, String) of - {ok, {{_, Str, _, _, _}, NewTk, NewString}} -> + {ok, {{_, Str, _, _, _, _}, NewTk, NewString}} -> expect_tokens(Rest, NewTk, NewString); - {ok, {{_, Actual, Row, Start, End}}} -> + {ok, {{_, Actual, _, Row, Start, End}}} -> {error, {unexpected_token, Actual, Row, Start, End}} end. -convert_int(Chars) -> - convert_int(Chars, 0). - -convert_int("_" ++ Chars, Result) -> - convert_int(Chars, Result); -convert_int([N | Chars], Result) -> - Digit = N - $0, - NewResult = Result * 10 + Digit, - convert_int(Chars, NewResult); -convert_int([], Result) -> - Result. - -convert_bytes(Chars) -> - % We do this as two reversing foldl type loops. One removes underscores and - % converts the ASCII into integers, and the other peels off pairs of - % numbers to form bytes. - Digits = reverse_convert_digits(Chars, []), - reverse_combine_nibbles(Digits, <<>>). - -reverse_convert_digits("_" ++ Rest, Acc) -> - reverse_convert_digits(Rest, Acc); -reverse_convert_digits([C | Rest], Acc) -> - Digit = convert_digit(C), - reverse_convert_digits(Rest, [Digit | Acc]); -reverse_convert_digits([], Acc) -> - Acc. - -convert_digit(C) when C >= $0, C =< $9 -> - C - $0; -convert_digit(C) when C >= $A, C =< $Z -> - C - $A + 10; -convert_digit(C) when C >= $a, C =< $z -> - C - $a + 10. - -reverse_combine_nibbles([D1, D2 | Rest], Acc) -> - NewAcc = <>, - reverse_combine_nibbles(Rest, NewAcc); -reverse_combine_nibbles([D1], Acc) -> - <<0:4, D1:4, Acc/binary>>; -reverse_combine_nibbles([], Acc) -> - Acc. - %%% List Parsing parse_list({_, _, {list, [Inner]}}, Tk, String, Row, Start) -> @@ -207,7 +183,7 @@ parse_list({O, N, _}, _, _, Row, Start) -> parse_list_loop(Inner, Tk, String, CloseChar, Row, Start, Acc) -> case next_token(Tk, String) of - {ok, {{character, CloseChar, _, _, _}, NewTk, NewString}} -> + {ok, {{character, CloseChar, _, _, _, _}, NewTk, NewString}} -> {ok, {lists:reverse(Acc), NewTk, NewString}}; {ok, {Token, NewTk, NewString}} -> parse_list_loop2(Inner, NewTk, NewString, CloseChar, Row, Start, Acc, Token) @@ -226,9 +202,9 @@ parse_list_loop2(Inner, Tk, String, CloseChar, Row, Start, Acc, Token) -> parse_list_loop3(Inner, Tk, String, CloseChar, Row, Start, Acc) -> case next_token(Tk, String) of - {ok, {{character, CloseChar, _, _, _}, NewTk, NewString}} -> + {ok, {{character, CloseChar, _, _, _, _}, NewTk, NewString}} -> {ok, {lists:reverse(Acc), NewTk, NewString}}; - {ok, {{character, ",", _, _, _}, NewTk, NewString}} -> + {ok, {{character, ",", _, _, _, _}, NewTk, NewString}} -> parse_list_loop(Inner, NewTk, NewString, CloseChar, Row, Start, Acc); {error, Reason} -> {error, Reason} @@ -281,14 +257,14 @@ parse_multivalue2([Next | Rest], Tk, String, Row, Start, Acc, Token) -> end; parse_multivalue2([], Tk, String, _, _, Acc, {character, ")", _, _, _}) -> {ok, {lists:reverse(Acc), Tk, String}}; -parse_multivalue2([], _, _, _, _, _, {_, S, Row, Start, End}) -> +parse_multivalue2([], _, _, _, _, _, {_, S, _, Row, Start, End}) -> {error, {unexpected_token, S, Row, Start, End}}. parse_multivalue3(ElemTypes, Tk, String, Row, Start, Acc) -> case next_token(Tk, String) of - {ok, {{character, ")", Row2, Start2, _}, NewTk, NewString}} -> + {ok, {{character, ")", _, Row2, Start2, _}, NewTk, NewString}} -> check_multivalue_long_enough(ElemTypes, NewTk, NewString, Row2, Start2, Acc); - {ok, {{character, ",", _, _, _}, NewTk, NewString}} -> + {ok, {{character, ",", _, _, _, _}, NewTk, NewString}} -> parse_multivalue(ElemTypes, NewTk, NewString, Row, Start, Acc); {error, Reason} -> {error, Reason} @@ -331,9 +307,9 @@ parse_variant3(Arities, Tag, [], Tk, String) -> {ok, {Result, Tk, String}}; parse_variant3(Arities, Tag, ElemTypes, Tk, String) -> case next_token(Tk, String) of - {ok, {{character, "(", Row, Start, _}, NewTk, NewString}} -> + {ok, {{character, "(", _, Row, Start, _}, NewTk, NewString}} -> parse_variant4(Arities, Tag, ElemTypes, NewTk, NewString, Row, Start); - {ok, {{_, Actual, Row, Start, End}}} -> + {ok, {{_, Actual, _, Row, Start, End}}} -> {error, {unexpected_token, Actual, Row, Start, End}} end. @@ -361,13 +337,13 @@ parse_record_or_map({_, _, {record, Fields}}, Tk, String, _, _) -> parse_record(Fields, Tk, String, #{}); parse_record_or_map({_, _, unknown_type}, Tk, String, _, _) -> case next_token(Tk, String) of - {ok, {{character, "}", _, _, _}, NewTk, NewString}} -> + {ok, {{character, "}", _, _, _, _}, NewTk, NewString}} -> {ok, {#{}, NewTk, NewString}}; - {ok, {{character, "[", _, _, _}, NewTk, NewString}} -> + {ok, {{character, "[", _, _, _, _}, NewTk, NewString}} -> parse_map2(unknown_type(), unknown_type(), NewTk, NewString, #{}); - {ok, {{alphanum, _, Row, Start, End}, _, _}} -> + {ok, {{alphanum, _, _, Row, Start, End}, _, _}} -> {error, {unresolved_record, Row, Start, End}}; - {ok, {{_, S, Row, Start, End}, _, _}} -> + {ok, {{_, S, _, Row, Start, End}, _, _}} -> {error, {unexpected_token, S, Row, Start, End}} end; parse_record_or_map({O, N, _}, _, _, Row, Start) -> @@ -375,11 +351,11 @@ parse_record_or_map({O, N, _}, _, _, Row, Start) -> parse_record(Fields, Tk, String, Acc) -> case next_token(Tk, String) of - {ok, {{alphanum, Ident, Row, Start, End}, NewTk, NewString}} -> + {ok, {{alphanum, Ident, _, Row, Start, End}, NewTk, NewString}} -> parse_record2(Fields, NewTk, NewString, Acc, Ident, Row, Start, End); - {ok, {{character, "}", Row, Start, End}, NewTk, NewString}} -> + {ok, {{character, "}", _, Row, Start, End}, NewTk, NewString}} -> parse_record_end(Fields, NewTk, NewString, Acc, Row, Start, End); - {ok, {{_, S, Row, Start, End}, _, _}} -> + {ok, {{_, S, _, Row, Start, End}, _, _}} -> {error, {unexpected_token, S, Row, Start, End}}; {error, Reason} -> {error, Reason} @@ -420,11 +396,11 @@ parse_record5(Fields, Tk, String, Acc, Ident, Type) -> parse_record6(Fields, Tk, String, Acc) -> case next_token(Tk, String) of - {ok, {{character, ",", _, _, _}, NewTk, NewString}} -> + {ok, {{character, ",", _, _, _, _}, NewTk, NewString}} -> parse_record(Fields, NewTk, NewString, Acc); - {ok, {{character, "}", Row, Start, End}, NewTk, NewString}} -> + {ok, {{character, "}", _, Row, Start, End}, NewTk, NewString}} -> parse_record_end(Fields, NewTk, NewString, Acc, Row, Start, End); - {ok, {{_, S, Row, Start, End}, _, _}} -> + {ok, {{_, S, _, Row, Start, End}, _, _}} -> {error, {unexpected_token, S, Row, Start, End}}; {error, Reason} -> {error, Reason} @@ -455,11 +431,11 @@ parse_record_final_loop([], _, FieldsReverse) -> parse_map(KeyType, ValueType, Tk, String, Acc) -> case next_token(Tk, String) of - {ok, {{character, "[", _, _, _}, NewTk, NewString}} -> + {ok, {{character, "[", _, _, _, _}, NewTk, NewString}} -> parse_map2(KeyType, ValueType, NewTk, NewString, Acc); - {ok, {{character, "}", _, _, _}, NewTk, NewString}} -> + {ok, {{character, "}", _, _, _, _}, NewTk, NewString}} -> {ok, {Acc, NewTk, NewString}}; - {ok, {{_, S, Row, Start, End}}} -> + {ok, {{_, S, _, Row, Start, End}}} -> {error, {unexpected_token, S, Row, Start, End}} end. @@ -490,11 +466,11 @@ parse_map4(KeyType, ValueType, Tk, String, Acc, Key) -> parse_map5(KeyType, ValueType, Tk, String, Acc) -> case next_token(Tk, String) of - {ok, {{character, ",", _, _, _}, NewTk, NewString}} -> + {ok, {{character, ",", _, _, _, _}, NewTk, NewString}} -> parse_map(KeyType, ValueType, NewTk, NewString, Acc); - {ok, {{character, "}", _, _, _}, NewTk, NewString}} -> + {ok, {{character, "}", _, _, _, _}, NewTk, NewString}} -> {ok, {Acc, NewTk, NewString}}; - {ok, {{_, S, Row, Start, End}}} -> + {ok, {{_, S, _, Row, Start, End}}} -> {error, {unexpected_token, S, Row, Start, End}} end. From 49cd8b6687ce68342bd121b9398b9b8259a66a38 Mon Sep 17 00:00:00 2001 From: Jarvis Carroll Date: Thu, 29 Jan 2026 06:18:06 +0000 Subject: [PATCH 15/42] Parse strings --- src/hz_sophia.erl | 65 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 62 insertions(+), 3 deletions(-) diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl index db47d22..b73a4c0 100644 --- a/src/hz_sophia.erl +++ b/src/hz_sophia.erl @@ -52,6 +52,8 @@ next_token(Tk, [C | _] = String) when ?IS_NUM(C) -> num_token(Tk, Tk, String, [], 0); next_token({tk, Row, Col}, [$#, C | Rest]) when ?IS_HEX(C) -> bytes_token({tk, Row, Col}, {tk, Row + 1, Col}, [C | Rest], "#", []); +next_token({tk, Row, Col}, "\"" ++ Rest) -> + string_token({tk, Row, Col}, {tk, Row + 1, Col}, Rest, "\"", <<>>); next_token({tk, Row, Col}, [Char | Rest]) -> Token = {character, [Char], Char, Row, Col, Col}, {ok, {Token, {tk, Row + 1, Col}, Rest}}. @@ -101,6 +103,46 @@ reverse_combine_nibbles([D1], Acc) -> reverse_combine_nibbles([], Acc) -> Acc. +string_token(Start, {tk, Row, Col}, [$\\, $x, A, B | Rest], SourceChars, Value) -> + case escape_hex_code(A, B) of + {ok, ByteVal} -> + string_token(Start, {tk, Row + 4, Col}, Rest, [B, A, $x, $\ | SourceChars], <>); + error -> + {error, {invalid_escape_code, [$\\, $x, A, B], Row, Col}} + end; +string_token(Start, {tk, Row, Col}, [$\\, C | Rest], SourceChars, Value) -> + case escape_char(C) of + {ok, ByteVal} -> + string_token(Start, {tk, Row + 2, Col}, Rest, [C, $\ | SourceChars], <>); + error -> + {error, {invalid_escape_code, [C], Row, Col}} + end; +string_token({tk, _, Start}, {tk, Row, End}, [$" | Rest], SourceChars, Value) -> + SourceStr = lists:reverse([$" | SourceChars]), + Token = {string, SourceStr, Value, Row, Start, End}, + {ok, {Token, {tk, Row, End}, Rest}}; +string_token(Start, {tk, Row, Col}, [C | Rest], SourceChars, Value) -> + string_token(Start, {tk, Row + 1, Col}, Rest, [C | SourceChars], <>). + +escape_hex_code(A, B) when ?IS_HEX(A), ?IS_HEX(B) -> + % As of writing this, the Sophia compiler will convert this byte from + % extended ASCII to unicode... But it really shouldn't. The literal parser + % does what the compiler should do. + Byte = convert_digit(A) * 16 + convert_digit(B), + {ok, Byte}; +escape_hex_code(_, _) -> + error. + +escape_char($b) -> {ok, $\b}; +escape_char($e) -> {ok, $\e}; +escape_char($f) -> {ok, $\f}; +escape_char($n) -> {ok, $\n}; +escape_char($r) -> {ok, $\r}; +escape_char($t) -> {ok, $\t}; +escape_char($v) -> {ok, $\v}; +escape_char($") -> {ok, $\"}; +escape_char($\\) -> {ok, $\\}; +escape_char(_) -> error. %%% Sophia Literal Parser @@ -121,8 +163,12 @@ reverse_combine_nibbles([], Acc) -> %%% pushdown automaton that we want. parse_expression(Type, Tk, String) -> - {ok, {Token, NewTk, NewString}} = next_token(Tk, String), - parse_expression2(Type, NewTk, NewString, Token). + case next_token(Tk, String) of + {ok, {Token, NewTk, NewString}} -> + parse_expression2(Type, NewTk, NewString, Token); + {error, Reason} -> + {error, Reason} + end. parse_expression2(Type, Tk, String, {integer, _, Value, Row, Start, End}) -> case Type of @@ -146,7 +192,16 @@ parse_expression2(Type, Tk, String, {bytes, _, Value, Row, Start, End}) -> {_, _, unknown_type} -> {ok, {Result, Tk, String}}; {O, N, _} -> - {error, {wrong_type, O, N, integer, Row, Start, End}} + {error, {wrong_type, O, N, {bytes, [Len]}, Row, Start, End}} + end; +parse_expression2(Type, Tk, String, {string, _, Value, Row, Start, End}) -> + case Type of + {_, _, string} -> + {ok, {Value, Tk, String}}; + {_, _, unknown_type} -> + {ok, {Value, Tk, String}}; + {O, N, _} -> + {error, {wrong_type, O, N, string, Row, Start, End}} end; parse_expression2(Type, Tk, String, {character, "[", _, Row, Start, _}) -> parse_list(Type, Tk, String, Row, Start); @@ -545,6 +600,10 @@ anon_types_test() -> % Bytes. check_parser("#DEAD000BEEF"), check_parser("#DE_AD0_00B_EEF"), + % Strings. + check_parser("\"hello world\""), + check_parser("\" \\b\\e\\f\\n\\r\\t\\v\\\"\\\\ \""), + check_parser("\"\\x00\\x11\\x77\""), % List of integers. check_parser("[1, 2, 3]"), % List of lists. From 272ed01fdc7e647d930310ccbd5fef41de435f25 Mon Sep 17 00:00:00 2001 From: Jarvis Carroll Date: Fri, 30 Jan 2026 08:12:32 +0000 Subject: [PATCH 16/42] Singleton record/tuple parsing. Records are a simple case to detect and handle correctly. Tuples took an entire rewrite of the little tuple parsing bit of the code. --- src/hz_sophia.erl | 224 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 198 insertions(+), 26 deletions(-) diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl index b73a4c0..5017a92 100644 --- a/src/hz_sophia.erl +++ b/src/hz_sophia.erl @@ -223,7 +223,7 @@ expect_tokens([Str | Rest], Tk, String) -> case next_token(Tk, String) of {ok, {{_, Str, _, _, _, _}, NewTk, NewString}} -> expect_tokens(Rest, NewTk, NewString); - {ok, {{_, Actual, _, Row, Start, End}}} -> + {ok, {{_, Actual, _, Row, Start, End}, _, _}} -> {error, {unexpected_token, Actual, Row, Start, End}} end. @@ -268,33 +268,158 @@ parse_list_loop3(Inner, Tk, String, CloseChar, Row, Start, Acc) -> choose_list_error_wrapper("]") -> list_element; choose_list_error_wrapper(")") -> tuple_element. -%%% Tuple Parsing +%%% Ambiguous Parenthesis Parsing -parse_tuple({_, _, {tuple, Types}}, Tk, String, Row, Start) -> - case parse_multivalue(Types, Tk, String, Row, Start, []) of - {ok, {TermList, NewTk, NewString}} -> - Result = {tuple, list_to_tuple(TermList)}, - {ok, {Result, NewTk, NewString}}; - {error, Reason} -> - {error, Reason} - end; parse_tuple({_, _, unknown_type}, Tk, String, Row, Start) -> % An untyped tuple is a list of untyped terms, and weirdly our list parser % works perfectly for that, as long as we change the closing character to % be ")" instead of "]". case parse_list_loop(unknown_type(), Tk, String, ")", Row, Start, []) of + {ok, {[Inner], NewTk, NewString}} -> + % In Sophia, singleton tuples are unwrapped, and given the inner + % type. + {ok, {Inner, NewTk, NewString}}; {ok, {TermList, NewTk, NewString}} -> Result = {tuple, list_to_tuple(TermList)}, {ok, {Result, NewTk, NewString}}; {error, Reason} -> {error, Reason} end; -parse_tuple({O, N, _}, _, _, Row, Start) -> - {error, {wrong_type, O, N, tuple, Row, Start, Start}}. +parse_tuple({O, N, T}, Tk, String, _, _) -> + % Typed tuple parsing is quite complex, because we also want to support + % normal parentheses for grouping. It's not strictly necessary for + % inputting data, since we don't have any infix operators in simple + % data/term notation, but the alternatives are to generate singleton tuples + % naively, (which are impossible to generate from Sophia,) or to hard error + % on singleton tuples! Being faithful to Sophia is clearly nice! + + % Count how many ambiguous parens there are, including the one we already + % saw. + case count_open_parens(Tk, String, 1) of + {ok, {Count, Token, NewTk, NewString}} -> + % Compare that to the amount of nesting tuple connectives are in + % the type we are expected to produce. + {ExcessCount, HeadType, Tails} = extract_tuple_type_info(Count, {O, N, T}, []), + % Now work out what to do with all this information. + parse_tuple2(O, N, ExcessCount, HeadType, Tails, NewTk, NewString, Token); + {error, Reason} -> + {error, Reason} + end. + +count_open_parens(Tk, String, Count) -> + case next_token(Tk, String) of + {ok, {{character, "(", _, _, _, _}, NewTk, NewString}} -> + count_open_parens(NewTk, NewString, Count + 1); + {ok, {Token, NewTk, NewString}} -> + {ok, {Count, Token, NewTk, NewString}}; + {error, Reason} -> + {error, Reason} + end. + +extract_tuple_type_info(ParenCount, {_, _, {tuple, [Head | Rest]}}, Tails) when ParenCount > 0 -> + % Have an open paren, and a tuple type. We need to go deeper! + extract_tuple_type_info(ParenCount - 1, Head, [Rest | Tails]); +extract_tuple_type_info(ParenCount, HeadType, Tails) -> + % No parens, or no more (non-empty) tuples. Stop! + {ParenCount, HeadType, Tails}. + +parse_tuple2(_, _, _, {_, _, unknown_type}, [_ | _], _, _, _) -> + {error, "Parsing of tuples with known lengths but unknown contents is not yet implemented."}; +parse_tuple2(O, N, ExcessCount, HeadType, Tails, Tk, String, {character, ")", _, Row, Col, _}) -> + parse_empty_tuple(O, N, ExcessCount, HeadType, Tails, Tk, String, Row, Col); +parse_tuple2(O, N, ExcessCount, HeadType, Tails, Tk, String, Token) -> + % Finished with parentheses for now, try and parse an expression out, to + % get our head term. + case parse_expression2(HeadType, Tk, String, Token) of + {ok, {Result, NewTk, NewString}} -> + % Got a head term. Now try to build all the other tuple layers. + parse_tuple_tails(O, N, ExcessCount, Result, Tails, NewTk, NewString); + {error, Reason} -> + % TODO: Wrap errors here too. + {error, Reason} + end. + +parse_empty_tuple(_, _, 0, _, Tails, _, _, Row, Col) -> + % There are zero excess parens, meaning all our parens are tuples. Get the + % top one. + [Tail | _] = Tails, + % We expected some nonzero number of elements before the close paren, but + % got zero. + ExpectCount = 1 + length(Tail), + {error, {not_enough_elements, ExpectCount, 0, Row, Col}}; +parse_empty_tuple(O, N, ExcessCount, {_, _, {tuple, []}}, Tails, Tk, String, _, _) -> + % If we have some ambiguous parentheses left, we now know one of them is + % this empty tuple. + HeadTerm = {tuple, {}}, + NewExcessCount = ExcessCount - 1, + % Now continue the loop as if it were an integer or something, in the head + % position. + parse_tuple_tails(O, N, NewExcessCount, HeadTerm, Tails, Tk, String); +parse_empty_tuple(_, _, _, {HeadO, HeadN, _}, _, _, _, Row, Col) -> + % We were expecting a head term of a different type! + {error, {wrong_type, HeadO, HeadN, unit, Row, Col, Col}}. + +parse_tuple_tails(O, N, 0, HeadTerm, [TailTypes | ParentTails], Tk, String) -> + % Tuples left to build, but no extra open parens to deal with, so we can + % just parse multivalues naively, starting from the "we have a term, + % waiting for a comma" stage of the loop. + case parse_multivalue3(TailTypes, Tk, String, -1, -1, [HeadTerm]) of + {ok, {Terms, NewTk, NewString}} -> + NewHead = {tuple, list_to_tuple(Terms)}, + parse_tuple_tails(O, N, 0, NewHead, ParentTails, NewTk, NewString); + {error, Reason} -> + % TODO: More error wrapping? + {error, Reason} + end; +parse_tuple_tails(_, _, 0, HeadTerm, [], Tk, String) -> + % No open parens left, no tuples left to build, we are done! + {ok, {HeadTerm, Tk, String}}; +parse_tuple_tails(O, N, ExcessCount, HeadTerm, Tails, Tk, String) -> + % The ambiguous case, where we have a mix of tuple parens, and grouping + % parens. We want to peek at the next token, to see if it closes a grouping + % paren. + case next_token(Tk, String) of + {ok, {{character, ")", _, _, _, _}, NewTk, NewString}} -> + % It is grouping! Close one excess paren, and continue. + parse_tuple_tails(O, N, ExcessCount - 1, HeadTerm, Tails, NewTk, NewString); + {ok, {{character, ",", _, _, _, _}, NewTk, NewString}} -> + % It is a real tuple! Try the normal logic, then. + parse_tuple_tails2(O, N, ExcessCount, HeadTerm, Tails, NewTk, NewString); + {ok, {{_, Actual, _, Row, Start, End}, _, _}} -> + % Anything else is just a boring parse error we can complain about. + {error, {unexpected_token, Actual, Row, Start, End}}; + {error, Reason} -> + {error, Reason} + end. + +parse_tuple_tails2(O, N, ExcessCount, HeadTerm, [TailTypes | ParentTails], Tk, String) -> + case parse_multivalue(TailTypes, Tk, String, -1, -1, [HeadTerm]) of + {ok, {Terms, NewTk, NewString}} -> + NewHead = {tuple, list_to_tuple(Terms)}, + parse_tuple_tails(O, N, ExcessCount, NewHead, ParentTails, NewTk, NewString); + {error, Reason} -> + % TODO: wrap errors? + {error, Reason} + end; +parse_tuple_tails2(O, N, _, _, [], _, _) -> + % This case is created when, for example, we want int * int, but instead we + % get a term like ((1, 2), 3), of type (int * int) * int. The trouble is, + % ((1, 2)) would have been valid, so it's actually the second comma that + % tips us off to the error, not the first one. + % + % For simpler cases, like (1, 2) when int was expected, this error message + % is fine: + Err = {error, {wrong_type, O, N, tuple, -1, -1, -1}}, + % TODO: Row/col + % TODO: Generate better error messages in the cases where N *is* a tuple, + % but the first thing inside that tuple is the problem. + Err. + +%%% Unambiguous Tuple/Variant Parsing parse_multivalue(ElemTypes, Tk, String, Row, Start, Acc) -> case next_token(Tk, String) of - {ok, {{character, ")", Row2, Start2, _}, NewTk, NewString}} -> + {ok, {{character, ")", _, Row2, Start2, _}, NewTk, NewString}} -> check_multivalue_long_enough(ElemTypes, NewTk, NewString, Row2, Start2, Acc); {ok, {Token, NewTk, NewString}} -> parse_multivalue2(ElemTypes, NewTk, NewString, Row, Start, Acc, Token) @@ -310,7 +435,7 @@ parse_multivalue2([Next | Rest], Tk, String, Row, Start, Acc, Token) -> Wrapped = wrap_error(Reason, {Wrapper, length(Acc)}), {error, Wrapped} end; -parse_multivalue2([], Tk, String, _, _, Acc, {character, ")", _, _, _}) -> +parse_multivalue2([], Tk, String, _, _, Acc, {character, ")", _, _, _, _}) -> {ok, {lists:reverse(Acc), Tk, String}}; parse_multivalue2([], _, _, _, _, _, {_, S, _, Row, Start, End}) -> {error, {unexpected_token, S, Row, Start, End}}. @@ -321,6 +446,8 @@ parse_multivalue3(ElemTypes, Tk, String, Row, Start, Acc) -> check_multivalue_long_enough(ElemTypes, NewTk, NewString, Row2, Start2, Acc); {ok, {{character, ",", _, _, _, _}, NewTk, NewString}} -> parse_multivalue(ElemTypes, NewTk, NewString, Row, Start, Acc); + {ok, {{_, Actual, _, Row, Start, End}, _, _}} -> + {error, {unexpected_token, Actual, Row, Start, End}}; {error, Reason} -> {error, Reason} end. @@ -364,7 +491,7 @@ parse_variant3(Arities, Tag, ElemTypes, Tk, String) -> case next_token(Tk, String) of {ok, {{character, "(", _, Row, Start, _}, NewTk, NewString}} -> parse_variant4(Arities, Tag, ElemTypes, NewTk, NewString, Row, Start); - {ok, {{_, Actual, _, Row, Start, End}}} -> + {ok, {{_, Actual, _, Row, Start, End}, _, _}} -> {error, {unexpected_token, Actual, Row, Start, End}} end. @@ -476,6 +603,10 @@ parse_record_final_loop([{Name, _} | Rest], FieldValues, Acc) -> error -> {error, {missing_field, Name}} end; +parse_record_final_loop([], _, [Field]) -> + % Singleton records are type-checked in Sophia, but unwrapped in the + % resulting FATE. + {ok, Field}; parse_record_final_loop([], _, FieldsReverse) -> Fields = lists:reverse(FieldsReverse), Tuple = list_to_tuple(Fields), @@ -490,7 +621,7 @@ parse_map(KeyType, ValueType, Tk, String, Acc) -> parse_map2(KeyType, ValueType, NewTk, NewString, Acc); {ok, {{character, "}", _, _, _, _}, NewTk, NewString}} -> {ok, {Acc, NewTk, NewString}}; - {ok, {{_, S, _, Row, Start, End}}} -> + {ok, {{_, S, _, Row, Start, End}, _, _}} -> {error, {unexpected_token, S, Row, Start, End}} end. @@ -525,7 +656,7 @@ parse_map5(KeyType, ValueType, Tk, String, Acc) -> parse_map(KeyType, ValueType, NewTk, NewString, Acc); {ok, {{character, "}", _, _, _, _}, NewTk, NewString}} -> {ok, {Acc, NewTk, NewString}}; - {ok, {{_, S, _, Row, Start, End}}} -> + {ok, {{_, S, _, Row, Start, End}, _, _}} -> {error, {unexpected_token, S, Row, Start, End}} end. @@ -570,15 +701,15 @@ check_parser(Sophia) -> % syntax, and to get an AACI object to pass to the parser. Source = "contract C = entrypoint f() = " ++ Sophia, {Code, Type} = compile_entrypoint_code_and_type(Source, "f"), + + % Check that when we parse the term we get the same value as the Sophia + % compiler. Fate = extract_return_value(Code), + check_sophia_to_fate(unknown_type(), Sophia, Fate), - % Also check that the FATE term is valid, by running it through gmb. - gmb_fate_encoding:serialize(Fate), - - % Now check that our parser produces that output. - check_sophia_to_fate(Type, Sophia, Fate), - % Also check that it can be parsed without type information. - check_sophia_to_fate(unknown_type(), Sophia, Fate). + % Then, once we know that the term is correct, make sure that it is still + % accepted *with* type info. + check_sophia_to_fate(Type, Sophia, Fate). check_parser_with_typedef(Typedef, Sophia) -> % Compile the type definitions alongside the usual literal expression. @@ -602,8 +733,6 @@ anon_types_test() -> check_parser("#DE_AD0_00B_EEF"), % Strings. check_parser("\"hello world\""), - check_parser("\" \\b\\e\\f\\n\\r\\t\\v\\\"\\\\ \""), - check_parser("\"\\x00\\x11\\x77\""), % List of integers. check_parser("[1, 2, 3]"), % List of lists. @@ -615,6 +744,12 @@ anon_types_test() -> ok. +string_escape_codes_test() -> + check_parser("\" \\b\\e\\f\\n\\r\\t\\v\\\"\\\\ \""), + check_parser("\"\\x00\\x11\\x77\\x4a\\x4A\""), + check_parser("\"\\x{7F}\\x{07F}\\x{007F}\\x{0007F}\""), + ok. + records_test() -> TypeDef = "record pair = {x: int, y: int}", Sophia = "{x = 1, y = 2}", @@ -623,6 +758,43 @@ records_test() -> % will error, though. {error, {unresolved_record, _, _, _}} = parse_literal(unknown_type(), Sophia). +singleton_records_test() -> + TypeDef = "record singleton('a) = {it: 'a}", + check_parser_with_typedef(TypeDef, "{it = 123}"), + check_parser_with_typedef(TypeDef, "{it = {it = {it = 5}}}"), + check_parser_with_typedef(TypeDef, "[{it = 1}, {it = 2}, {it = 3}]"), + + ok. + +singleton_variants_test() -> + % Similar tests to the singleton records, but this time there isn't + % actually a special case; singleton variants are in fact wrapped in the + % FATE too. + TypeDef = "datatype wrapped('a) = Wrap('a)", + check_parser_with_typedef(TypeDef, "Wrap(123)"), + check_parser_with_typedef(TypeDef, "Wrap(Wrap(123))"), + check_parser_with_typedef(TypeDef, "[Wrap(1), Wrap(2), Wrap(3)]"), + + ok. + +excess_parens_test() -> + % 'singleton' parens are another special case, but unlike singleton + % records, which exist in the type system, singleton parens aren't tuples + % at all! They are just grouping, for arithmetic. For example. + check_parser("(123)"), + check_parser("[1, (2), ((3))]"), + % Where this gets tricky, though, is when grouping parens are mixed with + % tuple parens. E.g. this list of three tuples should all parse to the same + % result. + check_parser("[((1, 2)), ((1), 2), (((1), 2))]"), + % Including multiple nestings of tuples and grouping, interleaved. + check_parser("((((1), ((2, 3)))), 4)"), + % Also empty tuples exist! + check_parser("()"), + check_parser("(((((), ())), ()))"), + + ok. + variant_test() -> TypeDef = "datatype multi('a) = Zero | One('a) | Two('a, 'a)", From 17f635af618463c68e219fee73e74821b6903c7b Mon Sep 17 00:00:00 2001 From: Jarvis Carroll Date: Tue, 3 Feb 2026 00:41:00 +0000 Subject: [PATCH 17/42] Parse long hex escape codes This doesn't work super consistently in the compiler, for codepoints above 127, but it should work fine for us, so, oh well! --- src/hz_sophia.erl | 43 +++++++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl index 5017a92..32bbde4 100644 --- a/src/hz_sophia.erl +++ b/src/hz_sophia.erl @@ -103,12 +103,13 @@ reverse_combine_nibbles([D1], Acc) -> reverse_combine_nibbles([], Acc) -> Acc. -string_token(Start, {tk, Row, Col}, [$\\, $x, A, B | Rest], SourceChars, Value) -> - case escape_hex_code(A, B) of - {ok, ByteVal} -> - string_token(Start, {tk, Row + 4, Col}, Rest, [B, A, $x, $\ | SourceChars], <>); - error -> - {error, {invalid_escape_code, [$\\, $x, A, B], Row, Col}} +string_token(Start, {tk, Row, Col}, "\\x" ++ String, SourceChars, Value) -> + case escape_hex_code({tk, Row, Col}, {tk, Row + 2, Col}, String, "x\\" ++ SourceChars) of + {ok, {Codepoint, NewSourceChars, NewTk, NewString}} -> + NewValue = <>, + string_token(Start, NewTk, NewString, NewSourceChars, NewValue); + {error, Reason} -> + {error, Reason} end; string_token(Start, {tk, Row, Col}, [$\\, C | Rest], SourceChars, Value) -> case escape_char(C) of @@ -122,16 +123,34 @@ string_token({tk, _, Start}, {tk, Row, End}, [$" | Rest], SourceChars, Value) -> Token = {string, SourceStr, Value, Row, Start, End}, {ok, {Token, {tk, Row, End}, Rest}}; string_token(Start, {tk, Row, Col}, [C | Rest], SourceChars, Value) -> - string_token(Start, {tk, Row + 1, Col}, Rest, [C | SourceChars], <>). + % TODO: ERTS probably had to convert this FROM utf8 at some point, so why + % bother, if we need to convert it back? I guess we could accept iolists if + % we really wanted to waste time on this point... + string_token(Start, {tk, Row + 1, Col}, Rest, [C | SourceChars], <>). -escape_hex_code(A, B) when ?IS_HEX(A), ?IS_HEX(B) -> +escape_hex_code(Start, {tk, Row, Col}, "{" ++ String, SourceChars) -> + escape_long_hex_code(Start, {tk, Row + 1, Col}, String, "{" ++ SourceChars, 0); +escape_hex_code(_, {tk, Row, Col}, [A, B | String], SourceChars) when ?IS_HEX(A), ?IS_HEX(B) -> % As of writing this, the Sophia compiler will convert this byte from % extended ASCII to unicode... But it really shouldn't. The literal parser % does what the compiler should do. Byte = convert_digit(A) * 16 + convert_digit(B), - {ok, Byte}; -escape_hex_code(_, _) -> - error. + {ok, {Byte, [B, A | SourceChars], {tk, Row + 2, Col}, String}}; +escape_hex_code({tk, Row1, Col1}, _, _, _) -> + {error, {invalid_escape_code, "\\x", Row1, Col1}}. + +escape_long_hex_code(_, {tk, Row, Col}, "}" ++ String, SourceChars, Value) -> + {ok, {Value, "}" ++ SourceChars, {tk, Row + 1, Col}, String}}; +escape_long_hex_code(Start, {tk, Row, Col}, [C | String], SourceChars, Value) when ?IS_HEX(C) -> + NewSourceChars = [C | SourceChars], + NewValue = 16 * Value + convert_digit(C), + escape_long_hex_code(Start, {tk, Row + 1, Col}, String, NewSourceChars, NewValue); +escape_long_hex_code(_, {tk, Row, Col}, [C | _], _, _) -> + {error, {invalid_hexadecimal, [C], Row, Col}}; +escape_long_hex_code(_, Tk, [], SourceChars, Value) -> + % Just return as if the escape code were closed, and let the string parser + % produce an unclosed string error instead. + {ok, {Value, SourceChars, Tk, []}}. escape_char($b) -> {ok, $\b}; escape_char($e) -> {ok, $\e}; @@ -747,7 +766,7 @@ anon_types_test() -> string_escape_codes_test() -> check_parser("\" \\b\\e\\f\\n\\r\\t\\v\\\"\\\\ \""), check_parser("\"\\x00\\x11\\x77\\x4a\\x4A\""), - check_parser("\"\\x{7F}\\x{07F}\\x{007F}\\x{0007F}\""), + check_parser("\"\\x{0}\\x{7}\\x{7F}\\x{07F}\\x{007F}\\x{0007F}\\x{0000007F}\""), ok. records_test() -> From 493bdb990c97492ebd0c743cbc6e69066bb30ea3 Mon Sep 17 00:00:00 2001 From: Jarvis Carroll Date: Tue, 3 Feb 2026 01:42:17 +0000 Subject: [PATCH 18/42] Fix lexer row/column calculations. --- src/hz_sophia.erl | 87 +++++++++++++++++++++++++++++++---------------- 1 file changed, 57 insertions(+), 30 deletions(-) diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl index 32bbde4..b43a48c 100644 --- a/src/hz_sophia.erl +++ b/src/hz_sophia.erl @@ -4,10 +4,13 @@ -copyright("Jarvis Carroll "). -license("GPL-3.0-or-later"). --export([check_parser/1]). +-export([parse_literal/1, parse_literal/2, check_parser/1]). -include_lib("eunit/include/eunit.hrl"). +parse_literal(String) -> + parse_literal(unknown_type(), String). + parse_literal(Type, String) -> case parse_expression(Type, {tk, 1, 1}, String) of {ok, {Result, NewTk, NewString}} -> @@ -37,55 +40,55 @@ parse_literal2(Result, Tk, String) -> next_token({tk, Row, Col}, []) -> {ok, {{eof, "", Row, Col, Col}, {tk, Row, Col}, []}}; next_token({tk, Row, Col}, " " ++ Rest) -> - next_token({tk, Row + 1, Col}, Rest); + next_token({tk, Row, Col + 1}, Rest); next_token({tk, Row, Col}, "\t" ++ Rest) -> - next_token({tk, Row + 1, Col}, Rest); -next_token({tk, _, Col}, "\r\n" ++ Rest) -> - next_token({tk, 1, Col + 1}, Rest); -next_token({tk, _, Col}, "\r" ++ Rest) -> - next_token({tk, 1, Col + 1}, Rest); -next_token({tk, _, Col}, "\n" ++ Rest) -> - next_token({tk, 1, Col + 1}, Rest); + next_token({tk, Row, Col + 1}, Rest); +next_token({tk, Row, _}, "\r\n" ++ Rest) -> + next_token({tk, Row + 1, 1}, Rest); +next_token({tk, Row, _}, "\r" ++ Rest) -> + next_token({tk, Row + 1, 1}, Rest); +next_token({tk, Row, _}, "\n" ++ Rest) -> + next_token({tk, Row + 1, 1}, Rest); next_token(Tk, [C | _] = String) when ?IS_ALPHA(C) -> alphanum_token(Tk, Tk, String, []); next_token(Tk, [C | _] = String) when ?IS_NUM(C) -> num_token(Tk, Tk, String, [], 0); next_token({tk, Row, Col}, [$#, C | Rest]) when ?IS_HEX(C) -> - bytes_token({tk, Row, Col}, {tk, Row + 1, Col}, [C | Rest], "#", []); + bytes_token({tk, Row, Col}, {tk, Row, Col + 1}, [C | Rest], "#", []); next_token({tk, Row, Col}, "\"" ++ Rest) -> - string_token({tk, Row, Col}, {tk, Row + 1, Col}, Rest, "\"", <<>>); + string_token({tk, Row, Col}, {tk, Row, Col + 1}, Rest, "\"", <<>>); next_token({tk, Row, Col}, [Char | Rest]) -> Token = {character, [Char], Char, Row, Col, Col}, - {ok, {Token, {tk, Row + 1, Col}, Rest}}. + {ok, {Token, {tk, Row, Col + 1}, Rest}}. alphanum_token(Start, {tk, Row, Col}, [C | Rest], Acc) when ?IS_ALPHANUM(C) -> - alphanum_token(Start, {tk, Row, Col}, Rest, [C | Acc]); + alphanum_token(Start, {tk, Row, Col + 1}, Rest, [C | Acc]); alphanum_token({tk, _, Start}, {tk, Row, End}, String, Acc) -> AlphaString = lists:reverse(Acc), - Token = {alphanum, AlphaString, AlphaString, Row, Start, End}, + Token = {alphanum, AlphaString, AlphaString, Row, Start, End - 1}, {ok, {Token, {tk, Row, End}, String}}. num_token(Start, {tk, Row, Col}, [C | Rest], Chars, Value) when ?IS_NUM(C) -> NewValue = Value * 10 + (C - $0), - num_token(Start, {tk, Row + 1, Col}, Rest, [C | Chars], NewValue); + num_token(Start, {tk, Row, Col + 1}, Rest, [C | Chars], NewValue); num_token(Start, {tk, Row, Col}, [$_, C | Rest], Chars, Value) when ?IS_NUM(C) -> NewValue = Value * 10 + (C - $0), - num_token(Start, {tk, Row + 2, Col}, Rest, [C, $_ | Chars], NewValue); + num_token(Start, {tk, Row, Col + 2}, Rest, [C, $_ | Chars], NewValue); num_token({tk, _, Start}, {tk, Row, End}, String, Chars, Value) -> NumString = lists:reverse(Chars), - Token = {integer, NumString, Value, Row, Start, End}, + Token = {integer, NumString, Value, Row, Start, End - 1}, {ok, {Token, {tk, Row, End}, String}}. bytes_token(Start, {tk, Row, Col}, [C | Rest], Chars, Digits) when ?IS_HEX(C) -> Digit = convert_digit(C), - bytes_token(Start, {tk, Row + 1, Col}, Rest, [C | Chars], [Digit | Digits]); + bytes_token(Start, {tk, Row, Col + 1}, Rest, [C | Chars], [Digit | Digits]); bytes_token(Start, {tk, Row, Col}, [$_, C | Rest], Chars, Digits) when ?IS_HEX(C) -> Digit = convert_digit(C), - bytes_token(Start, {tk, Row + 1, Col}, Rest, [C, $_ | Chars], [Digit | Digits]); + bytes_token(Start, {tk, Row, Col + 1}, Rest, [C, $_ | Chars], [Digit | Digits]); bytes_token({tk, _, Start}, {tk, Row, End}, String, Chars, Digits) -> BytesString = lists:reverse(Chars), Value = reverse_combine_nibbles(Digits, <<>>), - Token = {bytes, BytesString, Value, Row, Start, End}, + Token = {bytes, BytesString, Value, Row, Start, End - 1}, {ok, {Token, {tk, Row, End}, String}}. convert_digit(C) when C >= $0, C =< $9 -> @@ -104,7 +107,7 @@ reverse_combine_nibbles([], Acc) -> Acc. string_token(Start, {tk, Row, Col}, "\\x" ++ String, SourceChars, Value) -> - case escape_hex_code({tk, Row, Col}, {tk, Row + 2, Col}, String, "x\\" ++ SourceChars) of + case escape_hex_code({tk, Row, Col}, {tk, Row, Col + 2}, String, "x\\" ++ SourceChars) of {ok, {Codepoint, NewSourceChars, NewTk, NewString}} -> NewValue = <>, string_token(Start, NewTk, NewString, NewSourceChars, NewValue); @@ -114,37 +117,37 @@ string_token(Start, {tk, Row, Col}, "\\x" ++ String, SourceChars, Value) -> string_token(Start, {tk, Row, Col}, [$\\, C | Rest], SourceChars, Value) -> case escape_char(C) of {ok, ByteVal} -> - string_token(Start, {tk, Row + 2, Col}, Rest, [C, $\ | SourceChars], <>); + string_token(Start, {tk, Row, Col + 2}, Rest, [C, $\ | SourceChars], <>); error -> {error, {invalid_escape_code, [C], Row, Col}} end; -string_token({tk, _, Start}, {tk, Row, End}, [$" | Rest], SourceChars, Value) -> +string_token({tk, _, Start}, {tk, Row, Col}, [$" | Rest], SourceChars, Value) -> SourceStr = lists:reverse([$" | SourceChars]), - Token = {string, SourceStr, Value, Row, Start, End}, - {ok, {Token, {tk, Row, End}, Rest}}; + Token = {string, SourceStr, Value, Row, Start, Col}, + {ok, {Token, {tk, Row, Col + 1}, Rest}}; string_token(Start, {tk, Row, Col}, [C | Rest], SourceChars, Value) -> % TODO: ERTS probably had to convert this FROM utf8 at some point, so why % bother, if we need to convert it back? I guess we could accept iolists if % we really wanted to waste time on this point... - string_token(Start, {tk, Row + 1, Col}, Rest, [C | SourceChars], <>). + string_token(Start, {tk, Row, Col + 1}, Rest, [C | SourceChars], <>). escape_hex_code(Start, {tk, Row, Col}, "{" ++ String, SourceChars) -> - escape_long_hex_code(Start, {tk, Row + 1, Col}, String, "{" ++ SourceChars, 0); + escape_long_hex_code(Start, {tk, Row, Col + 1}, String, "{" ++ SourceChars, 0); escape_hex_code(_, {tk, Row, Col}, [A, B | String], SourceChars) when ?IS_HEX(A), ?IS_HEX(B) -> % As of writing this, the Sophia compiler will convert this byte from % extended ASCII to unicode... But it really shouldn't. The literal parser % does what the compiler should do. Byte = convert_digit(A) * 16 + convert_digit(B), - {ok, {Byte, [B, A | SourceChars], {tk, Row + 2, Col}, String}}; + {ok, {Byte, [B, A | SourceChars], {tk, Row, Col + 2}, String}}; escape_hex_code({tk, Row1, Col1}, _, _, _) -> {error, {invalid_escape_code, "\\x", Row1, Col1}}. escape_long_hex_code(_, {tk, Row, Col}, "}" ++ String, SourceChars, Value) -> - {ok, {Value, "}" ++ SourceChars, {tk, Row + 1, Col}, String}}; + {ok, {Value, "}" ++ SourceChars, {tk, Row, Col + 1}, String}}; escape_long_hex_code(Start, {tk, Row, Col}, [C | String], SourceChars, Value) when ?IS_HEX(C) -> NewSourceChars = [C | SourceChars], NewValue = 16 * Value + convert_digit(C), - escape_long_hex_code(Start, {tk, Row + 1, Col}, String, NewSourceChars, NewValue); + escape_long_hex_code(Start, {tk, Row, Col + 1}, String, NewSourceChars, NewValue); escape_long_hex_code(_, {tk, Row, Col}, [C | _], _, _) -> {error, {invalid_hexadecimal, [C], Row, Col}}; escape_long_hex_code(_, Tk, [], SourceChars, Value) -> @@ -826,4 +829,28 @@ variant_test() -> ok. +lexer_offset_test() -> + % Test that various tokens report their position correctly. + {error, {unexpected_token, "456", 1, 5, 7}} = parse_literal("123 456"), + {error, {unexpected_token, "[", 1, 5, 5}} = parse_literal("123 [0]"), + {error, {unexpected_token, "abc", 1, 5, 7}} = parse_literal("123 abc"), + {error, {unexpected_token, "#AA", 1, 5, 7}} = parse_literal("123 #AA"), + {error, {unexpected_token, "\"x\"", 1, 5, 7}} = parse_literal("123 \"x\""), + {error, {unexpected_token, "\"\\x{123}\"", 1, 5, 13}} = parse_literal("123 \"\\x{123}\""), + + % Check that the tokenizer knows its position correctly *after* various + % tokens. + {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("[0] 123"), + ABCType = {"mytype", already_normalized, {variant, [{"abc", []}]}}, + {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal(ABCType, "abc 123"), + {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("#AA 123"), + {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("\"x\" 123"), + {error, {unexpected_token, "123", 1, 11, 13}} = parse_literal("\"\\x{123}\" 123"), + + % Check that the tokenizer accounts for various line separators correctly. + {error, {unexpected_token, "abc", 2, 1, 3}} = parse_literal("123\nabc"), + {error, {unexpected_token, "abc", 2, 1, 3}} = parse_literal("123\r\nabc"), + {error, {unexpected_token, "abc", 2, 1, 3}} = parse_literal("123\rabc"), + + ok. From a695c21fc926bfdedc3f611627b504e2de29be58 Mon Sep 17 00:00:00 2001 From: Jarvis Carroll Date: Tue, 3 Feb 2026 06:00:40 +0000 Subject: [PATCH 19/42] Parse address literals. Also signatures. --- src/hz_sophia.erl | 141 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 111 insertions(+), 30 deletions(-) diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl index b43a48c..f41443d 100644 --- a/src/hz_sophia.erl +++ b/src/hz_sophia.erl @@ -32,7 +32,9 @@ parse_literal2(Result, Tk, String) -> %%% Tokenizer --define(IS_ALPHA(C), ((((C) >= $A) and ((C) =< $Z)) or (((C) >= $a) and ((C) =< $z)) or ((C) == $_))). +-define(IS_LATIN_UPPER(C), (((C) >= $A) and ((C) =< $Z))). +-define(IS_LATIN_LOWER(C), (((C) >= $a) and ((C) =< $z))). +-define(IS_ALPHA(C), (?IS_LATIN_UPPER(C) or ?IS_LATIN_LOWER(C) or ((C) == $_))). -define(IS_NUM(C), (((C) >= $0) and ((C) =< $9))). -define(IS_ALPHANUM(C), (?IS_ALPHA(C) or ?IS_NUM(C))). -define(IS_HEX(C), (?IS_NUM(C) or (((C) >= $A) and ((C) =< $F)) or (((C) >= $a) and ((C) =< $f)))). @@ -231,8 +233,8 @@ parse_expression2(Type, Tk, String, {character, "(", _, Row, Start, _}) -> parse_tuple(Type, Tk, String, Row, Start); parse_expression2(Type, Tk, String, {character, "{", _, Row, Start, _}) -> parse_record_or_map(Type, Tk, String, Row, Start); -parse_expression2(Type, Tk, String, {alphanum, Ident, _, Row, Start, End}) -> - parse_variant(Type, Tk, String, Ident, Row, Start, End); +parse_expression2(Type, Tk, String, {alphanum, S, _, Row, Start, End}) -> + parse_alphanum(Type, Tk, String, S, Row, Start, End); parse_expression2(_, _, _, {_, S, _, Row, Start, End}) -> {error, {unexpected_token, S, Row, Start, End}}. @@ -249,6 +251,69 @@ expect_tokens([Str | Rest], Tk, String) -> {error, {unexpected_token, Actual, Row, Start, End}} end. +%%% Ambiguous Chain Object vs Identifier Parsing + +parse_alphanum(Type, Tk, String, [C | _] = S, Row, Start, End) when ?IS_LATIN_UPPER(C) -> + % From a programming perspective, we are trying to parse a constant, so + % an alphanum token can really only be a constructor, or a chain object. + % Chain objects start with lowercase prefixes, like ak_, so clearly this is + % a variant constructor. + parse_variant(Type, Tk, String, S, Row, Start, End); +parse_alphanum(Type, Tk, String, S, Row, Start, End) -> + % Inversely, variant constructors are always uppercase, so now that we have + % handled that case, only chain objects are left. + try + case gmser_api_encoder:decode(unicode:characters_to_binary(S)) of + {account_pubkey, Data} -> + typecheck_address(Type, Tk, String, Data, Row, Start, End); + {contract_pubkey, Data} -> + typecheck_contract(Type, Tk, String, Data, Row, Start, End); + {signature, Data} -> + typecheck_signature(Type, Tk, String, Data, Row, Start, End); + {_, _} -> + % Only a few chain objects are recognized by Sophia. The rest + % are interpreted as identifiers, so we might as well give the + % same sort of error that the compiler would give. + {error, {unexpected_identifier, S, Row, Start, End}} + end + catch + _:_ -> {error, {unexpected_identifier, S, Row, Start, End}} + end. + +typecheck_address({_, _, address}, Tk, String, Data, _, _, _) -> + {ok, {{address, Data}, Tk, String}}; +typecheck_address({_, _, contract}, Tk, String, Data, _, _, _) -> + % The compiler would type error, but we should be lenient here. + {ok, {{contract, Data}, Tk, String}}; +typecheck_address({_, _, unknown_type}, Tk, String, Data, _, _, _) -> + {ok, {{address, Data}, Tk, String}}; +typecheck_address({O, N, _}, _, _, _, Row, Start, End) -> + {error, {wrong_type, O, N, address, Row, Start, End}}. + +typecheck_contract({_, _, contract}, Tk, String, Data, _, _, _) -> + {ok, {{contract, Data}, Tk, String}}; +typecheck_contract({_, _, address}, Tk, String, Data, _, _, _) -> + % The compiler would type error, but we should be lenient here. + {ok, {{address, Data}, Tk, String}}; +typecheck_contract({_, _, unknown_type}, Tk, String, Data, _, _, _) -> + {ok, {{contract, Data}, Tk, String}}; +typecheck_contract({O, N, _}, _, _, _, Row, Start, End) -> + {error, {wrong_type, O, N, contract, Row, Start, End}}. + +typecheck_signature({_, _, signature}, Tk, String, Data, _, _, _) -> + {ok, {{bytes, Data}, Tk, String}}; +typecheck_signature({_, _, {bytes, [64]}}, Tk, String, Data, _, _, _) -> + % The compiler would probably type-error, but whatever. + {ok, {{bytes, Data}, Tk, String}}; +typecheck_signature({_, _, {bytes, [any]}}, Tk, String, Data, _, _, _) -> + % The compiler would probably type-error, but whatever. + {ok, {{bytes, Data}, Tk, String}}; +typecheck_signature({_, _, unknown_type}, Tk, String, Data, _, _, _) -> + {ok, {{bytes, Data}, Tk, String}}; +typecheck_signature({O, N, _}, _, _, _, Row, Start, End) -> + {error, {wrong_type, O, N, signature, Row, Start, End}}. + + %%% List Parsing parse_list({_, _, {list, [Inner]}}, Tk, String, Row, Start) -> @@ -697,7 +762,7 @@ check_sophia_to_fate(Type, Sophia, Fate) -> erlang:error({to_fate_failed, Sophia, Fate, {error, Reason}}) end. -compile_entrypoint_code_and_type(Source, Entrypoint) -> +compile_entrypoint_value_and_type(Source, Entrypoint) -> {ok, #{fate_code := FateCode, aci := ACI}} = so_compiler:from_string(Source, [{aci, json}]), % Find the fcode for the correct entrypoint. @@ -706,12 +771,13 @@ compile_entrypoint_code_and_type(Source, Entrypoint) -> Name = unicode:characters_to_binary(Entrypoint), {Hash, Name} = lists:keyfind(Name, 2, Names), {_, _, Code} = maps:get(Hash, Bodies), + FATE = extract_return_value(Code), % Generate the AACI, and get the AACI type info for the correct entrypoint. AACI = hz_aaci:prepare_aaci(ACI), {ok, {_, Type}} = hz_aaci:get_function_signature(AACI, "f"), - {Code, Type}. + {FATE, Type}. extract_return_value(#{0 := [{'RETURNR', {immediate, FATE}}]}) -> FATE; @@ -722,11 +788,10 @@ check_parser(Sophia) -> % Compile the literal using the compiler, to check that it is valid Sophia % syntax, and to get an AACI object to pass to the parser. Source = "contract C = entrypoint f() = " ++ Sophia, - {Code, Type} = compile_entrypoint_code_and_type(Source, "f"), + {Fate, Type} = compile_entrypoint_value_and_type(Source, "f"), % Check that when we parse the term we get the same value as the Sophia % compiler. - Fate = extract_return_value(Code), check_sophia_to_fate(unknown_type(), Sophia, Fate), % Then, once we know that the term is correct, make sure that it is still @@ -736,11 +801,7 @@ check_parser(Sophia) -> check_parser_with_typedef(Typedef, Sophia) -> % Compile the type definitions alongside the usual literal expression. Source = "contract C =\n " ++ Typedef ++ "\n entrypoint f() = " ++ Sophia, - {Code, Type} = compile_entrypoint_code_and_type(Source, "f"), - Fate = extract_return_value(Code), - - % Check the FATE term as usual. - gmb_fate_encoding:serialize(Fate), + {Fate, Type} = compile_entrypoint_value_and_type(Source, "f"), % Do a typed parse, as usual, but there are probably record/variant % definitions in the AACI, so untyped parses probably don't work. @@ -780,6 +841,38 @@ records_test() -> % will error, though. {error, {unresolved_record, _, _, _}} = parse_literal(unknown_type(), Sophia). +variant_test() -> + TypeDef = "datatype multi('a) = Zero | One('a) | Two('a, 'a)", + + check_parser_with_typedef(TypeDef, "Zero"), + check_parser_with_typedef(TypeDef, "One(0)"), + check_parser_with_typedef(TypeDef, "Two(0, 1)"), + check_parser_with_typedef(TypeDef, "Two([], [1, 2, 3])"), + + {error, {unresolved_variant, _, _, _}} = parse_literal(unknown_type(), "Zero"), + + ok. + +chain_objects_test() -> + % Address, + check_parser("ak_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx"), + % Two different forms of signature, + check_parser("[sg_XDyF8LJC4tpMyAySvpaG1f5V9F2XxAbRx9iuVjvvdNMwVracLhzAuXhRM5kXAFtpwW1DCHuz5jGehUayCah4jub32Ti2n, #00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF]"), + + % We have to build a totally custom contract example in order to get an + % AACI and return value for parsing contract addresses. This is because the + % compiler demands that contract addresses be type checked according to the + % logic of "contract oriented programming", including covariance, etc. and + % "contract oriented programming" is not very compatible with ML style type + % inference. + Contract = "ct_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx", + Source = "contract C = entrypoint f(): C = " ++ Contract, + {Fate, ContractType} = compile_entrypoint_value_and_type(Source, "f"), + check_sophia_to_fate(ContractType, Contract, Fate), + check_sophia_to_fate(unknown_type(), Contract, Fate), + + ok. + singleton_records_test() -> TypeDef = "record singleton('a) = {it: 'a}", check_parser_with_typedef(TypeDef, "{it = 123}"), @@ -817,23 +910,11 @@ excess_parens_test() -> ok. -variant_test() -> - TypeDef = "datatype multi('a) = Zero | One('a) | Two('a, 'a)", - - check_parser_with_typedef(TypeDef, "Zero"), - check_parser_with_typedef(TypeDef, "One(0)"), - check_parser_with_typedef(TypeDef, "Two(0, 1)"), - check_parser_with_typedef(TypeDef, "Two([], [1, 2, 3])"), - - {error, {unresolved_variant, _, _, _}} = parse_literal(unknown_type(), "Zero"), - - ok. - lexer_offset_test() -> % Test that various tokens report their position correctly. {error, {unexpected_token, "456", 1, 5, 7}} = parse_literal("123 456"), {error, {unexpected_token, "[", 1, 5, 5}} = parse_literal("123 [0]"), - {error, {unexpected_token, "abc", 1, 5, 7}} = parse_literal("123 abc"), + {error, {unexpected_token, "ABC", 1, 5, 7}} = parse_literal("123 ABC"), {error, {unexpected_token, "#AA", 1, 5, 7}} = parse_literal("123 #AA"), {error, {unexpected_token, "\"x\"", 1, 5, 7}} = parse_literal("123 \"x\""), {error, {unexpected_token, "\"\\x{123}\"", 1, 5, 13}} = parse_literal("123 \"\\x{123}\""), @@ -841,16 +922,16 @@ lexer_offset_test() -> % Check that the tokenizer knows its position correctly *after* various % tokens. {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("[0] 123"), - ABCType = {"mytype", already_normalized, {variant, [{"abc", []}]}}, - {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal(ABCType, "abc 123"), + ABCType = {"mytype", already_normalized, {variant, [{"ABC", []}]}}, + {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal(ABCType, "ABC 123"), {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("#AA 123"), {error, {unexpected_token, "123", 1, 5, 7}} = parse_literal("\"x\" 123"), {error, {unexpected_token, "123", 1, 11, 13}} = parse_literal("\"\\x{123}\" 123"), % Check that the tokenizer accounts for various line separators correctly. - {error, {unexpected_token, "abc", 2, 1, 3}} = parse_literal("123\nabc"), - {error, {unexpected_token, "abc", 2, 1, 3}} = parse_literal("123\r\nabc"), - {error, {unexpected_token, "abc", 2, 1, 3}} = parse_literal("123\rabc"), + {error, {unexpected_token, "ABC", 2, 1, 3}} = parse_literal("123\nABC"), + {error, {unexpected_token, "ABC", 2, 1, 3}} = parse_literal("123\r\nABC"), + {error, {unexpected_token, "ABC", 2, 1, 3}} = parse_literal("123\rABC"), ok. From bb4bcbb7de0b2c0337ee2c3f355bd58e05612048 Mon Sep 17 00:00:00 2001 From: Jarvis Carroll Date: Tue, 3 Feb 2026 06:08:54 +0000 Subject: [PATCH 20/42] remove 'tk' atom from file positions --- src/hz_sophia.erl | 546 +++++++++++++++++++++++----------------------- 1 file changed, 273 insertions(+), 273 deletions(-) diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl index f41443d..8d578c2 100644 --- a/src/hz_sophia.erl +++ b/src/hz_sophia.erl @@ -12,16 +12,16 @@ parse_literal(String) -> parse_literal(unknown_type(), String). parse_literal(Type, String) -> - case parse_expression(Type, {tk, 1, 1}, String) of - {ok, {Result, NewTk, NewString}} -> - parse_literal2(Result, NewTk, NewString); + case parse_expression(Type, {1, 1}, String) of + {ok, {Result, NewPos, NewString}} -> + parse_literal2(Result, NewPos, NewString); {error, Reason} -> {error, Reason} end. -parse_literal2(Result, Tk, String) -> +parse_literal2(Result, Pos, String) -> % We have parsed a valid expression. Now check that the string ends. - case next_token(Tk, String) of + case next_token(Pos, String) of {ok, {{eof, _, _, _, _}, _, _}} -> {ok, Result}; {ok, {{_, S, _, Row, Start, End}, _, _}} -> @@ -39,59 +39,59 @@ parse_literal2(Result, Tk, String) -> -define(IS_ALPHANUM(C), (?IS_ALPHA(C) or ?IS_NUM(C))). -define(IS_HEX(C), (?IS_NUM(C) or (((C) >= $A) and ((C) =< $F)) or (((C) >= $a) and ((C) =< $f)))). -next_token({tk, Row, Col}, []) -> - {ok, {{eof, "", Row, Col, Col}, {tk, Row, Col}, []}}; -next_token({tk, Row, Col}, " " ++ Rest) -> - next_token({tk, Row, Col + 1}, Rest); -next_token({tk, Row, Col}, "\t" ++ Rest) -> - next_token({tk, Row, Col + 1}, Rest); -next_token({tk, Row, _}, "\r\n" ++ Rest) -> - next_token({tk, Row + 1, 1}, Rest); -next_token({tk, Row, _}, "\r" ++ Rest) -> - next_token({tk, Row + 1, 1}, Rest); -next_token({tk, Row, _}, "\n" ++ Rest) -> - next_token({tk, Row + 1, 1}, Rest); -next_token(Tk, [C | _] = String) when ?IS_ALPHA(C) -> - alphanum_token(Tk, Tk, String, []); -next_token(Tk, [C | _] = String) when ?IS_NUM(C) -> - num_token(Tk, Tk, String, [], 0); -next_token({tk, Row, Col}, [$#, C | Rest]) when ?IS_HEX(C) -> - bytes_token({tk, Row, Col}, {tk, Row, Col + 1}, [C | Rest], "#", []); -next_token({tk, Row, Col}, "\"" ++ Rest) -> - string_token({tk, Row, Col}, {tk, Row, Col + 1}, Rest, "\"", <<>>); -next_token({tk, Row, Col}, [Char | Rest]) -> +next_token({Row, Col}, []) -> + {ok, {{eof, "", Row, Col, Col}, {Row, Col}, []}}; +next_token({Row, Col}, " " ++ Rest) -> + next_token({Row, Col + 1}, Rest); +next_token({Row, Col}, "\t" ++ Rest) -> + next_token({Row, Col + 1}, Rest); +next_token({Row, _}, "\r\n" ++ Rest) -> + next_token({Row + 1, 1}, Rest); +next_token({Row, _}, "\r" ++ Rest) -> + next_token({Row + 1, 1}, Rest); +next_token({Row, _}, "\n" ++ Rest) -> + next_token({Row + 1, 1}, Rest); +next_token(Pos, [C | _] = String) when ?IS_ALPHA(C) -> + alphanum_token(Pos, Pos, String, []); +next_token(Pos, [C | _] = String) when ?IS_NUM(C) -> + num_token(Pos, Pos, String, [], 0); +next_token({Row, Col}, [$#, C | Rest]) when ?IS_HEX(C) -> + bytes_token({Row, Col}, {Row, Col + 1}, [C | Rest], "#", []); +next_token({Row, Col}, "\"" ++ Rest) -> + string_token({Row, Col}, {Row, Col + 1}, Rest, "\"", <<>>); +next_token({Row, Col}, [Char | Rest]) -> Token = {character, [Char], Char, Row, Col, Col}, - {ok, {Token, {tk, Row, Col + 1}, Rest}}. + {ok, {Token, {Row, Col + 1}, Rest}}. -alphanum_token(Start, {tk, Row, Col}, [C | Rest], Acc) when ?IS_ALPHANUM(C) -> - alphanum_token(Start, {tk, Row, Col + 1}, Rest, [C | Acc]); -alphanum_token({tk, _, Start}, {tk, Row, End}, String, Acc) -> +alphanum_token(Start, {Row, Col}, [C | Rest], Acc) when ?IS_ALPHANUM(C) -> + alphanum_token(Start, {Row, Col + 1}, Rest, [C | Acc]); +alphanum_token({_, Start}, {Row, End}, String, Acc) -> AlphaString = lists:reverse(Acc), Token = {alphanum, AlphaString, AlphaString, Row, Start, End - 1}, - {ok, {Token, {tk, Row, End}, String}}. + {ok, {Token, {Row, End}, String}}. -num_token(Start, {tk, Row, Col}, [C | Rest], Chars, Value) when ?IS_NUM(C) -> +num_token(Start, {Row, Col}, [C | Rest], Chars, Value) when ?IS_NUM(C) -> NewValue = Value * 10 + (C - $0), - num_token(Start, {tk, Row, Col + 1}, Rest, [C | Chars], NewValue); -num_token(Start, {tk, Row, Col}, [$_, C | Rest], Chars, Value) when ?IS_NUM(C) -> + num_token(Start, {Row, Col + 1}, Rest, [C | Chars], NewValue); +num_token(Start, {Row, Col}, [$_, C | Rest], Chars, Value) when ?IS_NUM(C) -> NewValue = Value * 10 + (C - $0), - num_token(Start, {tk, Row, Col + 2}, Rest, [C, $_ | Chars], NewValue); -num_token({tk, _, Start}, {tk, Row, End}, String, Chars, Value) -> + num_token(Start, {Row, Col + 2}, Rest, [C, $_ | Chars], NewValue); +num_token({_, Start}, {Row, End}, String, Chars, Value) -> NumString = lists:reverse(Chars), Token = {integer, NumString, Value, Row, Start, End - 1}, - {ok, {Token, {tk, Row, End}, String}}. + {ok, {Token, {Row, End}, String}}. -bytes_token(Start, {tk, Row, Col}, [C | Rest], Chars, Digits) when ?IS_HEX(C) -> +bytes_token(Start, {Row, Col}, [C | Rest], Chars, Digits) when ?IS_HEX(C) -> Digit = convert_digit(C), - bytes_token(Start, {tk, Row, Col + 1}, Rest, [C | Chars], [Digit | Digits]); -bytes_token(Start, {tk, Row, Col}, [$_, C | Rest], Chars, Digits) when ?IS_HEX(C) -> + bytes_token(Start, {Row, Col + 1}, Rest, [C | Chars], [Digit | Digits]); +bytes_token(Start, {Row, Col}, [$_, C | Rest], Chars, Digits) when ?IS_HEX(C) -> Digit = convert_digit(C), - bytes_token(Start, {tk, Row, Col + 1}, Rest, [C, $_ | Chars], [Digit | Digits]); -bytes_token({tk, _, Start}, {tk, Row, End}, String, Chars, Digits) -> + bytes_token(Start, {Row, Col + 1}, Rest, [C, $_ | Chars], [Digit | Digits]); +bytes_token({_, Start}, {Row, End}, String, Chars, Digits) -> BytesString = lists:reverse(Chars), Value = reverse_combine_nibbles(Digits, <<>>), Token = {bytes, BytesString, Value, Row, Start, End - 1}, - {ok, {Token, {tk, Row, End}, String}}. + {ok, {Token, {Row, End}, String}}. convert_digit(C) when C >= $0, C =< $9 -> C - $0; @@ -108,54 +108,54 @@ reverse_combine_nibbles([D1], Acc) -> reverse_combine_nibbles([], Acc) -> Acc. -string_token(Start, {tk, Row, Col}, "\\x" ++ String, SourceChars, Value) -> - case escape_hex_code({tk, Row, Col}, {tk, Row, Col + 2}, String, "x\\" ++ SourceChars) of - {ok, {Codepoint, NewSourceChars, NewTk, NewString}} -> +string_token(Start, {Row, Col}, "\\x" ++ String, SourceChars, Value) -> + case escape_hex_code({Row, Col}, {Row, Col + 2}, String, "x\\" ++ SourceChars) of + {ok, {Codepoint, NewSourceChars, NewPos, NewString}} -> NewValue = <>, - string_token(Start, NewTk, NewString, NewSourceChars, NewValue); + string_token(Start, NewPos, NewString, NewSourceChars, NewValue); {error, Reason} -> {error, Reason} end; -string_token(Start, {tk, Row, Col}, [$\\, C | Rest], SourceChars, Value) -> +string_token(Start, {Row, Col}, [$\\, C | Rest], SourceChars, Value) -> case escape_char(C) of {ok, ByteVal} -> - string_token(Start, {tk, Row, Col + 2}, Rest, [C, $\ | SourceChars], <>); + string_token(Start, {Row, Col + 2}, Rest, [C, $\ | SourceChars], <>); error -> {error, {invalid_escape_code, [C], Row, Col}} end; -string_token({tk, _, Start}, {tk, Row, Col}, [$" | Rest], SourceChars, Value) -> +string_token({_, Start}, {Row, Col}, [$" | Rest], SourceChars, Value) -> SourceStr = lists:reverse([$" | SourceChars]), Token = {string, SourceStr, Value, Row, Start, Col}, - {ok, {Token, {tk, Row, Col + 1}, Rest}}; -string_token(Start, {tk, Row, Col}, [C | Rest], SourceChars, Value) -> + {ok, {Token, {Row, Col + 1}, Rest}}; +string_token(Start, {Row, Col}, [C | Rest], SourceChars, Value) -> % TODO: ERTS probably had to convert this FROM utf8 at some point, so why % bother, if we need to convert it back? I guess we could accept iolists if % we really wanted to waste time on this point... - string_token(Start, {tk, Row, Col + 1}, Rest, [C | SourceChars], <>). + string_token(Start, {Row, Col + 1}, Rest, [C | SourceChars], <>). -escape_hex_code(Start, {tk, Row, Col}, "{" ++ String, SourceChars) -> - escape_long_hex_code(Start, {tk, Row, Col + 1}, String, "{" ++ SourceChars, 0); -escape_hex_code(_, {tk, Row, Col}, [A, B | String], SourceChars) when ?IS_HEX(A), ?IS_HEX(B) -> +escape_hex_code(Start, {Row, Col}, "{" ++ String, SourceChars) -> + escape_long_hex_code(Start, {Row, Col + 1}, String, "{" ++ SourceChars, 0); +escape_hex_code(_, {Row, Col}, [A, B | String], SourceChars) when ?IS_HEX(A), ?IS_HEX(B) -> % As of writing this, the Sophia compiler will convert this byte from % extended ASCII to unicode... But it really shouldn't. The literal parser % does what the compiler should do. Byte = convert_digit(A) * 16 + convert_digit(B), - {ok, {Byte, [B, A | SourceChars], {tk, Row, Col + 2}, String}}; -escape_hex_code({tk, Row1, Col1}, _, _, _) -> + {ok, {Byte, [B, A | SourceChars], {Row, Col + 2}, String}}; +escape_hex_code({Row1, Col1}, _, _, _) -> {error, {invalid_escape_code, "\\x", Row1, Col1}}. -escape_long_hex_code(_, {tk, Row, Col}, "}" ++ String, SourceChars, Value) -> - {ok, {Value, "}" ++ SourceChars, {tk, Row, Col + 1}, String}}; -escape_long_hex_code(Start, {tk, Row, Col}, [C | String], SourceChars, Value) when ?IS_HEX(C) -> +escape_long_hex_code(_, {Row, Col}, "}" ++ String, SourceChars, Value) -> + {ok, {Value, "}" ++ SourceChars, {Row, Col + 1}, String}}; +escape_long_hex_code(Start, {Row, Col}, [C | String], SourceChars, Value) when ?IS_HEX(C) -> NewSourceChars = [C | SourceChars], NewValue = 16 * Value + convert_digit(C), - escape_long_hex_code(Start, {tk, Row, Col + 1}, String, NewSourceChars, NewValue); -escape_long_hex_code(_, {tk, Row, Col}, [C | _], _, _) -> + escape_long_hex_code(Start, {Row, Col + 1}, String, NewSourceChars, NewValue); +escape_long_hex_code(_, {Row, Col}, [C | _], _, _) -> {error, {invalid_hexadecimal, [C], Row, Col}}; -escape_long_hex_code(_, Tk, [], SourceChars, Value) -> +escape_long_hex_code(_, Pos, [], SourceChars, Value) -> % Just return as if the escape code were closed, and let the string parser % produce an unclosed string error instead. - {ok, {Value, SourceChars, Tk, []}}. + {ok, {Value, SourceChars, Pos, []}}. escape_char($b) -> {ok, $\b}; escape_char($e) -> {ok, $\e}; @@ -186,90 +186,90 @@ escape_char(_) -> error. %%% write the recursive code, thus programming the BEAM to implement the %%% pushdown automaton that we want. -parse_expression(Type, Tk, String) -> - case next_token(Tk, String) of - {ok, {Token, NewTk, NewString}} -> - parse_expression2(Type, NewTk, NewString, Token); +parse_expression(Type, Pos, String) -> + case next_token(Pos, String) of + {ok, {Token, NewPos, NewString}} -> + parse_expression2(Type, NewPos, NewString, Token); {error, Reason} -> {error, Reason} end. -parse_expression2(Type, Tk, String, {integer, _, Value, Row, Start, End}) -> +parse_expression2(Type, Pos, String, {integer, _, Value, Row, Start, End}) -> case Type of {_, _, integer} -> - {ok, {Value, Tk, String}}; + {ok, {Value, Pos, String}}; {_, _, unknown_type} -> - {ok, {Value, Tk, String}}; + {ok, {Value, Pos, String}}; {O, N, _} -> {error, {wrong_type, O, N, integer, Row, Start, End}} end; -parse_expression2(Type, Tk, String, {bytes, _, Value, Row, Start, End}) -> +parse_expression2(Type, Pos, String, {bytes, _, Value, Row, Start, End}) -> Len = byte_size(Value), Result = {bytes, Value}, case Type of {_, _, {bytes, [any]}} -> - {ok, {Result, Tk, String}}; + {ok, {Result, Pos, String}}; {_, _, {bytes, [Len]}} -> - {ok, {Result, Tk, String}}; + {ok, {Result, Pos, String}}; {_, _, {bytes, [ExpectedLen]}} -> {error, {bytes_wrong_size, ExpectedLen, Len, Row, Start, End}}; {_, _, unknown_type} -> - {ok, {Result, Tk, String}}; + {ok, {Result, Pos, String}}; {O, N, _} -> {error, {wrong_type, O, N, {bytes, [Len]}, Row, Start, End}} end; -parse_expression2(Type, Tk, String, {string, _, Value, Row, Start, End}) -> +parse_expression2(Type, Pos, String, {string, _, Value, Row, Start, End}) -> case Type of {_, _, string} -> - {ok, {Value, Tk, String}}; + {ok, {Value, Pos, String}}; {_, _, unknown_type} -> - {ok, {Value, Tk, String}}; + {ok, {Value, Pos, String}}; {O, N, _} -> {error, {wrong_type, O, N, string, Row, Start, End}} end; -parse_expression2(Type, Tk, String, {character, "[", _, Row, Start, _}) -> - parse_list(Type, Tk, String, Row, Start); -parse_expression2(Type, Tk, String, {character, "(", _, Row, Start, _}) -> - parse_tuple(Type, Tk, String, Row, Start); -parse_expression2(Type, Tk, String, {character, "{", _, Row, Start, _}) -> - parse_record_or_map(Type, Tk, String, Row, Start); -parse_expression2(Type, Tk, String, {alphanum, S, _, Row, Start, End}) -> - parse_alphanum(Type, Tk, String, S, Row, Start, End); +parse_expression2(Type, Pos, String, {character, "[", _, Row, Start, _}) -> + parse_list(Type, Pos, String, Row, Start); +parse_expression2(Type, Pos, String, {character, "(", _, Row, Start, _}) -> + parse_tuple(Type, Pos, String, Row, Start); +parse_expression2(Type, Pos, String, {character, "{", _, Row, Start, _}) -> + parse_record_or_map(Type, Pos, String, Row, Start); +parse_expression2(Type, Pos, String, {alphanum, S, _, Row, Start, End}) -> + parse_alphanum(Type, Pos, String, S, Row, Start, End); parse_expression2(_, _, _, {_, S, _, Row, Start, End}) -> {error, {unexpected_token, S, Row, Start, End}}. unknown_type() -> {unknown_type, already_normalized, unknown_type}. -expect_tokens([], Tk, String) -> - {ok, {Tk, String}}; -expect_tokens([Str | Rest], Tk, String) -> - case next_token(Tk, String) of - {ok, {{_, Str, _, _, _, _}, NewTk, NewString}} -> - expect_tokens(Rest, NewTk, NewString); +expect_tokens([], Pos, String) -> + {ok, {Pos, String}}; +expect_tokens([Str | Rest], Pos, String) -> + case next_token(Pos, String) of + {ok, {{_, Str, _, _, _, _}, NewPos, NewString}} -> + expect_tokens(Rest, NewPos, NewString); {ok, {{_, Actual, _, Row, Start, End}, _, _}} -> {error, {unexpected_token, Actual, Row, Start, End}} end. %%% Ambiguous Chain Object vs Identifier Parsing -parse_alphanum(Type, Tk, String, [C | _] = S, Row, Start, End) when ?IS_LATIN_UPPER(C) -> +parse_alphanum(Type, Pos, String, [C | _] = S, Row, Start, End) when ?IS_LATIN_UPPER(C) -> % From a programming perspective, we are trying to parse a constant, so % an alphanum token can really only be a constructor, or a chain object. % Chain objects start with lowercase prefixes, like ak_, so clearly this is % a variant constructor. - parse_variant(Type, Tk, String, S, Row, Start, End); -parse_alphanum(Type, Tk, String, S, Row, Start, End) -> + parse_variant(Type, Pos, String, S, Row, Start, End); +parse_alphanum(Type, Pos, String, S, Row, Start, End) -> % Inversely, variant constructors are always uppercase, so now that we have % handled that case, only chain objects are left. try case gmser_api_encoder:decode(unicode:characters_to_binary(S)) of {account_pubkey, Data} -> - typecheck_address(Type, Tk, String, Data, Row, Start, End); + typecheck_address(Type, Pos, String, Data, Row, Start, End); {contract_pubkey, Data} -> - typecheck_contract(Type, Tk, String, Data, Row, Start, End); + typecheck_contract(Type, Pos, String, Data, Row, Start, End); {signature, Data} -> - typecheck_signature(Type, Tk, String, Data, Row, Start, End); + typecheck_signature(Type, Pos, String, Data, Row, Start, End); {_, _} -> % Only a few chain objects are recognized by Sophia. The rest % are interpreted as identifiers, so we might as well give the @@ -280,61 +280,61 @@ parse_alphanum(Type, Tk, String, S, Row, Start, End) -> _:_ -> {error, {unexpected_identifier, S, Row, Start, End}} end. -typecheck_address({_, _, address}, Tk, String, Data, _, _, _) -> - {ok, {{address, Data}, Tk, String}}; -typecheck_address({_, _, contract}, Tk, String, Data, _, _, _) -> +typecheck_address({_, _, address}, Pos, String, Data, _, _, _) -> + {ok, {{address, Data}, Pos, String}}; +typecheck_address({_, _, contract}, Pos, String, Data, _, _, _) -> % The compiler would type error, but we should be lenient here. - {ok, {{contract, Data}, Tk, String}}; -typecheck_address({_, _, unknown_type}, Tk, String, Data, _, _, _) -> - {ok, {{address, Data}, Tk, String}}; + {ok, {{contract, Data}, Pos, String}}; +typecheck_address({_, _, unknown_type}, Pos, String, Data, _, _, _) -> + {ok, {{address, Data}, Pos, String}}; typecheck_address({O, N, _}, _, _, _, Row, Start, End) -> {error, {wrong_type, O, N, address, Row, Start, End}}. -typecheck_contract({_, _, contract}, Tk, String, Data, _, _, _) -> - {ok, {{contract, Data}, Tk, String}}; -typecheck_contract({_, _, address}, Tk, String, Data, _, _, _) -> +typecheck_contract({_, _, contract}, Pos, String, Data, _, _, _) -> + {ok, {{contract, Data}, Pos, String}}; +typecheck_contract({_, _, address}, Pos, String, Data, _, _, _) -> % The compiler would type error, but we should be lenient here. - {ok, {{address, Data}, Tk, String}}; -typecheck_contract({_, _, unknown_type}, Tk, String, Data, _, _, _) -> - {ok, {{contract, Data}, Tk, String}}; + {ok, {{address, Data}, Pos, String}}; +typecheck_contract({_, _, unknown_type}, Pos, String, Data, _, _, _) -> + {ok, {{contract, Data}, Pos, String}}; typecheck_contract({O, N, _}, _, _, _, Row, Start, End) -> {error, {wrong_type, O, N, contract, Row, Start, End}}. -typecheck_signature({_, _, signature}, Tk, String, Data, _, _, _) -> - {ok, {{bytes, Data}, Tk, String}}; -typecheck_signature({_, _, {bytes, [64]}}, Tk, String, Data, _, _, _) -> +typecheck_signature({_, _, signature}, Pos, String, Data, _, _, _) -> + {ok, {{bytes, Data}, Pos, String}}; +typecheck_signature({_, _, {bytes, [64]}}, Pos, String, Data, _, _, _) -> % The compiler would probably type-error, but whatever. - {ok, {{bytes, Data}, Tk, String}}; -typecheck_signature({_, _, {bytes, [any]}}, Tk, String, Data, _, _, _) -> + {ok, {{bytes, Data}, Pos, String}}; +typecheck_signature({_, _, {bytes, [any]}}, Pos, String, Data, _, _, _) -> % The compiler would probably type-error, but whatever. - {ok, {{bytes, Data}, Tk, String}}; -typecheck_signature({_, _, unknown_type}, Tk, String, Data, _, _, _) -> - {ok, {{bytes, Data}, Tk, String}}; + {ok, {{bytes, Data}, Pos, String}}; +typecheck_signature({_, _, unknown_type}, Pos, String, Data, _, _, _) -> + {ok, {{bytes, Data}, Pos, String}}; typecheck_signature({O, N, _}, _, _, _, Row, Start, End) -> {error, {wrong_type, O, N, signature, Row, Start, End}}. %%% List Parsing -parse_list({_, _, {list, [Inner]}}, Tk, String, Row, Start) -> - parse_list_loop(Inner, Tk, String, "]", Row, Start, []); -parse_list({_, _, unknown_type}, Tk, String, Row, Start) -> - parse_list_loop(unknown_type(), Tk, String, "]", Row, Start, []); +parse_list({_, _, {list, [Inner]}}, Pos, String, Row, Start) -> + parse_list_loop(Inner, Pos, String, "]", Row, Start, []); +parse_list({_, _, unknown_type}, Pos, String, Row, Start) -> + parse_list_loop(unknown_type(), Pos, String, "]", Row, Start, []); parse_list({O, N, _}, _, _, Row, Start) -> {error, {wrong_type, O, N, list, Row, Start, Start}}. -parse_list_loop(Inner, Tk, String, CloseChar, Row, Start, Acc) -> - case next_token(Tk, String) of - {ok, {{character, CloseChar, _, _, _, _}, NewTk, NewString}} -> - {ok, {lists:reverse(Acc), NewTk, NewString}}; - {ok, {Token, NewTk, NewString}} -> - parse_list_loop2(Inner, NewTk, NewString, CloseChar, Row, Start, Acc, Token) +parse_list_loop(Inner, Pos, String, CloseChar, Row, Start, Acc) -> + case next_token(Pos, String) of + {ok, {{character, CloseChar, _, _, _, _}, NewPos, NewString}} -> + {ok, {lists:reverse(Acc), NewPos, NewString}}; + {ok, {Token, NewPos, NewString}} -> + parse_list_loop2(Inner, NewPos, NewString, CloseChar, Row, Start, Acc, Token) end. -parse_list_loop2(Inner, Tk, String, CloseChar, Row, Start, Acc, Token) -> - case parse_expression2(Inner, Tk, String, Token) of - {ok, {Value, NewTk, NewString}} -> - parse_list_loop3(Inner, NewTk, NewString, CloseChar, Row, Start, [Value | Acc]); +parse_list_loop2(Inner, Pos, String, CloseChar, Row, Start, Acc, Token) -> + case parse_expression2(Inner, Pos, String, Token) of + {ok, {Value, NewPos, NewString}} -> + parse_list_loop3(Inner, NewPos, NewString, CloseChar, Row, Start, [Value | Acc]); {error, Reason} -> Wrapper = choose_list_error_wrapper(CloseChar), % TODO: Are tuple indices off by one from list indices? @@ -342,12 +342,12 @@ parse_list_loop2(Inner, Tk, String, CloseChar, Row, Start, Acc, Token) -> {error, Wrapped} end. -parse_list_loop3(Inner, Tk, String, CloseChar, Row, Start, Acc) -> - case next_token(Tk, String) of - {ok, {{character, CloseChar, _, _, _, _}, NewTk, NewString}} -> - {ok, {lists:reverse(Acc), NewTk, NewString}}; - {ok, {{character, ",", _, _, _, _}, NewTk, NewString}} -> - parse_list_loop(Inner, NewTk, NewString, CloseChar, Row, Start, Acc); +parse_list_loop3(Inner, Pos, String, CloseChar, Row, Start, Acc) -> + case next_token(Pos, String) of + {ok, {{character, CloseChar, _, _, _, _}, NewPos, NewString}} -> + {ok, {lists:reverse(Acc), NewPos, NewString}}; + {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} -> + parse_list_loop(Inner, NewPos, NewString, CloseChar, Row, Start, Acc); {error, Reason} -> {error, Reason} end. @@ -357,22 +357,22 @@ choose_list_error_wrapper(")") -> tuple_element. %%% Ambiguous Parenthesis Parsing -parse_tuple({_, _, unknown_type}, Tk, String, Row, Start) -> +parse_tuple({_, _, unknown_type}, Pos, String, Row, Start) -> % An untyped tuple is a list of untyped terms, and weirdly our list parser % works perfectly for that, as long as we change the closing character to % be ")" instead of "]". - case parse_list_loop(unknown_type(), Tk, String, ")", Row, Start, []) of - {ok, {[Inner], NewTk, NewString}} -> + case parse_list_loop(unknown_type(), Pos, String, ")", Row, Start, []) of + {ok, {[Inner], NewPos, NewString}} -> % In Sophia, singleton tuples are unwrapped, and given the inner % type. - {ok, {Inner, NewTk, NewString}}; - {ok, {TermList, NewTk, NewString}} -> + {ok, {Inner, NewPos, NewString}}; + {ok, {TermList, NewPos, NewString}} -> Result = {tuple, list_to_tuple(TermList)}, - {ok, {Result, NewTk, NewString}}; + {ok, {Result, NewPos, NewString}}; {error, Reason} -> {error, Reason} end; -parse_tuple({O, N, T}, Tk, String, _, _) -> +parse_tuple({O, N, T}, Pos, String, _, _) -> % Typed tuple parsing is quite complex, because we also want to support % normal parentheses for grouping. It's not strictly necessary for % inputting data, since we don't have any infix operators in simple @@ -382,23 +382,23 @@ parse_tuple({O, N, T}, Tk, String, _, _) -> % Count how many ambiguous parens there are, including the one we already % saw. - case count_open_parens(Tk, String, 1) of - {ok, {Count, Token, NewTk, NewString}} -> + case count_open_parens(Pos, String, 1) of + {ok, {Count, Token, NewPos, NewString}} -> % Compare that to the amount of nesting tuple connectives are in % the type we are expected to produce. {ExcessCount, HeadType, Tails} = extract_tuple_type_info(Count, {O, N, T}, []), % Now work out what to do with all this information. - parse_tuple2(O, N, ExcessCount, HeadType, Tails, NewTk, NewString, Token); + parse_tuple2(O, N, ExcessCount, HeadType, Tails, NewPos, NewString, Token); {error, Reason} -> {error, Reason} end. -count_open_parens(Tk, String, Count) -> - case next_token(Tk, String) of - {ok, {{character, "(", _, _, _, _}, NewTk, NewString}} -> - count_open_parens(NewTk, NewString, Count + 1); - {ok, {Token, NewTk, NewString}} -> - {ok, {Count, Token, NewTk, NewString}}; +count_open_parens(Pos, String, Count) -> + case next_token(Pos, String) of + {ok, {{character, "(", _, _, _, _}, NewPos, NewString}} -> + count_open_parens(NewPos, NewString, Count + 1); + {ok, {Token, NewPos, NewString}} -> + {ok, {Count, Token, NewPos, NewString}}; {error, Reason} -> {error, Reason} end. @@ -412,15 +412,15 @@ extract_tuple_type_info(ParenCount, HeadType, Tails) -> parse_tuple2(_, _, _, {_, _, unknown_type}, [_ | _], _, _, _) -> {error, "Parsing of tuples with known lengths but unknown contents is not yet implemented."}; -parse_tuple2(O, N, ExcessCount, HeadType, Tails, Tk, String, {character, ")", _, Row, Col, _}) -> - parse_empty_tuple(O, N, ExcessCount, HeadType, Tails, Tk, String, Row, Col); -parse_tuple2(O, N, ExcessCount, HeadType, Tails, Tk, String, Token) -> +parse_tuple2(O, N, ExcessCount, HeadType, Tails, Pos, String, {character, ")", _, Row, Col, _}) -> + parse_empty_tuple(O, N, ExcessCount, HeadType, Tails, Pos, String, Row, Col); +parse_tuple2(O, N, ExcessCount, HeadType, Tails, Pos, String, Token) -> % Finished with parentheses for now, try and parse an expression out, to % get our head term. - case parse_expression2(HeadType, Tk, String, Token) of - {ok, {Result, NewTk, NewString}} -> + case parse_expression2(HeadType, Pos, String, Token) of + {ok, {Result, NewPos, NewString}} -> % Got a head term. Now try to build all the other tuple layers. - parse_tuple_tails(O, N, ExcessCount, Result, Tails, NewTk, NewString); + parse_tuple_tails(O, N, ExcessCount, Result, Tails, NewPos, NewString); {error, Reason} -> % TODO: Wrap errors here too. {error, Reason} @@ -434,44 +434,44 @@ parse_empty_tuple(_, _, 0, _, Tails, _, _, Row, Col) -> % got zero. ExpectCount = 1 + length(Tail), {error, {not_enough_elements, ExpectCount, 0, Row, Col}}; -parse_empty_tuple(O, N, ExcessCount, {_, _, {tuple, []}}, Tails, Tk, String, _, _) -> +parse_empty_tuple(O, N, ExcessCount, {_, _, {tuple, []}}, Tails, Pos, String, _, _) -> % If we have some ambiguous parentheses left, we now know one of them is % this empty tuple. HeadTerm = {tuple, {}}, NewExcessCount = ExcessCount - 1, % Now continue the loop as if it were an integer or something, in the head % position. - parse_tuple_tails(O, N, NewExcessCount, HeadTerm, Tails, Tk, String); + parse_tuple_tails(O, N, NewExcessCount, HeadTerm, Tails, Pos, String); parse_empty_tuple(_, _, _, {HeadO, HeadN, _}, _, _, _, Row, Col) -> % We were expecting a head term of a different type! {error, {wrong_type, HeadO, HeadN, unit, Row, Col, Col}}. -parse_tuple_tails(O, N, 0, HeadTerm, [TailTypes | ParentTails], Tk, String) -> +parse_tuple_tails(O, N, 0, HeadTerm, [TailTypes | ParentTails], Pos, String) -> % Tuples left to build, but no extra open parens to deal with, so we can % just parse multivalues naively, starting from the "we have a term, % waiting for a comma" stage of the loop. - case parse_multivalue3(TailTypes, Tk, String, -1, -1, [HeadTerm]) of - {ok, {Terms, NewTk, NewString}} -> + case parse_multivalue3(TailTypes, Pos, String, -1, -1, [HeadTerm]) of + {ok, {Terms, NewPos, NewString}} -> NewHead = {tuple, list_to_tuple(Terms)}, - parse_tuple_tails(O, N, 0, NewHead, ParentTails, NewTk, NewString); + parse_tuple_tails(O, N, 0, NewHead, ParentTails, NewPos, NewString); {error, Reason} -> % TODO: More error wrapping? {error, Reason} end; -parse_tuple_tails(_, _, 0, HeadTerm, [], Tk, String) -> +parse_tuple_tails(_, _, 0, HeadTerm, [], Pos, String) -> % No open parens left, no tuples left to build, we are done! - {ok, {HeadTerm, Tk, String}}; -parse_tuple_tails(O, N, ExcessCount, HeadTerm, Tails, Tk, String) -> + {ok, {HeadTerm, Pos, String}}; +parse_tuple_tails(O, N, ExcessCount, HeadTerm, Tails, Pos, String) -> % The ambiguous case, where we have a mix of tuple parens, and grouping % parens. We want to peek at the next token, to see if it closes a grouping % paren. - case next_token(Tk, String) of - {ok, {{character, ")", _, _, _, _}, NewTk, NewString}} -> + case next_token(Pos, String) of + {ok, {{character, ")", _, _, _, _}, NewPos, NewString}} -> % It is grouping! Close one excess paren, and continue. - parse_tuple_tails(O, N, ExcessCount - 1, HeadTerm, Tails, NewTk, NewString); - {ok, {{character, ",", _, _, _, _}, NewTk, NewString}} -> + parse_tuple_tails(O, N, ExcessCount - 1, HeadTerm, Tails, NewPos, NewString); + {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} -> % It is a real tuple! Try the normal logic, then. - parse_tuple_tails2(O, N, ExcessCount, HeadTerm, Tails, NewTk, NewString); + parse_tuple_tails2(O, N, ExcessCount, HeadTerm, Tails, NewPos, NewString); {ok, {{_, Actual, _, Row, Start, End}, _, _}} -> % Anything else is just a boring parse error we can complain about. {error, {unexpected_token, Actual, Row, Start, End}}; @@ -479,11 +479,11 @@ parse_tuple_tails(O, N, ExcessCount, HeadTerm, Tails, Tk, String) -> {error, Reason} end. -parse_tuple_tails2(O, N, ExcessCount, HeadTerm, [TailTypes | ParentTails], Tk, String) -> - case parse_multivalue(TailTypes, Tk, String, -1, -1, [HeadTerm]) of - {ok, {Terms, NewTk, NewString}} -> +parse_tuple_tails2(O, N, ExcessCount, HeadTerm, [TailTypes | ParentTails], Pos, String) -> + case parse_multivalue(TailTypes, Pos, String, -1, -1, [HeadTerm]) of + {ok, {Terms, NewPos, NewString}} -> NewHead = {tuple, list_to_tuple(Terms)}, - parse_tuple_tails(O, N, ExcessCount, NewHead, ParentTails, NewTk, NewString); + parse_tuple_tails(O, N, ExcessCount, NewHead, ParentTails, NewPos, NewString); {error, Reason} -> % TODO: wrap errors? {error, Reason} @@ -504,43 +504,43 @@ parse_tuple_tails2(O, N, _, _, [], _, _) -> %%% Unambiguous Tuple/Variant Parsing -parse_multivalue(ElemTypes, Tk, String, Row, Start, Acc) -> - case next_token(Tk, String) of - {ok, {{character, ")", _, Row2, Start2, _}, NewTk, NewString}} -> - check_multivalue_long_enough(ElemTypes, NewTk, NewString, Row2, Start2, Acc); - {ok, {Token, NewTk, NewString}} -> - parse_multivalue2(ElemTypes, NewTk, NewString, Row, Start, Acc, Token) +parse_multivalue(ElemTypes, Pos, String, Row, Start, Acc) -> + case next_token(Pos, String) of + {ok, {{character, ")", _, Row2, Start2, _}, NewPos, NewString}} -> + check_multivalue_long_enough(ElemTypes, NewPos, NewString, Row2, Start2, Acc); + {ok, {Token, NewPos, NewString}} -> + parse_multivalue2(ElemTypes, NewPos, NewString, Row, Start, Acc, Token) end. -parse_multivalue2([Next | Rest], Tk, String, Row, Start, Acc, Token) -> - case parse_expression2(Next, Tk, String, Token) of - {ok, {Value, NewTk, NewString}} -> - parse_multivalue3(Rest, NewTk, NewString, Row, Start, [Value | Acc]); +parse_multivalue2([Next | Rest], Pos, String, Row, Start, Acc, Token) -> + case parse_expression2(Next, Pos, String, Token) of + {ok, {Value, NewPos, NewString}} -> + parse_multivalue3(Rest, NewPos, NewString, Row, Start, [Value | Acc]); {error, Reason} -> Wrapper = choose_list_error_wrapper(")"), % TODO: Are tuple indices off by one from list indices? Wrapped = wrap_error(Reason, {Wrapper, length(Acc)}), {error, Wrapped} end; -parse_multivalue2([], Tk, String, _, _, Acc, {character, ")", _, _, _, _}) -> - {ok, {lists:reverse(Acc), Tk, String}}; +parse_multivalue2([], Pos, String, _, _, Acc, {character, ")", _, _, _, _}) -> + {ok, {lists:reverse(Acc), Pos, String}}; parse_multivalue2([], _, _, _, _, _, {_, S, _, Row, Start, End}) -> {error, {unexpected_token, S, Row, Start, End}}. -parse_multivalue3(ElemTypes, Tk, String, Row, Start, Acc) -> - case next_token(Tk, String) of - {ok, {{character, ")", _, Row2, Start2, _}, NewTk, NewString}} -> - check_multivalue_long_enough(ElemTypes, NewTk, NewString, Row2, Start2, Acc); - {ok, {{character, ",", _, _, _, _}, NewTk, NewString}} -> - parse_multivalue(ElemTypes, NewTk, NewString, Row, Start, Acc); +parse_multivalue3(ElemTypes, Pos, String, Row, Start, Acc) -> + case next_token(Pos, String) of + {ok, {{character, ")", _, Row2, Start2, _}, NewPos, NewString}} -> + check_multivalue_long_enough(ElemTypes, NewPos, NewString, Row2, Start2, Acc); + {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} -> + parse_multivalue(ElemTypes, NewPos, NewString, Row, Start, Acc); {ok, {{_, Actual, _, Row, Start, End}, _, _}} -> {error, {unexpected_token, Actual, Row, Start, End}}; {error, Reason} -> {error, Reason} end. -check_multivalue_long_enough([], Tk, String, _, _, Acc) -> - {ok, {lists:reverse(Acc), Tk, String}}; +check_multivalue_long_enough([], Pos, String, _, _, Acc) -> + {ok, {lists:reverse(Acc), Pos, String}}; check_multivalue_long_enough(Remaining, _, _, Row, Col, Got) -> GotCount = length(Got), ExpectCount = length(Remaining) + GotCount, @@ -548,8 +548,8 @@ check_multivalue_long_enough(Remaining, _, _, Row, Col, Got) -> %%% Variant parsing -parse_variant({_, _, {variant, Variants}}, Tk, String, Ident, Row, Start, End) -> - parse_variant2(Variants, Tk, String, Ident, Row, Start, End); +parse_variant({_, _, {variant, Variants}}, Pos, String, Ident, Row, Start, End) -> + parse_variant2(Variants, Pos, String, Ident, Row, Start, End); parse_variant({_, _, unknown_type}, _, _, _, Row, Start, End) -> {error, {unresolved_variant, Row, Start, End}}; parse_variant({O, N, _}, _, _, _, Row, Start, End) -> @@ -560,33 +560,33 @@ parse_variant({O, N, _}, _, _, _, Row, Start, End) -> % a variant. {error, {wrong_type, O, N, variant, Row, Start, End}}. -parse_variant2(Variants, Tk, String, Ident, Row, Start, End) -> +parse_variant2(Variants, Pos, String, Ident, Row, Start, End) -> case lookup_variant(Ident, Variants, 0) of {ok, {Tag, ElemTypes}} -> GetArity = fun({_, OtherElemTypes}) -> length(OtherElemTypes) end, Arities = lists:map(GetArity, Variants), - parse_variant3(Arities, Tag, ElemTypes, Tk, String); + parse_variant3(Arities, Tag, ElemTypes, Pos, String); error -> {error, {invalid_constructor, Ident, Row, Start, End}} end. -parse_variant3(Arities, Tag, [], Tk, String) -> +parse_variant3(Arities, Tag, [], Pos, String) -> % Parsing of 0-arity variants is different. Result = {variant, Arities, Tag, {}}, - {ok, {Result, Tk, String}}; -parse_variant3(Arities, Tag, ElemTypes, Tk, String) -> - case next_token(Tk, String) of - {ok, {{character, "(", _, Row, Start, _}, NewTk, NewString}} -> - parse_variant4(Arities, Tag, ElemTypes, NewTk, NewString, Row, Start); + {ok, {Result, Pos, String}}; +parse_variant3(Arities, Tag, ElemTypes, Pos, String) -> + case next_token(Pos, String) of + {ok, {{character, "(", _, Row, Start, _}, NewPos, NewString}} -> + parse_variant4(Arities, Tag, ElemTypes, NewPos, NewString, Row, Start); {ok, {{_, Actual, _, Row, Start, End}, _, _}} -> {error, {unexpected_token, Actual, Row, Start, End}} end. -parse_variant4(Arities, Tag, ElemTypes, Tk, String, Row, Start) -> - case parse_multivalue(ElemTypes, Tk, String, Row, Start, []) of - {ok, {Terms, NewTk, NewString}} -> +parse_variant4(Arities, Tag, ElemTypes, Pos, String, Row, Start) -> + case parse_multivalue(ElemTypes, Pos, String, Row, Start, []) of + {ok, {Terms, NewPos, NewString}} -> Result = {variant, Arities, Tag, list_to_tuple(Terms)}, - {ok, {Result, NewTk, NewString}}; + {ok, {Result, NewPos, NewString}}; {error, Reason} -> {error, Reason} end. @@ -600,16 +600,16 @@ lookup_variant(Ident, [_ | Rest], Tag) -> %%% Record parsing -parse_record_or_map({_, _, {map, [KeyType, ValueType]}}, Tk, String, _, _) -> - parse_map(KeyType, ValueType, Tk, String, #{}); -parse_record_or_map({_, _, {record, Fields}}, Tk, String, _, _) -> - parse_record(Fields, Tk, String, #{}); -parse_record_or_map({_, _, unknown_type}, Tk, String, _, _) -> - case next_token(Tk, String) of - {ok, {{character, "}", _, _, _, _}, NewTk, NewString}} -> - {ok, {#{}, NewTk, NewString}}; - {ok, {{character, "[", _, _, _, _}, NewTk, NewString}} -> - parse_map2(unknown_type(), unknown_type(), NewTk, NewString, #{}); +parse_record_or_map({_, _, {map, [KeyType, ValueType]}}, Pos, String, _, _) -> + parse_map(KeyType, ValueType, Pos, String, #{}); +parse_record_or_map({_, _, {record, Fields}}, Pos, String, _, _) -> + parse_record(Fields, Pos, String, #{}); +parse_record_or_map({_, _, unknown_type}, Pos, String, _, _) -> + case next_token(Pos, String) of + {ok, {{character, "}", _, _, _, _}, NewPos, NewString}} -> + {ok, {#{}, NewPos, NewString}}; + {ok, {{character, "[", _, _, _, _}, NewPos, NewString}} -> + parse_map2(unknown_type(), unknown_type(), NewPos, NewString, #{}); {ok, {{alphanum, _, _, Row, Start, End}, _, _}} -> {error, {unresolved_record, Row, Start, End}}; {ok, {{_, S, _, Row, Start, End}, _, _}} -> @@ -618,67 +618,67 @@ parse_record_or_map({_, _, unknown_type}, Tk, String, _, _) -> parse_record_or_map({O, N, _}, _, _, Row, Start) -> {error, {wrong_type, O, N, map, Row, Start, Start}}. -parse_record(Fields, Tk, String, Acc) -> - case next_token(Tk, String) of - {ok, {{alphanum, Ident, _, Row, Start, End}, NewTk, NewString}} -> - parse_record2(Fields, NewTk, NewString, Acc, Ident, Row, Start, End); - {ok, {{character, "}", _, Row, Start, End}, NewTk, NewString}} -> - parse_record_end(Fields, NewTk, NewString, Acc, Row, Start, End); +parse_record(Fields, Pos, String, Acc) -> + case next_token(Pos, String) of + {ok, {{alphanum, Ident, _, Row, Start, End}, NewPos, NewString}} -> + parse_record2(Fields, NewPos, NewString, Acc, Ident, Row, Start, End); + {ok, {{character, "}", _, Row, Start, End}, NewPos, NewString}} -> + parse_record_end(Fields, NewPos, NewString, Acc, Row, Start, End); {ok, {{_, S, _, Row, Start, End}, _, _}} -> {error, {unexpected_token, S, Row, Start, End}}; {error, Reason} -> {error, Reason} end. -parse_record2(Fields, Tk, String, Acc, Ident, Row, Start, End) -> +parse_record2(Fields, Pos, String, Acc, Ident, Row, Start, End) -> case lists:keyfind(Ident, 1, Fields) of {_, Type} -> - parse_record3(Fields, Tk, String, Acc, Ident, Row, Start, End, Type); + parse_record3(Fields, Pos, String, Acc, Ident, Row, Start, End, Type); false -> {error, {invalid_field, Ident, Row, Start, End}} end. -parse_record3(Fields, Tk, String, Acc, Ident, Row, Start, End, Type) -> +parse_record3(Fields, Pos, String, Acc, Ident, Row, Start, End, Type) -> case maps:is_key(Ident, Acc) of false -> - parse_record4(Fields, Tk, String, Acc, Ident, Type); + parse_record4(Fields, Pos, String, Acc, Ident, Type); true -> {error, {field_already_present, Ident, Row, Start, End}} end. -parse_record4(Fields, Tk, String, Acc, Ident, Type) -> - case expect_tokens(["="], Tk, String) of - {ok, {NewTk, NewString}} -> - parse_record5(Fields, NewTk, NewString, Acc, Ident, Type); +parse_record4(Fields, Pos, String, Acc, Ident, Type) -> + case expect_tokens(["="], Pos, String) of + {ok, {NewPos, NewString}} -> + parse_record5(Fields, NewPos, NewString, Acc, Ident, Type); {error, Reason} -> {error, Reason} end. -parse_record5(Fields, Tk, String, Acc, Ident, Type) -> - case parse_expression(Type, Tk, String) of - {ok, {Result, NewTk, NewString}} -> +parse_record5(Fields, Pos, String, Acc, Ident, Type) -> + case parse_expression(Type, Pos, String) of + {ok, {Result, NewPos, NewString}} -> NewAcc = maps:put(Ident, Result, Acc), - parse_record6(Fields, NewTk, NewString, NewAcc); + parse_record6(Fields, NewPos, NewString, NewAcc); {error, Reason} -> wrap_error(Reason, {record_field, Ident}) end. -parse_record6(Fields, Tk, String, Acc) -> - case next_token(Tk, String) of - {ok, {{character, ",", _, _, _, _}, NewTk, NewString}} -> - parse_record(Fields, NewTk, NewString, Acc); - {ok, {{character, "}", _, Row, Start, End}, NewTk, NewString}} -> - parse_record_end(Fields, NewTk, NewString, Acc, Row, Start, End); +parse_record6(Fields, Pos, String, Acc) -> + case next_token(Pos, String) of + {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} -> + parse_record(Fields, NewPos, NewString, Acc); + {ok, {{character, "}", _, Row, Start, End}, NewPos, NewString}} -> + parse_record_end(Fields, NewPos, NewString, Acc, Row, Start, End); {ok, {{_, S, _, Row, Start, End}, _, _}} -> {error, {unexpected_token, S, Row, Start, End}}; {error, Reason} -> {error, Reason} end. -parse_record_end(Fields, Tk, String, FieldValues, Row, Start, End) -> +parse_record_end(Fields, Pos, String, FieldValues, Row, Start, End) -> case parse_record_final_loop(Fields, FieldValues, []) of {ok, Result} -> - {ok, {Result, Tk, String}}; + {ok, {Result, Pos, String}}; {error, {missing_field, Name}} -> {error, {missing_field, Name, Row, Start, End}} end. @@ -702,47 +702,47 @@ parse_record_final_loop([], _, FieldsReverse) -> %%% Map Parsing -parse_map(KeyType, ValueType, Tk, String, Acc) -> - case next_token(Tk, String) of - {ok, {{character, "[", _, _, _, _}, NewTk, NewString}} -> - parse_map2(KeyType, ValueType, NewTk, NewString, Acc); - {ok, {{character, "}", _, _, _, _}, NewTk, NewString}} -> - {ok, {Acc, NewTk, NewString}}; +parse_map(KeyType, ValueType, Pos, String, Acc) -> + case next_token(Pos, String) of + {ok, {{character, "[", _, _, _, _}, NewPos, NewString}} -> + parse_map2(KeyType, ValueType, NewPos, NewString, Acc); + {ok, {{character, "}", _, _, _, _}, NewPos, NewString}} -> + {ok, {Acc, NewPos, NewString}}; {ok, {{_, S, _, Row, Start, End}, _, _}} -> {error, {unexpected_token, S, Row, Start, End}} end. -parse_map2(KeyType, ValueType, Tk, String, Acc) -> - case parse_expression(KeyType, Tk, String) of - {ok, {Result, NewTk, NewString}} -> - parse_map3(KeyType, ValueType, NewTk, NewString, Acc, Result); +parse_map2(KeyType, ValueType, Pos, String, Acc) -> + case parse_expression(KeyType, Pos, String) of + {ok, {Result, NewPos, NewString}} -> + parse_map3(KeyType, ValueType, NewPos, NewString, Acc, Result); {error, Reason} -> wrap_error(Reason, {map_key, maps:size(Acc)}) end. -parse_map3(KeyType, ValueType, Tk, String, Acc, Key) -> - case expect_tokens(["]", "="], Tk, String) of - {ok, {NewTk, NewString}} -> - parse_map4(KeyType, ValueType, NewTk, NewString, Acc, Key); +parse_map3(KeyType, ValueType, Pos, String, Acc, Key) -> + case expect_tokens(["]", "="], Pos, String) of + {ok, {NewPos, NewString}} -> + parse_map4(KeyType, ValueType, NewPos, NewString, Acc, Key); {error, Reason} -> {error, Reason} end. -parse_map4(KeyType, ValueType, Tk, String, Acc, Key) -> - case parse_expression(ValueType, Tk, String) of - {ok, {Result, NewTk, NewString}} -> +parse_map4(KeyType, ValueType, Pos, String, Acc, Key) -> + case parse_expression(ValueType, Pos, String) of + {ok, {Result, NewPos, NewString}} -> NewAcc = maps:put(Key, Result, Acc), - parse_map5(KeyType, ValueType, NewTk, NewString, NewAcc); + parse_map5(KeyType, ValueType, NewPos, NewString, NewAcc); {error, Reason} -> {error, Reason} end. -parse_map5(KeyType, ValueType, Tk, String, Acc) -> - case next_token(Tk, String) of - {ok, {{character, ",", _, _, _, _}, NewTk, NewString}} -> - parse_map(KeyType, ValueType, NewTk, NewString, Acc); - {ok, {{character, "}", _, _, _, _}, NewTk, NewString}} -> - {ok, {Acc, NewTk, NewString}}; +parse_map5(KeyType, ValueType, Pos, String, Acc) -> + case next_token(Pos, String) of + {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} -> + parse_map(KeyType, ValueType, NewPos, NewString, Acc); + {ok, {{character, "}", _, _, _, _}, NewPos, NewString}} -> + {ok, {Acc, NewPos, NewString}}; {ok, {{_, S, _, Row, Start, End}, _, _}} -> {error, {unexpected_token, S, Row, Start, End}} end. From d014ae09826c16c06b50b3234e32645397cb175b Mon Sep 17 00:00:00 2001 From: Jarvis Carroll Date: Wed, 4 Feb 2026 07:00:39 +0000 Subject: [PATCH 21/42] Handle token/parse errors more carefully --- src/hz_sophia.erl | 82 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 56 insertions(+), 26 deletions(-) diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl index 8d578c2..3ae94d5 100644 --- a/src/hz_sophia.erl +++ b/src/hz_sophia.erl @@ -24,8 +24,8 @@ parse_literal2(Result, Pos, String) -> case next_token(Pos, String) of {ok, {{eof, _, _, _, _}, _, _}} -> {ok, Result}; - {ok, {{_, S, _, Row, Start, End}, _, _}} -> - {error, {unexpected_token, S, Row, Start, End}}; + {ok, {Token, _, _}} -> + unexpected_token(Token); {error, Reason} -> {error, Reason} end. @@ -235,8 +235,10 @@ parse_expression2(Type, Pos, String, {character, "{", _, Row, Start, _}) -> parse_record_or_map(Type, Pos, String, Row, Start); parse_expression2(Type, Pos, String, {alphanum, S, _, Row, Start, End}) -> parse_alphanum(Type, Pos, String, S, Row, Start, End); -parse_expression2(_, _, _, {_, S, _, Row, Start, End}) -> - {error, {unexpected_token, S, Row, Start, End}}. +parse_expression2(_, _, _, {eof, _, _, _, _, _}) -> + {error, unexpected_end_of_file}; +parse_expression2(_, _, _, Token) -> + unexpected_token(Token). unknown_type() -> {unknown_type, already_normalized, unknown_type}. @@ -247,10 +249,24 @@ expect_tokens([Str | Rest], Pos, String) -> case next_token(Pos, String) of {ok, {{_, Str, _, _, _, _}, NewPos, NewString}} -> expect_tokens(Rest, NewPos, NewString); - {ok, {{_, Actual, _, Row, Start, End}, _, _}} -> - {error, {unexpected_token, Actual, Row, Start, End}} + {ok, {Token, _, _}} -> + unexpected_token(Token, Str); + {error, Reason} -> + {error, Reason} end. +unexpected_token(Token, _Expected) -> + % I don't know if this is a good idea, but sometimes there are only one or + % two tokens that could have worked, which might make for simple + % non-technical error messages. I don't know how to format that yet, + % though. + unexpected_token(Token). + +unexpected_token({eof, _, _, _, _, _}) -> + {error, expression_incomplete}; +unexpected_token({_, S, _, Row, Start, End}) -> + {error, {unexpected_token, S, Row, Start, End}}. + %%% Ambiguous Chain Object vs Identifier Parsing parse_alphanum(Type, Pos, String, [C | _] = S, Row, Start, End) when ?IS_LATIN_UPPER(C) -> @@ -328,7 +344,9 @@ parse_list_loop(Inner, Pos, String, CloseChar, Row, Start, Acc) -> {ok, {{character, CloseChar, _, _, _, _}, NewPos, NewString}} -> {ok, {lists:reverse(Acc), NewPos, NewString}}; {ok, {Token, NewPos, NewString}} -> - parse_list_loop2(Inner, NewPos, NewString, CloseChar, Row, Start, Acc, Token) + parse_list_loop2(Inner, NewPos, NewString, CloseChar, Row, Start, Acc, Token); + {error, Reason} -> + {error, Reason} end. parse_list_loop2(Inner, Pos, String, CloseChar, Row, Start, Acc, Token) -> @@ -348,6 +366,8 @@ parse_list_loop3(Inner, Pos, String, CloseChar, Row, Start, Acc) -> {ok, {lists:reverse(Acc), NewPos, NewString}}; {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} -> parse_list_loop(Inner, NewPos, NewString, CloseChar, Row, Start, Acc); + {ok, {Token, _, _}} -> + unexpected_token(Token, CloseChar); {error, Reason} -> {error, Reason} end. @@ -472,9 +492,9 @@ parse_tuple_tails(O, N, ExcessCount, HeadTerm, Tails, Pos, String) -> {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} -> % It is a real tuple! Try the normal logic, then. parse_tuple_tails2(O, N, ExcessCount, HeadTerm, Tails, NewPos, NewString); - {ok, {{_, Actual, _, Row, Start, End}, _, _}} -> + {ok, {Token, _, _}} -> % Anything else is just a boring parse error we can complain about. - {error, {unexpected_token, Actual, Row, Start, End}}; + unexpected_token(Token, ")"); {error, Reason} -> {error, Reason} end. @@ -509,7 +529,9 @@ parse_multivalue(ElemTypes, Pos, String, Row, Start, Acc) -> {ok, {{character, ")", _, Row2, Start2, _}, NewPos, NewString}} -> check_multivalue_long_enough(ElemTypes, NewPos, NewString, Row2, Start2, Acc); {ok, {Token, NewPos, NewString}} -> - parse_multivalue2(ElemTypes, NewPos, NewString, Row, Start, Acc, Token) + parse_multivalue2(ElemTypes, NewPos, NewString, Row, Start, Acc, Token); + {error, Reason} -> + {error, Reason} end. parse_multivalue2([Next | Rest], Pos, String, Row, Start, Acc, Token) -> @@ -524,8 +546,8 @@ parse_multivalue2([Next | Rest], Pos, String, Row, Start, Acc, Token) -> end; parse_multivalue2([], Pos, String, _, _, Acc, {character, ")", _, _, _, _}) -> {ok, {lists:reverse(Acc), Pos, String}}; -parse_multivalue2([], _, _, _, _, _, {_, S, _, Row, Start, End}) -> - {error, {unexpected_token, S, Row, Start, End}}. +parse_multivalue2([], _, _, _, _, _, Token) -> + unexpected_token(Token, ")"). parse_multivalue3(ElemTypes, Pos, String, Row, Start, Acc) -> case next_token(Pos, String) of @@ -533,8 +555,8 @@ parse_multivalue3(ElemTypes, Pos, String, Row, Start, Acc) -> check_multivalue_long_enough(ElemTypes, NewPos, NewString, Row2, Start2, Acc); {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} -> parse_multivalue(ElemTypes, NewPos, NewString, Row, Start, Acc); - {ok, {{_, Actual, _, Row, Start, End}, _, _}} -> - {error, {unexpected_token, Actual, Row, Start, End}}; + {ok, {Token, _, _}} -> + unexpected_token(Token, ")"); {error, Reason} -> {error, Reason} end. @@ -578,8 +600,10 @@ parse_variant3(Arities, Tag, ElemTypes, Pos, String) -> case next_token(Pos, String) of {ok, {{character, "(", _, Row, Start, _}, NewPos, NewString}} -> parse_variant4(Arities, Tag, ElemTypes, NewPos, NewString, Row, Start); - {ok, {{_, Actual, _, Row, Start, End}, _, _}} -> - {error, {unexpected_token, Actual, Row, Start, End}} + {ok, {Token, _, _}} -> + unexpected_token(Token, "("); + {error, Reason} -> + {error, Reason} end. parse_variant4(Arities, Tag, ElemTypes, Pos, String, Row, Start) -> @@ -612,8 +636,10 @@ parse_record_or_map({_, _, unknown_type}, Pos, String, _, _) -> parse_map2(unknown_type(), unknown_type(), NewPos, NewString, #{}); {ok, {{alphanum, _, _, Row, Start, End}, _, _}} -> {error, {unresolved_record, Row, Start, End}}; - {ok, {{_, S, _, Row, Start, End}, _, _}} -> - {error, {unexpected_token, S, Row, Start, End}} + {ok, {Token, _, _}} -> + unexpected_token(Token, "}"); + {error, Reason} -> + {error, Reason} end; parse_record_or_map({O, N, _}, _, _, Row, Start) -> {error, {wrong_type, O, N, map, Row, Start, Start}}. @@ -624,8 +650,8 @@ parse_record(Fields, Pos, String, Acc) -> parse_record2(Fields, NewPos, NewString, Acc, Ident, Row, Start, End); {ok, {{character, "}", _, Row, Start, End}, NewPos, NewString}} -> parse_record_end(Fields, NewPos, NewString, Acc, Row, Start, End); - {ok, {{_, S, _, Row, Start, End}, _, _}} -> - {error, {unexpected_token, S, Row, Start, End}}; + {ok, {Token, _, _}} -> + unexpected_token(Token, "}"); {error, Reason} -> {error, Reason} end. @@ -669,8 +695,8 @@ parse_record6(Fields, Pos, String, Acc) -> parse_record(Fields, NewPos, NewString, Acc); {ok, {{character, "}", _, Row, Start, End}, NewPos, NewString}} -> parse_record_end(Fields, NewPos, NewString, Acc, Row, Start, End); - {ok, {{_, S, _, Row, Start, End}, _, _}} -> - {error, {unexpected_token, S, Row, Start, End}}; + {ok, {Token, _, _}} -> + unexpected_token(Token, "}"); {error, Reason} -> {error, Reason} end. @@ -708,8 +734,10 @@ parse_map(KeyType, ValueType, Pos, String, Acc) -> parse_map2(KeyType, ValueType, NewPos, NewString, Acc); {ok, {{character, "}", _, _, _, _}, NewPos, NewString}} -> {ok, {Acc, NewPos, NewString}}; - {ok, {{_, S, _, Row, Start, End}, _, _}} -> - {error, {unexpected_token, S, Row, Start, End}} + {ok, {Token, _, _}} -> + unexpected_token(Token, "}"); + {error, Reason} -> + {error, Reason} end. parse_map2(KeyType, ValueType, Pos, String, Acc) -> @@ -743,8 +771,10 @@ parse_map5(KeyType, ValueType, Pos, String, Acc) -> parse_map(KeyType, ValueType, NewPos, NewString, Acc); {ok, {{character, "}", _, _, _, _}, NewPos, NewString}} -> {ok, {Acc, NewPos, NewString}}; - {ok, {{_, S, _, Row, Start, End}, _, _}} -> - {error, {unexpected_token, S, Row, Start, End}} + {ok, {Token, _, _}} -> + unexpected_token(Token, "}"); + {error, Reason} -> + {error, Reason} end. % TODO From 3838a7e3c5df4838e4abb7099627906cd1ad56a5 Mon Sep 17 00:00:00 2001 From: Jarvis Carroll Date: Thu, 5 Feb 2026 07:13:25 +0000 Subject: [PATCH 22/42] Parse qualified names. This seemed like it was going to be insanely insanely complex, but then it turns out the compiler doesn't accept spaces in qualified names, so I can just dump periods in the lexer and hit it with string:split/3. Easy. --- src/hz_sophia.erl | 68 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 21 deletions(-) diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl index 3ae94d5..4ac67f2 100644 --- a/src/hz_sophia.erl +++ b/src/hz_sophia.erl @@ -36,7 +36,7 @@ parse_literal2(Result, Pos, String) -> -define(IS_LATIN_LOWER(C), (((C) >= $a) and ((C) =< $z))). -define(IS_ALPHA(C), (?IS_LATIN_UPPER(C) or ?IS_LATIN_LOWER(C) or ((C) == $_))). -define(IS_NUM(C), (((C) >= $0) and ((C) =< $9))). --define(IS_ALPHANUM(C), (?IS_ALPHA(C) or ?IS_NUM(C))). +-define(IS_ALPHANUM(C), (?IS_ALPHA(C) or ?IS_NUM(C) or ((C) == $.))). -define(IS_HEX(C), (?IS_NUM(C) or (((C) >= $A) and ((C) =< $F)) or (((C) >= $a) and ((C) =< $f)))). next_token({Row, Col}, []) -> @@ -67,7 +67,8 @@ alphanum_token(Start, {Row, Col}, [C | Rest], Acc) when ?IS_ALPHANUM(C) -> alphanum_token(Start, {Row, Col + 1}, Rest, [C | Acc]); alphanum_token({_, Start}, {Row, End}, String, Acc) -> AlphaString = lists:reverse(Acc), - Token = {alphanum, AlphaString, AlphaString, Row, Start, End - 1}, + Path = string:split(AlphaString, ".", all), + Token = {alphanum, AlphaString, Path, Row, Start, End - 1}, {ok, {Token, {Row, End}, String}}. num_token(Start, {Row, Col}, [C | Rest], Chars, Value) when ?IS_NUM(C) -> @@ -233,8 +234,8 @@ parse_expression2(Type, Pos, String, {character, "(", _, Row, Start, _}) -> parse_tuple(Type, Pos, String, Row, Start); parse_expression2(Type, Pos, String, {character, "{", _, Row, Start, _}) -> parse_record_or_map(Type, Pos, String, Row, Start); -parse_expression2(Type, Pos, String, {alphanum, S, _, Row, Start, End}) -> - parse_alphanum(Type, Pos, String, S, Row, Start, End); +parse_expression2(Type, Pos, String, {alphanum, _, Path, Row, Start, End}) -> + parse_alphanum(Type, Pos, String, Path, Row, Start, End); parse_expression2(_, _, _, {eof, _, _, _, _, _}) -> {error, unexpected_end_of_file}; parse_expression2(_, _, _, Token) -> @@ -269,15 +270,11 @@ unexpected_token({_, S, _, Row, Start, End}) -> %%% Ambiguous Chain Object vs Identifier Parsing -parse_alphanum(Type, Pos, String, [C | _] = S, Row, Start, End) when ?IS_LATIN_UPPER(C) -> +parse_alphanum(Type, Pos, String, [[C | _] = S], Row, Start, End) when ?IS_LATIN_LOWER(C) -> % From a programming perspective, we are trying to parse a constant, so % an alphanum token can really only be a constructor, or a chain object. - % Chain objects start with lowercase prefixes, like ak_, so clearly this is - % a variant constructor. - parse_variant(Type, Pos, String, S, Row, Start, End); -parse_alphanum(Type, Pos, String, S, Row, Start, End) -> - % Inversely, variant constructors are always uppercase, so now that we have - % handled that case, only chain objects are left. + % Constructors start with uppercase characters, so lowercase can only be a + % chain object. try case gmser_api_encoder:decode(unicode:characters_to_binary(S)) of {account_pubkey, Data} -> @@ -294,7 +291,11 @@ parse_alphanum(Type, Pos, String, S, Row, Start, End) -> end catch _:_ -> {error, {unexpected_identifier, S, Row, Start, End}} - end. + end; +parse_alphanum(Type, Pos, String, Path, Row, Start, End) -> + % Inversely, chain object prefixes are always lowercase, so any other path + % must be a variant constructor, or invalid. + parse_variant(Type, Pos, String, Path, Row, Start, End). typecheck_address({_, _, address}, Pos, String, Data, _, _, _) -> {ok, {{address, Data}, Pos, String}}; @@ -570,26 +571,42 @@ check_multivalue_long_enough(Remaining, _, _, Row, Col, Got) -> %%% Variant parsing -parse_variant({_, _, {variant, Variants}}, Pos, String, Ident, Row, Start, End) -> - parse_variant2(Variants, Pos, String, Ident, Row, Start, End); +parse_variant({O, N, {variant, Variants}}, Pos, String, [Ident], Row, Start, End) -> + parse_variant2(O, N, Variants, Pos, String, "", Ident, Row, Start, End); +parse_variant({O, N, {variant, Variants}}, Pos, String, [Namespace, Constructor], Row, Start, End) -> + case get_typename(O, N) of + [Namespace, _] -> + parse_variant2(O, N, Variants, Pos, String, Namespace ++ ".", Constructor, Row, Start, End); + _ -> + {error, {invalid_constructor, O, N, Namespace ++ "." ++ Constructor, Row, Start, End}} + end; parse_variant({_, _, unknown_type}, _, _, _, Row, Start, End) -> {error, {unresolved_variant, Row, Start, End}}; parse_variant({O, N, _}, _, _, _, Row, Start, End) -> % In normal code, identifiers can have many meanings, which can result in - % lots of different errors. In this Sophia 'object notation', identifiers - % can only ever be variant constructors, (sort of like the Sophia version - % of atoms,) and so immediately lead to a type error if we aren't expecting - % a variant. + % lots of different errors. In constant/immediate/normalized Sophia terms + % we know identifiers are always variants, so we can type error if any + % other type was expected. {error, {wrong_type, O, N, variant, Row, Start, End}}. -parse_variant2(Variants, Pos, String, Ident, Row, Start, End) -> - case lookup_variant(Ident, Variants, 0) of +get_typename(O, already_normalized) -> + get_typename(O); +get_typename(_, N) -> + get_typename(N). + +get_typename({Name, _}) -> + string:split(Name, ".", all); +get_typename(Name) -> + string:split(Name, ".", all). + +parse_variant2(O, N, Variants, Pos, String, Prefix, Constructor, Row, Start, End) -> + case lookup_variant(Constructor, Variants, 0) of {ok, {Tag, ElemTypes}} -> GetArity = fun({_, OtherElemTypes}) -> length(OtherElemTypes) end, Arities = lists:map(GetArity, Variants), parse_variant3(Arities, Tag, ElemTypes, Pos, String); error -> - {error, {invalid_constructor, Ident, Row, Start, End}} + {error, {invalid_constructor, O, N, Prefix ++ Constructor, Row, Start, End}} end. parse_variant3(Arities, Tag, [], Pos, String) -> @@ -878,11 +895,20 @@ variant_test() -> check_parser_with_typedef(TypeDef, "One(0)"), check_parser_with_typedef(TypeDef, "Two(0, 1)"), check_parser_with_typedef(TypeDef, "Two([], [1, 2, 3])"), + check_parser_with_typedef(TypeDef, "C.Zero"), {error, {unresolved_variant, _, _, _}} = parse_literal(unknown_type(), "Zero"), ok. +namespace_variant_test() -> + Term = "[N.A, N.B]", + Source = "namespace N = datatype mytype = A | B\ncontract C = entrypoint f() = " ++ Term, + {Fate, VariantType} = compile_entrypoint_value_and_type(Source, "f"), + check_sophia_to_fate(VariantType, Term, Fate), + + ok. + chain_objects_test() -> % Address, check_parser("ak_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx"), From 6c172c4783e94abf5522c265aebec054675aa74b Mon Sep 17 00:00:00 2001 From: Craig Everett Date: Thu, 12 Feb 2026 17:44:56 +0900 Subject: [PATCH 23/42] Adjusting a few calls. --- src/hz.erl | 31 ++++++++++++++++++++++++------- src/hz_aaci.erl | 19 ++++++++++--------- src/hz_sophia.erl | 8 +++++++- 3 files changed, 41 insertions(+), 17 deletions(-) diff --git a/src/hz.erl b/src/hz.erl index 4734e74..fc50119 100644 --- a/src/hz.erl +++ b/src/hz.erl @@ -1098,7 +1098,7 @@ contract_create_built(CreatorID, Compiled, InitArgs) -> contract_create_built(CreatorID, Nonce, Amount, TTL, Gas, GasPrice, Compiled, InitArgs) -> - AACI = hz_aaci:prepare_aaci(maps:get(aci, Compiled)), + AACI = hz_aaci:prepare(maps:get(aci, Compiled)), case encode_call_data(AACI, "init", InitArgs) of {ok, CallData} -> assemble_calldata(CreatorID, Nonce, Amount, TTL, Gas, GasPrice, @@ -1192,7 +1192,7 @@ read_aci(Path) -> AACI :: aaci() | {aaci, Label :: term()}, ConID :: unicode:chardata(), Fun :: string(), - Args :: [string()], + Args :: [string()] | {erlang, [term()]}, Result :: {ok, CallTX} | {error, Reason}, CallTX :: binary(), Reason :: term(). @@ -1227,7 +1227,7 @@ contract_call(CallerID, AACI, ConID, Fun, Args) -> AACI :: aaci() | {aaci, Label :: term()}, ConID :: unicode:chardata(), Fun :: string(), - Args :: [string()], + Args :: [string()] | {erlang, [term()]}, Result :: {ok, CallTX} | {error, Reason}, CallTX :: binary(), Reason :: term(). @@ -1265,7 +1265,7 @@ contract_call(CallerID, Gas, AACI, ConID, Fun, Args) -> AACI :: aaci() | {aaci, Label :: term()}, ConID :: unicode:chardata(), Fun :: string(), - Args :: [string()], + Args :: [string()] | {erlang, [term()]}, Result :: {ok, CallTX} | {error, Reason}, CallTX :: binary(), Reason :: term(). @@ -1437,7 +1437,7 @@ contract_call4(PK, Nonce, Gas, GasPrice, Amount, TTL, CK, CallData) -> prepare_contract(File) -> case so_compiler:file(File, [{aci, json}]) of - {ok, #{aci := ACI}} -> {ok, hz_aaci:prepare_aaci(ACI)}; + {ok, #{aci := ACI}} -> {ok, hz_aaci:prepare(ACI)}; Error -> Error end. @@ -1517,12 +1517,29 @@ encode_call_data({aaci, Label}, Fun, Args) -> error -> {error, aaci_not_found} end. -encode_call_data2(ArgDef, Fun, Args) -> +encode_call_data2(ArgDef, Fun, {erlang, Args}) -> case hz_aaci:erlang_args_to_fate(ArgDef, Args) of {ok, Coerced} -> gmb_fate_abi:create_calldata(Fun, Coerced); - Errors -> Errors + Errors -> Errors + end; +encode_call_data2(ArgDef, Fun, SophiaArgs) -> + case convert(ArgDef, SophiaArgs) of + {ok, Args} -> gmb_fate_abi:create_calldata(Fun, Args); + Errors -> Errors end. +convert(Defs, Args) -> convert(Defs, Args, 1, [], []). + +convert([{Name, Def} | Defs], [Arg | Args], Nth, Terms, Errors) -> + case hz_sophia:parse_literal(Def, Arg) of + {ok, Term} -> convert(Defs, Args, Nth + 1, [Term | Terms], Errors); + {error, Reason} -> convert(Defs, Args, Nth + 1, Terms, [{Nth, Name, Reason} | Errors]) + end; +convert([], [], _, Terms, []) -> + {ok, lists:reverse(Terms)}; +convert([], [], _, _, Errors) -> + {error, Errors}. + sign_tx(Unsigned, SecKey) -> case network_id() of diff --git a/src/hz_aaci.erl b/src/hz_aaci.erl index f9da33f..90ec5bc 100644 --- a/src/hz_aaci.erl +++ b/src/hz_aaci.erl @@ -16,8 +16,8 @@ -license("GPL-3.0-or-later"). % Contract call and serialization interface functions --export([prepare_contract/1, - prepare_aaci/1, +-export([prepare_from_file/1, + prepare/1, erlang_to_fate/2, fate_to_erlang/2, erlang_args_to_fate/2, @@ -33,21 +33,22 @@ %%% ACI/AACI --spec prepare_contract(File) -> {ok, AACI} | {error, Reason} - when File :: file:filename(), +-spec prepare_from_file(Path) -> {ok, AACI} | {error, Reason} + when Path :: file:filename(), AACI :: aaci(), Reason :: term(). %% @doc %% Compile a contract and extract the function spec meta for use in future formation %% of calldata -prepare_contract(File) -> - case so_compiler:file(File, [{aci, json}]) of - {ok, #{aci := ACI}} -> {ok, prepare_aaci(ACI)}; +prepare_from_file(Path) -> + case so_compiler:file(Path, [{aci, json}]) of + {ok, #{aci := ACI}} -> {ok, prepare(ACI)}; Error -> Error end. -prepare_aaci(ACI) -> + +prepare(ACI) -> % We want to take the types represented by the ACI, things like N1.T(N2.T), % and dereference them down to concrete types like % {tuple, [integer, string]}. Our type dereferencing algorithms @@ -1009,7 +1010,7 @@ coerce_hash_test() -> aaci_from_string(String) -> case so_compiler:from_string(String, [{aci, json}]) of - {ok, #{aci := ACI}} -> {ok, prepare_aaci(ACI)}; + {ok, #{aci := ACI}} -> {ok, prepare(ACI)}; Error -> Error end. diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl index 4ac67f2..a2e00ee 100644 --- a/src/hz_sophia.erl +++ b/src/hz_sophia.erl @@ -8,6 +8,12 @@ -include_lib("eunit/include/eunit.hrl"). + +-spec parse_literal(String) -> Result + when String :: string(), + Result :: {ok, gmb_fate_data:fate_type()} + | {error, Reason :: term()}. + parse_literal(String) -> parse_literal(unknown_type(), String). @@ -821,7 +827,7 @@ compile_entrypoint_value_and_type(Source, Entrypoint) -> FATE = extract_return_value(Code), % Generate the AACI, and get the AACI type info for the correct entrypoint. - AACI = hz_aaci:prepare_aaci(ACI), + AACI = hz_aaci:prepare(ACI), {ok, {_, Type}} = hz_aaci:get_function_signature(AACI, "f"), {FATE, Type}. From 60985130cb039eff53877c7e55415a7a9bce77c3 Mon Sep 17 00:00:00 2001 From: Jarvis Carroll Date: Fri, 13 Feb 2026 04:08:58 +0000 Subject: [PATCH 24/42] Refine tuple parsing errors There are four major fixes here: 1. some eof tokens were being pattern matched with the wrong arity 2. tuples that are too long actually speculatively parse as an untyped tuple, and then complain that there were too many elements, 3. singleton tuples with a trailing comma are now handled differently to grouping parentheses, consistently between typed and untyped logic 4. the extra return values used to detect untyped singleton tuples are also used to pass the close paren position, so that too_many_elements can report the correct file position too. Point 4. also completely removes the need for tracking open paren positions that I was doing, and that I thought I would need to do even more of in the ambiguous-open-paren-stack case. --- src/hz_sophia.erl | 254 ++++++++++++++++++++++++++++++---------------- 1 file changed, 165 insertions(+), 89 deletions(-) diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl index a2e00ee..13ba1e7 100644 --- a/src/hz_sophia.erl +++ b/src/hz_sophia.erl @@ -28,7 +28,7 @@ parse_literal(Type, String) -> parse_literal2(Result, Pos, String) -> % We have parsed a valid expression. Now check that the string ends. case next_token(Pos, String) of - {ok, {{eof, _, _, _, _}, _, _}} -> + {ok, {{eof, _, _, _, _, _}, _, _}} -> {ok, Result}; {ok, {Token, _, _}} -> unexpected_token(Token); @@ -46,7 +46,7 @@ parse_literal2(Result, Pos, String) -> -define(IS_HEX(C), (?IS_NUM(C) or (((C) >= $A) and ((C) =< $F)) or (((C) >= $a) and ((C) =< $f)))). next_token({Row, Col}, []) -> - {ok, {{eof, "", Row, Col, Col}, {Row, Col}, []}}; + {ok, {{eof, "", [], Row, Col, Col}, {Row, Col}, []}}; next_token({Row, Col}, " " ++ Rest) -> next_token({Row, Col + 1}, Rest); next_token({Row, Col}, "\t" ++ Rest) -> @@ -236,8 +236,8 @@ parse_expression2(Type, Pos, String, {string, _, Value, Row, Start, End}) -> end; parse_expression2(Type, Pos, String, {character, "[", _, Row, Start, _}) -> parse_list(Type, Pos, String, Row, Start); -parse_expression2(Type, Pos, String, {character, "(", _, Row, Start, _}) -> - parse_tuple(Type, Pos, String, Row, Start); +parse_expression2(Type, Pos, String, {character, "(", _, _, _, _}) -> + parse_tuple(Type, Pos, String); parse_expression2(Type, Pos, String, {character, "{", _, Row, Start, _}) -> parse_record_or_map(Type, Pos, String, Row, Start); parse_expression2(Type, Pos, String, {alphanum, _, Path, Row, Start, End}) -> @@ -339,27 +339,35 @@ typecheck_signature({O, N, _}, _, _, _, Row, Start, End) -> %%% List Parsing -parse_list({_, _, {list, [Inner]}}, Pos, String, Row, Start) -> - parse_list_loop(Inner, Pos, String, "]", Row, Start, []); -parse_list({_, _, unknown_type}, Pos, String, Row, Start) -> - parse_list_loop(unknown_type(), Pos, String, "]", Row, Start, []); +parse_list({_, _, {list, [Inner]}}, Pos, String, _, _) -> + parse_list2(Inner, Pos, String); +parse_list({_, _, unknown_type}, Pos, String, _, _) -> + parse_list2(unknown_type(), Pos, String); parse_list({O, N, _}, _, _, Row, Start) -> {error, {wrong_type, O, N, list, Row, Start, Start}}. -parse_list_loop(Inner, Pos, String, CloseChar, Row, Start, Acc) -> - case next_token(Pos, String) of - {ok, {{character, CloseChar, _, _, _, _}, NewPos, NewString}} -> - {ok, {lists:reverse(Acc), NewPos, NewString}}; - {ok, {Token, NewPos, NewString}} -> - parse_list_loop2(Inner, NewPos, NewString, CloseChar, Row, Start, Acc, Token); +parse_list2(Inner, Pos, String) -> + case parse_list_loop(Inner, Pos, String, "]", []) of + {ok, {Result, _, _, NewPos, NewString}} -> + {ok, {Result, NewPos, NewString}}; {error, Reason} -> {error, Reason} end. -parse_list_loop2(Inner, Pos, String, CloseChar, Row, Start, Acc, Token) -> +parse_list_loop(Inner, Pos, String, CloseChar, Acc) -> + case next_token(Pos, String) of + {ok, {{character, CloseChar, _, Row, Col, _}, NewPos, NewString}} -> + {ok, {lists:reverse(Acc), true, {Row, Col}, NewPos, NewString}}; + {ok, {Token, NewPos, NewString}} -> + parse_list_loop2(Inner, NewPos, NewString, CloseChar, Acc, Token); + {error, Reason} -> + {error, Reason} + end. + +parse_list_loop2(Inner, Pos, String, CloseChar, Acc, Token) -> case parse_expression2(Inner, Pos, String, Token) of {ok, {Value, NewPos, NewString}} -> - parse_list_loop3(Inner, NewPos, NewString, CloseChar, Row, Start, [Value | Acc]); + parse_list_loop3(Inner, NewPos, NewString, CloseChar, [Value | Acc]); {error, Reason} -> Wrapper = choose_list_error_wrapper(CloseChar), % TODO: Are tuple indices off by one from list indices? @@ -367,12 +375,12 @@ parse_list_loop2(Inner, Pos, String, CloseChar, Row, Start, Acc, Token) -> {error, Wrapped} end. -parse_list_loop3(Inner, Pos, String, CloseChar, Row, Start, Acc) -> +parse_list_loop3(Inner, Pos, String, CloseChar, Acc) -> case next_token(Pos, String) of - {ok, {{character, CloseChar, _, _, _, _}, NewPos, NewString}} -> - {ok, {lists:reverse(Acc), NewPos, NewString}}; + {ok, {{character, CloseChar, _, Row, Col, _}, NewPos, NewString}} -> + {ok, {lists:reverse(Acc), false, {Row, Col}, NewPos, NewString}}; {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} -> - parse_list_loop(Inner, NewPos, NewString, CloseChar, Row, Start, Acc); + parse_list_loop(Inner, NewPos, NewString, CloseChar, Acc); {ok, {Token, _, _}} -> unexpected_token(Token, CloseChar); {error, Reason} -> @@ -384,22 +392,22 @@ choose_list_error_wrapper(")") -> tuple_element. %%% Ambiguous Parenthesis Parsing -parse_tuple({_, _, unknown_type}, Pos, String, Row, Start) -> +parse_tuple({_, _, unknown_type}, Pos, String) -> % An untyped tuple is a list of untyped terms, and weirdly our list parser % works perfectly for that, as long as we change the closing character to % be ")" instead of "]". - case parse_list_loop(unknown_type(), Pos, String, ")", Row, Start, []) of - {ok, {[Inner], NewPos, NewString}} -> - % In Sophia, singleton tuples are unwrapped, and given the inner - % type. + case parse_list_loop(unknown_type(), Pos, String, ")", []) of + {ok, {[Inner], false, _, NewPos, NewString}} -> + % In Sophia, trailing commas are invalid, and so all singleton + % tuples are unwrapped, and translated into the inner type. {ok, {Inner, NewPos, NewString}}; - {ok, {TermList, NewPos, NewString}} -> + {ok, {TermList, _, _, NewPos, NewString}} -> Result = {tuple, list_to_tuple(TermList)}, {ok, {Result, NewPos, NewString}}; {error, Reason} -> {error, Reason} end; -parse_tuple({O, N, T}, Pos, String, _, _) -> +parse_tuple(Type, Pos, String) -> % Typed tuple parsing is quite complex, because we also want to support % normal parentheses for grouping. It's not strictly necessary for % inputting data, since we don't have any infix operators in simple @@ -413,9 +421,9 @@ parse_tuple({O, N, T}, Pos, String, _, _) -> {ok, {Count, Token, NewPos, NewString}} -> % Compare that to the amount of nesting tuple connectives are in % the type we are expected to produce. - {ExcessCount, HeadType, Tails} = extract_tuple_type_info(Count, {O, N, T}, []), + {ExcessCount, HeadType, Tails} = extract_tuple_type_info(Count, Type, []), % Now work out what to do with all this information. - parse_tuple2(O, N, ExcessCount, HeadType, Tails, NewPos, NewString, Token); + parse_tuple2(ExcessCount, HeadType, Tails, NewPos, NewString, Token); {error, Reason} -> {error, Reason} end. @@ -437,23 +445,23 @@ extract_tuple_type_info(ParenCount, HeadType, Tails) -> % No parens, or no more (non-empty) tuples. Stop! {ParenCount, HeadType, Tails}. -parse_tuple2(_, _, _, {_, _, unknown_type}, [_ | _], _, _, _) -> +parse_tuple2(_, {_, _, unknown_type}, [_ | _], _, _, _) -> {error, "Parsing of tuples with known lengths but unknown contents is not yet implemented."}; -parse_tuple2(O, N, ExcessCount, HeadType, Tails, Pos, String, {character, ")", _, Row, Col, _}) -> - parse_empty_tuple(O, N, ExcessCount, HeadType, Tails, Pos, String, Row, Col); -parse_tuple2(O, N, ExcessCount, HeadType, Tails, Pos, String, Token) -> +parse_tuple2(ExcessCount, HeadType, Tails, Pos, String, {character, ")", _, Row, Col, _}) -> + parse_empty_tuple(ExcessCount, HeadType, Tails, Pos, String, Row, Col); +parse_tuple2(ExcessCount, HeadType, Tails, Pos, String, Token) -> % Finished with parentheses for now, try and parse an expression out, to % get our head term. case parse_expression2(HeadType, Pos, String, Token) of {ok, {Result, NewPos, NewString}} -> % Got a head term. Now try to build all the other tuple layers. - parse_tuple_tails(O, N, ExcessCount, Result, Tails, NewPos, NewString); + parse_tuple_tails(ExcessCount, Result, Tails, NewPos, NewString); {error, Reason} -> % TODO: Wrap errors here too. {error, Reason} end. -parse_empty_tuple(_, _, 0, _, Tails, _, _, Row, Col) -> +parse_empty_tuple(0, _, Tails, _, _, Row, Col) -> % There are zero excess parens, meaning all our parens are tuples. Get the % top one. [Tail | _] = Tails, @@ -461,44 +469,32 @@ parse_empty_tuple(_, _, 0, _, Tails, _, _, Row, Col) -> % got zero. ExpectCount = 1 + length(Tail), {error, {not_enough_elements, ExpectCount, 0, Row, Col}}; -parse_empty_tuple(O, N, ExcessCount, {_, _, {tuple, []}}, Tails, Pos, String, _, _) -> +parse_empty_tuple(ExcessCount, {_, _, {tuple, []}}, Tails, Pos, String, _, _) -> % If we have some ambiguous parentheses left, we now know one of them is % this empty tuple. HeadTerm = {tuple, {}}, NewExcessCount = ExcessCount - 1, % Now continue the loop as if it were an integer or something, in the head % position. - parse_tuple_tails(O, N, NewExcessCount, HeadTerm, Tails, Pos, String); -parse_empty_tuple(_, _, _, {HeadO, HeadN, _}, _, _, _, Row, Col) -> + parse_tuple_tails(NewExcessCount, HeadTerm, Tails, Pos, String); +parse_empty_tuple(_, {HeadO, HeadN, _}, _, _, _, Row, Col) -> % We were expecting a head term of a different type! {error, {wrong_type, HeadO, HeadN, unit, Row, Col, Col}}. -parse_tuple_tails(O, N, 0, HeadTerm, [TailTypes | ParentTails], Pos, String) -> - % Tuples left to build, but no extra open parens to deal with, so we can - % just parse multivalues naively, starting from the "we have a term, - % waiting for a comma" stage of the loop. - case parse_multivalue3(TailTypes, Pos, String, -1, -1, [HeadTerm]) of - {ok, {Terms, NewPos, NewString}} -> - NewHead = {tuple, list_to_tuple(Terms)}, - parse_tuple_tails(O, N, 0, NewHead, ParentTails, NewPos, NewString); - {error, Reason} -> - % TODO: More error wrapping? - {error, Reason} - end; -parse_tuple_tails(_, _, 0, HeadTerm, [], Pos, String) -> +parse_tuple_tails(0, HeadTerm, [], Pos, String) -> % No open parens left, no tuples left to build, we are done! {ok, {HeadTerm, Pos, String}}; -parse_tuple_tails(O, N, ExcessCount, HeadTerm, Tails, Pos, String) -> +parse_tuple_tails(ExcessCount, HeadTerm, Tails, Pos, String) -> % The ambiguous case, where we have a mix of tuple parens, and grouping % parens. We want to peek at the next token, to see if it closes a grouping % paren. case next_token(Pos, String) of - {ok, {{character, ")", _, _, _, _}, NewPos, NewString}} -> - % It is grouping! Close one excess paren, and continue. - parse_tuple_tails(O, N, ExcessCount - 1, HeadTerm, Tails, NewPos, NewString); - {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} -> - % It is a real tuple! Try the normal logic, then. - parse_tuple_tails2(O, N, ExcessCount, HeadTerm, Tails, NewPos, NewString); + {ok, {{character, ")", _, Row, Col, _}, NewPos, NewString}} -> + % It is grouping! Try closing a grouping paren. + parse_tuple_tails_paren(ExcessCount, HeadTerm, Tails, NewPos, NewString, Row, Col); + {ok, {{character, ",", _, Row, Col, _}, NewPos, NewString}} -> + % It is a real tuple! Try parsing a tuple. + parse_tuple_tails_comma(ExcessCount, HeadTerm, Tails, NewPos, NewString, Row, Col); {ok, {Token, _, _}} -> % Anything else is just a boring parse error we can complain about. unexpected_token(Token, ")"); @@ -506,68 +502,93 @@ parse_tuple_tails(O, N, ExcessCount, HeadTerm, Tails, Pos, String) -> {error, Reason} end. -parse_tuple_tails2(O, N, ExcessCount, HeadTerm, [TailTypes | ParentTails], Pos, String) -> - case parse_multivalue(TailTypes, Pos, String, -1, -1, [HeadTerm]) of +parse_tuple_tails_paren(0, _, [[] | _], _, _, Row, Col) -> + % A singleton tuple was expected, but a grouping paren was given. In theory + % we could be permissive here, but we were asked to do type checking, and + % this is a type error. The type error itself is a bit hard to reproduce, + % but we do know exactly what the fix is, so let's report that instead. + {error, {expected_trailing_comma, Row, Col}}; +parse_tuple_tails_paren(0, _, [Tail | _], _, _, Row, Col) -> + % A tuple (of more than one elements) was expected, but a grouping paren + % was given. Again, the type error is hard to produce, but the actual + % solution is simple; add more elements. + ExpectCount = length(Tail) + 1, + GotCount = 1, + {error, {not_enough_elements, ExpectCount, GotCount, Row, Col}}; +parse_tuple_tails_paren(ExcessCount, HeadTerm, Tails, Pos, String, _, _) -> + % We were expecting some grouping parens, and now we know that one of them + % was in fact grouping. Good. + parse_tuple_tails(ExcessCount - 1, HeadTerm, Tails, Pos, String). + +parse_tuple_tails_comma(_, _, [], _, _, Row, Col) -> + % No more tuples, so commas are invalid. It's hard to describe the type + % error that a comma would actually produce, so instead let's just give + % the user the actual solution to their problems, which is to remove the + % comma. + {error, {expected_close_paren, Row, Col}}; +parse_tuple_tails_comma(ExcessCount, HeadTerm, Tails, Pos, String, _, _) -> + % If there are no tails then we would have exited into the "grouping parens + % only" case, so we know this works: + [TailTypes | ParentTails] = Tails, + % Now we can parse this tuple as a tuple. + case parse_multivalue(TailTypes, Pos, String, [HeadTerm]) of {ok, {Terms, NewPos, NewString}} -> NewHead = {tuple, list_to_tuple(Terms)}, - parse_tuple_tails(O, N, ExcessCount, NewHead, ParentTails, NewPos, NewString); + % Then continue the loop, with whatever parent tuple types this + % tuple is meant to be a part of. + parse_tuple_tails(ExcessCount, NewHead, ParentTails, NewPos, NewString); {error, Reason} -> % TODO: wrap errors? {error, Reason} - end; -parse_tuple_tails2(O, N, _, _, [], _, _) -> - % This case is created when, for example, we want int * int, but instead we - % get a term like ((1, 2), 3), of type (int * int) * int. The trouble is, - % ((1, 2)) would have been valid, so it's actually the second comma that - % tips us off to the error, not the first one. - % - % For simpler cases, like (1, 2) when int was expected, this error message - % is fine: - Err = {error, {wrong_type, O, N, tuple, -1, -1, -1}}, - % TODO: Row/col - % TODO: Generate better error messages in the cases where N *is* a tuple, - % but the first thing inside that tuple is the problem. - Err. + end. %%% Unambiguous Tuple/Variant Parsing -parse_multivalue(ElemTypes, Pos, String, Row, Start, Acc) -> +parse_multivalue(ElemTypes, Pos, String, Acc) -> case next_token(Pos, String) of {ok, {{character, ")", _, Row2, Start2, _}, NewPos, NewString}} -> check_multivalue_long_enough(ElemTypes, NewPos, NewString, Row2, Start2, Acc); {ok, {Token, NewPos, NewString}} -> - parse_multivalue2(ElemTypes, NewPos, NewString, Row, Start, Acc, Token); + parse_multivalue2(ElemTypes, NewPos, NewString, Acc, Token); {error, Reason} -> {error, Reason} end. -parse_multivalue2([Next | Rest], Pos, String, Row, Start, Acc, Token) -> +parse_multivalue2([Next | Rest], Pos, String, Acc, Token) -> case parse_expression2(Next, Pos, String, Token) of {ok, {Value, NewPos, NewString}} -> - parse_multivalue3(Rest, NewPos, NewString, Row, Start, [Value | Acc]); + parse_multivalue3(Rest, NewPos, NewString, [Value | Acc]); {error, Reason} -> Wrapper = choose_list_error_wrapper(")"), % TODO: Are tuple indices off by one from list indices? Wrapped = wrap_error(Reason, {Wrapper, length(Acc)}), {error, Wrapped} end; -parse_multivalue2([], Pos, String, _, _, Acc, {character, ")", _, _, _, _}) -> - {ok, {lists:reverse(Acc), Pos, String}}; -parse_multivalue2([], _, _, _, _, _, Token) -> - unexpected_token(Token, ")"). +parse_multivalue2([], Pos, String, Acc, Token) -> + count_multivalue_excess(Pos, String, Acc, Token). -parse_multivalue3(ElemTypes, Pos, String, Row, Start, Acc) -> +parse_multivalue3(ElemTypes, Pos, String, Acc) -> case next_token(Pos, String) of {ok, {{character, ")", _, Row2, Start2, _}, NewPos, NewString}} -> check_multivalue_long_enough(ElemTypes, NewPos, NewString, Row2, Start2, Acc); {ok, {{character, ",", _, _, _, _}, NewPos, NewString}} -> - parse_multivalue(ElemTypes, NewPos, NewString, Row, Start, Acc); + parse_multivalue(ElemTypes, NewPos, NewString, Acc); {ok, {Token, _, _}} -> unexpected_token(Token, ")"); {error, Reason} -> {error, Reason} end. +count_multivalue_excess(Pos, String, TypedAcc, Token) -> + ExpectedLen = length(TypedAcc), + case parse_list_loop2(unknown_type(), Pos, String, ")", TypedAcc, Token) of + {ok, {TermList, _, {Row, Col}, _, _}} -> + ActualLen = length(TermList), + {error, {too_many_elements, ExpectedLen, ActualLen, Row, Col}}; + {error, Reason} -> + {error, Reason} + end. + check_multivalue_long_enough([], Pos, String, _, _, Acc) -> {ok, {lists:reverse(Acc), Pos, String}}; check_multivalue_long_enough(Remaining, _, _, Row, Col, Got) -> @@ -621,16 +642,16 @@ parse_variant3(Arities, Tag, [], Pos, String) -> {ok, {Result, Pos, String}}; parse_variant3(Arities, Tag, ElemTypes, Pos, String) -> case next_token(Pos, String) of - {ok, {{character, "(", _, Row, Start, _}, NewPos, NewString}} -> - parse_variant4(Arities, Tag, ElemTypes, NewPos, NewString, Row, Start); + {ok, {{character, "(", _, _, _, _}, NewPos, NewString}} -> + parse_variant4(Arities, Tag, ElemTypes, NewPos, NewString); {ok, {Token, _, _}} -> unexpected_token(Token, "("); {error, Reason} -> {error, Reason} end. -parse_variant4(Arities, Tag, ElemTypes, Pos, String, Row, Start) -> - case parse_multivalue(ElemTypes, Pos, String, Row, Start, []) of +parse_variant4(Arities, Tag, ElemTypes, Pos, String) -> + case parse_multivalue(ElemTypes, Pos, String, []) of {ok, {Terms, NewPos, NewString}} -> Result = {variant, Arities, Tag, list_to_tuple(Terms)}, {ok, {Result, NewPos, NewString}}; @@ -907,6 +928,15 @@ variant_test() -> ok. +ambiguous_variant_test() -> + TypeDef = "datatype mytype = C | D", + check_parser_with_typedef(TypeDef, "C"), + check_parser_with_typedef(TypeDef, "D"), + check_parser_with_typedef(TypeDef, "C.C"), + check_parser_with_typedef(TypeDef, "C.D"), + + ok. + namespace_variant_test() -> Term = "[N.A, N.B]", Source = "namespace N = datatype mytype = A | B\ncontract C = entrypoint f() = " ++ Term, @@ -997,3 +1027,49 @@ lexer_offset_test() -> ok. +parser_offset_test() -> + {_, Type} = compile_entrypoint_value_and_type("contract C = entrypoint f() = ((1, 2), (3, 4))", "f"), + + {error, {not_enough_elements, 2, 1, 1, 8}} = parse_literal(Type, "((1, 2))"), + {error, {not_enough_elements, 2, 1, 1, 10}} = parse_literal(Type, "(((1, 2)))"), + {error, {too_many_elements, 2, 3, 1, 24}} = parse_literal(Type, "((1, 2), (3, 4), (5, 6))"), + {error, {too_many_elements, 2, 3, 1, 10}} = parse_literal(Type, "((1, 2, 3), (4, 5))"), + + ok. + +singleton_test() -> + % The Sophia compiler would never generate this, but it is a valid type + % within the FATE virtual machine, and it is possible to represent within + % the ACI itself. + SingletonACI = #{tuple => [<<"int">>]}, + + % Build an AACI around this, and run it through the AACI machinery. + Function = #{name => <<"f">>, + arguments => [], + stateful => false, + payable => false, + returns => SingletonACI}, + ACI = [#{contract => #{functions => [Function], + name => <<"C">>, + kind => contract_main, + payable => false, + typedefs => []}}], + {aaci, "C", #{"f" := {[], SingletonType}}, _} = hz_aaci:prepare(ACI), + + % Now let's do some testing with this weird type, to see if we handle it + % correctly. + {ok, {tuple, {1}}} = parse_literal(SingletonType, "(1,)"), + % Some ambiguous nesting parens, for fun. + {ok, {tuple, {1}}} = parse_literal(SingletonType, "(((1),))"), + % No trailing comma should give an error. + {error, {expected_trailing_comma, 1, 3}} = parse_literal(SingletonType, "(1)"), + % All of the above should behave the same in untyped contexts: + {ok, {tuple, {1}}} = parse_literal(unknown_type(), "(1,)"), + {ok, {tuple, {1}}} = parse_literal(unknown_type(), "(((1),))"), + {ok, 1} = parse_literal(unknown_type(), "(1)"), + + % Also if we wanted an integer, the singleton is NOT dropped, so is also an + % error. + {error, {expected_close_paren, 1, 3}} = parse_literal({integer, alread_normalized, integer}, "(1,)"), + + ok. From efe0a64056c7231c8e18b82adbd0f00cd953dfcf Mon Sep 17 00:00:00 2001 From: Craig Everett Date: Fri, 13 Feb 2026 13:50:29 +0900 Subject: [PATCH 25/42] Update comments, add fate and sophia tagged args --- src/hz.erl | 71 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 57 insertions(+), 14 deletions(-) diff --git a/src/hz.erl b/src/hz.erl index fc50119..84d1535 100644 --- a/src/hz.erl +++ b/src/hz.erl @@ -892,7 +892,10 @@ result(Received) -> Received. -spec contract_create(CreatorID, Path, InitArgs) -> Result when CreatorID :: unicode:chardata(), Path :: file:filename(), - InitArgs :: [string()], + InitArgs :: [string()] + | {erlang, [term()]} + | {fate, [term()]} + | {sophia, [string()]}, Result :: {ok, CreateTX} | {error, Reason}, CreateTX :: binary(), Reason :: file:posix() | term(). @@ -927,7 +930,10 @@ contract_create(CreatorID, Path, InitArgs) -> Gas :: pos_integer(), GasPrice :: pos_integer(), Path :: file:filename(), - InitArgs :: [string()], + InitArgs :: [string()] + | {erlang, [term()]} + | {fate, [term()]} + | {sophia, [string()]}, Result :: {ok, CreateTX} | {error, Reason}, CreateTX :: binary(), Reason :: term(). @@ -1072,7 +1078,10 @@ contract_create2(CreatorID, Nonce, Amount, TTL, Gas, GasPrice, Source, Options, -spec contract_create_built(CreatorID, Compiled, InitArgs) -> Result when CreatorID :: unicode:chardata(), Compiled :: map(), - InitArgs :: [string()], + InitArgs :: [string()] + | {erlang, [term()]} + | {fate, [term()]} + | {sophia, [string()]}, Result :: {ok, CreateTX} | {error, Reason}, CreateTX :: binary(), Reason :: file:posix() | bad_fun_name | aaci_not_found | term(). @@ -1097,12 +1106,31 @@ contract_create_built(CreatorID, Compiled, InitArgs) -> end. +-spec contract_create_built(CreatorID, Nonce, Amount, TTL, Gas, GasPrice, Compiled, InitArgs) -> Result + when CreatorID :: unicode:chardata(), + Nonce :: pos_integer(), + Amount :: non_neg_integer(), + TTL :: non_neg_integer(), + Gas :: pos_integer(), + GasPrice :: pos_integer(), + Compiled :: map(), + InitArgs :: [string()] + | {erlang, [term()]} + | {fate, [term()]} + | {sophia, [string()]}, + Result :: {ok, CreateTX} | {error, Reason}, + CreateTX :: binary(), + Reason :: file:posix() | bad_fun_name | aaci_not_found | term(). +%% @doc +%% See `contract_create/8' for detailed information on argument types. +%% The `Compiled' argument is the output of contract compilation and replaces the `File' +%% argument in `contract_create/8'. + contract_create_built(CreatorID, Nonce, Amount, TTL, Gas, GasPrice, Compiled, InitArgs) -> AACI = hz_aaci:prepare(maps:get(aci, Compiled)), case encode_call_data(AACI, "init", InitArgs) of {ok, CallData} -> - assemble_calldata(CreatorID, Nonce, Amount, TTL, Gas, GasPrice, - Compiled, CallData); + assemble_calldata(CreatorID, Nonce, Amount, TTL, Gas, GasPrice, Compiled, CallData); Error -> Error end. @@ -1192,7 +1220,10 @@ read_aci(Path) -> AACI :: aaci() | {aaci, Label :: term()}, ConID :: unicode:chardata(), Fun :: string(), - Args :: [string()] | {erlang, [term()]}, + Args :: [string()] + | {erlang, [term()]} + | {fate, [term()]} + | {sophia, [string()]}, Result :: {ok, CallTX} | {error, Reason}, CallTX :: binary(), Reason :: term(). @@ -1227,7 +1258,10 @@ contract_call(CallerID, AACI, ConID, Fun, Args) -> AACI :: aaci() | {aaci, Label :: term()}, ConID :: unicode:chardata(), Fun :: string(), - Args :: [string()] | {erlang, [term()]}, + Args :: [string()] + | {erlang, [term()]} + | {fate, [term()]} + | {sophia, [string()]}, Result :: {ok, CallTX} | {error, Reason}, CallTX :: binary(), Reason :: term(). @@ -1265,7 +1299,10 @@ contract_call(CallerID, Gas, AACI, ConID, Fun, Args) -> AACI :: aaci() | {aaci, Label :: term()}, ConID :: unicode:chardata(), Fun :: string(), - Args :: [string()] | {erlang, [term()]}, + Args :: [string()] + | {erlang, [term()]} + | {fate, [term()]} + | {sophia, [string()]}, Result :: {ok, CallTX} | {error, Reason}, CallTX :: binary(), Reason :: term(). @@ -1503,7 +1540,7 @@ min_gas_price() -> %% This function always returns 200,000 in the current version. min_gas() -> - 200000. + 200_000. encode_call_data({aaci, _ContractName, FunDefs, _TypeDefs}, Fun, Args) -> @@ -1517,16 +1554,22 @@ encode_call_data({aaci, Label}, Fun, Args) -> error -> {error, aaci_not_found} end. +encode_call_data2(ArgDef, Fun, {sophia, Args}) -> + case convert(ArgDef, Args) of + {ok, Converted} -> gmb_fate_abi:create_calldata(Fun, Converted); + Errors -> Errors + end; encode_call_data2(ArgDef, Fun, {erlang, Args}) -> case hz_aaci:erlang_args_to_fate(ArgDef, Args) of {ok, Coerced} -> gmb_fate_abi:create_calldata(Fun, Coerced); Errors -> Errors end; -encode_call_data2(ArgDef, Fun, SophiaArgs) -> - case convert(ArgDef, SophiaArgs) of - {ok, Args} -> gmb_fate_abi:create_calldata(Fun, Args); - Errors -> Errors - end. +encode_call_data2(_, Fun, {fate, Args}) -> + % TODO: This should probably be moved back closer to the initiating call. + % 2026-02-13: Craig + gmb_fate_abi:create_calldata(Fun, Args); +encode_call_data2(ArgDef, Fun, Args) -> + encode_call_data2(ArgDef, Fun, {sophia, Args}). convert(Defs, Args) -> convert(Defs, Args, 1, [], []). From 9bc0ffafd1fb2d25b60288ee0a07f968c85425e5 Mon Sep 17 00:00:00 2001 From: Jarvis Carroll Date: Fri, 13 Feb 2026 05:52:27 +0000 Subject: [PATCH 26/42] bool/char literals Character literals were the main complexity here, but I threw booleans in as well, since that covers all the major literals. --- src/hz_sophia.erl | 131 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 102 insertions(+), 29 deletions(-) diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl index 13ba1e7..ef6a55e 100644 --- a/src/hz_sophia.erl +++ b/src/hz_sophia.erl @@ -65,6 +65,8 @@ next_token({Row, Col}, [$#, C | Rest]) when ?IS_HEX(C) -> bytes_token({Row, Col}, {Row, Col + 1}, [C | Rest], "#", []); next_token({Row, Col}, "\"" ++ Rest) -> string_token({Row, Col}, {Row, Col + 1}, Rest, "\"", <<>>); +next_token({Row, Col}, "'" ++ Rest) -> + character_token({Row, Col}, {Row, Col + 1}, Rest, "'"); next_token({Row, Col}, [Char | Rest]) -> Token = {character, [Char], Char, Row, Col, Col}, {ok, {Token, {Row, Col + 1}, Rest}}. @@ -115,41 +117,70 @@ reverse_combine_nibbles([D1], Acc) -> reverse_combine_nibbles([], Acc) -> Acc. -string_token(Start, {Row, Col}, "\\x" ++ String, SourceChars, Value) -> - case escape_hex_code({Row, Col}, {Row, Col + 2}, String, "x\\" ++ SourceChars) of - {ok, {Codepoint, NewSourceChars, NewPos, NewString}} -> - NewValue = <>, - string_token(Start, NewPos, NewString, NewSourceChars, NewValue); - {error, Reason} -> - {error, Reason} - end; -string_token(Start, {Row, Col}, [$\\, C | Rest], SourceChars, Value) -> - case escape_char(C) of - {ok, ByteVal} -> - string_token(Start, {Row, Col + 2}, Rest, [C, $\ | SourceChars], <>); - error -> - {error, {invalid_escape_code, [C], Row, Col}} - end; string_token({_, Start}, {Row, Col}, [$" | Rest], SourceChars, Value) -> SourceStr = lists:reverse([$" | SourceChars]), Token = {string, SourceStr, Value, Row, Start, Col}, {ok, {Token, {Row, Col + 1}, Rest}}; -string_token(Start, {Row, Col}, [C | Rest], SourceChars, Value) -> - % TODO: ERTS probably had to convert this FROM utf8 at some point, so why - % bother, if we need to convert it back? I guess we could accept iolists if - % we really wanted to waste time on this point... - string_token(Start, {Row, Col + 1}, Rest, [C | SourceChars], <>). +string_token({_, Start}, {Row, Col}, [], SourceChars, _) -> + SourceStr = lists:reverse(SourceChars), + {error, {unclosed_string_literal, SourceStr, Start, Row, Col - 1}}; +string_token({_, Start}, {Row, Col}, [$\r | _], SourceChars, _) -> + SourceStr = lists:reverse(SourceChars), + {error, {unclosed_string_literal, SourceStr, Start, Row, Col - 1}}; +string_token({_, Start}, {Row, Col}, [$\n | _], SourceChars, _) -> + SourceStr = lists:reverse(SourceChars), + {error, {unclosed_string_literal, SourceStr, Start, Row, Col - 1}}; +string_token(Start, Pos, String, SourceChars, Value) -> + case parse_char(Start, Pos, String, SourceChars) of + {ok, {Char, NewSourceChars, NewPos, NewString}} -> + % TODO: ERTS probably had to convert this FROM utf8 at some point, + % so why bother, if we need to convert it back? I guess we could + % accept iolists if we really wanted to waste time on this point... + NewValue = <>, + string_token(Start, NewPos, NewString, NewSourceChars, NewValue); + {error, Reason} -> + {error, Reason} + end. -escape_hex_code(Start, {Row, Col}, "{" ++ String, SourceChars) -> - escape_long_hex_code(Start, {Row, Col + 1}, String, "{" ++ SourceChars, 0); -escape_hex_code(_, {Row, Col}, [A, B | String], SourceChars) when ?IS_HEX(A), ?IS_HEX(B) -> - % As of writing this, the Sophia compiler will convert this byte from - % extended ASCII to unicode... But it really shouldn't. The literal parser - % does what the compiler should do. +character_token({_, Start}, {Row, Col}, [], SourceChars) -> + SourceStr = lists:reverse(SourceChars), + {error, {unclosed_character_literal, SourceStr, Start, Row, Col - 1}}; +character_token({_, Start}, {Row, Col}, [$\r | _], SourceChars) -> + SourceStr = lists:reverse(SourceChars), + {error, {unclosed_character_literal, SourceStr, Start, Row, Col - 1}}; +character_token({_, Start}, {Row, Col}, [$\n | _], SourceChars) -> + SourceStr = lists:reverse(SourceChars), + {error, {unclosed_character_literal, SourceStr, Start, Row, Col - 1}}; +character_token(Start, Pos, String, SourceChars) -> + case parse_char(Start, Pos, String, SourceChars) of + {ok, {Char, NewSourceChars, NewPos, NewString}} -> + character_token2(Start, NewPos, NewString, NewSourceChars, Char); + {error, Reason} -> + {error, Reason} + end. + +character_token2({_, Start}, {Row, Col}, [$' | Rest], SourceChars, Value) -> + SourceStr = lists:reverse([$' | SourceChars]), + Token = {char_literal, SourceStr, Value, Row, Start, Col}, + {ok, {Token, {Row, Col + 1}, Rest}}; +character_token2({_, Start}, {Row, Col}, _, SourceChars, _) -> + SourceStr = lists:reverse(SourceChars), + {error, {unclosed_character_literal, SourceStr, Start, Row, Col - 1}}. + +parse_char(Start, {Row, Col}, "\\x{" ++ String, SourceChars) -> + escape_long_hex_code(Start, {Row, Col + 3}, String, "{x\\" ++ SourceChars, 0); +parse_char(_, {Row, Col}, [$\\, $x, A, B | String], SourceChars) when ?IS_HEX(A), ?IS_HEX(B) -> Byte = convert_digit(A) * 16 + convert_digit(B), - {ok, {Byte, [B, A | SourceChars], {Row, Col + 2}, String}}; -escape_hex_code({Row1, Col1}, _, _, _) -> - {error, {invalid_escape_code, "\\x", Row1, Col1}}. + {ok, {Byte, [B, A, $x, $\\ | SourceChars], {Row, Col + 4}, String}}; +parse_char({Row, Start}, {Row, Col}, [$\\, C | Rest], SourceChars) -> + case escape_char(C) of + {ok, ByteVal} -> + {ok, {ByteVal, [C, $\ | SourceChars], {Row, Col + 2}, Rest}}; + error -> + {error, {invalid_escape_code, [$\\, C], Row, Start, Col + 1}} + end; +parse_char(_, {Row, Col}, [C | Rest], SourceChars) -> + {ok, {C, [C | SourceChars], {Row, Col + 1}, Rest}}. escape_long_hex_code(_, {Row, Col}, "}" ++ String, SourceChars, Value) -> {ok, {Value, "}" ++ SourceChars, {Row, Col + 1}, String}}; @@ -171,7 +202,10 @@ escape_char($n) -> {ok, $\n}; escape_char($r) -> {ok, $\r}; escape_char($t) -> {ok, $\t}; escape_char($v) -> {ok, $\v}; +% Technically \" and \' are only valid inside their own quote characters, not +% each other, but whatever, we will just be permissive here. escape_char($") -> {ok, $\"}; +escape_char($') -> {ok, $\'}; escape_char($\\) -> {ok, $\\}; escape_char(_) -> error. @@ -234,6 +268,15 @@ parse_expression2(Type, Pos, String, {string, _, Value, Row, Start, End}) -> {O, N, _} -> {error, {wrong_type, O, N, string, Row, Start, End}} end; +parse_expression2(Type, Pos, String, {char_literal, _, Value, Row, Start, End}) -> + case Type of + {_, _, char} -> + {ok, {Value, Pos, String}}; + {_, _, unknown_type} -> + {ok, {Value, Pos, String}}; + {O, N, _} -> + {error, {wrong_type, O, N, char, Row, Start, End}} + end; parse_expression2(Type, Pos, String, {character, "[", _, Row, Start, _}) -> parse_list(Type, Pos, String, Row, Start); parse_expression2(Type, Pos, String, {character, "(", _, _, _, _}) -> @@ -276,6 +319,10 @@ unexpected_token({_, S, _, Row, Start, End}) -> %%% Ambiguous Chain Object vs Identifier Parsing +parse_alphanum(Type, Pos, String, ["true"], Row, Start, End) -> + typecheck_bool(Type, Pos, String, true, Row, Start, End); +parse_alphanum(Type, Pos, String, ["false"], Row, Start, End) -> + typecheck_bool(Type, Pos, String, false, Row, Start, End); parse_alphanum(Type, Pos, String, [[C | _] = S], Row, Start, End) when ?IS_LATIN_LOWER(C) -> % From a programming perspective, we are trying to parse a constant, so % an alphanum token can really only be a constructor, or a chain object. @@ -303,6 +350,13 @@ parse_alphanum(Type, Pos, String, Path, Row, Start, End) -> % must be a variant constructor, or invalid. parse_variant(Type, Pos, String, Path, Row, Start, End). +typecheck_bool({_, _, unknown_type}, Pos, String, Value, _, _, _) -> + {ok, {Value, Pos, String}}; +typecheck_bool({_, _, boolean}, Pos, String, Value, _, _, _) -> + {ok, {Value, Pos, String}}; +typecheck_bool({O, N, _}, _, _, _, Row, Start, End) -> + {error, {wrong_type, O, N, boolean, Row, Start, End}}. + typecheck_address({_, _, address}, Pos, String, Data, _, _, _) -> {ok, {{address, Data}, Pos, String}}; typecheck_address({_, _, contract}, Pos, String, Data, _, _, _) -> @@ -885,11 +939,23 @@ anon_types_test() -> % Integers. check_parser("123"), check_parser("1_2_3"), + % Booleans. + check_parser("true"), + check_parser("false"), + check_parser("[true, false]"), % Bytes. check_parser("#DEAD000BEEF"), check_parser("#DE_AD0_00B_EEF"), % Strings. check_parser("\"hello world\""), + % The Sophia compiler doesn't handle this right, but we should still. + %check_parser("\"ÿ\""), + %check_parser("\"♣\""), + % Characters. + check_parser("'A'"), + check_parser("['a', ' ', '[']"), + %check_parser("'ÿ'"), + %check_parser("'♣'"), % List of integers. check_parser("[1, 2, 3]"), % List of lists. @@ -905,6 +971,13 @@ string_escape_codes_test() -> check_parser("\" \\b\\e\\f\\n\\r\\t\\v\\\"\\\\ \""), check_parser("\"\\x00\\x11\\x77\\x4a\\x4A\""), check_parser("\"\\x{0}\\x{7}\\x{7F}\\x{07F}\\x{007F}\\x{0007F}\\x{0000007F}\""), + check_parser("\"'\""), + + check_parser("['\\b', '\\e', '\\f', '\\n', '\\r', '\\t', '\\v', '\"', '\\'', '\\\\']"), + check_parser("['\\x00', '\\x11', '\\x77', '\\x4a', '\\x4A']"), + check_parser("['\\x{0}', '\\x{7}', '\\x{7F}', '\\x{07F}', '\\x{007F}', '\\x{0007F}', '\\x{0000007F}']"), + check_parser("'\"'"), + ok. records_test() -> From 78c9c67f38cba528249c16dc06b6fca9f9393f76 Mon Sep 17 00:00:00 2001 From: Jarvis Carroll Date: Fri, 13 Feb 2026 06:22:07 +0000 Subject: [PATCH 27/42] typecheck bits Sophia bitstrings aren't really something you initialize manually, so we have to make up a literal format for them. Failing that, we just accept arbitrary integers and bytearrays as bitstrings. --- src/hz_sophia.erl | 48 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl index ef6a55e..be294c2 100644 --- a/src/hz_sophia.erl +++ b/src/hz_sophia.erl @@ -236,13 +236,13 @@ parse_expression(Type, Pos, String) -> end. parse_expression2(Type, Pos, String, {integer, _, Value, Row, Start, End}) -> - case Type of - {_, _, integer} -> - {ok, {Value, Pos, String}}; - {_, _, unknown_type} -> - {ok, {Value, Pos, String}}; - {O, N, _} -> - {error, {wrong_type, O, N, integer, Row, Start, End}} + typecheck_integer(Type, Pos, String, Value, Row, Start, End); +parse_expression2(Type, Pos, String, {character, "-", _, _, _, _}) -> + case next_token(Pos, String) of + {ok, {{integer, _, Value, Row, Start, End}, NewPos, NewString}} -> + typecheck_integer(Type, NewPos, NewString, -Value, Row, Start, End); + {error, Reason} -> + {error, Reason} end; parse_expression2(Type, Pos, String, {bytes, _, Value, Row, Start, End}) -> Len = byte_size(Value), @@ -254,6 +254,10 @@ parse_expression2(Type, Pos, String, {bytes, _, Value, Row, Start, End}) -> {ok, {Result, Pos, String}}; {_, _, {bytes, [ExpectedLen]}} -> {error, {bytes_wrong_size, ExpectedLen, Len, Row, Start, End}}; + {_, _, bits} -> + Size = bit_size(Value), + <> = Value, + {ok, {{bits, IntValue}, Pos, String}}; {_, _, unknown_type} -> {ok, {Result, Pos, String}}; {O, N, _} -> @@ -323,6 +327,10 @@ parse_alphanum(Type, Pos, String, ["true"], Row, Start, End) -> typecheck_bool(Type, Pos, String, true, Row, Start, End); parse_alphanum(Type, Pos, String, ["false"], Row, Start, End) -> typecheck_bool(Type, Pos, String, false, Row, Start, End); +parse_alphanum(Type, Pos, String, ["Bits", "all"], Row, Start, End) -> + typecheck_bits(Type, Pos, String, -1, Row, Start, End); +parse_alphanum(Type, Pos, String, ["Bits", "none"], Row, Start, End) -> + typecheck_bits(Type, Pos, String, 0, Row, Start, End); parse_alphanum(Type, Pos, String, [[C | _] = S], Row, Start, End) when ?IS_LATIN_LOWER(C) -> % From a programming perspective, we are trying to parse a constant, so % an alphanum token can really only be a constructor, or a chain object. @@ -350,6 +358,15 @@ parse_alphanum(Type, Pos, String, Path, Row, Start, End) -> % must be a variant constructor, or invalid. parse_variant(Type, Pos, String, Path, Row, Start, End). +typecheck_integer({_, _, integer}, Pos, String, Value, _, _, _) -> + {ok, {Value, Pos, String}}; +typecheck_integer({_, _, unknown_type}, Pos, String, Value, _, _, _) -> + {ok, {Value, Pos, String}}; +typecheck_integer({_, _, bits}, Pos, String, Value, _, _, _) -> + {ok, {{bits, Value}, Pos, String}}; +typecheck_integer({O, N, _}, _, _, _, Row, Start, End) -> + {error, {wrong_type, O, N, integer, Row, Start, End}}. + typecheck_bool({_, _, unknown_type}, Pos, String, Value, _, _, _) -> {ok, {Value, Pos, String}}; typecheck_bool({_, _, boolean}, Pos, String, Value, _, _, _) -> @@ -357,6 +374,13 @@ typecheck_bool({_, _, boolean}, Pos, String, Value, _, _, _) -> typecheck_bool({O, N, _}, _, _, _, Row, Start, End) -> {error, {wrong_type, O, N, boolean, Row, Start, End}}. +typecheck_bits({_, _, unknown_type}, Pos, String, Value, _, _, _) -> + {ok, {{bits, Value}, Pos, String}}; +typecheck_bits({_, _, bits}, Pos, String, Value, _, _, _) -> + {ok, {{bits, Value}, Pos, String}}; +typecheck_bits({O, N, _}, _, _, _, Row, Start, End) -> + {error, {wrong_type, O, N, bits, Row, Start, End}}. + typecheck_address({_, _, address}, Pos, String, Data, _, _, _) -> {ok, {{address, Data}, Pos, String}}; typecheck_address({_, _, contract}, Pos, String, Data, _, _, _) -> @@ -939,6 +963,7 @@ anon_types_test() -> % Integers. check_parser("123"), check_parser("1_2_3"), + check_parser("-123"), % Booleans. check_parser("true"), check_parser("false"), @@ -1038,6 +1063,15 @@ chain_objects_test() -> ok. +bits_test() -> + check_parser("Bits.all"), + check_parser("Bits.none"), + {_, Type} = compile_entrypoint_value_and_type("contract C = entrypoint f() = Bits.all", "f"), + check_sophia_to_fate(Type, "5", {bits, 5}), + check_sophia_to_fate(Type, "-5", {bits, -5}), + check_sophia_to_fate(Type, "#123", {bits, 256 + 32 + 3}), + ok. + singleton_records_test() -> TypeDef = "record singleton('a) = {it: 'a}", check_parser_with_typedef(TypeDef, "{it = 123}"), From a0fbeebcdb362857d9263cdc4c72eb30d2ec9b3e Mon Sep 17 00:00:00 2001 From: Jarvis Carroll Date: Tue, 17 Feb 2026 07:26:42 +0000 Subject: [PATCH 28/42] Pretty print Sophia expressions. I think all of the tests roundtrip now, so if my parser was thorough, the pretty printer should be as thorough. --- src/hz_sophia.erl | 359 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 306 insertions(+), 53 deletions(-) diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl index be294c2..4c1c8cb 100644 --- a/src/hz_sophia.erl +++ b/src/hz_sophia.erl @@ -4,7 +4,8 @@ -copyright("Jarvis Carroll "). -license("GPL-3.0-or-later"). --export([parse_literal/1, parse_literal/2, check_parser/1]). +-export([parse_literal/1, parse_literal/2]). +-export([fate_to_list/1, fate_to_list/2, fate_to_iolist/1, fate_to_iolist/2]). -include_lib("eunit/include/eunit.hrl"). @@ -173,7 +174,7 @@ parse_char(_, {Row, Col}, [$\\, $x, A, B | String], SourceChars) when ?IS_HEX(A) Byte = convert_digit(A) * 16 + convert_digit(B), {ok, {Byte, [B, A, $x, $\\ | SourceChars], {Row, Col + 4}, String}}; parse_char({Row, Start}, {Row, Col}, [$\\, C | Rest], SourceChars) -> - case escape_char(C) of + case unescape_char(C) of {ok, ByteVal} -> {ok, {ByteVal, [C, $\ | SourceChars], {Row, Col + 2}, Rest}}; error -> @@ -195,19 +196,31 @@ escape_long_hex_code(_, Pos, [], SourceChars, Value) -> % produce an unclosed string error instead. {ok, {Value, SourceChars, Pos, []}}. -escape_char($b) -> {ok, $\b}; -escape_char($e) -> {ok, $\e}; -escape_char($f) -> {ok, $\f}; -escape_char($n) -> {ok, $\n}; -escape_char($r) -> {ok, $\r}; -escape_char($t) -> {ok, $\t}; -escape_char($v) -> {ok, $\v}; +unescape_char($b) -> {ok, $\b}; +unescape_char($e) -> {ok, $\e}; +unescape_char($f) -> {ok, $\f}; +unescape_char($n) -> {ok, $\n}; +unescape_char($r) -> {ok, $\r}; +unescape_char($t) -> {ok, $\t}; +unescape_char($v) -> {ok, $\v}; % Technically \" and \' are only valid inside their own quote characters, not % each other, but whatever, we will just be permissive here. -escape_char($") -> {ok, $\"}; -escape_char($') -> {ok, $\'}; -escape_char($\\) -> {ok, $\\}; -escape_char(_) -> error. +unescape_char($") -> {ok, $\"}; +unescape_char($') -> {ok, $\'}; +unescape_char($\\) -> {ok, $\\}; +unescape_char(_) -> error. + +% Not needed until later, but we'll put it here for symmetry. +escape_char($\b) -> "\\b"; +escape_char($\e) -> "\\e"; +escape_char($\f) -> "\\f"; +escape_char($\n) -> "\\n"; +escape_char($\r) -> "\\r"; +escape_char($\t) -> "\\t"; +escape_char($\v) -> "\\v"; +escape_char($\") -> "\\\""; +escape_char($\\) -> "\\\\"; +escape_char(I) -> I. %%% Sophia Literal Parser @@ -902,16 +915,240 @@ parse_map5(KeyType, ValueType, Pos, String, Acc) -> % TODO wrap_error(Reason, _) -> Reason. +%%% Pretty Printing + +fate_to_list(Term) -> + fate_to_list(unknown_type(), Term). + +fate_to_list(Type, Term) -> + IOList = fate_to_iolist(Type, Term), + unicode:characters_to_list(IOList). + +fate_to_iolist(Term) -> + fate_to_iolist(unknown_type(), Term). + +% Special case for singleton records, since they are erased during compilation. +fate_to_iolist({_, _, {record, [{FieldName, FieldType}]}}, Term) -> + singleton_record_to_iolist(FieldName, FieldType, Term); +% Aggregate types, where we should check if there is useful type information to +% act on. Case logic is made explicit so that the default cases stand out. +fate_to_iolist(Type, {tuple, Tuple}) -> + case Type of + {_, _, {record, FieldTypes}} -> + record_to_iolist(FieldTypes, Tuple); + {_, _, {tuple, ElemTypes}} -> + tuple_to_iolist(ElemTypes, Tuple); + _ -> + tuple_to_iolist([], Tuple) + end; +fate_to_iolist(Type, {variant, _, Tag, Tuple}) -> + case Type of + {O, N, {variant, VariantTypes}} when Tag < length(VariantTypes) -> + variant_to_iolist(O, N, VariantTypes, Tag, Tuple); + {O, N, _} -> + % TODO: Make up a special syntax for anonymous variant terms. + erlang:exit({untyped_variant, O, N}); + _ -> + erlang:exit({untyped_variant, unknown_type, already_normalized}) + end; +fate_to_iolist(Type, List) when is_list(List) -> + case Type of + {_, _, {list, [InnerType]}} -> + list_to_iolist(InnerType, List); + _ -> + list_to_iolist(unknown_type(), List) + end; +fate_to_iolist(Type, Map) when is_map(Map) -> + case Type of + {_, _, {map, [K, V]}} -> + map_to_iolist(K, V, Map); + _ -> + map_to_iolist(unknown_type(), unknown_type(), Map) + end; +% Other FATE types, where no recursion is needed, but type information could +% influence the format that is used. +fate_to_iolist(_, true) -> + "true"; +fate_to_iolist(_, false) -> + "false"; +fate_to_iolist(_, {bits, 0}) -> + "Bits.none"; +fate_to_iolist(_, {bits, -1}) -> + "Bits.all"; +fate_to_iolist(_, {bits, I}) when I > 0 -> + ["#", integer_to_list(I, 16)]; +fate_to_iolist(_, {bits, I}) when I < 0 -> + integer_to_list(I, 10); +fate_to_iolist({_, _, char}, $') -> + % Special case since it needs to be escaped in char literals. + "'\\''"; +fate_to_iolist({_, _, char}, $") -> + % Special case since it does NOT need to be escaped in char literals. + "'\"'"; +fate_to_iolist({_, _, char}, I) when is_integer(I) -> + [$', escape_char(I), $']; +fate_to_iolist(_, I) when is_integer(I) -> + integer_to_list(I); +fate_to_iolist(_, {address, Addr}) -> + gmser_api_encoder:encode(account_pubkey, Addr); +fate_to_iolist(_, {contract, Addr}) -> + gmser_api_encoder:encode(contract_pubkey, Addr); +fate_to_iolist(_, {bytes, Bytes}) -> + Size = bit_size(Bytes), + <> = Bytes, + ["#", integer_to_list(IntValue, 16)]; +fate_to_iolist(_, Bytes) when is_binary(Bytes) -> + escape_string(Bytes). + +escape_string(Binary) -> + escape_string(Binary, []). + +escape_string(<>, Acc) -> + NewAcc = [Acc, escape_char(C)], + escape_string(Rest, NewAcc); +escape_string(<<>>, Acc) -> + [$", Acc, $"]. + +tuple_to_iolist([ElemType], {Elem}) -> + Inner = fate_to_iolist(ElemType, Elem), + ["(", Inner, ",)"]; +tuple_to_iolist(_, {Elem}) -> + Inner = fate_to_iolist(unknown_type(), Elem), + ["(", Inner, ",)"]; +tuple_to_iolist(ElemTypes, Tuple) -> + Elems = tuple_to_list(Tuple), + Multivalue = multivalue_to_iolist(ElemTypes, Elems), + ["(", Multivalue, ")"]. + +list_to_iolist(InnerType, Elems) -> + InnerChars = list_elems_to_iolist(InnerType, Elems), + ["[", InnerChars, "]"]. + +variant_to_iolist(O, N, Variants, Tag, Tuple) -> + Prefix = choose_variant_prefix(O, N), + {Name, ElemTypes} = lists:nth(Tag + 1, Variants), + case tuple_size(Tuple) of + 0 -> + [Prefix, Name]; + _ -> + Elems = tuple_to_list(Tuple), + Multivalue = multivalue_to_iolist(ElemTypes, Elems), + [Prefix, Name, "(", Multivalue, ")"] + end. + +choose_variant_prefix(O, N) -> + case get_typename(O, N) of + [Namespace, _] -> + [Namespace, "."]; + _ -> + [] + end. + +multivalue_to_iolist([FirstType | ElemTypes], [FirstTerm | Elems]) -> + FirstTermChars = fate_to_iolist(FirstType, FirstTerm), + multivalue_to_iolist(ElemTypes, Elems, FirstTermChars); +multivalue_to_iolist(_, Elems) -> + list_elems_to_iolist(unknown_type(), Elems). + +multivalue_to_iolist([NextType | RestTypes], [NextTerm | RestTerms], Acc) -> + NextTermChars = fate_to_iolist(NextType, NextTerm), + multivalue_to_iolist(RestTypes, RestTerms, [Acc, ", ", NextTermChars]); +multivalue_to_iolist(_, Elems, Acc) -> + list_elems_to_iolist(unknown_type(), Elems, Acc). + +list_elems_to_iolist(Type, [FirstTerm | Rest]) -> + FirstTermChars = fate_to_iolist(Type, FirstTerm), + list_elems_to_iolist(Type, Rest, FirstTermChars); +list_elems_to_iolist(_, []) -> + "". + +list_elems_to_iolist(Type, [Next | Rest], Acc) -> + NextChars = fate_to_iolist(Type, Next), + list_elems_to_iolist(Type, Rest, [Acc, ", ", NextChars]); +list_elems_to_iolist(_, [], Acc) -> + Acc. + +singleton_record_to_iolist(FieldName, FieldType, Term) -> + FieldChars = fate_to_iolist(FieldType, Term), + ["{", FieldName, " = ", FieldChars, "}"]. + +record_to_iolist(FieldTypes, Tuple) -> + case length(FieldTypes) == tuple_size(Tuple) of + true -> + Chars = record_fields_to_iolist(FieldTypes, tuple_to_list(Tuple)), + ["{", Chars, "}"]; + false -> + tuple_to_iolist([], Tuple) + end. + +record_fields_to_iolist([{Name, Type} | FieldTypes], [Term | Terms]) -> + TermChars = fate_to_iolist(Type, Term), + record_fields_to_iolist(FieldTypes, Terms, [Name, " = ", TermChars]); +record_fields_to_iolist(_, []) -> + "". + +record_fields_to_iolist([{Name, Type} | FieldTypes], [Term | Terms], Acc) -> + TermChars = fate_to_iolist(Type, Term), + NewAcc = [Acc, ", ", Name, " = ", TermChars], + record_fields_to_iolist(FieldTypes, Terms, NewAcc); +record_fields_to_iolist(_, [], Acc) -> + Acc. + +map_to_iolist(K, V, Map) -> + Iter = maps:iterator(Map), + case maps:next(Iter) of + {KeyTerm, ValTerm, Rest} -> + KChars = fate_to_iolist(K, KeyTerm), + VChars = fate_to_iolist(V, ValTerm), + RestChars = map_to_iolist_inner(K, V, Rest, ["[", KChars, "] = ", VChars]), + ["{", RestChars, "}"]; + none -> + "{}" + end. + +map_to_iolist_inner(K, V, Iter, Acc) -> + case maps:next(Iter) of + {KeyTerm, ValTerm, Rest} -> + KChars = fate_to_iolist(K, KeyTerm), + VChars = fate_to_iolist(V, ValTerm), + map_to_iolist_inner(K, V, Rest, [Acc, ", [", KChars, "] = ", VChars]); + none -> + Acc + end. + %%% Tests check_sophia_to_fate(Type, Sophia, Fate) -> case parse_literal(Type, Sophia) of {ok, Fate} -> ok; - {ok, FateActual} -> - erlang:error({to_fate_failed, Sophia, Fate, {ok, FateActual}}); - {error, Reason} -> - erlang:error({to_fate_failed, Sophia, Fate, {error, Reason}}) + Result -> + erlang:error({to_fate_failed, Sophia, Fate, Result}) + end. + +check_fate_to_sophia(Type, Fate, Sophia) -> + case fate_to_list(Type, Fate) of + Sophia -> + ok; + Result -> + erlang:error({to_sophia_failed, Fate, Sophia, Result}) + end. + +roundtrip_parser(Type, Sophia, Fate) -> + check_sophia_to_fate(Type, Sophia, Fate), + check_fate_to_sophia(Type, Fate, Sophia), + + ok. + +% These test function names are getting ridiculous... I might want to optarg +% them or something, but, whatever, it's test code. +roundtrip_parser_lenient(Type, Sophia, Fate) -> + check_sophia_to_fate(Type, Sophia, Fate), + case fate_to_list(Type, Fate) of + Sophia -> + ok; + SophiaActual -> + check_sophia_to_fate(Type, SophiaActual, Fate) end. compile_entrypoint_value_and_type(Source, Entrypoint) -> @@ -943,65 +1180,76 @@ check_parser(Sophia) -> {Fate, Type} = compile_entrypoint_value_and_type(Source, "f"), % Check that when we parse the term we get the same value as the Sophia - % compiler. + % compiler. Also check that the pretty printer gives the same string back. check_sophia_to_fate(unknown_type(), Sophia, Fate), % Then, once we know that the term is correct, make sure that it is still - % accepted *with* type info. + % accepted *with* type info. Don't bother roundtripping this, since the + % pretty printer doesn't enforce types anyway. check_sophia_to_fate(Type, Sophia, Fate). +check_parser_roundtrip(Sophia) -> + Source = "contract C = entrypoint f() = " ++ Sophia, + {Fate, Type} = compile_entrypoint_value_and_type(Source, "f"), + roundtrip_parser(Type, Sophia, Fate), + % Without type information we might get a more generic result in Sophia + % syntax. Let's do a lenient test. + roundtrip_parser_lenient(unknown_type(), Sophia, Fate). + check_parser_with_typedef(Typedef, Sophia) -> % Compile the type definitions alongside the usual literal expression. Source = "contract C =\n " ++ Typedef ++ "\n entrypoint f() = " ++ Sophia, {Fate, Type} = compile_entrypoint_value_and_type(Source, "f"), % Do a typed parse, as usual, but there are probably record/variant - % definitions in the AACI, so untyped parses probably don't work. - check_sophia_to_fate(Type, Sophia, Fate). + % definitions in the AACI, so untyped parses probably don't work, and + % variants often have optional namespaces, so the sophia result might not + % match exactly, but should still be equivalent. + roundtrip_parser_lenient(Type, Sophia, Fate). anon_types_test() -> % Integers. - check_parser("123"), + check_parser_roundtrip("123"), check_parser("1_2_3"), - check_parser("-123"), + check_parser_roundtrip("-123"), % Booleans. - check_parser("true"), - check_parser("false"), - check_parser("[true, false]"), + check_parser_roundtrip("true"), + check_parser_roundtrip("false"), + check_parser_roundtrip("[true, false]"), % Bytes. - check_parser("#DEAD000BEEF"), + check_parser_roundtrip("#DEAD000BEEF"), check_parser("#DE_AD0_00B_EEF"), % Strings. - check_parser("\"hello world\""), + check_parser_roundtrip("\"hello world\""), % The Sophia compiler doesn't handle this right, but we should still. - %check_parser("\"ÿ\""), - %check_parser("\"♣\""), + %check_parser_roundtrip("\"ÿ\""), + %check_parser_roundtrip("\"♣\""), % Characters. - check_parser("'A'"), - check_parser("['a', ' ', '[']"), - %check_parser("'ÿ'"), - %check_parser("'♣'"), + check_parser_roundtrip("'A'"), + check_parser_roundtrip("['a', ' ', '[']"), + %check_parser_roundtrip("'ÿ'"), + %check_parser_roundtrip("'♣'"), % List of integers. - check_parser("[1, 2, 3]"), + check_parser_roundtrip("[1, 2, 3]"), % List of lists. - check_parser("[[], [1], [2, 3]]"), + check_parser_roundtrip("[[], [1], [2, 3]]"), % Tuple. - check_parser("(1, [2, 3], (4, 5))"), + check_parser_roundtrip("(1, [2, 3], (4, 5))"), % Map. - check_parser("{[1] = 2, [3] = 4}"), + check_parser_roundtrip("{[1] = 2, [3] = 4}"), ok. string_escape_codes_test() -> - check_parser("\" \\b\\e\\f\\n\\r\\t\\v\\\"\\\\ \""), + check_parser_roundtrip("\" \\b\\e\\f\\n\\r\\t\\v\\\"\\\\ \""), check_parser("\"\\x00\\x11\\x77\\x4a\\x4A\""), check_parser("\"\\x{0}\\x{7}\\x{7F}\\x{07F}\\x{007F}\\x{0007F}\\x{0000007F}\""), - check_parser("\"'\""), + check_parser_roundtrip("\"'\""), - check_parser("['\\b', '\\e', '\\f', '\\n', '\\r', '\\t', '\\v', '\"', '\\'', '\\\\']"), + check_parser_roundtrip("['\\b', '\\e', '\\f', '\\n', '\\r', '\\t', '\\v', '\"', '\\'', '\\\\']"), check_parser("['\\x00', '\\x11', '\\x77', '\\x4a', '\\x4A']"), check_parser("['\\x{0}', '\\x{7}', '\\x{7F}', '\\x{07F}', '\\x{007F}', '\\x{0007F}', '\\x{0000007F}']"), - check_parser("'\"'"), + check_parser_roundtrip("'\"'"), ok. @@ -1039,15 +1287,17 @@ namespace_variant_test() -> Term = "[N.A, N.B]", Source = "namespace N = datatype mytype = A | B\ncontract C = entrypoint f() = " ++ Term, {Fate, VariantType} = compile_entrypoint_value_and_type(Source, "f"), - check_sophia_to_fate(VariantType, Term, Fate), + roundtrip_parser(VariantType, Term, Fate), ok. chain_objects_test() -> % Address, - check_parser("ak_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx"), + check_parser_roundtrip("ak_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx"), % Two different forms of signature, - check_parser("[sg_XDyF8LJC4tpMyAySvpaG1f5V9F2XxAbRx9iuVjvvdNMwVracLhzAuXhRM5kXAFtpwW1DCHuz5jGehUayCah4jub32Ti2n, #00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF]"), + check_parser("sg_XDyF8LJC4tpMyAySvpaG1f5V9F2XxAbRx9iuVjvvdNMwVracLhzAuXhRM5kXAFtpwW1DCHuz5jGehUayCah4jub32Ti2n"), + check_parser("#00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF_00112233445566778899AABBCCDDEEFF"), + check_parser_roundtrip("#112233445566778899AABBCCDDEEFF00112233445566778899AABBCCDDEEFF00112233445566778899AABBCCDDEEFF00112233445566778899AABBCCDDEEFF"), % We have to build a totally custom contract example in order to get an % AACI and return value for parsing contract addresses. This is because the @@ -1058,18 +1308,18 @@ chain_objects_test() -> Contract = "ct_2FTnrGfV8qsfHpaSEHpBrziioCpwwzLqSevHqfxQY3PaAAdARx", Source = "contract C = entrypoint f(): C = " ++ Contract, {Fate, ContractType} = compile_entrypoint_value_and_type(Source, "f"), - check_sophia_to_fate(ContractType, Contract, Fate), - check_sophia_to_fate(unknown_type(), Contract, Fate), + roundtrip_parser(ContractType, Contract, Fate), + roundtrip_parser(unknown_type(), Contract, Fate), ok. bits_test() -> - check_parser("Bits.all"), - check_parser("Bits.none"), + check_parser_roundtrip("Bits.all"), + check_parser_roundtrip("Bits.none"), {_, Type} = compile_entrypoint_value_and_type("contract C = entrypoint f() = Bits.all", "f"), - check_sophia_to_fate(Type, "5", {bits, 5}), - check_sophia_to_fate(Type, "-5", {bits, -5}), - check_sophia_to_fate(Type, "#123", {bits, 256 + 32 + 3}), + roundtrip_parser_lenient(Type, "5", {bits, 5}), + roundtrip_parser(Type, "-5", {bits, -5}), + roundtrip_parser(Type, "#123", {bits, 256 + 32 + 3}), ok. singleton_records_test() -> @@ -1104,7 +1354,8 @@ excess_parens_test() -> % Including multiple nestings of tuples and grouping, interleaved. check_parser("((((1), ((2, 3)))), 4)"), % Also empty tuples exist! - check_parser("()"), + check_parser_roundtrip("()"), + check_parser_roundtrip("(((), ()), ((), ()))"), check_parser("(((((), ())), ()))"), ok. @@ -1166,12 +1417,14 @@ singleton_test() -> % Now let's do some testing with this weird type, to see if we handle it % correctly. {ok, {tuple, {1}}} = parse_literal(SingletonType, "(1,)"), + "(1,)" = fate_to_list(SingletonType, {tuple, {1}}), % Some ambiguous nesting parens, for fun. {ok, {tuple, {1}}} = parse_literal(SingletonType, "(((1),))"), % No trailing comma should give an error. {error, {expected_trailing_comma, 1, 3}} = parse_literal(SingletonType, "(1)"), % All of the above should behave the same in untyped contexts: {ok, {tuple, {1}}} = parse_literal(unknown_type(), "(1,)"), + "(1,)" = fate_to_list(unknown_type(), {tuple, {1}}), {ok, {tuple, {1}}} = parse_literal(unknown_type(), "(((1),))"), {ok, 1} = parse_literal(unknown_type(), "(1)"), From ddec3bfa7477a84ea8844a8996a026444c32858a Mon Sep 17 00:00:00 2001 From: Jarvis Carroll Date: Tue, 24 Feb 2026 06:12:00 +0000 Subject: [PATCH 29/42] add more format options to decode_bytearray I reversed the argument order here, since the Format option is sort of kind of almost optional, but I am not sure if that was a good idea. --- src/hz.erl | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/hz.erl b/src/hz.erl index 84d1535..0ed98b6 100644 --- a/src/hz.erl +++ b/src/hz.erl @@ -71,7 +71,7 @@ contract_call/5, contract_call/6, contract_call/10, - decode_bytearray_fate/1, decode_bytearray/2, + decode_bytearray/2, spend/5, spend/10, sign_tx/2, sign_tx/3, sign_message/2, verify_signature/3, @@ -701,8 +701,9 @@ decode_bytearray_fate(EncodedStr) -> {ok, Object} end. --spec decode_bytearray(Type, EncodedStr) -> {ok, Result} | {error, Reason} - when Type :: term(), +-spec decode_bytearray(EncodedStr, Format) -> {ok, Result} | {error, Reason} + when Format :: fate | sophia | {sophia, Type} | {erlang, Type}, + Type :: term(), EncodedStr :: binary() | string(), Result :: none | term(), Reason :: term(). @@ -713,13 +714,18 @@ decode_bytearray_fate(EncodedStr) -> %% must be the result type of the same function in the same AACI that was used %% to create the transaction that EncodedStr came from. -decode_bytearray(Type, EncodedStr) -> +decode_bytearray(EncodedStr, Format) -> case decode_bytearray_fate(EncodedStr) of {ok, none} -> {ok, none}; - {ok, Object} -> hz_aaci:fate_to_erlang(Type, Object); + {ok, FATE} -> decode_bytearray2(FATE, Format); {error, Reason} -> {error, Reason} end. +decode_bytearray2(FATE, fate) -> FATE; +decode_bytearray2(FATE, sophia) -> hz_sophia:fate_to_list(FATE); +decode_bytearray2(FATE, {sophia, Type}) -> hz_sophia:fate_to_list(Type, FATE); +decode_bytearray2(FATE, {erlang, Type}) -> hz_aaci:fate_to_erlang(Type, FATE). + to_binary(S) when is_binary(S) -> S; to_binary(S) when is_list(S) -> list_to_binary(S). From f277e79096e49e7ed37e01dea290ce69c7805802 Mon Sep 17 00:00:00 2001 From: Craig Everett Date: Wed, 25 Feb 2026 16:20:52 +0900 Subject: [PATCH 30/42] Minor doc and style edits --- src/hz.erl | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/hz.erl b/src/hz.erl index 84d1535..97d8d66 100644 --- a/src/hz.erl +++ b/src/hz.erl @@ -691,8 +691,10 @@ decode_bytearray_fate(EncodedStr) -> Encoded = unicode:characters_to_binary(EncodedStr), {contract_bytearray, Binary} = gmser_api_encoder:decode(Encoded), case Binary of - <<>> -> {ok, none}; - <<"Out of gas">> -> {error, out_of_gas}; + <<>> -> + {ok, none}; + <<"Out of gas">> -> + {error, out_of_gas}; _ -> % FIXME there may be other errors that are encoded directly into % the byte array. We could try and catch to at least return @@ -1038,8 +1040,9 @@ contract_create(CreatorID, Path, InitArgs) -> %%
  • %% Args: %% This is a list of the arguments to provide to the function, listed in order -%% according to the function's spec, and represented as strings (that is, an integer -%% argument of `10' must be cast to the textual representation `"10"'). +%% according to the function's spec. Arguments can be represented as a list of +%% Sophia literals (a simple list of strings), or alternately as a list of compatible +%% Erlang, FATE or Sophia terms wrapped in a tuple which specifies the representation. %%
  • %% %% As should be obvious from the above description, it is pretty helpful to have a @@ -1399,8 +1402,9 @@ contract_call(CallerID, Gas, AACI, ConID, Fun, Args) -> %%
  • %% Args: %% This is a list of the arguments to provide to the function, listed in order -%% according to the function's spec, and represented as strings (that is, an integer -%% argument of `10' must be cast to the textual representation `"10"'). +%% according to the function's spec. Arguments can be represented as a list of +%% Sophia literals (a simple list of strings), or alternately as a list of compatible +%% Erlang, FATE or Sophia terms wrapped in a tuple which specifies the representation. %%
  • %% %% As should be obvious from the above description, it is pretty helpful to have a From 540b2c513becf1dfb52a3c312f35c06556ceaddb Mon Sep 17 00:00:00 2001 From: Jarvis Carroll Date: Thu, 26 Feb 2026 12:54:28 +0000 Subject: [PATCH 31/42] Fill AACI and coerce type specs Any error reasons or paths are just term() still, and ACI doesn't have a defined spec in the compiler, so whatever, but the AACI types, the erlang representation of terms, and the four different kinds of coerce function are all spec'd now. Also some internal type substitution functions were given types, just in the hopes of catching some errors, but dyalizer doesn't seem to complain at all no matter how badly I break my code. Strange approach to making a type system, but oh well. --- src/hz_aaci.erl | 88 +++++++++++++++++++++++++++++++++++++++++++++-- src/hz_sophia.erl | 34 +++++++++++++++--- 2 files changed, 115 insertions(+), 7 deletions(-) diff --git a/src/hz_aaci.erl b/src/hz_aaci.erl index 90ec5bc..5011e29 100644 --- a/src/hz_aaci.erl +++ b/src/hz_aaci.erl @@ -25,11 +25,43 @@ %%% Types --export_type([aaci/0]). +-export_type([aaci/0, annotated_type/0, erlang_repr/0]). -include_lib("eunit/include/eunit.hrl"). --type aaci() :: {aaci, term(), term(), term()}. +-type aaci() :: {aaci, string(), #{string() => function_spec()}, #{string() => typedef()}}. +-type function_spec() :: {[{string(), annotated_type()}], annotated_type()}. +-type typedef() :: {[string()], typedef_rhs()}. + +-type annotated_type() :: {opaque_type(), already_normalized | opaque_type(), builtin_type(annotated_type())}. +-type builtin_type(T) :: {bytes, [integer() | any]} + | {record, [{string(), T}]} + | {variant, [{string(), [T]}]} + | {tuple, [T]} + | {list, [T]} + | {map, [T]} + | integer + | boolean + | bits + | char + | string + | address + | signature + | contract + | channel + | unknown_type. + +-type opaque_type() :: string() | {string(), [opaque_type()]} | builtin_type(opaque_type()). + +-type typedef_rhs() :: {var, string()} | string() | {string(), [opaque_type()]} | builtin_type(typedef_rhs()). + +-type erlang_repr() :: integer() + | string() + | boolean() + | binary() + | tuple() % Tuples, variants, or raw addresses + | [erlang_repr()] + | #{erlang_repr() => erlang_repr()}. %%% ACI/AACI @@ -47,6 +79,9 @@ prepare_from_file(Path) -> Error -> Error end. +-spec prepare(ACI) -> AACI + when ACI :: term(), + AACI :: aaci(). prepare(ACI) -> % We want to take the types represented by the ACI, things like N1.T(N2.T), @@ -66,6 +101,12 @@ prepare(ACI) -> {aaci, Name, Specs, TypeDefs}. +-spec convert_aci_types(ACI) -> {Name, OpaqueSpecs, TypeDefs} + when ACI :: term(), + Name :: string(), + OpaqueSpecs :: [{string(), [{string(), opaque_type()}], opaque_type()}], + TypeDefs :: #{string() => typedef()}. + convert_aci_types(ACI) -> % Find the main contract, so we can get the specifications of its % entrypoints. @@ -134,6 +175,12 @@ convert_typedefs_loop([Next | Rest], NamePrefix, Converted) -> Def = opaque_type(Params, DefACI), convert_typedefs_loop(Rest, NamePrefix, [Converted, {Name, Params, Def}]). +-spec collect_opaque_types(Tree, TypeDefs) -> TypeDefs + when Tree :: typedef_tree(), + TypeDefs :: #{string() => typedef()}. + +-type typedef_tree() :: {string(), [string()], typedef_rhs()} | list(typedef_tree()). + collect_opaque_types([], Types) -> Types; collect_opaque_types([L | R], Types) -> @@ -144,6 +191,11 @@ collect_opaque_types({Name, Params, Def}, Types) -> %%% ACI Type -> Opaque Type +-spec opaque_type(Params, ACIType) -> Opaque + when Params :: [string()], + ACIType :: binary() | map(), + Opaque :: typedef_rhs(). + % Convert an ACI type defintion/spec into the 'opaque type' representation that % our dereferencing algorithms can reason about. opaque_type(Params, NameBin) when is_binary(NameBin) -> @@ -171,6 +223,8 @@ opaque_type(Params, Pair) when is_map(Pair) -> [{Name, TypeArgs}] = maps:to_list(Pair), {opaque_type_name(Name), [opaque_type(Params, Arg) || Arg <- TypeArgs]}. +-spec opaque_type_name(binary()) -> atom() | string(). + % Atoms for any builtins that aren't qualified by a namespace in Sophia. % Everything else stays as a string, user-defined or not. opaque_type_name(<<"int">>) -> integer; @@ -280,6 +334,12 @@ annotate_function_specs([{Name, ArgsOpaque, ResultOpaque} | Rest], Types, Specs) NewSpecs = maps:put(Name, {Args, Result}, Specs), annotate_function_specs(Rest, Types, NewSpecs). +-spec annotate_type(Opaque, Types) -> {ok, Annotated} | {error, Reason} + when Opaque :: opaque_type(), + Types :: #{string() => typedef()}, + Annotated :: annotated_type(), + Reason :: none(). + annotate_type(T, Types) -> case normalize_opaque_type(T, Types) of {ok, AlreadyNormalized, NOpaque, NExpanded} -> @@ -445,6 +505,14 @@ substitute_opaque_types(Bindings, Types) -> %%% Erlang to FATE +-spec erlang_args_to_fate(VarTypes, Terms) -> {ok, FATE} | {error, Errors} + when VarTypes :: [{string(), annotated_type()}], + Terms :: [erlang_repr()], + FATE :: gmb_fate_data:fate_type(), + Errors :: [{Reason, [PathStep]}], + Reason :: term(), + PathStep :: term(). + erlang_args_to_fate(VarTypes, Terms) -> DefLength = length(VarTypes), ArgLength = length(Terms), @@ -454,6 +522,14 @@ erlang_args_to_fate(VarTypes, Terms) -> DefLength < ArgLength -> {error, too_many_args} end. +-spec erlang_to_fate(Type, Erlang) -> {ok, FATE} | {error, Errors} + when Type :: annotated_type(), + FATE :: gmb_fate_data:fate_type(), + Erlang :: erlang_repr(), + Errors :: [{Reason, [PathStep]}], + Reason :: term(), + PathStep :: term(). + erlang_to_fate({_, _, integer}, S) when is_integer(S) -> {ok, S}; erlang_to_fate({O, N, integer}, S) when is_list(S) -> @@ -794,6 +870,14 @@ coerce_direction(Type, Term, to_fate) -> coerce_direction(Type, Term, from_fate) -> fate_to_erlang(Type, Term). +-spec fate_to_erlang(Type, FATE) -> {ok, Erlang} | {error, Errors} + when Type :: annotated_type(), + FATE :: gmb_fate_data:fate_type(), + Erlang :: erlang_repr(), + Errors :: [{Reason, [PathStep]}], + Reason :: term(), + PathStep :: term(). + fate_to_erlang({_, _, integer}, S) when is_integer(S) -> {ok, S}; fate_to_erlang({_, _, address}, {address, Bin}) -> diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl index 4c1c8cb..2df3755 100644 --- a/src/hz_sophia.erl +++ b/src/hz_sophia.erl @@ -10,14 +10,20 @@ -include_lib("eunit/include/eunit.hrl"). --spec parse_literal(String) -> Result - when String :: string(), - Result :: {ok, gmb_fate_data:fate_type()} - | {error, Reason :: term()}. +-spec parse_literal(Sophia) -> {ok, FATE} | {error, Reason} + when Sophia :: string(), + FATE :: gmb_fate_data:fate_type(), + Reason :: term(). parse_literal(String) -> parse_literal(unknown_type(), String). +-spec parse_literal(Type, Sophia) -> {ok, FATE} | {error, Reason} + when Type :: hz_aaci:annotated_type(), + Sophia :: string(), + FATE :: gmb_fate_data:fate_type(), + Reason :: term(). + parse_literal(Type, String) -> case parse_expression(Type, {1, 1}, String) of {ok, {Result, NewPos, NewString}} -> @@ -917,16 +923,34 @@ wrap_error(Reason, _) -> Reason. %%% Pretty Printing +-spec fate_to_list(FATE) -> Sophia + when FATE :: gmb_fate_data:fate_type(), + Sophia :: string(). + fate_to_list(Term) -> fate_to_list(unknown_type(), Term). +-spec fate_to_list(Type, FATE) -> Sophia + when Type :: hz_aaci:annotated_type(), + FATE :: gmb_fate_data:fate_type(), + Sophia :: string(). + fate_to_list(Type, Term) -> IOList = fate_to_iolist(Type, Term), unicode:characters_to_list(IOList). +-spec fate_to_iolist(FATE) -> Sophia + when FATE :: gmb_fate_data:fate_type(), + Sophia :: iolist(). + fate_to_iolist(Term) -> fate_to_iolist(unknown_type(), Term). +-spec fate_to_iolist(Type, FATE) -> Sophia + when Type :: hz_aaci:annotated_type(), + FATE :: gmb_fate_data:fate_type(), + Sophia :: iolist(). + % Special case for singleton records, since they are erased during compilation. fate_to_iolist({_, _, {record, [{FieldName, FieldType}]}}, Term) -> singleton_record_to_iolist(FieldName, FieldType, Term); @@ -1430,6 +1454,6 @@ singleton_test() -> % Also if we wanted an integer, the singleton is NOT dropped, so is also an % error. - {error, {expected_close_paren, 1, 3}} = parse_literal({integer, alread_normalized, integer}, "(1,)"), + {error, {expected_close_paren, 1, 3}} = parse_literal({integer, already_normalized, integer}, "(1,)"), ok. From fd8766a249fd6f6bf0dc0ace5e990f14a0c16b37 Mon Sep 17 00:00:00 2001 From: Craig Everett Date: Thu, 7 May 2026 19:53:36 +0900 Subject: [PATCH 32/42] Unify call arg order between call and create --- src/hz.erl | 92 ++++++++++++++++++++++++------------------------------ 1 file changed, 41 insertions(+), 51 deletions(-) diff --git a/src/hz.erl b/src/hz.erl index bc49137..f61a7ac 100644 --- a/src/hz.erl +++ b/src/hz.erl @@ -272,8 +272,7 @@ chain_nodes() -> %% transactions are submitted is called the "sticky node". This is the first node %% (head position) in the list of nodes submitted to the chain when `chain_nodes/1' %% is called. If using multiple nodes but the sticky node should also be used for -%% read-only queries, submit the sticky node at the head of the list and again in -%% the tail. +%% read-only queries, put the sticky node in the list twice. chain_nodes(List) when is_list(List) -> hz_man:chain_nodes(List). @@ -284,7 +283,7 @@ chain_nodes(List) when is_list(List) -> %% Check whether TLS is in use. The typical situation is to not use TLS as nodes that %% serve as part of the backend of an application are typically run in the same %% backend network as the application service. When accessing chain nodes over the WAN -%% however, TLS is strongly recommended to avoid a MITM attack. +%% however, TLS is recommended to avoid a MitM attack. %% %% In this version of Hakuzaru TLS is either on or off for all nodes, making a mixed %% infrastructure complicated to support without two Hakuzaru instances. This will @@ -299,7 +298,7 @@ tls() -> -spec tls(boolean()) -> ok. %% @doc %% Set TLS true or false. That's what a boolean is, by the way, `true' or `false'. -%% This is a condescending comment. That means I am talking down to you. +%% This is a condescending comment. That means to talk down to someone. %% %% TLS defaults to `false'. @@ -343,7 +342,8 @@ timeout(MS) -> %% NOTE: %% This will return the currently synced height, which may be different than the %% actual current top of the entire chain if the node being queried is still syncing -%% (has not yet caught up with the chain). +%% (has not yet caught up with the chain). More complete information, including +%% whether the node is currently syncing, can be gained from a `status()' query. top_height() -> case top_block() of @@ -356,7 +356,7 @@ top_height() -> when TopBlock :: microblock_header(), Reason :: chain_error(). %% @doc -%% Returns the current block height as an integer. +%% Returns the current top block of the chain. top_block() -> request("/v3/headers/top"). @@ -386,7 +386,7 @@ kb_current() -> kb_current_hash() -> case request("/v3/key-blocks/current/hash") of {ok, #{"reason" := Reason}} -> {error, Reason}; - {ok, #{"hash" := Hash}} -> {ok, Hash}; + {ok, #{"hash" := Hash}} -> {ok, Hash}; Error -> Error end. @@ -444,10 +444,6 @@ kb_by_height(Height) -> result(request(["/v3/key-blocks/height/", StringN])). -%kb_insert(KeyblockData) -> -% request("/v3/key-blocks", KeyblockData). - - -spec mb_header(ID) -> {ok, MB_Header} | {error, Reason} when ID :: microblock_hash(), MB_Header :: microblock_header(), @@ -607,12 +603,6 @@ next_nonce(AccountID) -> {ok, #{"reason" := Reason}} -> {error, Reason}; Error -> Error end. -% case request_sticky(["/v3/accounts/", AccountID]) of -% {ok, #{"nonce" := Nonce}} -> {ok, Nonce + 1}; -% {ok, #{"reason" := "Account not found"}} -> {ok, 1}; -% {ok, #{"reason" := Reason}} -> {error, Reason}; -% Error -> Error -% end. -spec dry_run(TX) -> {ok, Result} | {error, Reason} @@ -929,14 +919,14 @@ contract_create(CreatorID, Path, InitArgs) -> -spec contract_create(CreatorID, Nonce, - Amount, TTL, Gas, GasPrice, + Gas, GasPrice, Amount, TTL, Path, InitArgs) -> Result when CreatorID :: pubkey(), Nonce :: pos_integer(), - Amount :: non_neg_integer(), - TTL :: non_neg_integer(), Gas :: pos_integer(), GasPrice :: pos_integer(), + Amount :: non_neg_integer(), + TTL :: non_neg_integer(), Path :: file:filename(), InitArgs :: [string()] | {erlang, [term()]} @@ -973,24 +963,6 @@ contract_create(CreatorID, Path, InitArgs) -> %% querying your Gajumaru node (via `hz:next_nonce(CallerID)', for example). %% %%
  • -%% Amount: -%% All Gajumaru transactions can carry an "amount" spent from the origin account -%% (in this case the `CallerID') to the destination. In a "Spend" transaction this -%% is the only value that really matters, but in a contract call the utility is -%% quite different, as you can pay money into a contract and have that -%% contract hold it (for future payouts, to be held in escrow, as proof of intent -%% to purchase or engage in an auction, whatever). Typically this value is 0, but -%% of course there are very good reasons why it should be set to a non-zero value -%% in the case of calls related to contract-governed payment systems. -%%
  • -%%
  • -%% TTL: -%% This stands for "Time-To-Live", meaning the height beyond which this element is -%% considered to be eligible for garbage collection (and therefore inaccessible!). -%% The TTL can be extended by a "live extension" transaction (basically pay for the -%% data to remain alive longer). -%%
  • -%%
  • %% Gas: %% This number sets a limit on the maximum amount of computation the caller is willing %% to pay for on the chain. @@ -1023,6 +995,24 @@ contract_create(CreatorID, Path, InitArgs) -> %% transaction, thus making miners more likely to prioritize the high value ones. %%
  • %%
  • +%% Amount: +%% All Gajumaru transactions can carry an "amount" spent from the origin account +%% (in this case the `CallerID') to the destination. In a "Spend" transaction this +%% is the only value that really matters, but in a contract call the utility is +%% quite different, as you can pay money into a contract and have that +%% contract hold it (for future payouts, to be held in escrow, as proof of intent +%% to purchase or engage in an auction, whatever). Typically this value is 0, but +%% of course there are very good reasons why it should be set to a non-zero value +%% in the case of calls related to contract-governed payment systems. +%%
  • +%%
  • +%% TTL: +%% This stands for "Time-To-Live", meaning the height beyond which this element is +%% considered to be eligible for garbage collection (and therefore inaccessible!). +%% The TTL can be extended by a "live extension" transaction (basically pay for the +%% data to remain alive longer). +%%
  • +%%
  • %% ACI: %% This is the compiled contract's metadata. It provides the information necessary %% for the contract call data to be formed in a way that the Gajumaru runtime will @@ -1056,7 +1046,7 @@ contract_create(CreatorID, Path, InitArgs) -> %% if you do not already have a copy, and can check the spec of a function before %% trying to form a contract call. -contract_create(CreatorID, Nonce, Amount, TTL, Gas, GasPrice, Path, InitArgs) -> +contract_create(CreatorID, Nonce, Gas, GasPrice, Amount, TTL, Path, InitArgs) -> case file:read_file(Path) of {ok, Source} -> Dir = filename:dirname(Path), @@ -1067,17 +1057,17 @@ contract_create(CreatorID, Nonce, Amount, TTL, Gas, GasPrice, Path, InitArgs) -> {src_file, Path}, {src_dir, SrcDir}, {include, {file_system, [CWD, so_utils:canonical_dir(Dir)]}}], - contract_create2(CreatorID, Nonce, Amount, TTL, Gas, GasPrice, + contract_create2(CreatorID, Nonce, Gas, GasPrice, Amount, TTL, Source, Options, InitArgs); Error -> Error end. -contract_create2(CreatorID, Nonce, Amount, TTL, Gas, GasPrice, Source, Options, InitArgs) -> +contract_create2(CreatorID, Nonce, Gas, GasPrice, Amount, TTL, Source, Options, InitArgs) -> case so_compiler:from_string(Source, Options) of {ok, Compiled} -> - contract_create_built(CreatorID, Nonce, Amount, TTL, Gas, GasPrice, + contract_create_built(CreatorID, Nonce, Gas, GasPrice, Amount, TTL, Compiled, InitArgs); Error -> Error @@ -1108,20 +1098,20 @@ contract_create_built(CreatorID, Compiled, InitArgs) -> Gas = 500000, GasPrice = min_gas_price(), contract_create_built(CreatorID, Nonce, - Amount, TTL, Gas, GasPrice, + Gas, GasPrice, Amount, TTL, Compiled, InitArgs); Error -> Error end. --spec contract_create_built(CreatorID, Nonce, Amount, TTL, Gas, GasPrice, Compiled, InitArgs) -> Result +-spec contract_create_built(CreatorID, Nonce, Gas, GasPrice, Amount, TTL, Compiled, InitArgs) -> Result when CreatorID :: unicode:chardata(), Nonce :: pos_integer(), - Amount :: non_neg_integer(), - TTL :: non_neg_integer(), Gas :: pos_integer(), GasPrice :: pos_integer(), + Amount :: non_neg_integer(), + TTL :: non_neg_integer(), Compiled :: map(), InitArgs :: [string()] | {erlang, [term()]} @@ -1135,26 +1125,26 @@ contract_create_built(CreatorID, Compiled, InitArgs) -> %% The `Compiled' argument is the output of contract compilation and replaces the `File' %% argument in `contract_create/8'. -contract_create_built(CreatorID, Nonce, Amount, TTL, Gas, GasPrice, Compiled, InitArgs) -> +contract_create_built(CreatorID, Nonce, Gas, GasPrice, Amount, TTL, Compiled, InitArgs) -> AACI = hz_aaci:prepare(maps:get(aci, Compiled)), case encode_call_data(AACI, "init", InitArgs) of {ok, CallData} -> - assemble_calldata(CreatorID, Nonce, Amount, TTL, Gas, GasPrice, Compiled, CallData); + assemble_calldata(CreatorID, Nonce, Gas, GasPrice, Amount, TTL, Compiled, CallData); Error -> Error end. -assemble_calldata(CreatorID, Nonce, Amount, TTL, Gas, GasPrice, Compiled, CallData) -> +assemble_calldata(CreatorID, Nonce, Gas, GasPrice, Amount, TTL, Compiled, CallData) -> PK = unicode:characters_to_binary(CreatorID), try {account_pubkey, OwnerID} = gmser_api_encoder:decode(PK), - assemble_calldata2(OwnerID, Nonce, Amount, TTL, Gas, GasPrice, Compiled, CallData) + assemble_calldata2(OwnerID, Nonce, Gas, GasPrice, Amount, TTL, Compiled, CallData) catch Error:Reason -> {Error, Reason} end. -assemble_calldata2(OwnerID, Nonce, Amount, TTL, Gas, GasPrice, Compiled, CallData) -> +assemble_calldata2(OwnerID, Nonce, Gas, GasPrice, Amount, TTL, Compiled, CallData) -> Code = gmser_contract_code:serialize(Compiled), Source = unicode:characters_to_binary(maps:get(contract_source, Compiled, <<>>)), VM = 1, From 9f02f73dbd1428d7b56e9124bc6f1c60194b7a19 Mon Sep 17 00:00:00 2001 From: Craig Everett Date: Fri, 8 May 2026 08:43:07 +0900 Subject: [PATCH 33/42] verup --- ebin/hakuzaru.app | 2 +- src/hakuzaru.erl | 2 +- src/hz.erl | 2 +- src/hz_aaci.erl | 2 +- src/hz_fetcher.erl | 2 +- src/hz_format.erl | 2 +- src/hz_grids.erl | 2 +- src/hz_key_master.erl | 2 +- src/hz_man.erl | 2 +- src/hz_sophia.erl | 2 +- src/hz_sup.erl | 2 +- zomp.meta | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/ebin/hakuzaru.app b/ebin/hakuzaru.app index 9d35663..50bd67f 100644 --- a/ebin/hakuzaru.app +++ b/ebin/hakuzaru.app @@ -3,7 +3,7 @@ {included_applications,[]}, {applications,[stdlib,kernel]}, {description,"Gajumaru interoperation library"}, - {vsn,"0.8.2"}, + {vsn,"0.9.0"}, {modules,[hakuzaru,hz,hz_fetcher,hz_format,hz_grids, hz_key_master,hz_man,hz_sup]}, {mod,{hakuzaru,[]}}]}. diff --git a/src/hakuzaru.erl b/src/hakuzaru.erl index 93c67c6..ddd67a9 100644 --- a/src/hakuzaru.erl +++ b/src/hakuzaru.erl @@ -6,7 +6,7 @@ %%% @end -module(hakuzaru). --vsn("0.8.2"). +-vsn("0.9.0"). -author("Craig Everett "). -copyright("Craig Everett "). -license("GPL-3.0-or-later"). diff --git a/src/hz.erl b/src/hz.erl index f61a7ac..086dbb7 100644 --- a/src/hz.erl +++ b/src/hz.erl @@ -23,7 +23,7 @@ %%% @end -module(hz). --vsn("0.8.2"). +-vsn("0.9.0"). -author("Craig Everett "). -copyright("Craig Everett "). -license("GPL-3.0-or-later"). diff --git a/src/hz_aaci.erl b/src/hz_aaci.erl index 5011e29..de28f55 100644 --- a/src/hz_aaci.erl +++ b/src/hz_aaci.erl @@ -10,7 +10,7 @@ %%% @end -module(hz_aaci). --vsn("0.8.2"). +-vsn("0.9.0"). -author("Jarvis Carroll "). -copyright("Craig Everett "). -license("GPL-3.0-or-later"). diff --git a/src/hz_fetcher.erl b/src/hz_fetcher.erl index 5d3aff0..33e5de7 100644 --- a/src/hz_fetcher.erl +++ b/src/hz_fetcher.erl @@ -1,5 +1,5 @@ -module(hz_fetcher). --vsn("0.8.2"). +-vsn("0.9.0"). -author("Craig Everett "). -copyright("Craig Everett "). -license("MIT"). diff --git a/src/hz_format.erl b/src/hz_format.erl index dafbec3..39ca2df 100644 --- a/src/hz_format.erl +++ b/src/hz_format.erl @@ -21,7 +21,7 @@ %%% @end -module(hz_format). --vsn("0.8.2"). +-vsn("0.9.0"). -author("Craig Everett "). -copyright("Craig Everett "). -license("GPL-3.0-or-later"). diff --git a/src/hz_grids.erl b/src/hz_grids.erl index 78da96b..9971207 100644 --- a/src/hz_grids.erl +++ b/src/hz_grids.erl @@ -37,7 +37,7 @@ %%% @end -module(hz_grids). --vsn("0.8.2"). +-vsn("0.9.0"). -export([url/2, url/3, url/4, parse/1, req/2, req/3]). diff --git a/src/hz_key_master.erl b/src/hz_key_master.erl index fe118e3..1f4703e 100644 --- a/src/hz_key_master.erl +++ b/src/hz_key_master.erl @@ -8,7 +8,7 @@ %%% @end -module(hz_key_master). --vsn("0.8.2"). +-vsn("0.9.0"). -export([make_key/1, encode/1, decode/1]). diff --git a/src/hz_man.erl b/src/hz_man.erl index a09ee51..49c0375 100644 --- a/src/hz_man.erl +++ b/src/hz_man.erl @@ -9,7 +9,7 @@ %%% @end -module(hz_man). --vsn("0.8.2"). +-vsn("0.9.0"). -behavior(gen_server). -author("Craig Everett "). -copyright("Craig Everett "). diff --git a/src/hz_sophia.erl b/src/hz_sophia.erl index 2df3755..70995a6 100644 --- a/src/hz_sophia.erl +++ b/src/hz_sophia.erl @@ -1,5 +1,5 @@ -module(hz_sophia). --vsn("0.8.2"). +-vsn("0.9.0"). -author("Jarvis Carroll "). -copyright("Jarvis Carroll "). -license("GPL-3.0-or-later"). diff --git a/src/hz_sup.erl b/src/hz_sup.erl index a8d1932..eabb584 100644 --- a/src/hz_sup.erl +++ b/src/hz_sup.erl @@ -9,7 +9,7 @@ %%% @end -module(hz_sup). --vsn("0.8.2"). +-vsn("0.9.0"). -behaviour(supervisor). -author("Craig Everett "). -copyright("Craig Everett "). diff --git a/zomp.meta b/zomp.meta index 6595ead..0fa8075 100644 --- a/zomp.meta +++ b/zomp.meta @@ -4,7 +4,7 @@ {prefix,"hz"}. {desc,"Gajumaru interoperation library"}. {author,"Craig Everett"}. -{package_id,{"otpr","hakuzaru",{0,8,2}}}. +{package_id,{"otpr","hakuzaru",{0,9,0}}}. {deps,[{"otpr","sophia",{9,0,0}}, {"otpr","gmserialization",{0,1,3}}, {"otpr","gmbytecode",{3,4,1}}, From 695e7e4828abea0809abc2c478751c6ce0a03cbb Mon Sep 17 00:00:00 2001 From: Craig Everett Date: Fri, 8 May 2026 15:48:05 +0900 Subject: [PATCH 34/42] WIP --- src/hz.erl | 44 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/src/hz.erl b/src/hz.erl index 086dbb7..4388ff2 100644 --- a/src/hz.erl +++ b/src/hz.erl @@ -792,11 +792,20 @@ contract_code(ID) -> contract_source(ID) -> case request(["/v3/contracts/", ID, "/source"]) of - {ok, #{"source" := Source}} -> {ok, Source}; + {ok, #{"source" := Source}} -> extract(Source); {ok, #{"reason" := Reason}} -> {error, Reason}; Error -> Error end. +extract(TarBaby) -> + case erl_tar:extract({binary, TarBaby}, [memory, compressed]) of + {ok, Project} -> + {ok, Project}; + Error -> + tell(info, "non-compressed thingy returned: ~tp", [Error]), + {ok, TarBaby} + end. + -spec contract_poi(ID) -> {ok, Bytecode} | {error, Reason} when ID :: contract_id(), @@ -1049,6 +1058,7 @@ contract_create(CreatorID, Path, InitArgs) -> contract_create(CreatorID, Nonce, Gas, GasPrice, Amount, TTL, Path, InitArgs) -> case file:read_file(Path) of {ok, Source} -> + Name = filename:basename(Path), Dir = filename:dirname(Path), {ok, CWD} = file:get_cwd(), SrcDir = so_utils:canonical_dir(Path), @@ -1058,17 +1068,18 @@ contract_create(CreatorID, Nonce, Gas, GasPrice, Amount, TTL, Path, InitArgs) -> {src_dir, SrcDir}, {include, {file_system, [CWD, so_utils:canonical_dir(Dir)]}}], contract_create2(CreatorID, Nonce, Gas, GasPrice, Amount, TTL, - Source, Options, InitArgs); + Name, Source, Options, InitArgs); Error -> Error end. -contract_create2(CreatorID, Nonce, Gas, GasPrice, Amount, TTL, Source, Options, InitArgs) -> +contract_create2(CreatorID, Nonce, Gas, GasPrice, Amount, TTL, Name, Source, Options, InitArgs) -> case so_compiler:from_string(Source, Options) of {ok, Compiled} -> + Named = maps:put(contract_name, Name, Compiled), contract_create_built(CreatorID, Nonce, Gas, GasPrice, Amount, TTL, - Compiled, InitArgs); + Named, InitArgs); Error -> Error end. @@ -1146,7 +1157,7 @@ assemble_calldata(CreatorID, Nonce, Gas, GasPrice, Amount, TTL, Compiled, CallDa assemble_calldata2(OwnerID, Nonce, Gas, GasPrice, Amount, TTL, Compiled, CallData) -> Code = gmser_contract_code:serialize(Compiled), - Source = unicode:characters_to_binary(maps:get(contract_source, Compiled, <<>>)), + Source = bundle_source(Compiled), VM = 1, ABI = 1, <> = <>, @@ -1183,6 +1194,29 @@ assemble_calldata2(OwnerID, Nonce, Gas, GasPrice, Amount, TTL, Compiled, CallDat error:Reason -> {error, Reason} end. +bundle_source(Compiled) -> + case maps:find(contract_source, Compiled) of + {ok, Source} -> bundle_source2(unicode:characters_to_binary(Source), Compiled); + error -> <<>> + end. + +bundle_source2(Source, Compiled) -> + File = unicode:characters_to_binary(maps:get(contract_name, Compiled, <<"contract.aes">>)), + <> = crypto:strong_rand_bytes(8), + Rand = integer_to_binary(RN, 36), + TmpDir = filename:join(zx_lib:path(tmp), Rand), + TgzName = <>, + TarGzPath = filename:join(TmpDir, TgzName), + ok = filelib:ensure_dir(TarGzPath), + {ok, CWD} = file:get_cwd(), + ok = file:set_cwd(TmpDir), + ok = file:write_file(File, Source), + ok = erl_tar:create(TarGzPath, [File], [compressed]), + {ok, TgzBin} = file:read_file(TarGzPath), + ok = file:set_cwd(CWD), + ok = file:del_dir_r(TmpDir), + TgzBin. + -spec read_aci(Path) -> Result when Path :: file:filename(), From 02945dd10d3cbfb8476908b1b37a1dad109de75f Mon Sep 17 00:00:00 2001 From: Craig Everett Date: Fri, 8 May 2026 19:47:25 +0900 Subject: [PATCH 35/42] derp --- src/hz.erl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/hz.erl b/src/hz.erl index 4388ff2..ceaffbd 100644 --- a/src/hz.erl +++ b/src/hz.erl @@ -799,10 +799,9 @@ contract_source(ID) -> extract(TarBaby) -> case erl_tar:extract({binary, TarBaby}, [memory, compressed]) of - {ok, Project} -> - {ok, Project}; + {ok, Project} -> {ok, Project}; Error -> - tell(info, "non-compressed thingy returned: ~tp", [Error]), + io:format("dis chit happen: ~tp", [Error]), {ok, TarBaby} end. From 7fc3cd00da969b029fad940798be43bf8412891d Mon Sep 17 00:00:00 2001 From: Craig Everett Date: Fri, 8 May 2026 23:04:56 +0900 Subject: [PATCH 36/42] WIP --- src/hz.erl | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/hz.erl b/src/hz.erl index ceaffbd..adfeae2 100644 --- a/src/hz.erl +++ b/src/hz.erl @@ -1151,12 +1151,13 @@ assemble_calldata(CreatorID, Nonce, Gas, GasPrice, Amount, TTL, Compiled, CallDa {account_pubkey, OwnerID} = gmser_api_encoder:decode(PK), assemble_calldata2(OwnerID, Nonce, Gas, GasPrice, Amount, TTL, Compiled, CallData) catch - Error:Reason -> {Error, Reason} + Error:Reason:Stack -> + {Error, {Reason, Stack}} end. assemble_calldata2(OwnerID, Nonce, Gas, GasPrice, Amount, TTL, Compiled, CallData) -> - Code = gmser_contract_code:serialize(Compiled), - Source = bundle_source(Compiled), + Compressed = #{contract_source := Bundle} = bundle_source(Compiled), + Code = gmser_contract_code:serialize(Compressed), VM = 1, ABI = 1, <> = <>, @@ -1166,7 +1167,7 @@ assemble_calldata2(OwnerID, Nonce, Gas, GasPrice, Amount, TTL, Compiled, CallDat [{owner_id, gmser_id:create(account, OwnerID)}, {nonce, Nonce}, {code, Code}, - {source, Source}, + {source, Bundle}, {ct_version, CTVersion}, {ttl, TTL}, {deposit, 0}, @@ -1200,21 +1201,21 @@ bundle_source(Compiled) -> end. bundle_source2(Source, Compiled) -> - File = unicode:characters_to_binary(maps:get(contract_name, Compiled, <<"contract.aes">>)), + File = unicode:characters_to_list(maps:get(contract_name, Compiled, "contract.aes")), <> = crypto:strong_rand_bytes(8), Rand = integer_to_binary(RN, 36), TmpDir = filename:join(zx_lib:path(tmp), Rand), - TgzName = <>, + TgzName = File ++ ".tgz", TarGzPath = filename:join(TmpDir, TgzName), ok = filelib:ensure_dir(TarGzPath), {ok, CWD} = file:get_cwd(), ok = file:set_cwd(TmpDir), - ok = file:write_file(File, Source), - ok = erl_tar:create(TarGzPath, [File], [compressed]), + ok = erl_tar:create(TarGzPath, [{File, Source}], [compressed]), {ok, TgzBin} = file:read_file(TarGzPath), ok = file:set_cwd(CWD), ok = file:del_dir_r(TmpDir), - TgzBin. + {ok, Hash} = eblake2:blake2b(32, TgzBin), + Compiled#{contract_source => TgzBin, source_hash => Hash}. -spec read_aci(Path) -> Result From fd2158a465368a2474d8b674da4cedb25ce9cb0c Mon Sep 17 00:00:00 2001 From: Craig Everett Date: Sat, 9 May 2026 15:19:51 +0900 Subject: [PATCH 37/42] base64 -> bytearray encoding --- src/hz.erl | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/hz.erl b/src/hz.erl index adfeae2..2827f79 100644 --- a/src/hz.erl +++ b/src/hz.erl @@ -792,16 +792,22 @@ contract_code(ID) -> contract_source(ID) -> case request(["/v3/contracts/", ID, "/source"]) of - {ok, #{"source" := Source}} -> extract(Source); + {ok, #{"source" := Blobby}} -> extract(Blobby); {ok, #{"reason" := Reason}} -> {error, Reason}; Error -> Error end. -extract(TarBaby) -> +extract(Blobby) -> + case gmser_api_encoder:safe_decode(bytearray, Blobby) of + {ok, TarBaby} -> extract2(TarBaby); + {error, invalid_encoding} -> {ok, Blobby} + end. + +extract2(TarBaby) -> case erl_tar:extract({binary, TarBaby}, [memory, compressed]) of {ok, Project} -> {ok, Project}; - Error -> - io:format("dis chit happen: ~tp", [Error]), + Error -> + io:format("Dis chit happen: ~tp~n", [Error]), {ok, TarBaby} end. From cd4f6a56a5d2cbb74e06c63e1671cec0436cf8c3 Mon Sep 17 00:00:00 2001 From: Craig Everett Date: Sat, 9 May 2026 20:07:52 +0900 Subject: [PATCH 38/42] Differentiate between source return types --- src/hz.erl | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/hz.erl b/src/hz.erl index 2827f79..88893c6 100644 --- a/src/hz.erl +++ b/src/hz.erl @@ -783,9 +783,13 @@ contract_code(ID) -> end. --spec contract_source(ID) -> {ok, Bytecode} | {error, Reason} - when ID :: contract_id(), - Bytecode :: contract_byte_array(), +-spec contract_source(ID) -> Result + when ID :: contract_id(), + Result :: {ok, Source} + | {project, Bundle} + | {error, Reason}, + Source :: string(), + Bundle :: [{FilePath :: string(), Contents :: binary()}], Reason :: chain_error() | string(). %% @doc %% Retrieve the code of a contract as represented on chain. @@ -805,8 +809,9 @@ extract(Blobby) -> extract2(TarBaby) -> case erl_tar:extract({binary, TarBaby}, [memory, compressed]) of - {ok, Project} -> {ok, Project}; - Error -> + {ok, Bundle} -> + {project, Bundle}; + Error -> io:format("Dis chit happen: ~tp~n", [Error]), {ok, TarBaby} end. From c54c0db17a31e0a2dee9e2579babec91c75a2b8c Mon Sep 17 00:00:00 2001 From: Craig Everett Date: Sun, 10 May 2026 13:16:13 +0900 Subject: [PATCH 39/42] Fix list -> binary arg --- src/hz.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hz.erl b/src/hz.erl index 88893c6..4382daa 100644 --- a/src/hz.erl +++ b/src/hz.erl @@ -796,7 +796,7 @@ contract_code(ID) -> contract_source(ID) -> case request(["/v3/contracts/", ID, "/source"]) of - {ok, #{"source" := Blobby}} -> extract(Blobby); + {ok, #{"source" := Blobby}} -> extract(list_to_binary(Blobby)); {ok, #{"reason" := Reason}} -> {error, Reason}; Error -> Error end. From c9ead44aa24b9b706a5bdc670639ddcb020aab1d Mon Sep 17 00:00:00 2001 From: Craig Everett Date: Sun, 10 May 2026 15:01:50 +0900 Subject: [PATCH 40/42] Let non-zx projects call contract_create* --- src/hz.erl | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/src/hz.erl b/src/hz.erl index 4382daa..e90ef00 100644 --- a/src/hz.erl +++ b/src/hz.erl @@ -1213,21 +1213,35 @@ bundle_source(Compiled) -> bundle_source2(Source, Compiled) -> File = unicode:characters_to_list(maps:get(contract_name, Compiled, "contract.aes")), - <> = crypto:strong_rand_bytes(8), - Rand = integer_to_binary(RN, 36), - TmpDir = filename:join(zx_lib:path(tmp), Rand), + TempDir = temp_dir(), TgzName = File ++ ".tgz", - TarGzPath = filename:join(TmpDir, TgzName), + TarGzPath = filename:join(TempDir, TgzName), ok = filelib:ensure_dir(TarGzPath), {ok, CWD} = file:get_cwd(), - ok = file:set_cwd(TmpDir), + ok = file:set_cwd(TempDir), ok = erl_tar:create(TarGzPath, [{File, Source}], [compressed]), {ok, TgzBin} = file:read_file(TarGzPath), ok = file:set_cwd(CWD), - ok = file:del_dir_r(TmpDir), + ok = file:del_dir_r(TempDir), {ok, Hash} = eblake2:blake2b(32, TgzBin), Compiled#{contract_source => TgzBin, source_hash => Hash}. +temp_dir() -> + case erlang:function_exported(zx_lib, path, 3) of + true -> + TS = integer_to_list(erlang:system_time()), + filename:join(zx_lib:path(tmp, "otpr", "hakuzaru"), TS); + false -> + temp_dir(os:type()) + end. + +temp_dir({unix, _}) -> + string:trim(os:cmd("mktemp -d")); +temp_dir({win32, _}) -> + Temp = os:getenv("TEMP"), + TS = integer_to_list(erlang:system_time()), + filename:join([Temp, "hakuzaru", TS]). + -spec read_aci(Path) -> Result when Path :: file:filename(), From 4b9fa656721a66b0c3375ea66dc2dc3b49810a83 Mon Sep 17 00:00:00 2001 From: Craig Everett Date: Sun, 10 May 2026 15:09:47 +0900 Subject: [PATCH 41/42] Merge down --- doc/overview.edoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/overview.edoc b/doc/overview.edoc index 92b0715..2587cf4 100644 --- a/doc/overview.edoc +++ b/doc/overview.edoc @@ -1,5 +1,5 @@ @author Craig Everett [https://git.qpq.swiss/QPQ-AG/hakuzaru] -@version 0.8.0 +@version 0.9.0 @title Hakuzaru: Gajumaru blockchain bindings for Erlang @doc From a4914c1ad1a5ba1e56ece8d3f6220e9758fb945c Mon Sep 17 00:00:00 2001 From: Craig Everett Date: Sun, 10 May 2026 15:22:13 +0900 Subject: [PATCH 42/42] Making custom dir. --- doc/{ => cusom}/erlang.png | Bin doc/{ => cusom}/stylesheet.css | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename doc/{ => cusom}/erlang.png (100%) rename doc/{ => cusom}/stylesheet.css (100%) diff --git a/doc/erlang.png b/doc/cusom/erlang.png similarity index 100% rename from doc/erlang.png rename to doc/cusom/erlang.png diff --git a/doc/stylesheet.css b/doc/cusom/stylesheet.css similarity index 100% rename from doc/stylesheet.css rename to doc/cusom/stylesheet.css