commit 1efe459714d6105f3afe717edaabe93dafeff2d5
Author: Peter Harpending
Date:   Mon Oct 20 14:17:54 2025 -0700

    initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..655a0ae
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+*.swp
+*.swo
+*.beam
+erl_crash.dump
diff --git a/gap.erl b/gap.erl
new file mode 100644
index 0000000..1f4b918
--- /dev/null
+++ b/gap.erl
@@ -0,0 +1,173 @@
+% @doc gap = graduated argument parser
+%
+% This is "graduated" with respect to degree of opinionation. You can use as
+% much or as little of this library as you want.
+%
+% However, more and more advanced functionality (e.g. automated help messages
+% for the user) requires stronger and stronger assumptions about your program's
+% argument structure.
+%
+% I chose assumptions that are true for the majority of UNIX programs.
+%
+% Tiers
+%
+% 1. tokens/1: play around with the xg-tokens escript to see how arguments are tokenized
+-module(gap).
+
+-export_type([
+    token/0
+]).
+
+-export([
+    tokens/1,
+    parse_stem/2
+]).
+
+
+%% tokenizing
+
+-type token() :: {'--', Token :: string()}    % "--name" -> long option
+               | {'-', Char :: char()}        % one token per character of "-xyz"
+               | stdin                        % a lone "-"
+               | {naked, Token :: string()}.  % any other argument, kept verbatim
+
+
+-spec tokens(Args) -> Tokens
+    when Args   :: [string()],
+         Tokens :: [token()].
+% @doc
+% Assumptions:
+% 1. -- means "remaining args are verbatim"
+% 2. empty - maps to atom 'stdin'
+% 3. -xyz is equivalent to -x -y -z
+%
+% The only opinionation here is in the treatment of a naked "--" arg:
+%
+%   tokens(["-foo", "-", "--bar", "--", "--baz", "-", "quux"]) ->
+%       [{'-', $f},
+%        {'-', $o},
+%        {'-', $o},
+%        stdin,
+%        {'--', "bar"},
+%        {naked, "--baz"},
+%        {naked, "-"},
+%        {naked, "quux"}].
+
+% Clause order matters: the exact "--" and "-" arguments must be tried before
+% the "--" ++ _ and "-" ++ _ prefix patterns, which would otherwise match them
+% with an empty remainder.
+%
+% "--" -> everything after it is taken verbatim
+tokens(["--" | Rest]) ->
+    [{naked, Arg} || Arg <- Rest];
+% "--option" -> long option
+tokens(["--" ++ Token | Rest]) ->
+    [{'--', Token} | tokens(Rest)];
+% "-" -> stdin
+tokens(["-" | Rest]) ->
+    [stdin | tokens(Rest)];
+% "-xyz" -> [{'-', $x}, {'-', $y}, {'-', $z}]
+tokens(["-" ++ Chars | Rest]) ->
+    [{'-', Char} || Char <- Chars] ++ tokens(Rest);
+% anything else is a plain argument
+tokens([Token | Rest]) ->
+    [{naked, Token} | tokens(Rest)];
+tokens([]) ->
+    [].
+
+
+%% parsing inputs
+
+-type aritee()    :: non_neg_integer() | infinity.
+-type spec_stem() :: {spec_stem, OptionSpecs :: [spec_opt()],
+                                 LeafSpecs   :: [spec_leaf()]}.
+-type spec_opt()  :: {spec_opt, LongName  :: string(),
+                                ShortName :: char(),
+                                Arity     :: aritee()}.
+-type spec_leaf() :: spec_args()
+                   | spec_cmd().
+-type spec_args() :: {spec_args, Arity :: aritee()}.
+-type spec_cmd()  :: {spec_cmd, Name    :: string(),
+                                Subtree :: spec_stem()}.
+
+
+%% parsing outputs
+
+-type stem() :: {stem, Opts      :: [opt()],
+                       MaybeLeaf :: none | {value, leaf()}}.
+-type opt()  :: {opt, LongName :: string(),
+                      Args     :: args()}.
+-type leaf() :: args()
+              | cmd().
+-type args() :: {args, [token()]}.
+-type cmd()  :: {cmd, Name    :: string(),
+                      Subtree :: stem()}.
+
+
+-spec parse_stem(Spec, Tokens) -> {Stem, Rest}
+    when Spec   :: spec_stem(),
+         Tokens :: [token()],
+         Stem   :: stem(),
+         Rest   :: Tokens.
+% @doc
+% This is the beginning of opinionation
+%
+% Assumes your program's command syntax is
+%
+%   Stem :: rootcmd GlobalOptions SubTree
+%
+% where
+%   GlobalOptions :: -one --or -more --global --options
+%   SubTree       :: Args | SubCmd
+%   Args          :: ["specified", "number", "of", "string arguments"]
+%   SubCmd        :: subcmdname Stem   (presumably, cf. spec_cmd() -- TODO confirm)
+%
+% Parses the leading options first, then hands the remaining tokens to the
+% subtree parser. Returns the parsed stem together with the tokens that were
+% left unconsumed.
+
+parse_stem({spec_stem, OptSpecs, SubtreeSpec}, Args0) ->
+    {Opts, Args1}    = parse_opts(OptSpecs, Args0),
+    {Subtree, Args2} = parse_subtree(SubtreeSpec, Args1),
+    % the tag must be 'stem' to agree with the stem() type promised by the spec
+    {{stem, Opts, Subtree}, Args2}.
+
+
+-spec parse_opts(OptSpecs, Tokens) -> {Opts, Rest}
+    when OptSpecs :: [spec_opt()],
+         Tokens   :: [token()],
+         Opts     :: [opt()],
+         Rest     :: Tokens.
+% @doc
+% Consumes every leading option that OptSpecs recognises.
+%
+% Returns the parsed options in the order they appeared, plus the tokens that
+% were not consumed. Parsing stops, without raising, at the first token that
+% no spec matches; an option whose required arguments are missing counts as
+% "no match" and is left in Rest.
+
+parse_opts(Specs, Tokens) ->
+    parse_opts(Specs, Tokens, []).
+
+
+%% parse options
+%% Acc holds the options matched so far, newest first; it is reversed once on
+%% exit. Every successful match consumes at least the option token itself, so
+%% the recursion terminates.
+parse_opts(Specs, Tokens, Acc) ->
+    case match_any_opt(Specs, Tokens) of
+        {match, Opt, Rest} -> parse_opts(Specs, Rest, [Opt | Acc]);
+        {fail, _}          -> {lists:reverse(Acc), Tokens}
+    end.
+
+
+% try each spec in order against the head of Tokens; the first match wins
+match_any_opt([Spec | Specs], Tokens) ->
+    case match_opt(Spec, Tokens) of
+        {match, _, _} = Match -> Match;
+        {fail, _}             -> match_any_opt(Specs, Tokens)
+    end;
+% out of specs, move on
+match_any_opt([], Tokens) ->
+    {fail, Tokens}.
+
+
+% Matches a single option spec against the head of Tokens.
+% Returns {match, Opt, Rest} or {fail, Tokens} with Tokens untouched.
+%
+% match --foo -> consume args
+match_opt({spec_opt, LongName, _, Aritee}, [{'--', LongName} | Rest] = Tokens) ->
+    take_opt_args(LongName, Aritee, Rest, Tokens);
+% match -f -> consume args
+match_opt({spec_opt, LongName, ShortName, Aritee}, [{'-', ShortName} | Rest] = Tokens) ->
+    take_opt_args(LongName, Aritee, Rest, Tokens);
+% not a match
+match_opt(_, Tokens) ->
+    {fail, Tokens}.
+
+
+% shared tail of both match_opt clauses: the option is always reported under
+% its long name, whichever spelling appeared on the command line
+take_opt_args(LongName, Aritee, Rest0, Tokens) ->
+    case match_args(Aritee, Rest0) of
+        {match, OptArgs, Rest1} -> {match, {opt, LongName, OptArgs}, Rest1};
+        {fail, _}               -> {fail, Tokens}
+    end.
+
+
+% Takes the arguments of an option off the front of Tokens.
+%   infinity -> all remaining tokens become arguments
+%   N >= 0   -> exactly N tokens; fails if fewer are available
+% On failure the list passed in is returned unchanged, never a partially
+% consumed tail.
+match_args(infinity, Tokens) ->
+    {match, {args, Tokens}, []};
+match_args(N, [Head | Rest0] = Tokens) when is_integer(N), N >= 1 ->
+    case match_args(N - 1, Rest0) of
+        {match, {args, TailArgs}, Rest1} -> {match, {args, [Head | TailArgs]}, Rest1};
+        {fail, _}                        -> {fail, Tokens}
+    end;
+match_args(0, Tokens) ->
+    {match, {args, []}, Tokens};
+match_args(_, Tokens) ->
+    {fail, Tokens}.
+
+
+% not yet implemented: always raises error(nyi)
+parse_subtree(_, _) -> error(nyi).
diff --git a/xg-tokens b/xg-tokens
new file mode 100755
index 0000000..ac6061d
--- /dev/null
+++ b/xg-tokens
@@ -0,0 +1,7 @@
+#!/usr/bin/env escript
+
+-mode(compile).
+
+main(Args) ->
+    Tokens = gap:tokens(Args),
+    io:format("tokens: ~tp~n", [Tokens]).