Fix #15: add command-line tokenizer with support for quoted arguments
Replace string:tokens/2 with a custom command-line tokenizer that supports single and double-quoted arguments (with embedded whitespace) and expansion of environment variables using either the Unix ($VAR; ${VAR}) or Windows (%VAR%) formats. This tokenizer is only used when the command line is parsed as a single string.
This commit is contained in:
parent
eb4a5beccd
commit
1a01b82531
95
README.md
95
README.md
@ -280,7 +280,7 @@ would return:
|
|||||||
Positional Options
|
Positional Options
|
||||||
------------------
|
------------------
|
||||||
|
|
||||||
We can also have options with neither short nor long option name. In this case,
|
We can also have options with neither short nor long option names. In this case,
|
||||||
the options will be taken according to their position in the option specification
|
the options will be taken according to their position in the option specification
|
||||||
list passed to ``getopt:parse/2``.
|
list passed to ``getopt:parse/2``.
|
||||||
|
|
||||||
@ -351,3 +351,96 @@ will return:
|
|||||||
{ok,{[{host,"myhost"}, {port,1000}, {dbname,"mydb"}],
|
{ok,{[{host,"myhost"}, {port,1000}, {dbname,"mydb"}],
|
||||||
["-","dummy"]}}
|
["-","dummy"]}}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
Arguments with embedded whitespace
|
||||||
|
----------------------------------
|
||||||
|
|
||||||
|
Arguments that have embedded whitespace have to be quoted with either
|
||||||
|
single or double quotes to be considered as a single
|
||||||
|
argument.
|
||||||
|
|
||||||
|
|
||||||
|
e.g. Given an option specification list with the following format:
|
||||||
|
|
||||||
|
``` erlang
|
||||||
|
OptSpecList =
|
||||||
|
[
|
||||||
|
{define, $D, "define", string, "Define a variable"},
|
||||||
|
{user, $u, "user", string, "User name"}
|
||||||
|
].
|
||||||
|
```
|
||||||
|
|
||||||
|
The following invocation:
|
||||||
|
|
||||||
|
``` erlang
|
||||||
|
getopt:parse(OptSpecList,
|
||||||
|
"-D'FOO=VAR 123' --define \"VAR WITH SPACES\" -u\"my user name\"").
|
||||||
|
```
|
||||||
|
|
||||||
|
would return:
|
||||||
|
|
||||||
|
``` erlang
|
||||||
|
{ok,{[{define,"FOO=VAR 123"},
|
||||||
|
{define,"VAR WITH SPACES"},
|
||||||
|
{user,"my user name"}],
|
||||||
|
[]}}
|
||||||
|
```
|
||||||
|
|
||||||
|
When parsing a command line with unclosed quotes, the last argument
|
||||||
|
will be a single string starting at the position where the last quote
|
||||||
|
was entered.
|
||||||
|
|
||||||
|
e.g. The following invocation:
|
||||||
|
|
||||||
|
``` erlang
|
||||||
|
getopt:parse(OptSpecList, "--user ' my user ' \"argument with unclosed quotes").
|
||||||
|
```
|
||||||
|
|
||||||
|
would return:
|
||||||
|
|
||||||
|
``` erlang
|
||||||
|
{ok,{[{user," my user "}],
|
||||||
|
["argument with unclosed quotes"]}}
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
Environment variable expansion
|
||||||
|
------------------------------
|
||||||
|
|
||||||
|
`getopt:parse/2` will expand environment variables when used with a command
|
||||||
|
line that is passed as a single string. The formats that are supported
|
||||||
|
for environment variable expansion are:
|
||||||
|
|
||||||
|
- $VAR (simple Unix/bash format)
|
||||||
|
- ${VAR} (full Unix/bash format)
|
||||||
|
- %VAR% (Windows format)
|
||||||
|
|
||||||
|
If a variable is not present in the environment it will not be
|
||||||
|
expanded. Variables can be expanded within double-quoted and free
|
||||||
|
arguments. *getopt* will not expand environment variables within
|
||||||
|
single-quoted arguments.
|
||||||
|
|
||||||
|
e.g. Given the following option specification list:
|
||||||
|
|
||||||
|
``` erlang
|
||||||
|
OptSpecList =
|
||||||
|
[
|
||||||
|
{path, $p, "path", string, "File path"}
|
||||||
|
].
|
||||||
|
```
|
||||||
|
|
||||||
|
The following invocation:
|
||||||
|
|
||||||
|
``` erlang
|
||||||
|
getopt:parse(OptSpecList, "--path ${PATH} $NONEXISTENT_DUMMY_VAR").
|
||||||
|
```
|
||||||
|
|
||||||
|
would return (depending on the value of your PATH variable) something like:
|
||||||
|
|
||||||
|
``` erlang
|
||||||
|
{ok,{[{path, "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"}],
|
||||||
|
["$NONEXISTENT_DUMMY_VAR"]}}
|
||||||
|
```
|
||||||
|
|
||||||
|
Currently, *getopt* does not perform wildcard expansion of file paths.
|
||||||
|
@ -12,6 +12,7 @@
|
|||||||
-author('juanjo@comellas.org').
|
-author('juanjo@comellas.org').
|
||||||
|
|
||||||
-export([parse/2, usage/2, usage/3, usage/4]).
|
-export([parse/2, usage/2, usage/3, usage/4]).
|
||||||
|
-export([tokenize_cmdline/1]).
|
||||||
|
|
||||||
-export_type([arg_type/0,
|
-export_type([arg_type/0,
|
||||||
arg_value/0,
|
arg_value/0,
|
||||||
@ -66,7 +67,7 @@ parse(OptSpecList, CmdLine) ->
|
|||||||
try
|
try
|
||||||
Args = if
|
Args = if
|
||||||
is_integer(hd(CmdLine)) ->
|
is_integer(hd(CmdLine)) ->
|
||||||
string:tokens(CmdLine, " \t\n");
|
tokenize_cmdline(CmdLine);
|
||||||
true ->
|
true ->
|
||||||
CmdLine
|
CmdLine
|
||||||
end,
|
end,
|
||||||
@ -606,6 +607,102 @@ add_option_help(_Opt, _Prefix, Acc) ->
|
|||||||
Acc.
|
Acc.
|
||||||
|
|
||||||
|
|
||||||
|
%% @doc Split a command line given as a single string into its list of
%% arguments. Arguments are separated by spaces, tabs or newlines; text
%% enclosed in single or double quotes is kept together, so quoted
%% arguments may contain embedded whitespace. Environment variable
%% references written as ${VAR}, $VAR (Unix) or %VAR% (Windows) are
%% expanded. Wildcard expansion of paths is NOT performed.
-spec tokenize_cmdline(CmdLine :: string()) -> [string()].
tokenize_cmdline(CmdLine) ->
    %% Start with no completed arguments and an empty in-progress argument.
    NoArgs = [],
    EmptyArg = [],
    tokenize_cmdline(CmdLine, NoArgs, EmptyArg).
|
||||||
|
|
||||||
|
%% @doc Main loop of the tokenizer for text that is outside of quotes.
%% `Acc' holds the completed arguments in reverse order and `ArgAcc' holds
%% the characters of the argument being built, also in reverse order.
-spec tokenize_cmdline(CmdLine :: string(), Acc :: [string()], ArgAcc :: string()) -> [string()].
tokenize_cmdline([], Args, []) ->
    %% End of input with nothing pending.
    lists:reverse(Args);
tokenize_cmdline([], Args, Pending) ->
    %% End of input: the pending characters form the last argument.
    lists:reverse([lists:reverse(Pending) | Args]);
tokenize_cmdline([Blank | Rest], Args, Pending)
  when Blank =:= $\s orelse Blank =:= $\t orelse Blank =:= $\n ->
    %% A separator closes the pending argument, if there is one; runs of
    %% separators with nothing pending are simply skipped.
    Flushed =
        case Pending of
            [] ->
                Args;
            [_ | _] ->
                [lists:reverse(Pending) | Args]
        end,
    tokenize_cmdline(Rest, Flushed, []);
tokenize_cmdline([Quote | Rest], Args, Pending)
  when Quote =:= $" orelse Quote =:= $' ->
    %% Opening quote: the quoted text is appended to the pending argument,
    %% so it may contain spaces, tabs, etc.
    tokenize_quoted_arg(Quote, Rest, Args, Pending);
tokenize_cmdline([Sigil | _] = Input, Args, Pending)
  when Sigil =:= $$ orelse Sigil =:= $% ->
    %% Environment variable reference: ${VAR}; $VAR; %VAR%
    {Remaining, Expansion} = expand_env_var(Input),
    tokenize_cmdline(Remaining, Args, lists:reverse(Expansion, Pending));
tokenize_cmdline([$\\, Escaped | Rest], Args, Pending) ->
    %% A backslash makes the next character literal.
    tokenize_cmdline(Rest, Args, [Escaped | Pending]);
tokenize_cmdline([Literal | Rest], Args, Pending) ->
    tokenize_cmdline(Rest, Args, [Literal | Pending]).
|
||||||
|
|
||||||
|
%% @doc Consume the inside of a quoted argument that was opened with
%% `QuotationMark'. Characters are pushed (in reverse order) onto `ArgAcc'
%% until the matching quotation mark or the end of the command line is
%% reached, at which point control goes back to tokenize_cmdline/3.
%% Environment variables are only expanded inside double quotes.
-spec tokenize_quoted_arg(QuotationMark :: char(), CmdLine :: string(), Acc :: [string()], ArgAcc :: string()) -> [string()].
tokenize_quoted_arg(Quote, [Quote | Rest], Args, Current) ->
    %% Closing quote: resume unquoted tokenizing, keeping what was
    %% accumulated as part of the current argument.
    tokenize_cmdline(Rest, Args, Current);
tokenize_quoted_arg($" = Quote, [Sigil | _] = Input, Args, Current)
  when Sigil =:= $$ orelse Sigil =:= $% ->
    %% Environment variable reference: ${VAR}; $VAR; %VAR%
    {Remaining, Expansion} = expand_env_var(Input),
    tokenize_quoted_arg(Quote, Remaining, Args, lists:reverse(Expansion, Current));
tokenize_quoted_arg(Quote, [$\\, Escaped | Rest], Args, Current) ->
    %% A backslash makes the next character literal (this is how a
    %% quotation mark can be embedded in the argument).
    tokenize_quoted_arg(Quote, Rest, Args, [Escaped | Current]);
tokenize_quoted_arg(Quote, [Literal | Rest], Args, Current) ->
    tokenize_quoted_arg(Quote, Rest, Args, [Literal | Current]);
tokenize_quoted_arg(_Quote, Input, Args, Current) ->
    %% Input exhausted without a closing quote: let tokenize_cmdline/3
    %% finish with whatever has been accumulated.
    tokenize_cmdline(Input, Args, Current).
|
||||||
|
|
||||||
|
|
||||||
|
%% @doc Expand the environment variable reference found at the head of
%% `CmdLine'. The supported formats are ${VAR}, $VAR and %VAR%.
%%
%% Returns `{Rest, Text}' where `Rest' is the part of the command line that
%% was not consumed and `Text' is what must be inserted into the argument
%% in place of the consumed characters.
%%
%% `CmdLine' must start with `$' or `%'; callers check this before calling,
%% so any other input fails with a `function_clause' error.
-spec expand_env_var(CmdLine :: string()) -> {Rest :: string(), Text :: string()}.
expand_env_var("${" ++ Tail) ->
    %% Must be tried before the bare "$" clause, which would also match.
    expand_env_var("${", $}, Tail, []);
expand_env_var("$" ++ Tail) ->
    expand_env_var("$", Tail, []);
expand_env_var("%" ++ Tail) ->
    expand_env_var("%", $%, Tail, []).
|
||||||
|
|
||||||
|
%% @doc Scan the name of a delimited variable reference (${VAR} or %VAR%)
%% whose opening `Prefix' has already been consumed. Name characters
%% (letters, digits and underscores) are collected in reverse order in
%% `Acc' until `EndMark' is found.
%%
%% Returns `{Rest, Text}' where `Rest' is the unconsumed command line and
%% `Text' is the replacement text. When the reference is not terminated by
%% `EndMark' no lookup is done and the consumed characters are returned
%% unchanged.
-spec expand_env_var(Prefix :: string(), EndMark :: char(), CmdLine :: string(), Acc :: string()) ->
          {Rest :: string(), Text :: string()}.
expand_env_var(Prefix, EndMark, [Char | Tail], Acc)
  when (Char >= $A andalso Char =< $Z) orelse (Char >= $a andalso Char =< $z) orelse
       (Char >= $0 andalso Char =< $9) orelse (Char =:= $_) ->
    expand_env_var(Prefix, EndMark, Tail, [Char | Acc]);
expand_env_var(Prefix, EndMark, [EndMark | Tail], Acc) ->
    %% Well-formed reference: drop the end mark and look the name up.
    {Tail, get_env_var(Prefix, [EndMark], Acc)};
expand_env_var(Prefix, _EndMark, CmdLine, Acc) ->
    %% Incomplete reference (e.g. "${PATH " or "%PATH "): keep it verbatim.
    {CmdLine, Prefix ++ lists:reverse(Acc)}.
|
||||||
|
|
||||||
|
|
||||||
|
%% @doc Scan the name of an undelimited variable reference ($VAR) whose
%% `Prefix' has already been consumed. The name is the longest run of
%% letters, digits and underscores at the head of `CmdLine'; it is collected
%% in reverse order in `Acc'.
%%
%% Returns `{Rest, Text}' where `Rest' is the unconsumed command line and
%% `Text' is the replacement text produced by get_env_var/3.
-spec expand_env_var(Prefix :: string(), CmdLine :: string(), Acc :: string()) ->
          {Rest :: string(), Text :: string()}.
expand_env_var(Prefix, [Char | Tail], Acc)
  when (Char >= $A andalso Char =< $Z) orelse (Char >= $a andalso Char =< $z) orelse
       (Char >= $0 andalso Char =< $9) orelse (Char =:= $_) ->
    expand_env_var(Prefix, Tail, [Char | Acc]);
expand_env_var(Prefix, CmdLine, Acc) ->
    %% First character that cannot be part of a name (or end of input):
    %% the name is complete; this format has no closing suffix.
    {CmdLine, get_env_var(Prefix, "", Acc)}.
|
||||||
|
|
||||||
|
|
||||||
|
%% @doc Produce the text that replaces a variable reference. `Acc' is the
%% variable name in reverse order; `Prefix' and `Suffix' are the delimiters
%% that surrounded it in the command line. When the variable is set its
%% value is returned; when it is not set, or the name is empty, the
%% reference is rebuilt exactly as it was written.
-spec get_env_var(Prefix :: string(), Suffix :: string(), Acc :: string()) -> string().
get_env_var(Prefix, Suffix, []) ->
    %% No name between the delimiters: nothing to look up.
    Prefix ++ Suffix;
get_env_var(Prefix, Suffix, [_ | _] = RevName) ->
    VarName = lists:reverse(RevName),
    case os:getenv(VarName) of
        Value when is_list(Value) ->
            Value;
        false ->
            %% Unknown variable: leave the reference untouched.
            Prefix ++ VarName ++ Suffix
    end.
|
||||||
|
|
||||||
|
|
||||||
%% @doc Return the smallest integral value not less than the argument.
|
%% @doc Return the smallest integral value not less than the argument.
|
||||||
-spec ceiling(float()) -> integer().
|
-spec ceiling(float()) -> integer().
|
||||||
|
@ -26,8 +26,8 @@
|
|||||||
%%% UNIT TESTS
|
%%% UNIT TESTS
|
||||||
%%%-------------------------------------------------------------------
|
%%%-------------------------------------------------------------------
|
||||||
|
|
||||||
%%% Test for the getopt/1 function
|
%%% Main test for the getopt/1 function.
|
||||||
parse_1_test_() ->
|
parse_main_test_() ->
|
||||||
Short = {short, $a, undefined, undefined, "Option with only short form and no argument"},
|
Short = {short, $a, undefined, undefined, "Option with only short form and no argument"},
|
||||||
Short2 = {short2, $b, undefined, undefined, "Second option with only short form and no argument"},
|
Short2 = {short2, $b, undefined, undefined, "Second option with only short form and no argument"},
|
||||||
Short3 = {short3, $c, undefined, undefined, "Third option with only short form and no argument"},
|
Short3 = {short3, $c, undefined, undefined, "Third option with only short form and no argument"},
|
||||||
@ -218,8 +218,8 @@ parse_1_test_() ->
|
|||||||
].
|
].
|
||||||
|
|
||||||
|
|
||||||
%% Real world test for getopt/1
|
%% Real world test for getopt/1.
|
||||||
parse_2_test_() ->
|
parse_multiple_repetitions_test_() ->
|
||||||
OptSpecList =
|
OptSpecList =
|
||||||
[
|
[
|
||||||
{define, $D, "define", string, "Define a variable"},
|
{define, $D, "define", string, "Define a variable"},
|
||||||
@ -233,3 +233,44 @@ parse_2_test_() ->
|
|||||||
{verbose, true}, {verbose, true}, {debug, 2}, {offset, -61.0}, {debug, 1}, {debug, 4}], ["dummy1", "dummy2"]}},
|
{verbose, true}, {verbose, true}, {debug, 2}, {offset, -61.0}, {debug, 1}, {debug, 4}], ["dummy1", "dummy2"]}},
|
||||||
parse(OptSpecList, "-DFOO -DVAR1=VAL1 -DBAR -vv -dd --offset=-61.0 --debug -dddd dummy1 dummy2"))}
|
parse(OptSpecList, "-DFOO -DVAR1=VAL1 -DBAR -vv -dd --offset=-61.0 --debug -dddd dummy1 dummy2"))}
|
||||||
].
|
].
|
||||||
|
|
||||||
|
|
||||||
|
%% Quoted arguments (single quotes, double quotes and an unclosed trailing
%% quote) must keep their embedded whitespace.
parse_args_with_spaces_test_() ->
    Define = {define, $D, "define", string, "Define a variable"},
    User = {user, $u, "user", string, "User name"},
    CmdLine = "-D'FOO BAR' -D\"VAR 1=VAL 1\" --user \"my user name\" ' dummy1 dummy2 ",
    Expected = {ok, {[{define, "FOO BAR"}, {define, "VAR 1=VAL 1"}, {user, "my user name"}],
                     [" dummy1 dummy2 "]}},
    [
     {"Arguments with spaces",
      ?_assertEqual(Expected, parse([Define, User], CmdLine))}
    ].
|
||||||
|
|
||||||
|
|
||||||
|
%% Emulated shell variable expansion. Each case is described by its title,
%% the expected value of the 'path' option, the expected single non-option
%% argument and the command line to parse.
parse_variable_expansion_test_() ->
    Path = os:getenv("PATH"),
    %% The tests rely on this variable not being defined.
    false = os:getenv("DUMMY_VAR_THAT_MUST_NOT_EXIST"),
    OptSpecList = [{path, $p, "path", string, "File path"}],
    Cases =
        [
         {"Shell variable expansion (simple Unix/bash format)",
          Path, "$DUMMY_VAR_THAT_MUST_NOT_EXIST",
          "--path $PATH $DUMMY_VAR_THAT_MUST_NOT_EXIST"},
         {"Shell variable expansion (full Unix/bash format)",
          Path, "${DUMMY_VAR_THAT_MUST_NOT_EXIST}",
          " --path ${PATH} ${DUMMY_VAR_THAT_MUST_NOT_EXIST} "},
         {"Incomplete variable expansion (full Unix/bash format)",
          "${PATH", "${DUMMY_VAR_THAT_MUST_NOT_EXIST}",
          " --path ${PATH ${DUMMY_VAR_THAT_MUST_NOT_EXIST} "},
         {"Shell variable expansion (Windows format)",
          Path, "%DUMMY_VAR_THAT_MUST_NOT_EXIST%",
          " --path %PATH% %DUMMY_VAR_THAT_MUST_NOT_EXIST% "},
         {"Incomplete variable expansion (Windows format)",
          "%PATH", "%DUMMY_VAR_THAT_MUST_NOT_EXIST%",
          " --path %PATH %DUMMY_VAR_THAT_MUST_NOT_EXIST% "}
        ],
    [{Title,
      ?_assertEqual({ok, {[{path, ExpectedPath}], [ExpectedArg]}},
                    parse(OptSpecList, CmdLine))}
     || {Title, ExpectedPath, ExpectedArg, CmdLine} <- Cases].
|
||||||
|
Loading…
x
Reference in New Issue
Block a user