Fix #15: add command-line tokenizer with support for quoted arguments

Replace string:tokens/2 with a custom command-line tokenizer that supports
single and double-quoted arguments (with embedded whitespace) and
expansion of environment variables using either the Unix ($VAR; ${VAR}) or
Windows (%VAR%) formats. This tokenizer is only used when the command line
is parsed as a single string.
This commit is contained in:
Juan Jose Comellas 2012-07-20 07:42:23 -03:00
parent eb4a5beccd
commit 1a01b82531
3 changed files with 237 additions and 6 deletions

View File

@ -280,7 +280,7 @@ would return:
Positional Options
------------------
We can also have options with neither short nor long option name. In this case,
We can also have options with neither short nor long option names. In this case,
the options will be taken according to their position in the option specification
list passed to ``getopt:parse/2``.
@ -351,3 +351,96 @@ will return:
{ok,{[{host,"myhost"}, {port,1000}, {dbname,"mydb"}],
["-","dummy"]}}
```
Arguments with embedded whitespace
----------------------------------
Arguments that have embedded whitespace have to be quoted with either
single or double quotes to be considered as a single
argument.
e.g. Given an option specification list with the following format:
``` erlang
OptSpecList =
[
{define, $D, "define", string, "Define a variable"},
{user, $u, "user", string, "User name"}
].
```
The following invocation:
``` erlang
getopt:parse(OptSpecList,
"-D'FOO=VAR 123' --define \"VAR WITH SPACES\" -u\"my user name\"").
```
would return:
``` erlang
{ok,{[{define,"FOO=VAR 123"},
{define,"VAR WITH SPACES"},
{user,"my user name"}],
[]}}
```
When parsing a command line with unclosed quotes, everything from the
unclosed quote to the end of the command line is treated as part of a
single, final argument.
e.g. The following invocation:
``` erlang
getopt:parse(OptSpecList, "--user ' my user ' \"argument with unclosed quotes").
```
would return:
``` erlang
{ok,{[{user," my user "}],
["argument with unclosed quotes"]}}
```
Environment variable expansion
------------------------------
`getopt:parse/2` will expand environment variables when used with a command
line that is passed as a single string. The formats that are supported
for environment variable expansion are:
- $VAR (simple Unix/bash format)
- ${VAR} (full Unix/bash format)
- %VAR% (Windows format)
If a variable is not present in the environment, it will not be
expanded and the reference is left in the argument exactly as written.
Variables can be expanded within double-quoted and free arguments.
*getopt* will not expand environment variables within single-quoted arguments.
e.g. Given the following option specification list:
``` erlang
OptSpecList =
[
{path, $p, "path", string, "File path"}
].
```
The following invocation:
``` erlang
getopt:parse(OptSpecList, "--path ${PATH} $NONEXISTENT_DUMMY_VAR").
```
would return (depending on the value of your PATH variable) something like:
``` erlang
{ok,{[{path, "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"}],
["$NONEXISTENT_DUMMY_VAR"]}}
```
Currently, *getopt* does not perform wildcard expansion of file paths.

View File

@ -12,6 +12,7 @@
-author('juanjo@comellas.org').
-export([parse/2, usage/2, usage/3, usage/4]).
-export([tokenize_cmdline/1]).
-export_type([arg_type/0,
arg_value/0,
@ -66,7 +67,7 @@ parse(OptSpecList, CmdLine) ->
try
Args = if
is_integer(hd(CmdLine)) ->
string:tokens(CmdLine, " \t\n");
tokenize_cmdline(CmdLine);
true ->
CmdLine
end,
@ -606,6 +607,102 @@ add_option_help(_Opt, _Prefix, Acc) ->
Acc.
%% @doc Split a command line passed as a single string into a list of
%% arguments. Arguments are separated by spaces, tabs or newlines; single
%% or double quotes can be used to keep embedded whitespace inside one
%% argument. Environment variable references written as ${VAR}, $VAR
%% (Unix) or %VAR% (Windows) are expanded. Wildcard expansion of paths is
%% NOT performed.
-spec tokenize_cmdline(CmdLine :: string()) -> [string()].
tokenize_cmdline(CmdLine) ->
    %% Start with no completed arguments and no argument in progress.
    CompletedArgs = [],
    CurrentArg = [],
    tokenize_cmdline(CmdLine, CompletedArgs, CurrentArg).
%% Main tokenizer loop (outside of quotes).
%%   CmdLine - characters still to be consumed.
%%   Acc     - arguments completed so far, most recent first.
%%   ArgAcc  - characters of the argument being built, in reverse order.
-spec tokenize_cmdline(CmdLine :: string(), Acc :: [string()], ArgAcc :: string()) -> [string()].
tokenize_cmdline([], Args, []) ->
    %% End of input with nothing pending.
    lists:reverse(Args);
tokenize_cmdline([], Args, Pending) ->
    %% End of input: the pending argument becomes the last one.
    lists:reverse(Args, [lists:reverse(Pending)]);
tokenize_cmdline([Blank | Rest], Args, Pending)
  when Blank =:= $\s orelse Blank =:= $\t orelse Blank =:= $\n ->
    case Pending of
        [] ->
            %% Separator with no accumulated argument; just skip it.
            tokenize_cmdline(Rest, Args, []);
        [_ | _] ->
            %% Separator closes the argument in progress.
            tokenize_cmdline(Rest, [lists:reverse(Pending) | Args], [])
    end;
tokenize_cmdline([Quote | Rest], Args, Pending)
  when Quote =:= $" orelse Quote =:= $' ->
    %% Opening quote: the quoted text (which may contain whitespace) is
    %% appended to the argument in progress.
    tokenize_quoted_arg(Quote, Rest, Args, Pending);
tokenize_cmdline([Sigil | _] = Input, Args, Pending)
  when Sigil =:= $$ orelse Sigil =:= $% ->
    %% Environment variable reference: ${VAR}, $VAR or %VAR%.
    {Remaining, Expansion} = expand_env_var(Input),
    %% Expansion is in reading order; the pending argument is reversed.
    tokenize_cmdline(Remaining, Args, lists:reverse(Expansion, Pending));
tokenize_cmdline([$\\, Escaped | Rest], Args, Pending) ->
    %% Backslash escape: drop the backslash, keep the next char verbatim.
    tokenize_cmdline(Rest, Args, [Escaped | Pending]);
tokenize_cmdline([Plain | Rest], Args, Pending) ->
    tokenize_cmdline(Rest, Args, [Plain | Pending]).
%% Tokenizer loop used while inside a quoted section.
%%   QuotationMark - the quote character that opened the section.
%%   CmdLine       - characters still to be consumed.
%%   Acc           - arguments completed so far, most recent first.
%%   ArgAcc        - characters of the argument being built, reversed.
%% Control goes back to tokenize_cmdline/3 when the matching quote is found
%% or when the input runs out before the quote is closed.
-spec tokenize_quoted_arg(QuotationMark :: char(), CmdLine :: string(), Acc :: [string()], ArgAcc :: string()) -> [string()].
tokenize_quoted_arg(Quote, [Quote | Rest], Args, Pending) ->
    %% Matching closing quote: resume normal tokenizing. The argument is
    %% not finished here; it ends at the next separator or end of input.
    tokenize_cmdline(Rest, Args, Pending);
tokenize_quoted_arg($", [Sigil | _] = Input, Args, Pending)
  when Sigil =:= $$ orelse Sigil =:= $% ->
    %% Variables (${VAR}, $VAR, %VAR%) are only expanded inside double quotes.
    {Remaining, Expansion} = expand_env_var(Input),
    tokenize_quoted_arg($", Remaining, Args, lists:reverse(Expansion, Pending));
tokenize_quoted_arg(Quote, [$\\, Escaped | Rest], Args, Pending) ->
    %% Backslash escape: drop the backslash, keep the next char verbatim.
    tokenize_quoted_arg(Quote, Rest, Args, [Escaped | Pending]);
tokenize_quoted_arg(Quote, [Plain | Rest], Args, Pending) ->
    tokenize_quoted_arg(Quote, Rest, Args, [Plain | Pending]);
tokenize_quoted_arg(_Quote, Unterminated, Args, Pending) ->
    %% Input exhausted without a closing quote: let the main loop finish up.
    tokenize_cmdline(Unterminated, Args, Pending).
%% @doc Expand the environment variable reference found at the start of
%% `CmdLine', which must begin with "${", "$" or "%".
%% Returns `{Rest, Text}' where `Rest' is the part of the command line that
%% follows the reference and `Text' is the text to insert in its place.
%% (The previous spec declared a plain string() result, which did not match
%% the tuple the function actually returns.)
-spec expand_env_var(CmdLine :: string()) -> {string(), string()}.
expand_env_var("${" ++ Tail) ->
    %% Full Unix/bash format; must be tried before the bare "$" form.
    expand_env_var("${", $}, Tail, []);
expand_env_var("$" ++ Tail) ->
    %% Simple Unix/bash format: the name ends at the first char that is
    %% not valid in a variable name.
    expand_env_var("$", Tail, []);
expand_env_var("%" ++ Tail) ->
    %% Windows format: the name is closed by a second "%".
    expand_env_var("%", $%, Tail, []).
%% @doc Collect the name of a delimited variable reference (${VAR} or
%% %VAR%) and expand it.
%%   Prefix  - the opening delimiter that was already consumed.
%%   EndMark - the character that closes the reference.
%%   CmdLine - input following the prefix / the chars consumed so far.
%%   Acc     - variable name collected so far, in reverse order.
%% Returns `{Rest, Text}': the remaining input and the text to insert.
%% (The previous spec declared a plain string() result, which did not match
%% the tuple returned by every clause.)
-spec expand_env_var(Prefix :: string(), EndMark :: char(), CmdLine :: string(), Acc :: string()) ->
          {string(), string()}.
expand_env_var(Prefix, EndMark, [Char | Tail], Acc)
  when (Char >= $A andalso Char =< $Z) orelse (Char >= $a andalso Char =< $z) orelse
       (Char >= $0 andalso Char =< $9) orelse (Char =:= $_) ->
    %% Still inside the variable name.
    expand_env_var(Prefix, EndMark, Tail, [Char | Acc]);
expand_env_var(Prefix, EndMark, [EndMark | Tail], Acc) ->
    %% Closing delimiter found: look the variable up.
    {Tail, get_env_var(Prefix, [EndMark], Acc)};
expand_env_var(Prefix, _EndMark, CmdLine, Acc) ->
    %% Reference was never closed: emit what was read, unexpanded, and
    %% continue tokenizing from the offending character.
    {CmdLine, Prefix ++ lists:reverse(Acc)}.
%% @doc Collect the name of an undelimited variable reference ($VAR) and
%% expand it. The name ends at the first character that is not a letter,
%% a digit or an underscore (or at the end of the input).
%%   Prefix  - the opening delimiter that was already consumed.
%%   CmdLine - input following the prefix / the chars consumed so far.
%%   Acc     - variable name collected so far, in reverse order.
%% Returns `{Rest, Text}': the remaining input and the text to insert.
%% (The previous spec declared a plain string() result, which did not match
%% the tuple returned by every clause.)
-spec expand_env_var(Prefix :: string(), CmdLine :: string(), Acc :: string()) ->
          {string(), string()}.
expand_env_var(Prefix, [Char | Tail], Acc)
  when (Char >= $A andalso Char =< $Z) orelse (Char >= $a andalso Char =< $z) orelse
       (Char >= $0 andalso Char =< $9) orelse (Char =:= $_) ->
    %% Still inside the variable name.
    expand_env_var(Prefix, Tail, [Char | Acc]);
expand_env_var(Prefix, CmdLine, Acc) ->
    %% Name finished; there is no closing delimiter in this format.
    {CmdLine, get_env_var(Prefix, "", Acc)}.
%% Resolve a variable reference to the text that replaces it.
%%   Prefix / Suffix - the delimiters that surrounded the name; used to
%%                     rebuild the reference when it cannot be expanded.
%%   Acc             - the variable name, in reverse order.
%% Returns the value of the variable when it is set in the environment;
%% otherwise returns the reference rebuilt as Prefix ++ Name ++ Suffix.
-spec get_env_var(Prefix :: string(), Suffix :: string(), Acc :: string()) -> string().
get_env_var(Prefix, Suffix, []) ->
    %% No name between the delimiters: nothing to look up.
    Prefix ++ Suffix;
get_env_var(Prefix, Suffix, [_ | _] = ReversedName) ->
    VarName = lists:reverse(ReversedName),
    case os:getenv(VarName) of
        false ->
            %% Unknown variable: leave the reference untouched.
            lists:append([Prefix, VarName, Suffix]);
        Expansion ->
            Expansion
    end.
%% @doc Return the smallest integral value not less than the argument.
-spec ceiling(float()) -> integer().

View File

@ -26,8 +26,8 @@
%%% UNIT TESTS
%%%-------------------------------------------------------------------
%%% Test for the getopt/1 function
parse_1_test_() ->
%%% Main test for the getopt/1 function.
parse_main_test_() ->
Short = {short, $a, undefined, undefined, "Option with only short form and no argument"},
Short2 = {short2, $b, undefined, undefined, "Second option with only short form and no argument"},
Short3 = {short3, $c, undefined, undefined, "Third option with only short form and no argument"},
@ -218,8 +218,8 @@ parse_1_test_() ->
].
%% Real world test for getopt/1
parse_2_test_() ->
%% Real world test for getopt/1.
parse_multiple_repetitions_test_() ->
OptSpecList =
[
{define, $D, "define", string, "Define a variable"},
@ -233,3 +233,44 @@ parse_2_test_() ->
{verbose, true}, {verbose, true}, {debug, 2}, {offset, -61.0}, {debug, 1}, {debug, 4}], ["dummy1", "dummy2"]}},
parse(OptSpecList, "-DFOO -DVAR1=VAL1 -DBAR -vv -dd --offset=-61.0 --debug -dddd dummy1 dummy2"))}
].
%% Quoted arguments (single or double quotes) keep their embedded whitespace.
parse_args_with_spaces_test_() ->
    OptSpecList =
        [
         {define, $D, "define", string, "Define a variable"},
         {user, $u, "user", string, "User name"}
        ],
    ExpectedOptions = [{define, "FOO BAR"}, {define, "VAR 1=VAL 1"}, {user, "my user name"}],
    %% The last single quote on the command line is never closed, so the
    %% remaining text (including its surrounding spaces) is one argument.
    ExpectedArgs = [" dummy1 dummy2 "],
    [
     {"Arguments with spaces",
      ?_assertEqual({ok, {ExpectedOptions, ExpectedArgs}},
                    parse(OptSpecList, "-D'FOO BAR' -D\"VAR 1=VAL 1\" --user \"my user name\" ' dummy1 dummy2 "))}
    ].
%% Environment variable expansion in the $VAR, ${VAR} and %VAR% formats,
%% including references that are unknown or left unterminated.
parse_variable_expansion_test_() ->
    %% Precondition: the dummy variable must really be absent.
    false = os:getenv("DUMMY_VAR_THAT_MUST_NOT_EXIST"),
    Path = os:getenv("PATH"),
    OptSpecList =
        [
         {path, $p, "path", string, "File path"}
        ],
    %% Every case yields a single 'path' option plus one leftover argument.
    Expected = fun(PathValue, Leftover) -> {ok, {[{path, PathValue}], [Leftover]}} end,
    [
     {"Shell variable expansion (simple Unix/bash format)",
      ?_assertEqual(Expected(Path, "$DUMMY_VAR_THAT_MUST_NOT_EXIST"),
                    parse(OptSpecList, "--path $PATH $DUMMY_VAR_THAT_MUST_NOT_EXIST"))},
     {"Shell variable expansion (full Unix/bash format)",
      ?_assertEqual(Expected(Path, "${DUMMY_VAR_THAT_MUST_NOT_EXIST}"),
                    parse(OptSpecList, " --path ${PATH} ${DUMMY_VAR_THAT_MUST_NOT_EXIST} "))},
     {"Incomplete variable expansion (full Unix/bash format)",
      ?_assertEqual(Expected("${PATH", "${DUMMY_VAR_THAT_MUST_NOT_EXIST}"),
                    parse(OptSpecList, " --path ${PATH ${DUMMY_VAR_THAT_MUST_NOT_EXIST} "))},
     {"Shell variable expansion (Windows format)",
      ?_assertEqual(Expected(Path, "%DUMMY_VAR_THAT_MUST_NOT_EXIST%"),
                    parse(OptSpecList, " --path %PATH% %DUMMY_VAR_THAT_MUST_NOT_EXIST% "))},
     {"Incomplete variable expansion (Windows format)",
      ?_assertEqual(Expected("%PATH", "%DUMMY_VAR_THAT_MUST_NOT_EXIST%"),
                    parse(OptSpecList, " --path %PATH %DUMMY_VAR_THAT_MUST_NOT_EXIST% "))}
    ].