Fix #15: add command-line tokenizer with support for quoted arguments
Replace string:tokens/2 with a custom command-line tokenizer that supports
single and double-quoted arguments (with embedded whitespace) and
expansion of environment variables using either the Unix ($VAR; ${VAR}) or
Windows (%VAR%) formats. This tokenizer is only used when the command line
is parsed as a single string.
This commit is contained in:
+98
-1
@@ -12,6 +12,7 @@
|
||||
-author('juanjo@comellas.org').
|
||||
|
||||
-export([parse/2, usage/2, usage/3, usage/4]).
|
||||
-export([tokenize_cmdline/1]).
|
||||
|
||||
-export_type([arg_type/0,
|
||||
arg_value/0,
|
||||
@@ -66,7 +67,7 @@ parse(OptSpecList, CmdLine) ->
|
||||
try
|
||||
Args = if
|
||||
is_integer(hd(CmdLine)) ->
|
||||
string:tokens(CmdLine, " \t\n");
|
||||
tokenize_cmdline(CmdLine);
|
||||
true ->
|
||||
CmdLine
|
||||
end,
|
||||
@@ -606,6 +607,102 @@ add_option_help(_Opt, _Prefix, Acc) ->
|
||||
Acc.
|
||||
|
||||
|
||||
%% @doc Tokenize a command line string with support for single and double
|
||||
%% quoted arguments (needed for arguments that have embedded whitespace).
|
||||
%% The function also supports the expansion of environment variables in
|
||||
%% both the Unix (${VAR}; $VAR) and Windows (%VAR%) formats. It does NOT
|
||||
%% support wildcard expansion of paths.
|
||||
-spec tokenize_cmdline(CmdLine :: string()) -> [string()].
|
||||
tokenize_cmdline(CmdLine) ->
|
||||
tokenize_cmdline(CmdLine, [], []).
|
||||
|
||||
-spec tokenize_cmdline(CmdLine :: string(), Acc :: [string()], ArgAcc :: string()) -> [string()].
|
||||
tokenize_cmdline([Sep | Tail], Acc, ArgAcc) when Sep =:= $\s; Sep =:= $\t; Sep =:= $\n ->
|
||||
NewAcc = case ArgAcc of
|
||||
[_ | _] ->
|
||||
%% Found separator: add to the list of arguments.
|
||||
[lists:reverse(ArgAcc) | Acc];
|
||||
[] ->
|
||||
%% Found separator with no accumulated argument; discard it.
|
||||
Acc
|
||||
end,
|
||||
tokenize_cmdline(Tail, NewAcc, []);
|
||||
tokenize_cmdline([QuotationMark | Tail], Acc, ArgAcc) when QuotationMark =:= $"; QuotationMark =:= $' ->
|
||||
%% Quoted argument (might contain spaces, tabs, etc.)
|
||||
tokenize_quoted_arg(QuotationMark, Tail, Acc, ArgAcc);
|
||||
tokenize_cmdline([Char | _Tail] = CmdLine, Acc, ArgAcc) when Char =:= $$; Char =:= $% ->
|
||||
%% Unix and Windows environment variable expansion: ${VAR}; $VAR; %VAR%
|
||||
{NewCmdLine, Var} = expand_env_var(CmdLine),
|
||||
tokenize_cmdline(NewCmdLine, Acc, lists:reverse(Var, ArgAcc));
|
||||
tokenize_cmdline([$\\, Char | Tail], Acc, ArgAcc) ->
|
||||
%% Escaped char.
|
||||
tokenize_cmdline(Tail, Acc, [Char | ArgAcc]);
|
||||
tokenize_cmdline([Char | Tail], Acc, ArgAcc) ->
|
||||
tokenize_cmdline(Tail, Acc, [Char | ArgAcc]);
|
||||
tokenize_cmdline([], Acc, []) ->
|
||||
lists:reverse(Acc);
|
||||
tokenize_cmdline([], Acc, ArgAcc) ->
|
||||
lists:reverse([lists:reverse(ArgAcc) | Acc]).
|
||||
|
||||
-spec tokenize_quoted_arg(QuotationMark :: char(), CmdLine :: string(), Acc :: [string()], ArgAcc :: string()) -> [string()].
|
||||
tokenize_quoted_arg(QuotationMark, [QuotationMark | Tail], Acc, ArgAcc) ->
|
||||
%% End of quoted argument
|
||||
tokenize_cmdline(Tail, Acc, ArgAcc);
|
||||
tokenize_quoted_arg($" = QuotationMark, [Char | _Tail] = CmdLine, Acc, ArgAcc) when Char =:= $$; Char =:= $% ->
|
||||
%% Unix and Windows environment variable expansion: ${VAR}; $VAR; %VAR%
|
||||
{NewCmdLine, Var} = expand_env_var(CmdLine),
|
||||
tokenize_quoted_arg(QuotationMark, NewCmdLine, Acc, lists:reverse(Var, ArgAcc));
|
||||
tokenize_quoted_arg(QuotationMark, [$\\, Char | Tail], Acc, ArgAcc) ->
|
||||
%% Escaped char.
|
||||
tokenize_quoted_arg(QuotationMark, Tail, Acc, [Char | ArgAcc]);
|
||||
tokenize_quoted_arg(QuotationMark, [Char | Tail], Acc, ArgAcc) ->
|
||||
tokenize_quoted_arg(QuotationMark, Tail, Acc, [Char | ArgAcc]);
|
||||
tokenize_quoted_arg(_QuotationMark, CmdLine, Acc, ArgAcc) ->
|
||||
tokenize_cmdline(CmdLine, Acc, ArgAcc).
|
||||
|
||||
|
||||
-spec expand_env_var(CmdLine :: string()) -> string().
|
||||
expand_env_var(CmdLine) ->
|
||||
case CmdLine of
|
||||
"${" ++ Tail ->
|
||||
expand_env_var("${", $}, Tail, []);
|
||||
"$" ++ Tail ->
|
||||
expand_env_var("$", Tail, []);
|
||||
"%" ++ Tail ->
|
||||
expand_env_var("%", $%, Tail, [])
|
||||
end.
|
||||
|
||||
-spec expand_env_var(Prefix :: string(), EndMark :: char(), CmdLine :: string(), Acc :: string()) -> string().
|
||||
expand_env_var(Prefix, EndMark, [Char | Tail], Acc)
|
||||
when (Char >= $A andalso Char =< $Z) orelse (Char >= $a andalso Char =< $z) orelse
|
||||
(Char >= $0 andalso Char =< $9) orelse (Char =:= $_) ->
|
||||
expand_env_var(Prefix, EndMark, Tail, [Char | Acc]);
|
||||
expand_env_var(Prefix, EndMark, [EndMark | Tail], Acc) ->
|
||||
{Tail, get_env_var(Prefix, [EndMark], Acc)};
|
||||
expand_env_var(Prefix, _EndMark, CmdLine, Acc) ->
|
||||
{CmdLine, Prefix ++ lists:reverse(Acc)}.
|
||||
|
||||
|
||||
-spec expand_env_var(Prefix :: string(), CmdLine :: string(), Acc :: string()) -> string().
|
||||
expand_env_var(Prefix, [Char | Tail], Acc)
|
||||
when (Char >= $A andalso Char =< $Z) orelse (Char >= $a andalso Char =< $z) orelse
|
||||
(Char >= $0 andalso Char =< $9) orelse (Char =:= $_) ->
|
||||
expand_env_var(Prefix, Tail, [Char | Acc]);
|
||||
expand_env_var(Prefix, CmdLine, Acc) ->
|
||||
{CmdLine, get_env_var(Prefix, "", Acc)}.
|
||||
|
||||
|
||||
-spec get_env_var(Prefix :: string(), Suffix :: string(), Acc :: string()) -> string().
|
||||
get_env_var(Prefix, Suffix, [_ | _] = Acc) ->
|
||||
Name = lists:reverse(Acc),
|
||||
%% Only expand valid variables.
|
||||
case os:getenv(Name) of
|
||||
false -> Prefix ++ Name ++ Suffix;
|
||||
Value -> Value
|
||||
end;
|
||||
get_env_var(Prefix, Suffix, []) ->
|
||||
Prefix ++ Suffix.
|
||||
|
||||
|
||||
%% @doc Return the smallest integral value not less than the argument.
|
||||
-spec ceiling(float()) -> integer().
|
||||
|
||||
Reference in New Issue
Block a user