From 1a01b82531a50564956c948b2281bdc1aa39d582 Mon Sep 17 00:00:00 2001 From: Juan Jose Comellas Date: Fri, 20 Jul 2012 07:42:23 -0300 Subject: [PATCH] Fix #15: add command-line tokenizer with support for quoted arguments Replace string:tokens/2 with a custom command-line tokenizer that supports single and double-quoted arguments (with embedded whitespace) and expansion of environment variables using either the Unix ($VAR; ${VAR}) or Windows (%VAR%) formats. This tokenizer is only used when the command line is parsed as a single string. --- README.md | 95 +++++++++++++++++++++++++++++++++++++++++- src/getopt.erl | 99 +++++++++++++++++++++++++++++++++++++++++++- test/getopt_test.erl | 49 ++++++++++++++++++++-- 3 files changed, 237 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 00f1a16..f528362 100644 --- a/README.md +++ b/README.md @@ -280,7 +280,7 @@ would return: Positional Options ------------------ -We can also have options with neither short nor long option name. In this case, +We can also have options with neither short nor long option names. In this case, the options will be taken according to their position in the option specification list passed to ``getopt:/parse2``. @@ -351,3 +351,96 @@ will return: {ok,{[{host,"myhost"}, {port,1000}, {dbname,"mydb"}], ["-","dummy"]}} ``` + + +Arguments with embedded whitespace +---------------------------------- + +Arguments that have embedded whitespace have to be quoted with either +single or double quotes to be considered as a single +argument. + + +e.g. Given an option specification list with the following format: + +``` erlang + OptSpecList = + [ + {define, $D, "define", string, "Define a variable"}, + {user, $u, "user", string, "User name"} + ]. +``` + +The following invocation: + +``` erlang + getopt:parse(OptSpecList, + "-D'FOO=VAR 123' --define \"VAR WITH SPACES\" -u\"my user name\""). 
+``` + +would return: + +``` erlang + {ok,{[{define,"FOO=VAR 123"}, + {define,"VAR WITH SPACES"}, + {user,"my user name"}], + []}} +``` + +When parsing a command line with unclosed quotes the last argument +will be a single string starting at the position where the last quote +was entered. + +e.g. The following invocation: + +``` erlang + getopt:parse(OptSpecList, "--user ' my user ' \"argument with unclosed quotes"). +``` + +would return: + +``` erlang + {ok,{[{user," my user "}], + ["argument with unclosed quotes"]}} +``` + + +Environment variable expansion +------------------------------ + +`getopt:parse/2` will expand environment variables when used with a command +line that is passed as a single string. The formats that are supported +for environment variable expansion are: + + - $VAR (simple Unix/bash format) + - ${VAR} (full Unix/bash format) + - %VAR% (Windows format) + +If a variable is not present in the environment it will not be +expanded. Variables can be expanded within double-quoted and free +arguments. *getopt* will not expand environment variables within +single-quoted arguments. + +e.g. Given the following option specification list: + +``` erlang + OptSpecList = + [ + {path, $p, "path", string, "File path"} + ]. +``` + +The following invocation: + +``` erlang + getopt:parse(OptSpecList, "--path ${PATH} $NONEXISTENT_DUMMY_VAR"). +``` + +would return (depending on the value of your PATH variable) something like: + +``` erlang + {ok,{[{path, "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"}], + ["$NONEXISTENT_DUMMY_VAR"]}} +``` + +Currently, *getopt* does not perform wildcard expansion of file paths. diff --git a/src/getopt.erl b/src/getopt.erl index 175b7a5..09fd194 100644 --- a/src/getopt.erl +++ b/src/getopt.erl @@ -12,6 +12,7 @@ -author('juanjo@comellas.org'). -export([parse/2, usage/2, usage/3, usage/4]). +-export([tokenize_cmdline/1]). 
-export_type([arg_type/0, arg_value/0, @@ -66,7 +67,7 @@ parse(OptSpecList, CmdLine) -> try Args = if is_integer(hd(CmdLine)) -> - string:tokens(CmdLine, " \t\n"); + tokenize_cmdline(CmdLine); true -> CmdLine end, @@ -606,6 +607,102 @@ add_option_help(_Opt, _Prefix, Acc) -> Acc. +%% @doc Tokenize a command line string with support for single and double +%% quoted arguments (needed for arguments that have embedded whitespace). +%% The function also supports the expansion of environment variables in +%% both the Unix (${VAR}; $VAR) and Windows (%VAR%) formats. It does NOT +%% support wildcard expansion of paths. +-spec tokenize_cmdline(CmdLine :: string()) -> [string()]. +tokenize_cmdline(CmdLine) -> + tokenize_cmdline(CmdLine, [], []). + +-spec tokenize_cmdline(CmdLine :: string(), Acc :: [string()], ArgAcc :: string()) -> [string()]. +tokenize_cmdline([Sep | Tail], Acc, ArgAcc) when Sep =:= $\s; Sep =:= $\t; Sep =:= $\n -> + NewAcc = case ArgAcc of + [_ | _] -> + %% Found separator: add to the list of arguments. + [lists:reverse(ArgAcc) | Acc]; + [] -> + %% Found separator with no accumulated argument; discard it. + Acc + end, + tokenize_cmdline(Tail, NewAcc, []); +tokenize_cmdline([QuotationMark | Tail], Acc, ArgAcc) when QuotationMark =:= $"; QuotationMark =:= $' -> + %% Quoted argument (might contain spaces, tabs, etc.) + tokenize_quoted_arg(QuotationMark, Tail, Acc, ArgAcc); +tokenize_cmdline([Char | _Tail] = CmdLine, Acc, ArgAcc) when Char =:= $$; Char =:= $% -> + %% Unix and Windows environment variable expansion: ${VAR}; $VAR; %VAR% + {NewCmdLine, Var} = expand_env_var(CmdLine), + tokenize_cmdline(NewCmdLine, Acc, lists:reverse(Var, ArgAcc)); +tokenize_cmdline([$\\, Char | Tail], Acc, ArgAcc) -> + %% Escaped char: the character after the backslash is kept verbatim.
+ tokenize_cmdline(Tail, Acc, [Char | ArgAcc]); +tokenize_cmdline([Char | Tail], Acc, ArgAcc) -> + tokenize_cmdline(Tail, Acc, [Char | ArgAcc]); +tokenize_cmdline([], Acc, []) -> + lists:reverse(Acc); +tokenize_cmdline([], Acc, ArgAcc) -> + lists:reverse([lists:reverse(ArgAcc) | Acc]). + +-spec tokenize_quoted_arg(QuotationMark :: char(), CmdLine :: string(), Acc :: [string()], ArgAcc :: string()) -> [string()]. +tokenize_quoted_arg(QuotationMark, [QuotationMark | Tail], Acc, ArgAcc) -> + %% End of quoted argument: resume unquoted tokenizing with the same accumulator. + tokenize_cmdline(Tail, Acc, ArgAcc); +tokenize_quoted_arg($" = QuotationMark, [Char | _Tail] = CmdLine, Acc, ArgAcc) when Char =:= $$; Char =:= $% -> + %% Unix and Windows environment variable expansion (double quotes only): ${VAR}; $VAR; %VAR% + {NewCmdLine, Var} = expand_env_var(CmdLine), + tokenize_quoted_arg(QuotationMark, NewCmdLine, Acc, lists:reverse(Var, ArgAcc)); +tokenize_quoted_arg(QuotationMark, [$\\, Char | Tail], Acc, ArgAcc) -> + %% Escaped char: the character after the backslash is kept verbatim. + tokenize_quoted_arg(QuotationMark, Tail, Acc, [Char | ArgAcc]); +tokenize_quoted_arg(QuotationMark, [Char | Tail], Acc, ArgAcc) -> + tokenize_quoted_arg(QuotationMark, Tail, Acc, [Char | ArgAcc]); +tokenize_quoted_arg(_QuotationMark, CmdLine, Acc, ArgAcc) -> %% Unclosed quote: the input is exhausted. + tokenize_cmdline(CmdLine, Acc, ArgAcc). + + +-spec expand_env_var(CmdLine :: string()) -> {Rest :: string(), Expansion :: string()}. +expand_env_var(CmdLine) -> + case CmdLine of + "${" ++ Tail -> + expand_env_var("${", $}, Tail, []); + "$" ++ Tail -> + expand_env_var("$", Tail, []); + "%" ++ Tail -> + expand_env_var("%", $%, Tail, []) + end. + +-spec expand_env_var(Prefix :: string(), EndMark :: char(), CmdLine :: string(), Acc :: string()) -> {Rest :: string(), Expansion :: string()}.
+expand_env_var(Prefix, EndMark, [Char | Tail], Acc) + when (Char >= $A andalso Char =< $Z) orelse (Char >= $a andalso Char =< $z) orelse + (Char >= $0 andalso Char =< $9) orelse (Char =:= $_) -> + expand_env_var(Prefix, EndMark, Tail, [Char | Acc]); +expand_env_var(Prefix, EndMark, [EndMark | Tail], Acc) -> + {Tail, get_env_var(Prefix, [EndMark], Acc)}; +expand_env_var(Prefix, _EndMark, CmdLine, Acc) -> + {CmdLine, Prefix ++ lists:reverse(Acc)}. + + +-spec expand_env_var(Prefix :: string(), CmdLine :: string(), Acc :: string()) -> {Rest :: string(), Expansion :: string()}. +expand_env_var(Prefix, [Char | Tail], Acc) + when (Char >= $A andalso Char =< $Z) orelse (Char >= $a andalso Char =< $z) orelse + (Char >= $0 andalso Char =< $9) orelse (Char =:= $_) -> + expand_env_var(Prefix, Tail, [Char | Acc]); +expand_env_var(Prefix, CmdLine, Acc) -> + {CmdLine, get_env_var(Prefix, "", Acc)}. + + +-spec get_env_var(Prefix :: string(), Suffix :: string(), Acc :: string()) -> string(). +get_env_var(Prefix, Suffix, [_ | _] = Acc) -> + Name = lists:reverse(Acc), + %% Only expand variables that are set; otherwise give back the original text unchanged. + case os:getenv(Name) of + false -> Prefix ++ Name ++ Suffix; + Value -> Value + end; +get_env_var(Prefix, Suffix, []) -> + Prefix ++ Suffix. + %% @doc Return the smallest integral value not less than the argument. -spec ceiling(float()) -> integer(). diff --git a/test/getopt_test.erl b/test/getopt_test.erl index fa1b67d..66d1021 100644 --- a/test/getopt_test.erl +++ b/test/getopt_test.erl @@ -26,8 +26,8 @@ %%% UNIT TESTS %%%------------------------------------------------------------------- -%%% Test for the getopt/1 function -parse_1_test_() -> +%%% Main test for the getopt:parse/2 function.
+parse_main_test_() -> Short = {short, $a, undefined, undefined, "Option with only short form and no argument"}, Short2 = {short2, $b, undefined, undefined, "Second option with only short form and no argument"}, Short3 = {short3, $c, undefined, undefined, "Third option with only short form and no argument"}, @@ -218,8 +218,8 @@ parse_1_test_() -> ]. -%% Real world test for getopt/1 -parse_2_test_() -> +%% Real world test for getopt:parse/2. +parse_multiple_repetitions_test_() -> OptSpecList = [ {define, $D, "define", string, "Define a variable"}, @@ -233,3 +233,44 @@ parse_2_test_() -> {verbose, true}, {verbose, true}, {debug, 2}, {offset, -61.0}, {debug, 1}, {debug, 4}], ["dummy1", "dummy2"]}}, parse(OptSpecList, "-DFOO -DVAR1=VAL1 -DBAR -vv -dd --offset=-61.0 --debug -dddd dummy1 dummy2"))} ]. + + +%% Quoted arguments with embedded spaces, ending with an unclosed single quote. +parse_args_with_spaces_test_() -> + OptSpecList = + [ + {define, $D, "define", string, "Define a variable"}, + {user, $u, "user", string, "User name"} + ], + [ + {"Arguments with spaces", + ?_assertEqual({ok, {[{define, "FOO BAR"}, {define, "VAR 1=VAL 1"}, {user, "my user name"}], [" dummy1 dummy2 "]}}, + parse(OptSpecList, "-D'FOO BAR' -D\"VAR 1=VAL 1\" --user \"my user name\" ' dummy1 dummy2 "))} + ]. + + +%% Emulated shell variable expansion ($VAR, ${VAR}, %VAR%), including unset and unterminated variables.
+parse_variable_expansion_test_() -> + Path = os:getenv("PATH"), %% Expected expansion result. NOTE(review): assumes PATH is set; os:getenv/1 returns false otherwise. + false = os:getenv("DUMMY_VAR_THAT_MUST_NOT_EXIST"), %% Precondition: badmatch here if this variable is actually set. + OptSpecList = + [ + {path, $p, "path", string, "File path"} + ], + [ + {"Shell variable expansion (simple Unix/bash format)", + ?_assertEqual({ok, {[{path, Path}], ["$DUMMY_VAR_THAT_MUST_NOT_EXIST"]}}, + parse(OptSpecList, "--path $PATH $DUMMY_VAR_THAT_MUST_NOT_EXIST"))}, + {"Shell variable expansion (full Unix/bash format)", + ?_assertEqual({ok, {[{path, Path}], ["${DUMMY_VAR_THAT_MUST_NOT_EXIST}"]}}, + parse(OptSpecList, " --path ${PATH} ${DUMMY_VAR_THAT_MUST_NOT_EXIST} "))}, + {"Incomplete variable expansion (full Unix/bash format)", + ?_assertEqual({ok, {[{path, "${PATH"}], ["${DUMMY_VAR_THAT_MUST_NOT_EXIST}"]}}, + parse(OptSpecList, " --path ${PATH ${DUMMY_VAR_THAT_MUST_NOT_EXIST} "))}, + {"Shell variable expansion (Windows format)", + ?_assertEqual({ok, {[{path, Path}], ["%DUMMY_VAR_THAT_MUST_NOT_EXIST%"]}}, + parse(OptSpecList, " --path %PATH% %DUMMY_VAR_THAT_MUST_NOT_EXIST% "))}, + {"Incomplete variable expansion (Windows format)", + ?_assertEqual({ok, {[{path, "%PATH"}], ["%DUMMY_VAR_THAT_MUST_NOT_EXIST%"]}}, + parse(OptSpecList, " --path %PATH %DUMMY_VAR_THAT_MUST_NOT_EXIST% "))} + ].