diff --git a/README.md b/README.md index dd5f5b6..377fcf8 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ The `parse/2` function receives a list of tuples with the command line option specifications. The type specification for the tuple is: ```erlang --type arg_type() :: 'atom' | 'binary' | 'boolean' | 'float' | 'integer' | 'string'. +-type arg_type() :: 'atom' | 'binary' | 'utf8_binary' | 'boolean' | 'float' | 'integer' | 'string'. -type arg_value() :: atom() | binary() | boolean() | float() | integer() | string(). @@ -244,7 +244,8 @@ A long option can have the following syntax: Argument Types -------------- -The arguments allowed for options are: *atom*; *binary*; *boolean*; *float*; *integer*; *string*. +The arguments allowed for options are: *atom*; *binary*; *utf8_binary*; *boolean*; +*float*; *integer*; *string*. The `getopt` module checks every argument to see if it can be converted to its correct type. @@ -256,6 +257,17 @@ Numeric arguments can only be negative when passed as part of an assignment expr e.g. `--increment=-100` is a valid expression; whereas `--increment -100` is invalid +Arguments of `utf8_binary` type allow proper binary encoding of arguments containing +code points greater than 255. The resulting value is an NFC-normalized UTF-8 binary. + +As of Erlang/OTP 20, the `standard_error` device has its `unicode` option set to `false`. +This prevents the usage text from being printed correctly when default values contain +Unicode binaries or strings. To fix this, enable unicode on the device: + +```erlang +io:setopts(standard_error, [{unicode, true}]). +``` + Implicit Arguments ------------------ diff --git a/src/getopt.erl b/src/getopt.erl index b5ee7b4..847a98a 100644 --- a/src/getopt.erl +++ b/src/getopt.erl @@ -13,7 +13,7 @@ -export([parse/2, check/2, parse_and_check/2, format_error/2, usage/2, usage/3, usage/4, usage/6, tokenize/1]). --export([usage_cmd_line/2]). +-export([usage_cmd_line/2, usage_options/1]). -define(LINE_LENGTH, 75). 
-define(MIN_USAGE_COMMAND_LINE_OPTION_LENGTH, 25). @@ -30,7 +30,7 @@ (Char) =:= $\n orelse (Char) =:= $\r)). %% Atom indicating the data type that an argument can be converted to. --type arg_type() :: 'atom' | 'binary' | 'boolean' | 'float' | 'integer' | 'string'. +-type arg_type() :: 'atom' | 'binary' | 'utf8_binary' | 'boolean' | 'float' | 'integer' | 'string'. %% Data type that an argument can be converted to. -type arg_value() :: atom() | binary() | boolean() | float() | integer() | string(). %% Argument specification. @@ -435,6 +435,8 @@ to_type({Type, _DefaultArg}, Arg) -> to_type(Type, Arg); to_type(binary, Arg) -> list_to_binary(Arg); +to_type(utf8_binary, Arg) -> + unicode:characters_to_nfc_binary(Arg); to_type(atom, Arg) -> list_to_atom(Arg); to_type(integer, Arg) -> @@ -730,8 +732,8 @@ usage_option_text({_Name, Short, Long, _ArgSpec, _Help}) -> -spec usage_help_text(option_spec()) -> string(). -usage_help_text({_Name, _Short, _Long, {_ArgType, ArgValue}, [_ | _] = Help}) -> - Help ++ " [default: " ++ default_arg_value_to_string(ArgValue) ++ "]"; +usage_help_text({_Name, _Short, _Long, {ArgType, ArgValue}, [_ | _] = Help}) -> + Help ++ " [default: " ++ default_arg_value_to_string(ArgType, ArgValue) ++ "]"; usage_help_text({_Name, _Short, _Long, _ArgSpec, Help}) -> Help. @@ -804,15 +806,17 @@ wrap_text_line(_Length, [], Acc, _Count, _CurrentLineAcc) -> lists:reverse(Acc). 
-default_arg_value_to_string(Value) when is_atom(Value) -> +default_arg_value_to_string(_, Value) when is_atom(Value) -> atom_to_list(Value); -default_arg_value_to_string(Value) when is_binary(Value) -> +default_arg_value_to_string(binary, Value) when is_binary(Value) -> binary_to_list(Value); -default_arg_value_to_string(Value) when is_integer(Value) -> +default_arg_value_to_string(utf8_binary, Value) when is_binary(Value) -> + unicode:characters_to_list(Value); +default_arg_value_to_string(_, Value) when is_integer(Value) -> integer_to_list(Value); -default_arg_value_to_string(Value) when is_float(Value) -> +default_arg_value_to_string(_, Value) when is_float(Value) -> lists:flatten(io_lib:format("~w", [Value])); -default_arg_value_to_string(Value) -> +default_arg_value_to_string(_, Value) -> Value. diff --git a/test/getopt_test.erl b/test/getopt_test.erl index 7ec0f35..1dc9a34 100644 --- a/test/getopt_test.erl +++ b/test/getopt_test.erl @@ -323,3 +323,18 @@ check_test_() -> ?_assertEqual("option 'verbose' has invalid argument: 100", format_error(OptSpecList, {error, {invalid_option_arg, {verbose, "100"}}}))} ]. + +utf8_binary_test_() -> + OptSpecList = [{utf8, undefined, "utf8", utf8_binary, "UTF-8 arg"}], + Unicode = [228, 220, 223, 1455], + Utf8 = unicode:characters_to_binary(Unicode), + io:setopts(standard_error, [{encoding, utf8}]), + OptSpecsWithDefault = [{utf8, undefined, "utf8", {utf8_binary, Utf8}, "UTF-8 arg"}], + [{"Empty utf8_binary argument", + ?_assertEqual({ok, {[{utf8, <<>>}], []}}, parse(OptSpecList, ["--utf8", ""]))}, + {"Non empty utf8_binary argument", + ?_assertEqual({ok, {[{utf8, Utf8}], []}}, parse(OptSpecList, ["--utf8", Unicode]))}, + {"Default utf8_binary argument", + ?_assertEqual({ok, {[{utf8, Utf8}], []}}, parse(OptSpecsWithDefault, []))}, + {"Default utf8_binary argument usage", + ?_assert(is_list(string:find(getopt:usage_options(OptSpecsWithDefault), Unicode)))}].