Merge pull request #39 from suprematic/master

Add support for UTF-8 encoded binaries
This commit is contained in:
Juan Jose Comellas 2018-03-06 21:40:48 -03:00 committed by GitHub
commit fde61fe7aa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 42 additions and 11 deletions

View File

@ -57,7 +57,7 @@ The `parse/2` function receives a list of tuples with the command line option
specifications. The type specification for the tuple is: specifications. The type specification for the tuple is:
```erlang ```erlang
-type arg_type() :: 'atom' | 'binary' | 'boolean' | 'float' | 'integer' | 'string'. -type arg_type() :: 'atom' | 'binary' | 'utf8_binary' | 'boolean' | 'float' | 'integer' | 'string'.
-type arg_value() :: atom() | binary() | boolean() | float() | integer() | string(). -type arg_value() :: atom() | binary() | boolean() | float() | integer() | string().
@ -244,7 +244,8 @@ A long option can have the following syntax:
Argument Types Argument Types
-------------- --------------
The arguments allowed for options are: *atom*; *binary*; *boolean*; *float*; *integer*; *string*. The arguments allowed for options are: *atom*; *binary*; *utf8_binary*; *boolean*;
*float*; *integer*; *string*.
The `getopt` module checks every argument to see if it can be converted to its The `getopt` module checks every argument to see if it can be converted to its
correct type. correct type.
@ -256,6 +257,17 @@ Numeric arguments can only be negative when passed as part of an assignment expr
e.g. `--increment=-100` is a valid expression; whereas `--increment -100` is invalid e.g. `--increment=-100` is a valid expression; whereas `--increment -100` is invalid
Arguments of type `utf8_binary` are encoded as UTF-8, so they may contain
code points greater than 255. The resulting value is an NFC-normalized UTF-8 binary.
As of Erlang/OTP 20, the `standard_error` device has its `unicode` option set to `false`.
This prevents the usage text from being printed correctly when default values
contain Unicode binaries or strings. To fix this, enable Unicode on the device:
```erlang
io:setopts(standard_error, [{unicode, true}]).
```
Implicit Arguments Implicit Arguments
------------------ ------------------

View File

@ -13,7 +13,7 @@
-export([parse/2, check/2, parse_and_check/2, format_error/2, -export([parse/2, check/2, parse_and_check/2, format_error/2,
usage/2, usage/3, usage/4, usage/6, tokenize/1]). usage/2, usage/3, usage/4, usage/6, tokenize/1]).
-export([usage_cmd_line/2]). -export([usage_cmd_line/2, usage_options/1]).
-define(LINE_LENGTH, 75). -define(LINE_LENGTH, 75).
-define(MIN_USAGE_COMMAND_LINE_OPTION_LENGTH, 25). -define(MIN_USAGE_COMMAND_LINE_OPTION_LENGTH, 25).
@ -30,7 +30,7 @@
(Char) =:= $\n orelse (Char) =:= $\r)). (Char) =:= $\n orelse (Char) =:= $\r)).
%% Atom indicating the data type that an argument can be converted to. %% Atom indicating the data type that an argument can be converted to.
-type arg_type() :: 'atom' | 'binary' | 'boolean' | 'float' | 'integer' | 'string'. -type arg_type() :: 'atom' | 'binary' | 'utf8_binary' | 'boolean' | 'float' | 'integer' | 'string'.
%% Data type that an argument can be converted to. %% Data type that an argument can be converted to.
-type arg_value() :: atom() | binary() | boolean() | float() | integer() | string(). -type arg_value() :: atom() | binary() | boolean() | float() | integer() | string().
%% Argument specification. %% Argument specification.
@ -435,6 +435,8 @@ to_type({Type, _DefaultArg}, Arg) ->
to_type(Type, Arg); to_type(Type, Arg);
to_type(binary, Arg) -> to_type(binary, Arg) ->
list_to_binary(Arg); list_to_binary(Arg);
to_type(utf8_binary, Arg) ->
unicode:characters_to_nfc_binary(Arg);
to_type(atom, Arg) -> to_type(atom, Arg) ->
list_to_atom(Arg); list_to_atom(Arg);
to_type(integer, Arg) -> to_type(integer, Arg) ->
@ -730,8 +732,8 @@ usage_option_text({_Name, Short, Long, _ArgSpec, _Help}) ->
-spec usage_help_text(option_spec()) -> string(). -spec usage_help_text(option_spec()) -> string().
usage_help_text({_Name, _Short, _Long, {_ArgType, ArgValue}, [_ | _] = Help}) -> usage_help_text({_Name, _Short, _Long, {ArgType, ArgValue}, [_ | _] = Help}) ->
Help ++ " [default: " ++ default_arg_value_to_string(ArgValue) ++ "]"; Help ++ " [default: " ++ default_arg_value_to_string(ArgType, ArgValue) ++ "]";
usage_help_text({_Name, _Short, _Long, _ArgSpec, Help}) -> usage_help_text({_Name, _Short, _Long, _ArgSpec, Help}) ->
Help. Help.
@ -804,15 +806,17 @@ wrap_text_line(_Length, [], Acc, _Count, _CurrentLineAcc) ->
lists:reverse(Acc). lists:reverse(Acc).
default_arg_value_to_string(Value) when is_atom(Value) -> default_arg_value_to_string(_, Value) when is_atom(Value) ->
atom_to_list(Value); atom_to_list(Value);
default_arg_value_to_string(Value) when is_binary(Value) -> default_arg_value_to_string(binary, Value) when is_binary(Value) ->
binary_to_list(Value); binary_to_list(Value);
default_arg_value_to_string(Value) when is_integer(Value) -> default_arg_value_to_string(utf8_binary, Value) when is_binary(Value) ->
unicode:characters_to_list(Value);
default_arg_value_to_string(_, Value) when is_integer(Value) ->
integer_to_list(Value); integer_to_list(Value);
default_arg_value_to_string(Value) when is_float(Value) -> default_arg_value_to_string(_, Value) when is_float(Value) ->
lists:flatten(io_lib:format("~w", [Value])); lists:flatten(io_lib:format("~w", [Value]));
default_arg_value_to_string(Value) -> default_arg_value_to_string(_, Value) ->
Value. Value.

View File

@ -323,3 +323,18 @@ check_test_() ->
?_assertEqual("option 'verbose' has invalid argument: 100", ?_assertEqual("option 'verbose' has invalid argument: 100",
format_error(OptSpecList, {error, {invalid_option_arg, {verbose, "100"}}}))} format_error(OptSpecList, {error, {invalid_option_arg, {verbose, "100"}}}))}
]. ].
%% Test generator for the `utf8_binary' argument type. It checks parsing of
%% an empty argument, parsing of an argument holding code points above 255,
%% use of a default value, and that the default value can be found in the
%% text returned by getopt:usage_options/1.
utf8_binary_test_() ->
    %% Put the standard_error device into UTF-8 mode.
    io:setopts(standard_error, [{encoding, utf8}]),
    %% 1455 does not fit in a single byte.
    CodePoints = [228, 220, 223, 1455],
    Encoded = unicode:characters_to_binary(CodePoints),
    PlainSpec = [{utf8, undefined, "utf8", utf8_binary, "UTF-8 arg"}],
    DefaultSpec = [{utf8, undefined, "utf8", {utf8_binary, Encoded}, "UTF-8 arg"}],
    [
     {"Empty utf8_binary argument",
      ?_assertEqual({ok, {[{utf8, <<>>}], []}},
                    parse(PlainSpec, ["--utf8", ""]))},
     {"Non empty utf8_binary argument",
      ?_assertEqual({ok, {[{utf8, Encoded}], []}},
                    parse(PlainSpec, ["--utf8", CodePoints]))},
     {"Default utf8_binary argument",
      ?_assertEqual({ok, {[{utf8, Encoded}], []}},
                    parse(DefaultSpec, []))},
     {"Default utf8_binary argument usage",
      %% string:find/2 returns `nomatch' when the code points are absent.
      ?_assert(is_list(string:find(getopt:usage_options(DefaultSpec),
                                   CodePoints)))}
    ].