From 28508354142f1a8159cda070f156969b172febca Mon Sep 17 00:00:00 2001 From: "Gavin M. Roy" Date: Thu, 27 Oct 2016 12:02:26 -0400 Subject: [PATCH] Add percent_encode/2 and plus_encode/2 --- README.md | 2 ++ src/urilib.app.src | 4 ++-- src/urilib.erl | 56 ++++++++++++++++++++++++++++++++++++++++--- test/urilib_tests.erl | 30 +++++++++++++++++++++++ 4 files changed, 87 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 7290861..abcca87 100644 --- a/README.md +++ b/README.md @@ -19,8 +19,10 @@ Function | Description `parse/2` | Parse a URI, returning the result as either a `uri()` or `url()`. `percent_decode/1` | Decode a percent encoded string value. `percent_encode/1` | Percent encode a string value. +`percent_encode/2` | Percent encode a string value, explicitly stating the desired case for hexadecimal values. Pass `uppercase` as the second argument to have hex values returned as `%2F` instead of `%2f`. `plus_decode/1` | Decode a percent encoded string value that uses pluses for spaces. `plus_encode/1` | Percent encode a string value similar to `percent_encode/1`, but encodes spaces with a plus (`+`) instead of `%20`. +`plus_encode/2` | Percent encode a string value similar to `percent_encode/1`, but encodes spaces with a plus (`+`) instead of `%20`, explicitly stating the desired case for hexadecimal values. Pass `uppercase` as the second argument to have hex values returned as `%2F` instead of `%2f`. ### Types diff --git a/src/urilib.app.src b/src/urilib.app.src index aacf95c..59b3bad 100644 --- a/src/urilib.app.src +++ b/src/urilib.app.src @@ -1,6 +1,6 @@ {application, urilib, [ {description, "A RFC-3986 URI Library for parsing and building URIs"}, - {vsn,"0.1.1"}, + {vsn,"0.2.0"}, {licenses, ["BSD"]}, {links, [{"Github", "https://github.com/gmr/urilib"}]}, {maintainers, ["Gavin M. Roy"]}, @@ -11,6 +11,6 @@ inets, edoc ]}, - {mod, {urilib, []}}, + {modules, []}, {env, []} ]}. 
diff --git a/src/urilib.erl b/src/urilib.erl index ad09f88..bc9e8a3 100644 --- a/src/urilib.erl +++ b/src/urilib.erl @@ -11,8 +11,10 @@ parse/2, percent_decode/1, percent_encode/1, + percent_encode/2, plus_decode/1, - plus_encode/1]). + plus_encode/1, + plus_encode/2]). -export_type([scheme/0, host/0, @@ -32,6 +34,7 @@ -compile(export_all). -endif. +-type hexcase() :: uppercase | lowercase. -type scheme() :: http | https | atom(). -type host() :: string(). -type tcp_port() :: integer(). @@ -45,7 +48,6 @@ -type uri() :: {scheme(), authority(), path(), query(), fragment()}. -type url() :: {scheme(), username(), password(), host(), tcp_port(), path(), query(), fragment()}. - -spec build(Value :: uri() | url()) -> string(). %% @doc Build a URI %% @end @@ -101,12 +103,27 @@ parse(Value, url) -> -spec percent_encode(string()) -> string(). -%% @doc Percent encode a string value. +%% @doc Percent encode a string value. Note that this will return hexadecimal +%% values in lowercase. If you need uppercase values, invoke percent_encode/2 +%% with the second parameter as the value `uppercase'. %% @end percent_encode(Value) -> edoc_lib:escape_uri(Value). +-spec percent_encode(string(), hexcase()) -> string(). +%% @doc Percent encode a string value. +%% +%% When lowercase is passed, hexadecimal strings with A-F values in them are returned +%% as lowercase. Likewise, the uppercase value will encode hexadecimal strings as +%% uppercase values. +%% @end +percent_encode(Value, lowercase) -> + percent_encode(Value); +percent_encode(Value, uppercase) -> + hex_to_upper(percent_encode(Value)). + + -spec percent_decode(string()) -> string(). %% @doc Decode a percent encoded string value. %% @end @@ -124,6 +141,21 @@ plus_encode(Value) -> string:join([edoc_lib:escape_uri(V) || V <- string:tokens(Value, " ")], "+"). +-spec plus_encode(string(), hexcase()) -> string(). +%% @doc Percent encode a string value similar to encode/1, but encodes spaces with a +%% plus (`+') instead of `%20'. 
This function can be used for encoding query arguments. +%% When lowercase is passed, hexadecimal strings with A-F values in them are returned +%% as lowercase. Likewise, the uppercase value will encode hexadecimal strings as +%% uppercase values. +%% +%% Note: The use of plus for space is defined in RFC-1630 but does not appear in RFC-3986. +%% @end +plus_encode(Value, lowercase) -> + plus_encode(Value); +plus_encode(Value, uppercase) -> + hex_to_upper(plus_encode(Value)). + + -spec plus_decode(string()) -> string(). %% @doc Decode a percent encoded string value that uses pluses for spaces. %% @@ -284,3 +316,21 @@ url_maybe_add_fragment(Value, URL) -> _ -> edoc_lib:escape_uri(Value) end, string:join([URL, Fragment], "#"). + + +-spec hex_to_upper(string()) -> string(). +%% @private +hex_to_upper(Value) -> + hex_to_upper(Value, [], []). +hex_to_upper([], [], Value) -> + Value; +hex_to_upper([], Hex, Value) -> + lists:append(Value, string:to_upper(Hex)); +hex_to_upper([37|T], [], Value) -> + hex_to_upper(T, [37], Value); +hex_to_upper([H|T], [], Value) -> + hex_to_upper(T, [], lists:append(Value, [H])); +hex_to_upper(Remaining, Hex, Value) when length(Hex) == 3 -> + hex_to_upper(Remaining, [], lists:append(Value, string:to_upper(Hex))); +hex_to_upper([H|T], Hex, Value) -> + hex_to_upper(T, lists:append(Hex, [H]), Value). diff --git a/test/urilib_tests.erl b/test/urilib_tests.erl index 7072878..3c5f84a 100644 --- a/test/urilib_tests.erl +++ b/test/urilib_tests.erl @@ -2,6 +2,11 @@ -include_lib("eunit/include/eunit.hrl"). +hex_to_upper_test() -> + Value = "%2f%c0%88this+is+%c3%a4n+%c3%aaxample+value+woot%c0%88%2f", + Expect = "%2F%C0%88this+is+%C3%A4n+%C3%AAxample+value+woot%C0%88%2F", + ?assertEqual(Expect, urilib:hex_to_upper(Value)). 
+ build_variation1_test() -> Params = {amqp, {{"guest", "password"}, "rabbitmq", 5672}, "/%2f", [{"heartbeat", "5"}], undefined}, Expect = "amqp://guest:password@rabbitmq:5672/%2f?heartbeat=5", @@ -147,7 +152,32 @@ percent_encode_unicode_test() -> Expect = "foo%2fbar%c0%88baz", ?assertEqual(Expect, urilib:percent_encode(Value)). +percent_encode_lowercase_unicode_test() -> + Value = "/✈this is än êxample value woot✈/", + Expect = "%2f%c0%88this%20is%20%c3%a4n%20%c3%aaxample%20value%20woot%c0%88%2f", + ?assertEqual(Expect, urilib:percent_encode(Value, lowercase)). + +percent_encode_uppercase_unicode_test() -> + Value = "/✈this is än êxample value woot✈/", + Expect = "%2F%C0%88this%20is%20%C3%A4n%20%C3%AAxample%20value%20woot%C0%88%2F", + ?assertEqual(Expect, urilib:percent_encode(Value, uppercase)). + +percent_encode_uppercase_perent_test() -> + Value = "/✈this is än êxample value woot with 30% off✈/", + Expect = "%2F%C0%88this+is+%C3%A4n+%C3%AAxample+value+woot+with+30%25+off%C0%88%2F", + ?assertEqual(Expect, urilib:plus_encode(Value, uppercase)). + plus_encode_test() -> Value = "foo/bar baz", Expect = "foo%2fbar+baz", ?assertEqual(Expect, urilib:plus_encode(Value)). + +plus_encode_lowercase_test() -> + Value = "/✈this is än êxample value woot✈/", + Expect = "%2f%c0%88this+is+%c3%a4n+%c3%aaxample+value+woot%c0%88%2f", + ?assertEqual(Expect, urilib:plus_encode(Value, lowercase)). + +plus_encode_uppercase_test() -> + Value = "/✈this is än êxample value woot✈/", + Expect = "%2F%C0%88this+is+%C3%A4n+%C3%AAxample+value+woot%C0%88%2F", + ?assertEqual(Expect, urilib:plus_encode(Value, uppercase)).