From 234538f653918e5c9b172c8b3a982a4d50cc1f30 Mon Sep 17 00:00:00 2001
From: Mikko Ahlroth
Date: Sat, 14 Feb 2015 20:57:07 +0200
Subject: [PATCH] Initial commit

---
 .gitignore           |   5 ++
 README.md            |   3 +
 lib/nurina.ex        | 188 +++++++++++++++++++++++++++++++++++++++++++
 lib/speedtest.ex     |  15 ++++
 mix.exs              |  21 ++++++
 test/nurina_test.exs | 159 +++++++++++++++++++++++++++++++++++
 test/test_helper.exs |   1 +
 7 files changed, 392 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 lib/nurina.ex
 create mode 100644 lib/speedtest.ex
 create mode 100644 mix.exs
 create mode 100644 test/nurina_test.exs
 create mode 100644 test/test_helper.exs

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f050951
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+/ebin
+/deps
+erl_crash.dump
+*.ez
+_build/*
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..43c4a5d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,3 @@
+# Nurina
+
+** TODO: Add description **
diff --git a/lib/nurina.ex b/lib/nurina.ex
new file mode 100644
index 0000000..7665749
--- /dev/null
+++ b/lib/nurina.ex
@@ -0,0 +1,188 @@
+defmodule Nurina do
+
+  defmodule Info do
+    @moduledoc """
+    Contains a struct containing info about a given URL.
+    """
+
+    defstruct scheme: nil,
+      hier: nil,
+      query: nil,
+      fragment: nil,
+      valid: true,
+      authority: nil,
+      path: nil,
+      host: nil,
+      port: nil,
+      userinfo: nil,
+      is_ipv6: false
+  end
+
+  @doc """
+  Parse a URI into components. Will return a Nurina.Info struct.
+
+  Tries to follow RFC 3986.
+  """
+  def parse(uri) do
+    parsed = parse uri, %Info{}, "", :scheme
+
+    case parsed do
+      %{valid: true, port: nil} -> %{parsed | port: URI.default_port parsed.scheme}
+      _ -> parsed
+    end
+  end
+
+
+  # Hier part parsing
+  def parse(<< "//", rest :: binary >>, parsed, :hier_parse), do: parse(rest, parsed, :hier_auth)
+  def parse(hier, parsed, :hier_parse), do: parse(hier, parsed, :hier_no_auth)
+
+  def parse(hier, parsed, :hier_no_auth), do: %{parsed | path: nil_or hier}
+  def parse(hier, parsed, :hier_auth) do
+    parsed = parse(hier, parsed, "", :authority)
+
+    # Go inside authority to parse parts. The case expression is returned
+    # directly because a rebinding made inside a block is not visible outside it.
+    case parsed.authority do
+      nil -> parsed
+      authority -> parse(authority, parsed, "", :userinfo)
+    end
+  end
+
+  # Host part parsing
+  # Split into IPv6 parsing if needed, :host_4 will handle IPv4 and domains
+  def parse(<< "[", rest :: binary >>, parsed, :host), do: parse(rest, parsed, "", :host_6)
+  def parse(hier, parsed, :host), do: parse(hier, parsed, "", :host_4)
+
+  # Port part parsing
+  # Nothing after the host means no explicit port, parse/1 fills in the default
+  def parse("", parsed, :port), do: parsed
+
+  def parse(<< ":", rest :: binary >>, parsed, :port) do
+    case Integer.parse rest do
+      :error -> %{parsed | valid: false}
+
+      {_, remainder} when remainder != "" -> %{parsed | valid: false}
+
+      {port, _} -> %{parsed | port: port}
+    end
+  end
+  def parse(_, parsed, :port), do: %{parsed | port: nil, valid: false}
+
+
+
+  # Scheme part parsing
+  # If URI stops at scheme, it's not valid
+  def parse("", parsed, _, :scheme), do: %{parsed | valid: false}
+
+  def parse(<< ":", rest :: binary >>, parsed, current_part, :scheme) do
+    parsed = %{parsed | scheme: nil_or String.downcase current_part}
+
+    # Scheme must exist
+    case parsed.scheme do
+      nil -> %{parsed | valid: false}
+      _ -> parse(rest, parsed, "", :hier)
+    end
+  end
+
+
+
+  # Hier part parsing
+  # Hier is the hierarchical sequence of the URI. In RFC 3986 it is 'hier-part'.
+  def parse("", parsed, current_part, :hier) do
+    parsed = %{parsed | hier: nil_or current_part}
+
+    # Go inside hierarchy to parse parts
+    parse(current_part, parsed, :hier_parse)
+  end
+
+  def parse(<< "?", rest :: binary >>, parsed, current_part, :hier) do
+    parsed = %{parsed | hier: nil_or current_part}
+
+    # Go inside hierarchy to parse parts
+    parsed = parse(current_part, parsed, :hier_parse)
+
+    # Everything after the "?" is the query, possibly followed by a fragment
+    parse(rest, parsed, "", :query)
+  end
+
+  # A fragment may follow the hier part directly when there is no query
+  def parse(<< "#", rest :: binary >>, parsed, current_part, :hier) do
+    parsed = %{parsed | hier: nil_or current_part}
+
+    # Go inside hierarchy to parse parts
+    parsed = parse(current_part, parsed, :hier_parse)
+
+    # All the rest is just fragment
+    %{parsed | fragment: nil_or rest}
+  end
+
+
+
+  # Query part parsing
+  def parse("", parsed, current_part, :query), do: %{parsed | query: nil_or current_part}
+
+  def parse(<< "#", rest :: binary >>, parsed, current_part, :query) do
+    # All the rest is just fragment
+    %{parsed | query: nil_or(current_part), fragment: nil_or rest}
+  end
+
+
+
+  # Authority part parsing
+  def parse("", parsed, current_part, :authority), do: %{parsed | authority: nil_or current_part}
+
+  def parse(<< "/", rest :: binary >>, parsed, current_part, :authority) do
+    %{parsed | authority: nil_or(current_part), path: "/" <> rest}
+  end
+
+
+
+  # Userinfo part parsing
+  # If no userinfo was found, start from the beginning and look for host instead
+  def parse("", parsed, current_part, :userinfo), do: parse(current_part, parsed, :host)
+
+  def parse(<< "@", rest :: binary >>, parsed, current_part, :userinfo) do
+    parsed = %{parsed | userinfo: nil_or current_part}
+    parse(rest, parsed, :host)
+  end
+
+
+
+  # IPv6 host parsing
+  # Host must not end without closing ]
+  def parse("", parsed, _, :host_6), do: %{parsed | valid: false}
+
+  def parse(<< "]", rest :: binary >>, parsed, current_part, :host_6) do
+    parsed = %{parsed | is_ipv6: true, host: nil_or current_part}
+    parse(rest, parsed, :port)
+  end
+
+
+
+  # "Normal" host parsing
+  def parse("", parsed, current_part, :host_4), do: %{parsed | host: nil_or current_part}
+
+  def parse(<< ":", rest :: binary >>, parsed, current_part, :host_4) do
+    parsed = %{parsed | host: nil_or current_part}
+    parse(":" <> rest, parsed, :port)
+  end
+
+
+
+
+
+
+  # Default walking function for all parsing modes, just walk through all
+  # unrecognised characters
+  def parse(<< char :: utf8, rest :: binary >>, parsed, current_part, mode) do
+    current_part = current_part <> << char :: utf8 >>
+    parse(rest, parsed, current_part, mode)
+  end
+
+
+
+  # Convert "" into nil
+  defp nil_or(str) when str == "", do: nil
+  defp nil_or(str), do: str
+end
diff --git a/lib/speedtest.ex b/lib/speedtest.ex
new file mode 100644
index 0000000..4180beb
--- /dev/null
+++ b/lib/speedtest.ex
@@ -0,0 +1,15 @@
+defmodule Nurina.Speedtest do
+
+  def run(_, iterations, _) when iterations == 0, do: nil
+
+  def run(url, iterations, :nurina) do
+    Nurina.parse(url)
+    run url, iterations - 1, :nurina
+  end
+
+  def run(url, iterations, :uri) do
+    URI.parse(url)
+    run url, iterations - 1, :uri
+  end
+
+end
diff --git a/mix.exs b/mix.exs
new file mode 100644
index 0000000..d839a31
--- /dev/null
+++ b/mix.exs
@@ -0,0 +1,21 @@
+defmodule Nurina.Mixfile do
+  use Mix.Project
+
+  def project do
+    [ app: :nurina,
+      version: "0.1.0",
+      elixir: "~> 1.0.3",
+      deps: deps() ]
+  end
+
+  # Configuration for the OTP application
+  def application do
+    []
+  end
+
+  # Returns the list of dependencies in the format:
+  # { :foobar, "~> 0.1", git: "https://github.com/elixir-lang/foobar.git" }
+  defp deps do
+    []
+  end
+end
diff --git a/test/nurina_test.exs b/test/nurina_test.exs
new file mode 100644
index 0000000..fd1008c
--- /dev/null
+++ b/test/nurina_test.exs
@@ -0,0 +1,159 @@
+defmodule NurinaTest do
+  use ExUnit.Case
+
+  test :parse_http do
+    assert %Nurina.Info{scheme: "http", host: "foo.com", path: "/path/to/something",
+      query: "foo=bar&bar=foo", fragment: "fragment", port: 80,
+      authority: "foo.com", userinfo: nil,
+      hier: "//foo.com/path/to/something", valid: true} ==
+      Nurina.parse("http://foo.com/path/to/something?foo=bar&bar=foo#fragment")
+  end
+
+  test :parse_https do
+    assert %Nurina.Info{scheme: "https", host: "foo.com", authority: "foo.com",
+      query: nil, fragment: nil, port: 443, path: nil, userinfo: nil,
+      hier: "//foo.com", valid: true} ==
+      Nurina.parse("https://foo.com")
+  end
+
+  test :parse_file do
+    assert %Nurina.Info{scheme: "file", host: nil, path: "/foo/bar/baz", userinfo: nil,
+      query: nil, fragment: nil, port: nil, authority: nil,
+      hier: "///foo/bar/baz", valid: true} ==
+      Nurina.parse("file:///foo/bar/baz")
+  end
+
+  test :parse_ftp do
+    assert %Nurina.Info{scheme: "ftp", host: "private.ftp-servers.example.com",
+      userinfo: "user001:secretpassword", authority: "user001:secretpassword@private.ftp-servers.example.com",
+      path: "/mydirectory/myfile.txt", query: nil, fragment: nil,
+      port: 21,
+      hier: "//user001:secretpassword@private.ftp-servers.example.com/mydirectory/myfile.txt", valid: true} ==
+      Nurina.parse("ftp://user001:secretpassword@private.ftp-servers.example.com/mydirectory/myfile.txt")
+  end
+
+  test :parse_sftp do
+    assert %Nurina.Info{scheme: "sftp", host: "private.ftp-servers.example.com",
+      userinfo: "user001:secretpassword", authority: "user001:secretpassword@private.ftp-servers.example.com",
+      path: "/mydirectory/myfile.txt", query: nil, fragment: nil,
+      port: 22,
+      hier: "//user001:secretpassword@private.ftp-servers.example.com/mydirectory/myfile.txt", valid: true} ==
+      Nurina.parse("sftp://user001:secretpassword@private.ftp-servers.example.com/mydirectory/myfile.txt")
+  end
+
+  test :parse_tftp do
+    assert %Nurina.Info{scheme: "tftp", host: "private.ftp-servers.example.com",
+      userinfo: "user001:secretpassword", authority: "user001:secretpassword@private.ftp-servers.example.com",
+      path: "/mydirectory/myfile.txt", query: nil, fragment: nil, port: 69,
+      hier: "//user001:secretpassword@private.ftp-servers.example.com/mydirectory/myfile.txt", valid: true} ==
+      Nurina.parse("tftp://user001:secretpassword@private.ftp-servers.example.com/mydirectory/myfile.txt")
+  end
+
+
+  test :parse_ldap do
+    assert %Nurina.Info{scheme: "ldap", host: nil, authority: nil, userinfo: nil,
+      path: "/dc=example,dc=com", query: "?sub?(givenName=John)",
+      fragment: nil, port: 389,
+      hier: "///dc=example,dc=com", valid: true} ==
+      Nurina.parse("ldap:///dc=example,dc=com??sub?(givenName=John)")
+    assert %Nurina.Info{scheme: "ldap", host: "ldap.example.com", authority: "ldap.example.com",
+      userinfo: nil, path: "/cn=John%20Doe,dc=example,dc=com", fragment: nil,
+      port: 389, query: nil,
+      hier: "//ldap.example.com/cn=John%20Doe,dc=example,dc=com", valid: true} ==
+      Nurina.parse("ldap://ldap.example.com/cn=John%20Doe,dc=example,dc=com")
+  end
+
+  test :parse_mailto do
+    assert %Nurina.Info{scheme: "mailto", host: nil, authority: nil, userinfo: nil,
+      path: "foo@foo.com", query: nil, fragment: nil, port: nil, hier: "foo@foo.com", valid: true} ==
+      Nurina.parse("mailto:foo@foo.com")
+  end
+
+  test :parse_splits_authority do
+    assert %Nurina.Info{scheme: "http", host: "foo.com", path: nil,
+      query: nil, fragment: nil, port: 4444,
+      authority: "foo:bar@foo.com:4444",
+      userinfo: "foo:bar",
+      hier: "//foo:bar@foo.com:4444", valid: true} ==
+      Nurina.parse("http://foo:bar@foo.com:4444")
+    assert %Nurina.Info{scheme: "https", host: "foo.com", path: nil,
+      query: nil, fragment: nil, port: 443,
+      authority: "foo:bar@foo.com", userinfo: "foo:bar",
+      hier: "//foo:bar@foo.com", valid: true} ==
+      Nurina.parse("https://foo:bar@foo.com")
+    assert %Nurina.Info{scheme: "http", host: "foo.com", path: nil,
+      query: nil, fragment: nil, port: 4444,
+      authority: "foo.com:4444",
+      userinfo: nil,
+      hier: "//foo.com:4444", valid: true} ==
+      Nurina.parse("http://foo.com:4444")
+  end
+
+  test :parse_bad_uris do
+    %Nurina.Info{valid: false} = Nurina.parse("https??@?F?@#>F//23/")
+    %Nurina.Info{valid: false} = Nurina.parse("")
+    %Nurina.Info{valid: false} = Nurina.parse(":https")
+    %Nurina.Info{valid: false} = Nurina.parse("https")
+    %Nurina.Info{valid: false} = Nurina.parse("http://example.com:what/")
+  end
+
+  test :ipv6_addresses do
+    addrs = [
+      "::", # undefined
+      "::1", # loopback
+      "1080::8:800:200C:417A", # unicast
+      "FF01::101", # multicast
+      "2607:f3f0:2:0:216:3cff:fef0:174a", # abbreviated
+      "2607:f3F0:2:0:216:3cFf:Fef0:174A", # mixed hex case
+      "2051:0db8:2d5a:3521:8313:ffad:1242:8e2e", # complete
+      "::00:192.168.10.184" # embedded IPv4
+    ]
+
+    Enum.each addrs, fn(addr) ->
+      simple_uri = Nurina.parse("http://[#{addr}]/")
+      assert simple_uri.host == addr
+
+      userinfo_uri = Nurina.parse("http://user:pass@[#{addr}]/")
+      assert userinfo_uri.host == addr
+      assert userinfo_uri.userinfo == "user:pass"
+
+      port_uri = Nurina.parse("http://[#{addr}]:2222/")
+      assert port_uri.host == addr
+      assert port_uri.port == 2222
+
+      userinfo_port_uri = Nurina.parse("http://user:pass@[#{addr}]:2222/")
+      assert userinfo_port_uri.host == addr
+      assert userinfo_port_uri.userinfo == "user:pass"
+      assert userinfo_port_uri.port == 2222
+    end
+  end
+
+  test :downcase_scheme do
+    assert Nurina.parse("hTtP://google.com").scheme == "http"
+  end
+
+  test :fragment_without_query do
+    uri = Nurina.parse("http://foo.com/path#fragment")
+    assert uri.valid
+    assert uri.path == "/path"
+    assert uri.query == nil
+    assert uri.fragment == "fragment"
+  end
+
+  test :ipv6_without_port do
+    uri = Nurina.parse("http://[::1]/")
+    assert uri.valid
+    assert uri.host == "::1"
+    assert uri.port == 80
+  end
+
+  #test :to_string do
+  #  assert to_string(Nurina.parse("http://google.com")) == "http://google.com"
+  #  assert to_string(Nurina.parse("http://google.com:443")) == "http://google.com:443"
+  #  assert to_string(Nurina.parse("https://google.com:443")) == "https://google.com"
+  #  assert to_string(Nurina.parse("http://lol:wut@google.com")) == "http://lol:wut@google.com"
+  #  assert to_string(Nurina.parse("http://google.com/elixir")) == "http://google.com/elixir"
+  #  assert to_string(Nurina.parse("http://google.com?q=lol")) == "http://google.com?q=lol"
+  #  assert to_string(Nurina.parse("http://google.com?q=lol#omg")) == "http://google.com?q=lol#omg"
+  #end
+end
diff --git a/test/test_helper.exs b/test/test_helper.exs
new file mode 100644
index 0000000..4b8b246
--- /dev/null
+++ b/test/test_helper.exs
@@ -0,0 +1 @@
+ExUnit.start