2015-02-14 18:57:07 +00:00
|
|
|
defmodule Nurina do
  @moduledoc """
  A small URI parser that tries to follow RFC 3986.

  The only intended entry point is `parse/1`, which returns a `Nurina.Info`
  struct. The `parse/3` and `parse/4` functions are the internal state
  machine: `parse/4` walks the input byte by byte, collecting the current
  part until a delimiter for the active mode is seen, and `parse/3`
  dispatches an already isolated part to the right sub-parser.
  """

  defmodule Info do
    @moduledoc """
    Contains a struct containing info about a given URL.

    Fields:

      * `scheme` - the scheme, lowercased
      * `hier` - the raw hierarchical part (everything between the scheme's
        `:` and the first `?` or `#`)
      * `query` - the text after `?` up to `#`, if any
      * `fragment` - the text after `#`, if any
      * `valid` - `false` when parsing failed (missing scheme, unterminated
        IPv6 literal, malformed port)
      * `authority` - the text after `//` up to the first `/`
      * `path` - the path; includes the leading `/` when an authority exists
      * `host` - the host; for IPv6 literals the surrounding brackets are
        stripped
      * `port` - the port as an integer; when the URI has none, `parse/1`
        fills in `URI.default_port/1` for the scheme (which may be `nil`)
      * `userinfo` - the text before `@` in the authority
      * `is_ipv6` - `true` when the host was given as a bracketed literal

    Empty components are stored as `nil`, never as `""`.
    """

    @type t :: %__MODULE__{
            scheme: String.t() | nil,
            hier: String.t() | nil,
            query: String.t() | nil,
            fragment: String.t() | nil,
            valid: boolean,
            authority: String.t() | nil,
            path: String.t() | nil,
            host: String.t() | nil,
            port: non_neg_integer | nil,
            userinfo: String.t() | nil,
            is_ipv6: boolean
          }

    defstruct scheme: nil,
              hier: nil,
              query: nil,
              fragment: nil,
              valid: true,
              authority: nil,
              path: nil,
              host: nil,
              port: nil,
              userinfo: nil,
              is_ipv6: false
  end

  @doc """
  Parse an URI into components. Will return a Nurina.Info struct.

  Tries to follow RFC 3986.

  If the URI is valid and carries no explicit port, the port is set to the
  default port registered for the scheme (see `URI.default_port/1`). Invalid
  input does not raise; it is reported through the `valid` field.

  ## Examples

      iex> Nurina.parse("http://example.com/path#top").fragment
      "top"

      iex> Nurina.parse("no-scheme-here").valid
      false
  """
  @spec parse(String.t()) :: Info.t()
  def parse(uri) do
    parsed = parse(uri, %Info{}, "", :scheme)

    case parsed do
      %{valid: true, port: nil} -> %{parsed | port: URI.default_port(parsed.scheme)}
      _ -> parsed
    end
  end

  # Hier part parsing
  # A leading "//" means the hier part carries an authority; otherwise the
  # whole hier part is the path.
  @doc false
  def parse(<<"//", rest::binary>>, parsed, :hier_parse), do: parse(rest, parsed, :hier_auth)
  def parse(hier, parsed, :hier_parse), do: parse(hier, parsed, :hier_no_auth)

  def parse(hier, parsed, :hier_no_auth), do: %{parsed | path: nil_or(hier)}

  def parse(hier, parsed, :hier_auth) do
    parsed = parse(hier, parsed, "", :authority)

    # Go inside authority to parse parts
    case parsed.authority do
      nil -> parsed
      authority -> parse(authority, parsed, "", :userinfo)
    end
  end

  # Host part parsing
  # Split into IPv6 parsing if needed, :host_4 will handle IPv4 and domains
  def parse(<<"[", rest::binary>>, parsed, :host), do: parse(rest, parsed, "", :host_6)
  def parse(host, parsed, :host), do: parse(host, parsed, "", :host_4)

  # Port part parsing
  # Nothing follows the host (reached after an IPv6 literal's closing "]"):
  # there is simply no port, which is not an error.
  def parse("", parsed, :port), do: parsed

  # Integer.parse/1 would accept a leading sign, but RFC 3986 defines
  # port = *DIGIT, so a signed port is malformed.
  def parse(<<":", sign, _::binary>>, parsed, :port) when sign in [?+, ?-] do
    %{parsed | port: nil, valid: false}
  end

  def parse(<<":", rest::binary>>, parsed, :port) do
    case Integer.parse(rest) do
      {port, ""} -> %{parsed | port: port}
      _ -> %{parsed | valid: false}
    end
  end

  # Anything other than ":<digits>" after the host is garbage
  def parse(_, parsed, :port), do: %{parsed | port: nil, valid: false}

  # Scheme part parsing
  # If URI stops at scheme, it's not valid
  @doc false
  def parse("", parsed, _, :scheme), do: %{parsed | valid: false}

  def parse(<<":", rest::binary>>, parsed, current_part, :scheme) do
    # Scheme must exist
    case nil_or(String.downcase(current_part)) do
      nil -> %{parsed | scheme: nil, valid: false}
      scheme -> parse(rest, %{parsed | scheme: scheme}, "", :hier)
    end
  end

  # Hier part parsing
  # Hier is the hierarchical sequence of the URI. In RFC 3986 it is 'hier-part'.
  def parse("", parsed, current_part, :hier), do: parse_hier(parsed, current_part)

  def parse(<<"?", rest::binary>>, parsed, current_part, :hier) do
    parsed = parse_hier(parsed, current_part)

    # Everything after "?" is the query, possibly followed by a fragment
    parse(rest, parsed, "", :query)
  end

  # A fragment may directly follow the hier part when there is no query
  def parse(<<"#", rest::binary>>, parsed, current_part, :hier) do
    parsed = parse_hier(parsed, current_part)

    # All the rest is just fragment
    %{parsed | fragment: nil_or(rest)}
  end

  # Query part parsing
  def parse("", parsed, current_part, :query), do: %{parsed | query: nil_or(current_part)}

  def parse(<<"#", rest::binary>>, parsed, current_part, :query) do
    # All the rest is just fragment
    %{parsed | query: nil_or(current_part), fragment: nil_or(rest)}
  end

  # Authority part parsing
  def parse("", parsed, current_part, :authority) do
    %{parsed | authority: nil_or(current_part)}
  end

  # The first "/" ends the authority; it and everything after it is the path
  def parse(<<"/", rest::binary>>, parsed, current_part, :authority) do
    %{parsed | authority: nil_or(current_part), path: "/" <> rest}
  end

  # Userinfo part parsing
  # If no userinfo was found, start from the beginning and look for host instead
  def parse("", parsed, current_part, :userinfo), do: parse(current_part, parsed, :host)

  def parse(<<"@", rest::binary>>, parsed, current_part, :userinfo) do
    parse(rest, %{parsed | userinfo: nil_or(current_part)}, :host)
  end

  # IPv6 host parsing
  # Host must not end without closing ]
  def parse("", parsed, _, :host_6), do: %{parsed | valid: false}

  def parse(<<"]", rest::binary>>, parsed, current_part, :host_6) do
    parsed = %{parsed | is_ipv6: true, host: nil_or(current_part)}
    parse(rest, parsed, :port)
  end

  # "Normal" host parsing
  def parse("", parsed, current_part, :host_4), do: %{parsed | host: nil_or(current_part)}

  def parse(<<":", rest::binary>>, parsed, current_part, :host_4) do
    parsed = %{parsed | host: nil_or(current_part)}

    # Hand the ":" back so the port parser sees its own delimiter
    parse(":" <> rest, parsed, :port)
  end

  # Default walking function for all parsing modes, just walk through all
  # non recognised characters.
  #
  # `byte` is a single raw byte, not a codepoint, so it is appended as-is.
  # Re-encoding it as UTF-8 would turn every byte >= 128 into two bytes and
  # corrupt non-ASCII input.
  def parse(<<byte, rest::binary>>, parsed, current_part, mode) do
    parse(rest, parsed, current_part <> <<byte>>, mode)
  end

  # Store the raw hier part and descend into it to extract authority and path
  defp parse_hier(parsed, hier) do
    parse(hier, %{parsed | hier: nil_or(hier)}, :hier_parse)
  end

  # Convert "" into nil
  defp nil_or(""), do: nil
  defp nil_or(str), do: str
end
|