Nurina/lib/nurina.ex

175 lines
5.1 KiB
Elixir
Raw Normal View History

2015-02-14 18:57:07 +00:00
defmodule Nurina do
  @moduledoc """
  A small URI parser that tries to follow RFC 3986.

  The parser is a hand-written state machine. `parse/1` is the entry point;
  `parse/3` and `parse/4` are the individual states and are only public for
  backward compatibility.
  """

  defmodule Info do
    @moduledoc """
    Contains a struct containing info about a given URL.

    Fields:

      * `scheme` - the scheme, lower-cased (e.g. `"http"`)
      * `hier` - the raw hierarchical part (everything between the scheme's
        `:` and the first `?` or `#`)
      * `query` - the query string without the leading `?`
      * `fragment` - the fragment without the leading `#`
      * `valid` - `false` when the parser rejected the input
      * `authority` - the raw authority (`userinfo@host:port`)
      * `path` - the path component
      * `host` - the host; for IPv6 literals the surrounding brackets are removed
      * `port` - the port as an integer, or the scheme's default port
      * `userinfo` - everything before `@` in the authority
      * `is_ipv6` - `true` when the host was written as a bracketed literal

    Components that are missing or empty are `nil`.
    """

    @type t :: %__MODULE__{
            scheme: String.t() | nil,
            hier: String.t() | nil,
            query: String.t() | nil,
            fragment: String.t() | nil,
            valid: boolean(),
            authority: String.t() | nil,
            path: String.t() | nil,
            host: String.t() | nil,
            port: integer() | nil,
            userinfo: String.t() | nil,
            is_ipv6: boolean()
          }

    defstruct scheme: nil,
              hier: nil,
              query: nil,
              fragment: nil,
              valid: true,
              authority: nil,
              path: nil,
              host: nil,
              port: nil,
              userinfo: nil,
              is_ipv6: false
  end

  @doc """
  Parse an URI into components. Will return a Nurina.Info struct.
  Tries to follow RFC 3986.

  When the URI is valid and carries no explicit port, `port` is filled in
  with `URI.default_port/1` for the parsed scheme (which may still be `nil`
  for schemes without a registered default).

  Invalid input does not raise; it is reported through `valid: false`.
  """
  @spec parse(binary()) :: Info.t()
  def parse(uri) do
    case parse(uri, %Info{}, "", :scheme) do
      %Info{valid: true, port: nil} = parsed ->
        %{parsed | port: URI.default_port(parsed.scheme)}

      parsed ->
        parsed
    end
  end

  # ---------------------------------------------------------------------------
  # parse/3: states that dispatch on a complete, already isolated component.
  # Internal to the state machine; public only for backward compatibility.
  # ---------------------------------------------------------------------------

  # Hier part parsing: a leading "//" announces an authority.
  @doc false
  def parse(<<"//", rest::binary>>, parsed, :hier_parse), do: parse(rest, parsed, :hier_auth)
  def parse(hier, parsed, :hier_parse), do: parse(hier, parsed, :hier_no_auth)

  # Without an authority the whole hier part is the path.
  def parse(hier, parsed, :hier_no_auth), do: %{parsed | path: nil_or(hier)}

  def parse(hier, parsed, :hier_auth) do
    # Split authority from path first, then go inside the authority to parse
    # its parts. The result must be returned from the `case`: a rebinding made
    # inside an `if` block is not visible after the block.
    case parse(hier, parsed, "", :authority) do
      %{authority: nil} = without_authority ->
        without_authority

      %{authority: authority} = with_authority ->
        parse(authority, with_authority, "", :userinfo)
    end
  end

  # Host part parsing.
  # Split into IPv6 parsing if needed, :host_4 will handle IPv4 and domains.
  def parse(<<"[", rest::binary>>, parsed, :host), do: parse(rest, parsed, "", :host_6)
  def parse(host, parsed, :host), do: parse(host, parsed, "", :host_4)

  # Port part parsing.
  # Nothing after the host (reached after an IPv6 literal's closing "]"):
  # the port is optional, so the URI stays valid.
  def parse("", parsed, :port), do: parsed

  def parse(<<":", rest::binary>>, parsed, :port) do
    case Integer.parse(rest) do
      # The whole remainder must be consumed by the integer.
      {port, ""} -> %{parsed | port: port}
      _ -> %{parsed | valid: false}
    end
  end

  # Anything else following the host is garbage.
  def parse(_, parsed, :port), do: %{parsed | port: nil, valid: false}

  # ---------------------------------------------------------------------------
  # parse/4: character-walking states. `current_part` accumulates the bytes
  # seen so far in the current `mode` until a delimiter or the end of input.
  # Internal to the state machine; public only for backward compatibility.
  # ---------------------------------------------------------------------------

  # Scheme part parsing.
  # If URI stops at scheme, it's not valid.
  @doc false
  def parse("", parsed, _, :scheme), do: %{parsed | valid: false}

  def parse(<<":", rest::binary>>, parsed, current_part, :scheme) do
    # Scheme must exist.
    case nil_or(String.downcase(current_part)) do
      nil -> %{parsed | valid: false}
      scheme -> parse(rest, %{parsed | scheme: scheme}, "", :hier)
    end
  end

  # Hier part parsing.
  # Hier is the hierarchical sequence of the URI. In RFC 3986 it is 'hier-part'.
  def parse("", parsed, current_part, :hier), do: parse_hier(parsed, current_part)

  def parse(<<"?", rest::binary>>, parsed, current_part, :hier) do
    # A query follows the hier part.
    parsed
    |> parse_hier(current_part)
    |> then(&parse(rest, &1, "", :query))
  end

  def parse(<<"#", rest::binary>>, parsed, current_part, :hier) do
    # A fragment may follow the hier part directly, without a query.
    parsed = parse_hier(parsed, current_part)
    %{parsed | fragment: nil_or(rest)}
  end

  # Query part parsing.
  def parse("", parsed, current_part, :query), do: %{parsed | query: nil_or(current_part)}

  def parse(<<"#", rest::binary>>, parsed, current_part, :query) do
    # All the rest is just fragment.
    %{parsed | query: nil_or(current_part), fragment: nil_or(rest)}
  end

  # Authority part parsing: the authority ends at the first "/" (which
  # belongs to the path) or at the end of the hier part.
  def parse("", parsed, current_part, :authority) do
    %{parsed | authority: nil_or(current_part)}
  end

  def parse(<<"/", rest::binary>>, parsed, current_part, :authority) do
    %{parsed | authority: nil_or(current_part), path: "/" <> rest}
  end

  # Userinfo part parsing.
  # If no userinfo was found, start from the beginning and look for host instead.
  def parse("", parsed, current_part, :userinfo), do: parse(current_part, parsed, :host)

  def parse(<<"@", rest::binary>>, parsed, current_part, :userinfo) do
    parse(rest, %{parsed | userinfo: nil_or(current_part)}, :host)
  end

  # IPv6 host parsing.
  # Host must not end without closing ].
  def parse("", parsed, _, :host_6), do: %{parsed | valid: false}

  def parse(<<"]", rest::binary>>, parsed, current_part, :host_6) do
    parse(rest, %{parsed | is_ipv6: true, host: nil_or(current_part)}, :port)
  end

  # "Normal" host parsing (IPv4 addresses and registered names).
  def parse("", parsed, current_part, :host_4), do: %{parsed | host: nil_or(current_part)}

  def parse(<<":", rest::binary>>, parsed, current_part, :host_4) do
    # Hand the ":" back so the :port state sees its delimiter.
    parse(":" <> rest, %{parsed | host: nil_or(current_part)}, :port)
  end

  # Default walking function for all parsing modes, just walk through all
  # non recognised characters. The byte is appended unchanged: re-encoding it
  # as a UTF-8 code point would corrupt multi-byte (non-ASCII) input.
  def parse(<<char, rest::binary>>, parsed, current_part, mode) do
    parse(rest, parsed, current_part <> <<char>>, mode)
  end

  # Record the raw hier part, then go inside the hierarchy to parse its parts.
  defp parse_hier(parsed, hier) do
    parse(hier, %{parsed | hier: nil_or(hier)}, :hier_parse)
  end

  # Convert "" into nil.
  defp nil_or(""), do: nil
  defp nil_or(str), do: str
end