Nurina/lib/nurina.ex

172 lines
5 KiB
Elixir
Raw Permalink Normal View History

2015-02-14 18:57:07 +00:00
defmodule Nurina do
  @moduledoc """
  A small URI parser that tries to follow RFC 3986.

  The parser is a hand-written state machine: `parse/1` walks the input one
  byte at a time, accumulating the current component until it meets the
  delimiter that ends the current mode (`:scheme`, `:hier`, `:query`, ...),
  and then descends into the hierarchical part to extract authority,
  userinfo, host, port and path.
  """

  defmodule Info do
    @moduledoc """
    Contains a struct containing info about a given URL.

    Every component defaults to `nil` when absent. `valid` starts out `true`
    and is flipped to `false` by the parser when the input cannot be parsed.
    `is_ipv6` is set when the host was given as a bracketed IPv6 literal.
    """

    @type t :: %__MODULE__{
            scheme: String.t() | nil,
            hier: String.t() | nil,
            query: String.t() | nil,
            fragment: String.t() | nil,
            valid: boolean,
            authority: String.t() | nil,
            path: String.t() | nil,
            host: String.t() | nil,
            port: integer | nil,
            userinfo: String.t() | nil,
            is_ipv6: boolean
          }

    defstruct scheme: nil,
              hier: nil,
              query: nil,
              fragment: nil,
              valid: true,
              authority: nil,
              path: nil,
              host: nil,
              port: nil,
              userinfo: nil,
              is_ipv6: false
  end

  @doc """
  Parse an URI into components. Will return a Nurina.Info struct.
  Tries to follow RFC 3986.

  The scheme is lower-cased. When the URI is valid and carries no explicit
  port, the port is filled in from `URI.default_port/1` for the scheme (which
  is `nil` for schemes without a registered default).

  Input that cannot be parsed does not raise; the returned struct has
  `valid: false` instead.
  """
  @spec parse(binary) :: Info.t()
  def parse(uri) do
    case parse(uri, %Info{}, "", :scheme) do
      %Info{valid: true, port: nil, scheme: scheme} = parsed ->
        %{parsed | port: URI.default_port(scheme)}

      parsed ->
        parsed
    end
  end

  # ---------------------------------------------------------------------------
  # parse/3: dispatch on an already isolated component.
  # Kept public for backward compatibility, but it is an implementation detail.
  # ---------------------------------------------------------------------------

  # Hier part parsing: a leading "//" means an authority follows.
  @doc false
  def parse(<<"//", rest::binary>>, parsed, :hier_parse), do: parse(rest, parsed, :hier_auth)
  def parse(hier, parsed, :hier_parse), do: parse(hier, parsed, :hier_no_auth)

  # Without an authority the whole hier part is the path.
  def parse(hier, parsed, :hier_no_auth), do: %{parsed | path: nil_or(hier)}

  def parse(hier, parsed, :hier_auth) do
    parsed = parse(hier, parsed, "", :authority)

    # Go inside authority to parse parts
    case parsed.authority do
      nil -> parsed
      authority -> parse(authority, parsed, "", :userinfo)
    end
  end

  # Host part parsing
  # Split into IPv6 parsing if needed, :host_4 will handle IPv4 and domains
  def parse(<<"[", rest::binary>>, parsed, :host), do: parse(rest, parsed, "", :host_6)
  def parse(host, parsed, :host), do: parse(host, parsed, "", :host_4)

  # Port part parsing
  # Nothing after the host (only reachable after an IPv6 literal's closing
  # "]"): there is simply no port, which is not an error.
  def parse("", parsed, :port), do: parsed

  # The port must start with a digit; Integer.parse/1 alone would also accept
  # a leading "+" or "-".
  def parse(<<":", first, _::binary>> = port_part, parsed, :port) when first in ?0..?9 do
    ":" <> digits = port_part

    case Integer.parse(digits) do
      {port, ""} -> %{parsed | port: port}
      _ -> %{parsed | valid: false}
    end
  end

  # Anything else where a port was expected is invalid.
  def parse(_, parsed, :port), do: %{parsed | port: nil, valid: false}

  # ---------------------------------------------------------------------------
  # parse/4: byte-by-byte walker. `current_part` accumulates the component
  # being read and `mode` names the component. Clause order matters: the
  # generic walker at the end must stay last.
  # ---------------------------------------------------------------------------

  # Scheme part parsing
  # If URI stops at scheme, it's not valid
  @doc false
  def parse("", parsed, _, :scheme), do: %{parsed | valid: false}

  def parse(<<":", rest::binary>>, parsed, current_part, :scheme) do
    # Scheme must exist
    case nil_or(String.downcase(current_part)) do
      nil -> %{parsed | valid: false}
      scheme -> parse(rest, %{parsed | scheme: scheme}, "", :hier)
    end
  end

  # Hier part parsing
  # Hier is the hierarchical sequence of the URI. In RFC 3986 it is 'hier-part'.
  def parse("", parsed, current_part, :hier), do: split_hier(parsed, current_part)

  # A "?" ends the hier part and starts the query.
  def parse(<<"?", rest::binary>>, parsed, current_part, :hier) do
    parsed = split_hier(parsed, current_part)
    parse(rest, parsed, "", :query)
  end

  # A "#" directly after the hier part: there is no query, all the rest is
  # the fragment.
  def parse(<<"#", rest::binary>>, parsed, current_part, :hier) do
    parsed = split_hier(parsed, current_part)
    %{parsed | fragment: nil_or(rest)}
  end

  # Query part parsing
  def parse("", parsed, current_part, :query), do: %{parsed | query: nil_or(current_part)}

  def parse(<<"#", rest::binary>>, parsed, current_part, :query) do
    # All the rest is just fragment
    %{parsed | query: nil_or(current_part), fragment: nil_or(rest)}
  end

  # Authority part parsing
  def parse("", parsed, current_part, :authority) do
    %{parsed | authority: nil_or(current_part)}
  end

  # The first "/" ends the authority; it and everything after it is the path.
  def parse(<<"/", rest::binary>>, parsed, current_part, :authority) do
    %{parsed | authority: nil_or(current_part), path: "/" <> rest}
  end

  # Userinfo part parsing
  # If no userinfo was found, start from the beginning and look for host instead
  def parse("", parsed, current_part, :userinfo), do: parse(current_part, parsed, :host)

  def parse(<<"@", rest::binary>>, parsed, current_part, :userinfo) do
    parse(rest, %{parsed | userinfo: nil_or(current_part)}, :host)
  end

  # IPv6 host parsing
  # Host must not end without closing ]
  def parse("", parsed, _, :host_6), do: %{parsed | valid: false}

  def parse(<<"]", rest::binary>>, parsed, current_part, :host_6) do
    parse(rest, %{parsed | is_ipv6: true, host: nil_or(current_part)}, :port)
  end

  # "Normal" host parsing
  def parse("", parsed, current_part, :host_4), do: %{parsed | host: nil_or(current_part)}

  def parse(<<":", rest::binary>>, parsed, current_part, :host_4) do
    # Hand the ":" back so the :port clauses see the delimiter.
    parse(":" <> rest, %{parsed | host: nil_or(current_part)}, :port)
  end

  # Default walking function for all parsing modes, just walk through all
  # non recognised characters.
  # The byte is appended unchanged: re-encoding it as a UTF-8 codepoint would
  # double-encode every byte of a multi-byte character.
  def parse(<<char, rest::binary>>, parsed, current_part, mode) do
    parse(rest, parsed, current_part <> <<char>>, mode)
  end

  # Record the raw hier part and go inside hierarchy to parse parts.
  defp split_hier(parsed, hier) do
    parse(hier, %{parsed | hier: nil_or(hier)}, :hier_parse)
  end

  # Convert "" into nil
  defp nil_or(""), do: nil
  defp nil_or(str), do: str
end