defmodule Nurina do
  @moduledoc """
  A small URI parser that splits a URI string into its components.

  `parse/1` is the entry point. It returns a `Nurina.Info` struct; input that
  the parser cannot make sense of is flagged by setting `valid` to `false`.
  """

  defmodule Info do
    @moduledoc """
    The components extracted from a URI by `Nurina.parse/1`.

    Every textual field is `nil` when the corresponding component is absent or
    empty.

      * `scheme`    - the scheme, lower-cased
      * `hier`      - everything after the scheme's `:` up to the first `?`
        or `#` (or the end of the input)
      * `authority` - the part of `hier` after a leading `//`, up to the first `/`
      * `userinfo`  - the part of the authority before `@`
      * `host`      - the host; IPv6 literals are stored without the brackets
      * `is_ipv6`   - `true` when the host was given as a bracketed literal
      * `port`      - the port as an integer; when the URI is valid and has no
        explicit port, `Nurina.parse/1` fills in `URI.default_port/1` of the scheme
      * `path`      - the path (including its leading `/` when an authority is present)
      * `query`     - the text after `?`, up to the first `#`
      * `fragment`  - the text after `#`
      * `valid`     - `false` as soon as any parsing step detects a problem
    """

    defstruct scheme: nil,
              hier: nil,
              query: nil,
              fragment: nil,
              valid: true,
              authority: nil,
              path: nil,
              host: nil,
              port: nil,
              userinfo: nil,
              is_ipv6: false

    @type t :: %__MODULE__{
            scheme: String.t() | nil,
            hier: String.t() | nil,
            query: String.t() | nil,
            fragment: String.t() | nil,
            valid: boolean(),
            authority: String.t() | nil,
            path: String.t() | nil,
            host: String.t() | nil,
            port: integer() | nil,
            userinfo: String.t() | nil,
            is_ipv6: boolean()
          }
  end

  @doc """
  Parses a URI into its components and returns a `Nurina.Info` struct.

  Tries to follow RFC 3986. When the URI is valid and carries no explicit port,
  the port is filled in from `URI.default_port/1` for the parsed scheme (which
  is `nil` for schemes without a registered default).

  ## Examples

      iex> info = Nurina.parse("http://example.com/a?b=c#d")
      iex> {info.scheme, info.host, info.port, info.path, info.query, info.fragment}
      {"http", "example.com", 80, "/a", "b=c", "d"}

      iex> Nurina.parse("no-scheme-here").valid
      false
  """
  @spec parse(String.t()) :: Info.t()
  def parse(uri) do
    case parse(uri, %Info{}, "", :scheme) do
      %{valid: true, port: nil} = info -> %{info | port: URI.default_port(info.scheme)}
      info -> info
    end
  end

  # parse/3 and parse/4 together form the parser's state machine. They stay
  # public so existing callers keep working, but they are implementation
  # details and therefore hidden from the generated docs.
  #
  # parse/3 takes (input, parsed, mode) and handles the steps that look at an
  # already isolated piece of the URI as a whole.

  # Hier part: a leading "//" means an authority follows, otherwise the whole
  # hier part is the path.
  @doc false
  def parse(<<"//", rest::binary>>, parsed, :hier_parse), do: parse(rest, parsed, :hier_auth)
  def parse(hier, parsed, :hier_parse), do: parse(hier, parsed, :hier_no_auth)
  def parse(hier, parsed, :hier_no_auth), do: %{parsed | path: nil_or(hier)}

  def parse(hier, parsed, :hier_auth) do
    parsed = parse(hier, parsed, "", :authority)

    # Only descend into the authority (userinfo / host / port) if there is one.
    case parsed.authority do
      nil -> parsed
      authority -> parse(authority, parsed, "", :userinfo)
    end
  end

  # Host: a leading "[" starts an IPv6 literal; :host_4 handles IPv4 addresses
  # and registered names.
  def parse(<<"[", rest::binary>>, parsed, :host), do: parse(rest, parsed, "", :host_6)
  def parse(host, parsed, :host), do: parse(host, parsed, "", :host_4)

  # Port: what follows the ":" must be an integer with nothing after it.
  def parse(<<":", rest::binary>>, parsed, :port) do
    case Integer.parse(rest) do
      {port, ""} -> %{parsed | port: port}
      _ -> %{parsed | valid: false}
    end
  end

  # Nothing after the host (reached from the IPv6 branch once "]" is consumed):
  # there is simply no explicit port, which is not an error.
  def parse("", parsed, :port), do: parsed

  # Anything else trailing the host that does not start with ":" is invalid.
  def parse(_, parsed, :port), do: %{parsed | port: nil, valid: false}

  # parse/4 takes (input, parsed, current_part, mode) and walks the input one
  # byte at a time, accumulating bytes in `current_part` until the delimiter
  # that ends the current mode is found.

  # Scheme: running out of input before seeing ":" means there is no scheme,
  # so the URI is not valid.
  @doc false
  def parse("", parsed, _, :scheme), do: %{parsed | valid: false}

  def parse(<<":", rest::binary>>, parsed, current_part, :scheme) do
    # An empty scheme (input starting with ":") is not valid either.
    case nil_or(String.downcase(current_part)) do
      nil -> %{parsed | scheme: nil, valid: false}
      scheme -> parse(rest, %{parsed | scheme: scheme}, "", :hier)
    end
  end

  # Hier: the hierarchical sequence of the URI ('hier-part' in RFC 3986). It
  # ends at the end of the input, at "?" (a query follows) or at "#" (a
  # fragment follows directly, without a query).
  def parse("", parsed, current_part, :hier), do: apply_hier(parsed, current_part)

  def parse(<<"?", rest::binary>>, parsed, current_part, :hier) do
    parse(rest, apply_hier(parsed, current_part), "", :query)
  end

  def parse(<<"#", rest::binary>>, parsed, current_part, :hier) do
    %{apply_hier(parsed, current_part) | fragment: nil_or(rest)}
  end

  # Query: ends at the end of the input or at "#"; everything after "#" is the
  # fragment.
  def parse("", parsed, current_part, :query), do: %{parsed | query: nil_or(current_part)}

  def parse(<<"#", rest::binary>>, parsed, current_part, :query) do
    %{parsed | query: nil_or(current_part), fragment: nil_or(rest)}
  end

  # Authority: ends at the end of the input or at the first "/", which also
  # starts the path.
  def parse("", parsed, current_part, :authority),
    do: %{parsed | authority: nil_or(current_part)}

  def parse(<<"/", rest::binary>>, parsed, current_part, :authority) do
    %{parsed | authority: nil_or(current_part), path: "/" <> rest}
  end

  # Userinfo: if no "@" is found there is no userinfo, so everything walked so
  # far is re-read from the beginning as the host.
  def parse("", parsed, current_part, :userinfo), do: parse(current_part, parsed, :host)

  def parse(<<"@", rest::binary>>, parsed, current_part, :userinfo) do
    parse(rest, %{parsed | userinfo: nil_or(current_part)}, :host)
  end

  # IPv6 host: the literal must be terminated by "]".
  def parse("", parsed, _, :host_6), do: %{parsed | valid: false}

  def parse(<<"]", rest::binary>>, parsed, current_part, :host_6) do
    parse(rest, %{parsed | is_ipv6: true, host: nil_or(current_part)}, :port)
  end

  # "Normal" host (IPv4 or name): ends at the end of the input or at the ":"
  # that introduces the port.
  def parse("", parsed, current_part, :host_4), do: %{parsed | host: nil_or(current_part)}

  def parse(<<":", rest::binary>>, parsed, current_part, :host_4) do
    parse(":" <> rest, %{parsed | host: nil_or(current_part)}, :port)
  end

  # Default walking clause for every mode: the byte is not a delimiter for the
  # current mode, so keep it and move on. The byte is appended as-is (not
  # re-encoded as a code point) so multi-byte UTF-8 sequences survive intact.
  def parse(<<char, rest::binary>>, parsed, current_part, mode) do
    parse(rest, parsed, current_part <> <<char>>, mode)
  end

  # Records the raw hier part and then parses it into authority / path.
  defp apply_hier(parsed, hier) do
    parse(hier, %{parsed | hier: nil_or(hier)}, :hier_parse)
  end

  # Converts "" into nil; any other value is returned unchanged.
  defp nil_or(""), do: nil
  defp nil_or(str), do: str
end