Add IP support and a blacklist for domains/IPs

Mikko Ahlroth 2013-08-03 23:25:43 +03:00
parent 2aa388e901
commit 2d8dcdae34
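
The blacklist check added here amounts to matching the request host against a short list of regexes (private/loopback IPv4 prefixes and localhost) before any HTTP request is made. Below is a minimal, standalone sketch of that check, written with today's ~r// sigil instead of the %R// form used in the commit; the module name BlacklistSketch and the example URLs are illustrative only, not part of the plugin.

defmodule BlacklistSketch do
  # Mirrors @domain_blacklist from the commit: private/loopback IPv4
  # prefixes and "localhost" (case-insensitive).
  @domain_blacklist [
    ~r/(192|127|10)\.\d{1,3}\.\d{1,3}\.\d{1,3}/,
    ~r/localhost/i
  ]

  # True when any blacklist pattern matches the given host string.
  def match_blacklist(host) do
    Enum.any?(@domain_blacklist, fn pattern -> Regex.match?(pattern, host) end)
  end
end

# The plugin feeds URI.parse(url).host into the check; for example:
# BlacklistSketch.match_blacklist(URI.parse("http://127.0.0.1:8080/").host)  #=> true
# BlacklistSketch.match_blacklist(URI.parse("http://localhost/admin").host)  #=> true
# BlacklistSketch.match_blacklist(URI.parse("http://example.com/page").host) #=> false

Note that a literal-pattern blacklist like this only stops URLs that spell out a private address or localhost; a public hostname that resolves to a private IP would still be fetched.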


@@ -22,6 +22,9 @@ defmodule Nulform.Plugins.URLAnalyzer do
   @parseable_types [
     "text/html", "text/xml", "application/xml", "application/xhtml+xml"
   ]
+  @domain_blacklist [
+    %R/(192|127|10)\.\d{1,3}\.\d{1,3}\.\d{1,3}/, %R/localhost/i
+  ]

   def init(nil) do
     # If these fail, let them fail, we'll crash later
@@ -54,11 +57,15 @@ defmodule Nulform.Plugins.URLAnalyzer do
     (?:
       (?:\w+\-)*\w+\.
     )+ # Hostname parts, \w separated by - and . at the end
+    (
     (?:
       (?:[a-z]+(?:\w+\-)*\w+) # Top-level domain, starts with a-z
       \.? # Optional root domain dot
-      (?::\d+)? # Optional port number
     )
+    |
+    (?:\d{1,3}) # Or an IP address final term
+    )
+    (?::\d+)? # Optional port number
     (?:
       (?:/[^?\s]*)+ # URL path, anything non-?, non-ws separated by /
       (?:\?(?:\S*))? # Optional query string, anything after ?
@@ -86,7 +93,7 @@ defmodule Nulform.Plugins.URLAnalyzer do
       id_str = "(" <> to_binary(id) <> ") "
     end

-    analysis = binary_to_list(url) |> analyze_url
+    analysis = analyze_url url

     case analysis do
       {status, domain, type, size, title} ->
@@ -109,12 +116,14 @@ defmodule Nulform.Plugins.URLAnalyzer do
         end

       {:error, error, domain} ->
-        result = id_str <> "[" <> domain <> "] "
+        result = id_str <> "[" <> domain <> "] " <>
         case error do
-          :timeout -> result = result <> "Timed out."
-          :no_scheme -> result = result <> "No scheme."
-          :max_redirects -> result = result <> "Too many redirects."
-          {:failed_connect, _} -> result = result <> "Connection failed."
+          :timeout -> "Timed out."
+          :no_scheme -> "No scheme."
+          :max_redirects -> "Too many redirects."
+          :blacklisted -> "Host blacklisted."
+          :unknown_method -> "Unknown HTTP method."
+          {:failed_connect, _} -> "Connection failed."
         end
     end
@@ -131,19 +140,22 @@ defmodule Nulform.Plugins.URLAnalyzer do
   end

   def analyze_url(url, redirects, mode) when redirects > @max_redirects do
-    {:error, :max_redirects, URI.parse(to_binary url).authority}
+    {:error, :max_redirects, URI.parse(url).authority}
   end

   def analyze_url(url, redirects, mode) do
-    IO.puts "Analyzing " <> to_binary(url) <> " round " <> to_binary redirects
+    IO.puts "Analyzing " <> url <> " round " <> to_binary redirects
     title = ""
-    domain = URI.parse(to_binary url).authority
+    domain = URI.parse(url).authority
+    case match_blacklist URI.parse(url).host do
+      true -> {:error, :blacklisted, domain}
+      false ->
     result =
       case mode do
-        :head -> http_head url
-        :get -> http_get url
-        _ -> {:error, :unknown_method} # fail
+        :head -> http_head binary_to_list(url)
+        :get -> http_get binary_to_list(url)
+        _ -> {:error, :unknown_method, domain} # fail
       end

     case result do
@@ -155,10 +167,10 @@ defmodule Nulform.Plugins.URLAnalyzer do
           # Fix non-absolute location URIs by retard webdevs
           if not String.starts_with? String.downcase(new_url), ["http://", "https://"] do
             IO.puts "Fixing " <> new_url <> " to..."
-            new_url = URI.parse(to_binary url).scheme <> "://" <> domain <> "/" <> String.lstrip new_url, "/"
+            new_url = URI.parse(url).scheme <> "://" <> domain <> "/" <> String.lstrip new_url, "/"
             IO.inspect new_url
           end
-          analyze_url binary_to_list(new_url), redirects + 1, mode
+          analyze_url new_url, redirects + 1, mode
         else
           if mode != :get and (status != 200 or Enum.any? @parseable_types, fn(x) -> x == content_type end) do
             analyze_url url, redirects + 1, :get
@@ -190,6 +202,11 @@ defmodule Nulform.Plugins.URLAnalyzer do
       {:error, error} -> {:error, error, domain}
     end
   end
+  end
+
+  def match_blacklist(domain) do
+    Enum.any? @domain_blacklist, fn x -> Regex.match? x, domain end
+  end

   def parse_title(html) do
     regex = %R@<title.*?>([^>]*?)<\s*?/\s*?title\s*?>@i