Add IP support and a blacklist for domains/IPs
This commit is contained in:
parent
2aa388e901
commit
2d8dcdae34
1 changed files with 76 additions and 59 deletions
|
@ -22,6 +22,9 @@ defmodule Nulform.Plugins.URLAnalyzer do
|
|||
@parseable_types [
|
||||
"text/html", "text/xml", "application/xml", "application/xhtml+xml"
|
||||
]
|
||||
@domain_blacklist [
|
||||
%R/(192|127|10)\.\d{1,3}\.\d{1,3}\.\d{1,3}/, %R/localhost/i
|
||||
]
|
||||
|
||||
def init(nil) do
|
||||
# If these fail, let them fail, we'll crash later
|
||||
|
@ -54,11 +57,15 @@ defmodule Nulform.Plugins.URLAnalyzer do
|
|||
(?:
|
||||
(?:\w+\-)*\w+\.
|
||||
)+ # Hostname parts, \w separated by - and . at the end
|
||||
(?:
|
||||
(?:[a-z]+(?:\w+\-)*\w+) # Top-level domain, starts with a-z
|
||||
\.? # Optional root domain dot
|
||||
(?::\d+)? # Optional port number
|
||||
(
|
||||
(?:
|
||||
(?:[a-z]+(?:\w+\-)*\w+) # Top-level domain, starts with a-z
|
||||
\.? # Optional root domain dot
|
||||
)
|
||||
|
|
||||
(?:\d{1,3}) # Or an IP address final term
|
||||
)
|
||||
(?::\d+)? # Optional port number
|
||||
(?:
|
||||
(?:/[^?\s]*)+ # URL path, anything non-?, non-ws separated by /
|
||||
(?:\?(?:\S*))? # Optional query string, anything after ?
|
||||
|
@ -86,7 +93,7 @@ defmodule Nulform.Plugins.URLAnalyzer do
|
|||
id_str = "(" <> to_binary(id) <> ") "
|
||||
end
|
||||
|
||||
analysis = binary_to_list(url) |> analyze_url
|
||||
analysis = analyze_url url
|
||||
|
||||
case analysis do
|
||||
{status, domain, type, size, title} ->
|
||||
|
@ -109,13 +116,15 @@ defmodule Nulform.Plugins.URLAnalyzer do
|
|||
end
|
||||
|
||||
{:error, error, domain} ->
|
||||
result = id_str <> "[" <> domain <> "] "
|
||||
case error do
|
||||
:timeout -> result = result <> "Timed out."
|
||||
:no_scheme -> result = result <> "No scheme."
|
||||
:max_redirects -> result = result <> "Too many redirects."
|
||||
{:failed_connect, _} -> result = result <> "Connection failed."
|
||||
end
|
||||
result = id_str <> "[" <> domain <> "] " <>
|
||||
case error do
|
||||
:timeout -> "Timed out."
|
||||
:no_scheme -> "No scheme."
|
||||
:max_redirects -> "Too many redirects."
|
||||
:blacklisted -> "Host blacklisted."
|
||||
:unknown_method -> "Unknown HTTP method."
|
||||
{:failed_connect, _} -> "Connection failed."
|
||||
end
|
||||
end
|
||||
|
||||
result_msg = Nulform.IRC.PRIVMSG.reply msg, result
|
||||
|
@ -131,66 +140,74 @@ defmodule Nulform.Plugins.URLAnalyzer do
|
|||
end
|
||||
|
||||
def analyze_url(url, redirects, mode) when redirects > @max_redirects do
|
||||
{:error, :max_redirects, URI.parse(to_binary url).authority}
|
||||
{:error, :max_redirects, URI.parse(url).authority}
|
||||
end
|
||||
|
||||
def analyze_url(url, redirects, mode) do
|
||||
IO.puts "Analyzing " <> to_binary(url) <> " round " <> to_binary redirects
|
||||
IO.puts "Analyzing " <> url <> " round " <> to_binary redirects
|
||||
title = ""
|
||||
domain = URI.parse(to_binary url).authority
|
||||
domain = URI.parse(url).authority
|
||||
|
||||
result =
|
||||
case mode do
|
||||
:head -> http_head url
|
||||
:get -> http_get url
|
||||
_ -> {:error, :unknown_method} # fail
|
||||
end
|
||||
|
||||
case result do
|
||||
{:ok, {{_, status, _}, headers, body}} ->
|
||||
content_type = parse_content_type headers['content-type']
|
||||
|
||||
if status == 301 or status == 302 or status == 303 or status == 307 do
|
||||
new_url = to_binary headers['location']
|
||||
# Fix non-absolute (relative) Location URIs, which some servers send in redirects
|
||||
if not String.starts_with? String.downcase(new_url), ["http://", "https://"] do
|
||||
IO.puts "Fixing " <> new_url <> " to..."
|
||||
new_url = URI.parse(to_binary url).scheme <> "://" <> domain <> "/" <> String.lstrip new_url, "/"
|
||||
IO.inspect new_url
|
||||
case match_blacklist URI.parse(url).host do
|
||||
true -> {:error, :blacklisted, domain}
|
||||
false ->
|
||||
result =
|
||||
case mode do
|
||||
:head -> http_head binary_to_list(url)
|
||||
:get -> http_get binary_to_list(url)
|
||||
_ -> {:error, :unknown_method, domain} # fail
|
||||
end
|
||||
analyze_url binary_to_list(new_url), redirects + 1, mode
|
||||
else
|
||||
if mode != :get and (status != 200 or Enum.any? @parseable_types, fn(x) -> x == content_type end) do
|
||||
analyze_url url, redirects + 1, :get
|
||||
else
|
||||
IO.inspect mode
|
||||
IO.inspect status
|
||||
IO.inspect content_type
|
||||
if mode == :get and status == 200 and Enum.any? @parseable_types, fn(x) -> x == content_type end do
|
||||
title = parse_title body
|
||||
IO.inspect title
|
||||
end
|
||||
IO.inspect Nulform.Utilities.to_utf8(title)
|
||||
|
||||
content_length = to_binary headers['content-length']
|
||||
if content_length != "" do
|
||||
content_length = binary_to_integer content_length
|
||||
case result do
|
||||
{:ok, {{_, status, _}, headers, body}} ->
|
||||
content_type = parse_content_type headers['content-type']
|
||||
|
||||
if status == 301 or status == 302 or status == 303 or status == 307 do
|
||||
new_url = to_binary headers['location']
|
||||
# Fix non-absolute (relative) Location URIs, which some servers send in redirects
|
||||
if not String.starts_with? String.downcase(new_url), ["http://", "https://"] do
|
||||
IO.puts "Fixing " <> new_url <> " to..."
|
||||
new_url = URI.parse(url).scheme <> "://" <> domain <> "/" <> String.lstrip new_url, "/"
|
||||
IO.inspect new_url
|
||||
end
|
||||
analyze_url new_url, redirects + 1, mode
|
||||
else
|
||||
if mode != :get and (status != 200 or Enum.any? @parseable_types, fn(x) -> x == content_type end) do
|
||||
analyze_url url, redirects + 1, :get
|
||||
else
|
||||
IO.inspect mode
|
||||
IO.inspect status
|
||||
IO.inspect content_type
|
||||
if mode == :get and status == 200 and Enum.any? @parseable_types, fn(x) -> x == content_type end do
|
||||
title = parse_title body
|
||||
IO.inspect title
|
||||
end
|
||||
IO.inspect Nulform.Utilities.to_utf8(title)
|
||||
|
||||
content_length = to_binary headers['content-length']
|
||||
if content_length != "" do
|
||||
content_length = binary_to_integer content_length
|
||||
end
|
||||
|
||||
{
|
||||
status,
|
||||
domain,
|
||||
content_type,
|
||||
content_length,
|
||||
Nulform.Utilities.to_utf8 title
|
||||
}
|
||||
end
|
||||
end
|
||||
|
||||
{
|
||||
status,
|
||||
domain,
|
||||
content_type,
|
||||
content_length,
|
||||
Nulform.Utilities.to_utf8 title
|
||||
}
|
||||
end
|
||||
{:error, error} -> {:error, error, domain}
|
||||
end
|
||||
|
||||
{:error, error} -> {:error, error, domain}
|
||||
end
|
||||
end
|
||||
|
||||
@doc """
Returns `true` when `domain` matches at least one pattern in
`@domain_blacklist`, and `false` otherwise.

`domain` is expected to be a binary host name or IP address. Any other
value (for example a `nil` host, which URL parsing may produce when the
URL has no scheme) cannot match a pattern and is reported as not
blacklisted instead of crashing inside `Regex.match?`, so the caller can
go on to report the underlying HTTP error.
"""
def match_blacklist(domain) when is_binary(domain) do
  Enum.any? @domain_blacklist, fn pattern -> Regex.match? pattern, domain end
end

# Fallback for non-binary hosts: nothing to match against.
def match_blacklist(_domain) do
  false
end
|
||||
|
||||
def parse_title(html) do
|
||||
regex = %R@<title.*?>([^>]*?)<\s*?/\s*?title\s*?>@i
|
||||
title = Regex.scan regex, html
|
||||
|
|
Loading…
Reference in a new issue