Fix functions deprecated in Elixir 0.10.2; add initial IPv6 and gzip support to the URL analyzer

This commit is contained in:
Mikko Ahlroth 2013-10-02 11:33:51 +03:00
parent 4cb854e94c
commit b548fdd1bb
3 changed files with 50 additions and 28 deletions

View file

@ -36,7 +36,7 @@ defmodule Nulform.Connection do
end
def handle_cast(:connect, data) do
{:ok, sock} = connect binary_to_list(data.host), data.port
{:ok, sock} = connect String.to_char_list!(data.host), data.port
data = data.sock sock
send_connect_info data
@ -88,7 +88,7 @@ defmodule Nulform.Connection do
case String.split stripped do
[_, "433" | _] ->
send data.buffer, "NICK " <> data.altnick
uniqid = to_binary :random.uniform(9999)
uniqid = to_string :random.uniform(9999)
data = data.altnick String.slice(data.altnick, 0, 10) <> "-" <> uniqid
_ ->
end
@ -120,7 +120,7 @@ defmodule Nulform.Connection do
defp send_raw(data, msg) do
:ok = :gen_tcp.send data.sock, String.slice(msg, 0, @max_len) <> "\r\n"
IO.puts(to_binary(data.id) <> " <- " <> String.slice(msg, 0, @max_len))
IO.puts(to_string(data.id) <> " <- " <> String.slice(msg, 0, @max_len))
end
defp tell_handler(data, msg) do

View file

@ -17,13 +17,16 @@ defmodule Nulform.Plugins.URLAnalyzer do
@base_options [body_format: :binary, sync: true]
@http_options [timeout: @timeout, autoredirect: false]
# We need a real user agent since some sites fail on nonstandard ones
@headers [{'user-agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36'}]
# Also request sites as non-compressed
@headers [{'user-agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36'},
{'accept-encoding', 'identity;q=1.0,gzip;q=0.5,*;q=0'}]
@max_redirects 10
@parseable_types [
"text/html", "text/xml", "application/xml", "application/xhtml+xml"
]
@domain_blacklist [
%R/(192|127|10)\.\d{1,3}\.\d{1,3}\.\d{1,3}/, %R/localhost/i
%R/^(192|127|10)\.\d{1,3}\.\d{1,3}\.\d{1,3}$/, %R/^localhost$/i,
%R/^\[(0*:)*:?:1\]$/
]
def init(nil) do
@ -54,16 +57,22 @@ defmodule Nulform.Plugins.URLAnalyzer do
(?:
(?:https?://)|(?:www\.) # Catch http://, https:// and www.
)
(?:
(?:\w+\-)*\w+\.
)+ # Hostname parts, \w separated by - and . at the end
(?:
( # Domain or IPv4
(?:
(?:[a-z]+(?:\w+\-)*\w+) # Top-level domain, starts with a-z
\.? # Optional root domain dot
(?:\w+\-)*\w+\.
)+ # Hostname parts, \w separated by - and . at the end
(?:
(?:
(?:[a-z]+(?:\w+\-)*\w+) # Top-level domain, starts with a-z
\.? # Optional root domain dot
)
|
(?:\d{1,3}) # Or an IP address final term
)
|
(?:\d{1,3}) # Or an IP address final term
| # IPv6
\[
(?:[0-9a-f]:?)+
\]
)
(?::\d+)? # Optional port number
(?:
@ -79,18 +88,18 @@ defmodule Nulform.Plugins.URLAnalyzer do
def run_analyzer(msg, id, urls) when is_list urls do
if not Enum.empty? urls do
[url | rest] = urls
Kernel.spawn __MODULE__, :analyze_url, [msg, id, Enum.at(url, 0)]
Kernel.spawn __MODULE__, :run_analyzer, [msg, id, Enum.at(url, 0)]
id = if(id != nil, do: id + 1)
run_analyzer msg, id, rest
end
end
def analyze_url(msg, id, url) when is_binary url do
def run_analyzer(msg, id, url) when is_binary url do
id_str = ""
size_str = ""
if id != nil do
id_str = "(" <> to_binary(id) <> ") "
id_str = "(" <> to_string(id) <> ") "
end
analysis = analyze_url url
@ -116,7 +125,7 @@ defmodule Nulform.Plugins.URLAnalyzer do
IO.inspect result
end
else
result = result <> "HTTP " <> to_binary status
result = result <> "HTTP " <> to_string status
end
{:error, error, domain} ->
@ -143,12 +152,12 @@ defmodule Nulform.Plugins.URLAnalyzer do
analyze_url url, redirects, :head
end
def analyze_url(url, redirects, mode) when redirects > @max_redirects do
def analyze_url(url, redirects, _) when redirects > @max_redirects do
{:error, :max_redirects, URI.parse(url).authority}
end
def analyze_url(url, redirects, mode) do
IO.puts "Analyzing " <> url <> " round " <> to_binary redirects
IO.puts "Analyzing " <> url <> " round " <> to_string redirects
title = ""
if String.starts_with? url, "www." do
@ -164,8 +173,8 @@ defmodule Nulform.Plugins.URLAnalyzer do
false ->
result =
case mode do
:head -> http_head binary_to_list(url)
:get -> http_get binary_to_list(url)
:head -> http_head String.to_char_list!(url)
:get -> http_get String.to_char_list!(url)
_ -> {:error, :unknown_method, domain} # fail
end
@ -174,7 +183,7 @@ defmodule Nulform.Plugins.URLAnalyzer do
content_type = parse_content_type headers['content-type']
if status == 301 or status == 302 or status == 303 or status == 307 do
new_url = to_binary headers['location']
new_url = to_string headers['location']
# Fix non-absolute Location URIs sent by non-compliant web servers
if not String.starts_with? String.downcase(new_url), ["http://", "https://"] do
IO.puts "Fixing " <> new_url <> " to..."
@ -186,6 +195,9 @@ defmodule Nulform.Plugins.URLAnalyzer do
if mode != :get and (status != 200 or Enum.any? @parseable_types, fn(x) -> x == content_type end) do
analyze_url url, redirects + 1, :get
else
# Maybe gunzip the body, since some servers send gzip even when we prefer identity
body = maybe_gunzip headers, body
IO.inspect mode
IO.inspect status
IO.inspect content_type
@ -195,7 +207,7 @@ defmodule Nulform.Plugins.URLAnalyzer do
end
IO.inspect Nulform.Utilities.to_utf8(title)
content_length = to_binary headers['content-length']
content_length = to_string headers['content-length']
if content_length != "" do
content_length = binary_to_integer content_length
end
@ -220,7 +232,7 @@ defmodule Nulform.Plugins.URLAnalyzer do
end
def parse_title(html) do
regex = %R@<title.*?>([^>]*?)<\s*?/\s*?title\s*?>@i
regex = %R@<title.*?>([^<]*?)<\s*?/\s*?title\s*?>@i
title = Regex.scan regex, html
if not Enum.empty? title do
@ -231,6 +243,16 @@ defmodule Nulform.Plugins.URLAnalyzer do
end
end
# Some HTTP servers send a gzip-encoded body even though our Accept-Encoding
# header prefers identity encoding, so inspect the response headers and
# decompress the body when necessary.
#
# `headers` is an :httpc response header proplist (charlist keys and values);
# `body` is the raw response body. Returns the body, gunzipped via
# :zlib.gunzip/1 if the Content-Encoding header is exactly 'gzip', otherwise
# unchanged.
#
# NOTE(review): `headers['content-encoding']` relies on Access lookup over a
# charlist-keyed proplist, which worked on Elixir 0.10.x — verify this still
# behaves the same on newer Elixir versions before reusing.
def maybe_gunzip(headers, body) do
if headers['content-encoding'] == 'gzip' do
:zlib.gunzip body
else
body
end
end
# Performs an HTTP HEAD request for the given URL (charlist, as required by
# :httpc) by delegating to http_req/2 with the :head method.
def http_head(url) do
http_req :head, url
end
@ -243,7 +265,7 @@ defmodule Nulform.Plugins.URLAnalyzer do
:httpc.request mode, {url, @headers}, @http_options, @base_options
end
defp parse_content_type(header) do
Enum.at String.split(to_binary(header), ";"), 0
def parse_content_type(header) do
Enum.at String.split(to_string(header), ";"), 0
end
end

View file

@ -18,8 +18,8 @@ defmodule Nulform.Mixfile do
# { :foobar, "0.1", git: "https://github.com/elixir-lang/foobar.git" }
defp deps do
[
{:json, "0.0.2", git: "https://github.com/hio/erlang-json"},
{:excoder, "0.0.1", git: "https://Nicd@bitbucket.org/Nicd/excoder.git"}
{:json, github: "cblage/elixir-json"},
{:excoder, "1.0.0", git: "https://Nicd@bitbucket.org/Nicd/excoder.git"}
]
end
end