Add support for URLs starting with www., strip the #anchor from URLs, and collapse excess whitespace in titles
This commit is contained in:
parent
8e9120f88a
commit
f1d7e8a753
1 changed file with 11 additions and 4 deletions
|
@ -68,8 +68,8 @@ defmodule Nulform.Plugins.URLAnalyzer do
|
|||
(?::\d+)? # Optional port number
|
||||
(?:
|
||||
(?:/[^?\s]*)+ # URL path, anything non-?, non-ws separated by /
|
||||
(?:\?(?:\S*))? # Optional query string, anything after ?
|
||||
)? # Make the whole path & query part optional
|
||||
(?:\?\S*)? # Optional query string, anything after ?, up to #
|
||||
)? # Make the whole path & query part optional
|
||||
)
|
||||
"""
|
||||
|
||||
|
@ -146,8 +146,15 @@ defmodule Nulform.Plugins.URLAnalyzer do
|
|||
def analyze_url(url, redirects, mode) do
|
||||
IO.puts "Analyzing " <> url <> " round " <> to_binary redirects
|
||||
title = ""
|
||||
domain = URI.parse(url).authority
|
||||
|
||||
if String.starts_with? url, "www." do
|
||||
url = "http://" <> url
|
||||
end
|
||||
|
||||
# Strip anchor
|
||||
url = Enum.at String.split(url, "#"), 0
|
||||
|
||||
domain = URI.parse(url).authority
|
||||
case match_blacklist URI.parse(url).host do
|
||||
true -> {:error, :blacklisted, domain}
|
||||
false ->
|
||||
|
@ -214,7 +221,7 @@ defmodule Nulform.Plugins.URLAnalyzer do
|
|||
|
||||
if not Enum.empty? title do
|
||||
title = Enum.at Enum.at(title, 0), 0
|
||||
Enum.join String.split(title), " "
|
||||
Regex.replace %R/\s+/, title, " "
|
||||
else
|
||||
""
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue