Add automatic scrubber

This commit is contained in:
Mikko Ahlroth 2018-06-28 23:04:58 +03:00
parent f0b7e2c504
commit fa00afe9bf
3 changed files with 72 additions and 1 deletions

View file

@ -16,7 +16,7 @@ defmodule Tilastokeskus.Archive.Schemas.PageView do
# Request path without query string # Request path without query string
field(:path_noq, :string) field(:path_noq, :string)
# Request host header # Request URL host (authority)
field(:host, :string) field(:host, :string)
# Full HTTP referrer # Full HTTP referrer

View file

@ -0,0 +1,62 @@
defmodule Tilastokeskus.Archive.Scrubinator do
  @moduledoc """
  Scrubinator is a timed assassin that periodically scrubs overly sensitive
  information from the stored log data.

  The process is started with a map containing `:days`. Page views whose `at`
  timestamp is at least that many days old get their `addr`, `ua` and `loc_city`
  fields set to `nil` and are flagged as `scrubbed`. The first scrub is triggered
  as soon as the process has started and is then repeated every 24 hours.
  """

  use GenServer

  require Logger

  import Ecto.Query, only: [from: 2]

  alias Tilastokeskus.Archive.Repo
  alias Tilastokeskus.Archive.Schemas.PageView

  # Interval between two scrub runs, in milliseconds (24 hours).
  @how_often 24 * 60 * 60 * 1000

  # Number of seconds in one day, used to turn `days` into a Unix time offset.
  @seconds_per_day 24 * 60 * 60

  @doc """
  Starts the scrubber process.

  `opts` must be a map with a `:days` key: the age, in days, after which the
  private data of a page view is scrubbed.
  """
  @spec start_link(map) :: GenServer.on_start()
  def start_link(opts) do
    GenServer.start_link(__MODULE__, opts)
  end

  @impl true
  def init(%{days: _days} = state) do
    # Trigger the first scrub through the mailbox instead of running it inline:
    # init/1 blocks the supervisor (and every child started after this one), and
    # the initial UPDATE may be slow when a lot of rows are waiting to be scrubbed.
    send(self(), :scrub)
    {:ok, state}
  end

  @impl true
  def handle_info(:scrub, %{days: days} = state) do
    :ok = scrub(days)
    schedule_next_scrub()
    {:noreply, state}
  end

  @doc """
  Scrub hits older than `days` days of private data.

  Sets `addr`, `ua` and `loc_city` to `nil` and `scrubbed` to `true` on every
  page view that has not been scrubbed yet and whose `at` timestamp is not newer
  than the cutoff. Always returns `:ok`; if the cutoff cannot be represented as
  a `DateTime`, an error is logged and nothing is updated.
  """
  @spec scrub(integer) :: :ok
  def scrub(days) do
    now = DateTime.utc_now() |> DateTime.to_unix()
    cutoff_unix = now - days * @seconds_per_day

    case DateTime.from_unix(cutoff_unix) do
      {:ok, cutoff} ->
        from(
          p in PageView,
          where: p.at <= ^cutoff and p.scrubbed == false,
          update: [
            set: [
              scrubbed: true,
              addr: nil,
              ua: nil,
              loc_city: nil
            ]
          ]
        )
        |> Repo.update_all([])

        :ok

      {:error, reason} ->
        # Best effort: keep the process alive, but make the skipped run visible.
        Logger.error(
          "Scrubinator: cannot compute scrub cutoff for #{inspect(days)} days " <>
            "(#{inspect(reason)}); nothing was scrubbed"
        )

        :ok
    end
  end

  # Queues the next :scrub message for this process after the scrub interval.
  defp schedule_next_scrub do
    Process.send_after(self(), :scrub, @how_often)
  end
end

View file

@ -10,10 +10,12 @@ defmodule Tilastokeskus.Application do
port = (System.get_env("PORT") || "1971") |> String.to_integer() port = (System.get_env("PORT") || "1971") |> String.to_integer()
hosts = get_hosts() hosts = get_hosts()
days = get_days()
# List all child processes to be supervised # List all child processes to be supervised
children = [ children = [
{Tilastokeskus.Archive.Repo, []}, {Tilastokeskus.Archive.Repo, []},
{Tilastokeskus.Archive.Scrubinator, %{days: days}},
{Tilastokeskus.Reception.Router, [[hosts: hosts], [port: port]]} {Tilastokeskus.Reception.Router, [[hosts: hosts], [port: port]]}
] ]
@ -29,4 +31,11 @@ defmodule Tilastokeskus.Application do
hosts -> String.split(hosts, ",") hosts -> String.split(hosts, ",")
end end
end end
# Reads the scrub age limit (in days) from the TILASTOKESKUS_SCRUB_DAYS
# environment variable, defaulting to 90 when the variable is not set.
#
# Raises ArgumentError with a descriptive message when the value is not a
# non-negative integer. Negative values are rejected because the scrubber
# subtracts the days from the current time, so a negative value would move the
# cutoff into the future.
defp get_days() do
  case System.get_env("TILASTOKESKUS_SCRUB_DAYS") do
    nil ->
      90

    value ->
      case Integer.parse(value) do
        {days, ""} when days >= 0 ->
          days

        _ ->
          raise ArgumentError,
                "TILASTOKESKUS_SCRUB_DAYS must be a non-negative integer, got: " <>
                  inspect(value)
      end
  end
end
end end