Add automatic scrubber
This commit is contained in:
parent
f0b7e2c504
commit
fa00afe9bf
3 changed files with 72 additions and 1 deletions
|
@ -16,7 +16,7 @@ defmodule Tilastokeskus.Archive.Schemas.PageView do
|
||||||
# Request path without query string
|
# Request path without query string
|
||||||
field(:path_noq, :string)
|
field(:path_noq, :string)
|
||||||
|
|
||||||
# Request host header
|
# Request URL host (authority)
|
||||||
field(:host, :string)
|
field(:host, :string)
|
||||||
|
|
||||||
# Full HTTP referrer
|
# Full HTTP referrer
|
||||||
|
|
62
lib/archive/scrubinator.ex
Normal file
62
lib/archive/scrubinator.ex
Normal file
|
@ -0,0 +1,62 @@
|
||||||
|
defmodule Tilastokeskus.Archive.Scrubinator do
  @moduledoc """
  Scrubinator is a timed assassin that periodically scrubs overly sensitive
  information from stored page views.

  The process is started with a map containing a `:days` key. A scrub is
  triggered right after the process has started and then again every
  24 hours. See `scrub/1` for what is removed.
  """

  use GenServer
  require Logger
  alias Tilastokeskus.Archive.Schemas.PageView
  alias Tilastokeskus.Archive.Repo
  import Ecto.Query, only: [from: 2]

  # Interval between scrub runs, in milliseconds (24 hours).
  @how_often 24 * 60 * 60 * 1000

  @seconds_per_day 24 * 60 * 60

  @doc """
  Starts the scrubber process.

  `opts` is passed as-is to `init/1` and must be a map with a `:days` key.
  """
  def start_link(opts) do
    GenServer.start_link(__MODULE__, opts)
  end

  @impl true
  def init(%{days: _days} = state) do
    # Do not run the (potentially slow) database update inside init/1: that
    # would block the supervisor from starting the remaining children until
    # the scrub has finished. Instead, queue the first run as a message so it
    # executes right after startup via handle_info/2.
    send(self(), :scrub)
    {:ok, state}
  end

  @impl true
  def handle_info(:scrub, %{days: days} = state) do
    :ok = scrub(days)

    # Schedule the next run.
    Process.send_after(self(), :scrub, @how_often)
    {:noreply, state}
  end

  @doc """
  Scrub hits older than `days` days of private data.

  Every page view whose `at` timestamp is at or before the cutoff
  (now minus `days` days) and that has not been scrubbed yet gets its `addr`,
  `ua` and `loc_city` fields set to `nil` and is marked as `scrubbed`.

  Always returns `:ok`. If the cutoff cannot be represented as a `DateTime`
  (for example because `days` is absurdly large), nothing is scrubbed and an
  error is logged.
  """
  @spec scrub(integer) :: :ok
  def scrub(days) do
    now = DateTime.utc_now() |> DateTime.to_unix()
    cutoff_unix = now - days * @seconds_per_day

    case DateTime.from_unix(cutoff_unix) do
      {:ok, cutoff} ->
        query =
          from(
            p in PageView,
            where: p.at <= ^cutoff and p.scrubbed == false,
            update: [
              set: [
                scrubbed: true,
                addr: nil,
                ua: nil,
                loc_city: nil
              ]
            ]
          )

        Repo.update_all(query, [])

        :ok

      {:error, reason} ->
        # Previously this case was swallowed silently, which would leave
        # private data in place without any trace of why.
        Logger.error(
          "Scrubinator: could not compute scrub cutoff for days=#{inspect(days)} " <>
            "(#{inspect(reason)}), nothing was scrubbed"
        )

        :ok
    end
  end
end
|
|
@ -10,10 +10,12 @@ defmodule Tilastokeskus.Application do
|
||||||
|
|
||||||
port = (System.get_env("PORT") || "1971") |> String.to_integer()
|
port = (System.get_env("PORT") || "1971") |> String.to_integer()
|
||||||
hosts = get_hosts()
|
hosts = get_hosts()
|
||||||
|
days = get_days()
|
||||||
|
|
||||||
# List all child processes to be supervised
|
# List all child processes to be supervised
|
||||||
children = [
|
children = [
|
||||||
{Tilastokeskus.Archive.Repo, []},
|
{Tilastokeskus.Archive.Repo, []},
|
||||||
|
{Tilastokeskus.Archive.Scrubinator, %{days: days}},
|
||||||
{Tilastokeskus.Reception.Router, [[hosts: hosts], [port: port]]}
|
{Tilastokeskus.Reception.Router, [[hosts: hosts], [port: port]]}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -29,4 +31,11 @@ defmodule Tilastokeskus.Application do
|
||||||
hosts -> String.split(hosts, ",")
|
hosts -> String.split(hosts, ",")
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Returns the age threshold, in days, after which page views are scrubbed.
#
# Read from the TILASTOKESKUS_SCRUB_DAYS environment variable; defaults to 90
# when the variable is unset. Surrounding whitespace is ignored.
#
# Raises ArgumentError if the value is not a non-negative integer. Negative
# values are rejected because they would put the scrub cutoff in the future,
# causing every stored page view to be scrubbed.
defp get_days() do
  case System.get_env("TILASTOKESKUS_SCRUB_DAYS") do
    nil ->
      90

    raw ->
      case Integer.parse(String.trim(raw)) do
        {days, ""} when days >= 0 ->
          days

        _ ->
          raise ArgumentError,
                "TILASTOKESKUS_SCRUB_DAYS must be a non-negative integer, got: " <>
                  inspect(raw)
      end
  end
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue