This repository has been archived on 2024-06-16. You can view files and clone it, but cannot push or open issues or pull requests.
mebe/apps/mebe_engine/lib/crawler.ex

58 lines
1.6 KiB
Elixir
Raw Normal View History

2015-05-14 20:58:25 +00:00
defmodule MebeEngine.Crawler do
  @moduledoc """
  The crawler goes through the specified directory, opening and parsing all the matching files
  inside concurrently.

  The parsed results are then folded into a single archive map; see
  `construct_archives/1` for its shape.
  """

  alias MebeEngine.Parser
  alias MebeEngine.Models.Page
  alias MebeEngine.Models.Post

  @doc """
  Crawls `path` for Markdown files, parses every file in its own task and
  returns the archive map built from the parsed results.

  A failure while reading or parsing any file crashes the caller, because the
  tasks are started with `Task.async/3` and are therefore linked to it.
  """
  @spec crawl(String.t()) :: map()
  def crawl(path) do
    path
    |> get_files()
    |> Enum.map(fn file -> Task.async(__MODULE__, :parse, [file]) end)
    |> handle_responses()
    |> construct_archives()
  end

  @doc """
  Returns the paths of all `.md` files found under `path`, at any depth.

  The glob is built with `Path.join/2`, so a trailing slash on `path` does not
  produce a double slash in the pattern.
  """
  @spec get_files(String.t()) :: [binary()]
  def get_files(path) do
    path
    |> Path.join("**/*.md")
    |> Path.wildcard()
  end

  @doc """
  Reads `file` and hands its contents to `MebeEngine.Parser.parse/2` together
  with the file's base name.

  Raises if the file cannot be read (`File.read!/1`).
  """
  def parse(file) do
    file
    |> File.read!()
    |> Parser.parse(Path.basename(file))
  end

  @doc """
  Awaits every task in `tasklist` and returns their results in the same order.

  Each task is awaited with the default timeout of `Task.await/1`.
  """
  @spec handle_responses([Task.t()]) :: [term()]
  def handle_responses(tasklist) do
    Enum.map(tasklist, &Task.await/1)
  end

  @doc """
  Folds a list of parsed `Page` and `Post` structs into the archive map.

  The returned map has the keys:

    * `:pages`  - map of page slug to page
    * `:posts`  - list of all posts
    * `:years`  - map of year to the posts of that year
    * `:months` - map of `{year, month}` to the posts of that month
    * `:tags`   - map of tag to the posts carrying that tag

  Posts are prepended while folding, so every post list is in reverse order
  relative to `datalist`. Any element that is neither a `Page` nor a `Post`
  raises a `CaseClauseError`.
  """
  @spec construct_archives([struct()]) :: map()
  def construct_archives(datalist) do
    empty = %{pages: %{}, posts: [], years: %{}, months: %{}, tags: %{}}

    Enum.reduce(datalist, empty, fn item, acc ->
      case item.__struct__ do
        Page -> add_page(acc, item)
        Post -> add_post(acc, item)
      end
    end)
  end

  # Stores the page under its slug; a later page with the same slug replaces the earlier one.
  defp add_page(acc, page) do
    %{acc | pages: Map.put(acc.pages, page.slug, page)}
  end

  # Prepends the post to the global post list and to its year, month and tag indexes.
  defp add_post(acc, post) do
    {year, month, _} = post.date

    tags =
      Enum.reduce(post.tags, acc.tags, fn tag, tag_map ->
        prepend(tag_map, tag, post)
      end)

    %{
      acc
      | posts: [post | acc.posts],
        years: prepend(acc.years, year, post),
        months: prepend(acc.months, {year, month}, post),
        tags: tags
    }
  end

  # Prepends `post` to the list stored under `key`, starting a new list when the key is absent.
  defp prepend(index, key, post) do
    Map.update(index, key, [post], fn posts -> [post | posts] end)
  end
end