This repository has been archived on 2024-06-16. You can view files and clone it, but cannot push or open issues or pull requests.
mebe/lib/mebe_engine/crawler.ex
Mikko Ahlroth b33c48c8db Implement extra header support and multi author support
Also add a lot of parenthesis
2016-02-18 23:12:39 +02:00

84 lines
No EOL
2.5 KiB
Elixir

defmodule MebeEngine.Crawler do
  @moduledoc """
  The crawler goes through the specified directory, opening and parsing all the matching files
  inside concurrently.
  """
  require Logger
  alias MebeEngine.Parser
  alias MebeEngine.Models.Page
  alias MebeEngine.Models.Post
  alias MebeWeb.Utils

  @doc """
  Crawls `path` for Markdown files, parses every file in its own task and
  folds the parsed results into the archive map.

  Returns the map built by `construct_archives/1`.
  """
  def crawl(path) do
    path
    |> get_files()
    |> Enum.map(fn file -> Task.async(__MODULE__, :parse, [file]) end)
    |> handle_responses()
    |> construct_archives()
  end

  @doc """
  Returns the list of files matching `path <> "/**/*.md"`.

  The search pattern, the current working directory and every file found are
  logged at info level.
  """
  def get_files(path) do
    pattern = path <> "/**/*.md"
    Logger.info("Searching files using '#{pattern}' with cwd '#{current_dir()}'")

    files = Path.wildcard(pattern)

    Logger.info("Found files:")
    Enum.each(files, fn file -> Logger.info(file) end)

    files
  end

  @doc """
  Reads `file` and hands its contents, together with the file's basename, to
  `MebeEngine.Parser.parse/2`.

  Raises if the file cannot be read (`File.read!/1`). The return value is
  whatever the parser returns — presumably a page or post struct, since that is
  what `construct_archives/1` consumes.
  """
  def parse(file) do
    file
    |> File.read!()
    |> Parser.parse(Path.basename(file))
  end

  @doc """
  Awaits every task in `tasklist` and returns the results in the same order.

  Each task is awaited with `Task.await/1`'s default timeout.
  """
  def handle_responses(tasklist) do
    Enum.map(tasklist, fn task -> Task.await(task) end)
  end

  @doc """
  Builds the archive indexes from a list of parsed pages and posts.

  The result is a map with the keys:

    * `:pages` - pages keyed by slug
    * `:posts` - list of all posts
    * `:years` - posts keyed by year
    * `:months` - posts keyed by `{year, month}`
    * `:tags` - posts keyed by tag
    * `:authors` - posts keyed by author slug (only filled in multi author mode)
    * `:author_names` - author name keyed by author slug (only filled in multi
      author mode)

  Posts are prepended to every list, so each list is in reverse order of
  `datalist`. An element that is neither a page nor a post raises a
  `CaseClauseError`.
  """
  def construct_archives(datalist) do
    multi_author_mode = Utils.get_conf(:multi_author_mode)

    empty_archives = %{
      pages: %{},
      posts: [],
      years: %{},
      months: %{},
      tags: %{},
      authors: %{},
      author_names: %{}
    }

    Enum.reduce(datalist, empty_archives, fn pagedata, acc ->
      case pagedata.__struct__ do
        Page -> add_page(acc, pagedata)
        Post -> add_post(acc, pagedata, multi_author_mode)
      end
    end)
  end

  # Returns the current working directory for logging, or "" when it cannot be
  # determined, so that a logging call never crashes the crawl.
  defp current_dir do
    case File.cwd() do
      {:ok, dir} -> dir
      {:error, _reason} -> ""
    end
  end

  # Stores the page under its slug.
  defp add_page(acc, page) do
    %{acc | pages: Map.put(acc.pages, page.slug, page)}
  end

  # Adds the post to the post list and to every index it belongs to.
  defp add_post(acc, post, multi_author_mode) do
    {year, month, _day} = post.date
    tags = Enum.reduce(post.tags, acc.tags, fn tag, tagmap -> prepend(tagmap, tag, post) end)
    {authors, author_names} = index_author(acc, post, multi_author_mode)

    %{
      acc
      | posts: [post | acc.posts],
        years: prepend(acc.years, year, post),
        months: prepend(acc.months, {year, month}, post),
        tags: tags,
        authors: authors,
        author_names: author_names
    }
  end

  # Returns the updated `{authors, author_names}` pair. The values must be
  # returned from the `if`: variables rebound inside an `if` body are not
  # visible after it, so rebinding them in place would silently drop the update.
  defp index_author(acc, post, multi_author_mode) do
    if multi_author_mode do
      author_name = Utils.get_author(post)
      author_slug = Utils.slugify(author_name)

      # Authors end up with the name that was in the post with the first matching slug
      {
        prepend(acc.authors, author_slug, post),
        Map.put_new(acc.author_names, author_slug, author_name)
      }
    else
      {acc.authors, acc.author_names}
    end
  end

  # Prepends `post` to the list stored under `key`, starting a new list if needed.
  defp prepend(index, key, post) do
    Map.update(index, key, [post], fn posts -> [post | posts] end)
  end
end