Add support for multiline values

This commit is contained in:
Mikko Ahlroth 2022-01-28 18:40:19 +02:00
parent 1e6bd963f0
commit 7d25bc6a0b
4 changed files with 196 additions and 29 deletions

View file

@ -7,7 +7,7 @@ defmodule DotenvParser do
Blank lines and lines starting with `#` are ignored. Additionally inline comments can be added after values with a Blank lines and lines starting with `#` are ignored. Additionally inline comments can be added after values with a
`#`, i.e. `FOO=bar # comment`. `#`, i.e. `FOO=bar # comment`.
Single quote or double quoted value to prevent trimming of whitespace and allow usage of `#` in value, i.e. `FOO=' bar # not comment ' # comment`. Single quote or double quote value to prevent trimming of whitespace and allow usage of `#` in value, i.e. `FOO=' bar # not comment ' # comment`.
Single quoted values don't do any unescaping. Double quoted values will unescape the following: Single quoted values don't do any unescaping. Double quoted values will unescape the following:
@ -19,6 +19,20 @@ defmodule DotenvParser do
* `\\"` and `\\'` - Quotes * `\\"` and `\\'` - Quotes
* `\\\\` - Backslash * `\\\\` - Backslash
* `\\uFFFF` - Unicode escape (4 hex characters to denote the codepoint) * `\\uFFFF` - Unicode escape (4 hex characters to denote the codepoint)
* A backslash at the end of the line in a multiline value will remove the linefeed.
Values can span multiple lines when single or double quoted:
```sh
MULTILINE="This is a
multiline value."
```
This will result in the following:
```elixir
System.fetch_env!("MULTILINE") == "This is a\\nmultiline value."
```
A line can start with `export ` for easier interoperation with regular shell scripts. These lines are treated the A line can start with `export ` for easier interoperation with regular shell scripts. These lines are treated the
same as any others. same as any others.
@ -42,10 +56,14 @@ defmodule DotenvParser do
# Line separator: a linefeed with an optional preceding carriage return.
@linefeed_re ~R/\r?\n/ @linefeed_re ~R/\r?\n/
# Start of an assignment line: optional `export`, a name made of letters, digits and underscores
# that does not start with a digit, then `=`. Matched case insensitively.
@line_re ~R/^(?:\s*export)?\s*[a-z_][a-z_0-9]*\s*=/i @line_re ~R/^(?:\s*export)?\s*[a-z_][a-z_0-9]*\s*=/i
# A value that is fully enclosed in double quotes on one line, optionally followed by a `#` comment.
@dquoted_val_re ~R/^\s*"(.*)(?!\\)"\s*(?:#.*)?$/ @dquoted_val_re ~R/^"([^"\\]*(?:\\.[^"\\]*)*)"\s*(?:#.*)?$/
# A value that is fully enclosed in single quotes on one line, optionally followed by a `#` comment.
@squoted_val_re ~R/^\s*'(.*)(?!\\)'\s*(?:#.*)?$/ @squoted_val_re ~R/^\s*'(.*)'\s*(?:#.*)?$/
# Closing line of a multiline double quoted value: content up to the first unescaped `"`, then
# optional whitespace and an optional `#` comment.
@dquoted_multiline_end ~R/^([^"\\]*(?:\\.[^"\\]*)*)"\s*(?:#.*)?$/
# Closing line of a multiline single quoted value: content up to a `'` that is followed only by
# optional whitespace and an optional `#` comment.
@squoted_multiline_end ~R/^(.*)'\s*(?:#.*)?$/
# One or more hexadecimal digits, matched case insensitively.
@hex_re ~R/^[0-9a-f]+$/i @hex_re ~R/^[0-9a-f]+$/i
# Characters that can open a quoted value.
@quote_chars ~w(" ')
@typedoc "Pair of variable name, variable value." @typedoc "Pair of variable name, variable value."
@type value_pair :: {String.t(), String.t()} @type value_pair :: {String.t(), String.t()}
@ -54,6 +72,20 @@ defmodule DotenvParser do
defexception [:message] defexception [:message]
end end
defmodule Continuation do
  @moduledoc """
  State of a quoted value that was opened on one line and has not been closed yet.
  """

  @fields ~w(name value start_quote)a

  @enforce_keys @fields
  defstruct @fields

  @typedoc """
  An unfinished multiline value. A function returning this signals that a quoted value was
  opened and that further lines have to be parsed before the value is complete.

    * `name` - name of the variable being parsed
    * `value` - the part of the value collected so far
    * `start_quote` - the quote character that opened the value
  """
  @type t :: %__MODULE__{
          name: String.t(),
          value: String.t(),
          start_quote: String.t()
        }
end
@doc """ @doc """
Parse given file and load the variables to the environment. Parse given file and load the variables to the environment.
@ -98,39 +130,124 @@ defmodule DotenvParser do
""" """
@spec parse_data(String.t()) :: [value_pair()] @spec parse_data(String.t()) :: [value_pair()]
def parse_data(data) do def parse_data(data) do
# The lines are folded into `{pairs, continuation}`: the pairs parsed so far (newest first) and
# the multiline value that is still open, or `nil` when none is.
{value_pairs, continuation} =
data data
|> String.split(@linefeed_re) |> String.split(@linefeed_re)
|> Enum.map(&String.trim/1) |> Enum.reduce({[], nil}, fn
# No multiline value is open: comment and blank lines are skipped.
|> Enum.reject(&is_comment?/1) line, {ret, nil} ->
|> Enum.reject(&is_blank?/1) trimmed = String.trim(line)
|> Enum.map(&parse_line/1)
if not is_comment?(trimmed) and not is_blank?(trimmed) do
# The untrimmed line is handed on; trimming was only needed for the comment/blank check.
reduce_line(ret, line, nil)
else
{ret, nil}
end
# A multiline value is open: every line is passed to the parser, including blank lines and
# lines starting with `#`.
line, {ret, continuation} ->
reduce_line(ret, line, continuation)
end)
# A continuation that survives the last line means the opening quote was never closed.
if not is_nil(continuation) do
raise ParseError,
"Could not find end for quote #{continuation.start_quote} in variable #{continuation.name}"
end
# Pairs were prepended while folding, so reverse them to get file order.
Enum.reverse(value_pairs)
end end
@doc """ @doc """
Parse given single line and return a variable-value tuple. Parse given single line and return a variable-value tuple, or a continuation value if the line
started or continued a multiline value.
If line cannot be parsed, an error is raised. If line cannot be parsed, an error is raised.
The second argument needs to be `nil` or a continuation value returned from parsing the previous
line.
""" """
@spec parse_line(String.t()) :: value_pair() @spec parse_line(String.t(), Continuation.t() | nil) :: value_pair() | Continuation.t()
def parse_line(line) do def parse_line(line, state)
# Clause for a line read while no multiline value is open: it has to be a `NAME=value` assignment.
def parse_line(line, nil) do
if not Regex.match?(@line_re, line) do if not Regex.match?(@line_re, line) do
raise ParseError, "Malformed line cannot be parsed: #{line}" raise ParseError, "Malformed line cannot be parsed: #{line}"
else else
# Split at the first `=` only, so the value itself may contain further `=` characters.
[var, val] = String.split(line, "=", parts: 2) [var, val] = String.split(line, "=", parts: 2)
var = var |> String.trim() |> String.replace_leading("export ", "") var = var |> String.trim() |> String.replace_leading("export ", "")
val = String.trim(val) trimmed = String.trim(val)
# Try a complete double quoted value first, then a complete single quoted value. When neither
# matches, the value is treated as plain unless it starts with a quote character.
with {:dquoted, nil} <- {:dquoted, Regex.run(@dquoted_val_re, val)}, with {:dquoted, nil} <- {:dquoted, Regex.run(@dquoted_val_re, trimmed)},
{:squoted, nil} <- {:squoted, Regex.run(@squoted_val_re, val)} do {:squoted, nil} <- {:squoted, Regex.run(@squoted_val_re, trimmed)},
trimmed_leading = String.trim_leading(val),
{:quoted_start, false} <-
{:quoted_start, String.starts_with?(trimmed_leading, @quote_chars)} do
# Value is plain value # Value is plain value
{var, val |> remove_comment() |> String.trim()} {var, trimmed |> remove_comment() |> String.trim()}
else else
{:dquoted, [_, inner_val]} -> {var, stripslashes(inner_val)} {:dquoted, [_, inner_val]} ->
{:squoted, [_, inner_val]} -> {var, inner_val} {var, stripslashes(inner_val)}
{:squoted, [_, inner_val]} ->
{var, inner_val}
# An opening quote without a matching end on this line starts a multiline value. The value is
# passed on without trimming its trailing whitespace.
{:quoted_start, _} ->
parse_multiline_start(var, val)
end end
end end
end end
# Clause for a line that continues a multiline value opened on an earlier line.
#
# Returns a `{name, value}` pair when this line closes the value, otherwise the continuation
# with this line appended.
def parse_line(line, %Continuation{} = continuation) do
  closing_re =
    if continuation.start_quote == "\"" do
      @dquoted_multiline_end
    else
      @squoted_multiline_end
    end

  # Only the closing check ignores trailing whitespace; a line that does not close the value is
  # appended untrimmed.
  case Regex.run(closing_re, String.trim_trailing(line)) do
    [_, line_content] ->
      {continuation.name, continuation.value <> maybe_stripslashes(continuation, line_content)}

    _ ->
      appended = continuation.value <> continued_line(continuation, line)
      %Continuation{continuation | value: appended}
  end
end

# Opens a multiline value for variable `name`.
#
# `input` is everything after the `=`; its first non-whitespace character is the opening quote
# and the remainder becomes the first line of the value.
@spec parse_multiline_start(String.t(), String.t()) :: Continuation.t()
defp parse_multiline_start(name, input) do
  {start_quote, rest} = input |> String.trim_leading() |> String.split_at(1)
  continuation = %Continuation{name: name, value: "", start_quote: start_quote}

  %Continuation{continuation | value: continued_line(continuation, rest)}
end

# Turns one raw, non-closing line of a multiline value into the text appended to the value.
#
# In double quoted values the linefeed escape is detected on the raw text, before unescaping:
# an odd run of trailing backslashes ends in a lone backslash that escapes the linefeed, while
# an even run consists only of escaped backslashes and keeps the linefeed. Checking after
# unescaping cannot tell `\\` (literal backslash) from `\` (escaped linefeed).
@spec continued_line(Continuation.t(), String.t()) :: String.t()
defp continued_line(%Continuation{start_quote: "\""}, raw) do
  trailing_backslashes = byte_size(raw) - byte_size(String.trim_trailing(raw, "\\"))

  if rem(trailing_backslashes, 2) == 1 do
    raw |> String.replace_suffix("\\", "") |> stripslashes()
  else
    stripslashes(raw) <> "\n"
  end
end

# Other quote styles do no unescaping; the line is kept as is and terminated with a linefeed.
defp continued_line(continuation, raw), do: maybe_linefeed(continuation, raw)
# Parses `line` in the context of `continuation` and folds the outcome into the accumulator.
#
# An unfinished multiline value leaves the pair list untouched and becomes the new continuation;
# a finished pair is prepended to the list and the continuation is reset to `nil`.
@spec reduce_line([value_pair()], String.t(), Continuation.t() | nil) ::
        {[value_pair()], Continuation.t() | nil}
defp reduce_line(ret, line, continuation) do
  parsed = parse_line(line, continuation)

  if match?(%Continuation{}, parsed) do
    {ret, parsed}
  else
    {[parsed | ret], nil}
  end
end
@spec remove_comment(String.t()) :: String.t() @spec remove_comment(String.t()) :: String.t()
defp remove_comment(val) do defp remove_comment(val) do
case String.split(val, "#", parts: 2) do case String.split(val, "#", parts: 2) do
@ -184,4 +301,23 @@ defmodule DotenvParser do
end end
defp stripslashes(input, :slash, acc), do: stripslashes(input, :no_slash, acc <> "\\") defp stripslashes(input, :slash, acc), do: stripslashes(input, :no_slash, acc <> "\\")
# Unescapes `input` when the value was opened with a double quote; any other value is returned
# unchanged.
@spec maybe_stripslashes(Continuation.t(), String.t()) :: String.t()
defp maybe_stripslashes(continuation, input) do
  if match?(%Continuation{start_quote: "\""}, continuation) do
    stripslashes(input)
  else
    input
  end
end
# Terminates one line of a multiline value.
#
# In a double quoted value a trailing backslash escapes the linefeed: the backslash is dropped
# and no linefeed is added. In every other case a linefeed is appended.
#
# NOTE(review): the check looks at `input` exactly as passed in. If the caller has already
# unescaped it, a trailing escaped backslash can no longer be told apart from a linefeed escape.
@spec maybe_linefeed(Continuation.t(), String.t()) :: String.t()
defp maybe_linefeed(continuation, input)

defp maybe_linefeed(%Continuation{start_quote: "\""}, input) do
  if String.ends_with?(input, "\\") do
    # Drop exactly the one backslash that was detected. This works on the binary suffix, so it
    # is consistent with the check above and avoids slicing with a negative-end range.
    String.replace_suffix(input, "\\", "")
  else
    input <> "\n"
  end
end

defp maybe_linefeed(_, input), do: input <> "\n"
end end

View file

@ -17,16 +17,30 @@ DONT_EXPAND_SQUOTED='dontexpand\nnewlines'
# COMMENTS=work # COMMENTS=work
EQUAL_SIGNS=equals== EQUAL_SIGNS=equals==
RETAIN_INNER_QUOTES={"foo": "bar"} RETAIN_INNER_QUOTES={"foo": "bar"}
RETAIN_LEADING_DQUOTE="retained
RETAIN_LEADING_SQUOTE='retained
RETAIN_TRAILING_DQUOTE=retained"
RETAIN_TRAILING_SQUOTE=retained'
RETAIN_INNER_QUOTES_AS_STRING='{"foo": "bar"}' RETAIN_INNER_QUOTES_AS_STRING='{"foo": "bar"}'
TRIM_SPACE_FROM_UNQUOTED= some spaced out string TRIM_SPACE_FROM_UNQUOTED= some spaced out string
USERNAME=therealnerdybeast@example.tld USERNAME=therealnerdybeast@example.tld
SPACED_KEY = parsed SPACED_KEY = parsed
INLINE_COMMENT="foo#bar" # Bark Bark INLINE_COMMENT="foo#bar" # Bark Bark
INLINE_COMMENT_PLAIN=foo bar # Bark Bark INLINE_COMMENT_PLAIN=foo bar # Bark Bark
END_BACKSLASH="something\" # Comment END_BACKSLASH="something\" # Comment"
END_DOUBLE_BACKSLASH="foo\\" # Comment
lowercased_var=foo lowercased_var=foo
export FOO="exports are supported" export FOO="exports are supported"
MULTILINE="There are
many\u1234lines
in this var!"
SPACED_MULTILINE = "
<-- Note: Indentation preserved"
MULTILINE_SINGLE_QUOTE='Now also
with single quotes! "_"'
export MULTILINE_ESCAPED_QUOTE="Not stopping here --> \\\\\\\"
but \"\" here" # Comment for multiline
MULTILINE_ESCAPED_LINEFEED="No linefeed --> \
nope"
export MULTILINE_NON_ESCAPED_LINEFEED='Linefeed not escaped in single quotes \
'
RETAIN_TRAILING_DQUOTE=retained"
RETAIN_TRAILING_SQUOTE=retained'

View file

@ -0,0 +1,6 @@
# No end found for multiline string
ENV=Cool
BEANS="I like beans
MULTILINE_ESCAPED=\"
PEAS=0

View file

@ -53,19 +53,24 @@ defmodule DotenvParserTest do
{"DONT_EXPAND_SQUOTED", "dontexpand\\nnewlines"}, {"DONT_EXPAND_SQUOTED", "dontexpand\\nnewlines"},
{"EQUAL_SIGNS", "equals=="}, {"EQUAL_SIGNS", "equals=="},
{"RETAIN_INNER_QUOTES", "{\"foo\": \"bar\"}"}, {"RETAIN_INNER_QUOTES", "{\"foo\": \"bar\"}"},
{"RETAIN_LEADING_DQUOTE", "\"retained"},
{"RETAIN_LEADING_SQUOTE", "'retained"},
{"RETAIN_TRAILING_DQUOTE", "retained\""},
{"RETAIN_TRAILING_SQUOTE", "retained'"},
{"RETAIN_INNER_QUOTES_AS_STRING", "{\"foo\": \"bar\"}"}, {"RETAIN_INNER_QUOTES_AS_STRING", "{\"foo\": \"bar\"}"},
{"TRIM_SPACE_FROM_UNQUOTED", "some spaced out string"}, {"TRIM_SPACE_FROM_UNQUOTED", "some spaced out string"},
{"USERNAME", "therealnerdybeast@example.tld"}, {"USERNAME", "therealnerdybeast@example.tld"},
{"SPACED_KEY", "parsed"}, {"SPACED_KEY", "parsed"},
{"INLINE_COMMENT", "foo#bar"}, {"INLINE_COMMENT", "foo#bar"},
{"INLINE_COMMENT_PLAIN", "foo bar"}, {"INLINE_COMMENT_PLAIN", "foo bar"},
{"END_BACKSLASH", "something\\"}, {"END_BACKSLASH", "something\" # Comment"},
{"END_DOUBLE_BACKSLASH", "foo\\"},
{"lowercased_var", "foo"}, {"lowercased_var", "foo"},
{"FOO", "exports are supported"} {"FOO", "exports are supported"},
{"MULTILINE", "There are\nmanyሴlines\nin this var!"},
{"SPACED_MULTILINE", " \n <-- Note: Indentation preserved"},
{"MULTILINE_SINGLE_QUOTE", "Now also\nwith single quotes! \"_\""},
{"MULTILINE_ESCAPED_QUOTE", "Not stopping here --> \\\\\\\"\nbut \"\" here"},
{"MULTILINE_ESCAPED_LINEFEED", "No linefeed --> nope"},
{"MULTILINE_NON_ESCAPED_LINEFEED", "Linefeed not escaped in single quotes \\\n"},
{"RETAIN_TRAILING_DQUOTE", "retained\""},
{"RETAIN_TRAILING_SQUOTE", "retained'"}
] ]
end end
@ -93,4 +98,10 @@ defmodule DotenvParserTest do
DotenvParser.parse_file("test/data/.invalid_var_name_chars") DotenvParser.parse_file("test/data/.invalid_var_name_chars")
end) end)
end end
test "fails with invalid multiline variable" do
  # Parsing this fixture is expected to raise instead of returning any pairs.
  fixture = "test/data/.invalid_multiline_end"

  assert_raise DotenvParser.ParseError, fn -> DotenvParser.parse_file(fixture) end
end
end end