diff --git a/lib/dotenv_parser.ex b/lib/dotenv_parser.ex index c2ae23b..595e4ec 100644 --- a/lib/dotenv_parser.ex +++ b/lib/dotenv_parser.ex @@ -7,7 +7,7 @@ defmodule DotenvParser do Blank lines and lines starting with `#` are ignored. Additionally inline comments can be added after values with a `#`, i.e. `FOO=bar # comment`. - Single quote or double quoted value to prevent trimming of whitespace and allow usage of `#` in value, i.e. `FOO=' bar # not comment ' # comment`. + Single quote or double quote value to prevent trimming of whitespace and allow usage of `#` in value, i.e. `FOO=' bar # not comment ' # comment`. Single quoted values don't do any unescaping. Double quoted values will unescape the following: @@ -19,6 +19,20 @@ defmodule DotenvParser do * `\\"` and `\\'` - Quotes * `\\\\` - Backslash * `\\uFFFF` - Unicode escape (4 hex characters to denote the codepoint) + * A backslash at the end of the line in a multiline value will remove the linefeed. + + Values can span multiple lines when single or double quoted: + + ```sh + MULTILINE="This is a + multiline value." + ``` + + This will result in the following: + + ```elixir + System.fetch_env!("MULTILINE") == "This is a\\nmultiline value." + ``` A line can start with `export ` for easier interoperation with regular shell scripts. These lines are treated the same as any others. @@ -42,10 +56,14 @@ defmodule DotenvParser do @linefeed_re ~R/\r?\n/ @line_re ~R/^(?:\s*export)?\s*[a-z_][a-z_0-9]*\s*=/i - @dquoted_val_re ~R/^\s*"(.*)(?!\\)"\s*(?:#.*)?$/ - @squoted_val_re ~R/^\s*'(.*)(?!\\)'\s*(?:#.*)?$/ + @dquoted_val_re ~R/^"([^"\\]*(?:\\.[^"\\]*)*)"\s*(?:#.*)?$/ + @squoted_val_re ~R/^\s*'(.*)'\s*(?:#.*)?$/ + @dquoted_multiline_end ~R/^([^"\\]*(?:\\.[^"\\]*)*)"\s*(?:#.*)?$/ + @squoted_multiline_end ~R/^(.*)'\s*(?:#.*)?$/ @hex_re ~R/^[0-9a-f]+$/i + @quote_chars ~w(" ') + @typedoc "Pair of variable name, variable value." @type value_pair :: {String.t(), String.t()} @@ -54,6 +72,20 @@ defmodule DotenvParser do defexception [:message] end + defmodule Continuation do + @typedoc """ + A multiline value continuation. When a function returns this, it means that a multiline value + was started and more needs to be parsed to get the rest of the value. + """ + @type t :: %__MODULE__{ + name: String.t(), + value: String.t(), + start_quote: String.t() + } + @enforce_keys [:name, :value, :start_quote] + defstruct [:name, :value, :start_quote] + end + @doc """ Parse given file and load the variables to the environment. @@ -98,39 +130,124 @@ defmodule DotenvParser do """ @spec parse_data(String.t()) :: [value_pair()] def parse_data(data) do - data - |> String.split(@linefeed_re) - |> Enum.map(&String.trim/1) - |> Enum.reject(&is_comment?/1) - |> Enum.reject(&is_blank?/1) - |> Enum.map(&parse_line/1) + {value_pairs, continuation} = + data + |> String.split(@linefeed_re) + |> Enum.reduce({[], nil}, fn + line, {ret, nil} -> + trimmed = String.trim(line) + + if not is_comment?(trimmed) and not is_blank?(trimmed) do + reduce_line(ret, line, nil) + else + {ret, nil} + end + + line, {ret, continuation} -> + reduce_line(ret, line, continuation) + end) + + if not is_nil(continuation) do + raise ParseError, + "Could not find end for quote #{continuation.start_quote} in variable #{continuation.name}" + end + + Enum.reverse(value_pairs) end @doc """ - Parse given single line and return a variable–value tuple. + Parse given single line and return a variable–value tuple, or a continuation value if the line + started or continued a multiline value. If line cannot be parsed, an error is raised. + + The second argument needs to be `nil` or a continuation value returned from parsing the previous + line. """ - @spec parse_line(String.t()) :: value_pair() - def parse_line(line) do + @spec parse_line(String.t(), Continuation.t() | nil) :: value_pair() | Continuation.t() + def parse_line(line, state) + + def parse_line(line, nil) do if not Regex.match?(@line_re, line) do raise ParseError, "Malformed line cannot be parsed: #{line}" else [var, val] = String.split(line, "=", parts: 2) var = var |> String.trim() |> String.replace_leading("export ", "") - val = String.trim(val) + trimmed = String.trim(val) - with {:dquoted, nil} <- {:dquoted, Regex.run(@dquoted_val_re, val)}, - {:squoted, nil} <- {:squoted, Regex.run(@squoted_val_re, val)} do + with {:dquoted, nil} <- {:dquoted, Regex.run(@dquoted_val_re, trimmed)}, + {:squoted, nil} <- {:squoted, Regex.run(@squoted_val_re, trimmed)}, + trimmed_leading = String.trim_leading(val), + {:quoted_start, false} <- + {:quoted_start, String.starts_with?(trimmed_leading, @quote_chars)} do # Value is plain value - {var, val |> remove_comment() |> String.trim()} + {var, trimmed |> remove_comment() |> String.trim()} else - {:dquoted, [_, inner_val]} -> {var, stripslashes(inner_val)} - {:squoted, [_, inner_val]} -> {var, inner_val} + {:dquoted, [_, inner_val]} -> + {var, stripslashes(inner_val)} + + {:squoted, [_, inner_val]} -> + {var, inner_val} + + {:quoted_start, _} -> + parse_multiline_start(var, val) end end end + def parse_line(line, %Continuation{} = continuation) do + trimmed = String.trim_trailing(line) + + end_match = + if continuation.start_quote == "\"" do + Regex.run(@dquoted_multiline_end, trimmed) + else + Regex.run(@squoted_multiline_end, trimmed) + end + + with [_, line_content] <- end_match do + ret = maybe_stripslashes(continuation, line_content) + {continuation.name, continuation.value <> ret} + else + _ -> + next_line = maybe_stripslashes(continuation, line) + next_line = maybe_linefeed(continuation, next_line) + + %Continuation{ + continuation + | value: continuation.value <> next_line + } + end + end + + @spec parse_multiline_start(String.t(), String.t()) :: Continuation.t() + defp parse_multiline_start(name, input) do + {start_quote, rest} = input |> String.trim_leading() |> String.split_at(1) + + continuation = %Continuation{ + name: name, + value: "", + start_quote: start_quote + } + + value = maybe_stripslashes(continuation, rest) + value = maybe_linefeed(continuation, value) + + %Continuation{continuation | value: value} + end + + @spec reduce_line([value_pair()], String.t(), Continuation.t() | nil) :: + {[value_pair()], Continuation.t() | nil} + defp reduce_line(ret, line, continuation) do + case parse_line(line, continuation) do + %Continuation{} = new_continuation -> + {ret, new_continuation} + + result -> + {[result | ret], nil} + end + end + @spec remove_comment(String.t()) :: String.t() defp remove_comment(val) do case String.split(val, "#", parts: 2) do @@ -184,4 +301,23 @@ defmodule DotenvParser do end defp stripslashes(input, :slash, acc), do: stripslashes(input, :no_slash, acc <> "\\") + + @spec maybe_stripslashes(Continuation.t(), String.t()) :: String.t() + defp maybe_stripslashes(continuation, input) + + defp maybe_stripslashes(%Continuation{start_quote: "\""}, input), do: stripslashes(input) + defp maybe_stripslashes(_, input), do: input + + @spec maybe_linefeed(Continuation.t(), String.t()) :: String.t() + defp maybe_linefeed(continuation, input) + + defp maybe_linefeed(%Continuation{start_quote: "\""}, input) do + if String.ends_with?(input, "\\") do + String.slice(input, 0..-2) + else + input <> "\n" + end + end + + defp maybe_linefeed(_, input), do: input <> "\n" end diff --git a/test/data/.env b/test/data/.env index 3201250..3d37819 100644 --- a/test/data/.env +++ b/test/data/.env @@ -17,16 +17,30 @@ DONT_EXPAND_SQUOTED='dontexpand\nnewlines' # COMMENTS=work EQUAL_SIGNS=equals== RETAIN_INNER_QUOTES={"foo": "bar"} -RETAIN_LEADING_DQUOTE="retained -RETAIN_LEADING_SQUOTE='retained -RETAIN_TRAILING_DQUOTE=retained" -RETAIN_TRAILING_SQUOTE=retained' RETAIN_INNER_QUOTES_AS_STRING='{"foo": "bar"}' TRIM_SPACE_FROM_UNQUOTED= some spaced out string USERNAME=therealnerdybeast@example.tld SPACED_KEY = parsed INLINE_COMMENT="foo#bar" # Bark Bark INLINE_COMMENT_PLAIN=foo bar # Bark Bark -END_BACKSLASH="something\" # Comment +END_BACKSLASH="something\" # Comment" +END_DOUBLE_BACKSLASH="foo\\" # Comment lowercased_var=foo export FOO="exports are supported" + +MULTILINE="There are +many\u1234lines +in this var!" + SPACED_MULTILINE = " + <-- Note: Indentation preserved" +MULTILINE_SINGLE_QUOTE='Now also +with single quotes! "_"' +export MULTILINE_ESCAPED_QUOTE="Not stopping here --> \\\\\\\" +but \"\" here" # Comment for multiline +MULTILINE_ESCAPED_LINEFEED="No linefeed --> \ +nope" +export MULTILINE_NON_ESCAPED_LINEFEED='Linefeed not escaped in single quotes \ +' + +RETAIN_TRAILING_DQUOTE=retained" +RETAIN_TRAILING_SQUOTE=retained' diff --git a/test/data/.invalid_multiline_end b/test/data/.invalid_multiline_end new file mode 100644 index 0000000..d55f90d --- /dev/null +++ b/test/data/.invalid_multiline_end @@ -0,0 +1,6 @@ +# No end found for multiline string + +ENV=Cool +BEANS="I like beans +MULTILINE_ESCAPED=\" +PEAS=0 diff --git a/test/dotenv_parser_test.exs b/test/dotenv_parser_test.exs index a2cc554..3831a91 100644 --- a/test/dotenv_parser_test.exs +++ b/test/dotenv_parser_test.exs @@ -53,19 +53,24 @@ defmodule DotenvParserTest do {"DONT_EXPAND_SQUOTED", "dontexpand\\nnewlines"}, {"EQUAL_SIGNS", "equals=="}, {"RETAIN_INNER_QUOTES", "{\"foo\": \"bar\"}"}, - {"RETAIN_LEADING_DQUOTE", "\"retained"}, - {"RETAIN_LEADING_SQUOTE", "'retained"}, - {"RETAIN_TRAILING_DQUOTE", "retained\""}, - {"RETAIN_TRAILING_SQUOTE", "retained'"}, {"RETAIN_INNER_QUOTES_AS_STRING", "{\"foo\": \"bar\"}"}, {"TRIM_SPACE_FROM_UNQUOTED", "some spaced out string"}, {"USERNAME", "therealnerdybeast@example.tld"}, {"SPACED_KEY", "parsed"}, {"INLINE_COMMENT", "foo#bar"}, {"INLINE_COMMENT_PLAIN", "foo bar"}, - {"END_BACKSLASH", "something\\"}, + {"END_BACKSLASH", "something\" # Comment"}, + {"END_DOUBLE_BACKSLASH", "foo\\"}, {"lowercased_var", "foo"}, - {"FOO", "exports are supported"} + {"FOO", "exports are supported"}, + {"MULTILINE", "There are\nmanyሴlines\nin this var!"}, + {"SPACED_MULTILINE", " \n <-- Note: Indentation preserved"}, + {"MULTILINE_SINGLE_QUOTE", "Now also\nwith single quotes! \"_\""}, + {"MULTILINE_ESCAPED_QUOTE", "Not stopping here --> \\\\\\\"\nbut \"\" here"}, + {"MULTILINE_ESCAPED_LINEFEED", "No linefeed --> nope"}, + {"MULTILINE_NON_ESCAPED_LINEFEED", "Linefeed not escaped in single quotes \\\n"}, + {"RETAIN_TRAILING_DQUOTE", "retained\""}, + {"RETAIN_TRAILING_SQUOTE", "retained'"} ] end @@ -93,4 +98,10 @@ defmodule DotenvParserTest do DotenvParser.parse_file("test/data/.invalid_var_name_chars") end) end + + test "fails with invalid multiline variable" do + assert_raise(DotenvParser.ParseError, fn -> + DotenvParser.parse_file("test/data/.invalid_multiline_end") + end) + end end