From 1e6bd963f02c12740cb8806a7ebde0cc52315524 Mon Sep 17 00:00:00 2001 From: Mikko Ahlroth Date: Fri, 28 Jan 2022 18:36:54 +0200 Subject: [PATCH 1/4] Set up tool versions for Elixir 1.11 --- .tool-versions | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .tool-versions diff --git a/.tool-versions b/.tool-versions new file mode 100644 index 0000000..75babc3 --- /dev/null +++ b/.tool-versions @@ -0,0 +1,2 @@ +elixir 1.11.3-otp-23 +erlang 23.2.2 From 7d25bc6a0b84f38f71ba5bbd19db0f389246f796 Mon Sep 17 00:00:00 2001 From: Mikko Ahlroth Date: Fri, 28 Jan 2022 18:40:19 +0200 Subject: [PATCH 2/4] Add support for multiline values --- lib/dotenv_parser.ex | 172 +++++++++++++++++++++++++++---- test/data/.env | 24 ++++- test/data/.invalid_multiline_end | 6 ++ test/dotenv_parser_test.exs | 23 +++-- 4 files changed, 196 insertions(+), 29 deletions(-) create mode 100644 test/data/.invalid_multiline_end diff --git a/lib/dotenv_parser.ex b/lib/dotenv_parser.ex index c2ae23b..595e4ec 100644 --- a/lib/dotenv_parser.ex +++ b/lib/dotenv_parser.ex @@ -7,7 +7,7 @@ defmodule DotenvParser do Blank lines and lines starting with `#` are ignored. Additionally inline comments can be added after values with a `#`, i.e. `FOO=bar # comment`. - Single quote or double quoted value to prevent trimming of whitespace and allow usage of `#` in value, i.e. `FOO=' bar # not comment ' # comment`. + Single quote or double quote value to prevent trimming of whitespace and allow usage of `#` in value, i.e. `FOO=' bar # not comment ' # comment`. Single quoted values don't do any unescaping. Double quoted values will unescape the following: @@ -19,6 +19,20 @@ defmodule DotenvParser do * `\\"` and `\\'` - Quotes * `\\\\` - Backslash * `\\uFFFF` - Unicode escape (4 hex characters to denote the codepoint) + * A backslash at the end of the line in a multiline value will remove the linefeed. + + Values can span multiple lines when single or double quoted: + + ```sh + MULTILINE="This is a + multiline value." + ``` + + This will result in the following: + + ```elixir + System.fetch_env!("MULTILINE") == "This is a\\nmultiline value." + ``` A line can start with `export ` for easier interoperation with regular shell scripts. These lines are treated the same as any others. @@ -42,10 +56,14 @@ defmodule DotenvParser do @linefeed_re ~R/\r?\n/ @line_re ~R/^(?:\s*export)?\s*[a-z_][a-z_0-9]*\s*=/i - @dquoted_val_re ~R/^\s*"(.*)(?!\\)"\s*(?:#.*)?$/ - @squoted_val_re ~R/^\s*'(.*)(?!\\)'\s*(?:#.*)?$/ + @dquoted_val_re ~R/^"([^"\\]*(?:\\.[^"\\]*)*)"\s*(?:#.*)?$/ + @squoted_val_re ~R/^\s*'(.*)'\s*(?:#.*)?$/ + @dquoted_multiline_end ~R/^([^"\\]*(?:\\.[^"\\]*)*)"\s*(?:#.*)?$/ + @squoted_multiline_end ~R/^(.*)'\s*(?:#.*)?$/ @hex_re ~R/^[0-9a-f]+$/i + @quote_chars ~w(" ') + @typedoc "Pair of variable name, variable value." @type value_pair :: {String.t(), String.t()} @@ -54,6 +72,20 @@ defmodule DotenvParser do defexception [:message] end + defmodule Continuation do + @typedoc """ + A multiline value continuation. When a function returns this, it means that a multiline value + was started and more needs to be parsed to get the rest of the value. + """ + @type t :: %__MODULE__{ + name: String.t(), + value: String.t(), + start_quote: String.t() + } + @enforce_keys [:name, :value, :start_quote] + defstruct [:name, :value, :start_quote] + end + @doc """ Parse given file and load the variables to the environment. @@ -98,39 +130,124 @@ defmodule DotenvParser do """ @spec parse_data(String.t()) :: [value_pair()] def parse_data(data) do - data - |> String.split(@linefeed_re) - |> Enum.map(&String.trim/1) - |> Enum.reject(&is_comment?/1) - |> Enum.reject(&is_blank?/1) - |> Enum.map(&parse_line/1) + {value_pairs, continuation} = + data + |> String.split(@linefeed_re) + |> Enum.reduce({[], nil}, fn + line, {ret, nil} -> + trimmed = String.trim(line) + + if not is_comment?(trimmed) and not is_blank?(trimmed) do + reduce_line(ret, line, nil) + else + {ret, nil} + end + + line, {ret, continuation} -> + reduce_line(ret, line, continuation) + end) + + if not is_nil(continuation) do + raise ParseError, + "Could not find end for quote #{continuation.start_quote} in variable #{continuation.name}" + end + + Enum.reverse(value_pairs) end @doc """ - Parse given single line and return a variable–value tuple. + Parse given single line and return a variable–value tuple, or a continuation value if the line + started or continued a multiline value. If line cannot be parsed, an error is raised. + + The second argument needs to be `nil` or a continuation value returned from parsing the previous + line. """ - @spec parse_line(String.t()) :: value_pair() - def parse_line(line) do + @spec parse_line(String.t(), Continuation.t() | nil) :: value_pair() | Continuation.t() + def parse_line(line, state) + + def parse_line(line, nil) do if not Regex.match?(@line_re, line) do raise ParseError, "Malformed line cannot be parsed: #{line}" else [var, val] = String.split(line, "=", parts: 2) var = var |> String.trim() |> String.replace_leading("export ", "") - val = String.trim(val) + trimmed = String.trim(val) - with {:dquoted, nil} <- {:dquoted, Regex.run(@dquoted_val_re, val)}, - {:squoted, nil} <- {:squoted, Regex.run(@squoted_val_re, val)} do + with {:dquoted, nil} <- {:dquoted, Regex.run(@dquoted_val_re, trimmed)}, + {:squoted, nil} <- {:squoted, Regex.run(@squoted_val_re, trimmed)}, + trimmed_leading = String.trim_leading(val), + {:quoted_start, false} <- + {:quoted_start, String.starts_with?(trimmed_leading, @quote_chars)} do # Value is plain value - {var, val |> remove_comment() |> String.trim()} + {var, trimmed |> remove_comment() |> String.trim()} else - {:dquoted, [_, inner_val]} -> {var, stripslashes(inner_val)} - {:squoted, [_, inner_val]} -> {var, inner_val} + {:dquoted, [_, inner_val]} -> + {var, stripslashes(inner_val)} + + {:squoted, [_, inner_val]} -> + {var, inner_val} + + {:quoted_start, _} -> + parse_multiline_start(var, val) end end end + def parse_line(line, %Continuation{} = continuation) do + trimmed = String.trim_trailing(line) + + end_match = + if continuation.start_quote == "\"" do + Regex.run(@dquoted_multiline_end, trimmed) + else + Regex.run(@squoted_multiline_end, trimmed) + end + + with [_, line_content] <- end_match do + ret = maybe_stripslashes(continuation, line_content) + {continuation.name, continuation.value <> ret} + else + _ -> + next_line = maybe_stripslashes(continuation, line) + next_line = maybe_linefeed(continuation, next_line) + + %Continuation{ + continuation + | value: continuation.value <> next_line + } + end + end + + @spec parse_multiline_start(String.t(), String.t()) :: Continuation.t() + defp parse_multiline_start(name, input) do + {start_quote, rest} = input |> String.trim_leading() |> String.split_at(1) + + continuation = %Continuation{ + name: name, + value: "", + start_quote: start_quote + } + + value = maybe_stripslashes(continuation, rest) + value = maybe_linefeed(continuation, value) + + %Continuation{continuation | value: value} + end + + @spec reduce_line([value_pair()], String.t(), Continuation.t() | nil) :: + {[value_pair()], Continuation.t() | nil} + defp reduce_line(ret, line, continuation) do + case parse_line(line, continuation) do + %Continuation{} = new_continuation -> + {ret, new_continuation} + + result -> + {[result | ret], nil} + end + end + @spec remove_comment(String.t()) :: String.t() defp remove_comment(val) do case String.split(val, "#", parts: 2) do @@ -184,4 +301,23 @@ defmodule DotenvParser do end defp stripslashes(input, :slash, acc), do: stripslashes(input, :no_slash, acc <> "\\") + + @spec maybe_stripslashes(Continuation.t(), String.t()) :: String.t() + defp maybe_stripslashes(continuation, input) + + defp maybe_stripslashes(%Continuation{start_quote: "\""}, input), do: stripslashes(input) + defp maybe_stripslashes(_, input), do: input + + @spec maybe_linefeed(Continuation.t(), String.t()) :: String.t() + defp maybe_linefeed(continuation, input) + + defp maybe_linefeed(%Continuation{start_quote: "\""}, input) do + if String.ends_with?(input, "\\") do + String.slice(input, 0..-2) + else + input <> "\n" + end + end + + defp maybe_linefeed(_, input), do: input <> "\n" end diff --git a/test/data/.env b/test/data/.env index 3201250..3d37819 100644 --- a/test/data/.env +++ b/test/data/.env @@ -17,16 +17,30 @@ DONT_EXPAND_SQUOTED='dontexpand\nnewlines' # COMMENTS=work EQUAL_SIGNS=equals== RETAIN_INNER_QUOTES={"foo": "bar"} -RETAIN_LEADING_DQUOTE="retained -RETAIN_LEADING_SQUOTE='retained -RETAIN_TRAILING_DQUOTE=retained" -RETAIN_TRAILING_SQUOTE=retained' RETAIN_INNER_QUOTES_AS_STRING='{"foo": "bar"}' TRIM_SPACE_FROM_UNQUOTED= some spaced out string USERNAME=therealnerdybeast@example.tld SPACED_KEY = parsed INLINE_COMMENT="foo#bar" # Bark Bark INLINE_COMMENT_PLAIN=foo bar # Bark Bark -END_BACKSLASH="something\" # Comment +END_BACKSLASH="something\" # Comment" +END_DOUBLE_BACKSLASH="foo\\" # Comment lowercased_var=foo export FOO="exports are supported" + +MULTILINE="There are +many\u1234lines +in this var!" + SPACED_MULTILINE = " + <-- Note: Indentation preserved" +MULTILINE_SINGLE_QUOTE='Now also +with single quotes! "_"' +export MULTILINE_ESCAPED_QUOTE="Not stopping here --> \\\\\\\" +but \"\" here" # Comment for multiline +MULTILINE_ESCAPED_LINEFEED="No linefeed --> \ +nope" +export MULTILINE_NON_ESCAPED_LINEFEED='Linefeed not escaped in single quotes \ +' + +RETAIN_TRAILING_DQUOTE=retained" +RETAIN_TRAILING_SQUOTE=retained' diff --git a/test/data/.invalid_multiline_end b/test/data/.invalid_multiline_end new file mode 100644 index 0000000..d55f90d --- /dev/null +++ b/test/data/.invalid_multiline_end @@ -0,0 +1,6 @@ +# No end found for multiline string + +ENV=Cool +BEANS="I like beans +MULTILINE_ESCAPED=\" +PEAS=0 diff --git a/test/dotenv_parser_test.exs b/test/dotenv_parser_test.exs index a2cc554..3831a91 100644 --- a/test/dotenv_parser_test.exs +++ b/test/dotenv_parser_test.exs @@ -53,19 +53,24 @@ defmodule DotenvParserTest do {"DONT_EXPAND_SQUOTED", "dontexpand\\nnewlines"}, {"EQUAL_SIGNS", "equals=="}, {"RETAIN_INNER_QUOTES", "{\"foo\": \"bar\"}"}, - {"RETAIN_LEADING_DQUOTE", "\"retained"}, - {"RETAIN_LEADING_SQUOTE", "'retained"}, - {"RETAIN_TRAILING_DQUOTE", "retained\""}, - {"RETAIN_TRAILING_SQUOTE", "retained'"}, {"RETAIN_INNER_QUOTES_AS_STRING", "{\"foo\": \"bar\"}"}, {"TRIM_SPACE_FROM_UNQUOTED", "some spaced out string"}, {"USERNAME", "therealnerdybeast@example.tld"}, {"SPACED_KEY", "parsed"}, {"INLINE_COMMENT", "foo#bar"}, {"INLINE_COMMENT_PLAIN", "foo bar"}, - {"END_BACKSLASH", "something\\"}, + {"END_BACKSLASH", "something\" # Comment"}, + {"END_DOUBLE_BACKSLASH", "foo\\"}, {"lowercased_var", "foo"}, - {"FOO", "exports are supported"} + {"FOO", "exports are supported"}, + {"MULTILINE", "There are\nmanyሴlines\nin this var!"}, + {"SPACED_MULTILINE", " \n <-- Note: Indentation preserved"}, + {"MULTILINE_SINGLE_QUOTE", "Now also\nwith single quotes! \"_\""}, + {"MULTILINE_ESCAPED_QUOTE", "Not stopping here --> \\\\\\\"\nbut \"\" here"}, + {"MULTILINE_ESCAPED_LINEFEED", "No linefeed --> nope"}, + {"MULTILINE_NON_ESCAPED_LINEFEED", "Linefeed not escaped in single quotes \\\n"}, + {"RETAIN_TRAILING_DQUOTE", "retained\""}, + {"RETAIN_TRAILING_SQUOTE", "retained'"} ] end @@ -93,4 +98,10 @@ defmodule DotenvParserTest do DotenvParser.parse_file("test/data/.invalid_var_name_chars") end) end + + test "fails with invalid multiline variable" do + assert_raise(DotenvParser.ParseError, fn -> + DotenvParser.parse_file("test/data/.invalid_multiline_end") + end) + end end From e1d6a7bb9db86b29dd69b29e9650a911d2b0261d Mon Sep 17 00:00:00 2001 From: Mikko Ahlroth Date: Fri, 28 Jan 2022 18:59:02 +0200 Subject: [PATCH 3/4] Bump ex_doc version --- mix.exs | 2 +- mix.lock | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/mix.exs b/mix.exs index b32baa4..048ed49 100644 --- a/mix.exs +++ b/mix.exs @@ -31,7 +31,7 @@ defmodule DotenvParser.MixProject do # Run "mix help deps" to learn about dependencies. defp deps do [ - {:ex_doc, "~> 0.23.0", only: :dev} + {:ex_doc, "~> 0.28.0", only: :dev} ] end end diff --git a/mix.lock b/mix.lock index 4f6e7c3..d0aea5f 100644 --- a/mix.lock +++ b/mix.lock @@ -1,7 +1,8 @@ %{ - "earmark_parser": {:hex, :earmark_parser, "1.4.12", "b245e875ec0a311a342320da0551da407d9d2b65d98f7a9597ae078615af3449", [:mix], [], "hexpm", "711e2cc4d64abb7d566d43f54b78f7dc129308a63bc103fbd88550d2174b3160"}, - "ex_doc": {:hex, :ex_doc, "0.23.0", "a069bc9b0bf8efe323ecde8c0d62afc13d308b1fa3d228b65bca5cf8703a529d", [:mix], [{:earmark_parser, "~> 1.4.0", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}], "hexpm", "f5e2c4702468b2fd11b10d39416ddadd2fcdd173ba2a0285ebd92c39827a5a16"}, + "earmark_parser": {:hex, :earmark_parser, "1.4.19", "de0d033d5ff9fc396a24eadc2fcf2afa3d120841eb3f1004d138cbf9273210e8", [:mix], [], "hexpm", "527ab6630b5c75c3a3960b75844c314ec305c76d9899bb30f71cb85952a9dc45"}, + "ex_doc": {:hex, :ex_doc, "0.28.0", "7eaf526dd8c80ae8c04d52ac8801594426ae322b52a6156cd038f30bafa8226f", [:mix], [{:earmark_parser, "~> 1.4.19", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1", [hex: :makeup_erlang, repo: "hexpm", optional: false]}], "hexpm", "e55cdadf69a5d1f4cfd8477122ebac5e1fadd433a8c1022dafc5025e48db0131"}, "makeup": {:hex, :makeup, "1.0.5", "d5a830bc42c9800ce07dd97fa94669dfb93d3bf5fcf6ea7a0c67b2e0e4a7f26c", [:mix], [{:nimble_parsec, "~> 0.5 or ~> 1.0", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "cfa158c02d3f5c0c665d0af11512fed3fba0144cf1aadee0f2ce17747fba2ca9"}, - "makeup_elixir": {:hex, :makeup_elixir, "0.15.0", "98312c9f0d3730fde4049985a1105da5155bfe5c11e47bdc7406d88e01e4219b", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.1", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "75ffa34ab1056b7e24844c90bfc62aaf6f3a37a15faa76b07bc5eba27e4a8b4a"}, - "nimble_parsec": {:hex, :nimble_parsec, "1.1.0", "3a6fca1550363552e54c216debb6a9e95bd8d32348938e13de5eda962c0d7f89", [:mix], [], "hexpm", "08eb32d66b706e913ff748f11694b17981c0b04a33ef470e33e11b3d3ac8f54b"}, + "makeup_elixir": {:hex, :makeup_elixir, "0.15.2", "dc72dfe17eb240552857465cc00cce390960d9a0c055c4ccd38b70629227e97c", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.1", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "fd23ae48d09b32eff49d4ced2b43c9f086d402ee4fd4fcb2d7fad97fa8823e75"}, + "makeup_erlang": {:hex, :makeup_erlang, "0.1.1", "3fcb7f09eb9d98dc4d208f49cc955a34218fc41ff6b84df7c75b3e6e533cc65f", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "174d0809e98a4ef0b3309256cbf97101c6ec01c4ab0b23e926a9e17df2077cbb"}, + "nimble_parsec": {:hex, :nimble_parsec, "1.2.1", "264fc6864936b59fedb3ceb89998c64e9bb91945faf1eb115d349b96913cc2ef", [:mix], [], "hexpm", "23c31d0ec38c97bf9adde35bc91bc8e1181ea5202881f48a192f4aa2d2cf4d59"}, } From 999e31905f17bb7ba6cf13441ee04ae53086ce6b Mon Sep 17 00:00:00 2001 From: Mikko Ahlroth Date: Fri, 28 Jan 2022 18:40:26 +0200 Subject: [PATCH 4/4] Bump version to 2.0.0 --- CHANGELOG | 10 ++++++++++ README.md | 2 +- mix.exs | 2 +- 3 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 CHANGELOG diff --git a/CHANGELOG b/CHANGELOG new file mode 100644 index 0000000..bff1431 --- /dev/null +++ b/CHANGELOG @@ -0,0 +1,10 @@ +2.0.0 +----- + ++ Added support for multiline values by using quotes around the value. +* Fixed some escaping related corner cases. + +1.2.0 +----- + +First public release. diff --git a/README.md b/README.md index b102223..f021542 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ The package can be installed by adding `dotenv_parser` to your list of dependenc ```elixir def deps do [ - {:dotenv_parser, "~> 1.2"} + {:dotenv_parser, "~> 2.0"} ] end ``` diff --git a/mix.exs b/mix.exs index 048ed49..87b6625 100644 --- a/mix.exs +++ b/mix.exs @@ -4,7 +4,7 @@ defmodule DotenvParser.MixProject do def project do [ app: :dotenv_parser, - version: "1.2.0", + version: "2.0.0", elixir: "~> 1.11", start_permanent: Mix.env() == :prod, deps: deps(),