Add HTMLBody encoding mode that doesn't encode quite so many characters

This commit is contained in:
Mikko Ahlroth 2023-05-28 17:19:30 +03:00
parent e41712faa9
commit 349e18c407
5 changed files with 61 additions and 14 deletions

View file

@ -1,3 +1,8 @@
3.1.0
-----
* Add "HTMLBody" encoding mode that only encodes `&`, `<`, `>`, `'`, and `"`.
3.0.0
-----

View file

@ -12,11 +12,12 @@ Entities can be encoded using named or hex entity references. Named entity refer
```gleam
import glentities
glentities.encode("</html>", glentities.Named) // "&lt;&sol;html&gt;"
glentities.encode("</html>", glentities.Hex) // "&#x3C;&#x2F;html&#x3E;"
glentities.encode("</html>", glentities.HTMLBody) // "&lt;/html&gt;"
glentities.encode("</html>", glentities.Named) // "&lt;&sol;html&gt;"
glentities.encode("</html>", glentities.Hex) // "&#x3C;&#x2F;html&#x3E;"
glentities.decode("&#x3C;&#x2F;html&#x3E;") // "</html>"
glentities.decode("&lt;&sol;html&gt;") // "</html>"
glentities.decode("&#x3C;&#x2F;html&#x3E;") // "</html>"
glentities.decode("&lt;&sol;html&gt;") // "</html>"
```
## Development

View file

@ -1,5 +1,5 @@
name = "glentities"
version = "3.0.0"
version = "3.1.0"
description = "HTML entity encoder/decoder for Gleam"
# Fill out these fields if you intend to generate HTML documentation or publish

View file

@ -10,6 +10,13 @@ pub type EncodeMode {
/// Encode all characters using hex entities, except a-z, A-Z, 0-9, newline, tab, carriage return, and space.
Hex
/// Encode only the necessary characters when the output target is HTML element or attribute content.
///
/// This means `&`, `<`, `>`, `'`, and `"`.
///
/// Note! Not suitable for outputting inside `<style>` or `<script>` elements.
HTMLBody
}
/// Decode any HTML entities in the given string.
@ -2294,6 +2301,7 @@ pub fn encode(text: String, mode: EncodeMode) {
case mode {
Named -> encode_named(input, string_builder.new())
Hex -> encode_hex(input)
HTMLBody -> encode_html_body(input, string_builder.new())
}
}
@ -2311,7 +2319,24 @@ pub fn encode_hex(text: String) {
|> string.join("")
}
/// Encoded text using named HTML entities, except newline and tab. Characters without a named entity are untouched.
/// Escape `text` so it can be placed in the HTML body, either as element
/// content or as attribute content.
///
/// Only `&`, `<`, `>`, `"`, and `'` are replaced (with `&amp;`, `&lt;`,
/// `&gt;`, `&quot;`, and `&#39;` respectively). Everything else is copied
/// through one grapheme at a time without modification.
///
/// The escaped output is appended to `acc`; pass `string_builder.new()` to
/// start from an empty result.
///
/// Note! Not suitable for outputting inside `<style>` or `<script>` elements.
pub fn encode_html_body(text: String, acc: StringBuilder) -> String {
  case text {
    // Input exhausted: render everything collected so far.
    "" -> acc |> string_builder.to_string
    "&" <> tail ->
      encode_html_body(tail, acc |> string_builder.append("&amp;"))
    "<" <> tail ->
      encode_html_body(tail, acc |> string_builder.append("&lt;"))
    ">" <> tail ->
      encode_html_body(tail, acc |> string_builder.append("&gt;"))
    "\"" <> tail ->
      encode_html_body(tail, acc |> string_builder.append("&quot;"))
    "'" <> tail ->
      encode_html_body(tail, acc |> string_builder.append("&#39;"))
    // No escaping needed: copy the next grapheme as-is and keep going.
    unescaped -> encode_other_case(unescaped, acc, encode_html_body)
  }
}
/// Encode text using named HTML entities, except newline and tab. Characters without a named entity are untouched.
pub fn encode_named(text: String, acc: StringBuilder) -> String {
case text {
"" -> string_builder.to_string(acc)
@ -4049,13 +4074,19 @@ pub fn encode_named(text: String, acc: StringBuilder) -> String {
"𝓏" <> rest -> encode_named(rest, string_builder.append(acc, "&zscr;"))
"" <> rest -> encode_named(rest, string_builder.append(acc, "&zwj;"))
"" <> rest -> encode_named(rest, string_builder.append(acc, "&zwnj;"))
_other -> {
let maybe_grapheme = string.pop_grapheme(text)
case maybe_grapheme {
Ok(#(grapheme, rest)) ->
encode_named(rest, string_builder.append(acc, grapheme))
Error(Nil) -> string_builder.to_string(acc)
}
}
other -> encode_other_case(other, acc, encode_named)
}
}
/// Shared fallback for the encoders: copy the first grapheme of `text` onto
/// `acc` unchanged, then hand the remaining text and the extended builder to
/// `continue_callback`.
///
/// If no grapheme can be popped from `text`, the builder is rendered to a
/// `String` and returned without invoking `continue_callback`.
fn encode_other_case(
  text: String,
  acc: StringBuilder,
  continue_callback: fn(String, StringBuilder) -> String,
) -> String {
  case string.pop_grapheme(text) {
    // Nothing left to copy: finish with what has been built.
    Error(Nil) -> string_builder.to_string(acc)
    Ok(#(head, tail)) ->
      continue_callback(tail, string_builder.append(acc, head))
  }
}

View file

@ -37,6 +37,16 @@ pub fn encode_hex_test() {
)
}
// HTMLBody mode should escape only `&`, `<`, `>`, `'`, and `"`; other
// characters (including non-ASCII ones) must come through unchanged.
pub fn encode_html_body_test() {
  let expected =
    "This &amp;amp &#39;string&#39; contains many Θ &quot;encoded&quot; &lt;characters&gt;. ☃"

  "This &amp 'string' contains many Θ \"encoded\" <characters>. ☃"
  |> glentities.encode(glentities.HTMLBody)
  |> should.equal(expected)
}
pub fn roundtrip_named_test() {
let input =
string_utils.normalise(