Support parsing BigEndian files

Fixes #2
This commit is contained in:
Mikko Ahlroth 2024-06-02 22:28:18 +03:00
parent 3c9736f8c3
commit fa9e78c918
7 changed files with 206 additions and 57 deletions

1
.gitignore vendored
View file

@ -2,3 +2,4 @@
*.ez
/build
erl_crash.dump
.DS_Store

View file

@ -24,3 +24,33 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
---
The test file for big-endian MO parsing is taken from https://github.com/perpetualKid/GetText.NET/blob/main/test/GetText.Tests/TestResources/locales/ru_RU/Test_BigEndian.mo and is used under the following license:
The MIT License (MIT)
Original Source Code NGettext
Copyright (c) 2012 Vitaly Zilnik
GetText.NET including updates and additions (WindowsForms, Extractor)
Copyright (c) 2020 perpetualKid
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View file

@ -87,10 +87,23 @@ pub type Mo {
)
}
/// Pair of byte-decoding functions chosen once per file from its detected
/// byte order, so the parsing functions can decode integers without
/// branching on endianness themselves.
type EndiannessHandler {
EndiannessHandler(
// Decoder applied to the two-byte `major` / `minor` revision fields of the
// header. NOTE(review): despite the "8" in the name, its input is two bytes.
int_8: fn(BitArray) -> Result(Int, Nil),
// Decoder applied to the four-byte fields (counts, sizes and offsets).
int_32: fn(BitArray) -> Result(Int, Nil),
)
}
/// Parse given MO file data.
pub fn parse(mo: BitArray) {
use #(endianness, rest) <- result.try(parse_magic(mo))
use header <- result.try(parse_header(endianness, rest))
let endianness_handler = case endianness {
LittleEndian -> EndiannessHandler(int_8: le_int_8, int_32: le_int_32)
BigEndian -> EndiannessHandler(int_8: be_int_8, int_32: be_int_32)
}
use header <- result.try(parse_header(endianness_handler, rest))
use <- bool.guard(
header.revision.major > max_supported_major,
Error(UnknownRevision(header.revision)),
@ -120,7 +133,11 @@ pub fn parse(mo: BitArray) {
Error(OffsetPastEnd(header.ht_offset)),
)
use translations <- result.try(parse_translations(endianness, header, mo))
use translations <- result.try(parse_translations(
endianness_handler,
header,
mo,
))
use metadata <- result.try(parse_metadata(translations))
Ok(Mo(
@ -139,46 +156,39 @@ fn parse_magic(body: BitArray) {
}
}
fn parse_header(endianness: Endianness, body: BitArray) {
case endianness {
LittleEndian -> {
case body {
<<
major_bytes:bytes-size(2),
minor_bytes:bytes-size(2),
string_count_bytes:bytes-size(4),
og_table_offset_bytes:bytes-size(4),
trans_table_offset_bytes:bytes-size(4),
ht_size_bytes:bytes-size(4),
ht_offset_bytes:bytes-size(4),
_rest:bytes,
>> -> {
let assert Ok(major) = le_int_8(major_bytes)
let assert Ok(minor) = le_int_8(minor_bytes)
let assert Ok(string_count) = le_int_32(string_count_bytes)
let assert Ok(og_table_offset) = le_int_32(og_table_offset_bytes)
let assert Ok(trans_table_offset) =
le_int_32(trans_table_offset_bytes)
let assert Ok(ht_size) = le_int_32(ht_size_bytes)
let assert Ok(ht_offset) = le_int_32(ht_offset_bytes)
Ok(Header(
Revision(major, minor),
string_count,
og_table_offset,
trans_table_offset,
ht_size,
ht_offset,
))
}
_ -> Error(MalformedHeader)
}
fn parse_header(eh: EndiannessHandler, body: BitArray) {
case body {
<<
major_bytes:bytes-size(2),
minor_bytes:bytes-size(2),
string_count_bytes:bytes-size(4),
og_table_offset_bytes:bytes-size(4),
trans_table_offset_bytes:bytes-size(4),
ht_size_bytes:bytes-size(4),
ht_offset_bytes:bytes-size(4),
_rest:bytes,
>> -> {
let assert Ok(major) = eh.int_8(major_bytes)
let assert Ok(minor) = eh.int_8(minor_bytes)
let assert Ok(string_count) = eh.int_32(string_count_bytes)
let assert Ok(og_table_offset) = eh.int_32(og_table_offset_bytes)
let assert Ok(trans_table_offset) = eh.int_32(trans_table_offset_bytes)
let assert Ok(ht_size) = eh.int_32(ht_size_bytes)
let assert Ok(ht_offset) = eh.int_32(ht_offset_bytes)
Ok(Header(
Revision(major, minor),
string_count,
og_table_offset,
trans_table_offset,
ht_size,
ht_offset,
))
}
BigEndian -> panic
_ -> Error(MalformedHeader)
}
}
fn parse_translations(endianness: Endianness, header: Header, mo: BitArray) {
fn parse_translations(eh: EndiannessHandler, header: Header, mo: BitArray) {
let strings = list.range(0, header.string_count - 1)
use translations <- result.try(
list.try_fold(strings, dict.new(), fn(translations, i) {
@ -186,7 +196,7 @@ fn parse_translations(endianness: Endianness, header: Header, mo: BitArray) {
let og_offset = header.og_table_offset + new_offset
let trans_offset = header.trans_table_offset + new_offset
use #(og, translation) <- result.try(parse_translation(
endianness,
eh,
mo,
og_offset,
trans_offset,
@ -209,18 +219,18 @@ fn parse_translations(endianness: Endianness, header: Header, mo: BitArray) {
}
fn parse_translation(
endianness: Endianness,
eh: EndiannessHandler,
mo: BitArray,
og_offset: Int,
trans_offset: Int,
) {
use #(og_str_length, og_str_offset) <- result.try(parse_offset_table_entry(
endianness,
eh,
mo,
og_offset,
))
use #(trans_str_length, trans_str_offset) <- result.try(
parse_offset_table_entry(endianness, mo, trans_offset),
parse_offset_table_entry(eh, mo, trans_offset),
)
use og_string <- result.try(parse_mo_string(mo, og_str_length, og_str_offset))
@ -233,23 +243,18 @@ fn parse_translation(
Ok(#(og_string, trans_string))
}
fn parse_offset_table_entry(endianness: Endianness, mo: BitArray, offset: Int) {
fn parse_offset_table_entry(eh: EndiannessHandler, mo: BitArray, offset: Int) {
use data <- result.try(
bit_array.slice(mo, offset, 8)
|> result.replace_error(OffsetPastEnd(offset)),
)
case endianness {
LittleEndian ->
case data {
<<target_length:bytes-size(4), target_offset:bytes-size(4)>> -> {
let assert Ok(target_length) = le_int_32(target_length)
let assert Ok(target_offset) = le_int_32(target_offset)
Ok(#(target_length, target_offset))
}
_ -> Error(MalformedOffsetTableEntry(data))
}
BigEndian -> panic
case data {
<<target_length:bytes-size(4), target_offset:bytes-size(4)>> -> {
let assert Ok(target_length) = eh.int_32(target_length)
let assert Ok(target_offset) = eh.int_32(target_offset)
Ok(#(target_length, target_offset))
}
_ -> Error(MalformedOffsetTableEntry(data))
}
}
@ -317,6 +322,13 @@ fn le_int_8(int8: BitArray) {
}
}
/// Decode a two-byte, big-endian (most significant byte first) bit array
/// into an integer by handing the bytes to `reconstruct_ui8`.
///
/// Returns `Error(Nil)` when the input is not exactly two bytes long.
fn be_int_8(int8: BitArray) {
  case int8 {
    <<high:size(8), low:size(8)>> -> {
      let value = reconstruct_ui8(high, low)
      Ok(value)
    }
    _ -> Error(Nil)
  }
}
fn le_int_32(int32: BitArray) {
case int32 {
<<ll:size(8), lh:size(8), hl:size(8), hh:size(8)>> ->
@ -325,6 +337,14 @@ fn le_int_32(int32: BitArray) {
}
}
/// Decode a four-byte, big-endian (most significant byte first) bit array
/// into an integer by handing the bytes to `reconstruct_ui32`.
///
/// Returns `Error(Nil)` when the input is not exactly four bytes long.
fn be_int_32(int32: BitArray) {
  case int32 {
    <<byte_3:size(8), byte_2:size(8), byte_1:size(8), byte_0:size(8)>> -> {
      // The first byte read is the most significant one.
      let value = reconstruct_ui32(byte_3, byte_2, byte_1, byte_0)
      Ok(value)
    }
    _ -> Error(Nil)
  }
}
/// Combine a high byte `h` and a low byte `l` into one integer: `h` is
/// shifted left by eight bits and `l` is added to the result.
///
/// NOTE(review): despite the "ui8" in the name, the result spans two bytes.
fn reconstruct_ui8(h: Int, l: Int) {
  let shifted_high = int.bitwise_shift_left(h, 8)
  shifted_high + l
}

View file

@ -0,0 +1,25 @@
The MIT License (MIT)
Original Source Code NGettext
Copyright (c) 2012 Vitaly Zilnik
GetText.NET including updates and additions (WindowsForms, Extractor)
Copyright (c) 2020 perpetualKid
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

Binary file not shown.

View file

@ -0,0 +1,36 @@
msgid ""
msgstr ""
"Project-Id-Version: GetText test translation file\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2012-09-06 01:37+0200\n"
"PO-Revision-Date: 2012-09-06 17:18+0200\n"
"Last-Translator: \n"
"Language-Team: \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=3; plural=((((n%10)==1)&&((n%100)!=11))?(0):(((((n%10)"
">=2)&&((n%10)<=4))&&(((n%100)<10)||((n%100)>=20)))?(1):2));\n"
msgid "test"
msgstr "тест"
msgid "test2"
msgstr "тест2"
msgid "test3"
msgstr ""
msgid "{0} minute"
msgid_plural "{0} minutes"
msgstr[0] "{0} минута"
msgstr[1] "{0} минуты"
msgstr[2] "{0} минут"
msgctxt "context1"
msgid "test3"
msgstr "тест3контекст1"
msgctxt "context2"
msgid "test3"
msgstr "тест3контекст2"

View file

@ -69,12 +69,49 @@ pub fn metadata_test() {
)
}
/// Parses the big-endian Russian fixture and checks both the detected byte
/// order and the complete set of decoded translations.
pub fn big_endian_test() {
  let assert Ok(fixture_bits) =
    simplifile.read_bits("./test/locale/big-endian/ru.mo")

  // Named `parsed` rather than `mo` so the module name is not shadowed.
  let parsed = should.be_ok(mo.parse(fixture_bits))

  should.equal(parsed.endianness, mo.BigEndian)

  // The entry with the empty msgid carries the catalogue's header block.
  // NOTE(review): this expects "NGettext test translation file" in the header;
  // confirm it matches what the binary fixture actually contains.
  let header_entry = #(
    "",
    mo.Singular(
      context: "",
      content: "Project-Id-Version: NGettext test translation file\nReport-Msgid-Bugs-To: \nPOT-Creation-Date: 2012-09-06 01:37+0200\nPO-Revision-Date: 2012-09-06 17:18+0200\nLast-Translator: \nLanguage-Team: \nMIME-Version: 1.0\nContent-Type: text/plain; charset=UTF-8\nContent-Transfer-Encoding: 8bit\nPlural-Forms: nplurals=3; plural=((((n%10)==1)&&((n%100)!=11))?(0):(((((n%10)>=2)&&((n%10)<=4))&&(((n%100)<10)||((n%100)>=20)))?(1):2));\n",
    ),
  )

  // Plural forms keyed by plural index.
  let minute_forms =
    dict.from_list([
      #(0, "{0} минута"),
      #(2, "{0} минут"),
      #(1, "{0} минуты"),
    ])

  let expected_translations =
    dict.from_list([
      header_entry,
      #("test2", mo.Singular(context: "", content: "тест2")),
      #("test3", mo.Singular(context: "", content: "тест3контекст2")),
      #("{0} minute", mo.Plural(context: "", content: minute_forms)),
      #("test", mo.Singular(context: "", content: "тест")),
    ])

  should.equal(parsed.translations, expected_translations)
}
/// Test helper: reads the Finnish fixture and returns the parsed MO data.
/// The calling test fails if the file cannot be read or does not parse.
fn open() {
  let assert Ok(fixture_bits) =
    simplifile.read_bits("./test/locale/fi/LC_MESSAGES/fi.mo")

  // `should.be_ok` fails on `Error` and otherwise yields the wrapped value.
  fixture_bits
  |> mo.parse()
  |> should.be_ok()
}