VirtualBox

source: vbox/trunk/src/VBox/Main/src-all/QMTranslatorImpl.cpp@ 94348

Last change on this file since 94348 was 93115, checked in by vboxsync, 3 years ago

scm --update-copyright-year

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 20.2 KB
Line 
1/* $Id: QMTranslatorImpl.cpp 93115 2022-01-01 11:31:46Z vboxsync $ */
2/** @file
3 * VirtualBox API translation handling class
4 */
5
6/*
7 * Copyright (C) 2014-2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18#include <vector>
19#include <set>
20#include <algorithm>
21#include <iprt/sanitized/iterator>
22#include <iprt/errcore.h>
23#include <iprt/file.h>
24#include <iprt/asm.h>
25#include <iprt/string.h>
26#include <iprt/strcache.h>
27#include <VBox/com/string.h>
28#include <VBox/log.h>
29#include <QMTranslator.h>
30
/** Number of bytes in the QM file magic. */
static const size_t g_cbMagic = 16;

/** The magic byte sequence QMBytesStream::checkMagic() expects at the current
 *  read position (called at the start of the data by the loader). */
static const uint8_t g_abMagic[g_cbMagic] =
{
    0x3c, 0xb8, 0x64, 0x18,
    0xca, 0xef, 0x9c, 0x95,
    0xcd, 0x21, 0x1c, 0xbf,
    0x60, 0xa1, 0xbd, 0xdd
};
38
/**
 * Exception type used internally by the QM parser.
 *
 * Stores the message pointer as given (no copy is made), so the string
 * passed to the constructor must outlive the exception object.
 */
class QMException : public std::exception
{
    /** Message returned by what(); not owned by this object. */
    const char *m_pszWhat;

public:
    /** @param str  Message describing the failure. */
    QMException(const char *str)
        : m_pszWhat(str)
    {
    }

    /** @returns The message pointer passed to the constructor. */
    virtual const char *what() const throw()
    {
        return m_pszWhat;
    }
};
47
48/* Bytes stream. Used by the parser to iterate through the data */
49class QMBytesStream
50{
51 size_t m_cbSize;
52 const uint8_t * const m_dataStart;
53 const uint8_t *m_iter;
54 const uint8_t *m_end;
55
56public:
57
58 QMBytesStream(const uint8_t *const dataStart, size_t cbSize)
59 : m_cbSize(dataStart ? cbSize : 0)
60 , m_dataStart(dataStart)
61 , m_iter(dataStart)
62 {
63 setEnd();
64 }
65
66 /** Sets end pointer.
67 * Used in message reader to detect the end of message block */
68 inline void setEnd(size_t pos = 0)
69 {
70 m_end = m_dataStart + (pos && pos < m_cbSize ? pos : m_cbSize);
71 }
72
73 inline uint8_t read8()
74 {
75 checkSize(1);
76 return *m_iter++;
77 }
78
79 inline uint32_t read32()
80 {
81 checkSize(4);
82 uint32_t result = *reinterpret_cast<const uint32_t *>(m_iter);
83 m_iter += 4;
84 return RT_BE2H_U32(result);
85 }
86
87 /** Reads string in UTF16 and converts it into a UTF8 string */
88 inline com::Utf8Str readUtf16String()
89 {
90 uint32_t size = read32();
91 checkSize(size);
92 if (size & 1)
93 throw QMException("Incorrect string size");
94
95 /* UTF-16 can encode up to codepoint U+10ffff, which UTF-8 needs 4 bytes
96 to encode, so reserve twice the size plus a terminator for the result. */
97 com::Utf8Str result;
98 result.reserve(size * 2 + 1);
99 char *pszStr = result.mutableRaw();
100 int rc = RTUtf16BigToUtf8Ex((PCRTUTF16)m_iter, size >> 1, &pszStr, result.capacity(), NULL);
101 if (RT_SUCCESS(rc))
102 result.jolt();
103 else
104 throw QMException("Translation from UTF-16 to UTF-8 failed");
105
106 m_iter += size;
107 return result;
108 }
109
110 /**
111 * Reads a string, forcing UTF-8 encoding.
112 */
113 inline com::Utf8Str readString()
114 {
115 uint32_t size = read32();
116 checkSize(size);
117
118 com::Utf8Str result(reinterpret_cast<const char *>(m_iter), size);
119 if (size > 0)
120 {
121 RTStrPurgeEncoding(result.mutableRaw());
122 result.jolt();
123 }
124
125 m_iter += size;
126 return result;
127 }
128
129 /**
130 * Reads memory block
131 * Returns number of bytes read
132 */
133 inline uint32_t read(char *bBuf, uint32_t cbSize)
134 {
135 if (!bBuf || !cbSize)
136 return 0;
137 cbSize = RT_MIN(cbSize, (uint32_t)(m_end - m_iter));
138 memcpy(bBuf, m_iter, cbSize);
139 m_iter += cbSize;
140 return cbSize;
141 }
142
143 /** Checks the magic number.
144 * Should be called when in the beginning of the data
145 * @throws exception on mismatch */
146 inline void checkMagic()
147 {
148 checkSize(g_cbMagic);
149 if (RT_LIKELY(memcmp(&(*m_iter), g_abMagic, g_cbMagic) == 0))
150 m_iter += g_cbMagic;
151 else
152 throw QMException("Wrong magic number");
153 }
154
155 /** Has we reached the end pointer? */
156 inline bool hasFinished()
157 {
158 return m_iter == m_end;
159 }
160
161 /** Returns current stream position */
162 inline size_t tellPos()
163 {
164 return (size_t)(m_iter - m_dataStart);
165 }
166
167 /** Moves current pointer to a desired position */
168 inline void seek(uint32_t offSkip)
169 {
170 size_t cbLeft = (size_t)(m_end - m_iter);
171 if (cbLeft >= offSkip)
172 m_iter += offSkip;
173 else
174 m_iter = m_end; /** @todo r=bird: Or throw exception via checkSize? */
175 }
176
177 /** Checks whether stream has enough data to read size bytes */
178 inline void checkSize(size_t size)
179 {
180 if (RT_LIKELY((size_t)(m_end - m_iter) >= size))
181 return;
182 throw QMException("Incorrect item size");
183 }
184};
185
186/* Internal QMTranslator implementation */
187class QMTranslator_Impl
188{
189 /** Used while parsing */
190 struct QMMessageParse
191 {
192 /* Everything is in UTF-8 */
193 std::vector<com::Utf8Str> astrTranslations;
194 com::Utf8Str strContext;
195 com::Utf8Str strComment;
196 com::Utf8Str strSource;
197
198 QMMessageParse() {}
199 };
200
201 struct QMMessage
202 {
203 const char *pszContext;
204 const char *pszSource;
205 const char *pszComment;
206 std::vector<const char *> vecTranslations;
207 uint32_t hash;
208
209 QMMessage() : pszContext(NULL), pszSource(NULL), pszComment(NULL), hash(0)
210 {}
211
212 QMMessage(RTSTRCACHE hStrCache, const QMMessageParse &rSrc)
213 : pszContext(addStr(hStrCache, rSrc.strContext))
214 , pszSource(addStr(hStrCache, rSrc.strSource))
215 , pszComment(addStr(hStrCache, rSrc.strComment))
216 , hash(RTStrHash1(pszSource))
217 {
218 for (size_t i = 0; i < rSrc.astrTranslations.size(); i++)
219 vecTranslations.push_back(addStr(hStrCache, rSrc.astrTranslations[i]));
220 }
221
222 /** Helper. */
223 static const char *addStr(RTSTRCACHE hStrCache, const com::Utf8Str &rSrc)
224 {
225 if (rSrc.isNotEmpty())
226 {
227 const char *psz = RTStrCacheEnterN(hStrCache, rSrc.c_str(), rSrc.length());
228 if (RT_LIKELY(psz))
229 return psz;
230 throw std::bad_alloc();
231 }
232 return NULL;
233 }
234
235 };
236
237 struct HashOffset
238 {
239 uint32_t hash;
240 uint32_t offset;
241
242 HashOffset(uint32_t a_hash = 0, uint32_t a_offs = 0) : hash(a_hash), offset(a_offs) {}
243
244 bool operator<(const HashOffset &obj) const
245 {
246 return (hash != obj.hash ? hash < obj.hash : offset < obj.offset);
247 }
248
249 };
250
251 typedef std::set<HashOffset> QMHashSet;
252 typedef QMHashSet::const_iterator QMHashSetConstIter;
253 typedef std::vector<QMMessage> QMMessageArray;
254 typedef std::vector<uint8_t> QMByteArray;
255
256 QMHashSet m_hashSet;
257 QMMessageArray m_messageArray;
258 QMByteArray m_pluralRules;
259
260public:
261
262 QMTranslator_Impl() {}
263
264 enum PluralOpCodes
265 {
266 Pl_Eq = 0x01,
267 Pl_Lt = 0x02,
268 Pl_Leq = 0x03,
269 Pl_Between = 0x04,
270
271 Pl_OpMask = 0x07,
272
273 Pl_Not = 0x08,
274 Pl_Mod10 = 0x10,
275 Pl_Mod100 = 0x20,
276 Pl_Lead1000 = 0x40,
277
278 Pl_And = 0xFD,
279 Pl_Or = 0xFE,
280 Pl_NewRule = 0xFF,
281
282 Pl_LMask = 0x80,
283 };
284
285 /*
286 * Rules format:
287 * <O><2>[<3>][<&&><O><2>[<3>]]...[<||><O><2>[<3>][<&&><O><2>[<3>]]...]...[<New><O>...]...
288 * where:
289 * <O> - OpCode
290 * <2> - Second operand
291 * <3> - Third operand
292 * <&&> - 'And' operation
293 * <||> - 'Or' operation
294 * <New> - Start of rule for next plural form
295 * Rules are ordered by plural forms, i.e:
296 * <rule for first form (i.e. single)><New><rule for next form>...
297 */
298 bool checkPlural(const QMByteArray &aRules) const
299 {
300 if (aRules.empty())
301 return true;
302
303 uint32_t iPos = 0;
304 do {
305 uint8_t bOpCode = aRules[iPos];
306
307 /* Invalid place of And/Or/NewRule */
308 if (bOpCode & Pl_LMask)
309 return false;
310
311 /* 2nd operand */
312 iPos++;
313
314 /* 2nd operand missing */
315 if (iPos == aRules.size())
316 return false;
317
318 /* Invalid OpCode */
319 if ((bOpCode & Pl_OpMask) == 0)
320 return false;
321
322 if ((bOpCode & Pl_OpMask) == Pl_Between)
323 {
324 /* 3rd operand */
325 iPos++;
326
327 /* 3rd operand missing */
328 if (iPos == aRules.size())
329 return false;
330 }
331
332 /* And/Or/NewRule */
333 iPos++;
334
335 /* All rules checked */
336 if (iPos == aRules.size())
337 return true;
338
339 } while ( ( (aRules[iPos] == Pl_And)
340 || (aRules[iPos] == Pl_Or)
341 || (aRules[iPos] == Pl_NewRule))
342 && ++iPos != aRules.size());
343
344 return false;
345 }
346
347 size_t plural(size_t aNum) const
348 {
349 if (aNum == ~(size_t)0 || m_pluralRules.empty())
350 return 0;
351
352 size_t uPluralNumber = 0;
353 uint32_t iPos = 0;
354
355 /* Rules loop */
356 for (;;)
357 {
358 bool fOr = false;
359 /* 'Or' loop */
360 for (;;)
361 {
362 bool fAnd = true;
363 /* 'And' loop */
364 for (;;)
365 {
366 int iOpCode = m_pluralRules[iPos++];
367 size_t iOpLeft = aNum;
368 if (iOpCode & Pl_Mod10)
369 iOpLeft %= 10;
370 else if (iOpCode & Pl_Mod100)
371 iOpLeft %= 100;
372 else if (iOpCode & Pl_Lead1000)
373 {
374 while (iOpLeft >= 1000)
375 iOpLeft /= 1000;
376 }
377 size_t iOpRight = m_pluralRules[iPos++];
378 int iOp = iOpCode & Pl_OpMask;
379 size_t iOpRight1 = 0;
380 if (iOp == Pl_Between)
381 iOpRight1 = m_pluralRules[iPos++];
382
383 bool fResult = (iOp == Pl_Eq && iOpLeft == iOpRight)
384 || (iOp == Pl_Lt && iOpLeft < iOpRight)
385 || (iOp == Pl_Leq && iOpLeft <= iOpRight)
386 || (iOp == Pl_Between && iOpLeft >= iOpRight && iOpLeft <= iOpRight1);
387 if (iOpCode & Pl_Not)
388 fResult = !fResult;
389
390 fAnd = fAnd && fResult;
391 if (iPos == m_pluralRules.size() || m_pluralRules[iPos] != Pl_And)
392 break;
393 iPos++;
394 }
395 fOr = fOr || fAnd;
396 if (iPos == m_pluralRules.size() || m_pluralRules[iPos] != Pl_Or)
397 break;
398 iPos++;
399 }
400 if (fOr)
401 return uPluralNumber;
402
403 /* Qt returns last plural number if none of rules are match. */
404 uPluralNumber++;
405
406 if (iPos >= m_pluralRules.size())
407 return uPluralNumber;
408
409 iPos++; // Skip Pl_NewRule
410 }
411 }
412
413 const char *translate(const char *pszContext,
414 const char *pszSource,
415 const char *pszDisamb,
416 const size_t aNum,
417 const char **ppszSafeSource) const RT_NOEXCEPT
418 {
419 QMHashSetConstIter lowerIter, upperIter;
420
421 /* As turned out, comments (pszDisamb) are not kept always in result qm file
422 * Therefore, exclude them from the hash */
423 uint32_t hash = RTStrHash1(pszSource);
424 lowerIter = m_hashSet.lower_bound(HashOffset(hash, 0));
425 upperIter = m_hashSet.upper_bound(HashOffset(hash, UINT32_MAX));
426
427 /*
428 * Check different combinations with and without context and
429 * disambiguation. This can help us to find the translation even
430 * if context or disambiguation are not know or properly defined.
431 */
432 const char *apszCtx[] = {pszContext, pszContext, NULL, NULL};
433 const char *apszDisabm[] = {pszDisamb, NULL, pszDisamb, NULL};
434 AssertCompile(RT_ELEMENTS(apszCtx) == RT_ELEMENTS(apszDisabm));
435
436 for (size_t i = 0; i < RT_ELEMENTS(apszCtx); ++i)
437 {
438 for (QMHashSetConstIter iter = lowerIter; iter != upperIter; ++iter)
439 {
440 const QMMessage &message = m_messageArray[iter->offset];
441 if ( RTStrCmp(message.pszSource, pszSource) == 0
442 && (!apszCtx[i] || !*apszCtx[i] || RTStrCmp(message.pszContext, apszCtx[i]) == 0)
443 && (!apszDisabm[i] || !*apszDisabm[i] || RTStrCmp(message.pszComment, apszDisabm[i]) == 0 ))
444 {
445 *ppszSafeSource = message.pszSource;
446 const std::vector<const char *> &vecTranslations = m_messageArray[iter->offset].vecTranslations;
447 size_t const idxPlural = plural(aNum);
448 return vecTranslations[RT_MIN(idxPlural, vecTranslations.size() - 1)];
449 }
450 }
451 }
452
453 *ppszSafeSource = NULL;
454 return pszSource;
455 }
456
457 void load(QMBytesStream &stream, RTSTRCACHE hStrCache)
458 {
459 /* Load into local variables. If we failed during the load,
460 * it would allow us to keep the object in a valid (previous) state. */
461 QMHashSet hashSet;
462 QMMessageArray messageArray;
463 QMByteArray pluralRules;
464
465 stream.checkMagic();
466
467 while (!stream.hasFinished())
468 {
469 uint32_t sectionCode = stream.read8();
470 uint32_t sLen = stream.read32();
471
472 /* Hashes and Context sections are ignored. They contain hash tables
473 * to speed-up search which is not useful since we recalculate all hashes
474 * and don't perform context search by hash */
475 switch (sectionCode)
476 {
477 case Messages:
478 parseMessages(stream, hStrCache, &hashSet, &messageArray, sLen);
479 break;
480 case Hashes:
481 /* Only get size information to speed-up vector filling
482 * if Hashes section goes in the file before Message section */
483 if (messageArray.empty())
484 messageArray.reserve(sLen >> 3);
485 stream.seek(sLen);
486 break;
487 case NumerusRules:
488 {
489 pluralRules.resize(sLen);
490 uint32_t cbSize = stream.read((char *)&pluralRules[0], sLen);
491 if (cbSize < sLen)
492 throw QMException("Incorrect section size");
493 if (!checkPlural(pluralRules))
494 pluralRules.erase(pluralRules.begin(), pluralRules.end());
495 break;
496 }
497 case Contexts:
498 case Dependencies:
499 case Language:
500 stream.seek(sLen);
501 break;
502 default:
503 throw QMException("Unkown section");
504 }
505 }
506
507 /* Store the data into member variables.
508 * The following functions never generate exceptions */
509 m_hashSet.swap(hashSet);
510 m_messageArray.swap(messageArray);
511 m_pluralRules.swap(pluralRules);
512 }
513
514private:
515
516 /* Some QM stuff */
517 enum SectionType
518 {
519 Contexts = 0x2f,
520 Hashes = 0x42,
521 Messages = 0x69,
522 NumerusRules = 0x88,
523 Dependencies = 0x96,
524 Language = 0xa7
525 };
526
527 enum MessageType
528 {
529 End = 1,
530 SourceText16 = 2,
531 Translation = 3,
532 Context16 = 4,
533 Obsolete1 = 5, /**< was Hash */
534 SourceText = 6,
535 Context = 7,
536 Comment = 8
537 };
538
539 /* Read messages from the stream. */
540 static void parseMessages(QMBytesStream &stream, RTSTRCACHE hStrCache, QMHashSet * const hashSet,
541 QMMessageArray * const messageArray, size_t cbSize)
542 {
543 stream.setEnd(stream.tellPos() + cbSize);
544 uint32_t cMessage = 0;
545 while (!stream.hasFinished())
546 {
547 /* Process the record. Skip anything that doesn't have a source
548 string or any valid translations. Using C++ strings for temporary
549 storage here, as we don't want to pollute the cache we bogus strings
550 in case of duplicate sub-records or invalid records. */
551 QMMessageParse ParsedMsg;
552 parseMessageRecord(stream, &ParsedMsg);
553 if ( ParsedMsg.astrTranslations.size() > 0
554 && ParsedMsg.strSource.isNotEmpty())
555 {
556 /* Copy the strings over into the string cache and a hashed QMMessage,
557 before adding it to the result. */
558 QMMessage HashedMsg(hStrCache, ParsedMsg);
559 hashSet->insert(HashOffset(HashedMsg.hash, cMessage++));
560 messageArray->push_back(HashedMsg);
561
562 }
563 /*else: wtf? */
564 }
565 stream.setEnd();
566 }
567
568 /* Parse one message from the stream */
569 static void parseMessageRecord(QMBytesStream &stream, QMMessageParse * const message)
570 {
571 while (!stream.hasFinished())
572 {
573 uint8_t type = stream.read8();
574 switch (type)
575 {
576 case End:
577 return;
578 /* Ignored as obsolete */
579 case Context16:
580 case SourceText16:
581 stream.seek(stream.read32());
582 break;
583 case Translation:
584 message->astrTranslations.push_back(stream.readUtf16String());
585 break;
586
587 case SourceText:
588 message->strSource = stream.readString();
589 break;
590
591 case Context:
592 message->strContext = stream.readString();
593 break;
594
595 case Comment:
596 message->strComment = stream.readString();
597 break;
598
599 default:
600 /* Ignore unknown/obsolete block */
601 LogRel(("QMTranslator::parseMessageRecord(): Unknown/obsolete message block %x\n", type));
602 break;
603 }
604 }
605 }
606};
607
608/* Interface functions implementation */
609QMTranslator::QMTranslator() : m_impl(new QMTranslator_Impl) {}
610
611QMTranslator::~QMTranslator() { delete m_impl; }
612
613const char *QMTranslator::translate(const char *pszContext, const char *pszSource, const char **ppszSafeSource,
614 const char *pszDisamb /*= NULL*/, const size_t aNum /*= ~(size_t)0*/) const RT_NOEXCEPT
615
616{
617 return m_impl->translate(pszContext, pszSource, pszDisamb, aNum, ppszSafeSource);
618}
619
620int QMTranslator::load(const char *pszFilename, RTSTRCACHE hStrCache) RT_NOEXCEPT
621{
622 /* To free safely the file in case of exception */
623 struct FileLoader
624 {
625 uint8_t *data;
626 size_t cbSize;
627 int rc;
628 FileLoader(const char *pszFname)
629 {
630 rc = RTFileReadAll(pszFname, (void**) &data, &cbSize);
631 }
632
633 ~FileLoader()
634 {
635 if (isSuccess())
636 RTFileReadAllFree(data, cbSize);
637 }
638 bool isSuccess() { return RT_SUCCESS(rc); }
639 };
640
641 try
642 {
643 FileLoader loader(pszFilename);
644 if (loader.isSuccess())
645 {
646 QMBytesStream stream(loader.data, loader.cbSize);
647 m_impl->load(stream, hStrCache);
648 }
649 return loader.rc;
650 }
651 catch(std::exception &e)
652 {
653 LogRel(("QMTranslator::load() failed to load file '%s', reason: %s\n", pszFilename, e.what()));
654 return VERR_INTERNAL_ERROR;
655 }
656 catch(...)
657 {
658 LogRel(("QMTranslator::load() failed to load file '%s'\n", pszFilename));
659 return VERR_GENERAL_FAILURE;
660 }
661}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette