VirtualBox

source: vbox/trunk/src/VBox/Main/src-all/QMTranslatorImpl.cpp@ 91156

Last change on this file since 91156 was 90929, checked in by vboxsync, 3 years ago

Main/QMTranslatorImpl: Just make plural() return size_t instead of casting at the caller site like done in r146553. bugref:1909

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 19.9 KB
Line 
1/* $Id: QMTranslatorImpl.cpp 90929 2021-08-26 19:56:31Z vboxsync $ */
2/** @file
3 * VirtualBox API translation handling class
4 */
5
6/*
7 * Copyright (C) 2014-2020 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18#include <vector>
19#include <set>
20#include <algorithm>
21#include <iprt/sanitized/iterator>
22#include <iprt/errcore.h>
23#include <iprt/file.h>
24#include <iprt/asm.h>
25#include <iprt/string.h>
26#include <iprt/strcache.h>
27#include <VBox/com/string.h>
28#include <VBox/log.h>
29#include <QMTranslator.h>
30
/*
 * QM file magic number.
 * The length constant is taken from the table itself, so the two always agree.
 */
static const uint8_t g_abMagic[] =
{
    0x3c, 0xb8, 0x64, 0x18, 0xca, 0xef, 0x9c, 0x95,
    0xcd, 0x21, 0x1c, 0xbf, 0x60, 0xa1, 0xbd, 0xdd
};
static const size_t g_cbMagic = sizeof(g_abMagic);
38
/**
 * Exception used internally to report problems found while parsing QM data.
 *
 * Only the pointer handed to the constructor is kept (the text is not copied),
 * so the message has to stay valid for as long as the exception is in use.
 */
class QMException : public std::exception
{
public:
    QMException(const char *str)
        : m_pszWhat(str)
    {
    }

    /** Returns the message pointer given to the constructor. */
    virtual const char *what() const throw()
    {
        return m_pszWhat;
    }

private:
    /** The message; not owned by this object. */
    const char *m_pszWhat;
};
47
48/* Bytes stream. Used by the parser to iterate through the data */
49class QMBytesStream
50{
51 size_t m_cbSize;
52 const uint8_t * const m_dataStart;
53 const uint8_t *m_iter;
54 const uint8_t *m_end;
55
56public:
57
58 QMBytesStream(const uint8_t *const dataStart, size_t cbSize)
59 : m_cbSize(dataStart ? cbSize : 0)
60 , m_dataStart(dataStart)
61 , m_iter(dataStart)
62 {
63 setEnd();
64 }
65
66 /** Sets end pointer.
67 * Used in message reader to detect the end of message block */
68 inline void setEnd(size_t pos = 0)
69 {
70 m_end = m_dataStart + (pos && pos < m_cbSize ? pos : m_cbSize);
71 }
72
73 inline uint8_t read8()
74 {
75 checkSize(1);
76 return *m_iter++;
77 }
78
79 inline uint32_t read32()
80 {
81 checkSize(4);
82 uint32_t result = *reinterpret_cast<const uint32_t *>(m_iter);
83 m_iter += 4;
84 return RT_BE2H_U32(result);
85 }
86
87 /** Reads string in UTF16 and converts it into a UTF8 string */
88 inline com::Utf8Str readUtf16String()
89 {
90 uint32_t size = read32();
91 checkSize(size);
92 if (size & 1)
93 throw QMException("Incorrect string size");
94
95 /* UTF-16 can encode up to codepoint U+10ffff, which UTF-8 needs 4 bytes
96 to encode, so reserve twice the size plus a terminator for the result. */
97 com::Utf8Str result;
98 result.reserve(size * 2 + 1);
99 char *pszStr = result.mutableRaw();
100 int rc = RTUtf16BigToUtf8Ex((PCRTUTF16)m_iter, size >> 1, &pszStr, result.capacity(), NULL);
101 if (RT_SUCCESS(rc))
102 result.jolt();
103 else
104 throw QMException("Translation from UTF-16 to UTF-8 failed");
105
106 m_iter += size;
107 return result;
108 }
109
110 /**
111 * Reads a string, forcing UTF-8 encoding.
112 */
113 inline com::Utf8Str readString()
114 {
115 uint32_t size = read32();
116 checkSize(size);
117
118 com::Utf8Str result(reinterpret_cast<const char *>(m_iter), size);
119 if (size > 0)
120 {
121 RTStrPurgeEncoding(result.mutableRaw());
122 result.jolt();
123 }
124
125 m_iter += size;
126 return result;
127 }
128
129 /**
130 * Reads memory block
131 * Returns number of bytes read
132 */
133 inline uint32_t read(char *bBuf, uint32_t cbSize)
134 {
135 if (!bBuf || !cbSize)
136 return 0;
137 cbSize = RT_MIN(cbSize, (uint32_t)(m_end - m_iter));
138 memcpy(bBuf, m_iter, cbSize);
139 m_iter += cbSize;
140 return cbSize;
141 }
142
143 /** Checks the magic number.
144 * Should be called when in the beginning of the data
145 * @throws exception on mismatch */
146 inline void checkMagic()
147 {
148 checkSize(g_cbMagic);
149 if (RT_LIKELY(memcmp(&(*m_iter), g_abMagic, g_cbMagic) == 0))
150 m_iter += g_cbMagic;
151 else
152 throw QMException("Wrong magic number");
153 }
154
155 /** Has we reached the end pointer? */
156 inline bool hasFinished()
157 {
158 return m_iter == m_end;
159 }
160
161 /** Returns current stream position */
162 inline size_t tellPos()
163 {
164 return (size_t)(m_iter - m_dataStart);
165 }
166
167 /** Moves current pointer to a desired position */
168 inline void seek(uint32_t offSkip)
169 {
170 size_t cbLeft = (size_t)(m_end - m_iter);
171 if (cbLeft >= offSkip)
172 m_iter += offSkip;
173 else
174 m_iter = m_end; /** @todo r=bird: Or throw exception via checkSize? */
175 }
176
177 /** Checks whether stream has enough data to read size bytes */
178 inline void checkSize(size_t size)
179 {
180 if (RT_LIKELY((size_t)(m_end - m_iter) >= size))
181 return;
182 throw QMException("Incorrect item size");
183 }
184};
185
186/* Internal QMTranslator implementation */
187class QMTranslator_Impl
188{
189 /** Used while parsing */
190 struct QMMessageParse
191 {
192 /* Everything is in UTF-8 */
193 std::vector<com::Utf8Str> astrTranslations;
194 com::Utf8Str strContext;
195 com::Utf8Str strComment;
196 com::Utf8Str strSource;
197
198 QMMessageParse() {}
199 };
200
201 struct QMMessage
202 {
203 const char *pszContext;
204 const char *pszSource;
205 const char *pszComment;
206 std::vector<const char *> vecTranslations;
207 uint32_t hash;
208
209 QMMessage() : pszContext(NULL), pszSource(NULL), pszComment(NULL), hash(0)
210 {}
211
212 QMMessage(RTSTRCACHE hStrCache, const QMMessageParse &rSrc)
213 : pszContext(addStr(hStrCache, rSrc.strContext))
214 , pszSource(addStr(hStrCache, rSrc.strSource))
215 , pszComment(addStr(hStrCache, rSrc.strComment))
216 , hash(RTStrHash1(pszSource))
217 {
218 for (size_t i = 0; i < rSrc.astrTranslations.size(); i++)
219 vecTranslations.push_back(addStr(hStrCache, rSrc.astrTranslations[i]));
220 }
221
222 /** Helper. */
223 static const char *addStr(RTSTRCACHE hStrCache, const com::Utf8Str &rSrc)
224 {
225 if (rSrc.isNotEmpty())
226 {
227 const char *psz = RTStrCacheEnterN(hStrCache, rSrc.c_str(), rSrc.length());
228 if (RT_LIKELY(psz))
229 return psz;
230 throw std::bad_alloc();
231 }
232 return NULL;
233 }
234
235 };
236
237 struct HashOffset
238 {
239 uint32_t hash;
240 uint32_t offset;
241
242 HashOffset(uint32_t a_hash = 0, uint32_t a_offs = 0) : hash(a_hash), offset(a_offs) {}
243
244 bool operator<(const HashOffset &obj) const
245 {
246 return (hash != obj.hash ? hash < obj.hash : offset < obj.offset);
247 }
248
249 };
250
251 typedef std::set<HashOffset> QMHashSet;
252 typedef QMHashSet::const_iterator QMHashSetConstIter;
253 typedef std::vector<QMMessage> QMMessageArray;
254 typedef std::vector<uint8_t> QMByteArray;
255
256 QMHashSet m_hashSet;
257 QMMessageArray m_messageArray;
258 QMByteArray m_pluralRules;
259
260public:
261
262 QMTranslator_Impl() {}
263
264 enum PluralOpCodes
265 {
266 Pl_Eq = 0x01,
267 Pl_Lt = 0x02,
268 Pl_Leq = 0x03,
269 Pl_Between = 0x04,
270
271 Pl_OpMask = 0x07,
272
273 Pl_Not = 0x08,
274 Pl_Mod10 = 0x10,
275 Pl_Mod100 = 0x20,
276 Pl_Lead1000 = 0x40,
277
278 Pl_And = 0xFD,
279 Pl_Or = 0xFE,
280 Pl_NewRule = 0xFF,
281
282 Pl_LMask = 0x80,
283 };
284
285 /*
286 * Rules format:
287 * <O><2>[<3>][<&&><O><2>[<3>]]...[<||><O><2>[<3>][<&&><O><2>[<3>]]...]...[<New><O>...]...
288 * where:
289 * <O> - OpCode
290 * <2> - Second operand
291 * <3> - Third operand
292 * <&&> - 'And' operation
293 * <||> - 'Or' operation
294 * <New> - Start of rule for next plural form
295 * Rules are ordered by plural forms, i.e:
296 * <rule for first form (i.e. single)><New><rule for next form>...
297 */
298 bool checkPlural(const QMByteArray &aRules) const
299 {
300 if (aRules.empty())
301 return true;
302
303 uint32_t iPos = 0;
304 do {
305 uint8_t bOpCode = aRules[iPos];
306
307 /* Invalid place of And/Or/NewRule */
308 if (bOpCode & Pl_LMask)
309 return false;
310
311 /* 2nd operand */
312 iPos++;
313
314 /* 2nd operand missing */
315 if (iPos == aRules.size())
316 return false;
317
318 /* Invalid OpCode */
319 if ((bOpCode & Pl_OpMask) == 0)
320 return false;
321
322 if ((bOpCode & Pl_OpMask) == Pl_Between)
323 {
324 /* 3rd operand */
325 iPos++;
326
327 /* 3rd operand missing */
328 if (iPos == aRules.size())
329 return false;
330 }
331
332 /* And/Or/NewRule */
333 iPos++;
334
335 /* All rules checked */
336 if (iPos == aRules.size())
337 return true;
338
339 } while ( ( (aRules[iPos] == Pl_And)
340 || (aRules[iPos] == Pl_Or)
341 || (aRules[iPos] == Pl_NewRule))
342 && ++iPos != aRules.size());
343
344 return false;
345 }
346
347 size_t plural(int aNum) const
348 {
349 if (aNum < 1 || m_pluralRules.empty())
350 return 0;
351
352 size_t uPluralNumber = 0;
353 uint32_t iPos = 0;
354
355 /* Rules loop */
356 for (;;)
357 {
358 bool fOr = false;
359 /* 'Or' loop */
360 for (;;)
361 {
362 bool fAnd = true;
363 /* 'And' loop */
364 for (;;)
365 {
366 int iOpCode = m_pluralRules[iPos++];
367 int iOpLeft = aNum;
368 if (iOpCode & Pl_Mod10)
369 iOpLeft %= 10;
370 else if (iOpCode & Pl_Mod100)
371 iOpLeft %= 100;
372 else if (iOpLeft & Pl_Lead1000)
373 {
374 while (iOpLeft >= 1000)
375 iOpLeft /= 1000;
376 }
377 int iOpRight = m_pluralRules[iPos++];
378 int iOp = iOpCode & Pl_OpMask;
379 int iOpRight1 = 0;
380 if (iOp == Pl_Between)
381 iOpRight1 = m_pluralRules[iPos++];
382
383 bool fResult = (iOp == Pl_Eq && iOpLeft == iOpRight)
384 || (iOp == Pl_Lt && iOpLeft < iOpRight)
385 || (iOp == Pl_Leq && iOpLeft <= iOpRight)
386 || (iOp == Pl_Between && iOpLeft >= iOpRight && iOpLeft <= iOpRight1);
387 if (iOpCode & Pl_Not)
388 fResult = !fResult;
389
390 fAnd = fAnd && fResult;
391 if (iPos == m_pluralRules.size() || m_pluralRules[iPos] != Pl_And)
392 break;
393 iPos++;
394 }
395 fOr = fOr || fAnd;
396 if (iPos == m_pluralRules.size() || m_pluralRules[iPos] != Pl_Or)
397 break;
398 iPos++;
399 }
400 if (fOr)
401 return uPluralNumber;
402
403 /* Qt returns last plural number if none of rules are match. */
404 uPluralNumber++;
405
406 if (iPos >= m_pluralRules.size())
407 return uPluralNumber;
408
409 iPos++; // Skip Pl_NewRule
410 }
411 }
412
413 const char *translate(const char *pszContext,
414 const char *pszSource,
415 const char *pszDisamb,
416 const int aNum) const
417 {
418 QMHashSetConstIter lowerIter, upperIter;
419
420 /* As turned out, comments (pszDisamb) are not kept always in result qm file
421 * Therefore, exclude them from the hash */
422 uint32_t hash = RTStrHash1(pszSource);
423 lowerIter = m_hashSet.lower_bound(HashOffset(hash, 0));
424 upperIter = m_hashSet.upper_bound(HashOffset(hash, UINT32_MAX));
425
426 /*
427 * Check different combinations with and without context and
428 * disambiguation. This can help us to find the translation even
429 * if context or disambiguation are not know or properly defined.
430 */
431 const char *apszCtx[] = {pszContext, pszContext, NULL, NULL};
432 const char *apszDisabm[] = {pszDisamb, NULL, pszDisamb, NULL};
433 AssertCompile(RT_ELEMENTS(apszCtx) == RT_ELEMENTS(apszDisabm));
434
435 for (size_t i = 0; i < RT_ELEMENTS(apszCtx); ++i)
436 {
437 for (QMHashSetConstIter iter = lowerIter; iter != upperIter; ++iter)
438 {
439 const QMMessage &message = m_messageArray[iter->offset];
440 if ( RTStrCmp(message.pszSource, pszSource) == 0
441 && (!apszCtx[i] || !*apszCtx[i] || RTStrCmp(message.pszContext, apszCtx[i]) == 0)
442 && (!apszDisabm[i] || !*apszDisabm[i] || RTStrCmp(message.pszComment, apszDisabm[i]) == 0 ))
443 {
444 const std::vector<const char *> &vecTranslations = m_messageArray[iter->offset].vecTranslations;
445 size_t const idxPlural = plural(aNum);
446 return vecTranslations[RT_MIN(idxPlural, vecTranslations.size() - 1)];
447 }
448 }
449 }
450
451 return pszSource;
452 }
453
454 void load(QMBytesStream &stream, RTSTRCACHE hStrCache)
455 {
456 /* Load into local variables. If we failed during the load,
457 * it would allow us to keep the object in a valid (previous) state. */
458 QMHashSet hashSet;
459 QMMessageArray messageArray;
460 QMByteArray pluralRules;
461
462 stream.checkMagic();
463
464 while (!stream.hasFinished())
465 {
466 uint32_t sectionCode = stream.read8();
467 uint32_t sLen = stream.read32();
468
469 /* Hashes and Context sections are ignored. They contain hash tables
470 * to speed-up search which is not useful since we recalculate all hashes
471 * and don't perform context search by hash */
472 switch (sectionCode)
473 {
474 case Messages:
475 parseMessages(stream, hStrCache, &hashSet, &messageArray, sLen);
476 break;
477 case Hashes:
478 /* Only get size information to speed-up vector filling
479 * if Hashes section goes in the file before Message section */
480 if (messageArray.empty())
481 messageArray.reserve(sLen >> 3);
482 stream.seek(sLen);
483 break;
484 case NumerusRules:
485 {
486 pluralRules.resize(sLen);
487 uint32_t cbSize = stream.read((char *)&pluralRules[0], sLen);
488 if (cbSize < sLen)
489 throw QMException("Incorrect section size");
490 if (!checkPlural(pluralRules))
491 pluralRules.erase(pluralRules.begin(), pluralRules.end());
492 break;
493 }
494 case Contexts:
495 case Dependencies:
496 case Language:
497 stream.seek(sLen);
498 break;
499 default:
500 throw QMException("Unkown section");
501 }
502 }
503
504 /* Store the data into member variables.
505 * The following functions never generate exceptions */
506 m_hashSet.swap(hashSet);
507 m_messageArray.swap(messageArray);
508 m_pluralRules.swap(pluralRules);
509 }
510
511private:
512
513 /* Some QM stuff */
514 enum SectionType
515 {
516 Contexts = 0x2f,
517 Hashes = 0x42,
518 Messages = 0x69,
519 NumerusRules = 0x88,
520 Dependencies = 0x96,
521 Language = 0xa7
522 };
523
524 enum MessageType
525 {
526 End = 1,
527 SourceText16 = 2,
528 Translation = 3,
529 Context16 = 4,
530 Obsolete1 = 5, /**< was Hash */
531 SourceText = 6,
532 Context = 7,
533 Comment = 8
534 };
535
536 /* Read messages from the stream. */
537 static void parseMessages(QMBytesStream &stream, RTSTRCACHE hStrCache, QMHashSet * const hashSet,
538 QMMessageArray * const messageArray, size_t cbSize)
539 {
540 stream.setEnd(stream.tellPos() + cbSize);
541 uint32_t cMessage = 0;
542 while (!stream.hasFinished())
543 {
544 /* Process the record. Skip anything that doesn't have a source
545 string or any valid translations. Using C++ strings for temporary
546 storage here, as we don't want to pollute the cache we bogus strings
547 in case of duplicate sub-records or invalid records. */
548 QMMessageParse ParsedMsg;
549 parseMessageRecord(stream, &ParsedMsg);
550 if ( ParsedMsg.astrTranslations.size() > 0
551 && ParsedMsg.strSource.isNotEmpty())
552 {
553 /* Copy the strings over into the string cache and a hashed QMMessage,
554 before adding it to the result. */
555 QMMessage HashedMsg(hStrCache, ParsedMsg);
556 hashSet->insert(HashOffset(HashedMsg.hash, cMessage++));
557 messageArray->push_back(HashedMsg);
558
559 }
560 /*else: wtf? */
561 }
562 stream.setEnd();
563 }
564
565 /* Parse one message from the stream */
566 static void parseMessageRecord(QMBytesStream &stream, QMMessageParse * const message)
567 {
568 while (!stream.hasFinished())
569 {
570 uint8_t type = stream.read8();
571 switch (type)
572 {
573 case End:
574 return;
575 /* Ignored as obsolete */
576 case Context16:
577 case SourceText16:
578 stream.seek(stream.read32());
579 break;
580 case Translation:
581 message->astrTranslations.push_back(stream.readUtf16String());
582 break;
583
584 case SourceText:
585 message->strSource = stream.readString();
586 break;
587
588 case Context:
589 message->strContext = stream.readString();
590 break;
591
592 case Comment:
593 message->strComment = stream.readString();
594 break;
595
596 default:
597 /* Ignore unknown/obsolete block */
598 LogRel(("QMTranslator::parseMessageRecord(): Unknown/obsolete message block %x\n", type));
599 break;
600 }
601 }
602 }
603};
604
/* Interface functions implementation */
606QMTranslator::QMTranslator() : m_impl(new QMTranslator_Impl) {}
607
608QMTranslator::~QMTranslator() { delete m_impl; }
609
610const char *QMTranslator::translate(const char *pszContext, const char *pszSource,
611 const char *pszDisamb, const int aNum) const RT_NOEXCEPT
612{
613 return m_impl->translate(pszContext, pszSource, pszDisamb, aNum);
614}
615
616int QMTranslator::load(const char *pszFilename, RTSTRCACHE hStrCache) RT_NOEXCEPT
617{
618 /* To free safely the file in case of exception */
619 struct FileLoader
620 {
621 uint8_t *data;
622 size_t cbSize;
623 int rc;
624 FileLoader(const char *pszFname)
625 {
626 rc = RTFileReadAll(pszFname, (void**) &data, &cbSize);
627 }
628
629 ~FileLoader()
630 {
631 if (isSuccess())
632 RTFileReadAllFree(data, cbSize);
633 }
634 bool isSuccess() { return RT_SUCCESS(rc); }
635 };
636
637 try
638 {
639 FileLoader loader(pszFilename);
640 if (loader.isSuccess())
641 {
642 QMBytesStream stream(loader.data, loader.cbSize);
643 m_impl->load(stream, hStrCache);
644 }
645 return loader.rc;
646 }
647 catch(std::exception &e)
648 {
649 LogRel(("QMTranslator::load() failed to load file '%s', reason: %s\n", pszFilename, e.what()));
650 return VERR_INTERNAL_ERROR;
651 }
652 catch(...)
653 {
654 LogRel(("QMTranslator::load() failed to load file '%s'\n", pszFilename));
655 return VERR_GENERAL_FAILURE;
656 }
657}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette