QMTranslatorImpl.cpp@ 94134

Last change on this file since 94134 was 93115, checked in by vboxsync, 3 years ago
scm --update-copyright-year
Property svn:eol-style set to `native`. Property svn:keywords set to `Author Date Id Revision`.
File size: 20.2 KB

Line
1	/* $Id: QMTranslatorImpl.cpp 93115 2022-01-01 11:31:46Z vboxsync $ */
2	/** @file
3	* VirtualBox API translation handling class
4	*/
5
6	/*
7	* Copyright (C) 2014-2022 Oracle Corporation
8	*
9	* This file is part of VirtualBox Open Source Edition (OSE), as
10	* available from http://www.virtualbox.org. This file is free software;
11	* you can redistribute it and/or modify it under the terms of the GNU
12	* General Public License (GPL) as published by the Free Software
13	* Foundation, in version 2 as it comes in the "COPYING" file of the
14	* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15	* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16	*/
17
18	#include <vector>
19	#include <set>
20	#include <algorithm>
21	#include <iprt/sanitized/iterator>
22	#include <iprt/errcore.h>
23	#include <iprt/file.h>
24	#include <iprt/asm.h>
25	#include <iprt/string.h>
26	#include <iprt/strcache.h>
27	#include <VBox/com/string.h>
28	#include <VBox/log.h>
29	#include <QMTranslator.h>
30
/** QM file magic number. */
static const uint8_t g_abMagic[] =
{
    0x3c, 0xb8, 0x64, 0x18, 0xca, 0xef, 0x9c, 0x95,
    0xcd, 0x21, 0x1c, 0xbf, 0x60, 0xa1, 0xbd, 0xdd
};

/** Size of the QM file magic number in bytes. */
static const size_t g_cbMagic = sizeof(g_abMagic);
38
/**
 * Exception type used internally by the QM parser.
 *
 * The message pointer is stored as-is and is not copied, so the string must
 * outlive the exception object. The throw sites in this file pass string
 * literals.
 */
class QMException : public std::exception
{
    /** The message returned by what(); not owned by this object. */
    const char *m_str;
public:
    /**
     * @param str   Message describing the failure; must stay valid for as
     *              long as the exception is alive.
     */
    explicit QMException(const char *str) : m_str(str) {}

    /** @returns the message given to the constructor. */
    virtual const char *what() const throw() { return m_str; }
};
47
48	/* Bytes stream. Used by the parser to iterate through the data */
49	class QMBytesStream
50	{
51	size_t m_cbSize;
52	const uint8_t * const m_dataStart;
53	const uint8_t *m_iter;
54	const uint8_t *m_end;
55
56	public:
57
58	QMBytesStream(const uint8_t *const dataStart, size_t cbSize)
59	: m_cbSize(dataStart ? cbSize : 0)
60	, m_dataStart(dataStart)
61	, m_iter(dataStart)
62	{
63	setEnd();
64	}
65
66	/** Sets end pointer.
67	* Used in message reader to detect the end of message block */
68	inline void setEnd(size_t pos = 0)
69	{
70	m_end = m_dataStart + (pos && pos < m_cbSize ? pos : m_cbSize);
71	}
72
73	inline uint8_t read8()
74	{
75	checkSize(1);
76	return *m_iter++;
77	}
78
79	inline uint32_t read32()
80	{
81	checkSize(4);
82	uint32_t result = reinterpret_cast<const uint32_t >(m_iter);
83	m_iter += 4;
84	return RT_BE2H_U32(result);
85	}
86
87	/** Reads string in UTF16 and converts it into a UTF8 string */
88	inline com::Utf8Str readUtf16String()
89	{
90	uint32_t size = read32();
91	checkSize(size);
92	if (size & 1)
93	throw QMException("Incorrect string size");
94
95	/* UTF-16 can encode up to codepoint U+10ffff, which UTF-8 needs 4 bytes
96	to encode, so reserve twice the size plus a terminator for the result. */
97	com::Utf8Str result;
98	result.reserve(size * 2 + 1);
99	char *pszStr = result.mutableRaw();
100	int rc = RTUtf16BigToUtf8Ex((PCRTUTF16)m_iter, size >> 1, &pszStr, result.capacity(), NULL);
101	if (RT_SUCCESS(rc))
102	result.jolt();
103	else
104	throw QMException("Translation from UTF-16 to UTF-8 failed");
105
106	m_iter += size;
107	return result;
108	}
109
110	/**
111	* Reads a string, forcing UTF-8 encoding.
112	*/
113	inline com::Utf8Str readString()
114	{
115	uint32_t size = read32();
116	checkSize(size);
117
118	com::Utf8Str result(reinterpret_cast<const char *>(m_iter), size);
119	if (size > 0)
120	{
121	RTStrPurgeEncoding(result.mutableRaw());
122	result.jolt();
123	}
124
125	m_iter += size;
126	return result;
127	}
128
129	/**
130	* Reads memory block
131	* Returns number of bytes read
132	*/
133	inline uint32_t read(char *bBuf, uint32_t cbSize)
134	{
135	if (!bBuf \|\| !cbSize)
136	return 0;
137	cbSize = RT_MIN(cbSize, (uint32_t)(m_end - m_iter));
138	memcpy(bBuf, m_iter, cbSize);
139	m_iter += cbSize;
140	return cbSize;
141	}
142
143	/** Checks the magic number.
144	* Should be called when in the beginning of the data
145	* @throws exception on mismatch */
146	inline void checkMagic()
147	{
148	checkSize(g_cbMagic);
149	if (RT_LIKELY(memcmp(&(*m_iter), g_abMagic, g_cbMagic) == 0))
150	m_iter += g_cbMagic;
151	else
152	throw QMException("Wrong magic number");
153	}
154
155	/** Has we reached the end pointer? */
156	inline bool hasFinished()
157	{
158	return m_iter == m_end;
159	}
160
161	/** Returns current stream position */
162	inline size_t tellPos()
163	{
164	return (size_t)(m_iter - m_dataStart);
165	}
166
167	/** Moves current pointer to a desired position */
168	inline void seek(uint32_t offSkip)
169	{
170	size_t cbLeft = (size_t)(m_end - m_iter);
171	if (cbLeft >= offSkip)
172	m_iter += offSkip;
173	else
174	m_iter = m_end; /** @todo r=bird: Or throw exception via checkSize? */
175	}
176
177	/** Checks whether stream has enough data to read size bytes */
178	inline void checkSize(size_t size)
179	{
180	if (RT_LIKELY((size_t)(m_end - m_iter) >= size))
181	return;
182	throw QMException("Incorrect item size");
183	}
184	};
185
186	/* Internal QMTranslator implementation */
187	class QMTranslator_Impl
188	{
189	/** Used while parsing */
190	struct QMMessageParse
191	{
192	/* Everything is in UTF-8 */
193	std::vector<com::Utf8Str> astrTranslations;
194	com::Utf8Str strContext;
195	com::Utf8Str strComment;
196	com::Utf8Str strSource;
197
198	QMMessageParse() {}
199	};
200
201	struct QMMessage
202	{
203	const char *pszContext;
204	const char *pszSource;
205	const char *pszComment;
206	std::vector<const char *> vecTranslations;
207	uint32_t hash;
208
209	QMMessage() : pszContext(NULL), pszSource(NULL), pszComment(NULL), hash(0)
210	{}
211
212	QMMessage(RTSTRCACHE hStrCache, const QMMessageParse &rSrc)
213	: pszContext(addStr(hStrCache, rSrc.strContext))
214	, pszSource(addStr(hStrCache, rSrc.strSource))
215	, pszComment(addStr(hStrCache, rSrc.strComment))
216	, hash(RTStrHash1(pszSource))
217	{
218	for (size_t i = 0; i < rSrc.astrTranslations.size(); i++)
219	vecTranslations.push_back(addStr(hStrCache, rSrc.astrTranslations[i]));
220	}
221
222	/** Helper. */
223	static const char *addStr(RTSTRCACHE hStrCache, const com::Utf8Str &rSrc)
224	{
225	if (rSrc.isNotEmpty())
226	{
227	const char *psz = RTStrCacheEnterN(hStrCache, rSrc.c_str(), rSrc.length());
228	if (RT_LIKELY(psz))
229	return psz;
230	throw std::bad_alloc();
231	}
232	return NULL;
233	}
234
235	};
236
237	struct HashOffset
238	{
239	uint32_t hash;
240	uint32_t offset;
241
242	HashOffset(uint32_t a_hash = 0, uint32_t a_offs = 0) : hash(a_hash), offset(a_offs) {}
243
244	bool operator<(const HashOffset &obj) const
245	{
246	return (hash != obj.hash ? hash < obj.hash : offset < obj.offset);
247	}
248
249	};
250
251	typedef std::set<HashOffset> QMHashSet;
252	typedef QMHashSet::const_iterator QMHashSetConstIter;
253	typedef std::vector<QMMessage> QMMessageArray;
254	typedef std::vector<uint8_t> QMByteArray;
255
256	QMHashSet m_hashSet;
257	QMMessageArray m_messageArray;
258	QMByteArray m_pluralRules;
259
260	public:
261
262	QMTranslator_Impl() {}
263
264	enum PluralOpCodes
265	{
266	Pl_Eq = 0x01,
267	Pl_Lt = 0x02,
268	Pl_Leq = 0x03,
269	Pl_Between = 0x04,
270
271	Pl_OpMask = 0x07,
272
273	Pl_Not = 0x08,
274	Pl_Mod10 = 0x10,
275	Pl_Mod100 = 0x20,
276	Pl_Lead1000 = 0x40,
277
278	Pl_And = 0xFD,
279	Pl_Or = 0xFE,
280	Pl_NewRule = 0xFF,
281
282	Pl_LMask = 0x80,
283	};
284
285	/*
286	* Rules format:
287	* <O><2>[<3>][<&&><O><2>[<3>]]...[<\|\|><O><2>[<3>][<&&><O><2>[<3>]]...]...[<New><O>...]...
288	* where:
289	* <O> - OpCode
290	* <2> - Second operand
291	* <3> - Third operand
292	* <&&> - 'And' operation
293	* <\|\|> - 'Or' operation
294	* <New> - Start of rule for next plural form
295	* Rules are ordered by plural forms, i.e:
296	* <rule for first form (i.e. single)><New><rule for next form>...
297	*/
298	bool checkPlural(const QMByteArray &aRules) const
299	{
300	if (aRules.empty())
301	return true;
302
303	uint32_t iPos = 0;
304	do {
305	uint8_t bOpCode = aRules[iPos];
306
307	/* Invalid place of And/Or/NewRule */
308	if (bOpCode & Pl_LMask)
309	return false;
310
311	/* 2nd operand */
312	iPos++;
313
314	/* 2nd operand missing */
315	if (iPos == aRules.size())
316	return false;
317
318	/* Invalid OpCode */
319	if ((bOpCode & Pl_OpMask) == 0)
320	return false;
321
322	if ((bOpCode & Pl_OpMask) == Pl_Between)
323	{
324	/* 3rd operand */
325	iPos++;
326
327	/* 3rd operand missing */
328	if (iPos == aRules.size())
329	return false;
330	}
331
332	/* And/Or/NewRule */
333	iPos++;
334
335	/* All rules checked */
336	if (iPos == aRules.size())
337	return true;
338
339	} while ( ( (aRules[iPos] == Pl_And)
340	\|\| (aRules[iPos] == Pl_Or)
341	\|\| (aRules[iPos] == Pl_NewRule))
342	&& ++iPos != aRules.size());
343
344	return false;
345	}
346
347	size_t plural(size_t aNum) const
348	{
349	if (aNum == ~(size_t)0 \|\| m_pluralRules.empty())
350	return 0;
351
352	size_t uPluralNumber = 0;
353	uint32_t iPos = 0;
354
355	/* Rules loop */
356	for (;;)
357	{
358	bool fOr = false;
359	/* 'Or' loop */
360	for (;;)
361	{
362	bool fAnd = true;
363	/* 'And' loop */
364	for (;;)
365	{
366	int iOpCode = m_pluralRules[iPos++];
367	size_t iOpLeft = aNum;
368	if (iOpCode & Pl_Mod10)
369	iOpLeft %= 10;
370	else if (iOpCode & Pl_Mod100)
371	iOpLeft %= 100;
372	else if (iOpCode & Pl_Lead1000)
373	{
374	while (iOpLeft >= 1000)
375	iOpLeft /= 1000;
376	}
377	size_t iOpRight = m_pluralRules[iPos++];
378	int iOp = iOpCode & Pl_OpMask;
379	size_t iOpRight1 = 0;
380	if (iOp == Pl_Between)
381	iOpRight1 = m_pluralRules[iPos++];
382
383	bool fResult = (iOp == Pl_Eq && iOpLeft == iOpRight)
384	\|\| (iOp == Pl_Lt && iOpLeft < iOpRight)
385	\|\| (iOp == Pl_Leq && iOpLeft <= iOpRight)
386	\|\| (iOp == Pl_Between && iOpLeft >= iOpRight && iOpLeft <= iOpRight1);
387	if (iOpCode & Pl_Not)
388	fResult = !fResult;
389
390	fAnd = fAnd && fResult;
391	if (iPos == m_pluralRules.size() \|\| m_pluralRules[iPos] != Pl_And)
392	break;
393	iPos++;
394	}
395	fOr = fOr \|\| fAnd;
396	if (iPos == m_pluralRules.size() \|\| m_pluralRules[iPos] != Pl_Or)
397	break;
398	iPos++;
399	}
400	if (fOr)
401	return uPluralNumber;
402
403	/* Qt returns last plural number if none of rules are match. */
404	uPluralNumber++;
405
406	if (iPos >= m_pluralRules.size())
407	return uPluralNumber;
408
409	iPos++; // Skip Pl_NewRule
410	}
411	}
412
413	const char translate(const char pszContext,
414	const char *pszSource,
415	const char *pszDisamb,
416	const size_t aNum,
417	const char **ppszSafeSource) const RT_NOEXCEPT
418	{
419	QMHashSetConstIter lowerIter, upperIter;
420
421	/* As turned out, comments (pszDisamb) are not kept always in result qm file
422	* Therefore, exclude them from the hash */
423	uint32_t hash = RTStrHash1(pszSource);
424	lowerIter = m_hashSet.lower_bound(HashOffset(hash, 0));
425	upperIter = m_hashSet.upper_bound(HashOffset(hash, UINT32_MAX));
426
427	/*
428	* Check different combinations with and without context and
429	* disambiguation. This can help us to find the translation even
430	* if context or disambiguation are not know or properly defined.
431	*/
432	const char *apszCtx[] = {pszContext, pszContext, NULL, NULL};
433	const char *apszDisabm[] = {pszDisamb, NULL, pszDisamb, NULL};
434	AssertCompile(RT_ELEMENTS(apszCtx) == RT_ELEMENTS(apszDisabm));
435
436	for (size_t i = 0; i < RT_ELEMENTS(apszCtx); ++i)
437	{
438	for (QMHashSetConstIter iter = lowerIter; iter != upperIter; ++iter)
439	{
440	const QMMessage &message = m_messageArray[iter->offset];
441	if ( RTStrCmp(message.pszSource, pszSource) == 0
442	&& (!apszCtx[i] \|\| !*apszCtx[i] \|\| RTStrCmp(message.pszContext, apszCtx[i]) == 0)
443	&& (!apszDisabm[i] \|\| !*apszDisabm[i] \|\| RTStrCmp(message.pszComment, apszDisabm[i]) == 0 ))
444	{
445	*ppszSafeSource = message.pszSource;
446	const std::vector<const char *> &vecTranslations = m_messageArray[iter->offset].vecTranslations;
447	size_t const idxPlural = plural(aNum);
448	return vecTranslations[RT_MIN(idxPlural, vecTranslations.size() - 1)];
449	}
450	}
451	}
452
453	*ppszSafeSource = NULL;
454	return pszSource;
455	}
456
457	void load(QMBytesStream &stream, RTSTRCACHE hStrCache)
458	{
459	/* Load into local variables. If we failed during the load,
460	* it would allow us to keep the object in a valid (previous) state. */
461	QMHashSet hashSet;
462	QMMessageArray messageArray;
463	QMByteArray pluralRules;
464
465	stream.checkMagic();
466
467	while (!stream.hasFinished())
468	{
469	uint32_t sectionCode = stream.read8();
470	uint32_t sLen = stream.read32();
471
472	/* Hashes and Context sections are ignored. They contain hash tables
473	* to speed-up search which is not useful since we recalculate all hashes
474	* and don't perform context search by hash */
475	switch (sectionCode)
476	{
477	case Messages:
478	parseMessages(stream, hStrCache, &hashSet, &messageArray, sLen);
479	break;
480	case Hashes:
481	/* Only get size information to speed-up vector filling
482	* if Hashes section goes in the file before Message section */
483	if (messageArray.empty())
484	messageArray.reserve(sLen >> 3);
485	stream.seek(sLen);
486	break;
487	case NumerusRules:
488	{
489	pluralRules.resize(sLen);
490	uint32_t cbSize = stream.read((char *)&pluralRules[0], sLen);
491	if (cbSize < sLen)
492	throw QMException("Incorrect section size");
493	if (!checkPlural(pluralRules))
494	pluralRules.erase(pluralRules.begin(), pluralRules.end());
495	break;
496	}
497	case Contexts:
498	case Dependencies:
499	case Language:
500	stream.seek(sLen);
501	break;
502	default:
503	throw QMException("Unkown section");
504	}
505	}
506
507	/* Store the data into member variables.
508	* The following functions never generate exceptions */
509	m_hashSet.swap(hashSet);
510	m_messageArray.swap(messageArray);
511	m_pluralRules.swap(pluralRules);
512	}
513
514	private:
515
516	/* Some QM stuff */
517	enum SectionType
518	{
519	Contexts = 0x2f,
520	Hashes = 0x42,
521	Messages = 0x69,
522	NumerusRules = 0x88,
523	Dependencies = 0x96,
524	Language = 0xa7
525	};
526
527	enum MessageType
528	{
529	End = 1,
530	SourceText16 = 2,
531	Translation = 3,
532	Context16 = 4,
533	Obsolete1 = 5, /*< was Hash /
534	SourceText = 6,
535	Context = 7,
536	Comment = 8
537	};
538
539	/* Read messages from the stream. */
540	static void parseMessages(QMBytesStream &stream, RTSTRCACHE hStrCache, QMHashSet * const hashSet,
541	QMMessageArray * const messageArray, size_t cbSize)
542	{
543	stream.setEnd(stream.tellPos() + cbSize);
544	uint32_t cMessage = 0;
545	while (!stream.hasFinished())
546	{
547	/* Process the record. Skip anything that doesn't have a source
548	string or any valid translations. Using C++ strings for temporary
549	storage here, as we don't want to pollute the cache we bogus strings
550	in case of duplicate sub-records or invalid records. */
551	QMMessageParse ParsedMsg;
552	parseMessageRecord(stream, &ParsedMsg);
553	if ( ParsedMsg.astrTranslations.size() > 0
554	&& ParsedMsg.strSource.isNotEmpty())
555	{
556	/* Copy the strings over into the string cache and a hashed QMMessage,
557	before adding it to the result. */
558	QMMessage HashedMsg(hStrCache, ParsedMsg);
559	hashSet->insert(HashOffset(HashedMsg.hash, cMessage++));
560	messageArray->push_back(HashedMsg);
561
562	}
563	/else: wtf? /
564	}
565	stream.setEnd();
566	}
567
568	/* Parse one message from the stream */
569	static void parseMessageRecord(QMBytesStream &stream, QMMessageParse * const message)
570	{
571	while (!stream.hasFinished())
572	{
573	uint8_t type = stream.read8();
574	switch (type)
575	{
576	case End:
577	return;
578	/* Ignored as obsolete */
579	case Context16:
580	case SourceText16:
581	stream.seek(stream.read32());
582	break;
583	case Translation:
584	message->astrTranslations.push_back(stream.readUtf16String());
585	break;
586
587	case SourceText:
588	message->strSource = stream.readString();
589	break;
590
591	case Context:
592	message->strContext = stream.readString();
593	break;
594
595	case Comment:
596	message->strComment = stream.readString();
597	break;
598
599	default:
600	/* Ignore unknown/obsolete block */
601	LogRel(("QMTranslator::parseMessageRecord(): Unknown/obsolete message block %x\n", type));
602	break;
603	}
604	}
605	}
606	};
607
608	/* Inteface functions implementation */
609	QMTranslator::QMTranslator() : m_impl(new QMTranslator_Impl) {}
610
611	QMTranslator::~QMTranslator() { delete m_impl; }
612
613	const char QMTranslator::translate(const char pszContext, const char pszSource, const char *ppszSafeSource,
614	const char pszDisamb /= NULL/, const size_t aNum /= ~(size_t)0*/) const RT_NOEXCEPT
615
616	{
617	return m_impl->translate(pszContext, pszSource, pszDisamb, aNum, ppszSafeSource);
618	}
619
620	int QMTranslator::load(const char *pszFilename, RTSTRCACHE hStrCache) RT_NOEXCEPT
621	{
622	/* To free safely the file in case of exception */
623	struct FileLoader
624	{
625	uint8_t *data;
626	size_t cbSize;
627	int rc;
628	FileLoader(const char *pszFname)
629	{
630	rc = RTFileReadAll(pszFname, (void**) &data, &cbSize);
631	}
632
633	~FileLoader()
634	{
635	if (isSuccess())
636	RTFileReadAllFree(data, cbSize);
637	}
638	bool isSuccess() { return RT_SUCCESS(rc); }
639	};
640
641	try
642	{
643	FileLoader loader(pszFilename);
644	if (loader.isSuccess())
645	{
646	QMBytesStream stream(loader.data, loader.cbSize);
647	m_impl->load(stream, hStrCache);
648	}
649	return loader.rc;
650	}
651	catch(std::exception &e)
652	{
653	LogRel(("QMTranslator::load() failed to load file '%s', reason: %s\n", pszFilename, e.what()));
654	return VERR_INTERNAL_ERROR;
655	}
656	catch(...)
657	{
658	LogRel(("QMTranslator::load() failed to load file '%s'\n", pszFilename));
659	return VERR_GENERAL_FAILURE;
660	}
661	}

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/Main/src-all/QMTranslatorImpl.cpp@ 94134

Download in other formats: