VirtualBox

source: vbox/trunk/src/VBox/Main/src-all/QMTranslatorImpl.cpp@ 58591

Last change on this file since 58591 was 52901, checked in by vboxsync, 10 years ago

Main: trailing spaces + warning

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 12.7 KB
Line 
1/* $Id: QMTranslatorImpl.cpp 52901 2014-09-30 15:32:03Z vboxsync $ */
2/** @file
3 * VirtualBox API translation handling class
4 */
5
6/*
7 * Copyright (C) 2014 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18#include <vector>
19#include <set>
20#include <algorithm>
21#include <iterator>
22#include <iprt/file.h>
23#include <iprt/asm.h>
24#include <VBox/com/string.h>
25#include <VBox/log.h>
26#include <QMTranslator.h>
27
/* QM file magic number.
 * Every QM file accepted by this parser must start with exactly these
 * MagicLength bytes (see QMBytesStream::checkMagic). */
static const size_t MagicLength = 16;
static const uint8_t Magic[MagicLength] = {
    0x3c, 0xb8, 0x64, 0x18, 0xca, 0xef, 0x9c, 0x95,
    0xcd, 0x21, 0x1c, 0xbf, 0x60, 0xa1, 0xbd, 0xdd
};
34
/* Exception type used internally by the QM parser.
 *
 * The message pointer is stored as-is and NOT copied, so the string passed
 * to the constructor must stay valid for as long as the exception (or any
 * copy of it) is alive. Passing a string literal satisfies this. */
class QMException : public std::exception
{
    const char *m_str;
public:
    /* str: human readable failure reason, returned verbatim by what(). */
    explicit QMException(const char *str) : m_str(str) {}

    /* Returns the pointer given to the constructor. */
    virtual const char *what() const throw() { return m_str; }
};
43
44/* Bytes stream. Used by the parser to iterate through the data */
45class QMBytesStream
46{
47 size_t m_cbSize;
48 const uint8_t * const m_dataStart;
49 const uint8_t *m_iter;
50 const uint8_t *m_end;
51
52 /* Function stub for transform method */
53 static uint16_t func_BE2H_U16(uint16_t value)
54 {
55 return RT_BE2H_U16(value);
56 }
57
58public:
59
60 QMBytesStream(const uint8_t *const dataStart, size_t cbSize) :
61 m_cbSize(dataStart ? cbSize : 0),
62 m_dataStart(dataStart),
63 m_iter(dataStart)
64 {
65 setEnd();
66 }
67
68 /* Sets end pointer
69 * Used in message reader to detect the end of message block */
70 inline void setEnd(size_t pos = 0)
71 {
72 m_end = m_dataStart + (pos && pos < m_cbSize ? pos : m_cbSize);
73 }
74
75 inline uint8_t read8()
76 {
77 checkSize(1);
78 return *m_iter++;
79 }
80
81 inline uint32_t read32()
82 {
83 checkSize(4);
84 uint32_t result = *reinterpret_cast<const uint32_t *>(m_iter);
85 m_iter += 4;
86 return RT_BE2H_U32(result);
87 }
88
89 /* Reads string in UTF16 and converts it into a UTF8 string */
90 inline com::Utf8Str readUtf16String()
91 {
92 uint32_t size = read32();
93 checkSize(size);
94 if (size & 1) throw QMException("Incorrect string size");
95 std::vector<uint16_t> wstr;
96 wstr.reserve(size / 2);
97
98 /* We cannot convert to host endianess without copying the data
99 * since the file might be mapped to the memory and any memory
100 * change will lead to the change of the file. */
101 std::transform(reinterpret_cast<const uint16_t *>(m_iter),
102 reinterpret_cast<const uint16_t *>(m_iter + size),
103 std::back_inserter(wstr),
104 func_BE2H_U16);
105 m_iter += size;
106 return com::Utf8Str((CBSTR) &wstr.front(), wstr.size());
107 }
108
109 /* Reads string in one-byte encoding
110 * The string is assumed to be in ISO-8859-1 encoding */
111 inline com::Utf8Str readString()
112 {
113 uint32_t size = read32();
114 checkSize(size);
115 com::Utf8Str result(reinterpret_cast<const char *>(m_iter), size);
116 m_iter += size;
117 return result;
118 }
119
120 /* Checks the magic number
121 * Should be called when in the beginning of the data */
122 inline void checkMagic()
123 {
124 checkSize(MagicLength);
125 if (memcmp(&(*m_iter), Magic, MagicLength)) throw QMException("Wrong magic number");
126 m_iter += MagicLength;
127 }
128
129 /* Has we reached the end pointer? */
130 inline bool hasFinished() { return m_iter == m_end; }
131
132 /* Returns current stream position */
133 inline size_t tellPos() { return m_iter - m_dataStart; }
134
135 /* Moves current pointer to a desired position */
136 inline void seek(int pos) { m_iter += pos; }
137
138 /* Checks whether stream has enough data to read size bytes */
139 inline void checkSize(int size)
140 {
141 if (m_end - m_iter < size) throw QMException("Incorrect item size");
142 }
143};
144
145/* Internal QMTranslator implementation */
146class QMTranslator_Impl
147{
148 struct QMMessage
149 {
150 /* Everything is in UTF-8 */
151 com::Utf8Str strContext;
152 com::Utf8Str strTranslation;
153 com::Utf8Str strComment;
154 com::Utf8Str strSource;
155 uint32_t hash;
156 QMMessage() : hash(0) {}
157 };
158
159 struct HashOffset
160 {
161 uint32_t hash;
162 uint32_t offset;
163
164 HashOffset(uint32_t _hash = 0, uint32_t _offs = 0) : hash(_hash), offset(_offs) {}
165
166 bool operator<(const HashOffset &obj) const
167 {
168 return (hash != obj.hash ? hash < obj.hash : offset < obj.offset);
169 }
170
171 };
172
173 typedef std::set<HashOffset> QMHashSet;
174 typedef QMHashSet::const_iterator QMHashSetConstIter;
175 typedef std::vector<QMMessage> QMMessageArray;
176
177 QMHashSet m_hashSet;
178 QMMessageArray m_messageArray;
179
180public:
181
182 QMTranslator_Impl() {}
183
184 const char *translate(const char *pszContext,
185 const char *pszSource,
186 const char *pszDisamb) const
187 {
188 QMHashSetConstIter iter;
189 QMHashSetConstIter lowerIter, upperIter;
190
191 do {
192 uint32_t hash = calculateHash(pszSource, pszDisamb);
193 lowerIter = m_hashSet.lower_bound(HashOffset(hash, 0));
194 upperIter = m_hashSet.upper_bound(HashOffset(hash, ~0));
195
196 for (iter = lowerIter; iter != upperIter; ++iter)
197 {
198 const QMMessage &message = m_messageArray[iter->offset];
199 if ((!pszContext || !*pszContext || message.strContext == pszContext) &&
200 message.strSource == pszSource &&
201 ((pszDisamb && !*pszDisamb) || message.strComment == pszDisamb))
202 break;
203 }
204
205 /* Try without disambiguating comment if it isn't empty */
206 if (pszDisamb)
207 {
208 if (!*pszDisamb) pszDisamb = 0;
209 else pszDisamb = "";
210 }
211
212 } while (iter == upperIter && pszDisamb);
213
214 return (iter != upperIter ? m_messageArray[iter->offset].strTranslation.c_str() : "");
215 }
216
217 void load(QMBytesStream &stream)
218 {
219 /* Load into local variables. If we failed during the load,
220 * it would allow us to keep the object in a valid (previous) state. */
221 QMHashSet hashSet;
222 QMMessageArray messageArray;
223
224 stream.checkMagic();
225
226 while (!stream.hasFinished())
227 {
228 uint32_t sectionCode = stream.read8();
229 uint32_t sLen = stream.read32();
230
231 /* Hashes and Context sections are ignored. They contain hash tables
232 * to speed-up search which is not useful since we recalculate all hashes
233 * and don't perform context search by hash */
234 switch (sectionCode)
235 {
236 case Messages:
237 parseMessages(stream, &hashSet, &messageArray, sLen);
238 break;
239 case Hashes:
240 /* Only get size information to speed-up vector filling
241 * if Hashes section goes in the file before Message section */
242 m_messageArray.reserve(sLen >> 3);
243 /* NB! NO BREAK HERE */
244 case Context:
245 stream.seek(sLen);
246 break;
247 default:
248 throw QMException("Unkown section");
249 }
250 }
251 /* Store the data into member variables.
252 * The following functions never generate exceptions */
253 m_hashSet.swap(hashSet);
254 m_messageArray.swap(messageArray);
255 }
256
257private:
258
259 /* Some QM stuff */
260 enum SectionType
261 {
262 Hashes = 0x42,
263 Messages = 0x69,
264 Contexts = 0x2f
265 };
266
267 enum MessageType
268 {
269 End = 1,
270 SourceText16 = 2,
271 Translation = 3,
272 Context16 = 4,
273 Hash = 5,
274 SourceText = 6,
275 Context = 7,
276 Comment = 8
277 };
278
279 /* Read messages from the stream. */
280 static void parseMessages(QMBytesStream &stream, QMHashSet * const hashSet, QMMessageArray * const messageArray, size_t cbSize)
281 {
282 stream.setEnd(stream.tellPos() + cbSize);
283 uint32_t cMessage = 0;
284 while (!stream.hasFinished())
285 {
286 QMMessage message;
287 HashOffset hashOffs;
288
289 parseMessageRecord(stream, &message);
290 if (!message.hash)
291 message.hash = calculateHash(message.strSource.c_str(), message.strComment.c_str());
292
293 hashOffs.hash = message.hash;
294 hashOffs.offset = cMessage++;
295
296 hashSet->insert(hashOffs);
297 messageArray->push_back(message);
298 }
299 stream.setEnd();
300 }
301
302 /* Parse one message from the stream */
303 static void parseMessageRecord(QMBytesStream &stream, QMMessage * const message)
304 {
305 while(!stream.hasFinished())
306 {
307 uint8_t type = stream.read8();
308 switch(type)
309 {
310 case End:
311 return;
312 /* Ignored as obsolete */
313 case Context16:
314 case SourceText16:
315 stream.seek(stream.read32());
316 break;
317 case Translation:
318 {
319 com::Utf8Str str = stream.readUtf16String();
320 message->strTranslation.swap(str);
321 break;
322 }
323 case Hash:
324 message->hash = stream.read32();
325 break;
326
327 case SourceText:
328 {
329 com::Utf8Str str = stream.readString();
330 message->strSource.swap(str);
331 break;
332 }
333
334 case Context:
335 {
336 com::Utf8Str str = stream.readString();
337 message->strContext.swap(str);
338 break;
339 }
340
341 case Comment:
342 {
343 com::Utf8Str str = stream.readString();
344 message->strComment.swap(str);
345 break;
346 }
347
348 default:
349 /* Ignore unknown block */
350 LogRel(("QMTranslator::parseMessageRecord(): Unkown message block %x\n", type));
351 break;
352 }
353 }
354 }
355
356 /* Defines the so called `hashpjw' function by P.J. Weinberger
357 [see Aho/Sethi/Ullman, COMPILERS: Principles, Techniques and Tools,
358 1986, 1987 Bell Telephone Laboratories, Inc.] */
359 static uint32_t calculateHash(const char *pszStr1, const char *pszStr2 = 0)
360 {
361 uint32_t hash = 0, g;
362
363 for (const char *pszStr = pszStr1; pszStr != pszStr2; pszStr = pszStr2)
364 for (; pszStr && *pszStr; pszStr++)
365 {
366 hash = (hash << 4) + static_cast<uint8_t>(*pszStr);
367
368 if ((g = hash & 0xf0000000ul) != 0)
369 {
370 hash ^= g >> 24;
371 hash ^= g;
372 }
373 }
374
375 return (hash != 0 ? hash : 1);
376 }
377};
378
/* Interface functions implementation */
380QMTranslator::QMTranslator() : _impl(new QMTranslator_Impl) {}
381
382QMTranslator::~QMTranslator() { delete _impl; }
383
384const char *QMTranslator::translate(const char *pszContext, const char *pszSource, const char *pszDisamb) const throw()
385{
386 return _impl->translate(pszContext, pszSource, pszDisamb);
387}
388
389/* The function is noexcept for now but it may be changed
390 * to throw exceptions if required to catch them in another
391 * place. */
392int QMTranslator::load(const char *pszFilename) throw()
393{
394 /* To free safely the file in case of exception */
395 struct FileLoader
396 {
397 uint8_t *data;
398 size_t cbSize;
399 int rc;
400 FileLoader(const char *pszFname)
401 {
402 rc = RTFileReadAll(pszFname, (void**) &data, &cbSize);
403 }
404
405 ~FileLoader()
406 {
407 if (isSuccess())
408 RTFileReadAllFree(data, cbSize);
409 }
410 bool isSuccess() { return RT_SUCCESS(rc); }
411 };
412
413 try
414 {
415 FileLoader loader(pszFilename);
416 if (loader.isSuccess())
417 {
418 QMBytesStream stream(loader.data, loader.cbSize);
419 _impl->load(stream);
420 }
421 return loader.rc;
422 }
423 catch(std::exception &e)
424 {
425 LogRel(("QMTranslator::load() failed to load file '%s', reason: %s\n", pszFilename, e.what()));
426 return VERR_INTERNAL_ERROR;
427 }
428 catch(...)
429 {
430 LogRel(("QMTranslator::load() failed to load file '%s'\n", pszFilename));
431 return VERR_GENERAL_FAILURE;
432 }
433}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette