VirtualBox

source: vbox/trunk/include/VBox/com/string.h@ 36428

Last change on this file since 36428 was 36428, checked in by vboxsync, 14 years ago

com/string.h: AssertLogRel when encountering an invalid encoding in the copyFrom*() methods doing UTF-16/8 conversions. The ASSUMPTION is that all input strings are correctly encoded and that this is enforced by VirtualBox border code before things get down to Utf8Str or Bstr.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 19.3 KB
Line 
1/* $Id: string.h 36428 2011-03-25 12:46:45Z vboxsync $ */
2
3/** @file
4 * MS COM / XPCOM Abstraction Layer:
5 * Smart string classes declaration
6 */
7
8/*
9 * Copyright (C) 2006-2010 Oracle Corporation
10 *
11 * This file is part of VirtualBox Open Source Edition (OSE), as
12 * available from http://www.virtualbox.org. This file is free software;
13 * you can redistribute it and/or modify it under the terms of the GNU
14 * General Public License (GPL) as published by the Free Software
15 * Foundation, in version 2 as it comes in the "COPYING" file of the
16 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
17 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
18 *
19 * The contents of this file may alternatively be used under the terms
20 * of the Common Development and Distribution License Version 1.0
21 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
22 * VirtualBox OSE distribution, in which case the provisions of the
23 * CDDL are applicable instead of those of the GPL.
24 *
25 * You may elect to license modified versions of this file under the
26 * terms and conditions of either the GPL or the CDDL or both.
27 */
28
29#ifndef ___VBox_com_string_h
30#define ___VBox_com_string_h
31
32/* Make sure all the stdint.h macros are included - must come first! */
33#ifndef __STDC_LIMIT_MACROS
34# define __STDC_LIMIT_MACROS
35#endif
36#ifndef __STDC_CONSTANT_MACROS
37# define __STDC_CONSTANT_MACROS
38#endif
39
40#if defined(VBOX_WITH_XPCOM)
41# include <nsMemory.h>
42#endif
43
44#include "VBox/com/defs.h"
45#include "VBox/com/assert.h"
46
47#include <iprt/mem.h>
48#include <iprt/cpp/ministring.h>
49
50namespace com
51{
52
53class Utf8Str;
54
55// global constant in glue/string.cpp that represents an empty BSTR
56extern const BSTR g_bstrEmpty;
57
58/**
59 * String class used universally in Main for COM-style Utf-16 strings.
60 *
61 * Unfortunately COM on Windows uses UTF-16 everywhere, requiring conversions
62 * back and forth since most of VirtualBox and our libraries use UTF-8.
63 *
64 * To make things more obscure, on Windows, a COM-style BSTR is not just a
65 * pointer to a null-terminated wide character array, but the four bytes (32
66 * bits) BEFORE the memory that the pointer points to are a length DWORD. One
67 * must therefore avoid pointer arithmetic and always use SysAllocString and
68 * the like to deal with BSTR pointers, which manage that DWORD correctly.
69 *
70 * For platforms other than Windows, we provide our own versions of the Sys*
71 * functions in Main/xpcom/helpers.cpp which do NOT use length prefixes though
72 * to be compatible with how XPCOM allocates string parameters to public
73 * functions.
74 *
75 * The Bstr class hides all this handling behind a std::string-like interface
76 * and also provides automatic conversions to MiniString and Utf8Str instances.
77 *
78 * The one advantage of using the SysString* routines is that this makes it
79 * possible to use it as a type of member variables of COM/XPCOM components and
80 * pass their values to callers through component methods' output parameters
81 * using the #cloneTo() operation. Also, the class can adopt (take ownership
82 * of) string buffers returned in output parameters of COM methods using the
83 * #asOutParam() operation and correctly free them afterwards.
84 *
85 * Starting with VirtualBox 3.2, like Utf8Str, Bstr no longer differentiates
86 * between NULL strings and empty strings. In other words, Bstr("") and
87 * Bstr(NULL) behave the same. In both cases, Bstr allocates no memory,
88 * reports a zero length and zero allocated bytes for both, and returns an
89 * empty C wide string from raw().
90 *
91 * @note All Bstr methods ASSUME valid UTF-16 or UTF-8 input strings.
92 * The VirtualBox policy in this regard is to validate strings coming
93 * from external sources before passing them to Bstr or Utf8Str.
94 */
95class Bstr
96{
97public:
98
99 Bstr()
100 : m_bstr(NULL)
101 { }
102
103 Bstr(const Bstr &that)
104 {
105 copyFrom((const OLECHAR *)that.m_bstr);
106 }
107
108 Bstr(CBSTR that)
109 {
110 copyFrom((const OLECHAR *)that);
111 }
112
113#if defined(VBOX_WITH_XPCOM)
114 Bstr(const wchar_t *that)
115 {
116 AssertCompile(sizeof(wchar_t) == sizeof(OLECHAR));
117 copyFrom((const OLECHAR *)that);
118 }
119#endif
120
121 Bstr(const iprt::MiniString &that)
122 {
123 copyFrom(that.c_str());
124 }
125
126 Bstr(const char *that)
127 {
128 copyFrom(that);
129 }
130
131 Bstr(const char *a_pThat, size_t a_cchMax)
132 {
133 copyFromN(a_pThat, a_cchMax);
134 }
135
136 ~Bstr()
137 {
138 setNull();
139 }
140
141 Bstr& operator=(const Bstr &that)
142 {
143 cleanup();
144 copyFrom((const OLECHAR *)that.m_bstr);
145 return *this;
146 }
147
148 Bstr& operator=(CBSTR that)
149 {
150 cleanup();
151 copyFrom((const OLECHAR *)that);
152 return *this;
153 }
154
155#if defined(VBOX_WITH_XPCOM)
156 Bstr& operator=(const wchar_t *that)
157 {
158 cleanup();
159 copyFrom((const OLECHAR *)that);
160 return *this;
161 }
162#endif
163
164 Bstr& setNull()
165 {
166 cleanup();
167 return *this;
168 }
169
170 RTMEMEF_NEW_AND_DELETE_OPERATORS();
171
172 /** Case sensitivity selector. */
173 enum CaseSensitivity
174 {
175 CaseSensitive,
176 CaseInsensitive
177 };
178
179 /**
180 * Compares the member string to str.
181 * @param str
182 * @param cs Whether comparison should be case-sensitive.
183 * @return
184 */
185 int compare(CBSTR str, CaseSensitivity cs = CaseSensitive) const
186 {
187 if (cs == CaseSensitive)
188 return ::RTUtf16Cmp((PRTUTF16)m_bstr, (PRTUTF16)str);
189 return ::RTUtf16LocaleICmp((PRTUTF16)m_bstr, (PRTUTF16)str);
190 }
191
192 int compare(BSTR str, CaseSensitivity cs = CaseSensitive) const
193 {
194 return compare((CBSTR)str, cs);
195 }
196
197 int compare(const Bstr &that, CaseSensitivity cs = CaseSensitive) const
198 {
199 return compare(that.m_bstr, cs);
200 }
201
202 bool operator==(const Bstr &that) const { return !compare(that.m_bstr); }
203 bool operator!=(const Bstr &that) const { return !!compare(that.m_bstr); }
204 bool operator==(CBSTR that) const { return !compare(that); }
205 bool operator==(BSTR that) const { return !compare(that); }
206
207 bool operator!=(CBSTR that) const { return !!compare(that); }
208 bool operator!=(BSTR that) const { return !!compare(that); }
209 bool operator<(const Bstr &that) const { return compare(that.m_bstr) < 0; }
210 bool operator<(CBSTR that) const { return compare(that) < 0; }
211 bool operator<(BSTR that) const { return compare(that) < 0; }
212
213 /**
214 * Returns true if the member string has no length.
215 * This is true for instances created from both NULL and "" input strings.
216 *
217 * @note Always use this method to check if an instance is empty. Do not
218 * use length() because that may need to run through the entire string
219 * (Bstr does not cache string lengths).
220 */
221 bool isEmpty() const { return m_bstr == NULL || *m_bstr == 0; }
222
223 /**
224 * Returns true if the member string has a length of one or more.
225 *
226 * @returns true if not empty, false if empty (NULL or "").
227 */
228 bool isNotEmpty() const { return m_bstr != NULL && *m_bstr != 0; }
229
230 size_t length() const { return isEmpty() ? 0 : ::RTUtf16Len((PRTUTF16)m_bstr); }
231
232#if defined(VBOX_WITH_XPCOM)
233 /**
234 * Returns a pointer to the raw member UTF-16 string. If the member string is empty,
235 * returns a pointer to a global variable containing an empty BSTR with a proper zero
236 * length prefix so that Windows is happy.
237 */
238 CBSTR raw() const
239 {
240 if (m_bstr)
241 return m_bstr;
242
243 return g_bstrEmpty;
244 }
245#else
246 /**
247 * Windows-only hack, as the automatically generated headers use BSTR.
248 * So if we don't want to cast like crazy we have to be more loose than
249 * on XPCOM.
250 *
251 * Returns a pointer to the raw member UTF-16 string. If the member string is empty,
252 * returns a pointer to a global variable containing an empty BSTR with a proper zero
253 * length prefix so that Windows is happy.
254 */
255 BSTR raw() const
256 {
257 if (m_bstr)
258 return m_bstr;
259
260 return g_bstrEmpty;
261 }
262#endif
263
264 /**
265 * Returns a non-const raw pointer that allows to modify the string directly.
266 * As opposed to raw(), this DOES return NULL if the member string is empty
267 * because we cannot return a mutable pointer to the global variable with the
268 * empty string.
269 *
270 * @warning
271 * Be sure not to modify data beyond the allocated memory! The
272 * guaranteed size of the allocated memory is at least #length()
273 * bytes after creation and after every assignment operation.
274 */
275 BSTR mutableRaw() { return m_bstr; }
276
277 /**
278 * Intended to assign copies of instances to |BSTR| out parameters from
279 * within the interface method. Transfers the ownership of the duplicated
280 * string to the caller.
281 *
282 * If the member string is empty, this allocates an empty BSTR in *pstr
283 * (i.e. makes it point to a new buffer with a null byte).
284 */
285 void cloneTo(BSTR *pstr) const
286 {
287 if (pstr)
288 {
289 *pstr = ::SysAllocString((const OLECHAR *)raw()); // raw() returns a pointer to "" if empty
290#ifdef RT_EXCEPTIONS_ENABLED
291 if (!*pstr)
292 throw std::bad_alloc();
293#endif
294 }
295 }
296
297 /**
298 * Intended to assign instances to |BSTR| out parameters from within the
299 * interface method. Transfers the ownership of the original string to the
300 * caller and resets the instance to null.
301 *
302 * As opposed to cloneTo(), this method doesn't create a copy of the
303 * string.
304 *
305 * If the member string is empty, this allocates an empty BSTR in *pstr
306 * (i.e. makes it point to a new buffer with a null byte).
307 *
308 * @param pbstrDst The BSTR variable to detach the string to.
309 *
310 * @throws std::bad_alloc if we failed to allocate a new empty string.
311 */
312 void detachTo(BSTR *pbstrDst)
313 {
314 if (m_bstr)
315 *pbstrDst = m_bstr;
316 else
317 {
318 // allocate null BSTR
319 *pbstrDst = ::SysAllocString((const OLECHAR *)g_bstrEmpty);
320#ifdef RT_EXCEPTIONS_ENABLED
321 if (!*pbstrDst)
322 throw std::bad_alloc();
323#endif
324 }
325 m_bstr = NULL;
326 }
327
328 /**
329 * Intended to pass instances as |BSTR| out parameters to methods.
330 * Takes the ownership of the returned data.
331 */
332 BSTR *asOutParam()
333 {
334 cleanup();
335 return &m_bstr;
336 }
337
338 /**
339 * Static immutable empty-string object. May be used for comparison purposes.
340 */
341 static const Bstr Empty;
342
343protected:
344
345 void cleanup()
346 {
347 if (m_bstr)
348 {
349 ::SysFreeString(m_bstr);
350 m_bstr = NULL;
351 }
352 }
353
354 /**
355 * Protected internal helper to copy a string. This ignores the previous object
356 * state, so either call this from a constructor or call cleanup() first.
357 *
358 * This variant copies from a zero-terminated UTF-16 string (which need not
359 * be a BSTR, i.e. need not have a length prefix).
360 *
361 * If the source is empty, this sets the member string to NULL.
362 *
363 * @param a_bstrSrc The source string. The caller guarantees
364 * that this is valid UTF-16.
365 *
366 * @throws std::bad_alloc - the object is representing an empty string.
367 */
368 void copyFrom(const OLECHAR *a_bstrSrc)
369 {
370 if (a_bstrSrc && *a_bstrSrc)
371 {
372 m_bstr = ::SysAllocString(a_bstrSrc);
373#ifdef RT_EXCEPTIONS_ENABLED
374 if (!m_bstr)
375 throw std::bad_alloc();
376#endif
377 }
378 else
379 m_bstr = NULL;
380 }
381
382 /**
383 * Protected internal helper to copy a string. This ignores the previous object
384 * state, so either call this from a constructor or call cleanup() first.
385 *
386 * This variant copies and converts from a zero-terminated UTF-8 string.
387 *
388 * If the source is empty, this sets the member string to NULL.
389 *
390 * @param a_pszSrc The source string. The caller guarantees
391 * that this is valid UTF-8.
392 *
393 * @throws std::bad_alloc - the object is representing an empty string.
394 */
395 void copyFrom(const char *a_pszSrc)
396 {
397 copyFromN(a_pszSrc, RTSTR_MAX);
398 }
399
400 /**
401 * Variant of copyFrom for sub-string constructors.
402 *
403 * @param a_pszSrc The source string. The caller guarantees
404 * that this is valid UTF-8.
405 * @param a_cchMax The maximum number of chars (not
406 * codepoints) to copy. If you pass RTSTR_MAX
407 * it'll be exactly like copyFrom().
408 *
409 * @throws std::bad_alloc - the object is representing an empty string.
410 */
411 void copyFromN(const char *a_pszSrc, size_t a_cchSrc);
412
413 BSTR m_bstr;
414
415 friend class Utf8Str; /* to access our raw_copy() */
416};
417
418/* symmetric compare operators */
419inline bool operator==(CBSTR l, const Bstr &r) { return r.operator==(l); }
420inline bool operator!=(CBSTR l, const Bstr &r) { return r.operator!=(l); }
421inline bool operator==(BSTR l, const Bstr &r) { return r.operator==(l); }
422inline bool operator!=(BSTR l, const Bstr &r) { return r.operator!=(l); }
423
424
425
426
427/**
428 * String class used universally in Main for UTF-8 strings.
429 *
430 * This is based on iprt::MiniString, to which some functionality has been
431 * moved. Here we keep things that are specific to Main, such as conversions
432 * with UTF-16 strings (Bstr).
433 *
434 * Like iprt::MiniString, Utf8Str does not differentiate between NULL strings
435 * and empty strings. In other words, Utf8Str("") and Utf8Str(NULL) behave the
436 * same. In both cases, MiniString allocates no memory, reports
437 * a zero length and zero allocated bytes for both, and returns an empty
438 * C string from c_str().
439 *
440 * @note All Utf8Str methods ASSUME valid UTF-8 or UTF-16 input strings.
441 * The VirtualBox policy in this regard is to validate strings coming
442 * from external sources before passing them to Utf8Str or Bstr.
443 */
444class Utf8Str : public iprt::MiniString
445{
446public:
447
448 Utf8Str() {}
449
450 Utf8Str(const MiniString &that)
451 : MiniString(that)
452 {}
453
454 Utf8Str(const char *that)
455 : MiniString(that)
456 {}
457
458 Utf8Str(const Bstr &that)
459 {
460 copyFrom(that.raw());
461 }
462
463 Utf8Str(CBSTR that)
464 {
465 copyFrom(that);
466 }
467
468 /**
469 * Constructs a new string given the format string and the list of the
470 * arguments for the format string.
471 *
472 * @param a_pszFormat Pointer to the format string (UTF-8),
473 * @see pg_rt_str_format.
474 * @param a_va Argument vector containing the arguments
475 * specified by the format string.
476 * @sa iprt::MiniString::printfV
477 */
478 Utf8Str(const char *a_pszFormat, va_list a_va)
479 : MiniString(a_pszFormat, a_va)
480 {
481 }
482
483 Utf8Str& operator=(const MiniString &that)
484 {
485 MiniString::operator=(that);
486 return *this;
487 }
488
489 Utf8Str& operator=(const char *that)
490 {
491 MiniString::operator=(that);
492 return *this;
493 }
494
495 Utf8Str& operator=(const Bstr &that)
496 {
497 cleanup();
498 copyFrom(that.raw());
499 return *this;
500 }
501
502 Utf8Str& operator=(CBSTR that)
503 {
504 cleanup();
505 copyFrom(that);
506 return *this;
507 }
508
509 RTMEMEF_NEW_AND_DELETE_OPERATORS();
510
511#if defined(VBOX_WITH_XPCOM)
512 /**
513 * Intended to assign instances to |char *| out parameters from within the
514 * interface method. Transfers the ownership of the duplicated string to the
515 * caller.
516 *
517 * This allocates a single 0 byte in the target if the member string is empty.
518 *
519 * This uses XPCOM memory allocation and thus only works on XPCOM. MSCOM doesn't
520 * like char* strings anyway.
521 */
522 void cloneTo(char **pstr) const;
523#endif
524
525 /**
526 * Intended to assign instances to |BSTR| out parameters from within the
527 * interface method. Transfers the ownership of the duplicated string to the
528 * caller.
529 */
530 void cloneTo(BSTR *pstr) const
531 {
532 if (pstr)
533 {
534 Bstr bstr(*this);
535 bstr.cloneTo(pstr);
536 }
537 }
538
539 /**
540 * Removes a trailing slash from the member string, if present.
541 * Calls RTPathStripTrailingSlash() without having to mess with mutableRaw().
542 */
543 Utf8Str& stripTrailingSlash();
544
545 /**
546 * Removes a trailing filename from the member string, if present.
547 * Calls RTPathStripFilename() without having to mess with mutableRaw().
548 */
549 Utf8Str& stripFilename();
550
551 /**
552 * Removes the path component from the member string, if present.
553 * Calls RTPathFilename() without having to mess with mutableRaw().
554 */
555 Utf8Str& stripPath();
556
557 /**
558 * Removes a trailing file name extension from the member string, if present.
559 * Calls RTPathStripExt() without having to mess with mutableRaw().
560 */
561 Utf8Str& stripExt();
562
563 /**
564 * Static immutable empty-string object. May be used for comparison purposes.
565 */
566 static const Utf8Str Empty;
567
568protected:
569
570 void copyFrom(CBSTR a_pbstr);
571
572 friend class Bstr; /* to access our raw_copy() */
573};
574
575/**
576 * Class with iprt::MiniString::printf as constructor for your convenience.
577 *
578 * Constructing a Utf8Str string object from a format string and a variable
579 * number of arguments can easily be confused with the other Utf8Str
580 * constructors, thus this child class.
581 *
582 * The usage of this class is like the following:
583 * @code
584 Utf8StrFmt strName("program name = %s", argv[0]);
585 @endcode
586 */
587class Utf8StrFmt : public Utf8Str
588{
589public:
590
591 /**
592 * Constructs a new string given the format string and the list of the
593 * arguments for the format string.
594 *
595 * @param a_pszFormat Pointer to the format string (UTF-8),
596 * @see pg_rt_str_format.
597 * @param ... Ellipsis containing the arguments specified by
598 * the format string.
599 */
600 explicit Utf8StrFmt(const char *a_pszFormat, ...)
601 {
602 va_list va;
603 va_start(va, a_pszFormat);
604 printfV(a_pszFormat, va);
605 va_end(va);
606 }
607
608 RTMEMEF_NEW_AND_DELETE_OPERATORS();
609
610protected:
611 Utf8StrFmt()
612 { }
613
614private:
615};
616
617/**
618 * The BstrFmt class is a shortcut to <tt>Bstr(Utf8StrFmt(...))</tt>.
619 */
620class BstrFmt : public Bstr
621{
622public:
623
624 /**
625 * Constructs a new string given the format string and the list of the
626 * arguments for the format string.
627 *
628 * @param aFormat printf-like format string (in UTF-8 encoding).
629 * @param ... List of the arguments for the format string.
630 */
631 explicit BstrFmt(const char *aFormat, ...)
632 {
633 va_list args;
634 va_start(args, aFormat);
635 copyFrom(Utf8Str(aFormat, args).c_str());
636 va_end(args);
637 }
638
639 RTMEMEF_NEW_AND_DELETE_OPERATORS();
640};
641
642/**
643 * The BstrFmtVA class is a shortcut to <tt>Bstr(Utf8Str(format,va))</tt>.
644 */
645class BstrFmtVA : public Bstr
646{
647public:
648
649 /**
650 * Constructs a new string given the format string and the list of the
651 * arguments for the format string.
652 *
653 * @param aFormat printf-like format string (in UTF-8 encoding).
654 * @param aArgs List of arguments for the format string
655 */
656 BstrFmtVA(const char *aFormat, va_list aArgs)
657 {
658 copyFrom(Utf8Str(aFormat, aArgs).c_str());
659 }
660
661 RTMEMEF_NEW_AND_DELETE_OPERATORS();
662};
663
664} /* namespace com */
665
666#endif /* !___VBox_com_string_h */
667
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette