VirtualBox

source: vbox/trunk/include/iprt/cpp/ministring.h@ 33805

Last change on this file since 33805 was 33805, checked in by vboxsync, 14 years ago

iprt::MiniString: Added a substring constructor and joined the copyFrom stuff into one common method.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 24.2 KB
Line 
1/** @file
2 * IPRT - Mini C++ string class.
3 */
4
5/*
6 * Copyright (C) 2007-2009 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___VBox_ministring_h
27#define ___VBox_ministring_h
28
29#include <iprt/mem.h>
30#include <iprt/string.h>
31#include <iprt/stdarg.h>
32
33#include <new>
34
35namespace iprt
36{
37
38/**
39 * @brief Mini C++ string class.
40 *
41 * "MiniString" is a small C++ string class that does not depend on anything
42 * else except IPRT memory management functions. Semantics are like in
43 * std::string, except it can do a lot less.
44 *
45 * Note that MiniString does not differentiate between NULL strings and
46 * empty strings. In other words, MiniString("") and MiniString(NULL)
47 * behave the same. In both cases, MiniString allocates no memory, reports
48 * a zero length and zero allocated bytes for both, and returns an empty
49 * C string from c_str().
50 */
51#ifdef VBOX
52 /** @remarks Much of the code in here used to be in com::Utf8Str so that
53 * com::Utf8Str can now derive from MiniString and only contain code
54 * that is COM-specific, such as com::Bstr conversions. Compared to
55 * the old Utf8Str though, MiniString always knows the length of its
56 * member string and the size of the buffer so it can use memcpy()
57 * instead of strdup().
58 */
59#endif
60class RT_DECL_CLASS MiniString
61{
62public:
63 /**
64 * Creates an empty string that has no memory allocated.
65 */
66 MiniString()
67 : m_psz(NULL),
68 m_cch(0),
69 m_cbAllocated(0)
70 {
71 }
72
73 /**
74 * Creates a copy of another MiniString.
75 *
76 * This allocates s.length() + 1 bytes for the new instance, unless s is empty.
77 *
78 * @param a_rSrc The source string.
79 *
80 * @throws std::bad_alloc
81 */
82 MiniString(const MiniString &a_rSrc)
83 {
84 copyFromN(a_rSrc.m_psz, a_rSrc.m_cch);
85 }
86
87 /**
88 * Creates a copy of a C string.
89 *
90 * This allocates strlen(pcsz) + 1 bytes for the new instance, unless s is empty.
91 *
92 * @param pcsz The source string.
93 *
94 * @throws std::bad_alloc
95 */
96 MiniString(const char *pcsz)
97 {
98 copyFromN(pcsz, strlen(pcsz));
99 }
100
101 /**
102 * Create a partial copy of another MiniString.
103 *
104 * @param a_cchSrc The max number of chars (encoded UTF-8 bytes)
105 * to copy from the source string.
106 * @param a_rSrc The source string.
107 */
108 MiniString(size_t a_cchSrc, const MiniString &a_rSrc)
109 {
110 Assert(a_cchSrc <= a_rSrc.m_cch);
111 copyFromN(a_rSrc.m_psz, RT_MIN(a_cchSrc, a_rSrc.m_cch));
112 }
113
114 /**
115 * Create a partial copy of a C string.
116 *
117 * @param a_cchSrc The max number of chars (encoded UTF-8 bytes)
118 * to copy from the source string.
119 * @param a_pszSrc The source string (UTF-8).
120 */
121 MiniString(size_t a_cchSrc, const char *a_pszSrc)
122 {
123 size_t cchMax = a_pszSrc ? RTStrNLen(a_pszSrc, a_cchSrc) : 0;
124 Assert(a_cchSrc <= cchMax);
125 copyFromN(a_pszSrc, RT_MIN(a_cchSrc, cchMax));
126 }
127
128 /**
129 * Create a new string given the format string and its arguments.
130 *
131 * @param a_pszFormat Pointer to the format string (UTF-8),
132 * @see pg_rt_str_format.
133 * @param a_va Argument vector containing the arguments
134 * specified by the format string.
135 * @sa printfV
136 */
137 MiniString(const char *a_pszFormat, va_list a_va)
138 : m_psz(NULL),
139 m_cch(0),
140 m_cbAllocated(0)
141
142 {
143 printfV(a_pszFormat, a_va);
144 }
145
146 /**
147 * Destructor.
148 */
149 virtual ~MiniString()
150 {
151 cleanup();
152 }
153
154 /**
155 * String length in bytes.
156 *
157 * Returns the length of the member string, which is equal to strlen(c_str()).
158 * In other words, this does not count unicode codepoints but returns the number
159 * of bytes. This is always cached so calling this is cheap and requires no
160 * strlen() invocation.
161 *
162 * @returns m_cbLength.
163 */
164 size_t length() const
165 {
166 return m_cch;
167 }
168
169 /**
170 * The allocated buffer size (in bytes).
171 *
172 * Returns the number of bytes allocated in the internal string buffer, which is
173 * at least length() + 1 if length() > 0; for an empty string, this returns 0.
174 *
175 * @returns m_cbAllocated.
176 */
177 size_t capacity() const
178 {
179 return m_cbAllocated;
180 }
181
182 /**
183 * Make sure at that least cb of buffer space is reserved.
184 *
185 * Requests that the contained memory buffer have at least cb bytes allocated.
186 * This may expand or shrink the string's storage, but will never truncate the
187 * contained string. In other words, cb will be ignored if it's smaller than
188 * length() + 1.
189 *
190 * @param cb New minimum size (in bytes) of member memory buffer.
191 *
192 * @throws std::bad_alloc On allocation error. The object is left unchanged.
193 */
194 void reserve(size_t cb)
195 {
196 if ( cb != m_cbAllocated
197 && cb > m_cch + 1
198 )
199 {
200 int vrc = RTStrRealloc(&m_psz, cb);
201 if (RT_SUCCESS(vrc))
202 m_cbAllocated = cb;
203#ifdef RT_EXCEPTIONS_ENABLED
204 else
205 throw std::bad_alloc();
206#endif
207 }
208 }
209
210 /**
211 * Deallocates all memory.
212 */
213 inline void setNull()
214 {
215 cleanup();
216 }
217
218 /**
219 * Assigns a copy of pcsz to "this".
220 *
221 * @param pcsz The source string.
222 *
223 * @throws std::bad_alloc On allocation failure. The object is left describing
224 * a NULL string.
225 *
226 * @returns Reference to the object.
227 */
228 MiniString &operator=(const char *pcsz)
229 {
230 if (m_psz != pcsz)
231 {
232 cleanup();
233 copyFromN(pcsz, pcsz ? strlen(pcsz) : 0);
234 }
235 return *this;
236 }
237
238 /**
239 * Assigns a copy of s to "this".
240 *
241 * @param s The source string.
242 *
243 * @throws std::bad_alloc On allocation failure. The object is left describing
244 * a NULL string.
245 *
246 * @returns Reference to the object.
247 */
248 MiniString &operator=(const MiniString &s)
249 {
250 if (this != &s)
251 {
252 cleanup();
253 copyFromN(s.m_psz, s.m_cch);
254 }
255 return *this;
256 }
257
258 /**
259 * Assigns the output of the string format operation (RTStrPrintf).
260 *
261 * @param pszFormat Pointer to the format string,
262 * @see pg_rt_str_format.
263 * @param ... Ellipsis containing the arguments specified by
264 * the format string.
265 *
266 * @throws std::bad_alloc On allocation error. The object is left unchanged.
267 *
268 * @returns Reference to the object.
269 */
270 MiniString &printf(const char *pszFormat, ...);
271
272 /**
273 * Assigns the output of the string format operation (RTStrPrintfV).
274 *
275 * @param pszFormat Pointer to the format string,
276 * @see pg_rt_str_format.
277 * @param va Argument vector containing the arguments
278 * specified by the format string.
279 *
280 * @throws std::bad_alloc On allocation error. The object is left unchanged.
281 *
282 * @returns Reference to the object.
283 */
284 MiniString &printfV(const char *pszFormat, va_list va);
285
286 /**
287 * Appends the string "that" to "this".
288 *
289 * @param that The string to append.
290 *
291 * @throws std::bad_alloc On allocation error. The object is left unchanged.
292 *
293 * @returns Reference to the object.
294 */
295 MiniString &append(const MiniString &that);
296
297 /**
298 * Appends the string "that" to "this".
299 *
300 * @param pszThat The C string to append.
301 *
302 * @throws std::bad_alloc On allocation error. The object is left unchanged.
303 *
304 * @returns Reference to the object.
305 */
306 MiniString &append(const char *pszThat);
307
308 /**
309 * Appends the given character to "this".
310 *
311 * @param ch The character to append.
312 *
313 * @throws std::bad_alloc On allocation error. The object is left unchanged.
314 *
315 * @returns Reference to the object.
316 */
317 MiniString &append(char ch);
318
319 /**
320 * Appends the given unicode code point to "this".
321 *
322 * @param uc The unicode code point to append.
323 *
324 * @throws std::bad_alloc On allocation error. The object is left unchanged.
325 *
326 * @returns Reference to the object.
327 */
328 MiniString &appendCodePoint(RTUNICP uc);
329
330 /**
331 * Shortcut to append(), MiniString variant.
332 *
333 * @param that The string to append.
334 *
335 * @returns Reference to the object.
336 */
337 MiniString &operator+=(const MiniString &that)
338 {
339 return append(that);
340 }
341
342 /**
343 * Shortcut to append(), const char* variant.
344 *
345 * @param pszThat The C string to append.
346 *
347 * @returns Reference to the object.
348 */
349 MiniString &operator+=(const char *pszThat)
350 {
351 return append(pszThat);
352 }
353
354 /**
355 * Shortcut to append(), char variant.
356 *
357 * @param pszThat The character to append.
358 *
359 * @returns Reference to the object.
360 */
361 MiniString &operator+=(char c)
362 {
363 return append(c);
364 }
365
366 /**
367 * Converts the member string to upper case.
368 *
369 * @returns Reference to the object.
370 */
371 MiniString &toUpper()
372 {
373 if (length())
374 {
375 /* Folding an UTF-8 string may result in a shorter encoding (see
376 testcase), so recalculate the length afterwars. */
377 ::RTStrToUpper(m_psz);
378 size_t cchNew = strlen(m_psz);
379 Assert(cchNew <= m_cch);
380 m_cch = cchNew;
381 }
382 return *this;
383 }
384
385 /**
386 * Converts the member string to lower case.
387 *
388 * @returns Reference to the object.
389 */
390 MiniString &toLower()
391 {
392 if (length())
393 {
394 /* Folding an UTF-8 string may result in a shorter encoding (see
395 testcase), so recalculate the length afterwars. */
396 ::RTStrToLower(m_psz);
397 size_t cchNew = strlen(m_psz);
398 Assert(cchNew <= m_cch);
399 m_cch = cchNew;
400 }
401 return *this;
402 }
403
404 /**
405 * Index operator.
406 *
407 * Returns the byte at the given index, or a null byte if the index is not
408 * smaller than length(). This does _not_ count codepoints but simply points
409 * into the member C string.
410 *
411 * @param i The index into the string buffer.
412 * @returns char at the index or null.
413 */
414 inline char operator[](size_t i) const
415 {
416 if (i < length())
417 return m_psz[i];
418 return '\0';
419 }
420
421 /**
422 * Returns the contained string as a C-style const char* pointer.
423 * This never returns NULL; if the string is empty, this returns a
424 * pointer to static null byte.
425 *
426 * @returns const pointer to C-style string.
427 */
428 inline const char *c_str() const
429 {
430 return (m_psz) ? m_psz : "";
431 }
432
433 /**
434 * Returns a non-const raw pointer that allows to modify the string directly.
435 * As opposed to c_str() and raw(), this DOES return NULL for an empty string
436 * because we cannot return a non-const pointer to a static "" global.
437 *
438 * @warning
439 * -# Be sure not to modify data beyond the allocated memory! Call
440 * capacity() to find out how large that buffer is.
441 * -# After any operation that modifies the length of the string,
442 * you _must_ call MiniString::jolt(), or subsequent copy operations
443 * may go nowhere. Better not use mutableRaw() at all.
444 */
445 char *mutableRaw()
446 {
447 return m_psz;
448 }
449
450 /**
451 * Clean up after using mutableRaw.
452 *
453 * Intended to be called after something has messed with the internal string
454 * buffer (e.g. after using mutableRaw() or Utf8Str::asOutParam()). Resets the
455 * internal lengths correctly. Otherwise subsequent copy operations may go
456 * nowhere.
457 */
458 void jolt()
459 {
460 if (m_psz)
461 {
462 m_cch = strlen(m_psz);
463 m_cbAllocated = m_cch + 1; /* (Required for the Utf8Str::asOutParam case) */
464 }
465 else
466 {
467 m_cch = 0;
468 m_cbAllocated = 0;
469 }
470 }
471
472 /**
473 * Returns @c true if the member string has no length.
474 *
475 * This is @c true for instances created from both NULL and "" input
476 * strings.
477 *
478 * This states nothing about how much memory might be allocated.
479 *
480 * @returns @c true if empty, @c false if not.
481 */
482 bool isEmpty() const
483 {
484 return length() == 0;
485 }
486
487 /**
488 * Returns @c false if the member string has no length.
489 *
490 * This is @c false for instances created from both NULL and "" input
491 * strings.
492 *
493 * This states nothing about how much memory might be allocated.
494 *
495 * @returns @c false if empty, @c true if not.
496 */
497 bool isNotEmpty() const
498 {
499 return length() != 0;
500 }
501
/** Case sensitivity selector for the comparison and search methods. */
enum CaseSensitivity
{
    /** Compare respecting character case. */
    CaseSensitive,
    /** Compare ignoring character case. */
    CaseInsensitive
};
508
509 /**
510 * Compares the member string to a C-string.
511 *
512 * @param pcszThat The string to compare with.
513 * @param cs Whether comparison should be case-sensitive.
514 * @returns 0 if equal, negative if this is smaller than @a pcsz, positive
515 * if larger.
516 */
517 int compare(const char *pcszThat, CaseSensitivity cs = CaseSensitive) const
518 {
519 /* This klugde is for m_cch=0 and m_psz=NULL. pcsz=NULL and psz=""
520 are treated the same way so that str.compare(str2.c_str()) works. */
521 if (length() == 0)
522 return pcszThat == NULL || *pcszThat == '\0' ? 0 : -1;
523
524 if (cs == CaseSensitive)
525 return ::RTStrCmp(m_psz, pcszThat);
526 return ::RTStrICmp(m_psz, pcszThat);
527 }
528
529 /**
530 * Compares the member string to another MiniString.
531 *
532 * @param pcszThat The string to compare with.
533 * @param cs Whether comparison should be case-sensitive.
534 * @returns 0 if equal, negative if this is smaller than @a pcsz, positive
535 * if larger.
536 */
537 int compare(const MiniString &that, CaseSensitivity cs = CaseSensitive) const
538 {
539 if (cs == CaseSensitive)
540 return ::RTStrCmp(m_psz, that.m_psz);
541 return ::RTStrICmp(m_psz, that.m_psz);
542 }
543
544 /**
545 * Compares the two strings.
546 *
547 * @returns true if equal, false if not.
548 * @param that The string to compare with.
549 */
550 bool equals(const MiniString &that) const
551 {
552 return that.length() == length()
553 && memcmp(that.m_psz, m_psz, length()) == 0;
554 }
555
556 /**
557 * Compares the two strings.
558 *
559 * @returns true if equal, false if not.
560 * @param pszThat The string to compare with.
561 */
562 bool equals(const char *pszThat) const
563 {
564 /* This klugde is for m_cch=0 and m_psz=NULL. pcsz=NULL and psz=""
565 are treated the same way so that str.equals(str2.c_str()) works. */
566 if (length() == 0)
567 return pszThat == NULL || *pszThat == '\0';
568 return RTStrCmp(pszThat, m_psz) == 0;
569 }
570
571 /**
572 * Compares the two strings ignoring differences in case.
573 *
574 * @returns true if equal, false if not.
575 * @param that The string to compare with.
576 */
577 bool equalsIgnoreCase(const MiniString &that) const
578 {
579 /* Unfolded upper and lower case characters may require different
580 amount of encoding space, so the length optimization doesn't work. */
581 return RTStrICmp(that.m_psz, m_psz) == 0;
582 }
583
584 /**
585 * Compares the two strings ignoring differences in case.
586 *
587 * @returns true if equal, false if not.
588 * @param pszThat The string to compare with.
589 */
590 bool equalsIgnoreCase(const char *pszThat) const
591 {
592 /* This klugde is for m_cch=0 and m_psz=NULL. pcsz=NULL and psz=""
593 are treated the same way so that str.equalsIgnoreCase(str2.c_str()) works. */
594 if (length() == 0)
595 return pszThat == NULL || *pszThat == '\0';
596 return RTStrICmp(pszThat, m_psz) == 0;
597 }
598
599 /** @name Comparison operators.
600 * @{ */
601 bool operator==(const MiniString &that) const { return equals(that); }
602 bool operator!=(const MiniString &that) const { return !equals(that); }
603 bool operator<( const MiniString &that) const { return compare(that) < 0; }
604 bool operator>( const MiniString &that) const { return compare(that) > 0; }
605
606 bool operator==(const char *pszThat) const { return equals(pszThat); }
607 bool operator!=(const char *pszThat) const { return !equals(pszThat); }
608 bool operator<( const char *pszThat) const { return compare(pszThat) < 0; }
609 bool operator>( const char *pszThat) const { return compare(pszThat) > 0; }
610 /** @} */
611
612 /** Max string offset value.
613 *
614 * When returned by a method, this indicates failure. When taken as input,
615 * typically a default, it means all the way to the string terminator.
616 */
617 static const size_t npos;
618
619 /**
620 * Find the given substring.
621 *
622 * Looks for pcszFind in "this" starting at "pos" and returns its position,
623 * counting from the beginning of "this" at 0.
624 *
625 * @param pcszFind The substring to find.
626 * @param pos The (byte) offset into the string buffer to start
627 * searching.
628 *
629 * @returns 0 based position of pcszFind. npos if not found.
630 */
631 size_t find(const char *pcszFind, size_t pos = 0) const;
632
633 /**
634 * Returns a substring of "this" as a new MiniString.
635 *
636 * Works exactly like its equivalent in std::string except that this interprets
637 * pos and n as unicode codepoints instead of bytes. With the default
638 * parameters "0" and "npos", this always copies the entire string.
639 *
640 * @param pos Index of first unicode codepoint to copy from
641 * "this", counting from 0.
642 * @param n Number of unicode codepoints to copy, starting with
643 * the one at "pos". The copying will stop if the null
644 * terminator is encountered before n codepoints have
645 * been copied.
646 *
647 * @remarks This works on code points, not bytes!
648 */
649 iprt::MiniString substr(size_t pos = 0, size_t n = npos) const;
650
651 /**
652 * Returns true if "this" ends with "that".
653 *
654 * @param that Suffix to test for.
655 * @param cs Case sensitivity selector.
656 * @returns true if match, false if mismatch.
657 */
658 bool endsWith(const iprt::MiniString &that, CaseSensitivity cs = CaseSensitive) const;
659
660 /**
661 * Returns true if "this" begins with "that".
662 * @param that Prefix to test for.
663 * @param cs Case sensitivity selector.
664 * @returns true if match, false if mismatch.
665 */
666 bool startsWith(const iprt::MiniString &that, CaseSensitivity cs = CaseSensitive) const;
667
668 /**
669 * Returns true if "this" contains "that" (strstr).
670 *
671 * @param that Substring to look for.
672 * @param cs Case sensitivity selector.
673 * @returns true if match, false if mismatch.
674 */
675 bool contains(const iprt::MiniString &that, CaseSensitivity cs = CaseSensitive) const;
676
677 /**
678 * Attempts to convert the member string into a 32-bit integer.
679 *
680 * @returns 32-bit unsigned number on success.
681 * @returns 0 on failure.
682 */
683 int32_t toInt32() const
684 {
685 return RTStrToInt32(m_psz);
686 }
687
688 /**
689 * Attempts to convert the member string into an unsigned 32-bit integer.
690 *
691 * @returns 32-bit unsigned number on success.
692 * @returns 0 on failure.
693 */
694 uint32_t toUInt32() const
695 {
696 return RTStrToUInt32(m_psz);
697 }
698
699 /**
700 * Attempts to convert the member string into an 64-bit integer.
701 *
702 * @returns 64-bit unsigned number on success.
703 * @returns 0 on failure.
704 */
705 int64_t toInt64() const
706 {
707 return RTStrToInt64(m_psz);
708 }
709
710 /**
711 * Attempts to convert the member string into an unsigned 64-bit integer.
712 *
713 * @returns 64-bit unsigned number on success.
714 * @returns 0 on failure.
715 */
716 uint64_t toUInt64() const
717 {
718 return RTStrToUInt64(m_psz);
719 }
720
721 /**
722 * Attempts to convert the member string into an unsigned 64-bit integer.
723 *
724 * @param i Where to return the value on success.
725 * @returns IPRT error code, see RTStrToInt64.
726 */
727 int toInt(uint64_t &i) const;
728
729 /**
730 * Attempts to convert the member string into an unsigned 32-bit integer.
731 *
732 * @param i Where to return the value on success.
733 * @returns IPRT error code, see RTStrToInt32.
734 */
735 int toInt(uint32_t &i) const;
736
737protected:
738
739 /**
740 * Hide operator bool() to force people to use isEmpty() explicitly.
741 */
742 operator bool() const;
743
744 /**
745 * Destructor implementation, also used to clean up in operator=() before
746 * assigning a new string.
747 */
748 void cleanup()
749 {
750 if (m_psz)
751 {
752 RTStrFree(m_psz);
753 m_psz = NULL;
754 m_cch = 0;
755 m_cbAllocated = 0;
756 }
757 }
758
759 /**
760 * Protected internal helper to copy a string.
761 *
762 * This ignores the previous object state, so either call this from a
763 * constructor or call cleanup() first. copyFromN() unconditionally sets
764 * the members to a copy of the given other strings and makes no
765 * assumptions about previous contents. Can therefore be used both in copy
766 * constructors, when member variables have no defined value, and in
767 * assignments after having called cleanup().
768 *
769 * @param pcszSrc The source string.
770 * @param cchSrc The number of chars (bytes) to copy from the
771 * source strings.
772 *
773 * @throws std::bad_alloc On allocation failure. The object is left
774 * describing a NULL string.
775 */
776 void copyFromN(const char *pcszSrc, size_t cchSrc)
777 {
778 if (cchSrc)
779 {
780 m_psz = RTStrAlloc(cchSrc + 1);
781 if (RT_LIKELY(m_psz))
782 {
783 m_cch = cchSrc;
784 m_cbAllocated = cchSrc + 1;
785 memcpy(m_psz, pcszSrc, cchSrc + 1);
786 }
787 else
788 {
789 m_cch = 0;
790 m_cbAllocated = 0;
791#ifdef RT_EXCEPTIONS_ENABLED
792 throw std::bad_alloc();
793#endif
794 }
795 }
796 else
797 {
798 m_cch = 0;
799 m_cbAllocated = 0;
800 m_psz = NULL;
801 }
802 }
803
804 static DECLCALLBACK(size_t) printfOutputCallback(void *pvArg, const char *pachChars, size_t cbChars);
805
806 char *m_psz; /**< The string buffer. */
807 size_t m_cch; /**< strlen(m_psz) - i.e. no terminator included. */
808 size_t m_cbAllocated; /**< Size of buffer that m_psz points to; at least m_cch + 1. */
809};
810
811} // namespace iprt
812
813#endif
814
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette