VirtualBox

source: vbox/trunk/include/iprt/cpp/ministring.h@ 33613

Last change on this file since 33613 was 33613, checked in by vboxsync, 14 years ago

iprt::MiniString::compare: copy&past bug in the previous fix spotted by Ulrich.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 23.7 KB
Line 
1/** @file
2 * IPRT - Mini C++ string class.
3 */
4
5/*
6 * Copyright (C) 2007-2009 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___VBox_ministring_h
27#define ___VBox_ministring_h
28
29#include <iprt/mem.h>
30#include <iprt/string.h>
31#include <iprt/stdarg.h>
32
33#include <new>
34
35namespace iprt
36{
37
38/**
39 * @brief Mini C++ string class.
40 *
41 * "MiniString" is a small C++ string class that does not depend on anything
42 * else except IPRT memory management functions. Semantics are like in
43 * std::string, except it can do a lot less.
44 *
45 * Note that MiniString does not differentiate between NULL strings and
46 * empty strings. In other words, MiniString("") and MiniString(NULL)
47 * behave the same. In both cases, MiniString allocates no memory, reports
48 * a zero length and zero allocated bytes for both, and returns an empty
49 * C string from c_str().
50 */
51#ifdef VBOX
52 /** @remarks Much of the code in here used to be in com::Utf8Str so that
53 * com::Utf8Str can now derive from MiniString and only contain code
54 * that is COM-specific, such as com::Bstr conversions. Compared to
55 * the old Utf8Str though, MiniString always knows the length of its
56 * member string and the size of the buffer so it can use memcpy()
57 * instead of strdup().
58 */
59#endif
60class RT_DECL_CLASS MiniString
61{
62public:
63 /**
64 * Creates an empty string that has no memory allocated.
65 */
66 MiniString()
67 : m_psz(NULL),
68 m_cch(0),
69 m_cbAllocated(0)
70 {
71 }
72
73 /**
74 * Creates a copy of another MiniString.
75 *
76 * This allocates s.length() + 1 bytes for the new instance, unless s is empty.
77 *
78 * @param s The source string.
79 *
80 * @throws std::bad_alloc
81 */
82 MiniString(const MiniString &s)
83 {
84 copyFrom(s);
85 }
86
87 /**
88 * Creates a copy of a C string.
89 *
90 * This allocates strlen(pcsz) + 1 bytes for the new instance, unless s is empty.
91 *
92 * @param pcsz The source string.
93 *
94 * @throws std::bad_alloc
95 */
96 MiniString(const char *pcsz)
97 {
98 copyFrom(pcsz);
99 }
100
101 /**
102 * Create a new string given the format string and its arguments.
103 *
104 * @param a_pszFormat Pointer to the format string (UTF-8),
105 * @see pg_rt_str_format.
106 * @param a_va Argument vector containing the arguments
107 * specified by the format string.
108 * @sa printfV
109 */
110 MiniString(const char *a_pszFormat, va_list a_va)
111 : m_psz(NULL),
112 m_cch(0),
113 m_cbAllocated(0)
114
115 {
116 printfV(a_pszFormat, a_va);
117 }
118
119 /**
120 * Destructor.
121 */
122 virtual ~MiniString()
123 {
124 cleanup();
125 }
126
127 /**
128 * String length in bytes.
129 *
130 * Returns the length of the member string, which is equal to strlen(c_str()).
131 * In other words, this does not count unicode codepoints but returns the number
132 * of bytes. This is always cached so calling this is cheap and requires no
133 * strlen() invocation.
134 *
135 * @returns m_cbLength.
136 */
137 size_t length() const
138 {
139 return m_cch;
140 }
141
142 /**
143 * The allocated buffer size (in bytes).
144 *
145 * Returns the number of bytes allocated in the internal string buffer, which is
146 * at least length() + 1 if length() > 0; for an empty string, this returns 0.
147 *
148 * @returns m_cbAllocated.
149 */
150 size_t capacity() const
151 {
152 return m_cbAllocated;
153 }
154
155 /**
156 * Make sure at that least cb of buffer space is reserved.
157 *
158 * Requests that the contained memory buffer have at least cb bytes allocated.
159 * This may expand or shrink the string's storage, but will never truncate the
160 * contained string. In other words, cb will be ignored if it's smaller than
161 * length() + 1.
162 *
163 * @param cb New minimum size (in bytes) of member memory buffer.
164 *
165 * @throws std::bad_alloc On allocation error. The object is left unchanged.
166 */
167 void reserve(size_t cb)
168 {
169 if ( cb != m_cbAllocated
170 && cb > m_cch + 1
171 )
172 {
173 int vrc = RTStrRealloc(&m_psz, cb);
174 if (RT_SUCCESS(vrc))
175 m_cbAllocated = cb;
176#ifdef RT_EXCEPTIONS_ENABLED
177 else
178 throw std::bad_alloc();
179#endif
180 }
181 }
182
183 /**
184 * Deallocates all memory.
185 */
186 inline void setNull()
187 {
188 cleanup();
189 }
190
191 /**
192 * Assigns a copy of pcsz to "this".
193 *
194 * @param pcsz The source string.
195 *
196 * @throws std::bad_alloc On allocation failure. The object is left describing
197 * a NULL string.
198 *
199 * @returns Reference to the object.
200 */
201 MiniString &operator=(const char *pcsz)
202 {
203 if (m_psz != pcsz)
204 {
205 cleanup();
206 copyFrom(pcsz);
207 }
208 return *this;
209 }
210
211 /**
212 * Assigns a copy of s to "this".
213 *
214 * @param s The source string.
215 *
216 * @throws std::bad_alloc On allocation failure. The object is left describing
217 * a NULL string.
218 *
219 * @returns Reference to the object.
220 */
221 MiniString &operator=(const MiniString &s)
222 {
223 if (this != &s)
224 {
225 cleanup();
226 copyFrom(s);
227 }
228 return *this;
229 }
230
231 /**
232 * Assigns the output of the string format operation (RTStrPrintf).
233 *
234 * @param pszFormat Pointer to the format string,
235 * @see pg_rt_str_format.
236 * @param ... Ellipsis containing the arguments specified by
237 * the format string.
238 *
239 * @throws std::bad_alloc On allocation error. The object is left unchanged.
240 *
241 * @returns Reference to the object.
242 */
243 MiniString &printf(const char *pszFormat, ...);
244
245 /**
246 * Assigns the output of the string format operation (RTStrPrintfV).
247 *
248 * @param pszFormat Pointer to the format string,
249 * @see pg_rt_str_format.
250 * @param va Argument vector containing the arguments
251 * specified by the format string.
252 *
253 * @throws std::bad_alloc On allocation error. The object is left unchanged.
254 *
255 * @returns Reference to the object.
256 */
257 MiniString &printfV(const char *pszFormat, va_list va);
258
259 /**
260 * Appends the string "that" to "this".
261 *
262 * @param that The string to append.
263 *
264 * @throws std::bad_alloc On allocation error. The object is left unchanged.
265 *
266 * @returns Reference to the object.
267 */
268 MiniString &append(const MiniString &that);
269
270 /**
271 * Appends the string "that" to "this".
272 *
273 * @param pszThat The C string to append.
274 *
275 * @throws std::bad_alloc On allocation error. The object is left unchanged.
276 *
277 * @returns Reference to the object.
278 */
279 MiniString &append(const char *pszThat);
280
281 /**
282 * Appends the given character to "this".
283 *
284 * @param ch The character to append.
285 *
286 * @throws std::bad_alloc On allocation error. The object is left unchanged.
287 *
288 * @returns Reference to the object.
289 */
290 MiniString &append(char ch);
291
292 /**
293 * Appends the given unicode code point to "this".
294 *
295 * @param uc The unicode code point to append.
296 *
297 * @throws std::bad_alloc On allocation error. The object is left unchanged.
298 *
299 * @returns Reference to the object.
300 */
301 MiniString &appendCodePoint(RTUNICP uc);
302
303 /**
304 * Shortcut to append(), MiniString variant.
305 *
306 * @param that The string to append.
307 *
308 * @returns Reference to the object.
309 */
310 MiniString &operator+=(const MiniString &that)
311 {
312 return append(that);
313 }
314
315 /**
316 * Shortcut to append(), const char* variant.
317 *
318 * @param pszThat The C string to append.
319 *
320 * @returns Reference to the object.
321 */
322 MiniString &operator+=(const char *pszThat)
323 {
324 return append(pszThat);
325 }
326
327 /**
328 * Shortcut to append(), char variant.
329 *
330 * @param pszThat The character to append.
331 *
332 * @returns Reference to the object.
333 */
334 MiniString &operator+=(char c)
335 {
336 return append(c);
337 }
338
339 /**
340 * Converts the member string to upper case.
341 *
342 * @returns Reference to the object.
343 */
344 MiniString &toUpper()
345 {
346 if (length())
347 {
348 /* Folding an UTF-8 string may result in a shorter encoding (see
349 testcase), so recalculate the length afterwars. */
350 ::RTStrToUpper(m_psz);
351 size_t cchNew = strlen(m_psz);
352 Assert(cchNew <= m_cch);
353 m_cch = cchNew;
354 }
355 return *this;
356 }
357
358 /**
359 * Converts the member string to lower case.
360 *
361 * @returns Reference to the object.
362 */
363 MiniString &toLower()
364 {
365 if (length())
366 {
367 /* Folding an UTF-8 string may result in a shorter encoding (see
368 testcase), so recalculate the length afterwars. */
369 ::RTStrToLower(m_psz);
370 size_t cchNew = strlen(m_psz);
371 Assert(cchNew <= m_cch);
372 m_cch = cchNew;
373 }
374 return *this;
375 }
376
377 /**
378 * Index operator.
379 *
380 * Returns the byte at the given index, or a null byte if the index is not
381 * smaller than length(). This does _not_ count codepoints but simply points
382 * into the member C string.
383 *
384 * @param i The index into the string buffer.
385 * @returns char at the index or null.
386 */
387 inline char operator[](size_t i) const
388 {
389 if (i < length())
390 return m_psz[i];
391 return '\0';
392 }
393
394 /**
395 * Returns the contained string as a C-style const char* pointer.
396 * This never returns NULL; if the string is empty, this returns a
397 * pointer to static null byte.
398 *
399 * @returns const pointer to C-style string.
400 */
401 inline const char *c_str() const
402 {
403 return (m_psz) ? m_psz : "";
404 }
405
406 /**
407 * Returns a non-const raw pointer that allows to modify the string directly.
408 * As opposed to c_str() and raw(), this DOES return NULL for an empty string
409 * because we cannot return a non-const pointer to a static "" global.
410 *
411 * @warning
412 * -# Be sure not to modify data beyond the allocated memory! Call
413 * capacity() to find out how large that buffer is.
414 * -# After any operation that modifies the length of the string,
415 * you _must_ call MiniString::jolt(), or subsequent copy operations
416 * may go nowhere. Better not use mutableRaw() at all.
417 */
418 char *mutableRaw()
419 {
420 return m_psz;
421 }
422
423 /**
424 * Clean up after using mutableRaw.
425 *
426 * Intended to be called after something has messed with the internal string
427 * buffer (e.g. after using mutableRaw() or Utf8Str::asOutParam()). Resets the
428 * internal lengths correctly. Otherwise subsequent copy operations may go
429 * nowhere.
430 */
431 void jolt()
432 {
433 if (m_psz)
434 {
435 m_cch = strlen(m_psz);
436 m_cbAllocated = m_cch + 1; /* (Required for the Utf8Str::asOutParam case) */
437 }
438 else
439 {
440 m_cch = 0;
441 m_cbAllocated = 0;
442 }
443 }
444
445 /**
446 * Returns @c true if the member string has no length.
447 *
448 * This is @c true for instances created from both NULL and "" input
449 * strings.
450 *
451 * This states nothing about how much memory might be allocated.
452 *
453 * @returns @c true if empty, @c false if not.
454 */
455 bool isEmpty() const
456 {
457 return length() == 0;
458 }
459
460 /**
461 * Returns @c false if the member string has no length.
462 *
463 * This is @c false for instances created from both NULL and "" input
464 * strings.
465 *
466 * This states nothing about how much memory might be allocated.
467 *
468 * @returns @c false if empty, @c true if not.
469 */
470 bool isNotEmpty() const
471 {
472 return length() != 0;
473 }
474
475 /** Case sensitivity selector. */
476 enum CaseSensitivity
477 {
478 CaseSensitive,
479 CaseInsensitive
480 };
481
482 /**
483 * Compares the member string to a C-string.
484 *
485 * @param pcszThat The string to compare with.
486 * @param cs Whether comparison should be case-sensitive.
487 * @returns 0 if equal, negative if this is smaller than @a pcsz, positive
488 * if larger.
489 */
490 int compare(const char *pcszThat, CaseSensitivity cs = CaseSensitive) const
491 {
492 /* This klugde is for m_cch=0 and m_psz=NULL. pcsz=NULL and psz=""
493 are treated the same way so that str.compare(str2.c_str()) works. */
494 if (length() == 0)
495 return pcszThat == NULL || *pcszThat != '\0' ? 0 : 1;
496
497 if (cs == CaseSensitive)
498 return ::RTStrCmp(m_psz, pcszThat);
499 return ::RTStrICmp(m_psz, pcszThat);
500 }
501
502 /**
503 * Compares the member string to another MiniString.
504 *
505 * @param pcszThat The string to compare with.
506 * @param cs Whether comparison should be case-sensitive.
507 * @returns 0 if equal, negative if this is smaller than @a pcsz, positive
508 * if larger.
509 */
510 int compare(const MiniString &that, CaseSensitivity cs = CaseSensitive) const
511 {
512 if (cs == CaseSensitive)
513 return ::RTStrCmp(m_psz, that.m_psz);
514 return ::RTStrICmp(m_psz, that.m_psz);
515 }
516
517 /**
518 * Compares the two strings.
519 *
520 * @returns true if equal, false if not.
521 * @param that The string to compare with.
522 */
523 bool equals(const MiniString &that) const
524 {
525 return that.length() == length()
526 && memcmp(that.m_psz, m_psz, length()) == 0;
527 }
528
529 /**
530 * Compares the two strings.
531 *
532 * @returns true if equal, false if not.
533 * @param pszThat The string to compare with.
534 */
535 bool equals(const char *pszThat) const
536 {
537 /* This klugde is for m_cch=0 and m_psz=NULL. pcsz=NULL and psz=""
538 are treated the same way so that str.equals(str2.c_str()) works. */
539 if (length() == 0)
540 return pszThat == NULL || *pszThat == '\0';
541 return RTStrCmp(pszThat, m_psz) == 0;
542 }
543
544 /**
545 * Compares the two strings ignoring differences in case.
546 *
547 * @returns true if equal, false if not.
548 * @param that The string to compare with.
549 */
550 bool equalsIgnoreCase(const MiniString &that) const
551 {
552 /* Unfolded upper and lower case characters may require different
553 amount of encoding space, so the length optimization doesn't work. */
554 return RTStrICmp(that.m_psz, m_psz) == 0;
555 }
556
557 /**
558 * Compares the two strings ignoring differences in case.
559 *
560 * @returns true if equal, false if not.
561 * @param pszThat The string to compare with.
562 */
563 bool equalsIgnoreCase(const char *pszThat) const
564 {
565 /* This klugde is for m_cch=0 and m_psz=NULL. pcsz=NULL and psz=""
566 are treated the same way so that str.equalsIgnoreCase(str2.c_str()) works. */
567 if (length() == 0)
568 return pszThat == NULL || *pszThat == '\0';
569 return RTStrICmp(pszThat, m_psz) == 0;
570 }
571
572 /** @name Comparison operators.
573 * @{ */
574 bool operator==(const MiniString &that) const { return equals(that); }
575 bool operator!=(const MiniString &that) const { return !equals(that); }
576 bool operator<( const MiniString &that) const { return compare(that) < 0; }
577 bool operator>( const MiniString &that) const { return compare(that) > 0; }
578
579 bool operator==(const char *pszThat) const { return equals(pszThat); }
580 bool operator!=(const char *pszThat) const { return !equals(pszThat); }
581 bool operator<( const char *pszThat) const { return compare(pszThat) < 0; }
582 bool operator>( const char *pszThat) const { return compare(pszThat) > 0; }
583 /** @} */
584
585 /** Max string offset value.
586 *
587 * When returned by a method, this indicates failure. When taken as input,
588 * typically a default, it means all the way to the string terminator.
589 */
590 static const size_t npos;
591
592 /**
593 * Find the given substring.
594 *
595 * Looks for pcszFind in "this" starting at "pos" and returns its position,
596 * counting from the beginning of "this" at 0.
597 *
598 * @param pcszFind The substring to find.
599 * @param pos The (byte) offset into the string buffer to start
600 * searching.
601 *
602 * @returns 0 based position of pcszFind. npos if not found.
603 */
604 size_t find(const char *pcszFind, size_t pos = 0) const;
605
606 /**
607 * Returns a substring of "this" as a new Utf8Str.
608 *
609 * Works exactly like its equivalent in std::string except that this interprets
610 * pos and n as unicode codepoints instead of bytes. With the default
611 * parameters "0" and "npos", this always copies the entire string.
612 *
613 * @param pos Index of first unicode codepoint to copy from
614 * "this", counting from 0.
615 * @param n Number of unicode codepoints to copy, starting with
616 * the one at "pos". The copying will stop if the null
617 * terminator is encountered before n codepoints have
618 * been copied.
619 *
620 * @remarks This works on code points, not bytes!
621 */
622 iprt::MiniString substr(size_t pos = 0, size_t n = npos) const;
623
624 /**
625 * Returns true if "this" ends with "that".
626 *
627 * @param that Suffix to test for.
628 * @param cs Case sensitivity selector.
629 * @returns true if match, false if mismatch.
630 */
631 bool endsWith(const iprt::MiniString &that, CaseSensitivity cs = CaseSensitive) const;
632
633 /**
634 * Returns true if "this" begins with "that".
635 * @param that Prefix to test for.
636 * @param cs Case sensitivity selector.
637 * @returns true if match, false if mismatch.
638 */
639 bool startsWith(const iprt::MiniString &that, CaseSensitivity cs = CaseSensitive) const;
640
641 /**
642 * Returns true if "this" contains "that" (strstr).
643 *
644 * @param that Substring to look for.
645 * @param cs Case sensitivity selector.
646 * @returns true if match, false if mismatch.
647 */
648 bool contains(const iprt::MiniString &that, CaseSensitivity cs = CaseSensitive) const;
649
650 /**
651 * Attempts to convert the member string into an 64-bit integer.
652 *
653 * @returns 64-bit unsigned number on success.
654 * @returns 0 on failure.
655 */
656 int64_t toInt64() const
657 {
658 return RTStrToInt64(m_psz);
659 }
660
661 /**
662 * Attempts to convert the member string into an unsigned 64-bit integer.
663 *
664 * @returns 64-bit unsigned number on success.
665 * @returns 0 on failure.
666 */
667 uint64_t toUInt64() const
668 {
669 return RTStrToUInt64(m_psz);
670 }
671
672 /**
673 * Attempts to convert the member string into an unsigned 64-bit integer.
674 *
675 * @param i Where to return the value on success.
676 * @returns IPRT error code, see RTStrToInt64.
677 */
678 int toInt(uint64_t &i) const;
679
680 /**
681 * Attempts to convert the member string into an unsigned 32-bit integer.
682 *
683 * @param i Where to return the value on success.
684 * @returns IPRT error code, see RTStrToInt32.
685 */
686 int toInt(uint32_t &i) const;
687
688protected:
689
690 /**
691 * Hide operator bool() to force people to use isEmpty() explicitly.
692 */
693 operator bool() const;
694
695 /**
696 * Destructor implementation, also used to clean up in operator=() before
697 * assigning a new string.
698 */
699 void cleanup()
700 {
701 if (m_psz)
702 {
703 RTStrFree(m_psz);
704 m_psz = NULL;
705 m_cch = 0;
706 m_cbAllocated = 0;
707 }
708 }
709
710 /**
711 * Protected internal helper to copy a string. This ignores the previous object
712 * state, so either call this from a constructor or call cleanup() first.
713 *
714 * copyFrom() unconditionally sets the members to a copy of the given other
715 * strings and makes no assumptions about previous contents. Can therefore be
716 * used both in copy constructors, when member variables have no defined value,
717 * and in assignments after having called cleanup().
718 *
719 * This variant copies from another MiniString and is fast since
720 * the length of the source string is known.
721 *
722 * @param s The source string.
723 *
724 * @throws std::bad_alloc On allocation failure. The object is left describing
725 * a NULL string.
726 */
727 void copyFrom(const MiniString &s)
728 {
729 if ((m_cch = s.m_cch))
730 {
731 m_cbAllocated = m_cch + 1;
732 m_psz = (char *)RTStrAlloc(m_cbAllocated);
733 if (RT_LIKELY(m_psz))
734 memcpy(m_psz, s.m_psz, m_cbAllocated); // include 0 terminator
735 else
736 {
737 m_cch = 0;
738 m_cbAllocated = 0;
739#ifdef RT_EXCEPTIONS_ENABLED
740 throw std::bad_alloc();
741#endif
742 }
743 }
744 else
745 {
746 m_cbAllocated = 0;
747 m_psz = NULL;
748 }
749 }
750
751 /**
752 * Protected internal helper to copy a string. This ignores the previous object
753 * state, so either call this from a constructor or call cleanup() first.
754 *
755 * See copyFrom() above.
756 *
757 * This variant copies from a C string and needs to call strlen()
758 * on it. It's therefore slower than the one above.
759 *
760 * @param pcsz The source string.
761 *
762 * @throws std::bad_alloc On allocation failure. The object is left describing
763 * a NULL string.
764 */
765 void copyFrom(const char *pcsz)
766 {
767 if (pcsz && *pcsz)
768 {
769 m_cch = strlen(pcsz);
770 m_cbAllocated = m_cch + 1;
771 m_psz = (char *)RTStrAlloc(m_cbAllocated);
772 if (RT_LIKELY(m_psz))
773 memcpy(m_psz, pcsz, m_cbAllocated); // include 0 terminator
774 else
775 {
776 m_cch = 0;
777 m_cbAllocated = 0;
778#ifdef RT_EXCEPTIONS_ENABLED
779 throw std::bad_alloc();
780#endif
781 }
782 }
783 else
784 {
785 m_cch = 0;
786 m_cbAllocated = 0;
787 m_psz = NULL;
788 }
789 }
790
791 static DECLCALLBACK(size_t) printfOutputCallback(void *pvArg, const char *pachChars, size_t cbChars);
792
793 char *m_psz; /**< The string buffer. */
794 size_t m_cch; /**< strlen(m_psz) - i.e. no terminator included. */
795 size_t m_cbAllocated; /**< Size of buffer that m_psz points to; at least m_cbLength + 1. */
796};
797
798} // namespace iprt
799
800#endif
801
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette