1 | /* $Id: ministring.cpp 40417 2012-03-09 21:55:31Z vboxsync $ */
|
---|
2 | /** @file
|
---|
3 | * IPRT - Mini C++ string class.
|
---|
4 | *
|
---|
5 | * This is a base for both Utf8Str and other places where IPRT may want to use
|
---|
6 | * a lean C++ string class.
|
---|
7 | */
|
---|
8 |
|
---|
9 | /*
|
---|
10 | * Copyright (C) 2007-2012 Oracle Corporation
|
---|
11 | *
|
---|
12 | * This file is part of VirtualBox Open Source Edition (OSE), as
|
---|
13 | * available from http://www.virtualbox.org. This file is free software;
|
---|
14 | * you can redistribute it and/or modify it under the terms of the GNU
|
---|
15 | * General Public License (GPL) as published by the Free Software
|
---|
16 | * Foundation, in version 2 as it comes in the "COPYING" file of the
|
---|
17 | * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
|
---|
18 | * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
|
---|
19 | *
|
---|
20 | * The contents of this file may alternatively be used under the terms
|
---|
21 | * of the Common Development and Distribution License Version 1.0
|
---|
22 | * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
|
---|
23 | * VirtualBox OSE distribution, in which case the provisions of the
|
---|
24 | * CDDL are applicable instead of those of the GPL.
|
---|
25 | *
|
---|
26 | * You may elect to license modified versions of this file under the
|
---|
27 | * terms and conditions of either the GPL or the CDDL or both.
|
---|
28 | */
|
---|
29 |
|
---|
30 |
|
---|
31 | /*******************************************************************************
|
---|
32 | * Header Files *
|
---|
33 | *******************************************************************************/
|
---|
34 | #include <iprt/cpp/ministring.h>
|
---|
35 |
|
---|
36 |
|
---|
37 | /*******************************************************************************
|
---|
38 | * Global Variables *
|
---|
39 | *******************************************************************************/
|
---|
40 | const size_t RTCString::npos = ~(size_t)0;
|
---|
41 |
|
---|
42 |
|
---|
43 | /*******************************************************************************
|
---|
44 | * Defined Constants And Macros *
|
---|
45 | *******************************************************************************/
|
---|
46 | /** Allocation block alignment used when appending bytes to a string. */
|
---|
47 | #define IPRT_MINISTRING_APPEND_ALIGNMENT 64
|
---|
48 |
|
---|
49 |
|
---|
50 | RTCString &RTCString::printf(const char *pszFormat, ...)
|
---|
51 | {
|
---|
52 | va_list va;
|
---|
53 | va_start(va, pszFormat);
|
---|
54 | printfV(pszFormat, va);
|
---|
55 | va_end(va);
|
---|
56 | return *this;
|
---|
57 | }
|
---|
58 |
|
---|
59 | /**
|
---|
60 | * Callback used with RTStrFormatV by RTCString::printfV.
|
---|
61 | *
|
---|
62 | * @returns The number of bytes added (not used).
|
---|
63 | *
|
---|
64 | * @param pvArg The string object.
|
---|
65 | * @param pachChars The characters to append.
|
---|
66 | * @param cbChars The number of characters. 0 on the final callback.
|
---|
67 | */
|
---|
68 | /*static*/ DECLCALLBACK(size_t)
|
---|
69 | RTCString::printfOutputCallback(void *pvArg, const char *pachChars, size_t cbChars)
|
---|
70 | {
|
---|
71 | RTCString *pThis = (RTCString *)pvArg;
|
---|
72 | if (cbChars)
|
---|
73 | {
|
---|
74 | size_t cchBoth = pThis->m_cch + cbChars;
|
---|
75 | if (cchBoth >= pThis->m_cbAllocated)
|
---|
76 | {
|
---|
77 | /* Double the buffer size, if it's less that _4M. Align sizes like
|
---|
78 | for append. */
|
---|
79 | size_t cbAlloc = RT_ALIGN_Z(pThis->m_cbAllocated, IPRT_MINISTRING_APPEND_ALIGNMENT);
|
---|
80 | cbAlloc += RT_MIN(cbAlloc, _4M);
|
---|
81 | if (cbAlloc <= cchBoth)
|
---|
82 | cbAlloc = RT_ALIGN_Z(cchBoth + 1, IPRT_MINISTRING_APPEND_ALIGNMENT);
|
---|
83 | pThis->reserve(cbAlloc);
|
---|
84 | #ifndef RT_EXCEPTIONS_ENABLED
|
---|
85 | AssertReleaseReturn(pThis->capacity() > cchBoth, 0);
|
---|
86 | #endif
|
---|
87 | }
|
---|
88 |
|
---|
89 | memcpy(&pThis->m_psz[pThis->m_cch], pachChars, cbChars);
|
---|
90 | pThis->m_cch = cchBoth;
|
---|
91 | pThis->m_psz[cchBoth] = '\0';
|
---|
92 | }
|
---|
93 | return cbChars;
|
---|
94 | }
|
---|
95 |
|
---|
96 | RTCString &RTCString::printfV(const char *pszFormat, va_list va)
|
---|
97 | {
|
---|
98 | cleanup();
|
---|
99 | RTStrFormatV(printfOutputCallback, this, NULL, NULL, pszFormat, va);
|
---|
100 | return *this;
|
---|
101 | }
|
---|
102 |
|
---|
103 | RTCString &RTCString::append(const RTCString &that)
|
---|
104 | {
|
---|
105 | size_t cchThat = that.length();
|
---|
106 | if (cchThat)
|
---|
107 | {
|
---|
108 | size_t cchThis = length();
|
---|
109 | size_t cchBoth = cchThis + cchThat;
|
---|
110 |
|
---|
111 | if (cchBoth >= m_cbAllocated)
|
---|
112 | {
|
---|
113 | reserve(RT_ALIGN_Z(cchBoth + 1, IPRT_MINISTRING_APPEND_ALIGNMENT));
|
---|
114 | // calls realloc(cchBoth + 1) and sets m_cbAllocated; may throw bad_alloc.
|
---|
115 | #ifndef RT_EXCEPTIONS_ENABLED
|
---|
116 | AssertRelease(capacity() > cchBoth);
|
---|
117 | #endif
|
---|
118 | }
|
---|
119 |
|
---|
120 | memcpy(m_psz + cchThis, that.m_psz, cchThat);
|
---|
121 | m_psz[cchBoth] = '\0';
|
---|
122 | m_cch = cchBoth;
|
---|
123 | }
|
---|
124 | return *this;
|
---|
125 | }
|
---|
126 |
|
---|
127 | RTCString &RTCString::append(const char *pszThat)
|
---|
128 | {
|
---|
129 | size_t cchThat = strlen(pszThat);
|
---|
130 | if (cchThat)
|
---|
131 | {
|
---|
132 | size_t cchThis = length();
|
---|
133 | size_t cchBoth = cchThis + cchThat;
|
---|
134 |
|
---|
135 | if (cchBoth >= m_cbAllocated)
|
---|
136 | {
|
---|
137 | reserve(RT_ALIGN_Z(cchBoth + 1, IPRT_MINISTRING_APPEND_ALIGNMENT));
|
---|
138 | // calls realloc(cchBoth + 1) and sets m_cbAllocated; may throw bad_alloc.
|
---|
139 | #ifndef RT_EXCEPTIONS_ENABLED
|
---|
140 | AssertRelease(capacity() > cchBoth);
|
---|
141 | #endif
|
---|
142 | }
|
---|
143 |
|
---|
144 | memcpy(&m_psz[cchThis], pszThat, cchThat);
|
---|
145 | m_psz[cchBoth] = '\0';
|
---|
146 | m_cch = cchBoth;
|
---|
147 | }
|
---|
148 | return *this;
|
---|
149 | }
|
---|
150 |
|
---|
151 | RTCString& RTCString::append(char ch)
|
---|
152 | {
|
---|
153 | Assert((unsigned char)ch < 0x80); /* Don't create invalid UTF-8. */
|
---|
154 | if (ch)
|
---|
155 | {
|
---|
156 | // allocate in chunks of 20 in case this gets called several times
|
---|
157 | if (m_cch + 1 >= m_cbAllocated)
|
---|
158 | {
|
---|
159 | reserve(RT_ALIGN_Z(m_cch + 2, IPRT_MINISTRING_APPEND_ALIGNMENT));
|
---|
160 | // calls realloc(cbBoth) and sets m_cbAllocated; may throw bad_alloc.
|
---|
161 | #ifndef RT_EXCEPTIONS_ENABLED
|
---|
162 | AssertRelease(capacity() > m_cch + 1);
|
---|
163 | #endif
|
---|
164 | }
|
---|
165 |
|
---|
166 | m_psz[m_cch] = ch;
|
---|
167 | m_psz[++m_cch] = '\0';
|
---|
168 | }
|
---|
169 | return *this;
|
---|
170 | }
|
---|
171 |
|
---|
172 | RTCString &RTCString::appendCodePoint(RTUNICP uc)
|
---|
173 | {
|
---|
174 | /*
|
---|
175 | * Single byte encoding.
|
---|
176 | */
|
---|
177 | if (uc < 0x80)
|
---|
178 | return RTCString::append((char)uc);
|
---|
179 |
|
---|
180 | /*
|
---|
181 | * Multibyte encoding.
|
---|
182 | * Assume max encoding length when resizing the string, that's simpler.
|
---|
183 | */
|
---|
184 | AssertReturn(uc <= UINT32_C(0x7fffffff), *this);
|
---|
185 |
|
---|
186 | if (m_cch + 6 >= m_cbAllocated)
|
---|
187 | {
|
---|
188 | reserve(RT_ALIGN_Z(m_cch + 6 + 1, IPRT_MINISTRING_APPEND_ALIGNMENT));
|
---|
189 | // calls realloc(cbBoth) and sets m_cbAllocated; may throw bad_alloc.
|
---|
190 | #ifndef RT_EXCEPTIONS_ENABLED
|
---|
191 | AssertRelease(capacity() > m_cch + 6);
|
---|
192 | #endif
|
---|
193 | }
|
---|
194 |
|
---|
195 | char *pszNext = RTStrPutCp(&m_psz[m_cch], uc);
|
---|
196 | m_cch = pszNext - m_psz;
|
---|
197 | *pszNext = '\0';
|
---|
198 |
|
---|
199 | return *this;
|
---|
200 | }
|
---|
201 |
|
---|
202 | size_t RTCString::find(const char *pcszFind, size_t pos /*= 0*/) const
|
---|
203 | {
|
---|
204 | if (pos < length())
|
---|
205 | {
|
---|
206 | const char *pszThis = c_str();
|
---|
207 | if (pszThis)
|
---|
208 | {
|
---|
209 | const char *pszHit = strstr(pszThis + pos, pcszFind);
|
---|
210 | if (pszHit)
|
---|
211 | return pszHit - pszThis;
|
---|
212 | }
|
---|
213 | }
|
---|
214 |
|
---|
215 | return npos;
|
---|
216 | }
|
---|
217 |
|
---|
218 | void RTCString::findReplace(char chFind, char chReplace)
|
---|
219 | {
|
---|
220 | Assert((unsigned int)chFind < 128U);
|
---|
221 | Assert((unsigned int)chReplace < 128U);
|
---|
222 |
|
---|
223 | for (size_t i = 0; i < length(); ++i)
|
---|
224 | {
|
---|
225 | char *p = &m_psz[i];
|
---|
226 | if (*p == chFind)
|
---|
227 | *p = chReplace;
|
---|
228 | }
|
---|
229 | }
|
---|
230 |
|
---|
231 | size_t RTCString::count(char ch) const
|
---|
232 | {
|
---|
233 | Assert((unsigned int)ch < 128U);
|
---|
234 |
|
---|
235 | size_t c = 0;
|
---|
236 | const char *psz = m_psz;
|
---|
237 | if (psz)
|
---|
238 | {
|
---|
239 | char chCur;
|
---|
240 | while ((chCur = *psz++) != '\0')
|
---|
241 | if (chCur == ch)
|
---|
242 | c++;
|
---|
243 | }
|
---|
244 | return c;
|
---|
245 | }
|
---|
246 |
|
---|
247 | #if 0 /** @todo implement these when needed. */
|
---|
248 | size_t RTCString::count(const char *psz, CaseSensitivity cs = CaseSensitive) const
|
---|
249 | {
|
---|
250 | }
|
---|
251 |
|
---|
252 | size_t RTCString::count(const RTCString *pStr, CaseSensitivity cs = CaseSensitive) const
|
---|
253 | {
|
---|
254 |
|
---|
255 | }
|
---|
256 | #endif
|
---|
257 |
|
---|
258 | RTCString RTCString::substrCP(size_t pos /*= 0*/, size_t n /*= npos*/) const
|
---|
259 | {
|
---|
260 | RTCString ret;
|
---|
261 |
|
---|
262 | if (n)
|
---|
263 | {
|
---|
264 | const char *psz;
|
---|
265 |
|
---|
266 | if ((psz = c_str()))
|
---|
267 | {
|
---|
268 | RTUNICP cp;
|
---|
269 |
|
---|
270 | // walk the UTF-8 characters until where the caller wants to start
|
---|
271 | size_t i = pos;
|
---|
272 | while (*psz && i--)
|
---|
273 | if (RT_FAILURE(RTStrGetCpEx(&psz, &cp)))
|
---|
274 | return ret; // return empty string on bad encoding
|
---|
275 |
|
---|
276 | const char *pFirst = psz;
|
---|
277 |
|
---|
278 | if (n == npos)
|
---|
279 | // all the rest:
|
---|
280 | ret = pFirst;
|
---|
281 | else
|
---|
282 | {
|
---|
283 | i = n;
|
---|
284 | while (*psz && i--)
|
---|
285 | if (RT_FAILURE(RTStrGetCpEx(&psz, &cp)))
|
---|
286 | return ret; // return empty string on bad encoding
|
---|
287 |
|
---|
288 | size_t cbCopy = psz - pFirst;
|
---|
289 | if (cbCopy)
|
---|
290 | {
|
---|
291 | ret.reserve(cbCopy + 1); // may throw bad_alloc
|
---|
292 | #ifndef RT_EXCEPTIONS_ENABLED
|
---|
293 | AssertRelease(capacity() >= cbCopy + 1);
|
---|
294 | #endif
|
---|
295 | memcpy(ret.m_psz, pFirst, cbCopy);
|
---|
296 | ret.m_cch = cbCopy;
|
---|
297 | ret.m_psz[cbCopy] = '\0';
|
---|
298 | }
|
---|
299 | }
|
---|
300 | }
|
---|
301 | }
|
---|
302 |
|
---|
303 | return ret;
|
---|
304 | }
|
---|
305 |
|
---|
306 | bool RTCString::endsWith(const RTCString &that, CaseSensitivity cs /*= CaseSensitive*/) const
|
---|
307 | {
|
---|
308 | size_t l1 = length();
|
---|
309 | if (l1 == 0)
|
---|
310 | return false;
|
---|
311 |
|
---|
312 | size_t l2 = that.length();
|
---|
313 | if (l1 < l2)
|
---|
314 | return false;
|
---|
315 | /** @todo r=bird: If l2 is 0, then m_psz can be NULL and we will crash. See
|
---|
316 | * also handling of l2 == in startsWith. */
|
---|
317 |
|
---|
318 | size_t l = l1 - l2;
|
---|
319 | if (cs == CaseSensitive)
|
---|
320 | return ::RTStrCmp(&m_psz[l], that.m_psz) == 0;
|
---|
321 | return ::RTStrICmp(&m_psz[l], that.m_psz) == 0;
|
---|
322 | }
|
---|
323 |
|
---|
324 | bool RTCString::startsWith(const RTCString &that, CaseSensitivity cs /*= CaseSensitive*/) const
|
---|
325 | {
|
---|
326 | size_t l1 = length();
|
---|
327 | size_t l2 = that.length();
|
---|
328 | if (l1 == 0 || l2 == 0) /** @todo r=bird: this differs from endsWith, and I think other IPRT code. If l2 == 0, it matches anything. */
|
---|
329 | return false;
|
---|
330 |
|
---|
331 | if (l1 < l2)
|
---|
332 | return false;
|
---|
333 |
|
---|
334 | if (cs == CaseSensitive)
|
---|
335 | return ::RTStrNCmp(m_psz, that.m_psz, l2) == 0;
|
---|
336 | return ::RTStrNICmp(m_psz, that.m_psz, l2) == 0;
|
---|
337 | }
|
---|
338 |
|
---|
339 | bool RTCString::contains(const RTCString &that, CaseSensitivity cs /*= CaseSensitive*/) const
|
---|
340 | {
|
---|
341 | /** @todo r-bird: Not checking for NULL strings like startsWith does (and
|
---|
342 | * endsWith only does half way). */
|
---|
343 | if (cs == CaseSensitive)
|
---|
344 | return ::RTStrStr(m_psz, that.m_psz) != NULL;
|
---|
345 | return ::RTStrIStr(m_psz, that.m_psz) != NULL;
|
---|
346 | }
|
---|
347 |
|
---|
348 | int RTCString::toInt(uint64_t &i) const
|
---|
349 | {
|
---|
350 | if (!m_psz)
|
---|
351 | return VERR_NO_DIGITS;
|
---|
352 | return RTStrToUInt64Ex(m_psz, NULL, 0, &i);
|
---|
353 | }
|
---|
354 |
|
---|
355 | int RTCString::toInt(uint32_t &i) const
|
---|
356 | {
|
---|
357 | if (!m_psz)
|
---|
358 | return VERR_NO_DIGITS;
|
---|
359 | return RTStrToUInt32Ex(m_psz, NULL, 0, &i);
|
---|
360 | }
|
---|
361 |
|
---|
362 | RTCList<RTCString, RTCString *>
|
---|
363 | RTCString::split(const RTCString &a_rstrSep, SplitMode mode /* = RemoveEmptyParts */) const
|
---|
364 | {
|
---|
365 | RTCList<RTCString> strRet;
|
---|
366 | if (!m_psz)
|
---|
367 | return strRet;
|
---|
368 | if (a_rstrSep.isEmpty())
|
---|
369 | {
|
---|
370 | strRet.append(RTCString(m_psz));
|
---|
371 | return strRet;
|
---|
372 | }
|
---|
373 |
|
---|
374 | size_t cch = m_cch;
|
---|
375 | char const *pszTmp = m_psz;
|
---|
376 | while (cch > 0)
|
---|
377 | {
|
---|
378 | char const *pszNext = strstr(pszTmp, a_rstrSep.c_str());
|
---|
379 | if (!pszNext)
|
---|
380 | {
|
---|
381 | strRet.append(RTCString(pszTmp, cch));
|
---|
382 | break;
|
---|
383 | }
|
---|
384 | size_t cchNext = pszNext - pszTmp;
|
---|
385 | if ( cchNext > 0
|
---|
386 | || mode == KeepEmptyParts)
|
---|
387 | strRet.append(RTCString(pszTmp, cchNext));
|
---|
388 | pszTmp += cchNext + a_rstrSep.length();
|
---|
389 | cch -= cchNext + a_rstrSep.length();
|
---|
390 | }
|
---|
391 |
|
---|
392 | return strRet;
|
---|
393 | }
|
---|
394 |
|
---|
395 | /* static */
|
---|
396 | RTCString
|
---|
397 | RTCString::join(const RTCList<RTCString, RTCString *> &a_rList,
|
---|
398 | const RTCString &a_rstrSep /* = "" */)
|
---|
399 | {
|
---|
400 | RTCString strRet;
|
---|
401 | if (a_rList.size() > 1)
|
---|
402 | {
|
---|
403 | /* calc the required size */
|
---|
404 | size_t cbNeeded = a_rstrSep.length() * (a_rList.size() - 1) + 1;
|
---|
405 | for (size_t i = 0; i < a_rList.size(); ++i)
|
---|
406 | cbNeeded += a_rList.at(i).length();
|
---|
407 | strRet.reserve(cbNeeded);
|
---|
408 |
|
---|
409 | /* do the appending. */
|
---|
410 | for (size_t i = 0; i < a_rList.size() - 1; ++i)
|
---|
411 | {
|
---|
412 | strRet.append(a_rList.at(i));
|
---|
413 | strRet.append(a_rstrSep);
|
---|
414 | }
|
---|
415 | strRet.append(a_rList.last());
|
---|
416 | }
|
---|
417 | /* special case: one list item. */
|
---|
418 | else if (a_rList.size() > 0)
|
---|
419 | strRet.append(a_rList.last());
|
---|
420 |
|
---|
421 | return strRet;
|
---|
422 | }
|
---|
423 |
|
---|
424 | const RTCString operator+(const RTCString &a_rStr1, const RTCString &a_rStr2)
|
---|
425 | {
|
---|
426 | RTCString strRet(a_rStr1);
|
---|
427 | strRet += a_rStr2;
|
---|
428 | return strRet;
|
---|
429 | }
|
---|
430 |
|
---|
431 | const RTCString operator+(const RTCString &a_rStr1, const char *a_pszStr2)
|
---|
432 | {
|
---|
433 | RTCString strRet(a_rStr1);
|
---|
434 | strRet += a_pszStr2;
|
---|
435 | return strRet;
|
---|
436 | }
|
---|
437 |
|
---|
438 | const RTCString operator+(const char *a_psz1, const RTCString &a_rStr2)
|
---|
439 | {
|
---|
440 | RTCString strRet(a_psz1);
|
---|
441 | strRet += a_rStr2;
|
---|
442 | return strRet;
|
---|
443 | }
|
---|
444 |
|
---|