Changeset 35567 in vbox
- Timestamp:
- Jan 14, 2011 2:16:45 PM (14 years ago)
- svn:sync-xref-src-repo-rev:
- 69451
- Location:
- trunk
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/include/iprt/cpp/ministring.h
r35128 r35567 185 185 * String length in bytes. 186 186 * 187 * Returns the length of the member string, which is equal to strlen(c_str()). 188 * In other words, this does not count unicode codepoints but returns the number 189 * of bytes. This is always cached so calling this is cheap and requires no 187 * Returns the length of the member string in bytes, which is equal to strlen(c_str()). 188 * In other words, this does not count unicode codepoints; use utf8length() for that. 189 * The byte length is always cached so calling this is cheap and requires no 190 190 * strlen() invocation. 191 191 * … … 195 195 { 196 196 return m_cch; 197 } 198 199 /** 200 * String length in UTF-8 codepoints. 201 * 202 * As opposed to length(), which returns the length in bytes, this counts the number 203 * of UTF-8 codepoints. This is *not* cached so calling this is expensive. 204 * 205 * @returns Number of codepoints in the member string. 206 */ 207 size_t utf8length() const 208 { 209 return m_psz ? RTStrUniLen(m_psz) : 0; 197 210 } 198 211 … … 652 665 * Find the given substring. 653 666 * 654 * Looks for pcszFind in "this" starting at "pos" and returns its position, 655 * counting from the beginning of "this" at 0. 667 * Looks for pcszFind in "this" starting at "pos" and returns its position 668 * as a byte (not codepoint) offset, counting from the beginning of "this" at 0. 656 669 * 657 670 * @param pcszFind The substring to find. … … 676 689 * Returns a substring of "this" as a new Utf8Str. 677 690 * 678 * Works exactly like its equivalent in std::string except that this interprets 679 * pos and n as unicode codepoints instead of bytes. With the default 680 * parameters "0" and "npos", this always copies the entire string. 691 * Works exactly like its equivalent in std::string. With the default 692 * parameters "0" and "npos", this always copies the entire string. 
The 693 * "pos" and "n" arguments represent bytes; it is the caller's responsibility 694 * to ensure that the offsets do not copy invalid UTF-8 sequences. When 695 * used in conjunction with find() and length(), this will work. 696 * 697 * @param pos Index of first byte offset to copy from "this", counting from 0. 698 * @param n Number of bytes to copy, starting with the one at "pos". 699 * The copying will stop if the null terminator is encountered before 700 * n bytes have been copied. 701 */ 702 iprt::MiniString substr(size_t pos = 0, size_t n = npos) const 703 { 704 return MiniString(*this, pos, n); 705 } 706 707 /** 708 * Returns a substring of "this" as a new Utf8Str. As opposed to substr(), 709 * this variant takes codepoint offsets instead of byte offsets. 681 710 * 682 711 * @param pos Index of first unicode codepoint to copy from … … 686 715 * terminator is encountered before n codepoints have 687 716 * been copied. 688 * 689 * @remarks This works on code points, not bytes! 690 */ 691 iprt::MiniString substr(size_t pos = 0, size_t n = npos) const; 717 */ 718 iprt::MiniString substrCP(size_t pos = 0, size_t n = npos) const; 692 719 693 720 /** -
trunk/src/VBox/Runtime/common/string/ministring.cpp
r35128 r35567 224 224 } 225 225 226 MiniString MiniString::substr(size_t pos /*= 0*/, size_t n /*= npos*/) 226 MiniString MiniString::substrCP(size_t pos /*= 0*/, size_t n /*= npos*/) 227 227 const 228 228 { … … 256 256 257 257 size_t cbCopy = psz - pFirst; 258 ret.reserve(cbCopy + 1); // may throw bad_alloc 259 #ifndef RT_EXCEPTIONS_ENABLED 260 AssertRelease(capacity() >= cbCopy + 1); 261 #endif 262 memcpy(ret.m_psz, pFirst, cbCopy); 263 ret.m_cch = cbCopy; 264 ret.m_psz[cbCopy] = '\0'; 258 if (cbCopy) 259 { 260 ret.reserve(cbCopy + 1); // may throw bad_alloc 261 #ifndef RT_EXCEPTIONS_ENABLED 262 AssertRelease(capacity() >= cbCopy + 1); 263 #endif 264 memcpy(ret.m_psz, pFirst, cbCopy); 265 ret.m_cch = cbCopy; 266 ret.m_psz[cbCopy] = '\0'; 267 } 265 268 } 266 269 } -
trunk/src/VBox/Runtime/testcase/tstIprtMiniString.cpp
r33862 r35567 211 211 CHECK_EQUAL(SubStr15, "cdef"); 212 212 213 /* substr() and substrCP() functions */ 214 iprt::MiniString strTest(""); 215 CHECK_EQUAL(strTest.substr(0), ""); 216 CHECK_EQUAL(strTest.substrCP(0), ""); 217 CHECK_EQUAL(strTest.substr(1), ""); 218 CHECK_EQUAL(strTest.substrCP(1), ""); 219 220 /* now let's have some non-ASCII to chew on */ 221 strTest = "abcdefßäbcdef"; 222 // 13 codepoints, but 15 bytes (excluding null terminator); 223 // "ß" and "ä" consume two bytes each 224 CHECK_EQUAL(strTest.substr(0), strTest.c_str()); 225 CHECK_EQUAL(strTest.substrCP(0), strTest.c_str()); 226 227 CHECK_EQUAL(strTest.substr(2), "cdefßäbcdef"); 228 CHECK_EQUAL(strTest.substrCP(2), "cdefßäbcdef"); 229 230 CHECK_EQUAL(strTest.substr(2, 2), "cd"); 231 CHECK_EQUAL(strTest.substrCP(2, 2), "cd"); 232 233 CHECK_EQUAL(strTest.substr(6), "ßäbcdef"); 234 CHECK_EQUAL(strTest.substrCP(6), "ßäbcdef"); 235 236 CHECK_EQUAL(strTest.substr(6, 2), "ß"); // UTF-8 "ß" consumes two bytes 237 CHECK_EQUAL(strTest.substrCP(6, 1), "ß"); 238 239 CHECK_EQUAL(strTest.substr(8), "äbcdef"); // UTF-8 "ß" consumes two bytes 240 CHECK_EQUAL(strTest.substrCP(7), "äbcdef"); 241 242 CHECK_EQUAL(strTest.substr(8, 3), "äb"); // UTF-8 "ä" consumes two bytes 243 CHECK_EQUAL(strTest.substrCP(7, 2), "äb"); 244 245 CHECK_EQUAL(strTest.substr(14, 1), "f"); 246 CHECK_EQUAL(strTest.substrCP(12, 1), "f"); 247 248 CHECK_EQUAL(strTest.substr(15, 1), ""); 249 CHECK_EQUAL(strTest.substrCP(13, 1), ""); 250 251 CHECK_EQUAL(strTest.substr(16, 1), ""); 252 CHECK_EQUAL(strTest.substrCP(15, 1), ""); 213 253 214 254 /* special constructor and assignment arguments */
Note:
See TracChangeset
for help on using the changeset viewer.