VirtualBox

Changeset 24656 in vbox for trunk


Ignore:
Timestamp:
Nov 14, 2009 10:36:32 PM (15 years ago)
Author:
vboxsync
Message:

iprt/ministring: bird review - addressed object state after throwing std::bad_alloc.
Because of the cleanup() + copyFrom() approach to changing the string, there is
no way to preserve the original string value without rewriting the code
fundamentally. I would strongly recommend doing so. (The rewrite is to not
cleanup() first, but use RTMemRealloc() to extend the buffer.)

Left a few review @todos in the startsWith, endsWith & contains
implementations. They are related and all depend on a policy decision wrt
matching empty strings.

Location:
trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/include/iprt/ministring_cpp.h

    r23223 r24656  
    11/** @file
    2  * VirtualBox mini C++ string class. This is a base for both Utf8Str and
    3  * other places where IPRT may want to use a lean C++ string class.
     2 * IPRT - Mini C++ string class.
    43 */
    54
     
    4140
    4241/**
    43  *  "MiniString" is a small C++ string class that does not depend on anything
    44  *  else except IPRT memory management functions. This is used as the base of
    45  *  both the Utf8Str class that COM uses as well as C++ code in IPRT that
    46  *  prefers to have a string class, like in xml.cpp.
     42 * @brief Mini C++ string class.
    4743 *
    48  *  Semantics are like in std::string, except it can do a lot less.
    49  *
    50  *  Much of the code in here used to be in com::Utf8Str so that com::Utf8Str
    51  *  can now derive from MiniString and only contain code that is COM-specific,
    52  *  such as com::Bstr conversions. Compared to the old Utf8Str though, MiniString
    53  *  always knows the length of its member string and the size of the buffer
    54  *  so it can use memcpy() instead of strdup().
     44 * "MiniString" is a small C++ string class that does not depend on anything
     45 * else except IPRT memory management functions.  Semantics are like in
     46 * std::string, except it can do a lot less.
    5547 */
    56 
     48#ifdef VBOX
     49 /** @remarks Much of the code in here used to be in com::Utf8Str so that
     50  *          com::Utf8Str can now derive from MiniString and only contain code
     51  *          that is COM-specific, such as com::Bstr conversions.  Compared to
     52  *          the old Utf8Str though, MiniString always knows the length of its
     53  *          member string and the size of the buffer so it can use memcpy()
     54  *          instead of strdup().
     55  */
     56#endif
    5757class RT_DECL_CLASS MiniString
    5858{
     
    6969
    7070    /**
    71      * Creates a copy of another MiniString. This allocates
    72      * s.length() + 1 bytes for the new instance.
    73      * @param s
     71     * Creates a copy of another MiniString.
     72     *
     73     * This allocates s.length() + 1 bytes for the new instance.
     74     *
     75     * @param   s               The source string.
     76     *
     77     * @throws  std::bad_alloc
    7478     */
    7579    MiniString(const MiniString &s)
     
    7983
    8084    /**
    81      * Creates a copy of another MiniString. This allocates
    82      * strlen(pcsz) + 1 bytes for the new instance.
    83      * @param pcsz
     85     * Creates a copy of another MiniString.
     86     *
     87     * This allocates strlen(pcsz) + 1 bytes for the new instance.
     88     *
     89     * @param   pcsz            The source string.
     90     *
     91     * @throws  std::bad_alloc
    8492     */
    8593    MiniString(const char *pcsz)
     
    97105
    98106    /**
    99      * Returns the length of the member string, which is equal to
    100      * strlen(c_str()). In other words, this does not count UTF-8 characters
    101      * but returns the number of bytes.     This is always cached
    102      * so calling this is cheap and requires no strlen() invocation.
    103      * @return
     107     * String length in bytes.
     108     *
     109     * Returns the length of the member string, which is equal to strlen(c_str()).
     110     * In other words, this does not count unicode codepoints but returns the number
     111     * of bytes.  This is always cached so calling this is cheap and requires no
     112     * strlen() invocation.
     113     *
     114     * @returns m_cbLength.
    104115     */
    105116    size_t length() const
     
    109120
    110121    /**
    111      * Returns the number of bytes allocated in the internal string buffer,
    112      * which is at least length() + 1 if length() > 0.
    113      * @return
     122     * The allocated buffer size (in bytes).
     123     *
     124     * Returns the number of bytes allocated in the internal string buffer, which is
     125     * at least length() + 1 if length() > 0.
     126     *
     127     * @returns m_cbAllocated.
    114128     */
    115129    size_t capacity() const
     
    119133
    120134    /**
     135     * Make sure that at least cb bytes of buffer space are reserved.
     136     *
    121137     * Requests that the contained memory buffer have at least cb bytes allocated.
    122138     * This may expand or shrink the string's storage, but will never truncate the
    123      * contained string. In other words, cb will be ignored if it's smaller than
     139     * contained string.  In other words, cb will be ignored if it's smaller than
    124140     * length() + 1.
    125      * @param cb new minimum size of member memory buffer
     141     *
     142     * @param   cb              New minimum size (in bytes) of member memory buffer.
     143     *
     144     * @throws  std::bad_alloc  On allocation error.  The object is left unchanged.
    126145     */
    127146    void reserve(size_t cb)
    128147    {
    129         if (    (cb != m_cbAllocated)
    130              && (cb > m_cbLength + 1)
     148        if (    cb != m_cbAllocated
     149             && cb > m_cbLength + 1
    131150           )
    132151        {
    133             m_psz = (char*)RTMemRealloc(m_psz, cb);
     152            char *pszNew = (char*)RTMemRealloc(m_psz, cb);
     153            if (RT_LIKELY(pszNew))
     154            {
     155                m_psz = pszNew;
     156                m_cbAllocated = cb;
     157            }
    134158#ifdef RT_EXCEPTIONS_ENABLED
    135             if (!m_psz)
     159            else
    136160                throw std::bad_alloc();
    137161#endif
    138             m_cbAllocated = cb;
    139162        }
    140163    }
     
    149172
    150173    /**
    151      *  Returns a non-const raw pointer that allows to modify the string directly.
    152      *  @warning
    153      *      1)  Be sure not to modify data beyond the allocated memory! Call
    154      *          capacity() to find out how large that buffer is.
    155      *      2)  After any operation that modifies the length of the string,
    156      *          you _must_ call MiniString::jolt(), or subsequent copy operations
    157      *          may go nowhere. Better not use mutableRaw() at all.
    158      */
    159     char* mutableRaw()
     174     * Returns a non-const raw pointer that allows to modify the string directly.
     175     *
     176     * @warning
     177     *      -# Be sure not to modify data beyond the allocated memory! Call
     178     *         capacity() to find out how large that buffer is.
     179     *      -# After any operation that modifies the length of the string,
     180     *         you _must_ call MiniString::jolt(), or subsequent copy operations
     181     *         may go nowhere.  Better not use mutableRaw() at all.
     182     */
     183    char *mutableRaw()
    160184    {
    161185        return m_psz;
     
    163187
    164188    /**
     189     * Clean up after using mutableRaw.
     190     *
    165191     * Intended to be called after something has messed with the internal string
    166      * buffer (e.g. after using mutableRaw() or Utf8Str::asOutParam()). Resets
    167      * the internal lengths correctly. Otherwise subsequent copy operations may
    168      * go nowhere.
     192     * buffer (e.g. after using mutableRaw() or Utf8Str::asOutParam()).  Resets the
     193     * internal lengths correctly.  Otherwise subsequent copy operations may go
     194     * nowhere.
    169195     */
    170196    void jolt()
     
    173199        {
    174200            m_cbLength = strlen(m_psz);
    175             m_cbAllocated = m_cbLength + 1;
     201            m_cbAllocated = m_cbLength + 1; /* (Required for the Utf8Str::asOutParam case) */
    176202        }
    177203        else
     
    184210    /**
    185211     * Assigns a copy of pcsz to "this".
    186      * @param pcsz
    187      * @return
    188      */
    189     MiniString& operator=(const char *pcsz)
     212     *
     213     * @param   pcsz            The source string.
     214     *
     215     * @throws  std::bad_alloc  On allocation failure.  The object is left describing
     216     *             a NULL string.
     217     *
     218     * @returns Reference to the object.
     219     */
     220    MiniString &operator=(const char *pcsz)
    190221    {
    191222        if (m_psz != pcsz)
     
    199230    /**
    200231     * Assigns a copy of s to "this".
    201      * @param s
    202      * @return
    203      */
    204     MiniString& operator=(const MiniString &s)
     232     *
     233     * @param   s               The source string.
     234     *
     235     * @throws  std::bad_alloc  On allocation failure.  The object is left describing
     236     *             a NULL string.
     237     *
     238     * @returns Reference to the object.
     239     */
     240    MiniString &operator=(const MiniString &s)
    205241    {
    206242        if (this != &s)
     
    212248    }
    213249
    214     MiniString& append(const MiniString &that);
    215     MiniString& append(char c);
    216 
    217     /**
    218      * Returns the byte at the given index, or a null byte if
    219      * the index is not smaller than length(). This does _not_
    220      * count UTF-8 characters but simply points into the
    221      * member C string.
    222      * @param i
    223      * @return
     250    /**
     251     * Appends the string "that" to "this".
     252     *
     253     * @param   that            The string to append.
     254     *
     255     * @throws  std::bad_alloc  On allocation error.  The object is left unchanged.
     256     *
     257     * @returns Reference to the object.
     258     */
     259    MiniString &append(const MiniString &that);
     260
     261    /**
     262     * Appends the given character to "this".
     263     *
     264     * @param   c               The character to append.
     265     *
     266     * @throws  std::bad_alloc  On allocation error.  The object is left unchanged.
     267     *
     268     * @returns Reference to the object.
     269     */
     270    MiniString &append(char c);
     271
     272    /**
     273     * Index operator.
     274     *
     275     * Returns the byte at the given index, or a null byte if the index is not
     276     * smaller than length().  This does _not_ count codepoints but simply points
     277     * into the member C string.
     278     *
     279     * @param   i       The index into the string buffer.
     280     * @returns char at the index or null.
    224281     */
    225282    inline char operator[](size_t i) const
     
    232289    /**
    233290     * Returns the contained string as a C-style const char* pointer.
    234      * @return
    235      */
    236     inline const char* c_str() const
     291     *
     292     * @returns const pointer to C-style string.
     293     */
     294    inline const char *c_str() const
    237295    {
    238296        return m_psz;
     
    241299    /**
    242300     * Like c_str(), for compatibility with lots of VirtualBox Main code.
    243      * @return
    244      */
    245     inline const char* raw() const
     301     *
     302     * @returns const pointer to C-style string.
     303     */
     304    inline const char *raw() const
    246305    {
    247306        return m_psz;
     
    249308
    250309    /**
    251      * Returns true if the member string has no length. This states nothing about
     310     * Empty string or not?
     311     *
     312     * Returns true if the member string has no length.  This states nothing about
    252313     * how much memory might be allocated.
    253      * @return
     314     *
     315     * @returns true if empty, false if not.
    254316     */
    255317    bool isEmpty() const
     
    258320    }
    259321
     322    /** Case sensitivity selector. */
    260323    enum CaseSensitivity
    261324    {
     
    290353    }
    291354
     355    /** @name Comparison operators.
     356     * @{  */
    292357    bool operator==(const MiniString &that) const { return !compare(that); }
    293358    bool operator!=(const MiniString &that) const { return !!compare(that); }
    294     bool operator<(const MiniString &that) const { return compare(that) < 0; }
    295     bool operator>(const MiniString &that) const { return compare(that) > 0; }
    296 
    297     bool operator==(const char *that) const { return !compare(that); }
    298     bool operator!=(const char *that) const { return !!compare(that); }
    299     bool operator<(const char *that) const { return compare(that) < 0; }
    300     bool operator>(const char *that) const { return compare(that) > 0; }
    301 
     359    bool operator<( const MiniString &that) const { return compare(that) < 0; }
     360    bool operator>( const MiniString &that) const { return compare(that) > 0; }
     361
     362    bool operator==(const char *that) const       { return !compare(that); }
     363    bool operator!=(const char *that) const       { return !!compare(that); }
     364    bool operator<( const char *that) const       { return compare(that) < 0; }
     365    bool operator>( const char *that) const       { return compare(that) > 0; }
     366    /** @} */
     367
     368    /** Max string offset value.
     369     *
     370     * When returned by a method, this indicates failure.  When taken as input,
     371     * typically a default, it means all the way to the string terminator.
     372     */
    302373    static const size_t npos;
    303374
    304375    /**
     376     * Find the given substring.
     377     *
    305378     * Looks for pcszFind in "this" starting at "pos" and returns its position,
    306      * counting from the beginning of "this" at 0. Returns npos if not found.
     379     * counting from the beginning of "this" at 0.
     380     *
     381     * @param   pcszFind        The substring to find.
     382     * @param   pos             The (byte) offset into the string buffer to start
     383     *                          searching.
     384     *
     385     * @returns 0 based position of pcszFind. npos if not found.
    307386     */
    308387    size_t find(const char *pcszFind, size_t pos = 0) const;
    309388
    310389    /**
    311      * Returns a substring of "this" as a new Utf8Str. Works exactly like
    312      * its equivalent in std::string except that this interprets pos and n
    313      * as UTF-8 codepoints instead of bytes. With the default parameters "0"
    314      * and "npos", this always copies the entire string.
    315      * @param pos Index of first codepoint to copy from "this", counting from 0.
    316      * @param n Number of codepoints to copy, starting with the one at "pos".
     390     * Returns a substring of "this" as a new Utf8Str.
     391     *
     392     * Works exactly like its equivalent in std::string except that this interprets
     393     * pos and n as unicode codepoints instead of bytes.  With the default
     394     * parameters "0" and "npos", this always copies the entire string.
     395     *
     396     * @param   pos             Index of first unicode codepoint to copy from
     397     *                          "this", counting from 0.
     398     * @param   n               Number of unicode codepoints to copy, starting with
     399     *                          the one at "pos".  The copying will stop if the null
     400     *                          terminator is encountered before n codepoints have
     401     *                          been copied.
     402     *
     403     * @remarks This works on code points, not bytes!
    317404     */
    318405    iprt::MiniString substr(size_t pos = 0, size_t n = npos) const;
     
    320407    /**
    321408     * Returns true if "this" ends with "that".
    322      * @param that
    323      * @param cs
    324      * @return
     409     *
     410     * @param   that    Suffix to test for.
     411     * @param   cs      Case sensitivity selector.
     412     * @returns true if match, false if mismatch.
    325413     */
    326414    bool endsWith(const iprt::MiniString &that, CaseSensitivity cs = CaseSensitive) const;
     
    328416    /**
    329417     * Returns true if "this" begins with "that".
    330      * @return
     418     * @param   that    Prefix to test for.
     419     * @param   cs      Case sensitivity selector.
     420     * @returns true if match, false if mismatch.
    331421     */
    332422    bool startsWith(const iprt::MiniString &that, CaseSensitivity cs = CaseSensitive) const;
     
    334424    /**
    335425     * Returns true if "this" contains "that" (strstr).
    336      * @param that
    337      * @param cs
    338      * @return
     426     *
     427     * @param   that    Substring to look for.
     428     * @param   cs      Case sensitivity selector.
     429     * @returns true if match, false if mismatch.
    339430     */
    340431    bool contains(const iprt::MiniString &that, CaseSensitivity cs = CaseSensitive) const;
     
    364455    /**
    365456     * Attempts to convert the member string into an unsigned 64-bit integer.
    366      * @return IPRT error code.
    367      * @param i Output buffer.
     457     *
     458     * @param   i       Where to return the value on success.
     459     * @returns IPRT error code, see RTStrToInt64.
    368460     */
    369461    int toInt(uint64_t &i) const;
     
    371463    /**
    372464     * Attempts to convert the member string into an unsigned 32-bit integer.
    373      * @return IPRT error code.
    374      * @param i Output buffer.
     465     *
     466     * @param   i       Where to return the value on success.
     467     * @returns IPRT error code, see RTStrToInt32.
    375468     */
    376469    int toInt(uint32_t &i) const;
     
    384477
    385478    /**
    386      *  Destructor implementation, also used to clean up in operator=()
    387      *  before assigning a new string.
     479     * Destructor implementation, also used to clean up in operator=() before
     480     * assigning a new string.
    388481     */
    389482    void cleanup()
     
    399492
    400493    /**
    401      * Protected internal helper.
    402      * copyFrom() unconditionally sets the members to a copy of the
    403      * given other strings and makes no assumptions about previous
    404      * contents. Can therefore be used both in copy constructors,
    405      * when member variables have no defined value, and in assignments
    406      * after having called cleanup().
     494     * Protected internal helper for copying a string that completely ignores the
     495     * current object state.
     496     *
     497     * copyFrom() unconditionally sets the members to a copy of the given other
     498     * strings and makes no assumptions about previous contents. Can therefore be
     499     * used both in copy constructors, when member variables have no defined value,
     500     * and in assignments after having called cleanup().
    407501     *
    408502     * This variant copies from another MiniString and is fast since
    409503     * the length of source string is known.
    410504     *
    411      * @param s
     505     * @param   s               The source string.
     506     *
     507     * @throws  std::bad_alloc  On allocation failure. The object is left describing
     508     *             a NULL string.
    412509     */
    413510    void copyFrom(const MiniString &s)
     
    416513        {
    417514            m_cbAllocated = m_cbLength + 1;
    418             m_psz = (char*)RTMemAlloc(m_cbAllocated);
     515            m_psz = (char *)RTMemAlloc(m_cbAllocated);
     516            if (RT_LIKELY(m_psz))
     517                memcpy(m_psz, s.m_psz, m_cbAllocated);      // include 0 terminator
     518            else
     519            {
     520                m_cbLength = 0;
     521                m_cbAllocated = 0;
    419522#ifdef RT_EXCEPTIONS_ENABLED
    420             if (!m_psz)
    421523                throw std::bad_alloc();
    422524#endif
    423             memcpy(m_psz, s.m_psz, m_cbAllocated);      // include 0 terminator
     525            }
    424526        }
    425527        else
     
    431533
    432534    /**
    433      * Protected internal helper.
     535     * Protected internal helper for copying a string that completely ignores the
     536     * current object state.
     537     *
    434538     * See copyFrom() above.
    435539     *
     
    437541     * on it. It's therefore slower than the one above.
    438542     *
    439      * @param pcsz
     543     * @param   pcsz            The source string.
     544     *
     545     * @throws  std::bad_alloc  On allocation failure. The object is left describing
     546     *             a NULL string.
    440547     */
    441548    void copyFrom(const char *pcsz)
     
    445552            m_cbLength = strlen(pcsz);
    446553            m_cbAllocated = m_cbLength + 1;
    447             m_psz = (char*)RTMemAlloc(m_cbAllocated);
     554            m_psz = (char *)RTMemAlloc(m_cbAllocated);
     555            if (RT_LIKELY(m_psz))
     556                memcpy(m_psz, pcsz, m_cbAllocated);     // include 0 terminator
     557            else
     558            {
     559                m_cbLength = 0;
     560                m_cbAllocated = 0;
    448561#ifdef RT_EXCEPTIONS_ENABLED
    449             if (!m_psz)
    450562                throw std::bad_alloc();
    451563#endif
    452             memcpy(m_psz, pcsz, m_cbAllocated);      // include 0 terminator
     564            }
    453565        }
    454566        else
     
    460572    }
    461573
    462     char    *m_psz;
    463     size_t  m_cbLength;                     // strlen(m_psz)
    464     size_t  m_cbAllocated;                  // size of buffer that m_psz points to; at least m_cbLength + 1
     574    char    *m_psz;                     /**< The string buffer. */
     575    size_t  m_cbLength;                 /**< strlen(m_psz) - i.e. no terminator included. */
     576    size_t  m_cbAllocated;              /**< Size of buffer that m_psz points to; at least m_cbLength + 1. */
    465577};
    466578
     
    468580
    469581#endif
     582
  • trunk/src/VBox/Runtime/common/string/ministring.cpp

    r23223 r24656  
    3636using namespace iprt;
    3737
    38 const size_t MiniString::npos = (size_t)-1;
    39 
    40 /**
    41  * Appends a copy of @a that to "this".
    42  * @param that
    43  */
    44 MiniString& MiniString::append(const MiniString &that)
     38const size_t MiniString::npos = ~(size_t)0;
     39
     40MiniString &MiniString::append(const MiniString &that)
    4541{
    4642    size_t lenThat = that.length();
     
    5147
    5248        reserve(cbBoth);
    53             // calls realloc(cbBoth) and sets m_cbAllocated
     49            // calls realloc(cbBoth) and sets m_cbAllocated; may throw bad_alloc.
     50#ifndef RT_EXCEPTIONS_ENABLED
     51        AssertRelease(capacity() >= cbBoth);
     52#endif
    5453
    5554        memcpy(m_psz + lenThis, that.m_psz, lenThat);
     
    6059}
    6160
    62 /**
    63  * Appends the given character to "this".
    64  * @param c
    65  * @return
    66  */
    6761MiniString& MiniString::append(char c)
    6862{
     
    7165        // allocate in chunks of 20 in case this gets called several times
    7266        if (m_cbLength + 1 >= m_cbAllocated)
     67        {
    7368            reserve(m_cbLength + 10);
    74             // calls realloc() and sets m_cbAllocated
     69            // calls realloc() and sets m_cbAllocated; may throw bad_alloc.
     70#ifndef RT_EXCEPTIONS_ENABLED
     71            AssertRelease(capacity() >= m_cbLength + 1);
     72#endif
     73        }
    7574
    7675        m_psz[m_cbLength] = c;
     
    8180}
    8281
    83 size_t MiniString::find(const char *pcszFind,
    84                         size_t pos /*= 0*/)
     82size_t MiniString::find(const char *pcszFind, size_t pos /*= 0*/)
    8583    const
    8684{
     
    128126
    129127                size_t cbCopy = psz - pFirst;
    130                 ret.reserve(cbCopy + 1);
     128                ret.reserve(cbCopy + 1); // may throw bad_alloc
     129#ifndef RT_EXCEPTIONS_ENABLED
     130                AssertRelease(capacity() >= cbCopy + 1);
     131#endif
    131132                memcpy(ret.m_psz, pFirst, cbCopy);
    132133                ret.m_cbLength = cbCopy;
     
    148149    if (l1 < l2)
    149150        return false;
     151    /** @todo r=bird: If l2 is 0, then m_psz can be NULL and we will crash. See
     152     *        also handling of l2 == 0 in startsWith. */
    150153
    151154    size_t l = l1 - l2;
     
    160163    size_t l1 = length();
    161164    size_t l2 = that.length();
    162     if (l1 == 0 || l2 == 0)
     165    if (l1 == 0 || l2 == 0) /** @todo r=bird: this differs from endsWith, and I think other IPRT code. If l2 == 0, it matches anything. */
    163166        return false;
    164167
     
    174177bool MiniString::contains(const MiniString &that, CaseSensitivity cs /*= CaseSensitive*/) const
    175178{
     179    /** @todo r=bird: Not checking for NULL strings like startsWith does (and
     180     *        endsWith only does half way). */
    176181    if (cs == CaseSensitive)
    177182        return ::RTStrStr(m_psz, that.m_psz) != NULL;
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette