/* $Id: strformat.cpp 628 2007-02-05 11:59:58Z vboxsync $ */ /** @file * InnoTek Portable Runtime - String Formatter. */ /* * Copyright (C) 2006 InnoTek Systemberatung GmbH * * This file is part of VirtualBox Open Source Edition (OSE), as * available from http://www.virtualbox.org. This file is free software; * you can redistribute it and/or modify it under the terms of the GNU * General Public License as published by the Free Software Foundation, * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE * distribution. VirtualBox OSE is distributed in the hope that it will * be useful, but WITHOUT ANY WARRANTY of any kind. * * If you received this file as part of a commercial VirtualBox * distribution, then only the terms of your commercial VirtualBox * license agreement apply instead of the previous paragraph. */ /******************************************************************************* * Defined Constants * *******************************************************************************/ #define ISDIGIT(c) ((c) >= '0' && (c) <= '9') /*#define MAX(a, b) ((a) >= (b) ? (a) : (b)) #define MIN(a, b) ((a) < (b) ? (a) : (b)) */ /******************************************************************************* * Header Files * *******************************************************************************/ #define LOG_GROUP RTLOGGROUP_STRING #include #include #ifdef IN_RING3 #include #include #endif #include #include #include "internal/string.h" /* Wrappers for converting to iprt facilities. */ #define SSToDS(ptr) ptr #define kASSERT Assert #define KENDIAN_LITTLE 1 #define KENDIAN KENDIAN_LITTLE #define KSIZE size_t typedef struct { uint32_t ulLo; uint32_t ulHi; } KSIZE64; /******************************************************************************* * Internal Functions * *******************************************************************************/ static unsigned _strnlen(const char *psz, unsigned cchMax); static unsigned _strnlenUCS2(PCRTUCS2 pucs, unsigned cchMax); static int rtStrFormatNumber(char *psz, KSIZE64 ullValue, unsigned int uiBase, signed int cchWidth, signed int cchPrecision, unsigned int fFlags); /** * Finds the length of a string up to cchMax. * @returns Length. * @param psz Pointer to string. * @param cchMax Max length. */ static unsigned _strnlen(const char *psz, unsigned cchMax) { const char *pszC = psz; while (cchMax-- > 0 && *psz != '\0') psz++; return psz - pszC; } /** * Finds the length of a string up to cchMax. * @returns Length. * @param pucs Pointer to string. * @param cchMax Max length. */ static unsigned _strnlenUCS2(PCRTUCS2 pucs, unsigned cchMax) { PCRTUCS2 pucsC = pucs; while (cchMax-- > 0 && *pucs != '\0') pucs++; return pucs - pucsC; } /** * Finds the length of a string up to cchMax. * @returns Length. * @param pusz Pointer to string. * @param cchMax Max length. */ static unsigned _strnlenUni(PCRTUNICP pusz, unsigned cchMax) { PCRTUNICP puszC = pusz; while (cchMax-- > 0 && *pusz != '\0') pusz++; return pusz - puszC; } /** * Formats an integer number according to the parameters. * * @returns Length of the formatted number. * @param psz Pointer to output string buffer of sufficient size. * @param u64Value Value to format. * @param uiBase Number representation base. * @param cchWidth Width. * @param cchPrecision Precision. * @param fFlags Flags (NTFS_*). */ RTDECL(int) RTStrFormatNumber(char *psz, uint64_t u64Value, unsigned int uiBase, signed int cchWidth, signed int cchPrecision, unsigned int fFlags) { return rtStrFormatNumber(psz, *(KSIZE64 *)(void *)&u64Value, uiBase, cchWidth, cchPrecision, fFlags); } /** * Formats an integer number according to the parameters. * * @returns Length of the number. * @param psz Pointer to output string. * @param ullValue Value. Using the high part is optional. * @param uiBase Number representation base. * @param cchWidth Width * @param cchPrecision Precision. * @param fFlags Flags (NTFS_*). */ static int rtStrFormatNumber(char *psz, KSIZE64 ullValue, unsigned int uiBase, signed int cchWidth, signed int cchPrecision, unsigned int fFlags) { char * pachDigits = "0123456789abcdef"; char * pszStart = psz; int cchValue; unsigned long ul; #if 0 unsigned long ullow; #endif int i; int j; /** @todo Formatting of 64 bit numbers is broken, fix it! */ /* * Validate and addjust input... */ kASSERT((uiBase >= 2 || uiBase <= 16)); if (fFlags & RTSTR_F_CAPITAL) pachDigits = "0123456789ABCDEF"; if (fFlags & RTSTR_F_LEFT) fFlags &= ~RTSTR_F_ZEROPAD; /* * Determin value length */ cchValue = 0; if (ullValue.ulHi || (fFlags & RTSTR_F_64BIT)) { uint64_t u64 = *(uint64_t *)(void *)&ullValue; if ((fFlags & RTSTR_F_VALSIGNED) && (ullValue.ulHi & 0x80000000)) u64 = -(int64_t)u64; do { cchValue++; u64 /= uiBase; } while (u64); } else { ul = (fFlags & RTSTR_F_VALSIGNED) && (ullValue.ulLo & 0x80000000) ? -(int32_t)ullValue.ulLo : ullValue.ulLo; do { cchValue++; ul /= uiBase; } while (ul); } /* * Sign (+/-). */ i = 0; if (fFlags & RTSTR_F_VALSIGNED) { if ((ullValue.ulHi || (fFlags & RTSTR_F_64BIT) ? ullValue.ulHi : ullValue.ulLo) & 0x80000000) { ullValue.ulLo = -(int32_t)ullValue.ulLo; if (ullValue.ulHi) ullValue.ulHi = ~ullValue.ulHi; psz[i++] = '-'; } else if (fFlags & (RTSTR_F_PLUS | RTSTR_F_BLANK)) psz[i++] = (char)(fFlags & RTSTR_F_PLUS ? '+' : ' '); } /* * Special (0/0x). */ if (fFlags & RTSTR_F_SPECIAL && (uiBase % 8) == 0) { psz[i++] = '0'; if (uiBase == 16) psz[i++] = (char)(fFlags & RTSTR_F_CAPITAL ? 'X' : 'x'); } /* * width - only if ZEROPAD */ cchWidth -= i + cchValue; if (fFlags & RTSTR_F_ZEROPAD) while (--cchWidth >= 0) { psz[i++] = '0'; cchPrecision--; } else if (!(fFlags & RTSTR_F_LEFT) && cchWidth > 0) { for (j = i-1; j >= 0; j--) psz[cchWidth + j] = psz[j]; for (j = 0; j < cchWidth; j++) psz[j] = ' '; i += cchWidth; } psz += i; /* * precision */ while (--cchPrecision >= cchValue) *psz++ = '0'; /* * write number - not good enough but it works */ psz += cchValue; i = -1; if (ullValue.ulHi || (fFlags & RTSTR_F_64BIT)) { uint64_t u64 = *(uint64_t *)(void *)&ullValue; do { psz[i--] = pachDigits[u64 % uiBase]; u64 /= uiBase; } while (u64); } else { ul = (fFlags & RTSTR_F_VALSIGNED) && (ullValue.ulLo & 0x80000000) ? -(int32_t)ullValue.ulLo : ullValue.ulLo; do { psz[i--] = pachDigits[ul % uiBase]; ul /= uiBase; } while (ul); } /* * width if RTSTR_F_LEFT */ if (fFlags & RTSTR_F_LEFT) while (--cchWidth >= 0) *psz++ = ' '; *psz = '\0'; return psz - pszStart; } /** * Partial implementation of a printf like formatter. * It doesn't do everything correct, and there is no floating point support. * However, it supports custom formats by the means of a format callback. * * @returns number of bytes formatted. * @param pfnOutput Output worker. * Called in two ways. Normally with a string an it's length. * For termination, it's called with NULL for string, 0 for length. * @param pvArgOutput Argument to the output worker. * @param pfnFormat Custom format worker. * @param pvArgFormat Argument to the format worker. * @param pszFormat Format string. * @param InArgs Argument list. */ RTDECL(size_t) RTStrFormatV(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, PFNSTRFORMAT pfnFormat, void *pvArgFormat, const char *pszFormat, va_list InArgs) { /* make a copy so we can reference it (AMD64 / gcc). */ va_list args; va_copy(args, InArgs); KSIZE cch = 0; const char *pszStartOutput = pszFormat; while (*pszFormat != '\0') { if (*pszFormat == '%') { /* output pending string. */ if (pszStartOutput != pszFormat) cch += pfnOutput(pvArgOutput, pszStartOutput, pszFormat - pszStartOutput); /* skip '%' */ pszFormat++; if (*pszFormat == '%') /* '%%'-> '%' */ pszStartOutput = pszFormat++; else { unsigned int fFlags = 0; int cchWidth = -1; int cchPrecision = -1; unsigned int uBase = 10; char chArgSize; /* flags */ for (;;) { switch (*pszFormat++) { case '#': fFlags |= RTSTR_F_SPECIAL; continue; case '-': fFlags |= RTSTR_F_LEFT; continue; case '+': fFlags |= RTSTR_F_PLUS; continue; case ' ': fFlags |= RTSTR_F_BLANK; continue; case '0': fFlags |= RTSTR_F_ZEROPAD; continue; } pszFormat--; break; } /* width */ if (ISDIGIT(*pszFormat)) { for (cchWidth = 0; ISDIGIT(*pszFormat); pszFormat++) { cchWidth *= 10; cchWidth += *pszFormat - '0'; } fFlags |= RTSTR_F_WIDTH; } else if (*pszFormat == '*') { pszFormat++; cchWidth = va_arg(args, int); if (cchWidth < 0) { cchWidth = -cchWidth; fFlags |= RTSTR_F_LEFT; } fFlags |= RTSTR_F_WIDTH; } /* precision */ if (*pszFormat == '.') { pszFormat++; if (ISDIGIT(*pszFormat)) { for (cchPrecision = 0; ISDIGIT(*pszFormat); pszFormat++) { cchPrecision *= 10; cchPrecision += *pszFormat - '0'; } } else if (*pszFormat == '*') { pszFormat++; cchPrecision = va_arg(args, int); } if (cchPrecision < 0) cchPrecision = 0; fFlags |= RTSTR_F_PRECISION; } /* argsize */ chArgSize = *pszFormat; if (chArgSize != 'l' && chArgSize != 'L' && chArgSize != 'h' && chArgSize != 'j' && chArgSize != 'z' && chArgSize != 't') chArgSize = 0; else { pszFormat++; if (*pszFormat == 'l' && chArgSize == 'l') { chArgSize = 'L'; pszFormat++; } else if (*pszFormat == 'h' && chArgSize == 'h') { chArgSize = 'H'; pszFormat++; } } /* * The type. */ switch (*pszFormat++) { /* char */ case 'c': { char ch; if (!(fFlags & RTSTR_F_LEFT)) while (--cchWidth > 0) cch += pfnOutput(pvArgOutput, " ", 1); ch = (char)va_arg(args, int); cch += pfnOutput(pvArgOutput, SSToDS(&ch), 1); while (--cchWidth > 0) cch += pfnOutput(pvArgOutput, " ", 1); break; } #ifndef IN_RING3 case 'S': /* Unicode string as current code page. */ chArgSize = 'l'; /* fall thru */ #endif case 's': /* Unicode string as utf8 */ { if (chArgSize == 'l') { /* ucs2 -> utf8 */ int cchStr; PCRTUTF16 pwszStr = va_arg(args, PRTUTF16); if (!VALID_PTR(pwszStr)) { static RTUTF16 s_wszNull[] = {'<', 'N', 'U', 'L', 'L', '>', '\0' }; pwszStr = s_wszNull; } cchStr = _strnlenUCS2(pwszStr, (unsigned)cchPrecision); if (!(fFlags & RTSTR_F_LEFT)) while (--cchWidth >= cchStr) cch += pfnOutput(pvArgOutput, " ", 1); while (cchStr-- > 0) { #ifdef IN_RING3 RTUNICP Cp; RTUtf16GetCpEx(&pwszStr, &Cp); char szUtf8[8]; /* Cp=0x7fffffff -> 6 bytes. */ char *pszEnd = RTStrPutCp(szUtf8, Cp); cch += pfnOutput(pvArgOutput, szUtf8, pszEnd - szUtf8); #else char ch = (char)*pwszStr++; cch += pfnOutput(pvArgOutput, &ch, 1); #endif } while (--cchWidth >= cchStr) cch += pfnOutput(pvArgOutput, " ", 1); } else if (chArgSize == 'L') { /* unicp -> utf8 */ int cchStr; PCRTUNICP puszStr = va_arg(args, PCRTUNICP); if (!VALID_PTR(puszStr)) { static RTUNICP uszNull[] = {'<', 'N', 'U', 'L', 'L', '>', '\0' }; puszStr = uszNull; } cchStr = _strnlenUni(puszStr, (unsigned)cchPrecision); if (!(fFlags & RTSTR_F_LEFT)) while (--cchWidth >= cchStr) cch += pfnOutput(pvArgOutput, " ", 1); while (cchStr-- > 0) { #ifdef IN_RING3 char szUtf8[8]; /* Cp=0x7fffffff -> 6 bytes. */ char *pszEnd = RTStrPutCp(szUtf8, *puszStr++); cch += pfnOutput(pvArgOutput, szUtf8, pszEnd - szUtf8); #else char ch = (char)*puszStr++; cch += pfnOutput(pvArgOutput, &ch, 1); #endif } while (--cchWidth >= cchStr) cch += pfnOutput(pvArgOutput, " ", 1); } else { int cchStr; char *pszStr = va_arg(args, char*); if (!VALID_PTR(pszStr)) pszStr = ""; cchStr = _strnlen(pszStr, (unsigned)cchPrecision); if (!(fFlags & RTSTR_F_LEFT)) while (--cchWidth >= cchStr) cch += pfnOutput(pvArgOutput, " ", 1); cch += pfnOutput(pvArgOutput, pszStr, cchStr); while (--cchWidth >= cchStr) cch += pfnOutput(pvArgOutput, " ", 1); } break; } #ifdef IN_RING3 case 'S': /* Unicode string as current code page. */ { if (chArgSize == 'l') { int cchStr; PCRTUCS2 pucs2Str = va_arg(args, PRTUCS2); if (!VALID_PTR(pucs2Str)) { static RTUCS2 ucs2Null[] = {'<', 'N', 'U', 'L', 'L', '>', '\0' }; pucs2Str = ucs2Null; } cchStr = _strnlenUCS2(pucs2Str, (unsigned)cchPrecision); if (!(fFlags & RTSTR_F_LEFT)) while (--cchWidth >= cchStr) cch += pfnOutput(pvArgOutput, " ", 1); if (cchStr) { /* allocate temporary buffer. */ PRTUCS2 pucs2Tmp = (PRTUCS2)RTMemAlloc((cchStr + 1) * sizeof(RTUCS2)); memcpy(pucs2Tmp, pucs2Str, cchStr * sizeof(RTUCS2)); pucs2Tmp[cchStr] = '\0'; char *pszUtf8; int rc = RTStrUcs2ToUtf8(&pszUtf8, pucs2Tmp); if (RT_SUCCESS(rc)) { char *pszCurCp; rc = RTStrUtf8ToCurrentCP(&pszCurCp, pszUtf8); if (RT_SUCCESS(rc)) { cch += pfnOutput(pvArgOutput, pszCurCp, strlen(pszCurCp)); RTStrFree(pszCurCp); } RTStrFree(pszUtf8); } if (RT_FAILURE(rc)) while (cchStr-- > 0) cch += pfnOutput(pvArgOutput, "\x7f", 1); RTMemFree(pucs2Tmp); } while (--cchWidth >= cchStr) cch += pfnOutput(pvArgOutput, " ", 1); } else if (chArgSize == 'L') { AssertMsgFailed(("Not implemented yet\n")); } else { int cchStr; char *pszStr = va_arg(args, char*); if (!VALID_PTR(pszStr)) pszStr = ""; cchStr = _strnlen(pszStr, (unsigned)cchPrecision); if (!(fFlags & RTSTR_F_LEFT)) while (--cchWidth >= cchStr) cch += pfnOutput(pvArgOutput, " ", 1); if (cchStr) { /* allocate temporary buffer. */ char *pszTmp = (char *)RTMemAlloc(cchStr + 1); memcpy(pszTmp, pszStr, cchStr); pszTmp[cchStr] = '\0'; char *pszCurCp; int rc = RTStrUtf8ToCurrentCP(&pszCurCp, pszTmp); if (RT_SUCCESS(rc)) { cch += pfnOutput(pvArgOutput, pszCurCp, strlen(pszCurCp)); RTStrFree(pszCurCp); } else while (cchStr-- > 0) cch += pfnOutput(pvArgOutput, "\x7f", 1); RTMemFree(pszTmp); } while (--cchWidth >= cchStr) cch += pfnOutput(pvArgOutput, " ", 1); } break; } #endif /*-----------------*/ /* integer/pointer */ /*-----------------*/ case 'd': case 'i': case 'o': case 'p': case 'u': case 'x': case 'X': { char achNum[64]; /* FIXME */ int cchNum; uint64_t u64Value; switch (pszFormat[-1]) { case 'd': /* signed decimal integer */ case 'i': fFlags |= RTSTR_F_VALSIGNED; break; case 'o': uBase = 8; break; case 'p': fFlags |= RTSTR_F_ZEROPAD; /* Note not standard behaviour (but I like it this way!) */ uBase = 16; if (cchWidth < 0) cchWidth = sizeof(char *) * 2; break; case 'u': uBase = 10; break; case 'X': fFlags |= RTSTR_F_CAPITAL; case 'x': uBase = 16; break; } if (pszFormat[-1] == 'p') u64Value = va_arg(args, uintptr_t); else if (fFlags & RTSTR_F_VALSIGNED) { if (chArgSize == 'L') { u64Value = va_arg(args, int64_t); fFlags |= RTSTR_F_64BIT; } else if (chArgSize == 'l') { u64Value = va_arg(args, signed long); fFlags |= RTSTR_GET_BIT_FLAG(unsigned long); } else if (chArgSize == 'h') { u64Value = va_arg(args, /* signed short */ int); fFlags |= RTSTR_GET_BIT_FLAG(signed short); } else if (chArgSize == 'H') { u64Value = va_arg(args, /* int8_t */ int); fFlags |= RTSTR_GET_BIT_FLAG(int8_t); } else if (chArgSize == 'j') { u64Value = va_arg(args, /*intmax_t*/ int64_t); fFlags |= RTSTR_F_64BIT; } else if (chArgSize == 'z') { u64Value = va_arg(args, size_t); fFlags |= RTSTR_GET_BIT_FLAG(size_t); } else if (chArgSize == 't') { u64Value = va_arg(args, ptrdiff_t); fFlags |= RTSTR_GET_BIT_FLAG(ptrdiff_t); } else { u64Value = va_arg(args, signed int); fFlags |= RTSTR_GET_BIT_FLAG(signed int); } } else { if (chArgSize == 'L') { u64Value = va_arg(args, uint64_t); fFlags |= RTSTR_F_64BIT; } else if (chArgSize == 'l') { u64Value = va_arg(args, unsigned long); fFlags |= RTSTR_GET_BIT_FLAG(unsigned long); } else if (chArgSize == 'h') { u64Value = va_arg(args, /* unsigned short */ int); fFlags |= RTSTR_GET_BIT_FLAG(unsigned short); } else if (chArgSize == 'H') { u64Value = va_arg(args, /* uint8_t */ int); fFlags |= RTSTR_GET_BIT_FLAG(uint8_t); } else if (chArgSize == 'j') { u64Value = va_arg(args, /*uintmax_t*/ int64_t); fFlags |= RTSTR_F_64BIT; } else if (chArgSize == 'z') { u64Value = va_arg(args, size_t); fFlags |= RTSTR_GET_BIT_FLAG(size_t); } else if (chArgSize == 't') { u64Value = va_arg(args, ptrdiff_t); fFlags |= RTSTR_GET_BIT_FLAG(ptrdiff_t); } else { u64Value = va_arg(args, unsigned int); fFlags |= RTSTR_GET_BIT_FLAG(unsigned int); } } cchNum = RTStrFormatNumber((char *)SSToDS(&achNum), u64Value, uBase, cchWidth, cchPrecision, fFlags); cch += pfnOutput(pvArgOutput, (char *)SSToDS(&achNum), cchNum); break; } /* * Nested extension. */ case 'N': { const char *pszFormatNested = va_arg(args, const char *); va_list *pArgsNested = va_arg(args, va_list *); va_list ArgsNested; va_copy(ArgsNested, *pArgsNested); Assert(pszFormatNested); cch += RTStrFormatV(pfnOutput, pvArgOutput, pfnFormat, pvArgFormat, pszFormatNested, ArgsNested); break; } /* * InnoTek Portable Runtime Extensions. */ case 'R': { pszFormat--; cch += rtstrFormatRt(pfnOutput, pvArgOutput, &pszFormat, &args, cchPrecision, cchWidth, fFlags, chArgSize); break; } #ifdef RT_WITH_VBOX /* * VBox extensions. */ case 'V': { pszFormat--; cch += rtstrFormatVBox(pfnOutput, pvArgOutput, &pszFormat, &args, cchPrecision, cchWidth, fFlags, chArgSize); break; } #endif /* * Custom format. */ default: { if (pfnFormat) { pszFormat--; cch += pfnFormat(pvArgFormat, pfnOutput, pvArgOutput, &pszFormat, &args, cchPrecision, cchWidth, fFlags, chArgSize); } break; } } pszStartOutput = pszFormat; } } else pszFormat++; } /* output pending string. */ if (pszStartOutput != pszFormat) cch += pfnOutput(pvArgOutput, pszStartOutput, pszFormat - pszStartOutput); /* terminate the output */ pfnOutput(pvArgOutput, NULL, 0); return cch; } /** * Partial implementation of a printf like formatter. * It doesn't do everything correct, and there is no floating point support. * However, it supports custom formats by the means of a format callback. * * @returns number of bytes formatted. * @param pfnOutput Output worker. * Called in two ways. Normally with a string an it's length. * For termination, it's called with NULL for string, 0 for length. * @param pvArgOutput Argument to the output worker. * @param pfnFormat Custom format worker. * @param pvArgFormat Argument to the format worker. * @param pszFormat Format string. * @param ... Argument list. */ RTDECL(size_t) RTStrFormat(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput, PFNSTRFORMAT pfnFormat, void *pvArgFormat, const char *pszFormat, ...) { size_t cch; va_list args; va_start(args, pszFormat); cch = RTStrFormatV(pfnOutput, pvArgOutput, pfnFormat, pvArgFormat, pszFormat, args); va_end(args); return cch; }