VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/string/utf-16-printf.cpp@ 88485

Last change on this file since 88485 was 83943, checked in by vboxsync, 5 years ago

IPRT/RTUtf16Printf*: use U+fffd instead of U+007f as replacement for bad UTF-8 encodings. bugref:8489

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 7.7 KB
Line 
1/* $Id: utf-16-printf.cpp 83943 2020-04-23 10:43:21Z vboxsync $ */
2/** @file
3 * IPRT - String Formatters, Outputting UTF-16.
4 */
5
6/*
7 * Copyright (C) 2006-2020 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*********************************************************************************************************************************
29* Header Files *
30*********************************************************************************************************************************/
31#include <iprt/utf16.h>
32#include "internal/iprt.h"
33
34#include <iprt/assert.h>
35#include <iprt/string.h>
36#include <iprt/uni.h>
37
38
39/*********************************************************************************************************************************
40* Structures and Typedefs *
41*********************************************************************************************************************************/
42/** rtUtf16PrintfOutput() argument structure. */
43typedef struct UTF16PRINTFOUTPUTARGS
44{
45 /** Pointer to current buffer position. */
46 PRTUTF16 pwszCur;
47 /** Number of RTUTF16 units left in the buffer (including the trailing zero). */
48 size_t cwcLeft;
49 /** Set if we overflowed. */
50 bool fOverflowed;
51} UTF16PRINTFOUTPUTARGS;
52/** Pointer to a rtUtf16PrintfOutput() argument structure. */
53typedef UTF16PRINTFOUTPUTARGS *PUTF16PRINTFOUTPUTARGS;
54
55
56/**
57 * Output callback.
58 *
59 * @returns Number of RTUTF16 units we (would have) outputted.
60 *
61 * @param pvArg Pointer to a STRBUFARG structure.
62 * @param pachChars Pointer to an array of utf-8 characters.
63 * @param cbChars Number of bytes in the character array pointed to by pachChars.
64 */
65static DECLCALLBACK(size_t) rtUtf16PrintfOutput(void *pvArg, const char *pachChars, size_t cbChars)
66{
67 PUTF16PRINTFOUTPUTARGS pArgs = (PUTF16PRINTFOUTPUTARGS)pvArg;
68 size_t cwcRet = 0;
69
70 size_t cwcLeft = pArgs->cwcLeft;
71 if (cwcLeft > 1)
72 {
73 Assert(!pArgs->fOverflowed);
74
75 PRTUTF16 pwszCur = pArgs->pwszCur;
76 for (;;)
77 {
78 if (cbChars > 0)
79 {
80 RTUNICP uc;
81 int rc = RTStrGetCpNEx(&pachChars, &cbChars, &uc);
82 AssertRCStmt(rc, uc = 0xfffd /* REPLACEMENT */);
83
84 /* Simple: */
85 if (RTUniCpIsBMP(uc))
86 {
87 cwcRet += 1;
88 if (RT_LIKELY(cwcLeft > 1))
89 *pwszCur++ = uc;
90 else
91 break;
92 cwcLeft--;
93 }
94 /* Surrogate pair: */
95 else if (uc >= 0x10000 && uc <= 0x0010ffff)
96 {
97 cwcRet += 2;
98 if (RT_LIKELY(cwcLeft > 2))
99 *pwszCur++ = 0xd800 | (uc >> 10);
100 else
101 {
102 if (cwcLeft > 1)
103 {
104 cwcLeft = 1;
105 pwszCur[1] = '\0';
106 }
107 break;
108 }
109 *pwszCur++ = 0xdc00 | (uc & 0x3ff);
110 cwcLeft -= 2;
111 }
112 else
113 {
114 AssertMsgFailed(("uc=%#x\n", uc));
115 cwcRet += 1;
116 if (RT_LIKELY(cwcLeft > 1))
117 *pwszCur++ = 0xfffd; /* REPLACEMENT */
118 else
119 break;
120 cwcLeft--;
121 }
122 }
123 else
124 {
125 *pwszCur = '\0';
126 pArgs->pwszCur = pwszCur;
127 pArgs->cwcLeft = cwcLeft;
128 return cwcRet;
129 }
130 }
131
132 /*
133 * We only get here if we run out of buffer space.
134 */
135 Assert(cwcLeft == 1);
136 *pwszCur = '\0';
137 pArgs->pwszCur = pwszCur;
138 pArgs->cwcLeft = cwcLeft;
139 }
140 /*
141 * We get a special zero byte call at the end for the formatting operation.
142 *
143 * Make sure we don't turn that into an overflow and that we'll terminate
144 * empty result strings.
145 */
146 else if (cbChars == 0 && cwcLeft > 0)
147 {
148 *pArgs->pwszCur = '\0';
149 return 0;
150 }
151
152 /*
153 * Overflow handling. Calc needed space.
154 */
155 pArgs->fOverflowed = true;
156
157 while (cbChars > 0)
158 {
159 RTUNICP uc;
160 int rc = RTStrGetCpNEx(&pachChars, &cbChars, &uc);
161 AssertRCStmt(rc, uc = 0xfffd /* REPLACEMENT */);
162
163 if (RTUniCpIsBMP(uc))
164 cwcRet += 1;
165 else if (uc >= 0x10000 && uc <= 0x0010ffff)
166 cwcRet += 2;
167 else
168 {
169 AssertMsgFailed(("uc=%#x\n", uc));
170 cwcRet += 1;
171 }
172 }
173
174 return cwcRet;
175}
176
177
178RTDECL(ssize_t) RTUtf16Printf(PRTUTF16 pwszBuffer, size_t cwcBuffer, const char *pszFormat, ...)
179{
180 /* Explicitly inline RTStrPrintfV + RTStrPrintfExV here because this is a frequently use API. */
181 UTF16PRINTFOUTPUTARGS Args;
182 size_t cwcRet;
183 va_list args;
184 AssertMsg(cwcBuffer > 0, ("Excellent idea! Format a string with no space for the output!\n"));
185
186 Args.pwszCur = pwszBuffer;
187 Args.cwcLeft = cwcBuffer;
188 Args.fOverflowed = false;
189
190 va_start(args, pszFormat);
191 cwcRet = RTStrFormatV(rtUtf16PrintfOutput, &Args, NULL, NULL, pszFormat, args);
192 va_end(args);
193
194 return !Args.fOverflowed ? (ssize_t)cwcRet : -(ssize_t)cwcRet - 1;
195}
196RT_EXPORT_SYMBOL(RTStrPrintf2);
197
198
199RTDECL(ssize_t) RTUtf16PrintfExV(PFNSTRFORMAT pfnFormat, void *pvArg, PRTUTF16 pwszBuffer, size_t cwcBuffer,
200 const char *pszFormat, va_list args)
201{
202 UTF16PRINTFOUTPUTARGS Args;
203 size_t cwcRet;
204 AssertMsg(cwcBuffer > 0, ("Excellent idea! Format a string with no space for the output!\n"));
205
206 Args.pwszCur = pwszBuffer;
207 Args.cwcLeft = cwcBuffer;
208 Args.fOverflowed = false;
209 cwcRet = RTStrFormatV(rtUtf16PrintfOutput, &Args, pfnFormat, pvArg, pszFormat, args);
210 return !Args.fOverflowed ? (ssize_t)cwcRet : -(ssize_t)cwcRet - 1;
211}
212RT_EXPORT_SYMBOL(RTUtf16PrintfExV);
213
214
215RTDECL(ssize_t) RTUtf16PrintfV(PRTUTF16 pwszBuffer, size_t cwcBuffer, const char *pszFormat, va_list args)
216{
217 return RTUtf16PrintfExV(NULL, NULL, pwszBuffer, cwcBuffer, pszFormat, args);
218}
219RT_EXPORT_SYMBOL(RTUtf16Printf2V);
220
221
222RTDECL(ssize_t) RTUtf16PrintfEx(PFNSTRFORMAT pfnFormat, void *pvArg, PRTUTF16 pwszBuffer, size_t cwcBuffer,
223 const char *pszFormat, ...)
224{
225 va_list args;
226 ssize_t cbRet;
227 va_start(args, pszFormat);
228 cbRet = RTUtf16PrintfExV(pfnFormat, pvArg, pwszBuffer, cwcBuffer, pszFormat, args);
229 va_end(args);
230 return cbRet;
231}
232RT_EXPORT_SYMBOL(RTUtf16PrintfEx);
233
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette