VirtualBox

source: vbox/trunk/include/iprt/latin1.h@ 76421

Last change on this file since 76421 was 76417, checked in by vboxsync, 6 years ago

iprt/errcore.h,*: Duplicate some of the most frequently used status codes in the errcore.h header, letting once.h, rest.h and others avoid iprt/err.h and thereby reducing err.h rebuild time (openssl includes once.h for everything). bugref:9344

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 16.3 KB
Line 
1/** @file
2 * IPRT - String Manipulation, Latin-1 (ISO-8859-1) encoding.
3 */
4
5/*
6 * Copyright (C) 2006-2017 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_latin1_h
27#define ___iprt_latin1_h
28
29#include <iprt/assert.h>
30#include <iprt/errcore.h> /* VERR_END_OF_STRING */
31
32RT_C_DECLS_BEGIN
33
34
35/** @defgroup rt_str_latin1 Latin-1 (ISO-8859-1) String Manipulation
36 * @ingroup grp_rt_str
37 *
38 * Deals with Latin-1 encoded strings.
39 *
40 * @warning Make sure to name all variables dealing with Latin-1 strings
41 * such that there is no way to mistake them for normal UTF-8 strings.
42 * There may be severe security issues resulting from mistaking Latin-1
43 * for UTF-8!
44 *
45 * @{
46 */
47
48/**
49 * Get the unicode code point at the given string position.
50 *
51 * @returns unicode code point.
52 * @returns RTUNICP_INVALID if the encoding is invalid.
53 * @param pszLatin1 The Latin-1 string.
54 */
55DECLINLINE(RTUNICP) RTLatin1GetCp(const char *pszLatin1)
56{
57 return *(const unsigned char *)pszLatin1;
58}
59
60/**
61 * Get the unicode code point at the given string position.
62 *
63 * @returns iprt status code.
64 * @param ppszLatin1 Pointer to the string pointer. This will be updated to
65 * point to the char following the current code point. This
66 * is advanced one character forward on failure.
67 * @param pCp Where to store the code point. RTUNICP_INVALID is stored
68 * here on failure.
69 */
70DECLINLINE(int) RTLatin1GetCpEx(const char **ppszLatin1, PRTUNICP pCp)
71{
72 const unsigned char uch = **(const unsigned char **)ppszLatin1;
73 (*ppszLatin1)++;
74 *pCp = uch;
75 return VINF_SUCCESS;
76}
77
78/**
79 * Get the unicode code point at the given string position for a string of a
80 * given maximum length.
81 *
82 * @returns iprt status code.
83 * @retval VERR_END_OF_STRING if *pcch is 0. *pCp is set to RTUNICP_INVALID.
84 *
85 * @param ppszLatin1 Pointer to the string pointer. This will be updated to
86 * point to the char following the current code point.
87 * @param pcchLatin1 Pointer to the maximum string length. This will be
88 * decremented by the size of the code point found.
89 * @param pCp Where to store the code point.
90 * RTUNICP_INVALID is stored here on failure.
91 */
92DECLINLINE(int) RTLatin1GetCpNEx(const char **ppszLatin1, size_t *pcchLatin1, PRTUNICP pCp)
93{
94 if (RT_LIKELY(*pcchLatin1 != 0))
95 {
96 const unsigned char uch = **(const unsigned char **)ppszLatin1;
97 (*ppszLatin1)++;
98 (*pcchLatin1)--;
99 *pCp = uch;
100 return VINF_SUCCESS;
101 }
102 *pCp = RTUNICP_INVALID;
103 return VERR_END_OF_STRING;
104}
105
106/**
107 * Get the Latin-1 size in characters of a given Unicode code point.
108 *
109 * The code point is expected to be a valid Unicode one, but not necessarily in
110 * the range supported by Latin-1.
111 *
112 * @returns the size in characters, or zero if there is no Latin-1 encoding
113 */
114DECLINLINE(size_t) RTLatin1CpSize(RTUNICP CodePoint)
115{
116 if (CodePoint < 0x100)
117 return 1;
118 return 0;
119}
120
121/**
122 * Put the unicode code point at the given string position
123 * and return the pointer to the char following it.
124 *
125 * This function will not consider anything at or following the
126 * buffer area pointed to by psz. It is therefore not suitable for
127 * inserting code points into a string, only appending/overwriting.
128 *
129 * @returns pointer to the char following the written code point.
130 * @param pszLatin1 The string.
131 * @param CodePoint The code point to write.
132 * This should not be RTUNICP_INVALID or any other
133 * character out of the Latin-1 range.
134 */
135DECLINLINE(char *) RTLatin1PutCp(char *pszLatin1, RTUNICP CodePoint)
136{
137 AssertReturn(CodePoint < 0x100, NULL);
138 *pszLatin1++ = (unsigned char)CodePoint;
139 return pszLatin1;
140}
141
142/**
143 * Skips ahead, past the current code point.
144 *
145 * @returns Pointer to the char after the current code point.
146 * @param pszLatin1 Pointer to the current code point.
147 * @remark This will not move the next valid code point, only past the current one.
148 */
149DECLINLINE(char *) RTLatin1NextCp(const char *pszLatin1)
150{
151 pszLatin1++;
152 return (char *)pszLatin1;
153}
154
155/**
156 * Skips back to the previous code point.
157 *
158 * @returns Pointer to the char before the current code point.
159 * @returns pszLatin1Start on failure.
160 * @param pszLatin1Start Pointer to the start of the string.
161 * @param pszLatin1 Pointer to the current code point.
162 */
163DECLINLINE(char *) RTLatin1PrevCp(const char *pszLatin1Start, const char *pszLatin1)
164{
165 if ((uintptr_t)pszLatin1 > (uintptr_t)pszLatin1Start)
166 {
167 pszLatin1--;
168 return (char *)pszLatin1;
169 }
170 return (char *)pszLatin1Start;
171}
172
173/**
174 * Translates a Latin-1 string into UTF-8, allocating the result buffer (default
175 * tag, i.e. this forwards to RTLatin1ToUtf8Tag() with RTSTR_TAG).
176 *
177 * @returns iprt status code.
178 * @param pszLatin1 Latin-1 string to convert.
179 * @param ppszString Receives the pointer to the allocated UTF-8 string on
180 * success, and is always set to NULL on failure.
181 * The returned pointer must be freed using RTStrFree().
182 */
183#define RTLatin1ToUtf8(pszLatin1, ppszString) RTLatin1ToUtf8Tag((pszLatin1), (ppszString), RTSTR_TAG)
184
185/**
186 * Translates a Latin-1 string into UTF-8, allocating the result buffer (custom tag).
187 *
188 * @returns iprt status code.
189 * @param pszLatin1 Latin-1 string to convert.
190 * @param ppszString Receives the pointer to the allocated UTF-8 string on
191 * success, and is always set to NULL on failure.
192 * The returned pointer must be freed using RTStrFree().
193 * @param pszTag Allocation tag used for statistics and such.
194 */
195RTDECL(int) RTLatin1ToUtf8Tag(const char *pszLatin1, char **ppszString, const char *pszTag);
196
197/**
198 * Translates Latin-1 to UTF-8 using a buffer provided by the caller or a fittingly
199 * sized buffer allocated by the function (default tag, i.e. RTSTR_TAG).
200 *
201 * @returns iprt status code.
202 * @param pszLatin1 The Latin-1 string to convert.
203 * @param cchLatin1 The number of Latin-1 characters to translate from
204 * pszLatin1. The translation will stop when reaching
205 * cchLatin1 or the terminator ('\\0'). Use RTSTR_MAX
206 * to translate the entire string.
207 * @param ppsz If @a cch is non-zero, this must either be pointing
208 * to a pointer to a buffer of the specified size, or
209 * a pointer to a NULL pointer. If *ppsz is NULL or
210 * @a cch is zero a buffer of at least @a cch chars
211 * will be allocated to hold the translated string. If
212 * a buffer was requested it must be freed using
213 * RTStrFree().
214 * @param cch The buffer size in chars (the type). This includes the terminator.
215 * @param pcch Where to store the length of the translated string,
216 * excluding the terminator. (Optional)
217 *
218 * This may be set under some error conditions,
219 * however, only for VERR_BUFFER_OVERFLOW and
220 * VERR_NO_STR_MEMORY will it contain a valid string
221 * length that can be used to resize the buffer.
222 */
223#define RTLatin1ToUtf8Ex(pszLatin1, cchLatin1, ppsz, cch, pcch) \
224 RTLatin1ToUtf8ExTag((pszLatin1), (cchLatin1), (ppsz), (cch), (pcch), RTSTR_TAG)
225
226/**
227 * Translates Latin-1 to UTF-8 using a buffer provided by the caller or a fittingly
228 * sized buffer allocated by the function (custom tag).
229 *
230 * @returns iprt status code.
231 * @param pszLatin1 The Latin-1 string to convert.
232 * @param cchLatin1 The number of Latin-1 characters to translate from
233 * pszLatin1. The translation will stop when
234 * reaching cchLatin1 or the terminator ('\\0'). Use
235 * RTSTR_MAX to translate the entire string.
236 * @param ppsz If cch is non-zero, this must either be pointing to
237 * a pointer to a buffer of the specified size, or
238 * a pointer to a NULL pointer. If *ppsz is NULL or cch
239 * is zero a buffer of at least cch chars will be
240 * allocated to hold the translated string. If a
241 * buffer was requested it must be freed using
242 * RTStrFree().
243 * @param cch The buffer size in chars (the type). This includes
244 * the terminator.
245 * @param pcch Where to store the length of the translated string,
246 * excluding the terminator. (Optional)
247 *
248 * This may be set under some error conditions,
249 * however, only for VERR_BUFFER_OVERFLOW and
250 * VERR_NO_STR_MEMORY will it contain a valid string
251 * length that can be used to resize the buffer.
252 * @param pszTag Allocation tag used for statistics and such.
253 */
254RTDECL(int) RTLatin1ToUtf8ExTag(const char *pszLatin1, size_t cchLatin1, char **ppsz, size_t cch, size_t *pcch,
255 const char *pszTag);
256
257/**
258 * Calculates the length of the Latin-1 string in UTF-8 chars (bytes).
259 *
260 * The primary purpose of this function is to help allocate buffers of the
261 * correct size for RTLatin1ToUtf8(). For most other purposes
262 * RTLatin1ToUtf8Ex() should be used.
263 *
264 * @returns Number of chars (bytes).
265 * @returns 0 if the string was incorrectly encoded (NOTE(review): confirm this can happen for Latin-1 input).
266 * @param pszLatin1 The Latin-1 string.
267 */
268RTDECL(size_t) RTLatin1CalcUtf8Len(const char *pszLatin1);
269
270/**
271 * Calculates the length in UTF-8 chars (bytes) of a length-limited Latin-1 string.
272 *
273 * @returns iprt status code.
274 * @param pszLatin1 The Latin-1 string.
275 * @param cchLatin1 The max string length. Use RTSTR_MAX to process the
276 * entire string.
277 * @param pcch Where to store the string length (in bytes). Optional.
278 * This is undefined on failure.
279 */
280RTDECL(int) RTLatin1CalcUtf8LenEx(const char *pszLatin1, size_t cchLatin1, size_t *pcch);
281
282/**
283 * Calculates the length of the Latin-1 (ISO-8859-1) string in RTUTF16 items.
284 *
285 * @returns Number of RTUTF16 items needed for the string.
286 * @param pszLatin1 The Latin-1 string to measure.
287 */
288RTDECL(size_t) RTLatin1CalcUtf16Len(const char *pszLatin1);
289
290/**
291 * Calculates the length in RTUTF16 items of a length-limited Latin-1 (ISO-8859-1) string.
292 *
293 * @returns iprt status code.
294 * @param pszLatin1 The Latin-1 string.
295 * @param cchLatin1 The max string length. Use RTSTR_MAX to process the
296 * entire string.
297 * @param pcwc Where to store the string length (in RTUTF16 items). Optional.
298 * This is undefined on failure.
299 */
300RTDECL(int) RTLatin1CalcUtf16LenEx(const char *pszLatin1, size_t cchLatin1, size_t *pcwc);
301
302/**
303 * Translates a Latin-1 (ISO-8859-1) string into UTF-16, allocating the result
304 * buffer (default tag, i.e. this forwards to RTLatin1ToUtf16Tag() with RTSTR_TAG).
305 *
306 * @returns iprt status code.
307 * @param pszLatin1 The Latin-1 string to convert.
308 * @param ppwszString Receives pointer to the allocated UTF-16 string. The
309 * returned string must be freed using RTUtf16Free().
310 */
311#define RTLatin1ToUtf16(pszLatin1, ppwszString) RTLatin1ToUtf16Tag((pszLatin1), (ppwszString), RTSTR_TAG)
312
313/**
314 * Translates a Latin-1 (ISO-8859-1) string into UTF-16, allocating the result
315 * buffer (custom tag).
316 *
317 * @returns iprt status code.
318 * @param pszLatin1 The Latin-1 string to convert.
319 * @param ppwszString Receives pointer to the allocated UTF-16 string. The
320 * returned string must be freed using RTUtf16Free().
321 * @param pszTag Allocation tag used for statistics and such.
322 */
323RTDECL(int) RTLatin1ToUtf16Tag(const char *pszLatin1, PRTUTF16 *ppwszString, const char *pszTag);
324
325/**
326 * Translates pszLatin1 from Latin-1 (ISO-8859-1) to UTF-16, allocating the
327 * result buffer if requested (default tag, i.e. RTSTR_TAG).
328 *
329 * @returns iprt status code.
330 * @param pszLatin1 The Latin-1 string to convert.
331 * @param cchLatin1 The maximum size in chars (the type) to convert. The
332 * conversion stops when it reaches cchLatin1 or the
333 * string terminator ('\\0'). Use RTSTR_MAX to
334 * translate the entire string.
335 * @param ppwsz If cwc is non-zero, this must either be pointing
336 * to a pointer to a buffer of the specified size, or
337 * a pointer to a NULL pointer.
338 * If *ppwsz is NULL or cwc is zero a buffer of at
339 * least cwc items will be allocated to hold the
340 * translated string. If a buffer was requested it
341 * must be freed using RTUtf16Free().
342 * @param cwc The buffer size in RTUTF16s. This includes the
343 * terminator.
344 * @param pcwc Where to store the length of the translated string,
345 * excluding the terminator. (Optional)
346 *
347 * This may be set under some error conditions,
348 * however, only for VERR_BUFFER_OVERFLOW and
349 * VERR_NO_STR_MEMORY will it contain a valid string
350 * length that can be used to resize the buffer.
351 */
352#define RTLatin1ToUtf16Ex(pszLatin1, cchLatin1, ppwsz, cwc, pcwc) \
353 RTLatin1ToUtf16ExTag((pszLatin1), (cchLatin1), (ppwsz), (cwc), (pcwc), RTSTR_TAG)
354
355/**
356 * Translates pszLatin1 from Latin-1 (ISO-8859-1) to UTF-16, allocating the
357 * result buffer if requested (custom tag).
358 *
359 * @returns iprt status code.
360 * @param pszLatin1 The Latin-1 string to convert.
361 * @param cchLatin1 The maximum size in chars (the type) to convert. The
362 * conversion stops when it reaches cchLatin1 or the
363 * string terminator ('\\0'). Use RTSTR_MAX to
364 * translate the entire string.
365 * @param ppwsz If cwc is non-zero, this must either be pointing
366 * to a pointer to a buffer of the specified size, or
367 * a pointer to a NULL pointer.
368 * If *ppwsz is NULL or cwc is zero a buffer of at
369 * least cwc items will be allocated to hold the
370 * translated string. If a buffer was requested it
371 * must be freed using RTUtf16Free().
372 * @param cwc The buffer size in RTUTF16s. This includes the
373 * terminator.
374 * @param pcwc Where to store the length of the translated string,
375 * excluding the terminator. (Optional)
376 *
377 * This may be set under some error conditions,
378 * however, only for VERR_BUFFER_OVERFLOW and
379 * VERR_NO_STR_MEMORY will it contain a valid string
380 * length that can be used to resize the buffer.
381 * @param pszTag Allocation tag used for statistics and such.
382 */
383RTDECL(int) RTLatin1ToUtf16ExTag(const char *pszLatin1, size_t cchLatin1,
384 PRTUTF16 *ppwsz, size_t cwc, size_t *pcwc, const char *pszTag);
385
386/** @} */
387
388RT_C_DECLS_END
389
390/** @} */
391
392#endif
393
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette