VirtualBox

source: vbox/trunk/include/iprt/uni.h@ 29948

Last change on this file since 29948 was 28878, checked in by vboxsync, 15 years ago

uniread.cpp: Updated to cope with version 5.2 of the spec. Preparing for extracting necessary decomposition and normalization information. Fixed Oracle (C).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 9.1 KB
Line 
1/** @file
2 * IPRT - Unicode Code Points.
3 */
4
5/*
6 * Copyright (C) 2006-2007 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_uni_h
27#define ___iprt_uni_h
28
29/** @defgroup grp_rt_uni RTUniCp - Unicode Code Points
30 * @ingroup grp_rt
31 * @{
32 */
33
34/** @def RTUNI_USE_WCTYPE
35 * Define RTUNI_USE_WCTYPE to not use the IPRT unicode data but the
36 * data which the C runtime library provides. */
37#ifdef DOXYGEN_RUNNING
38# define RTUNI_USE_WCTYPE
39#endif
40
41#include <iprt/types.h>
42#ifdef RTUNI_USE_WCTYPE
43# include <wctype.h>
44#endif
45
46RT_C_DECLS_BEGIN
47
48
49/** Max value a RTUNICP type can hold. */
50#define RTUNICP_MAX ( ~(RTUNICP)0 )
51
52/** Invalid code point.
53 * This is returned when invalid encodings or invalid unicode code
54 * points are encountered. */
55#define RTUNICP_INVALID ( 0xfffffffe )
56
57
58
59#ifndef RTUNI_USE_WCTYPE
60/**
61 * A unicode flags range.
62 * @internal
63 */
64typedef struct RTUNIFLAGSRANGE
65{
66 /** The first code point of the range. */
67 RTUNICP BeginCP;
68 /** The last + 1 code point of the range. */
69 RTUNICP EndCP;
70 /** Pointer to the array of case folded code points. */
71 const uint8_t *pafFlags;
72} RTUNIFLAGSRANGE;
73/** Pointer to a flags range.
74 * @internal */
75typedef RTUNIFLAGSRANGE *PRTUNIFLAGSRANGE;
76/** Pointer to a const flags range.
77 * @internal */
78typedef const RTUNIFLAGSRANGE *PCRTUNIFLAGSRANGE;
79
80/**
81 * A unicode case folded range.
82 * @internal
83 */
84typedef struct RTUNICASERANGE
85{
86 /** The first code point of the range. */
87 RTUNICP BeginCP;
88 /** The last + 1 code point of the range. */
89 RTUNICP EndCP;
90 /** Pointer to the array of case folded code points. */
91 PCRTUNICP paFoldedCPs;
92} RTUNICASERANGE;
93/** Pointer to a case folded range.
94 * @internal */
95typedef RTUNICASERANGE *PRTUNICASERANGE;
96/** Pointer to a const case folded range.
97 * @internal */
98typedef const RTUNICASERANGE *PCRTUNICASERANGE;
99
100/** @name Unicode Code Point Flags.
101 * @internal
102 * @{ */
103#define RTUNI_UPPER RT_BIT(0)
104#define RTUNI_LOWER RT_BIT(1)
105#define RTUNI_ALPHA RT_BIT(2)
106#define RTUNI_XDIGIT RT_BIT(3)
107#define RTUNI_DDIGIT RT_BIT(4)
108#define RTUNI_WSPACE RT_BIT(5)
109/*#define RTUNI_BSPACE RT_BIT(6) - later */
110/** When set, the codepoint requires further checking wrt NFC and NFD
111 * normalization. I.e. set when either of QC_NFD and QC_NFC are not Y. */
112#define RTUNI_QC_NFX RT_BIT(7)
113/** @} */
114
115
116/**
117 * Array of flags ranges.
118 * @internal
119 */
120extern RTDATADECL(const RTUNIFLAGSRANGE) g_aRTUniFlagsRanges[];
121
122/**
123 * Gets the flags for a unicode code point.
124 *
125 * @returns The flag mask. (RTUNI_*)
126 * @param CodePoint The unicode code point.
127 * @internal
128 */
129DECLINLINE(RTUNICP) rtUniCpFlags(RTUNICP CodePoint)
130{
131 PCRTUNIFLAGSRANGE pCur = &g_aRTUniFlagsRanges[0];
132 do
133 {
134 if (pCur->EndCP > CodePoint)
135 {
136 if (pCur->BeginCP <= CodePoint)
137 CodePoint = pCur->pafFlags[CodePoint - pCur->BeginCP];
138 break;
139 }
140 pCur++;
141 } while (pCur->EndCP != RTUNICP_MAX);
142 return CodePoint;
143}
144
145
146/**
147 * Checks if a unicode code point is upper case.
148 *
149 * @returns true if it is.
150 * @returns false if it isn't.
151 * @param CodePoint The code point.
152 */
153DECLINLINE(bool) RTUniCpIsUpper(RTUNICP CodePoint)
154{
155 return (rtUniCpFlags(CodePoint) & RTUNI_UPPER) != 0;
156}
157
158
159/**
160 * Checks if a unicode code point is lower case.
161 *
162 * @returns true if it is.
163 * @returns false if it isn't.
164 * @param CodePoint The code point.
165 */
166DECLINLINE(bool) RTUniCpIsLower(RTUNICP CodePoint)
167{
168 return (rtUniCpFlags(CodePoint) & RTUNI_LOWER) != 0;
169}
170
171
172/**
173 * Checks if a unicode code point is alphabetic.
174 *
175 * @returns true if it is.
176 * @returns false if it isn't.
177 * @param CodePoint The code point.
178 */
179DECLINLINE(bool) RTUniCpIsAlphabetic(RTUNICP CodePoint)
180{
181 return (rtUniCpFlags(CodePoint) & RTUNI_ALPHA) != 0;
182}
183
184
185/**
186 * Checks if a unicode code point is a decimal digit.
187 *
188 * @returns true if it is.
189 * @returns false if it isn't.
190 * @param CodePoint The code point.
191 */
192DECLINLINE(bool) RTUniCpIsDecDigit(RTUNICP CodePoint)
193{
194 return (rtUniCpFlags(CodePoint) & RTUNI_DDIGIT) != 0;
195}
196
197
198/**
199 * Checks if a unicode code point is a hexadecimal digit.
200 *
201 * @returns true if it is.
202 * @returns false if it isn't.
203 * @param CodePoint The code point.
204 */
205DECLINLINE(bool) RTUniCpIsHexDigit(RTUNICP CodePoint)
206{
207 return (rtUniCpFlags(CodePoint) & RTUNI_XDIGIT) != 0;
208}
209
210
211/**
212 * Checks if a unicode code point is white space.
213 *
214 * @returns true if it is.
215 * @returns false if it isn't.
216 * @param CodePoint The code point.
217 */
218DECLINLINE(bool) RTUniCpIsSpace(RTUNICP CodePoint)
219{
220 return (rtUniCpFlags(CodePoint) & RTUNI_WSPACE) != 0;
221}
222
223
224
225/**
226 * Array of uppercase ranges.
227 * @internal
228 */
229extern RTDATADECL(const RTUNICASERANGE) g_aRTUniUpperRanges[];
230
231/**
232 * Array of lowercase ranges.
233 * @internal
234 */
235extern RTDATADECL(const RTUNICASERANGE) g_aRTUniLowerRanges[];
236
237
238/**
239 * Folds a unicode code point using the specified range array.
240 *
241 * @returns FOlded code point.
242 * @param CodePoint The unicode code point to fold.
243 * @param pCur The case folding range to use.
244 */
245DECLINLINE(RTUNICP) rtUniCpFold(RTUNICP CodePoint, PCRTUNICASERANGE pCur)
246{
247 do
248 {
249 if (pCur->EndCP > CodePoint)
250 {
251 if (pCur->BeginCP <= CodePoint)
252 CodePoint = pCur->paFoldedCPs[CodePoint - pCur->BeginCP];
253 break;
254 }
255 pCur++;
256 } while (pCur->EndCP != RTUNICP_MAX);
257 return CodePoint;
258}
259
260
261/**
262 * Folds a unicode code point to upper case.
263 *
264 * @returns Folded code point.
265 * @param CodePoint The unicode code point to fold.
266 */
267DECLINLINE(RTUNICP) RTUniCpToUpper(RTUNICP CodePoint)
268{
269 return rtUniCpFold(CodePoint, &g_aRTUniUpperRanges[0]);
270}
271
272
273/**
274 * Folds a unicode code point to lower case.
275 *
276 * @returns Folded code point.
277 * @param CodePoint The unicode code point to fold.
278 */
279DECLINLINE(RTUNICP) RTUniCpToLower(RTUNICP CodePoint)
280{
281 return rtUniCpFold(CodePoint, &g_aRTUniLowerRanges[0]);
282}
283
284
285#else /* RTUNI_USE_WCTYPE */
286
287
288/**
289 * Checks if a unicode code point is upper case.
290 *
291 * @returns true if it is.
292 * @returns false if it isn't.
293 * @param CodePoint The code point.
294 */
295DECLINLINE(bool) RTUniCpIsUpper(RTUNICP CodePoint)
296{
297 return !!iswupper(CodePoint);
298}
299
300
301/**
302 * Checks if a unicode code point is lower case.
303 *
304 * @returns true if it is.
305 * @returns false if it isn't.
306 * @param CodePoint The code point.
307 */
308DECLINLINE(bool) RTUniCpIsLower(RTUNICP CodePoint)
309{
310 return !!iswlower(CodePoint);
311}
312
313
314/**
315 * Checks if a unicode code point is alphabetic.
316 *
317 * @returns true if it is.
318 * @returns false if it isn't.
319 * @param CodePoint The code point.
320 */
321DECLINLINE(bool) RTUniCpIsAlphabetic(RTUNICP CodePoint)
322{
323 return !!iswalpha(CodePoint);
324}
325
326
327/**
328 * Checks if a unicode code point is a decimal digit.
329 *
330 * @returns true if it is.
331 * @returns false if it isn't.
332 * @param CodePoint The code point.
333 */
334DECLINLINE(bool) RTUniCpIsDecDigit(RTUNICP CodePoint)
335{
336 return !!iswdigit(CodePoint);
337}
338
339
340/**
341 * Checks if a unicode code point is a hexadecimal digit.
342 *
343 * @returns true if it is.
344 * @returns false if it isn't.
345 * @param CodePoint The code point.
346 */
347DECLINLINE(bool) RTUniCpIsHexDigit(RTUNICP CodePoint)
348{
349 return !!iswxdigit(CodePoint);
350}
351
352
353/**
354 * Checks if a unicode code point is white space.
355 *
356 * @returns true if it is.
357 * @returns false if it isn't.
358 * @param CodePoint The code point.
359 */
360DECLINLINE(bool) RTUniCpIsSpace(RTUNICP CodePoint)
361{
362 return !!iswspace(CodePoint);
363}
364
365
366/**
367 * Folds a unicode code point to upper case.
368 *
369 * @returns Folded code point.
370 * @param CodePoint The unicode code point to fold.
371 */
372DECLINLINE(RTUNICP) RTUniCpToUpper(RTUNICP CodePoint)
373{
374 return towupper(CodePoint);
375}
376
377
378/**
379 * Folds a unicode code point to lower case.
380 *
381 * @returns Folded code point.
382 * @param CodePoint The unicode code point to fold.
383 */
384DECLINLINE(RTUNICP) RTUniCpToLower(RTUNICP CodePoint)
385{
386 return towlower(CodePoint);
387}
388
389
390#endif /* RTUNI_USE_WCTYPE */
391
392
393/**
394 * Frees a unicode string.
395 *
396 * @param pusz The string to free.
397 */
398RTDECL(void) RTUniFree(PRTUNICP pusz);
399
400
401RT_C_DECLS_END
402/** @} */
403
404
405#endif
406
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette