VirtualBox

source: vbox/trunk/include/iprt/uni.h@ 33516

Last change on this file since 33516 was 31418, checked in by vboxsync, 14 years ago

iprt/string.h,utf-8.cpp: avoid including uni.h; misc nits.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 8.8 KB
Line 
1/** @file
2 * IPRT - Unicode Code Points.
3 */
4
5/*
6 * Copyright (C) 2006-2007 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_uni_h
27#define ___iprt_uni_h
28
29/** @defgroup grp_rt_uni RTUniCp - Unicode Code Points
30 * @ingroup grp_rt
31 * @{
32 */
33
34/** @def RTUNI_USE_WCTYPE
35 * Define RTUNI_USE_WCTYPE to make this header include <wctype.h> and implement the
36 * RTUniCp inline functions with the iswupper()/towupper() family, not the IPRT tables. */
37#ifdef DOXYGEN_RUNNING
38# define RTUNI_USE_WCTYPE
39#endif
40
41#include <iprt/types.h>
42#ifdef RTUNI_USE_WCTYPE
43# include <wctype.h>
44#endif
45
46RT_C_DECLS_BEGIN
47
48
49
50#ifndef RTUNI_USE_WCTYPE
51/**
52 * A unicode flags range: per code point flag bytes for [BeginCP, EndCP).
53 * @internal
54 */
55typedef struct RTUNIFLAGSRANGE
56{
57 /** The first code point of the range. */
58 RTUNICP BeginCP;
59 /** The last + 1 code point of the range. */
60 RTUNICP EndCP;
61 /** Pointer to the array of flag bytes (RTUNI_*), indexed by code point minus BeginCP. */
62 const uint8_t *pafFlags;
63} RTUNIFLAGSRANGE;
64/** Pointer to a flags range.
65 * @internal */
66typedef RTUNIFLAGSRANGE *PRTUNIFLAGSRANGE;
67/** Pointer to a const flags range.
68 * @internal */
69typedef const RTUNIFLAGSRANGE *PCRTUNIFLAGSRANGE;
70
71/**
72 * A unicode case folded range: folded code points for [BeginCP, EndCP).
73 * @internal
74 */
75typedef struct RTUNICASERANGE
76{
77 /** The first code point of the range. */
78 RTUNICP BeginCP;
79 /** The last + 1 code point of the range. */
80 RTUNICP EndCP;
81 /** Pointer to the array of case folded code points, indexed by code point minus BeginCP. */
82 PCRTUNICP paFoldedCPs;
83} RTUNICASERANGE;
84/** Pointer to a case folded range.
85 * @internal */
86typedef RTUNICASERANGE *PRTUNICASERANGE;
87/** Pointer to a const case folded range.
88 * @internal */
89typedef const RTUNICASERANGE *PCRTUNICASERANGE;
90
91/** @name Unicode Code Point Flags.
92 * Bits of the mask returned by rtUniCpFlags.
93 * @internal
94 * @{ */
94#define RTUNI_UPPER RT_BIT(0) /**< Tested by RTUniCpIsUpper. */
95#define RTUNI_LOWER RT_BIT(1) /**< Tested by RTUniCpIsLower. */
96#define RTUNI_ALPHA RT_BIT(2) /**< Tested by RTUniCpIsAlphabetic. */
97#define RTUNI_XDIGIT RT_BIT(3) /**< Tested by RTUniCpIsHexDigit. */
98#define RTUNI_DDIGIT RT_BIT(4) /**< Tested by RTUniCpIsDecDigit. */
99#define RTUNI_WSPACE RT_BIT(5) /**< Tested by RTUniCpIsSpace. */
100/*#define RTUNI_BSPACE RT_BIT(6) - later */
101/** When set, the codepoint requires further checking wrt NFC and NFD
102 * normalization. I.e. set when either of QC_NFD and QC_NFC are not Y. */
103#define RTUNI_QC_NFX RT_BIT(7)
104/** @} */
105
106
107/**
108 * Array of flags ranges; rtUniCpFlags scans it from index 0 and stops at an entry whose EndCP is RTUNICP_MAX.
109 * @internal
110 */
111extern RTDATADECL(const RTUNIFLAGSRANGE) g_aRTUniFlagsRanges[];
112
113/**
114 * Gets the flags for a unicode code point.
115 *
116 * @returns The flag mask. (RTUNI_*)
117 * @param CodePoint The unicode code point.
118 * @internal
119 */
120DECLINLINE(RTUNICP) rtUniCpFlags(RTUNICP CodePoint)
121{
122 PCRTUNIFLAGSRANGE pCur = &g_aRTUniFlagsRanges[0];
123 do
124 {
125 if (pCur->EndCP > CodePoint)
126 {
127 if (pCur->BeginCP <= CodePoint)
128 CodePoint = pCur->pafFlags[CodePoint - pCur->BeginCP];
129 break;
130 }
131 pCur++;
132 } while (pCur->EndCP != RTUNICP_MAX);
133 return CodePoint;
134}
135
136
137/**
138 * Checks if a unicode code point is upper case.
139 *
140 * @returns true if it is.
141 * @returns false if it isn't.
142 * @param CodePoint The code point.
143 */
144DECLINLINE(bool) RTUniCpIsUpper(RTUNICP CodePoint)
145{
146 return (rtUniCpFlags(CodePoint) & RTUNI_UPPER) != 0;
147}
148
149
150/**
151 * Checks if a unicode code point is lower case.
152 *
153 * @returns true if it is.
154 * @returns false if it isn't.
155 * @param CodePoint The code point.
156 */
157DECLINLINE(bool) RTUniCpIsLower(RTUNICP CodePoint)
158{
159 return (rtUniCpFlags(CodePoint) & RTUNI_LOWER) != 0;
160}
161
162
163/**
164 * Checks if a unicode code point is alphabetic.
165 *
166 * @returns true if it is.
167 * @returns false if it isn't.
168 * @param CodePoint The code point.
169 */
170DECLINLINE(bool) RTUniCpIsAlphabetic(RTUNICP CodePoint)
171{
172 return (rtUniCpFlags(CodePoint) & RTUNI_ALPHA) != 0;
173}
174
175
176/**
177 * Checks if a unicode code point is a decimal digit.
178 *
179 * @returns true if it is.
180 * @returns false if it isn't.
181 * @param CodePoint The code point.
182 */
183DECLINLINE(bool) RTUniCpIsDecDigit(RTUNICP CodePoint)
184{
185 return (rtUniCpFlags(CodePoint) & RTUNI_DDIGIT) != 0;
186}
187
188
189/**
190 * Checks if a unicode code point is a hexadecimal digit.
191 *
192 * @returns true if it is.
193 * @returns false if it isn't.
194 * @param CodePoint The code point.
195 */
196DECLINLINE(bool) RTUniCpIsHexDigit(RTUNICP CodePoint)
197{
198 return (rtUniCpFlags(CodePoint) & RTUNI_XDIGIT) != 0;
199}
200
201
202/**
203 * Checks if a unicode code point is white space.
204 *
205 * @returns true if it is.
206 * @returns false if it isn't.
207 * @param CodePoint The code point.
208 */
209DECLINLINE(bool) RTUniCpIsSpace(RTUNICP CodePoint)
210{
211 return (rtUniCpFlags(CodePoint) & RTUNI_WSPACE) != 0;
212}
213
214
215
216/**
217 * Array of uppercase ranges; the table RTUniCpToUpper hands to rtUniCpFold.
218 * @internal
219 */
220extern RTDATADECL(const RTUNICASERANGE) g_aRTUniUpperRanges[];
221
222/**
223 * Array of lowercase ranges; the table RTUniCpToLower hands to rtUniCpFold.
224 * @internal
225 */
226extern RTDATADECL(const RTUNICASERANGE) g_aRTUniLowerRanges[];
227
228
229/**
230 * Folds a unicode code point using the specified range array.
231 *
232 * @returns FOlded code point.
233 * @param CodePoint The unicode code point to fold.
234 * @param pCur The case folding range to use.
235 */
236DECLINLINE(RTUNICP) rtUniCpFold(RTUNICP CodePoint, PCRTUNICASERANGE pCur)
237{
238 do
239 {
240 if (pCur->EndCP > CodePoint)
241 {
242 if (pCur->BeginCP <= CodePoint)
243 CodePoint = pCur->paFoldedCPs[CodePoint - pCur->BeginCP];
244 break;
245 }
246 pCur++;
247 } while (pCur->EndCP != RTUNICP_MAX);
248 return CodePoint;
249}
250
251
252/**
253 * Folds a unicode code point to upper case.
254 *
255 * @returns Folded code point.
256 * @param CodePoint The unicode code point to fold.
257 */
258DECLINLINE(RTUNICP) RTUniCpToUpper(RTUNICP CodePoint)
259{
260 return rtUniCpFold(CodePoint, &g_aRTUniUpperRanges[0]);
261}
262
263
264/**
265 * Folds a unicode code point to lower case.
266 *
267 * @returns Folded code point.
268 * @param CodePoint The unicode code point to fold.
269 */
270DECLINLINE(RTUNICP) RTUniCpToLower(RTUNICP CodePoint)
271{
272 return rtUniCpFold(CodePoint, &g_aRTUniLowerRanges[0]);
273}
274
275
276#else /* RTUNI_USE_WCTYPE */
277
278
279/**
280 * Checks if a unicode code point is upper case.
281 *
282 * @returns true if it is.
283 * @returns false if it isn't.
284 * @param CodePoint The code point.
285 */
286DECLINLINE(bool) RTUniCpIsUpper(RTUNICP CodePoint)
287{
288 return !!iswupper(CodePoint);
289}
290
291
292/**
293 * Checks if a unicode code point is lower case.
294 *
295 * @returns true if it is.
296 * @returns false if it isn't.
297 * @param CodePoint The code point.
298 */
299DECLINLINE(bool) RTUniCpIsLower(RTUNICP CodePoint)
300{
301 return !!iswlower(CodePoint);
302}
303
304
305/**
306 * Checks if a unicode code point is alphabetic.
307 *
308 * @returns true if it is.
309 * @returns false if it isn't.
310 * @param CodePoint The code point.
311 */
312DECLINLINE(bool) RTUniCpIsAlphabetic(RTUNICP CodePoint)
313{
314 return !!iswalpha(CodePoint);
315}
316
317
318/**
319 * Checks if a unicode code point is a decimal digit.
320 *
321 * @returns true if it is.
322 * @returns false if it isn't.
323 * @param CodePoint The code point.
324 */
325DECLINLINE(bool) RTUniCpIsDecDigit(RTUNICP CodePoint)
326{
327 return !!iswdigit(CodePoint);
328}
329
330
331/**
332 * Checks if a unicode code point is a hexadecimal digit.
333 *
334 * @returns true if it is.
335 * @returns false if it isn't.
336 * @param CodePoint The code point.
337 */
338DECLINLINE(bool) RTUniCpIsHexDigit(RTUNICP CodePoint)
339{
340 return !!iswxdigit(CodePoint);
341}
342
343
344/**
345 * Checks if a unicode code point is white space.
346 *
347 * @returns true if it is.
348 * @returns false if it isn't.
349 * @param CodePoint The code point.
350 */
351DECLINLINE(bool) RTUniCpIsSpace(RTUNICP CodePoint)
352{
353 return !!iswspace(CodePoint);
354}
355
356
357/**
358 * Folds a unicode code point to upper case.
359 *
360 * @returns Folded code point.
361 * @param CodePoint The unicode code point to fold.
362 */
363DECLINLINE(RTUNICP) RTUniCpToUpper(RTUNICP CodePoint)
364{
365 return towupper(CodePoint);
366}
367
368
369/**
370 * Folds a unicode code point to lower case.
371 *
372 * @returns Folded code point.
373 * @param CodePoint The unicode code point to fold.
374 */
375DECLINLINE(RTUNICP) RTUniCpToLower(RTUNICP CodePoint)
376{
377 return towlower(CodePoint);
378}
379
380
381#endif /* RTUNI_USE_WCTYPE */
382
383
384/**
385 * Frees a unicode string (declared in both the IPRT table and wctype configurations).
386 *
387 * @param pusz The string to free. NOTE(review): allocator and NULL handling are not visible in this header - confirm.
388 */
389RTDECL(void) RTUniFree(PRTUNICP pusz);
390
391
392RT_C_DECLS_END
393/** @} */
394
395
396#endif
397
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette