VirtualBox

source: vbox/trunk/include/iprt/uni.h@ 5967

Last change on this file since 5967 was 5605, checked in by vboxsync, 17 years ago

BIT => RT_BIT, BIT64 => RT_BIT_64. BIT() is defined in Linux 2.6.24

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 8.4 KB
Line 
1/** @file
2 * innotek Portable Runtime - Unicode Code Points.
3 */
4
5/*
6 * Copyright (C) 2006-2007 innotek GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License as published by the Free Software Foundation,
12 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
13 * distribution. VirtualBox OSE is distributed in the hope that it will
14 * be useful, but WITHOUT ANY WARRANTY of any kind.
15 */
16
17#ifndef ___iprt_uni_h
18#define ___iprt_uni_h
19
20/** @defgroup grp_rt_uni RTUniCp - Unicode Code Points
21 * @ingroup grp_rt
22 * @{
23 */
24
25/** @def RTUNI_USE_WCTYPE
26 * Define RTUNI_USE_WCTYPE to use the character data provided by the
27 * C runtime library (wctype.h) instead of the IPRT unicode data. */
28#ifdef __DOXYGEN__
29# define RTUNI_USE_WCTYPE
30#endif
31
32#include <iprt/types.h>
33#ifdef RTUNI_USE_WCTYPE
34# include <wctype.h>
35#endif
36
37__BEGIN_DECLS
38
39
/** The largest value an RTUNICP can hold (all bits set).
 * The range table walkers in this header also stop at an entry whose EndCP
 * equals this value. */
#define RTUNICP_MAX         (~(RTUNICP)0)

/** Marker for an invalid code point.
 * Returned when an invalid encoding or an invalid unicode code point is
 * encountered. */
#define RTUNICP_INVALID     (0xfffffffe)
47
48
49
50#ifndef RTUNI_USE_WCTYPE
51/**
52 * A unicode flags range.
53 * @internal
54 */
55typedef struct RTUNIFLAGSRANGE
56{
57 /** The first code point of the range. */
58 RTUNICP BeginCP;
59 /** The last + 1 code point of the range. */
60 RTUNICP EndCP;
61 /** Pointer to the array of case folded code points. */
62 const uint8_t *pafFlags;
63} RTUNIFLAGSRANGE;
64/** Pointer to a flags range.
65 * @internal */
66typedef RTUNIFLAGSRANGE *PRTUNIFLAGSRANGE;
67/** Pointer to a const flags range.
68 * @internal */
69typedef const RTUNIFLAGSRANGE *PCRTUNIFLAGSRANGE;
70
71/**
72 * A unicode case folded range.
73 * @internal
74 */
75typedef struct RTUNICASERANGE
76{
77 /** The first code point of the range. */
78 RTUNICP BeginCP;
79 /** The last + 1 code point of the range. */
80 RTUNICP EndCP;
81 /** Pointer to the array of case folded code points. */
82 PCRTUNICP paFoldedCPs;
83} RTUNICASERANGE;
84/** Pointer to a case folded range.
85 * @internal */
86typedef RTUNICASERANGE *PRTUNICASERANGE;
87/** Pointer to a const case folded range.
88 * @internal */
89typedef const RTUNICASERANGE *PCRTUNICASERANGE;
90
/** @name Unicode Code Point Flags.
 * Bits of the flag mask returned by rtUniCpFlags().
 * @internal
 * @{ */
/** Upper case; tested by RTUniCpIsUpper(). */
#define RTUNI_UPPER     RT_BIT(0)
/** Lower case; tested by RTUniCpIsLower(). */
#define RTUNI_LOWER     RT_BIT(1)
/** Alphabetic; tested by RTUniCpIsAlphabetic(). */
#define RTUNI_ALPHA     RT_BIT(2)
/** Hexadecimal digit; tested by RTUniCpIsHexDigit(). */
#define RTUNI_XDIGIT    RT_BIT(3)
/** Decimal digit; tested by RTUniCpIsDecDigit(). */
#define RTUNI_DDIGIT    RT_BIT(4)
/** White space; tested by RTUniCpIsSpace(). */
#define RTUNI_WSPACE    RT_BIT(5)
/* RTUNI_BSPACE, RT_BIT(6), is left for later and is not defined yet. */
/** @} */
102
103
104/**
105 * Array of flags ranges.
106 * @internal
107 */
108extern RTDATADECL(const RTUNIFLAGSRANGE) g_aRTUniFlagsRanges[];
109
110/**
111 * Gets the flags for a unicode code point.
112 *
113 * @returns The flag mask. (RTUNI_*)
114 * @param CodePoint The unicode code point.
115 * @internal
116 */
117DECLINLINE(RTUNICP) rtUniCpFlags(RTUNICP CodePoint)
118{
119 PCRTUNIFLAGSRANGE pCur = &g_aRTUniFlagsRanges[0];
120 do
121 {
122 if (pCur->EndCP > CodePoint)
123 {
124 if (pCur->BeginCP <= CodePoint)
125 CodePoint = pCur->pafFlags[CodePoint - pCur->BeginCP];
126 break;
127 }
128 pCur++;
129 } while (pCur->EndCP != RTUNICP_MAX);
130 return CodePoint;
131}
132
133
134/**
135 * Checks if a unicode code point is upper case.
136 *
137 * @returns true if it is.
138 * @returns false if it isn't.
139 * @param CodePoint The code point.
140 */
141DECLINLINE(bool) RTUniCpIsUpper(RTUNICP CodePoint)
142{
143 return (rtUniCpFlags(CodePoint) & RTUNI_UPPER) != 0;
144}
145
146
147/**
148 * Checks if a unicode code point is lower case.
149 *
150 * @returns true if it is.
151 * @returns false if it isn't.
152 * @param CodePoint The code point.
153 */
154DECLINLINE(bool) RTUniCpIsLower(RTUNICP CodePoint)
155{
156 return (rtUniCpFlags(CodePoint) & RTUNI_LOWER) != 0;
157}
158
159
160/**
161 * Checks if a unicode code point is alphabetic.
162 *
163 * @returns true if it is.
164 * @returns false if it isn't.
165 * @param CodePoint The code point.
166 */
167DECLINLINE(bool) RTUniCpIsAlphabetic(RTUNICP CodePoint)
168{
169 return (rtUniCpFlags(CodePoint) & RTUNI_ALPHA) != 0;
170}
171
172
173/**
174 * Checks if a unicode code point is a decimal digit.
175 *
176 * @returns true if it is.
177 * @returns false if it isn't.
178 * @param CodePoint The code point.
179 */
180DECLINLINE(bool) RTUniCpIsDecDigit(RTUNICP CodePoint)
181{
182 return (rtUniCpFlags(CodePoint) & RTUNI_DDIGIT) != 0;
183}
184
185
186/**
187 * Checks if a unicode code point is a hexadecimal digit.
188 *
189 * @returns true if it is.
190 * @returns false if it isn't.
191 * @param CodePoint The code point.
192 */
193DECLINLINE(bool) RTUniCpIsHexDigit(RTUNICP CodePoint)
194{
195 return (rtUniCpFlags(CodePoint) & RTUNI_XDIGIT) != 0;
196}
197
198
199/**
200 * Checks if a unicode code point is white space.
201 *
202 * @returns true if it is.
203 * @returns false if it isn't.
204 * @param CodePoint The code point.
205 */
206DECLINLINE(bool) RTUniCpIsSpace(RTUNICP CodePoint)
207{
208 return (rtUniCpFlags(CodePoint) & RTUNI_WSPACE) != 0;
209}
210
211
212
213/**
214 * Array of uppercase ranges.
215 * @internal
216 */
217extern RTDATADECL(const RTUNICASERANGE) g_aRTUniUpperRanges[];
218
219/**
220 * Array of lowercase ranges.
221 * @internal
222 */
223extern RTDATADECL(const RTUNICASERANGE) g_aRTUniLowerRanges[];
224
225
226/**
227 * Folds a unicode code point using the specified range array.
228 *
229 * @returns FOlded code point.
230 * @param CodePoint The unicode code point to fold.
231 * @param pCur The case folding range to use.
232 */
233DECLINLINE(RTUNICP) rtUniCpFold(RTUNICP CodePoint, PCRTUNICASERANGE pCur)
234{
235 do
236 {
237 if (pCur->EndCP > CodePoint)
238 {
239 if (pCur->BeginCP <= CodePoint)
240 CodePoint = pCur->paFoldedCPs[CodePoint - pCur->BeginCP];
241 break;
242 }
243 pCur++;
244 } while (pCur->EndCP != RTUNICP_MAX);
245 return CodePoint;
246}
247
248
249/**
250 * Folds a unicode code point to upper case.
251 *
252 * @returns Folded code point.
253 * @param CodePoint The unicode code point to fold.
254 */
255DECLINLINE(RTUNICP) RTUniCpToUpper(RTUNICP CodePoint)
256{
257 return rtUniCpFold(CodePoint, &g_aRTUniUpperRanges[0]);
258}
259
260
261/**
262 * Folds a unicode code point to lower case.
263 *
264 * @returns Folded code point.
265 * @param CodePoint The unicode code point to fold.
266 */
267DECLINLINE(RTUNICP) RTUniCpToLower(RTUNICP CodePoint)
268{
269 return rtUniCpFold(CodePoint, &g_aRTUniLowerRanges[0]);
270}
271
272
273#else /* RTUNI_USE_WCTYPE */
274
275
276/**
277 * Checks if a unicode code point is upper case.
278 *
279 * @returns true if it is.
280 * @returns false if it isn't.
281 * @param CodePoint The code point.
282 */
283DECLINLINE(bool) RTUniCpIsUpper(RTUNICP CodePoint)
284{
285 return !!iswupper(CodePoint);
286}
287
288
289/**
290 * Checks if a unicode code point is lower case.
291 *
292 * @returns true if it is.
293 * @returns false if it isn't.
294 * @param CodePoint The code point.
295 */
296DECLINLINE(bool) RTUniCpIsLower(RTUNICP CodePoint)
297{
298 return !!iswlower(CodePoint);
299}
300
301
302/**
303 * Checks if a unicode code point is alphabetic.
304 *
305 * @returns true if it is.
306 * @returns false if it isn't.
307 * @param CodePoint The code point.
308 */
309DECLINLINE(bool) RTUniCpIsAlphabetic(RTUNICP CodePoint)
310{
311 return !!iswalpha(CodePoint);
312}
313
314
315/**
316 * Checks if a unicode code point is a decimal digit.
317 *
318 * @returns true if it is.
319 * @returns false if it isn't.
320 * @param CodePoint The code point.
321 */
322DECLINLINE(bool) RTUniCpIsDecDigit(RTUNICP CodePoint)
323{
324 return !!iswdigit(CodePoint);
325}
326
327
328/**
329 * Checks if a unicode code point is a hexadecimal digit.
330 *
331 * @returns true if it is.
332 * @returns false if it isn't.
333 * @param CodePoint The code point.
334 */
335DECLINLINE(bool) RTUniCpIsHexDigit(RTUNICP CodePoint)
336{
337 return !!iswxdigit(CodePoint);
338}
339
340
341/**
342 * Checks if a unicode code point is white space.
343 *
344 * @returns true if it is.
345 * @returns false if it isn't.
346 * @param CodePoint The code point.
347 */
348DECLINLINE(bool) RTUniCpIsSpace(RTUNICP CodePoint)
349{
350 return !!iswspace(CodePoint);
351}
352
353
354/**
355 * Folds a unicode code point to upper case.
356 *
357 * @returns Folded code point.
358 * @param CodePoint The unicode code point to fold.
359 */
360DECLINLINE(RTUNICP) RTUniCpToUpper(RTUNICP CodePoint)
361{
362 return towupper(CodePoint);
363}
364
365
366/**
367 * Folds a unicode code point to lower case.
368 *
369 * @returns Folded code point.
370 * @param CodePoint The unicode code point to fold.
371 */
372DECLINLINE(RTUNICP) RTUniCpToLower(RTUNICP CodePoint)
373{
374 return towlower(CodePoint);
375}
376
377
378#endif /* RTUNI_USE_WCTYPE */
379
380
381/**
382 * Frees a unicode string.
383 *
384 * @param pusz The string to free.
385 */
386RTDECL(void) RTUniFree(PRTUNICP pusz);
387
388
389__END_DECLS
390/** @} */
391
392
393#endif
394
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette