VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/misc/getoptargv.cpp@ 43172

Last change on this file since 43172 was 43031, checked in by vboxsync, 12 years ago

clearifying comment.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 14.4 KB
Line 
1/* $Id: getoptargv.cpp 43031 2012-08-28 11:42:23Z vboxsync $ */
2/** @file
3 * IPRT - Command Line Parsing, Argument Vector.
4 */
5
6/*
7 * Copyright (C) 2010 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27/*******************************************************************************
28* Header Files *
29*******************************************************************************/
30#include <iprt/getopt.h>
31#include "internal/iprt.h"
32
33#include <iprt/asm.h>
34#include <iprt/assert.h>
35#include <iprt/err.h>
36#include <iprt/mem.h>
37#include <iprt/string.h>
38
39
40/*******************************************************************************
41* Global Variables *
42*******************************************************************************/
43/**
44 * Array indexed by the quoting type and 7-bit ASCII character.
45 *
46 * We include some extra stuff here that the corresponding shell would normally
47 * require quoting of.
48 */
49static uint8_t const g_abmQuoteChars[RTGETOPTARGV_CNV_QUOTE_MASK + 1][128/8] =
50{
51 { 0xfe, 0xff, 0x0f, 0x00, 0x65, 0x00, 0x00, 0x50 },
52 { 0xfe, 0xff, 0x0f, 0x00, 0xd7, 0x07, 0x00, 0xd8 },
53};
54
55
#if 0 /* To re-generate the bitmaps. */
#include <stdio.h>
/*
 * Generator for the g_abmQuoteChars initializer.
 *
 * Flags every character that forces quoting for each quoting style and
 * prints the resulting rows in a form that can be pasted into the table.
 */
int main()
{
    RT_ZERO(g_abmQuoteChars);

    /* Flags 'ch' for every quoting style. */
# define SET_ALL(ch) \
    do { \
        for (size_t iType = 0; iType <= RTGETOPTARGV_CNV_QUOTE_MASK; iType++) \
            ASMBitSet(&g_abmQuoteChars[iType], (ch)); \
    } while (0)
    /* Flags 'ch' for one quoting style only. */
# define SET(ConstSuffix, ch) \
    ASMBitSet(&g_abmQuoteChars[RTGETOPTARGV_CNV_QUOTE_##ConstSuffix], (ch))

    /* just flag all the control chars as in need of quoting. */
    for (char ch = 1; ch < 0x20; ch++)
        SET_ALL(ch);

    /* ... and space of course */
    SET_ALL(' ');

    /* MS CRT / CMD.EXE: */
    SET(MS_CRT, '"');
    SET(MS_CRT, '&');
    SET(MS_CRT, '>');
    SET(MS_CRT, '<');
    SET(MS_CRT, '|');
    SET(MS_CRT, '%');

    /* Bourne shell: */
    SET(BOURNE_SH, '!');
    SET(BOURNE_SH, '"');
    SET(BOURNE_SH, '$');
    SET(BOURNE_SH, '&');
    SET(BOURNE_SH, '(');
    SET(BOURNE_SH, ')');
    SET(BOURNE_SH, '*');
    SET(BOURNE_SH, ';');
    SET(BOURNE_SH, '<');
    SET(BOURNE_SH, '>');
    SET(BOURNE_SH, '?');
    SET(BOURNE_SH, '[');
    SET(BOURNE_SH, '\'');
    SET(BOURNE_SH, '\\');
    SET(BOURNE_SH, '`');
    SET(BOURNE_SH, '|');
    SET(BOURNE_SH, '~');

    for (size_t iType = 0; iType <= RTGETOPTARGV_CNV_QUOTE_MASK; iType++)
    {
        printf(" {");
        /* Dump the whole row; stopping after 8 bytes would drop every
           character from '@' (64) and up, e.g. '[', '\\', '`', '|' and '~'. */
        for (size_t iByte = 0; iByte < sizeof(g_abmQuoteChars[iType]); iByte++)
            printf(iByte == 0 ? " 0x%02x" : ", 0x%02x", g_abmQuoteChars[iType][iByte]);
        printf(" },\n");
    }
    return 0;
}
#endif /* To re-generate the bitmaps. */
114
115
116/**
117 * Look for an unicode code point in the separator string.
118 *
119 * @returns true if it's a separator, false if it isn't.
120 * @param Cp The code point.
121 * @param pszSeparators The separators.
122 */
123static bool rtGetOptIsUniCpInString(RTUNICP Cp, const char *pszSeparators)
124{
125 /* This could be done in a more optimal fashion. Probably worth a
126 separate RTStr function at some point. */
127 for (;;)
128 {
129 RTUNICP CpSep;
130 int rc = RTStrGetCpEx(&pszSeparators, &CpSep);
131 AssertRCReturn(rc, false);
132 if (CpSep == Cp)
133 return true;
134 if (!CpSep)
135 return false;
136 }
137}
138
139
140/**
141 * Look for an 7-bit ASCII character in the separator string.
142 *
143 * @returns true if it's a separator, false if it isn't.
144 * @param ch The character.
145 * @param pszSeparators The separators.
146 * @param cchSeparators The number of separators chars.
147 */
148DECLINLINE(bool) rtGetOptIsAsciiInSet(char ch, const char *pszSeparators, size_t cchSeparators)
149{
150 switch (cchSeparators)
151 {
152 case 8: if (ch == pszSeparators[7]) return true;
153 case 7: if (ch == pszSeparators[6]) return true;
154 case 6: if (ch == pszSeparators[5]) return true;
155 case 5: if (ch == pszSeparators[4]) return true;
156 case 4: if (ch == pszSeparators[3]) return true;
157 case 3: if (ch == pszSeparators[2]) return true;
158 case 2: if (ch == pszSeparators[1]) return true;
159 case 1: if (ch == pszSeparators[0]) return true;
160 return false;
161 default:
162 return memchr(pszSeparators, ch, cchSeparators) != NULL;
163 }
164}
165
166
167/**
168 * Checks if the character is in the set of separators
169 *
170 * @returns true if it is, false if it isn't.
171 *
172 * @param Cp The code point.
173 * @param pszSeparators The separators.
174 * @param cchSeparators The length of @a pszSeparators.
175 */
176DECL_FORCE_INLINE(bool) rtGetOptIsCpInSet(RTUNICP Cp, const char *pszSeparators, size_t cchSeparators)
177{
178 if (RT_LIKELY(Cp <= 127))
179 return rtGetOptIsAsciiInSet((char)Cp, pszSeparators, cchSeparators);
180 return rtGetOptIsUniCpInString(Cp, pszSeparators);
181}
182
183
184/**
185 * Skips any delimiters at the start of the string that is pointed to.
186 *
187 * @returns VINF_SUCCESS or RTStrGetCpEx status code.
188 * @param ppszSrc Where to get and return the string pointer.
189 * @param pszSeparators The separators.
190 * @param cchSeparators The length of @a pszSeparators.
191 */
192static int rtGetOptSkipDelimiters(const char **ppszSrc, const char *pszSeparators, size_t cchSeparators)
193{
194 const char *pszSrc = *ppszSrc;
195 const char *pszRet;
196 for (;;)
197 {
198 pszRet = pszSrc;
199 RTUNICP Cp;
200 int rc = RTStrGetCpEx(&pszSrc, &Cp);
201 if (RT_FAILURE(rc))
202 {
203 *ppszSrc = pszRet;
204 return rc;
205 }
206 if ( !Cp
207 || !rtGetOptIsCpInSet(Cp, pszSeparators, cchSeparators))
208 break;
209 }
210
211 *ppszSrc = pszRet;
212 return VINF_SUCCESS;
213}
214
215
216RTDECL(int) RTGetOptArgvFromString(char ***ppapszArgv, int *pcArgs, const char *pszCmdLine, const char *pszSeparators)
217{
218 /*
219 * Some input validation.
220 */
221 AssertPtr(pszCmdLine);
222 AssertPtr(pcArgs);
223 AssertPtr(ppapszArgv);
224 if (!pszSeparators)
225 pszSeparators = " \t\n\r";
226 else
227 AssertPtr(pszSeparators);
228 size_t const cchSeparators = strlen(pszSeparators);
229 AssertReturn(cchSeparators > 0, VERR_INVALID_PARAMETER);
230
231 /*
232 * Parse the command line and chop off it into argv individual argv strings.
233 */
234 int rc = VINF_SUCCESS;
235 const char *pszSrc = pszCmdLine;
236 char *pszDup = (char *)RTMemAlloc(strlen(pszSrc) + 1);
237 char *pszDst = pszDup;
238 if (!pszDup)
239 return VERR_NO_STR_MEMORY;
240 char **papszArgs = NULL;
241 unsigned iArg = 0;
242 while (*pszSrc)
243 {
244 /* Skip stuff */
245 rc = rtGetOptSkipDelimiters(&pszSrc, pszSeparators, cchSeparators);
246 if (RT_FAILURE(rc))
247 break;
248 if (!*pszSrc)
249 break;
250
251 /* Start a new entry. */
252 if ((iArg % 32) == 0)
253 {
254 void *pvNew = RTMemRealloc(papszArgs, (iArg + 33) * sizeof(char *));
255 if (!pvNew)
256 {
257 rc = VERR_NO_MEMORY;
258 break;
259 }
260 papszArgs = (char **)pvNew;
261 }
262 papszArgs[iArg++] = pszDst;
263
264 /* Parse and copy the string over. */
265 RTUNICP CpQuote = 0;
266 RTUNICP Cp;
267 for (;;)
268 {
269 rc = RTStrGetCpEx(&pszSrc, &Cp);
270 if (RT_FAILURE(rc) || !Cp)
271 break;
272 if (!CpQuote)
273 {
274 if (Cp == '"' || Cp == '\'')
275 CpQuote = Cp;
276 else if (rtGetOptIsCpInSet(Cp, pszSeparators, cchSeparators))
277 break;
278 else
279 pszDst = RTStrPutCp(pszDst, Cp);
280 }
281 else if (CpQuote != Cp)
282 pszDst = RTStrPutCp(pszDst, Cp);
283 else
284 CpQuote = 0;
285 }
286 *pszDst++ = '\0';
287 if (RT_FAILURE(rc) || !Cp)
288 break;
289 }
290
291 if (RT_FAILURE(rc))
292 {
293 RTMemFree(pszDup);
294 RTMemFree(papszArgs);
295 return rc;
296 }
297
298 /*
299 * Terminate the array.
300 * Check for empty string to make sure we've got an array.
301 */
302 if (iArg == 0)
303 {
304 RTMemFree(pszDup);
305 papszArgs = (char **)RTMemAlloc(1 * sizeof(char *));
306 if (!papszArgs)
307 return VERR_NO_MEMORY;
308 }
309 papszArgs[iArg] = NULL;
310
311 *pcArgs = iArg;
312 *ppapszArgv = papszArgs;
313 return VINF_SUCCESS;
314}
315
316
317RTDECL(void) RTGetOptArgvFree(char **papszArgv)
318{
319 if (papszArgv)
320 {
321 /*
322 * We've really only _two_ allocations here. Check the code in
323 * RTGetOptArgvFromString for the particulars.
324 */
325 RTMemFree(papszArgv[0]);
326 RTMemFree(papszArgv);
327 }
328}
329
330
331/**
332 * Checks if the argument needs quoting or not.
333 *
334 * @returns true if it needs, false if it don't.
335 * @param pszArg The argument.
336 * @param fFlags Quoting style.
337 * @param pcch Where to store the argument length when quoting
338 * is not required. (optimization)
339 */
340DECLINLINE(bool) rtGetOpArgvRequiresQuoting(const char *pszArg, uint32_t fFlags, size_t *pcch)
341{
342 char const *psz = pszArg;
343 unsigned char ch;
344 while ((ch = (unsigned char)*psz))
345 {
346 if ( ch < 128
347 && ASMBitTest(&g_abmQuoteChars[fFlags & RTGETOPTARGV_CNV_QUOTE_MASK], ch))
348 return true;
349 psz++;
350 }
351
352 *pcch = psz - pszArg;
353 return false;
354}
355
356
357/**
358 * Grows the command line string buffer.
359 *
360 * @returns VINF_SUCCESS or VERR_NO_STR_MEMORY.
361 * @param ppszCmdLine Pointer to the command line string pointer.
362 * @param pcbCmdLineAlloc Pointer to the allocation length variable.
363 * @param cchMin The minimum size to grow with, kind of.
364 */
365static int rtGetOptArgvToStringGrow(char **ppszCmdLine, size_t *pcbCmdLineAlloc, size_t cchMin)
366{
367 size_t cb = *pcbCmdLineAlloc;
368 while (cb < cchMin)
369 cb *= 2;
370 cb *= 2;
371 *pcbCmdLineAlloc = cb;
372 return RTStrRealloc(ppszCmdLine, cb);
373}
374
375/**
376 * Checks if we have a sequence of DOS slashes followed by a double quote char.
377 *
378 * @returns true / false accordingly.
379 * @param psz The string.
380 */
381DECLINLINE(bool) rtGetOptArgvMsCrtIsSlashQuote(const char *psz)
382{
383 while (*psz == '\\')
384 psz++;
385 return *psz == '"' || *psz == '\0';
386}
387
388
389RTDECL(int) RTGetOptArgvToString(char **ppszCmdLine, const char * const *papszArgv, uint32_t fFlags)
390{
391 AssertReturn(!(fFlags & ~RTGETOPTARGV_CNV_QUOTE_MASK), VERR_INVALID_PARAMETER);
392
393#define PUT_CH(ch) \
394 if (RT_UNLIKELY(off + 1 >= cbCmdLineAlloc)) { \
395 rc = rtGetOptArgvToStringGrow(&pszCmdLine, &cbCmdLineAlloc, 1); \
396 if (RT_FAILURE(rc)) \
397 break; \
398 } \
399 pszCmdLine[off++] = (ch)
400
401#define PUT_PSZ(psz, cch) \
402 if (RT_UNLIKELY(off + (cch) >= cbCmdLineAlloc)) { \
403 rc = rtGetOptArgvToStringGrow(&pszCmdLine, &cbCmdLineAlloc, (cch)); \
404 if (RT_FAILURE(rc)) \
405 break; \
406 } \
407 memcpy(&pszCmdLine[off], (psz), (cch)); \
408 off += (cch);
409#define PUT_SZ(sz) PUT_PSZ(sz, sizeof(sz) - 1)
410
411 /*
412 * Take the realloc approach, it requires less code and is probably more
413 * efficient than figuring out the size first.
414 */
415 int rc = VINF_SUCCESS;
416 size_t off = 0;
417 size_t cbCmdLineAlloc = 256;
418 char *pszCmdLine = RTStrAlloc(256);
419 if (!pszCmdLine)
420 return VERR_NO_STR_MEMORY;
421
422 for (size_t i = 0; papszArgv[i]; i++)
423 {
424 if (i > 0)
425 {
426 PUT_CH(' ');
427 }
428
429 /* does it need quoting? */
430 const char *pszArg = papszArgv[i];
431 size_t cchArg;
432 if (!rtGetOpArgvRequiresQuoting(pszArg, fFlags, &cchArg))
433 {
434 /* No quoting needed, just append the argument. */
435 PUT_PSZ(pszArg, cchArg);
436 }
437 else if ((fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) == RTGETOPTARGV_CNV_QUOTE_MS_CRT)
438 {
439 /*
440 * Microsoft CRT quoting. Quote the whole argument in double
441 * quotes to make it easier to read and code.
442 */
443 PUT_CH('"');
444 char ch;
445 while ((ch = *pszArg++))
446 {
447 if ( ch == '\\'
448 && rtGetOptArgvMsCrtIsSlashQuote(pszArg))
449 {
450 PUT_SZ("\\\\");
451 }
452 else if (ch == '"')
453 {
454 PUT_SZ("\\\"");
455 }
456 else
457 {
458 PUT_CH(ch);
459 }
460 }
461 PUT_CH('"');
462 }
463 else
464 {
465 /*
466 * Bourne Shell quoting. Quote the whole thing in single quotes
467 * and use double quotes for any single quote chars.
468 */
469 PUT_CH('\'');
470 char ch;
471 while ((ch = *pszArg++))
472 {
473 if (ch == '\'')
474 {
475 PUT_SZ("'\"'\"'");
476 }
477 else
478 {
479 PUT_CH(ch);
480 }
481 }
482 PUT_CH('\'');
483 }
484 }
485
486 /* Set return value / cleanup. */
487 if (RT_SUCCESS(rc))
488 {
489 pszCmdLine[off] = '\0';
490 *ppszCmdLine = pszCmdLine;
491 }
492 else
493 RTStrFree(pszCmdLine);
494#undef PUT_SZ
495#undef PUT_PSZ
496#undef PUT_CH
497 return rc;
498}
499
500
501RTDECL(int) RTGetOptArgvToUtf16String(PRTUTF16 *ppwszCmdLine, const char * const *papszArgv, uint32_t fFlags)
502{
503 char *pszCmdLine;
504 int rc = RTGetOptArgvToString(&pszCmdLine, papszArgv, fFlags);
505 if (RT_SUCCESS(rc))
506 {
507 rc = RTStrToUtf16(pszCmdLine, ppwszCmdLine);
508 RTStrFree(pszCmdLine);
509 }
510 return rc;
511}
512
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette