VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/misc/getoptargv.cpp@ 68246

Last change on this file since 68246 was 67598, checked in by vboxsync, 8 years ago

RTGetOptArgvFromString: Added a RTGETOPTARGV_CNV_MODIFY_INPUT flag for avoiding duplicating the input command line string.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 19.8 KB
Line 
1/* $Id: getoptargv.cpp 67598 2017-06-26 09:17:22Z vboxsync $ */
2/** @file
3 * IPRT - Command Line Parsing, Argument Vector.
4 */
5
6/*
7 * Copyright (C) 2010-2016 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*********************************************************************************************************************************
29* Header Files *
30*********************************************************************************************************************************/
31#include <iprt/getopt.h>
32#include "internal/iprt.h"
33
34#include <iprt/asm.h>
35#include <iprt/assert.h>
36#include <iprt/err.h>
37#include <iprt/mem.h>
38#include <iprt/string.h>
39
40
41/*********************************************************************************************************************************
42* Global Variables *
43*********************************************************************************************************************************/
44/**
45 * Array indexed by the quoting type and 7-bit ASCII character.
46 *
47 * We include some extra stuff here that the corresponding shell would normally
48 * require quoting of.
49 */
50static uint8_t
51#ifndef IPRT_REGENERATE_QUOTE_CHARS
52const
53#endif
54g_abmQuoteChars[RTGETOPTARGV_CNV_QUOTE_MASK + 1][16] =
55{
56 { 0xfe, 0xff, 0xff, 0xff, 0x65, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10 },
57 { 0xfe, 0xff, 0xff, 0xff, 0xd7, 0x07, 0x00, 0xd8, 0x00, 0x00, 0x00, 0x18, 0x01, 0x00, 0x00, 0x50 },
58};
59
60
61#ifdef IPRT_REGENERATE_QUOTE_CHARS /* To re-generate the bitmaps. */
62# include <stdio.h>
63int main()
64{
65 RT_ZERO(g_abmQuoteChars);
66
67# define SET_ALL(ch) \
68 do { \
69 for (size_t iType = 0; iType <= RTGETOPTARGV_CNV_QUOTE_MASK; iType++) \
70 ASMBitSet(&g_abmQuoteChars[iType], (ch)); \
71 } while (0)
72# define SET(ConstSuffix, ch) \
73 do { \
74 ASMBitSet(&g_abmQuoteChars[RTGETOPTARGV_CNV_QUOTE_##ConstSuffix], (ch)); \
75 printf(#ConstSuffix ": %#x %d %c\n", (ch), (ch), (ch)); \
76 } while (0)
77
78 /* just flag all the control chars as in need of quoting. */
79 for (char ch = 1; ch < 0x20; ch++)
80 SET_ALL(ch);
81
82 /* ... and space of course */
83 SET_ALL(' ');
84
85 /* MS CRT / CMD.EXE: */
86 SET(MS_CRT, '"');
87 SET(MS_CRT, '&');
88 SET(MS_CRT, '>');
89 SET(MS_CRT, '<');
90 SET(MS_CRT, '|');
91 SET(MS_CRT, '%');
92
93 /* Bourne shell: */
94 SET(BOURNE_SH, '!');
95 SET(BOURNE_SH, '"');
96 SET(BOURNE_SH, '$');
97 SET(BOURNE_SH, '&');
98 SET(BOURNE_SH, '(');
99 SET(BOURNE_SH, ')');
100 SET(BOURNE_SH, '*');
101 SET(BOURNE_SH, ';');
102 SET(BOURNE_SH, '<');
103 SET(BOURNE_SH, '>');
104 SET(BOURNE_SH, '?');
105 SET(BOURNE_SH, '[');
106 SET(BOURNE_SH, '\'');
107 SET(BOURNE_SH, '\\');
108 SET(BOURNE_SH, '`');
109 SET(BOURNE_SH, '|');
110 SET(BOURNE_SH, '~');
111
112 for (size_t iType = 0; iType <= RTGETOPTARGV_CNV_QUOTE_MASK; iType++)
113 {
114 printf(" {");
115 for (size_t iByte = 0; iByte < 16; iByte++)
116 printf(iByte == 0 ? " 0x%02x" : ", 0x%02x", g_abmQuoteChars[iType][iByte]);
117 printf(" },\n");
118 }
119 return 0;
120}
121
122#else /* !IPRT_REGENERATE_QUOTE_CHARS */
123
124/**
125 * Look for an unicode code point in the separator string.
126 *
127 * @returns true if it's a separator, false if it isn't.
128 * @param Cp The code point.
129 * @param pszSeparators The separators.
130 */
131static bool rtGetOptIsUniCpInString(RTUNICP Cp, const char *pszSeparators)
132{
133 /* This could be done in a more optimal fashion. Probably worth a
134 separate RTStr function at some point. */
135 for (;;)
136 {
137 RTUNICP CpSep;
138 int rc = RTStrGetCpEx(&pszSeparators, &CpSep);
139 AssertRCReturn(rc, false);
140 if (CpSep == Cp)
141 return true;
142 if (!CpSep)
143 return false;
144 }
145}
146
147
148/**
149 * Look for an 7-bit ASCII character in the separator string.
150 *
151 * @returns true if it's a separator, false if it isn't.
152 * @param ch The character.
153 * @param pszSeparators The separators.
154 * @param cchSeparators The number of separators chars.
155 */
156DECLINLINE(bool) rtGetOptIsAsciiInSet(char ch, const char *pszSeparators, size_t cchSeparators)
157{
158 switch (cchSeparators)
159 {
160 case 8: if (ch == pszSeparators[7]) return true; /* fall thru */
161 case 7: if (ch == pszSeparators[6]) return true; /* fall thru */
162 case 6: if (ch == pszSeparators[5]) return true; /* fall thru */
163 case 5: if (ch == pszSeparators[4]) return true; /* fall thru */
164 case 4: if (ch == pszSeparators[3]) return true; /* fall thru */
165 case 3: if (ch == pszSeparators[2]) return true; /* fall thru */
166 case 2: if (ch == pszSeparators[1]) return true; /* fall thru */
167 case 1: if (ch == pszSeparators[0]) return true;
168 return false;
169 default:
170 return memchr(pszSeparators, ch, cchSeparators) != NULL;
171 }
172}
173
174
175/**
176 * Checks if the character is in the set of separators
177 *
178 * @returns true if it is, false if it isn't.
179 *
180 * @param Cp The code point.
181 * @param pszSeparators The separators.
182 * @param cchSeparators The length of @a pszSeparators.
183 */
184DECL_FORCE_INLINE(bool) rtGetOptIsCpInSet(RTUNICP Cp, const char *pszSeparators, size_t cchSeparators)
185{
186 if (RT_LIKELY(Cp <= 127))
187 return rtGetOptIsAsciiInSet((char)Cp, pszSeparators, cchSeparators);
188 return rtGetOptIsUniCpInString(Cp, pszSeparators);
189}
190
191
192/**
193 * Skips any delimiters at the start of the string that is pointed to.
194 *
195 * @returns VINF_SUCCESS or RTStrGetCpEx status code.
196 * @param ppszSrc Where to get and return the string pointer.
197 * @param pszSeparators The separators.
198 * @param cchSeparators The length of @a pszSeparators.
199 */
200static int rtGetOptSkipDelimiters(const char **ppszSrc, const char *pszSeparators, size_t cchSeparators)
201{
202 const char *pszSrc = *ppszSrc;
203 const char *pszRet;
204 for (;;)
205 {
206 pszRet = pszSrc;
207 RTUNICP Cp;
208 int rc = RTStrGetCpEx(&pszSrc, &Cp);
209 if (RT_FAILURE(rc))
210 {
211 *ppszSrc = pszRet;
212 return rc;
213 }
214 if ( !Cp
215 || !rtGetOptIsCpInSet(Cp, pszSeparators, cchSeparators))
216 break;
217 }
218
219 *ppszSrc = pszRet;
220 return VINF_SUCCESS;
221}
222
223
224RTDECL(int) RTGetOptArgvFromString(char ***ppapszArgv, int *pcArgs, const char *pszCmdLine,
225 uint32_t fFlags, const char *pszSeparators)
226{
227 /*
228 * Some input validation.
229 */
230 AssertPtr(pszCmdLine);
231 AssertPtr(pcArgs);
232 AssertPtr(ppapszArgv);
233 AssertReturn( (fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) == RTGETOPTARGV_CNV_QUOTE_BOURNE_SH
234 || (fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) == RTGETOPTARGV_CNV_QUOTE_MS_CRT, VERR_INVALID_FLAGS);
235 AssertReturn(~(fFlags & ~RTGETOPTARGV_CNV_VALID_MASK), VERR_INVALID_FLAGS);
236
237 if (!pszSeparators)
238 pszSeparators = " \t\n\r";
239 else
240 AssertPtr(pszSeparators);
241 size_t const cchSeparators = strlen(pszSeparators);
242 AssertReturn(cchSeparators > 0, VERR_INVALID_PARAMETER);
243
244 /*
245 * Parse the command line and chop off it into argv individual argv strings.
246 */
247 const char *pszSrc = pszCmdLine;
248 char *pszDup = NULL;
249 char *pszDst;
250 if (fFlags & RTGETOPTARGV_CNV_MODIFY_INPUT)
251 pszDst = (char *)pszCmdLine;
252 else
253 {
254 pszDst = pszDup = (char *)RTMemAlloc(strlen(pszSrc) + 1);
255 if (!pszDup)
256 return VERR_NO_STR_MEMORY;
257 }
258 int rc = VINF_SUCCESS;
259 char **papszArgs = NULL;
260 unsigned iArg = 0;
261 while (*pszSrc)
262 {
263 /* Skip stuff */
264 rc = rtGetOptSkipDelimiters(&pszSrc, pszSeparators, cchSeparators);
265 if (RT_FAILURE(rc))
266 break;
267 if (!*pszSrc)
268 break;
269
270 /* Start a new entry. */
271 if ((iArg % 32) == 0)
272 {
273 void *pvNew = RTMemRealloc(papszArgs, (iArg + 33) * sizeof(char *));
274 if (!pvNew)
275 {
276 rc = VERR_NO_MEMORY;
277 break;
278 }
279 papszArgs = (char **)pvNew;
280 }
281 papszArgs[iArg++] = pszDst;
282
283 /*
284 * Parse and copy the string over.
285 */
286 RTUNICP uc;
287 if ((fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) == RTGETOPTARGV_CNV_QUOTE_BOURNE_SH)
288 {
289 /*
290 * Bourne shell style.
291 */
292 RTUNICP ucQuote = 0;
293 for (;;)
294 {
295 rc = RTStrGetCpEx(&pszSrc, &uc);
296 if (RT_FAILURE(rc) || !uc)
297 break;
298 if (!ucQuote)
299 {
300 if (uc == '"' || uc == '\'')
301 ucQuote = uc;
302 else if (rtGetOptIsCpInSet(uc, pszSeparators, cchSeparators))
303 break;
304 else if (uc != '\\')
305 pszDst = RTStrPutCp(pszDst, uc);
306 else
307 {
308 /* escaped char */
309 rc = RTStrGetCpEx(&pszSrc, &uc);
310 if (RT_FAILURE(rc) || !uc)
311 break;
312 pszDst = RTStrPutCp(pszDst, uc);
313 }
314 }
315 else if (ucQuote != uc)
316 {
317 if (uc != '\\' || ucQuote == '\'')
318 pszDst = RTStrPutCp(pszDst, uc);
319 else
320 {
321 /* escaped char */
322 rc = RTStrGetCpEx(&pszSrc, &uc);
323 if (RT_FAILURE(rc) || !uc)
324 break;
325 if ( uc != '"'
326 && uc != '\\'
327 && uc != '`'
328 && uc != '$'
329 && uc != '\n')
330 pszDst = RTStrPutCp(pszDst, ucQuote);
331 pszDst = RTStrPutCp(pszDst, uc);
332 }
333 }
334 else
335 ucQuote = 0;
336 }
337 }
338 else
339 {
340 /*
341 * Microsoft CRT style.
342 */
343 Assert((fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) == RTGETOPTARGV_CNV_QUOTE_MS_CRT);
344 bool fInQuote = false;
345 for (;;)
346 {
347 rc = RTStrGetCpEx(&pszSrc, &uc);
348 if (RT_FAILURE(rc) || !uc)
349 break;
350 if (uc == '"')
351 {
352 /* Two double quotes insides a quoted string in an escape
353 sequence and we output one double quote char.
354 See http://www.daviddeley.com/autohotkey/parameters/parameters.htm */
355 if (!fInQuote)
356 fInQuote = true;
357 else if (*pszSrc != '"')
358 fInQuote = false;
359 else
360 {
361 pszDst = RTStrPutCp(pszDst, '"');
362 pszSrc++;
363 }
364 }
365 else if (!fInQuote && rtGetOptIsCpInSet(uc, pszSeparators, cchSeparators))
366 break;
367 else if (uc != '\\')
368 pszDst = RTStrPutCp(pszDst, uc);
369 else
370 {
371 /* A backslash sequence is only relevant if followed by
372 a double quote, then it will work like an escape char. */
373 size_t cSlashes = 1;
374 while (*pszSrc == '\\')
375 {
376 cSlashes++;
377 pszSrc++;
378 }
379 if (*pszSrc != '"')
380 /* Not an escape sequence. */
381 while (cSlashes-- > 0)
382 pszDst = RTStrPutCp(pszDst, '\\');
383 else
384 {
385 /* Escape sequence. Output half of the slashes. If odd
386 number, output the escaped double quote . */
387 while (cSlashes >= 2)
388 {
389 pszDst = RTStrPutCp(pszDst, '\\');
390 cSlashes -= 2;
391 }
392 if (cSlashes)
393 {
394 pszDst = RTStrPutCp(pszDst, '"');
395 pszSrc++;
396 }
397 }
398 }
399 }
400 }
401
402 *pszDst++ = '\0';
403 if (RT_FAILURE(rc) || !uc)
404 break;
405 }
406
407 if (RT_FAILURE(rc))
408 {
409 RTMemFree(pszDup);
410 RTMemFree(papszArgs);
411 return rc;
412 }
413
414 /*
415 * Terminate the array.
416 * Check for empty string to make sure we've got an array.
417 */
418 if (iArg == 0)
419 {
420 RTMemFree(pszDup);
421 papszArgs = (char **)RTMemAlloc(1 * sizeof(char *));
422 if (!papszArgs)
423 return VERR_NO_MEMORY;
424 }
425 papszArgs[iArg] = NULL;
426
427 *pcArgs = iArg;
428 *ppapszArgv = papszArgs;
429 return VINF_SUCCESS;
430}
431
432
433RTDECL(void) RTGetOptArgvFree(char **papszArgv)
434{
435 RTGetOptArgvFreeEx(papszArgv, 0);
436}
437
438
439RTDECL(void) RTGetOptArgvFreeEx(char **papszArgv, uint32_t fFlags)
440{
441 Assert(~(fFlags & ~RTGETOPTARGV_CNV_VALID_MASK));
442 if (papszArgv)
443 {
444 /*
445 * We've really only _two_ allocations here. Check the code in
446 * RTGetOptArgvFromString for the particulars.
447 */
448 if (!(fFlags & RTGETOPTARGV_CNV_MODIFY_INPUT))
449 RTMemFree(papszArgv[0]);
450 RTMemFree(papszArgv);
451 }
452}
453
454
455/**
456 * Checks if the argument needs quoting or not.
457 *
458 * @returns true if it needs, false if it don't.
459 * @param pszArg The argument.
460 * @param fFlags Quoting style.
461 * @param pcch Where to store the argument length when quoting
462 * is not required. (optimization)
463 */
464DECLINLINE(bool) rtGetOpArgvRequiresQuoting(const char *pszArg, uint32_t fFlags, size_t *pcch)
465{
466 if ((fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) != RTGETOPTARGV_CNV_UNQUOTED)
467 {
468 char const *psz = pszArg;
469 unsigned char ch;
470 while ((ch = (unsigned char)*psz))
471 {
472 if ( ch < 128
473 && ASMBitTest(&g_abmQuoteChars[fFlags & RTGETOPTARGV_CNV_QUOTE_MASK], ch))
474 return true;
475 psz++;
476 }
477
478 *pcch = psz - pszArg;
479 }
480 else
481 *pcch = strlen(pszArg);
482 return false;
483}
484
485
486/**
487 * Grows the command line string buffer.
488 *
489 * @returns VINF_SUCCESS or VERR_NO_STR_MEMORY.
490 * @param ppszCmdLine Pointer to the command line string pointer.
491 * @param pcbCmdLineAlloc Pointer to the allocation length variable.
492 * @param cchMin The minimum size to grow with, kind of.
493 */
494static int rtGetOptArgvToStringGrow(char **ppszCmdLine, size_t *pcbCmdLineAlloc, size_t cchMin)
495{
496 size_t cb = *pcbCmdLineAlloc;
497 while (cb < cchMin)
498 cb *= 2;
499 cb *= 2;
500 *pcbCmdLineAlloc = cb;
501 return RTStrRealloc(ppszCmdLine, cb);
502}
503
504/**
505 * Checks if we have a sequence of DOS slashes followed by a double quote char.
506 *
507 * @returns true / false accordingly.
508 * @param psz The string.
509 */
510DECLINLINE(bool) rtGetOptArgvMsCrtIsSlashQuote(const char *psz)
511{
512 while (*psz == '\\')
513 psz++;
514 return *psz == '"' || *psz == '\0';
515}
516
517
518RTDECL(int) RTGetOptArgvToString(char **ppszCmdLine, const char * const *papszArgv, uint32_t fFlags)
519{
520 AssertReturn((fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) <= RTGETOPTARGV_CNV_UNQUOTED, VERR_INVALID_FLAGS);
521 AssertReturn(!(fFlags & (~RTGETOPTARGV_CNV_VALID_MASK | RTGETOPTARGV_CNV_MODIFY_INPUT)), VERR_INVALID_FLAGS);
522
523#define PUT_CH(ch) \
524 if (RT_UNLIKELY(off + 1 >= cbCmdLineAlloc)) { \
525 rc = rtGetOptArgvToStringGrow(&pszCmdLine, &cbCmdLineAlloc, 1); \
526 if (RT_FAILURE(rc)) \
527 break; \
528 } \
529 pszCmdLine[off++] = (ch)
530
531#define PUT_PSZ(psz, cch) \
532 if (RT_UNLIKELY(off + (cch) >= cbCmdLineAlloc)) { \
533 rc = rtGetOptArgvToStringGrow(&pszCmdLine, &cbCmdLineAlloc, (cch)); \
534 if (RT_FAILURE(rc)) \
535 break; \
536 } \
537 memcpy(&pszCmdLine[off], (psz), (cch)); \
538 off += (cch);
539#define PUT_SZ(sz) PUT_PSZ(sz, sizeof(sz) - 1)
540
541 /*
542 * Take the realloc approach, it requires less code and is probably more
543 * efficient than figuring out the size first.
544 */
545 int rc = VINF_SUCCESS;
546 size_t off = 0;
547 size_t cbCmdLineAlloc = 256;
548 char *pszCmdLine = RTStrAlloc(256);
549 if (!pszCmdLine)
550 return VERR_NO_STR_MEMORY;
551
552 for (size_t i = 0; papszArgv[i]; i++)
553 {
554 if (i > 0)
555 {
556 PUT_CH(' ');
557 }
558
559 /* does it need quoting? */
560 const char *pszArg = papszArgv[i];
561 size_t cchArg;
562 if (!rtGetOpArgvRequiresQuoting(pszArg, fFlags, &cchArg))
563 {
564 /* No quoting needed, just append the argument. */
565 PUT_PSZ(pszArg, cchArg);
566 }
567 else if ((fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) == RTGETOPTARGV_CNV_QUOTE_MS_CRT)
568 {
569 /*
570 * Microsoft CRT quoting. Quote the whole argument in double
571 * quotes to make it easier to read and code.
572 */
573 PUT_CH('"');
574 char ch;
575 while ((ch = *pszArg++))
576 {
577 if ( ch == '\\'
578 && rtGetOptArgvMsCrtIsSlashQuote(pszArg))
579 {
580 PUT_SZ("\\\\");
581 }
582 else if (ch == '"')
583 {
584 PUT_SZ("\\\"");
585 }
586 else
587 {
588 PUT_CH(ch);
589 }
590 }
591 PUT_CH('"');
592 }
593 else
594 {
595 /*
596 * Bourne Shell quoting. Quote the whole thing in single quotes
597 * and use double quotes for any single quote chars.
598 */
599 PUT_CH('\'');
600 char ch;
601 while ((ch = *pszArg++))
602 {
603 if (ch == '\'')
604 {
605 PUT_SZ("'\"'\"'");
606 }
607 else
608 {
609 PUT_CH(ch);
610 }
611 }
612 PUT_CH('\'');
613 }
614 }
615
616 /* Set return value / cleanup. */
617 if (RT_SUCCESS(rc))
618 {
619 pszCmdLine[off] = '\0';
620 *ppszCmdLine = pszCmdLine;
621 }
622 else
623 RTStrFree(pszCmdLine);
624#undef PUT_SZ
625#undef PUT_PSZ
626#undef PUT_CH
627 return rc;
628}
629
630
631RTDECL(int) RTGetOptArgvToUtf16String(PRTUTF16 *ppwszCmdLine, const char * const *papszArgv, uint32_t fFlags)
632{
633 char *pszCmdLine;
634 int rc = RTGetOptArgvToString(&pszCmdLine, papszArgv, fFlags);
635 if (RT_SUCCESS(rc))
636 {
637 rc = RTStrToUtf16(pszCmdLine, ppwszCmdLine);
638 RTStrFree(pszCmdLine);
639 }
640 return rc;
641}
642
643#endif /* !IPRT_REGENERATE_QUOTE_CHARS */
644
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette