VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/misc/getoptargv.cpp@ 27968

Last change on this file since 27968 was 27968, checked in by vboxsync, 15 years ago

RTGetOptArgvToString: Implemented bourne shell style quoting.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 14.5 KB
Line 
1/* $Id: getoptargv.cpp 27968 2010-04-02 20:36:35Z vboxsync $ */
2/** @file
3 * IPRT - Command Line Parsing, Argument Vector.
4 */
5
6/*
7 * Copyright (C) 2010 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 *
26 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
27 * Clara, CA 95054 USA or visit http://www.sun.com if you need
28 * additional information or have any questions.
29 */
30
31/*******************************************************************************
32* Header Files *
33*******************************************************************************/
34#include <iprt/getopt.h>
35#include "internal/iprt.h"
36
37#include <iprt/asm.h>
38#include <iprt/assert.h>
39#include <iprt/err.h>
40#include <iprt/mem.h>
41#include <iprt/string.h>
42
43
44/*******************************************************************************
45* Global Variables *
46*******************************************************************************/
47/**
48 * Array indexed by the quoting type and 7-bit ASCII character.
49 *
50 * We include some extra stuff here that the corresponding shell would normally
51 * require qouting of.
52 */
53static uint8_t const g_abmQuoteChars[RTGETOPTARGV_CNV_QUOTE_MASK + 1][128/8] =
54{
55 { 0xfe, 0xff, 0x0f, 0x00, 0x65, 0x00, 0x00, 0x50 },
56 { 0xfe, 0xff, 0x0f, 0x00, 0xd7, 0x07, 0x00, 0xd8 },
57};
58
59
#if 0 /* To re-generate the bitmaps. */
#include <stdio.h>
/**
 * Generator for the g_abmQuoteChars initialiser.
 *
 * Builds the bitmaps in a local scratch table (the real table is const) and
 * prints one initialiser row per quoting type, covering all 128/8 bytes so
 * that characters at or above 64 are not lost.
 */
int main()
{
    uint8_t abmQuoteChars[RTGETOPTARGV_CNV_QUOTE_MASK + 1][128/8];
    RT_ZERO(abmQuoteChars);

# define SET_ALL(ch) \
    do { \
        for (size_t iType = 0; iType <= RTGETOPTARGV_CNV_QUOTE_MASK; iType++) \
            ASMBitSet(&abmQuoteChars[iType], (ch)); \
    } while (0)
# define SET(ConstSuffix, ch) \
    ASMBitSet(&abmQuoteChars[RTGETOPTARGV_CNV_QUOTE_##ConstSuffix], (ch))

    /* just flag all the control chars (1..31) as in need of quoting. */
    for (char ch = 1; ch < 0x20; ch++)
        SET_ALL(ch);

    /* ... and space of course */
    SET_ALL(' ');

    /* MS CRT / CMD.EXE: */
    SET(MS_CRT, '"');
    SET(MS_CRT, '&');
    SET(MS_CRT, '>');
    SET(MS_CRT, '<');
    SET(MS_CRT, '|');
    SET(MS_CRT, '%');

    /* Bourne shell: */
    SET(BOURNE_SH, '!');
    SET(BOURNE_SH, '"');
    SET(BOURNE_SH, '$');
    SET(BOURNE_SH, '&');
    SET(BOURNE_SH, '(');
    SET(BOURNE_SH, ')');
    SET(BOURNE_SH, '*');
    SET(BOURNE_SH, ';');
    SET(BOURNE_SH, '<');
    SET(BOURNE_SH, '>');
    SET(BOURNE_SH, '?');
    SET(BOURNE_SH, '[');
    SET(BOURNE_SH, '\'');
    SET(BOURNE_SH, '\\');
    SET(BOURNE_SH, '`');
    SET(BOURNE_SH, '|');
    SET(BOURNE_SH, '~');

    for (size_t iType = 0; iType <= RTGETOPTARGV_CNV_QUOTE_MASK; iType++)
    {
        printf("    {");
        /* Print every byte of the row, not just the first eight. */
        for (size_t iByte = 0; iByte < sizeof(abmQuoteChars[0]); iByte++)
            printf(iByte == 0 ? " 0x%02x" : ", 0x%02x", abmQuoteChars[iType][iByte]);
        printf(" },\n");
    }

# undef SET
# undef SET_ALL
    return 0;
}
#endif /* To re-generate the bitmaps. */
118
119
120/**
121 * Look for an unicode code point in the separator string.
122 *
123 * @returns true if it's a separator, false if it isn't.
124 * @param Cp The code point.
125 * @param pszSeparators The separators.
126 */
127static bool rtGetOptIsUniCpInString(RTUNICP Cp, const char *pszSeparators)
128{
129 /* This could be done in a more optimal fashion. Probably worth a
130 separate RTStr function at some point. */
131 for (;;)
132 {
133 RTUNICP CpSep;
134 int rc = RTStrGetCpEx(&pszSeparators, &CpSep);
135 AssertRCReturn(rc, false);
136 if (CpSep == Cp)
137 return true;
138 if (!CpSep)
139 return false;
140 }
141}
142
143
144/**
145 * Look for an 7-bit ASCII character in the separator string.
146 *
147 * @returns true if it's a separator, false if it isn't.
148 * @param ch The character.
149 * @param pszSeparators The separators.
150 * @param cchSeparators The number of separators chars.
151 */
152DECLINLINE(bool) rtGetOptIsAsciiInSet(char ch, const char *pszSeparators, size_t cchSeparators)
153{
154 switch (cchSeparators)
155 {
156 case 8: if (ch == pszSeparators[7]) return true;
157 case 7: if (ch == pszSeparators[6]) return true;
158 case 6: if (ch == pszSeparators[5]) return true;
159 case 5: if (ch == pszSeparators[4]) return true;
160 case 4: if (ch == pszSeparators[3]) return true;
161 case 3: if (ch == pszSeparators[2]) return true;
162 case 2: if (ch == pszSeparators[1]) return true;
163 case 1: if (ch == pszSeparators[0]) return true;
164 return false;
165 default:
166 return memchr(pszSeparators, ch, cchSeparators) != NULL;
167 }
168}
169
170
171/**
172 * Checks if the character is in the set of separators
173 *
174 * @returns true if it is, false if it isn't.
175 *
176 * @param Cp The code point.
177 * @param pszSeparators The separators.
178 * @param cchSeparators The length of @a pszSeparators.
179 */
180DECL_FORCE_INLINE(bool) rtGetOptIsCpInSet(RTUNICP Cp, const char *pszSeparators, size_t cchSeparators)
181{
182 if (RT_LIKELY(Cp <= 127))
183 return rtGetOptIsAsciiInSet((char)Cp, pszSeparators, cchSeparators);
184 return rtGetOptIsUniCpInString(Cp, pszSeparators);
185}
186
187
188/**
189 * Skips any delimiters at the start of the string that is pointed to.
190 *
191 * @returns VINF_SUCCESS or RTStrGetCpEx status code.
192 * @param ppszSrc Where to get and return the string pointer.
193 * @param pszSeparators The separators.
194 * @param cchSeparators The length of @a pszSeparators.
195 */
196static int rtGetOptSkipDelimiters(const char **ppszSrc, const char *pszSeparators, size_t cchSeparators)
197{
198 const char *pszSrc = *ppszSrc;
199 const char *pszRet;
200 for (;;)
201 {
202 pszRet = pszSrc;
203 RTUNICP Cp;
204 int rc = RTStrGetCpEx(&pszSrc, &Cp);
205 if (RT_FAILURE(rc))
206 {
207 *ppszSrc = pszRet;
208 return rc;
209 }
210 if ( !Cp
211 || !rtGetOptIsCpInSet(Cp, pszSeparators, cchSeparators))
212 break;
213 }
214
215 *ppszSrc = pszRet;
216 return VINF_SUCCESS;
217}
218
219
220RTDECL(int) RTGetOptArgvFromString(char ***ppapszArgv, int *pcArgs, const char *pszCmdLine, const char *pszSeparators)
221{
222 /*
223 * Some input validation.
224 */
225 AssertPtr(pszCmdLine);
226 AssertPtr(pcArgs);
227 AssertPtr(ppapszArgv);
228 if (!pszSeparators)
229 pszSeparators = " \t\n\r";
230 else
231 AssertPtr(pszSeparators);
232 size_t const cchSeparators = strlen(pszSeparators);
233 AssertReturn(cchSeparators > 0, VERR_INVALID_PARAMETER);
234
235 /*
236 * Parse the command line and chop off it into argv individual argv strings.
237 */
238 int rc = VINF_SUCCESS;
239 const char *pszSrc = pszCmdLine;
240 char *pszDup = (char *)RTMemAlloc(strlen(pszSrc) + 1);
241 char *pszDst = pszDup;
242 if (!pszDup)
243 return VERR_NO_STR_MEMORY;
244 char **papszArgs = NULL;
245 unsigned iArg = 0;
246 while (*pszSrc)
247 {
248 /* Skip stuff */
249 rc = rtGetOptSkipDelimiters(&pszSrc, pszSeparators, cchSeparators);
250 if (RT_FAILURE(rc))
251 break;
252 if (!*pszSrc)
253 break;
254
255 /* Start a new entry. */
256 if ((iArg % 32) == 0)
257 {
258 void *pvNew = RTMemRealloc(papszArgs, (iArg + 33) * sizeof(char *));
259 if (!pvNew)
260 {
261 rc = VERR_NO_MEMORY;
262 break;
263 }
264 papszArgs = (char **)pvNew;
265 }
266 papszArgs[iArg++] = pszDst;
267
268 /* Parse and copy the string over. */
269 RTUNICP CpQuote = 0;
270 RTUNICP Cp;
271 for (;;)
272 {
273 rc = RTStrGetCpEx(&pszSrc, &Cp);
274 if (RT_FAILURE(rc) || !Cp)
275 break;
276 if (!CpQuote)
277 {
278 if (Cp == '"' || Cp == '\'')
279 CpQuote = Cp;
280 else if (rtGetOptIsCpInSet(Cp, pszSeparators, cchSeparators))
281 break;
282 else
283 pszDst = RTStrPutCp(pszDst, Cp);
284 }
285 else if (CpQuote != Cp)
286 pszDst = RTStrPutCp(pszDst, Cp);
287 else
288 CpQuote = 0;
289 }
290 *pszDst++ = '\0';
291 if (RT_FAILURE(rc) || !Cp)
292 break;
293 }
294
295 if (RT_FAILURE(rc))
296 {
297 RTMemFree(pszDup);
298 RTMemFree(papszArgs);
299 return rc;
300 }
301
302 /*
303 * Terminate the array.
304 * Check for empty string to make sure we've got an array.
305 */
306 if (iArg == 0)
307 {
308 RTMemFree(pszDup);
309 papszArgs = (char **)RTMemAlloc(1 * sizeof(char *));
310 if (!papszArgs)
311 return VERR_NO_MEMORY;
312 }
313 papszArgs[iArg] = NULL;
314
315 *pcArgs = iArg;
316 *ppapszArgv = papszArgs;
317 return VINF_SUCCESS;
318}
319
320
321RTDECL(void) RTGetOptArgvFree(char **papszArgv)
322{
323 if (papszArgv)
324 {
325 RTMemFree(papszArgv[0]);
326 RTMemFree(papszArgv);
327 }
328}
329
330
331/**
332 * Checks if the argument needs quoting or not.
333 *
334 * @returns true if it needs, false if it don't.
335 * @param pszArg The argument.
336 * @param fFlags Quoting style.
337 * @param pcch Where to store the argument length when quoting
338 * is not required. (optimization)
339 */
340DECLINLINE(bool) rtGetOpArgvRequiresQuoting(const char *pszArg, uint32_t fFlags, size_t *pcch)
341{
342 char const *psz = pszArg;
343 unsigned char ch;
344 while ((ch = (unsigned char)*psz))
345 {
346 if ( ch < 128
347 && ASMBitTest(&g_abmQuoteChars[fFlags & RTGETOPTARGV_CNV_QUOTE_MASK], ch))
348 return true;
349 psz++;
350 }
351
352 *pcch = psz - pszArg;
353 return false;
354}
355
356
357/**
358 * Grows the command line string buffer.
359 *
360 * @returns VINF_SUCCESS or VERR_NO_STR_MEMORY.
361 * @param ppszCmdLine Pointer to the command line string pointer.
362 * @param pcbCmdLineAlloc Pointer to the allocation length variable.
363 * @param cchMin The minimum size to grow with, kind of.
364 */
365static int rtGetOptArgvToStringGrow(char **ppszCmdLine, size_t *pcbCmdLineAlloc, size_t cchMin)
366{
367 size_t cb = *pcbCmdLineAlloc;
368 while (cb < cchMin)
369 cb *= 2;
370 cb *= 2;
371 *pcbCmdLineAlloc = cb;
372 return RTStrRealloc(ppszCmdLine, cb);
373}
374
375/**
376 * Checks if we have a sequence of DOS slashes followed by a double quote char.
377 *
378 * @returns true / false accordingly.
379 * @param psz The string.
380 */
381DECLINLINE(bool) rtGetOptArgvMsCrtIsSlashQuote(const char *psz)
382{
383 while (*psz == '\\')
384 psz++;
385 return *psz == '"' || *psz == '\0';
386}
387
388
389RTDECL(int) RTGetOptArgvToString(char **ppszCmdLine, const char * const *papszArgv, uint32_t fFlags)
390{
391 AssertReturn(!(fFlags & ~RTGETOPTARGV_CNV_QUOTE_MASK), VERR_INVALID_PARAMETER);
392
393#define PUT_CH(ch) \
394 if (RT_UNLIKELY(off + 1 >= cbCmdLineAlloc)) { \
395 rc = rtGetOptArgvToStringGrow(&pszCmdLine, &cbCmdLineAlloc, 1); \
396 if (RT_FAILURE(rc)) \
397 break; \
398 } \
399 pszCmdLine[off++] = (ch)
400
401#define PUT_PSZ(psz, cch) \
402 if (RT_UNLIKELY(off + (cch) >= cbCmdLineAlloc)) { \
403 rc = rtGetOptArgvToStringGrow(&pszCmdLine, &cbCmdLineAlloc, (cch)); \
404 if (RT_FAILURE(rc)) \
405 break; \
406 } \
407 memcpy(&pszCmdLine[off], (psz), (cch)); \
408 off += (cch);
409#define PUT_SZ(sz) PUT_PSZ(sz, sizeof(sz) - 1)
410
411 /*
412 * Take the realloc approach, it requires less code and is probably more
413 * efficient than figuring out the size first.
414 */
415 int rc = VINF_SUCCESS;
416 size_t off = 0;
417 size_t cbCmdLineAlloc = 256;
418 char *pszCmdLine = RTStrAlloc(256);
419 if (!pszCmdLine)
420 return VERR_NO_STR_MEMORY;
421
422 for (size_t i = 0; papszArgv[i]; i++)
423 {
424 if (i > 0)
425 {
426 PUT_CH(' ');
427 }
428
429 /* does it need quoting? */
430 const char *pszArg = papszArgv[i];
431 size_t cchArg;
432 if (!rtGetOpArgvRequiresQuoting(pszArg, fFlags, &cchArg))
433 {
434 /* No quoting needed, just append the argument. */
435 PUT_PSZ(pszArg, cchArg);
436 }
437 else if ((fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) == RTGETOPTARGV_CNV_QUOTE_MS_CRT)
438 {
439 /*
440 * Microsoft CRT quoting. Quote the whole argument in double
441 * quotes to make it easier to read and code.
442 */
443 PUT_CH('"');
444 char ch;
445 while ((ch = *pszArg++))
446 {
447 if ( ch == '\\'
448 && rtGetOptArgvMsCrtIsSlashQuote(pszArg))
449 {
450 PUT_SZ("\\\\");
451 }
452 else if (ch == '"')
453 {
454 PUT_SZ("\\\"");
455 }
456 else
457 {
458 PUT_CH(ch);
459 }
460 }
461 PUT_CH('"');
462 }
463 else
464 {
465 /*
466 * Bourne Shell quoting. Quote the whole thing in single quotes
467 * and use double quotes for any single quote chars.
468 */
469 PUT_CH('\'');
470 char ch;
471 while ((ch = *pszArg++))
472 {
473 if (ch == '\'')
474 {
475 PUT_SZ("'\"'\"'");
476 }
477 else
478 {
479 PUT_CH(ch);
480 }
481 }
482 PUT_CH('\'');
483 }
484 }
485
486 /* Set return value / cleanup. */
487 if (RT_SUCCESS(rc))
488 {
489 pszCmdLine[off] = '\0';
490 *ppszCmdLine = pszCmdLine;
491 }
492 else
493 RTStrFree(pszCmdLine);
494#undef PUT_SZ
495#undef PUT_PSZ
496#undef PUT_CH
497 return rc;
498}
499
500
501RTDECL(int) RTGetOptArgvToUtf16String(PRTUTF16 *ppwszCmdLine, const char * const *papszArgv, uint32_t fFlags)
502{
503 char *pszCmdLine;
504 int rc = RTGetOptArgvToString(&pszCmdLine, papszArgv, fFlags);
505 if (RT_SUCCESS(rc))
506 {
507 rc = RTStrToUtf16(pszCmdLine, ppwszCmdLine);
508 RTStrFree(pszCmdLine);
509 }
510 return rc;
511}
512
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette