VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/misc/getoptargv.cpp@ 27386

Last change on this file since 27386 was 27386, checked in by vboxsync, 15 years ago

iprt: Fixed RTGetOptArgvToString bug dealing with DOS slashes at the end of the string.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 14.1 KB
Line 
1/* $Id: getoptargv.cpp 27386 2010-03-15 22:39:19Z vboxsync $ */
2/** @file
3 * IPRT - Command Line Parsing, Argument Vector.
4 */
5
6/*
7 * Copyright (C) 2010 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 *
26 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
27 * Clara, CA 95054 USA or visit http://www.sun.com if you need
28 * additional information or have any questions.
29 */
30
31/*******************************************************************************
32* Header Files *
33*******************************************************************************/
34#include <iprt/getopt.h>
35#include "internal/iprt.h"
36
37#include <iprt/asm.h>
38#include <iprt/assert.h>
39#include <iprt/err.h>
40#include <iprt/mem.h>
41#include <iprt/string.h>
42
43
44/*******************************************************************************
45* Global Variables *
46*******************************************************************************/
47/**
48 * Array indexed by the quoting type and 7-bit ASCII character.
49 *
50 * We include some extra stuff here that the corresponding shell would normally
51 * require qouting of.
52 */
53static uint8_t const g_abmQuoteChars[RTGETOPTARGV_CNV_QUOTE_MASK + 1][128/8] =
54{
55 { 0xfe, 0xff, 0x0f, 0x00, 0x65, 0x00, 0x00, 0x50 },
56 { 0xfe, 0xff, 0x0f, 0x00, 0xd7, 0x07, 0x00, 0xd8 },
57};
58
59
60#if 0 /* To re-generate the bitmaps. */
61#include <stdio.h>
62int main()
63{
64 RT_ZERO(g_abmQuoteChars);
65
66# define SET_ALL(ch) \
67 do { \
68 for (size_t iType = 0; iType <= RTGETOPTARGV_CNV_QUOTE_MASK; iType++) \
69 ASMBitSet(&g_abmQuoteChars[iType], (ch)); \
70 } while (0)
71# define SET(ConstSuffix, ch) \
72 ASMBitSet(&g_abmQuoteChars[RTGETOPTARGV_CNV_QUOTE_##ConstSuffix], (ch));
73
74 /* just flag all the control chars as in need of quoting. */
75 for (char ch = 1; ch < 20; ch++)
76 SET_ALL(ch);
77
78 /* ... and space of course */
79 SET_ALL(' ');
80
81 /* MS CRT / CMD.EXE: */
82 SET(MS_CRT, '"')
83 SET(MS_CRT, '&')
84 SET(MS_CRT, '>')
85 SET(MS_CRT, '<')
86 SET(MS_CRT, '|')
87 SET(MS_CRT, '%')
88
89 /* Bourne shell: */
90 SET(BOURNE_SH, '!');
91 SET(BOURNE_SH, '"');
92 SET(BOURNE_SH, '$');
93 SET(BOURNE_SH, '&');
94 SET(BOURNE_SH, '(');
95 SET(BOURNE_SH, ')');
96 SET(BOURNE_SH, '*');
97 SET(BOURNE_SH, ';');
98 SET(BOURNE_SH, '<');
99 SET(BOURNE_SH, '>');
100 SET(BOURNE_SH, '?');
101 SET(BOURNE_SH, '[');
102 SET(BOURNE_SH, '\'');
103 SET(BOURNE_SH, '\\');
104 SET(BOURNE_SH, '`');
105 SET(BOURNE_SH, '|');
106 SET(BOURNE_SH, '~');
107
108 for (size_t iType = 0; iType <= RTGETOPTARGV_CNV_QUOTE_MASK; iType++)
109 {
110 printf(" {");
111 for (size_t iByte = 0; iByte < 8; iByte++)
112 printf(iByte == 0 ? " 0x%02x" : ", 0x%02x", g_abmQuoteChars[iType][iByte]);
113 printf(" },\n");
114 }
115 return 0;
116}
117#endif /* To re-generate the bitmaps. */
118
119
120/**
121 * Look for an unicode code point in the separator string.
122 *
123 * @returns true if it's a separator, false if it isn't.
124 * @param Cp The code point.
125 * @param pszSeparators The separators.
126 */
127static bool rtGetOptIsUniCpInString(RTUNICP Cp, const char *pszSeparators)
128{
129 /* This could be done in a more optimal fashion. Probably worth a
130 separate RTStr function at some point. */
131 for (;;)
132 {
133 RTUNICP CpSep;
134 int rc = RTStrGetCpEx(&pszSeparators, &CpSep);
135 AssertRCReturn(rc, false);
136 if (CpSep == Cp)
137 return true;
138 if (!CpSep)
139 return false;
140 }
141}
142
143
144/**
145 * Look for an 7-bit ASCII character in the separator string.
146 *
147 * @returns true if it's a separator, false if it isn't.
148 * @param ch The character.
149 * @param pszSeparators The separators.
150 * @param cchSeparators The number of separators chars.
151 */
152DECLINLINE(bool) rtGetOptIsAsciiInSet(char ch, const char *pszSeparators, size_t cchSeparators)
153{
154 switch (cchSeparators)
155 {
156 case 8: if (ch == pszSeparators[7]) return true;
157 case 7: if (ch == pszSeparators[6]) return true;
158 case 6: if (ch == pszSeparators[5]) return true;
159 case 5: if (ch == pszSeparators[4]) return true;
160 case 4: if (ch == pszSeparators[3]) return true;
161 case 3: if (ch == pszSeparators[2]) return true;
162 case 2: if (ch == pszSeparators[1]) return true;
163 case 1: if (ch == pszSeparators[0]) return true;
164 return false;
165 default:
166 return memchr(pszSeparators, ch, cchSeparators) != NULL;
167 }
168}
169
170
171/**
172 * Checks if the character is in the set of separators
173 *
174 * @returns true if it is, false if it isn't.
175 *
176 * @param Cp The code point.
177 * @param pszSeparators The separators.
178 * @param cchSeparators The length of @a pszSeparators.
179 */
180DECL_FORCE_INLINE(bool) rtGetOptIsCpInSet(RTUNICP Cp, const char *pszSeparators, size_t cchSeparators)
181{
182 if (RT_LIKELY(Cp <= 127))
183 return rtGetOptIsAsciiInSet((char)Cp, pszSeparators, cchSeparators);
184 return rtGetOptIsUniCpInString(Cp, pszSeparators);
185}
186
187
188/**
189 * Skips any delimiters at the start of the string that is pointed to.
190 *
191 * @returns VINF_SUCCESS or RTStrGetCpEx status code.
192 * @param ppszSrc Where to get and return the string pointer.
193 * @param pszSeparators The separators.
194 * @param cchSeparators The length of @a pszSeparators.
195 */
196static int rtGetOptSkipDelimiters(const char **ppszSrc, const char *pszSeparators, size_t cchSeparators)
197{
198 const char *pszSrc = *ppszSrc;
199 const char *pszRet;
200 for (;;)
201 {
202 pszRet = pszSrc;
203 RTUNICP Cp;
204 int rc = RTStrGetCpEx(&pszSrc, &Cp);
205 if (RT_FAILURE(rc))
206 {
207 *ppszSrc = pszRet;
208 return rc;
209 }
210 if ( !Cp
211 || !rtGetOptIsCpInSet(Cp, pszSeparators, cchSeparators))
212 break;
213 }
214
215 *ppszSrc = pszRet;
216 return VINF_SUCCESS;
217}
218
219
220RTDECL(int) RTGetOptArgvFromString(char ***ppapszArgv, int *pcArgs, const char *pszCmdLine, const char *pszSeparators)
221{
222 /*
223 * Some input validation.
224 */
225 AssertPtr(pszCmdLine);
226 AssertPtr(pcArgs);
227 AssertPtr(ppapszArgv);
228 if (!pszSeparators)
229 pszSeparators = " \t\n\r";
230 else
231 AssertPtr(pszSeparators);
232 size_t const cchSeparators = strlen(pszSeparators);
233 AssertReturn(cchSeparators > 0, VERR_INVALID_PARAMETER);
234
235 /*
236 * Parse the command line and chop off it into argv individual argv strings.
237 */
238 int rc = VINF_SUCCESS;
239 const char *pszSrc = pszCmdLine;
240 char *pszDup = (char *)RTMemAlloc(strlen(pszSrc) + 1);
241 char *pszDst = pszDup;
242 if (!pszDup)
243 return VERR_NO_STR_MEMORY;
244 char **papszArgs = NULL;
245 unsigned iArg = 0;
246 while (*pszSrc)
247 {
248 /* Skip stuff */
249 rc = rtGetOptSkipDelimiters(&pszSrc, pszSeparators, cchSeparators);
250 if (RT_FAILURE(rc))
251 break;
252 if (!*pszSrc)
253 break;
254
255 /* Start a new entry. */
256 if ((iArg % 32) == 0)
257 {
258 void *pvNew = RTMemRealloc(papszArgs, (iArg + 33) * sizeof(char *));
259 if (!pvNew)
260 {
261 rc = VERR_NO_MEMORY;
262 break;
263 }
264 papszArgs = (char **)pvNew;
265 }
266 papszArgs[iArg++] = pszDst;
267
268 /* Parse and copy the string over. */
269 RTUNICP CpQuote = 0;
270 RTUNICP Cp;
271 for (;;)
272 {
273 rc = RTStrGetCpEx(&pszSrc, &Cp);
274 if (RT_FAILURE(rc) || !Cp)
275 break;
276 if (!CpQuote)
277 {
278 if (Cp == '"' || Cp == '\'')
279 CpQuote = Cp;
280 else if (rtGetOptIsCpInSet(Cp, pszSeparators, cchSeparators))
281 break;
282 else
283 pszDst = RTStrPutCp(pszDst, Cp);
284 }
285 else if (CpQuote != Cp)
286 pszDst = RTStrPutCp(pszDst, Cp);
287 else
288 CpQuote = 0;
289 }
290 *pszDst++ = '\0';
291 if (RT_FAILURE(rc) || !Cp)
292 break;
293 }
294
295 if (RT_FAILURE(rc))
296 {
297 RTMemFree(pszDup);
298 RTMemFree(papszArgs);
299 return rc;
300 }
301
302 /*
303 * Terminate the array.
304 * Check for empty string to make sure we've got an array.
305 */
306 if (iArg == 0)
307 {
308 RTMemFree(pszDup);
309 papszArgs = (char **)RTMemAlloc(1 * sizeof(char *));
310 if (!papszArgs)
311 return VERR_NO_MEMORY;
312 }
313 papszArgs[iArg] = NULL;
314
315 *pcArgs = iArg;
316 *ppapszArgv = papszArgs;
317 return VINF_SUCCESS;
318}
319
320
321RTDECL(void) RTGetOptArgvFree(char **papszArgv)
322{
323 if (papszArgv)
324 {
325 RTMemFree(papszArgv[0]);
326 RTMemFree(papszArgv);
327 }
328}
329
330
331/**
332 * Checks if the argument needs quoting or not.
333 *
334 * @returns true if it needs, false if it don't.
335 * @param pszArg The argument.
336 * @param fFlags Quoting style.
337 * @param pcch Where to store the argument length when quoting
338 * is not required. (optimization)
339 */
340DECLINLINE(bool) rtGetOpArgvRequiresQuoting(const char *pszArg, uint32_t fFlags, size_t *pcch)
341{
342 char const *psz = pszArg;
343 unsigned char ch;
344 while ((ch = (unsigned char)*psz))
345 {
346 if ( ch < 128
347 && ASMBitTest(&g_abmQuoteChars[fFlags & RTGETOPTARGV_CNV_QUOTE_MASK], ch))
348 return true;
349 psz++;
350 }
351
352 *pcch = psz - pszArg;
353 return false;
354}
355
356
357/**
358 * Grows the command line string buffer.
359 *
360 * @returns VINF_SUCCESS or VERR_NO_STR_MEMORY.
361 * @param ppszCmdLine Pointer to the command line string pointer.
362 * @param pcbCmdLineAlloc Pointer to the allocation length variable.
363 * @param cchMin The minimum size to grow with, kind of.
364 */
365static int rtGetOptArgvToStringGrow(char **ppszCmdLine, size_t *pcbCmdLineAlloc, size_t cchMin)
366{
367 size_t cb = *pcbCmdLineAlloc;
368 while (cb < cchMin)
369 cb *= 2;
370 cb *= 2;
371 *pcbCmdLineAlloc = cb;
372 return RTStrRealloc(ppszCmdLine, cb);
373}
374
375/**
376 * Checks if we have a sequence of DOS slashes followed by a double quote char.
377 *
378 * @returns true / false accordingly.
379 * @param psz The string.
380 */
381DECLINLINE(bool) rtGetOptArgvMsCrtIsSlashQuote(const char *psz)
382{
383 while (*psz == '\\')
384 psz++;
385 return *psz == '"' || *psz == '\0';
386}
387
388
389RTDECL(int) RTGetOptArgvToString(char **ppszCmdLine, const char * const *papszArgv, uint32_t fFlags)
390{
391 AssertReturn(!(fFlags & ~RTGETOPTARGV_CNV_QUOTE_MASK), VERR_INVALID_PARAMETER);
392 AssertReturn((fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) == RTGETOPTARGV_CNV_QUOTE_MS_CRT, VERR_NOT_IMPLEMENTED);
393
394#define PUT_CH(ch) \
395 if (RT_UNLIKELY(off + 1 >= cbCmdLineAlloc)) { \
396 rc = rtGetOptArgvToStringGrow(&pszCmdLine, &cbCmdLineAlloc, 1); \
397 if (RT_FAILURE(rc)) \
398 break; \
399 } \
400 pszCmdLine[off++] = (ch)
401
402#define PUT_PSZ(psz, cch) \
403 if (RT_UNLIKELY(off + (cch) >= cbCmdLineAlloc)) { \
404 rc = rtGetOptArgvToStringGrow(&pszCmdLine, &cbCmdLineAlloc, (cch)); \
405 if (RT_FAILURE(rc)) \
406 break; \
407 } \
408 memcpy(&pszCmdLine[off], (psz), (cch)); \
409 off += (cch);
410#define PUT_SZ(sz) PUT_PSZ(sz, sizeof(sz) - 1)
411
412 /*
413 * Take the realloc approach, it requires less code and is probably more
414 * efficient than figuring out the size first.
415 */
416 int rc = VINF_SUCCESS;
417 size_t off = 0;
418 size_t cbCmdLineAlloc = 256;
419 char *pszCmdLine = RTStrAlloc(256);
420 if (!pszCmdLine)
421 return VERR_NO_STR_MEMORY;
422
423 for (size_t i = 0; papszArgv[i]; i++)
424 {
425 if (i > 0)
426 {
427 PUT_CH(' ');
428 }
429
430 /* does it need quoting? */
431 const char *pszArg = papszArgv[i];
432 size_t cchArg;
433 if (!rtGetOpArgvRequiresQuoting(pszArg, fFlags, &cchArg))
434 {
435 /* No quoting needed, just append the argument. */
436 PUT_PSZ(pszArg, cchArg);
437 }
438 else if ((fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) == RTGETOPTARGV_CNV_QUOTE_MS_CRT)
439 {
440 /*
441 * Microsoft CRT quoting. Quote the whole argument in double
442 * quotes to make it easier to read and code.
443 */
444 PUT_CH('"');
445 char ch;
446 while ((ch = *pszArg++))
447 {
448 if ( ch == '\\'
449 && rtGetOptArgvMsCrtIsSlashQuote(pszArg))
450 {
451 PUT_SZ("\\\\");
452 }
453 else if (ch == '"')
454 {
455 PUT_SZ("\\\"");
456 }
457 else
458 {
459 PUT_CH(ch);
460 }
461 }
462 PUT_CH('"');
463 }
464 else /* bourne shell */
465 {
466 AssertFailed(/*later*/);
467 }
468 }
469
470 /* Set return value / cleanup. */
471 if (RT_SUCCESS(rc))
472 {
473 pszCmdLine[off] = '\0';
474 *ppszCmdLine = pszCmdLine;
475 }
476 else
477 RTStrFree(pszCmdLine);
478#undef PUT_SZ
479#undef PUT_PSZ
480#undef PUT_CH
481 return rc;
482}
483
484
485RTDECL(int) RTGetOptArgvToUtf16String(PRTUTF16 *ppwszCmdLine, const char * const *papszArgv, uint32_t fFlags)
486{
487 char *pszCmdLine;
488 int rc = RTGetOptArgvToString(&pszCmdLine, papszArgv, fFlags);
489 if (RT_SUCCESS(rc))
490 {
491 rc = RTStrToUtf16(pszCmdLine, ppwszCmdLine);
492 RTStrFree(pszCmdLine);
493 }
494 return rc;
495}
496
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette