VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/path/RTPathGlob.cpp@ 62477

Last change on this file since 62477 was 62477, checked in by vboxsync, 8 years ago

(C) 2016

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 83.6 KB
Line 
1/* $Id: RTPathGlob.cpp 62477 2016-07-22 18:27:37Z vboxsync $ */
2/** @file
3 * IPRT - RTPathGlob
4 */
5
6/*
7 * Copyright (C) 2006-2016 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*********************************************************************************************************************************
29* Header Files *
30*********************************************************************************************************************************/
31#include "internal/iprt.h"
32#include <iprt/path.h>
33
34#include <iprt/asm.h>
35#include <iprt/assert.h>
36#include <iprt/buildconfig.h>
37#include <iprt/ctype.h>
38#include <iprt/dir.h>
39#include <iprt/env.h>
40#include <iprt/err.h>
41#include <iprt/mem.h>
42#include <iprt/string.h>
43#include <iprt/uni.h>
44
45#if defined(RT_OS_WINDOWS)
46# include <Windows.h>
47# include "../../r3/win/internal-r3-win.h"
48
49#elif defined(RT_OS_OS2)
50# define INCL_BASE
51# include <os2.h>
52# undef RT_MAX /* collision */
53
54#endif
55
56
57/*********************************************************************************************************************************
58* Defined Constants And Macros *
59*********************************************************************************************************************************/
/** Upper limit on the number of results. */
#define RTPATHGLOB_MAX_RESULTS          _32K
/** Upper limit on the number of zero-or-more wildcards in one pattern.
 * Bounds the stack usage, the recursion depth and the execution time. */
#define RTPATHMATCH_MAX_ZERO_OR_MORE    24
/** Upper limit on the number of variable items. */
#define RTPATHMATCH_MAX_VAR_ITEMS       _4K
67
68
69
70/*********************************************************************************************************************************
71* Structures and Typedefs *
72*********************************************************************************************************************************/
/**
 * Opcodes of the path matching program.
 */
typedef enum RTPATHMATCHOP
{
    /** Invalid zero value. */
    RTPATHMATCHOP_INVALID = 0,
    /** End of pattern: Match if the input is exhausted as well. */
    RTPATHMATCHOP_RETURN_MATCH_IF_AT_END,
    /** Trailing asterisk: Match unconditionally. */
    RTPATHMATCHOP_RETURN_MATCH,
    /** Lone asterisk: Match unless the input is '.' or '..'. */
    RTPATHMATCHOP_RETURN_MATCH_EXCEPT_DOT_AND_DOTDOT,
    /** Plain text: Compare case sensitively. */
    RTPATHMATCHOP_STRCMP,
    /** Plain text: Compare case insensitively. */
    RTPATHMATCHOP_STRICMP,
    /** Question mark: Skip exactly one code point. */
    RTPATHMATCHOP_SKIP_ONE_CODEPOINT,
    /** Question marks: Skip exactly RTPATHMATCHCORE::cch code points. */
    RTPATHMATCHOP_SKIP_MULTIPLE_CODEPOINTS,
    /** Char set: The next code point must be a member of the ASCII-7 set given
     * by RTPATHMATCHCORE::pch & RTPATHMATCHCORE::cch.  No ranges. */
    RTPATHMATCHOP_CODEPOINT_IN_SET_ASCII7,
    /** Char set: The next code point must not be a member of the ASCII-7 set
     * given by RTPATHMATCHCORE::pch & RTPATHMATCHCORE::cch.  No ranges. */
    RTPATHMATCHOP_CODEPOINT_NOT_IN_SET_ASCII7,
    /** Char set: The next code point must be a member of the extended set given
     * by RTPATHMATCHCORE::pch & RTPATHMATCHCORE::cch.  Ranges, UTF-8. */
    RTPATHMATCHOP_CODEPOINT_IN_SET_EXTENDED,
    /** Char set: The next code point must not be a member of the extended set
     * given by RTPATHMATCHCORE::pch & RTPATHMATCHCORE::cch.  Ranges, UTF-8. */
    RTPATHMATCHOP_CODEPOINT_NOT_IN_SET_EXTENDED,
    /** Variable: Compare against the variable value case sensitively, the
     * variable table index is in RTPATHMATCHCORE::uOp2. */
    RTPATHMATCHOP_VARIABLE_VALUE_CMP,
    /** Variable: Compare against the variable value case insensitively, the
     * variable table index is in RTPATHMATCHCORE::uOp2. */
    RTPATHMATCHOP_VARIABLE_VALUE_ICMP,
    /** Asterisk: Match zero or more code points, leaving at least
     * RTPATHMATCHCORE::cch code points after it. */
    RTPATHMATCHOP_ZERO_OR_MORE,
    /** Asterisk: Match zero or more code points, leaving at least
     * RTPATHMATCHCORE::cch code points after it, unless it's '.' or '..'. */
    RTPATHMATCHOP_ZERO_OR_MORE_EXCEPT_DOT_AND_DOTDOT,
    /** First value past the valid operations. */
    RTPATHMATCHOP_END
} RTPATHMATCHOP;
120
121/**
122 * Matching instruction.
123 */
124typedef struct RTPATHMATCHCORE
125{
126 /** The action to take. */
127 RTPATHMATCHOP enmOpCode;
128 /** Generic value operand. */
129 uint16_t uOp2;
130 /** Generic length operand. */
131 uint16_t cch;
132 /** Generic string pointer operand. */
133 const char *pch;
134} RTPATHMATCHCORE;
135/** Pointer to a matching instruction. */
136typedef RTPATHMATCHCORE *PRTPATHMATCHCORE;
137/** Pointer to a const matching instruction. */
138typedef RTPATHMATCHCORE const *PCRTPATHMATCHCORE;
139
140/**
141 * Path matching instruction allocator.
142 */
143typedef struct RTPATHMATCHALLOC
144{
145 /** Allocated array of instructions. */
146 PRTPATHMATCHCORE paInstructions;
147 /** Index of the next free entry in paScratch. */
148 uint32_t iNext;
149 /** Number of instructions allocated. */
150 uint32_t cAllocated;
151} RTPATHMATCHALLOC;
152/** Pointer to a matching instruction allocator. */
153typedef RTPATHMATCHALLOC *PRTPATHMATCHALLOC;
154
/**
 * Cache for the path matching, primarily meant for variables such as the PATH.
 */
typedef struct RTPATHMATCHCACHE
{
    /** Placeholder member.
     * @todo optimize later. */
    uint32_t            iNothingYet;
} RTPATHMATCHCACHE;
/** Pointer to a path matching cache. */
typedef RTPATHMATCHCACHE *PRTPATHMATCHCACHE;
165
166
167
/**
 * Parsed path entry (one per pattern component).
 */
typedef struct RTPATHGLOBPPE
{
    /** Normal: Index into RTPATHGLOB::MatchInstrAlloc.paInstructions. */
    uint32_t    iMatchProg      : 16;
    /** Set for a normal entry, i.e. one matched by running iMatchProg. */
    uint32_t    fNormal         : 1;
    /** !fNormal: Plain name which can be handled without enumerating the
     * whole directory, except when the file system is case sensitive and the
     * globbing is not (to be figured out on a per directory basis). */
    uint32_t    fPlain          : 1;
    /** !fNormal: Matches zero or more subdirectories. */
    uint32_t    fStarStar       : 1;
    /** !fNormal: The entire component is a variable expansion. */
    uint32_t    fExpVariable    : 1;

    /** Filter: Set if only directories are matched. */
    uint32_t    fDir            : 1;
    /** Set if this is the final component. */
    uint32_t    fFinal          : 1;

    /** Bits not in use. */
    uint32_t    fReserved       : 2+8;
} RTPATHGLOBPPE;
193
194
195typedef struct RTPATHGLOB
196{
197 /** Path buffer. */
198 char szPath[RTPATH_MAX];
199 /** Temporary buffers. */
200 union
201 {
202 /** File system object info structure. */
203 RTFSOBJINFO ObjInfo;
204 /** Directory entry buffer. */
205 RTDIRENTRY DirEntry;
206 /** Padding the buffer to an unreasonably large size. */
207 uint8_t abPadding[RTPATH_MAX + sizeof(RTDIRENTRY)];
208 } u;
209
210
211 /** Where to insert the next one.*/
212 PRTPATHGLOBENTRY *ppNext;
213 /** The head pointer. */
214 PRTPATHGLOBENTRY pHead;
215 /** Result count. */
216 uint32_t cResults;
217 /** Counts path overflows. */
218 uint32_t cPathOverflows;
219 /** The input flags. */
220 uint32_t fFlags;
221 /** Matching instruction allocator. */
222 RTPATHMATCHALLOC MatchInstrAlloc;
223 /** Matching state. */
224 RTPATHMATCHCACHE MatchCache;
225
226 /** The pattern string. */
227 const char *pszPattern;
228 /** The parsed path. */
229 PRTPATHPARSED pParsed;
230 /** The component to start with. */
231 uint16_t iFirstComp;
232 /** The corresponding path offset (previous components already present). */
233 uint16_t offFirstPath;
234 /** Path component information we need. */
235 RTPATHGLOBPPE aComps[1];
236} RTPATHGLOB;
237typedef RTPATHGLOB *PRTPATHGLOB;
238
239
240/**
241 * Matching variable lookup table.
242 * Currently so small we don't bother sorting it and doing binary lookups.
243 */
244typedef struct RTPATHMATCHVAR
245{
246 /** The variable name. */
247 const char *pszName;
248 /** The variable name length. */
249 uint16_t cchName;
250 /** Only available as the verify first component. */
251 bool fFirstOnly;
252
253 /**
254 * Queries a given variable value.
255 *
256 * @returns IPRT status code.
257 * @retval VERR_BUFFER_OVERFLOW
258 * @retval VERR_TRY_AGAIN if the caller should skip this value item and try the
259 * next one instead (e.g. env var not present).
260 * @retval VINF_EOF when retrieving the last one, if possible.
261 * @retval VERR_EOF when @a iItem is past the item space.
262 *
263 * @param iItem The variable value item to retrieve. (A variable may
264 * have more than one value, e.g. 'BothProgramFile' on a
265 * 64-bit system or 'Path'.)
266 * @param pszBuf Where to return the value.
267 * @param cbBuf The buffer size.
268 * @param pcchValue Where to return the length of the return string.
269 * @param pCache Pointer to the path matching cache. May speed up
270 * enumerating PATH items and similar.
271 */
272 DECLCALLBACKMEMBER(int, pfnQuery)(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue, PRTPATHMATCHCACHE pCache);
273
274 /**
275 * Matching method, optional.
276 *
277 * @returns IPRT status code.
278 * @retval VINF_SUCCESS on match.
279 * @retval VERR_MISMATCH on mismatch.
280 *
281 * @param pszMatch String to match with (not terminated).
282 * @param cchMatch The length of what we match with.
283 * @param fIgnoreCase Whether to ignore case or not when comparing.
284 * @param pcchMatched Where to return the length of the match (value length).
285 */
286 DECLCALLBACKMEMBER(int, pfnMatch)(const char *pchMatch, size_t cchMatch, bool fIgnoreCase, size_t *pcchMatched);
287
288} RTPATHMATCHVAR;
289
290
291/*********************************************************************************************************************************
292* Internal Functions *
293*********************************************************************************************************************************/
294static int rtPathGlobExecRecursiveStarStar(PRTPATHGLOB pGlob, size_t offPath, uint32_t iStarStarComp, size_t offStarStarPath);
295static int rtPathGlobExecRecursiveVarExp(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp);
296static int rtPathGlobExecRecursivePlainText(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp);
297static int rtPathGlobExecRecursiveGeneric(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp);
298
299
/**
 * Implements the two variable access functions for a simple one value variable.
 *
 * The query function returns the value as item 0 (VINF_EOF) and VERR_EOF for
 * any other item.  The match function succeeds when the input starts with the
 * value; the input must be at least as long as the value, so the comparison
 * never reads past the (unterminated) input.
 *
 * @param   a_Name          Suffix of the generated rtPathVarQuery_ and
 *                          rtPathVarMatch_ function names.
 * @param   a_GetStrExpr    Expression yielding the zero terminated value.
 */
#define RTPATHMATCHVAR_SIMPLE(a_Name, a_GetStrExpr) \
    static DECLCALLBACK(int) RT_CONCAT(rtPathVarQuery_,a_Name)(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue, \
                                                               PRTPATHMATCHCACHE pCache) \
    { \
        if (iItem == 0) \
        { \
            const char *pszValue = a_GetStrExpr; \
            size_t      cchValue = strlen(pszValue); \
            if (cchValue + 1 <= cbBuf) \
            { \
                memcpy(pszBuf, pszValue, cchValue + 1); \
                *pcchValue = cchValue; \
                return VINF_EOF; \
            } \
            return VERR_BUFFER_OVERFLOW; \
        } \
        NOREF(pCache);\
        return VERR_EOF; \
    } \
    static DECLCALLBACK(int) RT_CONCAT(rtPathVarMatch_,a_Name)(const char *pchMatch, size_t cchMatch, bool fIgnoreCase, \
                                                               size_t *pcchMatched) \
    { \
        const char *pszValue = a_GetStrExpr; \
        size_t      cchValue = strlen(pszValue); \
        /* The value must fit within the input, we compare cchValue bytes of it. */ \
        if (   cchValue <= cchMatch \
            && (  !fIgnoreCase \
                ? memcmp(pszValue, pchMatch, cchValue) == 0 \
                : RTStrNICmp(pszValue, pchMatch, cchValue) == 0) ) \
        { \
            *pcchMatched = cchValue; \
            return VINF_SUCCESS; \
        } \
        return VERR_MISMATCH; \
    } \
    typedef int RT_CONCAT(DummyColonType_,a_Name)
338
/**
 * Implements mapping a glob variable to an environment variable.
 *
 * The query function returns the environment variable value as item 0
 * (VINF_EOF); a missing variable and any other item give VERR_EOF.  The match
 * function succeeds when the input starts with the value; the input must be at
 * least as long as the value, so the comparison never reads past the
 * (unterminated) input.
 *
 * @param   a_Name          Suffix of the generated rtPathVarQuery_ and
 *                          rtPathVarMatch_ function names.
 * @param   a_pszEnvVar     The name of the environment variable.
 * @param   a_cbMaxValue    Size of the stack buffer the match function uses
 *                          for the value.
 */
#define RTPATHMATCHVAR_SIMPLE_ENVVAR(a_Name, a_pszEnvVar, a_cbMaxValue) \
    static DECLCALLBACK(int) RT_CONCAT(rtPathVarQuery_,a_Name)(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue, \
                                                               PRTPATHMATCHCACHE pCache) \
    { \
        if (iItem == 0) \
        { \
            int rc = RTEnvGetEx(RTENV_DEFAULT, a_pszEnvVar, pszBuf, cbBuf, pcchValue); \
            if (RT_SUCCESS(rc)) \
                return VINF_EOF; \
            if (rc != VERR_ENV_VAR_NOT_FOUND) \
                return rc; \
        } \
        NOREF(pCache);\
        return VERR_EOF; \
    } \
    static DECLCALLBACK(int) RT_CONCAT(rtPathVarMatch_,a_Name)(const char *pchMatch, size_t cchMatch, bool fIgnoreCase, \
                                                               size_t *pcchMatched) \
    { \
        char    szValue[a_cbMaxValue]; \
        size_t  cchValue; \
        int rc = RTEnvGetEx(RTENV_DEFAULT, a_pszEnvVar, szValue, sizeof(szValue), &cchValue); \
        /* The value must fit within the input, we compare cchValue bytes of it. */ \
        if (   RT_SUCCESS(rc) \
            && cchValue <= cchMatch \
            && (  !fIgnoreCase \
                ? memcmp(szValue, pchMatch, cchValue) == 0 \
                : RTStrNICmp(szValue, pchMatch, cchValue) == 0) ) \
        { \
            *pcchMatched = cchValue; \
            return VINF_SUCCESS; \
        } \
        return VERR_MISMATCH; \
    } \
    typedef int RT_CONCAT(DummyColonType_,a_Name)
375
/**
 * Implements mapping a glob variable to multiple environment variable values.
 *
 * The query function returns the value of the @a iItem'th environment variable
 * (VINF_EOF for the last table entry), VERR_TRY_AGAIN when that variable is
 * not set, and VERR_EOF past the table.  The match function tries the values
 * in table order and succeeds on the first one the input starts with; the
 * input must be at least as long as the value, so the comparison never reads
 * past the (unterminated) input.
 *
 * @param   a_Name          The variable name.
 * @param   a_apszVarNames  Assumes to be a global variable that RT_ELEMENTS
 *                          works correctly on.
 * @param   a_cbMaxValue    The max expected value size.
 */
#define RTPATHMATCHVAR_MULTIPLE_ENVVARS(a_Name, a_apszVarNames, a_cbMaxValue) \
    static DECLCALLBACK(int) RT_CONCAT(rtPathVarQuery_,a_Name)(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue, \
                                                               PRTPATHMATCHCACHE pCache) \
    { \
        if (iItem < RT_ELEMENTS(a_apszVarNames)) \
        { \
            int rc = RTEnvGetEx(RTENV_DEFAULT, a_apszVarNames[iItem], pszBuf, cbBuf, pcchValue); \
            if (RT_SUCCESS(rc)) \
                return iItem + 1 == RT_ELEMENTS(a_apszVarNames) ? VINF_EOF : VINF_SUCCESS; \
            if (rc == VERR_ENV_VAR_NOT_FOUND) \
                rc = VERR_TRY_AGAIN; \
            return rc; \
        } \
        NOREF(pCache);\
        return VERR_EOF; \
    } \
    static DECLCALLBACK(int) RT_CONCAT(rtPathVarMatch_,a_Name)(const char *pchMatch, size_t cchMatch, bool fIgnoreCase, \
                                                               size_t *pcchMatched) \
    { \
        for (uint32_t iItem = 0; iItem < RT_ELEMENTS(a_apszVarNames); iItem++) \
        { \
            char    szValue[a_cbMaxValue]; \
            size_t  cchValue; \
            int rc = RTEnvGetEx(RTENV_DEFAULT, a_apszVarNames[iItem], szValue, sizeof(szValue), &cchValue);\
            /* The value must fit within the input, we compare cchValue bytes of it. */ \
            if (   RT_SUCCESS(rc) \
                && cchValue <= cchMatch \
                && (  !fIgnoreCase \
                    ? memcmp(szValue, pchMatch, cchValue) == 0 \
                    : RTStrNICmp(szValue, pchMatch, cchValue) == 0) ) \
            { \
                *pcchMatched = cchValue; \
                return VINF_SUCCESS; \
            } \
        } \
        return VERR_MISMATCH; \
    } \
    typedef int RT_CONCAT(DummyColonType_,a_Name)
421
422
423RTPATHMATCHVAR_SIMPLE(Arch, RTBldCfgTargetArch());
424RTPATHMATCHVAR_SIMPLE(Bits, RT_XSTR(ARCH_BITS));
425#ifdef RT_OS_WINDOWS
426RTPATHMATCHVAR_SIMPLE_ENVVAR(WinAppData, "AppData", RTPATH_MAX);
427RTPATHMATCHVAR_SIMPLE_ENVVAR(WinProgramData, "ProgramData", RTPATH_MAX);
428RTPATHMATCHVAR_SIMPLE_ENVVAR(WinProgramFiles, "ProgramFiles", RTPATH_MAX);
429RTPATHMATCHVAR_SIMPLE_ENVVAR(WinCommonProgramFiles, "CommonProgramFiles", RTPATH_MAX);
430# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
431RTPATHMATCHVAR_SIMPLE_ENVVAR(WinOtherProgramFiles, "ProgramFiles(x86)", RTPATH_MAX);
432RTPATHMATCHVAR_SIMPLE_ENVVAR(WinOtherCommonProgramFiles, "CommonProgramFiles(x86)", RTPATH_MAX);
433# else
434# error "Port ME!"
435# endif
436static const char * const a_apszWinProgramFilesVars[] =
437{
438 "ProgramFiles",
439# ifdef RT_ARCH_AMD64
440 "ProgramFiles(x86)",
441# endif
442};
443RTPATHMATCHVAR_MULTIPLE_ENVVARS(WinAllProgramFiles, a_apszWinProgramFilesVars, RTPATH_MAX);
444static const char * const a_apszWinCommonProgramFilesVars[] =
445{
446 "CommonProgramFiles",
447# ifdef RT_ARCH_AMD64
448 "CommonProgramFiles(x86)",
449# endif
450};
451RTPATHMATCHVAR_MULTIPLE_ENVVARS(WinAllCommonProgramFiles, a_apszWinCommonProgramFilesVars, RTPATH_MAX);
452#endif
453
454
455/**
456 * @interface_method_impl{RTPATHMATCHVAR,pfnQuery, Enumerates the PATH}
457 */
458static DECLCALLBACK(int) rtPathVarQuery_Path(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue,
459 PRTPATHMATCHCACHE pCache)
460{
461 /*
462 * Query the PATH value.
463 */
464/** @todo cache this in pCache with iItem and offset. */
465 char *pszPathFree = NULL;
466 char *pszPath = pszBuf;
467 size_t cchActual;
468 const char *pszVarNm = "PATH";
469 int rc = RTEnvGetEx(RTENV_DEFAULT, pszVarNm, pszPath, cbBuf, &cchActual);
470#ifdef RT_OS_WINDOWS
471 if (rc == VERR_ENV_VAR_NOT_FOUND)
472 rc = RTEnvGetEx(RTENV_DEFAULT, pszVarNm = "Path", pszPath, cbBuf, &cchActual);
473#endif
474 if (rc == VERR_BUFFER_OVERFLOW)
475 {
476 for (uint32_t iTry = 0; iTry < 10; iTry++)
477 {
478 size_t cbPathBuf = RT_ALIGN_Z(cchActual + 1 + 64 * iTry, 64);
479 pszPathFree = (char *)RTMemTmpAlloc(cbPathBuf);
480 rc = RTEnvGetEx(RTENV_DEFAULT, pszVarNm, pszPathFree, cbPathBuf, &cchActual);
481 if (RT_SUCCESS(rc))
482 break;
483 RTMemTmpFree(pszPathFree);
484 AssertReturn(cchActual >= cbPathBuf, VERR_INTERNAL_ERROR_3);
485 }
486 pszPath = pszPathFree;
487 }
488
489 /*
490 * Spool forward to the given PATH item.
491 */
492 rc = VERR_EOF;
493#if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2)
494 const char chSep = ';';
495#else
496 const char chSep = ':';
497#endif
498 while (*pszPath != '\0')
499 {
500 char *pchSep = strchr(pszPath, chSep);
501
502 /* We ignore empty strings, which is probably not entirely correct,
503 but works better on DOS based system with many entries added
504 without checking whether there is a trailing separator or not.
505 Thus, the current directory is only searched if a '.' is present
506 in the PATH. */
507 if (pchSep == pszPath)
508 pszPath++;
509 else if (iItem > 0)
510 {
511 /* If we didn't find a separator, the item doesn't exists. Quit. */
512 if (!pchSep)
513 break;
514
515 pszPath = pchSep + 1;
516 iItem--;
517 }
518 else
519 {
520 /* We've reached the item we wanted. */
521 size_t cchComp = pchSep ? pchSep - pszPath : strlen(pszPath);
522 if (cchComp < cbBuf)
523 {
524 if (pszBuf != pszPath)
525 memmove(pszBuf, pszPath, cchComp);
526 pszBuf[cchComp] = '\0';
527 rc = pchSep ? VINF_SUCCESS : VINF_EOF;
528 }
529 else
530 rc = VERR_BUFFER_OVERFLOW;
531 *pcchValue = cchComp;
532 break;
533 }
534 }
535
536 if (pszPathFree)
537 RTMemTmpFree(pszPathFree);
538 return rc;
539}
540
541
#if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2)
/**
 * @interface_method_impl{RTPATHMATCHVAR,pfnQuery,
 *      The system drive letter + colon.}.
 *
 * Item 0 is the two character drive specification (VINF_EOF), any other item
 * gives VERR_EOF.  The buffer must have room for at least three bytes.
 */
static DECLCALLBACK(int) rtPathVarQuery_DosSystemDrive(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue,
                                                       PRTPATHMATCHCACHE pCache)
{
    NOREF(pCache);
    if (iItem == 0)
    {
        AssertReturn(cbBuf >= 3, VERR_BUFFER_OVERFLOW);

# ifdef RT_OS_WINDOWS
        /* Take the drive from the start of the Windows directory.  A local
           UTF-16 buffer is used so the result doesn't depend on the size or
           alignment of the caller's byte buffer.  A return value that isn't
           below the buffer size means that nothing was stored. */
        AssertReturn(g_pfnGetSystemWindowsDirectoryW, VERR_SYMBOL_NOT_FOUND);
        RTUTF16 wszWinDir[MAX_PATH];
        UINT    cwcWinDir = g_pfnGetSystemWindowsDirectoryW(wszWinDir, MAX_PATH);
        if (   cwcWinDir >= 2
            && cwcWinDir < MAX_PATH)
        {
            RTUTF16 wcDrive = wszWinDir[0];
            if (   RT_C_IS_ALPHA(wcDrive)
                && wszWinDir[1] == ':')
            {
                pszBuf[0] = (char)wcDrive;
                pszBuf[1] = ':';
                pszBuf[2] = '\0';
                *pcchValue = 2;
                return VINF_EOF;
            }
        }
# else
        ULONG  ulDrive = ~(ULONG)0;
        APIRET rc = DosQuerySysInfo(QSV_BOOT_DRIVE, QSV_BOOT_DRIVE, &ulDrive, sizeof(ulDrive));
        ulDrive--; /* 1 = 'A' */
        /* ulDrive is now a zero based index, so it must be within 'A'..'Z'. */
        if (   rc == NO_ERROR
            && ulDrive <= (ULONG)('Z' - 'A'))
        {
            pszBuf[0] = (char)ulDrive + 'A';
            pszBuf[1] = ':';
            pszBuf[2] = '\0';
            *pcchValue = 2;
            return VINF_EOF;
        }
# endif
        return VERR_INTERNAL_ERROR_4;
    }
    return VERR_EOF;
}
#endif
592
593
#ifdef RT_OS_WINDOWS
/**
 * @interface_method_impl{RTPATHMATCHVAR,pfnQuery,
 *      The system root directory (C:\Windows).}.
 *
 * Item 0 is the Windows directory converted to UTF-8, any other item gives
 * VERR_EOF.  VERR_BUFFER_OVERFLOW is returned if the directory doesn't fit in
 * the MAX_PATH sized temporary buffer.
 */
static DECLCALLBACK(int) rtPathVarQuery_WinSystemRoot(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue,
                                                      PRTPATHMATCHCACHE pCache)
{
    NOREF(pCache);
    if (iItem == 0)
    {
        Assert(pszBuf); Assert(cbBuf);
        AssertReturn(g_pfnGetSystemWindowsDirectoryW, VERR_SYMBOL_NOT_FOUND);
        RTUTF16 wszSystemRoot[MAX_PATH];
        UINT cchSystemRoot = g_pfnGetSystemWindowsDirectoryW(wszSystemRoot, MAX_PATH);
        /* A length that isn't below the buffer size is the required size,
           nothing has been stored in the buffer then. */
        if (cchSystemRoot >= RT_ELEMENTS(wszSystemRoot))
            return VERR_BUFFER_OVERFLOW;
        if (cchSystemRoot > 0)
            return RTUtf16ToUtf8Ex(wszSystemRoot, cchSystemRoot, &pszBuf, cbBuf, pcchValue);
        return RTErrConvertFromWin32(GetLastError());
    }
    return VERR_EOF;
}
#endif
615
/* The function generator macros are no longer needed. */
#undef RTPATHMATCHVAR_SIMPLE
#undef RTPATHMATCHVAR_SIMPLE_ENVVAR
#undef RTPATHMATCHVAR_MULTIPLE_ENVVARS
619
620/**
621 * Variables.
622 */
623static RTPATHMATCHVAR const g_aVariables[] =
624{
625 { RT_STR_TUPLE("Arch"), false, rtPathVarQuery_Arch, rtPathVarMatch_Arch },
626 { RT_STR_TUPLE("Bits"), false, rtPathVarQuery_Bits, rtPathVarMatch_Bits },
627 { RT_STR_TUPLE("Path"), true, rtPathVarQuery_Path, NULL },
628#if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2)
629 { RT_STR_TUPLE("SystemDrive"), true, rtPathVarQuery_DosSystemDrive, NULL },
630#endif
631#ifdef RT_OS_WINDOWS
632 { RT_STR_TUPLE("SystemRoot"), true, rtPathVarQuery_WinSystemRoot, NULL },
633 { RT_STR_TUPLE("AppData"), true, rtPathVarQuery_WinAppData, rtPathVarMatch_WinAppData },
634 { RT_STR_TUPLE("ProgramData"), true, rtPathVarQuery_WinProgramData, rtPathVarMatch_WinProgramData },
635 { RT_STR_TUPLE("ProgramFiles"), true, rtPathVarQuery_WinProgramFiles, rtPathVarMatch_WinProgramFiles },
636 { RT_STR_TUPLE("OtherProgramFiles"), true, rtPathVarQuery_WinOtherProgramFiles, rtPathVarMatch_WinOtherProgramFiles },
637 { RT_STR_TUPLE("AllProgramFiles"), true, rtPathVarQuery_WinAllProgramFiles, rtPathVarMatch_WinAllProgramFiles },
638 { RT_STR_TUPLE("CommonProgramFiles"), true, rtPathVarQuery_WinCommonProgramFiles, rtPathVarMatch_WinCommonProgramFiles },
639 { RT_STR_TUPLE("OtherCommonProgramFiles"), true, rtPathVarQuery_WinOtherCommonProgramFiles, rtPathVarMatch_WinOtherCommonProgramFiles },
640 { RT_STR_TUPLE("AllCommonProgramFiles"), true, rtPathVarQuery_WinAllCommonProgramFiles, rtPathVarMatch_WinAllCommonProgramFiles },
641#endif
642};
643
644
645
646/**
647 * Handles a complicated set.
648 *
649 * A complicated set is either using ranges, character classes or code points
650 * outside the ASCII-7 range.
651 *
652 * @returns VINF_SUCCESS or VERR_MISMATCH. May also return UTF-8 decoding
653 * errors as well as VERR_PATH_MATCH_FEATURE_NOT_IMPLEMENTED.
654 *
655 * @param ucInput The input code point to match with.
656 * @param pchSet The start of the set specification (after caret).
657 * @param cchSet The length of the set specification.
658 */
659static int rtPathMatchExecExtendedSet(RTUNICP ucInput, const char *pchSet, size_t cchSet)
660{
661 while (cchSet > 0)
662 {
663 RTUNICP ucSet;
664 int rc = RTStrGetCpNEx(&pchSet, &cchSet, &ucSet);
665 AssertRCReturn(rc, rc);
666
667 /*
668 * Check for character class, collating symbol and equvalence class.
669 */
670 if (ucSet == '[' && cchSet > 0)
671 {
672 char chNext = *pchSet;
673 if (chNext == ':')
674 {
675#define CHECK_CHAR_CLASS(a_szClassNm, a_BoolTestExpr) \
676 if ( cchSet >= sizeof(a_szClassNm) \
677 && memcmp(pchSet, a_szClassNm "]", sizeof(a_szClassNm)) == 0) \
678 { \
679 if (a_BoolTestExpr) \
680 return VINF_SUCCESS; \
681 pchSet += sizeof(a_szClassNm); \
682 cchSet -= sizeof(a_szClassNm); \
683 continue; \
684 } do { } while (0)
685
686 CHECK_CHAR_CLASS(":alpha:", RTUniCpIsAlphabetic(ucInput));
687 CHECK_CHAR_CLASS(":alnum:", RTUniCpIsAlphabetic(ucInput) || RTUniCpIsDecDigit(ucInput)); /** @todo figure what's correct here and fix uni.h */
688 CHECK_CHAR_CLASS(":blank:", ucInput == ' ' || ucInput == '\t');
689 CHECK_CHAR_CLASS(":cntrl:", ucInput < 31 || ucInput == 127);
690 CHECK_CHAR_CLASS(":digit:", RTUniCpIsDecDigit(ucInput));
691 CHECK_CHAR_CLASS(":lower:", RTUniCpIsLower(ucInput));
692 CHECK_CHAR_CLASS(":print:", RTUniCpIsAlphabetic(ucInput) || (RT_C_IS_PRINT(ucInput) && ucInput < 127)); /** @todo fixme*/
693 CHECK_CHAR_CLASS(":punct:", RT_C_IS_PRINT(ucInput) && ucInput < 127); /** @todo fixme*/
694 CHECK_CHAR_CLASS(":space:", RTUniCpIsSpace(ucInput));
695 CHECK_CHAR_CLASS(":upper:", RTUniCpIsUpper(ucInput));
696 CHECK_CHAR_CLASS(":xdigit:", RTUniCpIsHexDigit(ucInput));
697 AssertMsgFailedReturn(("Unknown or malformed char class: '%.*s'\n", cchSet + 1, pchSet - 1),
698 VERR_PATH_GLOB_UNKNOWN_CHAR_CLASS);
699#undef CHECK_CHAR_CLASS
700 }
701 /** @todo implement collating symbol and equvalence class. */
702 else if (chNext == '=' || chNext == '.')
703 AssertFailedReturn(VERR_PATH_MATCH_FEATURE_NOT_IMPLEMENTED);
704 }
705
706 /*
707 * Check for range (leading or final dash does not constitute a range).
708 */
709 if (cchSet > 1 && *pchSet == '-')
710 {
711 pchSet++; /* skip dash */
712 cchSet--;
713
714 RTUNICP ucSet2;
715 rc = RTStrGetCpNEx(&pchSet, &cchSet, &ucSet2);
716 AssertRCReturn(rc, rc);
717 Assert(ucSet < ucSet2);
718 if (ucInput >= ucSet && ucInput <= ucSet2)
719 return VINF_SUCCESS;
720 }
721 /*
722 * Single char comparison.
723 */
724 else if (ucInput == ucSet)
725 return VINF_SUCCESS;
726 }
727 return VERR_MISMATCH;
728}
729
730
731/**
732 * Variable matching fallback using the query function.
733 *
734 * This must not be inlined as it consuming a lot of stack! Which is why it's
735 * placed a couple of functions away from the recursive rtPathExecMatch.
736 *
737 * @returns VINF_SUCCESS or VERR_MISMATCH.
738 * @param pchInput The current input position.
739 * @param cchInput The amount of input left..
740 * @param idxVar The variable table index.
741 * @param fIgnoreCase Whether to ignore case when comparing.
742 * @param pcchMatched Where to return how much we actually matched up.
743 * @param pCache Pointer to the path matching cache.
744 */
745DECL_NO_INLINE(static, int) rtPathMatchExecVariableFallback(const char *pchInput, size_t cchInput, uint16_t idxVar,
746 bool fIgnoreCase, size_t *pcchMatched, PRTPATHMATCHCACHE pCache)
747{
748 for (uint32_t iItem = 0; iItem < RTPATHMATCH_MAX_VAR_ITEMS; iItem++)
749 {
750 char szValue[RTPATH_MAX];
751 size_t cchValue;
752 int rc = g_aVariables[idxVar].pfnQuery(iItem, szValue, sizeof(szValue), &cchValue, pCache);
753 if (RT_SUCCESS(rc))
754 {
755 if (cchValue <= cchInput)
756 {
757 if ( !fIgnoreCase
758 ? memcmp(pchInput, szValue, cchValue) == 0
759 : RTStrNICmp(pchInput, szValue, cchValue) == 0)
760 {
761 *pcchMatched = cchValue;
762 return VINF_SUCCESS;
763 }
764 }
765 if (rc == VINF_EOF)
766 return VERR_MISMATCH;
767 }
768 else if (rc == VERR_EOF)
769 return VERR_MISMATCH;
770 else
771 Assert(rc == VERR_BUFFER_OVERFLOW || rc == VERR_TRY_AGAIN);
772 }
773 AssertFailed();
774 return VERR_MISMATCH;
775}
776
777
778/**
779 * Variable matching worker.
780 *
781 * @returns VINF_SUCCESS or VERR_MISMATCH.
782 * @param pchInput The current input position.
783 * @param cchInput The amount of input left..
784 * @param idxVar The variable table index.
785 * @param fIgnoreCase Whether to ignore case when comparing.
786 * @param pcchMatched Where to return how much we actually matched up.
787 * @param pCache Pointer to the path matching cache.
788 */
789static int rtPathMatchExecVariable(const char *pchInput, size_t cchInput, uint16_t idxVar,
790 bool fIgnoreCase, size_t *pcchMatched, PRTPATHMATCHCACHE pCache)
791{
792 Assert(idxVar < RT_ELEMENTS(g_aVariables));
793 if (g_aVariables[idxVar].pfnMatch)
794 return g_aVariables[idxVar].pfnMatch(pchInput, cchInput, fIgnoreCase, pcchMatched);
795 return rtPathMatchExecVariableFallback(pchInput, cchInput, idxVar, fIgnoreCase, pcchMatched, pCache);
796}
797
798
799/**
800 * Variable matching worker.
801 *
802 * @returns VINF_SUCCESS or VERR_MISMATCH.
803 * @param pchInput The current input position.
804 * @param cchInput The amount of input left..
805 * @param pProg The first matching program instruction.
806 * @param pCache Pointer to the path matching cache.
807 */
808static int rtPathMatchExec(const char *pchInput, size_t cchInput, PCRTPATHMATCHCORE pProg, PRTPATHMATCHCACHE pCache)
809{
810 for (;;)
811 {
812 switch (pProg->enmOpCode)
813 {
814 case RTPATHMATCHOP_RETURN_MATCH_IF_AT_END:
815 return cchInput == 0 ? VINF_SUCCESS : VERR_MISMATCH;
816
817 case RTPATHMATCHOP_RETURN_MATCH:
818 return VINF_SUCCESS;
819
820 case RTPATHMATCHOP_RETURN_MATCH_EXCEPT_DOT_AND_DOTDOT:
821 if ( cchInput > 2
822 || cchInput < 1
823 || pchInput[0] != '.'
824 || (cchInput == 2 && pchInput[1] != '.') )
825 return VINF_SUCCESS;
826 return VERR_MISMATCH;
827
828 case RTPATHMATCHOP_STRCMP:
829 if (pProg->cch > cchInput)
830 return VERR_MISMATCH;
831 if (memcmp(pchInput, pProg->pch, pProg->cch) != 0)
832 return VERR_MISMATCH;
833 cchInput -= pProg->cch;
834 pchInput += pProg->cch;
835 break;
836
837 case RTPATHMATCHOP_STRICMP:
838 if (pProg->cch > cchInput)
839 return VERR_MISMATCH;
840 if (RTStrNICmp(pchInput, pProg->pch, pProg->cch) != 0)
841 return VERR_MISMATCH;
842 cchInput -= pProg->cch;
843 pchInput += pProg->cch;
844 break;
845
846 case RTPATHMATCHOP_SKIP_ONE_CODEPOINT:
847 {
848 if (cchInput == 0)
849 return VERR_MISMATCH;
850 RTUNICP ucInputIgnore;
851 int rc = RTStrGetCpNEx(&pchInput, &cchInput, &ucInputIgnore);
852 AssertRCReturn(rc, rc);
853 break;
854 }
855
856 case RTPATHMATCHOP_SKIP_MULTIPLE_CODEPOINTS:
857 {
858 uint16_t cCpsLeft = pProg->cch;
859 Assert(cCpsLeft > 1);
860 if (cCpsLeft > cchInput)
861 return VERR_MISMATCH;
862 while (cCpsLeft-- > 0)
863 {
864 RTUNICP ucInputIgnore;
865 int rc = RTStrGetCpNEx(&pchInput, &cchInput, &ucInputIgnore);
866 if (RT_FAILURE(rc))
867 return rc == VERR_END_OF_STRING ? VERR_MISMATCH : rc;
868 }
869 break;
870 }
871
872 case RTPATHMATCHOP_CODEPOINT_IN_SET_ASCII7:
873 {
874 if (cchInput == 0)
875 return VERR_MISMATCH;
876 RTUNICP ucInput;
877 int rc = RTStrGetCpNEx(&pchInput, &cchInput, &ucInput);
878 AssertRCReturn(rc, rc);
879 if (ucInput >= 0x80)
880 return VERR_MISMATCH;
881 if (memchr(pProg->pch, (char)ucInput, pProg->cch) == NULL)
882 return VERR_MISMATCH;
883 break;
884 }
885
886 case RTPATHMATCHOP_CODEPOINT_NOT_IN_SET_ASCII7:
887 {
888 if (cchInput == 0)
889 return VERR_MISMATCH;
890 RTUNICP ucInput;
891 int rc = RTStrGetCpNEx(&pchInput, &cchInput, &ucInput);
892 AssertRCReturn(rc, rc);
893 if (ucInput >= 0x80)
894 break;
895 if (memchr(pProg->pch, (char)ucInput, pProg->cch) != NULL)
896 return VERR_MISMATCH;
897 break;
898 }
899
900 case RTPATHMATCHOP_CODEPOINT_IN_SET_EXTENDED:
901 {
902 if (cchInput == 0)
903 return VERR_MISMATCH;
904 RTUNICP ucInput;
905 int rc = RTStrGetCpNEx(&pchInput, &cchInput, &ucInput);
906 AssertRCReturn(rc, rc);
907 rc = rtPathMatchExecExtendedSet(ucInput, pProg->pch, pProg->cch);
908 if (rc == VINF_SUCCESS)
909 break;
910 return rc;
911 }
912
913 case RTPATHMATCHOP_CODEPOINT_NOT_IN_SET_EXTENDED:
914 {
915 if (cchInput == 0)
916 return VERR_MISMATCH;
917 RTUNICP ucInput;
918 int rc = RTStrGetCpNEx(&pchInput, &cchInput, &ucInput);
919 AssertRCReturn(rc, rc);
920 rc = rtPathMatchExecExtendedSet(ucInput, pProg->pch, pProg->cch);
921 if (rc == VERR_MISMATCH)
922 break;
923 if (rc == VINF_SUCCESS)
924 rc = VERR_MISMATCH;
925 return rc;
926 }
927
928 case RTPATHMATCHOP_VARIABLE_VALUE_CMP:
929 case RTPATHMATCHOP_VARIABLE_VALUE_ICMP:
930 {
931 size_t cchMatched = 0;
932 int rc = rtPathMatchExecVariable(pchInput, cchInput, pProg->uOp2,
933 pProg->enmOpCode == RTPATHMATCHOP_VARIABLE_VALUE_ICMP, &cchMatched, pCache);
934 if (rc == VINF_SUCCESS)
935 {
936 pchInput += cchMatched;
937 cchInput -= cchMatched;
938 break;
939 }
940 return rc;
941 }
942
943 /*
944 * This is the expensive one. It always completes the program.
945 */
946 case RTPATHMATCHOP_ZERO_OR_MORE:
947 {
948 if (cchInput < pProg->cch)
949 return VERR_MISMATCH;
950 size_t cchMatched = cchInput - pProg->cch;
951 do
952 {
953 int rc = rtPathMatchExec(&pchInput[cchMatched], cchInput - cchMatched, pProg + 1, pCache);
954 if (RT_SUCCESS(rc))
955 return rc;
956 } while (cchMatched-- > 0);
957 return VERR_MISMATCH;
958 }
959
960 /*
961 * Variant of the above that doesn't match '.' and '..' entries.
962 */
963 case RTPATHMATCHOP_ZERO_OR_MORE_EXCEPT_DOT_AND_DOTDOT:
964 {
965 if (cchInput < pProg->cch)
966 return VERR_MISMATCH;
967 if ( cchInput <= 2
968 && cchInput > 0
969 && pchInput[0] == '.'
970 && (cchInput == 1 || pchInput[1] == '.') )
971 return VERR_MISMATCH;
972 size_t cchMatched = cchInput - pProg->cch;
973 do
974 {
975 int rc = rtPathMatchExec(&pchInput[cchMatched], cchInput - cchMatched, pProg + 1, pCache);
976 if (RT_SUCCESS(rc))
977 return rc;
978 } while (cchMatched-- > 0);
979 return VERR_MISMATCH;
980 }
981
982 default:
983 AssertMsgFailedReturn(("enmOpCode=%d\n", pProg->enmOpCode), VERR_INTERNAL_ERROR_3);
984 }
985
986 pProg++;
987 }
988}
989
990
991
992
993/**
994 * Compiles a path matching program.
995 *
996 * @returns IPRT status code.
997 * @param pchPattern The pattern to compile.
998 * @param cchPattern The length of the pattern.
999 * @param fIgnoreCase Whether to ignore case or not when doing the
1000 * actual matching later on.
1001 * @param pAllocator Pointer to the instruction allocator & result
1002 * array. The compiled "program" starts at
1003 * PRTPATHMATCHALLOC::paInstructions[PRTPATHMATCHALLOC::iNext]
1004 * (input iNext value).
1005 *
1006 * @todo Expose this matching code and also use it for RTDirOpenFiltered
1007 */
1008static int rtPathMatchCompile(const char *pchPattern, size_t cchPattern, bool fIgnoreCase, PRTPATHMATCHALLOC pAllocator)
1009{
1010 /** @todo PORTME: big endian. */
1011 static const uint8_t s_bmMetaChars[256/8] =
1012 {
1013 0x00, 0x00, 0x00, 0x00, /* 0 thru 31 */
1014 0x10, 0x04, 0x00, 0x80, /* 32 thru 63 */
1015 0x00, 0x00, 0x00, 0x08, /* 64 thru 95 */
1016 0x00, 0x00, 0x00, 0x00, /* 96 thru 127 */
1017 /* UTF-8 multibyte: */
1018 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1019 };
1020 Assert(ASMBitTest(s_bmMetaChars, '$')); AssertCompile('$' == 0x24 /*36*/);
1021 Assert(ASMBitTest(s_bmMetaChars, '*')); AssertCompile('*' == 0x2a /*42*/);
1022 Assert(ASMBitTest(s_bmMetaChars, '?')); AssertCompile('?' == 0x3f /*63*/);
1023 Assert(ASMBitTest(s_bmMetaChars, '[')); AssertCompile('[' == 0x5b /*91*/);
1024
1025 /*
1026 * For checking for the first instruction.
1027 */
1028 uint16_t const iFirst = pAllocator->iNext;
1029
1030 /*
1031 * This is for tracking zero-or-more instructions and for calculating
1032 * the minimum amount of input required for it to be considered.
1033 */
1034 uint16_t aiZeroOrMore[RTPATHMATCH_MAX_ZERO_OR_MORE];
1035 uint8_t cZeroOrMore = 0;
1036 size_t offInput = 0;
1037
1038 /*
1039 * Loop thru the pattern and translate it into string matching instructions.
1040 */
1041 for (;;)
1042 {
1043 /*
1044 * Allocate the next instruction.
1045 */
1046 if (pAllocator->iNext >= pAllocator->cAllocated)
1047 {
1048 uint32_t cNew = pAllocator->cAllocated ? pAllocator->cAllocated * 2 : 2;
1049 void *pvNew = RTMemRealloc(pAllocator->paInstructions, cNew * sizeof(pAllocator->paInstructions[0]));
1050 AssertReturn(pvNew, VERR_NO_MEMORY);
1051 pAllocator->paInstructions = (PRTPATHMATCHCORE)pvNew;
1052 pAllocator->cAllocated = cNew;
1053 }
1054 PRTPATHMATCHCORE pInstr = &pAllocator->paInstructions[pAllocator->iNext++];
1055 pInstr->pch = pchPattern;
1056 pInstr->cch = 0;
1057 pInstr->uOp2 = 0;
1058
1059 /*
1060 * Special case: End of pattern.
1061 */
1062 if (!cchPattern)
1063 {
1064 pInstr->enmOpCode = RTPATHMATCHOP_RETURN_MATCH_IF_AT_END;
1065 break;
1066 }
1067
1068 /*
1069 * Parse the next bit of the pattern.
1070 */
1071 char ch = *pchPattern;
1072 if (ASMBitTest(s_bmMetaChars, (uint8_t)ch))
1073 {
1074 /*
1075 * Zero or more characters wildcard.
1076 */
1077 if (ch == '*')
1078 {
1079 /* Skip extra asterisks. */
1080 do
1081 {
1082 cchPattern--;
1083 pchPattern++;
1084 } while (cchPattern > 0 && *pchPattern == '*');
1085
1086 /* There is a special optimization for trailing '*'. */
1087 pInstr->cch = 1;
1088 if (cchPattern == 0)
1089 {
1090 pInstr->enmOpCode = iFirst + 1U == pAllocator->iNext
1091 ? RTPATHMATCHOP_RETURN_MATCH_EXCEPT_DOT_AND_DOTDOT : RTPATHMATCHOP_RETURN_MATCH;
1092 break;
1093 }
1094
1095 pInstr->enmOpCode = iFirst + 1U == pAllocator->iNext
1096 ? RTPATHMATCHOP_ZERO_OR_MORE_EXCEPT_DOT_AND_DOTDOT : RTPATHMATCHOP_ZERO_OR_MORE;
1097 pInstr->uOp2 = (uint16_t)offInput;
1098 AssertReturn(cZeroOrMore < RT_ELEMENTS(aiZeroOrMore), VERR_OUT_OF_RANGE);
1099 aiZeroOrMore[cZeroOrMore] = (uint16_t)(pInstr - pAllocator->paInstructions);
1100
1101 /* cchInput unchanged, zero-or-more matches. */
1102 continue;
1103 }
1104
1105 /*
1106 * Single character wildcard.
1107 */
1108 if (ch == '?')
1109 {
1110 /* Count them if more. */
1111 uint16_t cchQms = 1;
1112 while (cchQms < cchPattern && pchPattern[cchQms] == '?')
1113 cchQms++;
1114
1115 pInstr->cch = cchQms;
1116 pInstr->enmOpCode = cchQms == 1 ? RTPATHMATCHOP_SKIP_ONE_CODEPOINT : RTPATHMATCHOP_SKIP_MULTIPLE_CODEPOINTS;
1117
1118 cchPattern -= cchQms;
1119 pchPattern += cchQms;
1120 offInput += cchQms;
1121 continue;
1122 }
1123
1124 /*
1125 * Character in set.
1126 *
1127 * Note that we skip the first char in the set as that is the only place
1128 * ']' can be placed if one desires to explicitly include it in the set.
1129 * To make life a bit more interesting, [:class:] is allowed inside the
1130 * set, so we have to do the counting game to find the end.
1131 */
1132 if (ch == '[')
1133 {
1134 if ( cchPattern > 2
1135 && (const char *)memchr(pchPattern + 2, ']', cchPattern) != NULL)
1136 {
1137
1138 /* Check for not-in. */
1139 bool fInverted = false;
1140 size_t offStart = 1;
1141 if (pchPattern[offStart] == '^')
1142 {
1143 fInverted = true;
1144 offStart++;
1145 }
1146
1147 /* Special case for ']' as the first char, it doesn't indicate closing then. */
1148 size_t off = offStart;
1149 if (pchPattern[off] == ']')
1150 off++;
1151
1152 bool fExtended = false;
1153 while (off < cchPattern)
1154 {
1155 ch = pchPattern[off++];
1156 if (ch == '[')
1157 {
1158 if (off < cchPattern)
1159 {
1160 char chOpen = pchPattern[off];
1161 if ( chOpen == ':'
1162 || chOpen == '='
1163 || chOpen == '.')
1164 {
1165 off++;
1166 const char *pchFound = (const char *)memchr(&pchPattern[off], ']', cchPattern - off);
1167 if ( pchFound
1168 && pchFound[-1] == chOpen)
1169 {
1170 fExtended = true;
1171 off = pchFound - pchPattern + 1;
1172 }
1173 else
1174 AssertFailed();
1175 }
1176 }
1177 }
1178 /* Check for closing. */
1179 else if (ch == ']')
1180 break;
1181 /* Check for range expression, promote to extended if this happens. */
1182 else if ( ch == '-'
1183 && off != offStart + 1
1184 && off < cchPattern
1185 && pchPattern[off] != ']')
1186 fExtended = true;
1187 /* UTF-8 multibyte chars forces us to use the extended version too. */
1188 else if ((uint8_t)ch >= 0x80)
1189 fExtended = true;
1190 }
1191
1192 if (ch == ']')
1193 {
1194 pInstr->pch = &pchPattern[offStart];
1195 pInstr->cch = (uint16_t)(off - offStart - 1);
1196 if (!fExtended)
1197 pInstr->enmOpCode = !fInverted
1198 ? RTPATHMATCHOP_CODEPOINT_IN_SET_ASCII7 : RTPATHMATCHOP_CODEPOINT_NOT_IN_SET_ASCII7;
1199 else
1200 pInstr->enmOpCode = !fInverted
1201 ? RTPATHMATCHOP_CODEPOINT_IN_SET_EXTENDED
1202 : RTPATHMATCHOP_CODEPOINT_NOT_IN_SET_EXTENDED;
1203 pchPattern += off;
1204 cchPattern -= off;
1205 offInput += 1;
1206 continue;
1207 }
1208
1209 /* else: invalid, treat it as */
1210 AssertFailed();
1211 }
1212 }
1213 /*
1214 * Variable matching.
1215 */
1216 else if (ch == '$')
1217 {
1218 const char *pchFound;
1219 if ( cchPattern > 3
1220 && pchPattern[1] == '{'
1221 && (pchFound = (const char *)memchr(pchPattern + 2, '}', cchPattern)) != NULL
1222 && pchFound != &pchPattern[2])
1223 {
1224 /* skip to the variable name. */
1225 pchPattern += 2;
1226 cchPattern -= 2;
1227 size_t cchVarNm = pchFound - pchPattern;
1228
1229 /* Look it up. */
1230 uint32_t iVar;
1231 for (iVar = 0; iVar < RT_ELEMENTS(g_aVariables); iVar++)
1232 if ( g_aVariables[iVar].cchName == cchVarNm
1233 && memcmp(g_aVariables[iVar].pszName, pchPattern, cchVarNm) == 0)
1234 break;
1235 if (iVar < RT_ELEMENTS(g_aVariables))
1236 {
1237 pInstr->uOp2 = (uint16_t)iVar;
1238 pInstr->enmOpCode = !fIgnoreCase ? RTPATHMATCHOP_VARIABLE_VALUE_CMP : RTPATHMATCHOP_VARIABLE_VALUE_ICMP;
1239 pInstr->pch = pchPattern; /* not necessary */
1240 pInstr->cch = (uint16_t)cchPattern; /* ditto */
1241 pchPattern += cchVarNm + 1;
1242 cchPattern -= cchVarNm + 1;
1243 AssertMsgReturn(!g_aVariables[iVar].fFirstOnly || iFirst + 1U == pAllocator->iNext,
1244 ("Glob variable '%s' should be first\n", g_aVariables[iVar].pszName),
1245 VERR_PATH_MATCH_VARIABLE_MUST_BE_FIRST);
1246 /* cchInput unchanged, value can be empty. */
1247 continue;
1248 }
1249 AssertMsgFailedReturn(("Unknown path matching variable '%.*s'\n", cchVarNm, pchPattern),
1250 VERR_PATH_MATCH_UNKNOWN_VARIABLE);
1251 }
1252 }
1253 else
1254 AssertFailedReturn(VERR_INTERNAL_ERROR_2); /* broken bitmap / compiler codeset */
1255 }
1256
1257 /*
1258 * Plain text. Look for the next meta char.
1259 */
1260 uint32_t cchPlain = 1;
1261 while (cchPlain < cchPattern)
1262 {
1263 ch = pchPattern[cchPlain];
1264 if (!ASMBitTest(s_bmMetaChars, (uint8_t)ch))
1265 { /* probable */ }
1266 else if ( ch == '?'
1267 || ch == '*')
1268 break;
1269 else if (ch == '$')
1270 {
1271 const char *pchFound;
1272 if ( cchPattern > cchPlain + 3
1273 && pchPattern[cchPlain + 1] == '{'
1274 && (pchFound = (const char *)memchr(&pchPattern[cchPlain + 2], '}', cchPattern - cchPlain - 2)) != NULL
1275 && pchFound != &pchPattern[cchPlain + 2])
1276 break;
1277 }
1278 else if (ch == '[')
1279 {
1280 /* We don't put a lot of effort into getting this 100% right here,
1281 no point it complicating things for malformed expressions. */
1282 if ( cchPattern > cchPlain + 2
1283 && memchr(&pchPattern[cchPlain + 2], ']', cchPattern - cchPlain - 1) != NULL)
1284 break;
1285 }
1286 else
1287 AssertFailedReturn(VERR_INTERNAL_ERROR_2); /* broken bitmap / compiler codeset */
1288 cchPlain++;
1289 }
1290 pInstr->enmOpCode = !fIgnoreCase ? RTPATHMATCHOP_STRCMP : RTPATHMATCHOP_STRICMP;
1291 pInstr->cch = cchPlain;
1292 Assert(pInstr->pch == pchPattern);
1293 Assert(pInstr->uOp2 == 0);
1294 pchPattern += cchPlain;
1295 cchPattern -= cchPlain;
1296 offInput += cchPlain;
1297 }
1298
1299 /*
1300 * Optimize zero-or-more matching.
1301 */
1302 while (cZeroOrMore-- > 0)
1303 {
1304 PRTPATHMATCHCORE pInstr = &pAllocator->paInstructions[aiZeroOrMore[cZeroOrMore]];
1305 pInstr->uOp2 = (uint16_t)(offInput - pInstr->uOp2);
1306 }
1307
1308 /** @todo It's possible to use offInput to inject a instruction for checking
1309 * minimum input length at the start of the program. Not sure it's
1310 * worth it though, unless it's long a complicated expression... */
1311 return VINF_SUCCESS;
1312}
1313
1314
1315/**
1316 * Parses the glob pattern.
1317 *
1318 * This compiles filename matching programs for each component and determins the
1319 * optimal search strategy for them.
1320 *
1321 * @returns IPRT status code.
1322 * @param pGlob The glob instance data.
1323 * @param pszPattern The pattern to parse.
1324 * @param pParsed The RTPathParse output for the pattern.
1325 * @param fFlags The glob flags (same as pGlob->fFlags).
1326 */
1327static int rtPathGlobParse(PRTPATHGLOB pGlob, const char *pszPattern, PRTPATHPARSED pParsed, uint32_t fFlags)
1328{
1329 AssertReturn(pParsed->cComps > 0, VERR_INVALID_PARAMETER); /* shouldn't happen */
1330 uint32_t iComp = 0;
1331
1332 /*
1333 * If we've got a rootspec, mark it as plain. On platforms with
1334 * drive letter and/or UNC we don't allow wildcards or such in
1335 * the drive letter spec or UNC server name. (At least not yet.)
1336 */
1337 if (RTPATH_PROP_HAS_ROOT_SPEC(pParsed->fProps))
1338 {
1339 AssertReturn(pParsed->aComps[0].cch < sizeof(pGlob->szPath) - 1, VERR_FILENAME_TOO_LONG);
1340 memcpy(pGlob->szPath, &pszPattern[pParsed->aComps[0].off], pParsed->aComps[0].cch);
1341 pGlob->offFirstPath = pParsed->aComps[0].cch;
1342 pGlob->iFirstComp = iComp = 1;
1343 }
1344 else
1345 {
1346 const char * const pszComp = &pszPattern[pParsed->aComps[0].off];
1347
1348 /*
1349 * The tilde is only applicable to the first component, expand it
1350 * immediately.
1351 */
1352 if ( *pszComp == '~'
1353 && !(fFlags & RTPATHGLOB_F_NO_TILDE))
1354 {
1355 if (pParsed->aComps[0].cch == 1)
1356 {
1357 int rc = RTPathUserHome(pGlob->szPath, sizeof(pGlob->szPath) - 1);
1358 AssertRCReturn(rc, rc);
1359 }
1360 else
1361 AssertMsgFailedReturn(("'%.*s' is not supported yet\n", pszComp, pParsed->aComps[0].cch),
1362 VERR_PATH_MATCH_FEATURE_NOT_IMPLEMENTED);
1363 pGlob->offFirstPath = (uint32_t)RTPathEnsureTrailingSeparator(pGlob->szPath, sizeof(pGlob->szPath));
1364 pGlob->iFirstComp = iComp = 1;
1365 }
1366 }
1367
1368 /*
1369 * Process the other components.
1370 */
1371 bool fStarStar = false;
1372 for (; iComp < pParsed->cComps; iComp++)
1373 {
1374 const char *pszComp = &pszPattern[pParsed->aComps[iComp].off];
1375 uint16_t cchComp = pParsed->aComps[iComp].cch;
1376 Assert(pGlob->aComps[iComp].fNormal == false);
1377
1378 pGlob->aComps[iComp].fDir = iComp + 1 < pParsed->cComps || (fFlags & RTPATHGLOB_F_ONLY_DIRS);
1379 if ( cchComp != 2
1380 || pszComp[0] != '*'
1381 || pszComp[1] != '*'
1382 || (fFlags & RTPATHGLOB_F_NO_STARSTAR) )
1383 {
1384 /* Compile the pattern. */
1385 uint16_t const iMatchProg = pGlob->MatchInstrAlloc.iNext;
1386 pGlob->aComps[iComp].iMatchProg = iMatchProg;
1387 int rc = rtPathMatchCompile(pszComp, cchComp, RT_BOOL(fFlags & RTPATHGLOB_F_IGNORE_CASE),
1388 &pGlob->MatchInstrAlloc);
1389 if (RT_FAILURE(rc))
1390 return rc;
1391
1392 /* Check for plain text as well as full variable matching (not applicable after '**'). */
1393 uint16_t const cInstructions = pGlob->MatchInstrAlloc.iNext - iMatchProg;
1394 if ( cInstructions == 2
1395 && !fStarStar
1396 && pGlob->MatchInstrAlloc.paInstructions[iMatchProg + 1].enmOpCode == RTPATHMATCHOP_RETURN_MATCH_IF_AT_END)
1397 {
1398 if ( pGlob->MatchInstrAlloc.paInstructions[iMatchProg].enmOpCode == RTPATHMATCHOP_STRCMP
1399 || pGlob->MatchInstrAlloc.paInstructions[iMatchProg].enmOpCode == RTPATHMATCHOP_STRICMP)
1400 pGlob->aComps[iComp].fPlain = true;
1401 else if ( pGlob->MatchInstrAlloc.paInstructions[iMatchProg].enmOpCode == RTPATHMATCHOP_VARIABLE_VALUE_CMP
1402 || pGlob->MatchInstrAlloc.paInstructions[iMatchProg].enmOpCode == RTPATHMATCHOP_VARIABLE_VALUE_ICMP)
1403 {
1404 pGlob->aComps[iComp].fExpVariable = true;
1405 AssertMsgReturn( iComp == 0
1406 || !g_aVariables[pGlob->MatchInstrAlloc.paInstructions[iMatchProg].uOp2].fFirstOnly,
1407 ("Glob variable '%.*s' can only be used as the path component.\n", cchComp, pszComp),
1408 VERR_PATH_MATCH_VARIABLE_MUST_BE_FIRST);
1409 }
1410 else
1411 pGlob->aComps[iComp].fNormal = true;
1412 }
1413 else
1414 pGlob->aComps[iComp].fNormal = true;
1415 }
1416 else
1417 {
1418 /* Recursive "**" matching. */
1419 pGlob->aComps[iComp].fNormal = false;
1420 pGlob->aComps[iComp].fStarStar = true;
1421 AssertReturn(!fStarStar, VERR_PATH_MATCH_FEATURE_NOT_IMPLEMENTED); /** @todo implement multiple '**' sequences in a pattern. */
1422 fStarStar = true;
1423 }
1424 }
1425 pGlob->aComps[pParsed->cComps - 1].fFinal = true;
1426
1427 return VINF_SUCCESS;
1428}
1429
1430
1431/**
1432 * This is for skipping overly long directories entries.
1433 *
1434 * Since our directory entry buffer can hold filenames of RTPATH_MAX bytes, we
1435 * can safely skip filenames that are longer. There are very few file systems
1436 * that can actually store filenames longer than 255 bytes at time of coding
1437 * (2015-09), and extremely few which can exceed 4096 (RTPATH_MAX) bytes.
1438 *
1439 * @returns IPRT status code.
1440 * @param hDir The directory handle.
1441 * @param cbNeeded The required entry size.
1442 */
1443DECL_NO_INLINE(static, int) rtPathGlobSkipDirEntry(PRTDIR hDir, size_t cbNeeded)
1444{
1445 int rc = VERR_BUFFER_OVERFLOW;
1446 cbNeeded = RT_ALIGN_Z(cbNeeded, 16);
1447 PRTDIRENTRY pDirEntry = (PRTDIRENTRY)RTMemTmpAlloc(cbNeeded);
1448 if (pDirEntry)
1449 {
1450 rc = RTDirRead(hDir, pDirEntry, &cbNeeded);
1451 RTMemTmpFree(pDirEntry);
1452 }
1453 return rc;
1454}
1455
1456
1457/**
1458 * Adds a result.
1459 *
1460 * @returns IPRT status code.
1461 * @retval VINF_CALLBACK_RETURN if we can stop searching.
1462 *
1463 * @param pGlob The glob instance data.
1464 * @param cchPath The number of bytes to add from pGlob->szPath.
1465 * @param uType The RTDIRENTRYTYPE value.
1466 */
1467DECL_NO_INLINE(static, int) rtPathGlobAddResult(PRTPATHGLOB pGlob, size_t cchPath, uint8_t uType)
1468{
1469 if (pGlob->cResults < RTPATHGLOB_MAX_RESULTS)
1470 {
1471 PRTPATHGLOBENTRY pEntry = (PRTPATHGLOBENTRY)RTMemAlloc(RT_OFFSETOF(RTPATHGLOBENTRY, szPath[cchPath + 1]));
1472 if (pEntry)
1473 {
1474 pEntry->uType = uType;
1475 pEntry->cchPath = (uint16_t)cchPath;
1476 memcpy(pEntry->szPath, pGlob->szPath, cchPath);
1477 pEntry->szPath[cchPath] = '\0';
1478
1479 pEntry->pNext = NULL;
1480 *pGlob->ppNext = pEntry;
1481 pGlob->ppNext = &pEntry->pNext;
1482 pGlob->cResults++;
1483
1484 if (!(pGlob->fFlags & RTPATHGLOB_F_FIRST_ONLY))
1485 return VINF_SUCCESS;
1486 return VINF_CALLBACK_RETURN;
1487 }
1488 return VERR_NO_MEMORY;
1489 }
1490 return VERR_TOO_MUCH_DATA;
1491}
1492
1493
1494/**
1495 * Adds a result, constructing the path from two string.
1496 *
1497 * @returns IPRT status code.
1498 * @retval VINF_CALLBACK_RETURN if we can stop searching.
1499 *
1500 * @param pGlob The glob instance data.
1501 * @param cchPath The number of bytes to add from pGlob->szPath.
1502 * @param pchName The string (usual filename) to append to the szPath.
1503 * @param cchName The length of the string to append.
1504 * @param uType The RTDIRENTRYTYPE value.
1505 */
1506DECL_NO_INLINE(static, int) rtPathGlobAddResult2(PRTPATHGLOB pGlob, size_t cchPath, const char *pchName, size_t cchName,
1507 uint8_t uType)
1508{
1509 if (pGlob->cResults < RTPATHGLOB_MAX_RESULTS)
1510 {
1511 PRTPATHGLOBENTRY pEntry = (PRTPATHGLOBENTRY)RTMemAlloc(RT_OFFSETOF(RTPATHGLOBENTRY, szPath[cchPath + cchName + 1]));
1512 if (pEntry)
1513 {
1514 pEntry->uType = uType;
1515 pEntry->cchPath = (uint16_t)(cchPath + cchName);
1516 memcpy(pEntry->szPath, pGlob->szPath, cchPath);
1517 memcpy(&pEntry->szPath[cchPath], pchName, cchName);
1518 pEntry->szPath[cchPath + cchName] = '\0';
1519
1520 pEntry->pNext = NULL;
1521 *pGlob->ppNext = pEntry;
1522 pGlob->ppNext = &pEntry->pNext;
1523 pGlob->cResults++;
1524
1525 if (!(pGlob->fFlags & RTPATHGLOB_F_FIRST_ONLY))
1526 return VINF_SUCCESS;
1527 return VINF_CALLBACK_RETURN;
1528 }
1529 return VERR_NO_MEMORY;
1530 }
1531 return VERR_TOO_MUCH_DATA;
1532}
1533
1534
1535/**
1536 * Prepares a result, constructing the path from two string.
1537 *
1538 * The caller must call either rtPathGlobCommitResult or
1539 * rtPathGlobRollbackResult to complete the operation.
1540 *
1541 * @returns IPRT status code.
1542 * @retval VINF_CALLBACK_RETURN if we can stop searching.
1543 *
1544 * @param pGlob The glob instance data.
1545 * @param cchPath The number of bytes to add from pGlob->szPath.
1546 * @param pchName The string (usual filename) to append to the szPath.
1547 * @param cchName The length of the string to append.
1548 * @param uType The RTDIRENTRYTYPE value.
1549 */
1550DECL_NO_INLINE(static, int) rtPathGlobAlmostAddResult(PRTPATHGLOB pGlob, size_t cchPath, const char *pchName, size_t cchName,
1551 uint8_t uType)
1552{
1553 if (pGlob->cResults < RTPATHGLOB_MAX_RESULTS)
1554 {
1555 PRTPATHGLOBENTRY pEntry = (PRTPATHGLOBENTRY)RTMemAlloc(RT_OFFSETOF(RTPATHGLOBENTRY, szPath[cchPath + cchName + 1]));
1556 if (pEntry)
1557 {
1558 pEntry->uType = uType;
1559 pEntry->cchPath = (uint16_t)(cchPath + cchName);
1560 memcpy(pEntry->szPath, pGlob->szPath, cchPath);
1561 memcpy(&pEntry->szPath[cchPath], pchName, cchName);
1562 pEntry->szPath[cchPath + cchName] = '\0';
1563
1564 pEntry->pNext = NULL;
1565 *pGlob->ppNext = pEntry;
1566 /* Note! We don't update ppNext here, that is done in rtPathGlobCommitResult. */
1567
1568 if (!(pGlob->fFlags & RTPATHGLOB_F_FIRST_ONLY))
1569 return VINF_SUCCESS;
1570 return VINF_CALLBACK_RETURN;
1571 }
1572 return VERR_NO_MEMORY;
1573 }
1574 return VERR_TOO_MUCH_DATA;
1575}
1576
1577
1578/**
1579 * Commits a pending result from rtPathGlobAlmostAddResult.
1580 *
1581 * @param pGlob The glob instance data.
1582 * @param uType The RTDIRENTRYTYPE value.
1583 */
1584static void rtPathGlobCommitResult(PRTPATHGLOB pGlob, uint8_t uType)
1585{
1586 PRTPATHGLOBENTRY pEntry = *pGlob->ppNext;
1587 AssertPtr(pEntry);
1588 pEntry->uType = uType;
1589 pGlob->ppNext = &pEntry->pNext;
1590 pGlob->cResults++;
1591}
1592
1593
1594/**
1595 * Rolls back a pending result from rtPathGlobAlmostAddResult.
1596 *
1597 * @param pGlob The glob instance data.
1598 */
1599static void rtPathGlobRollbackResult(PRTPATHGLOB pGlob)
1600{
1601 PRTPATHGLOBENTRY pEntry = *pGlob->ppNext;
1602 AssertPtr(pEntry);
1603 RTMemFree(pEntry);
1604 *pGlob->ppNext = NULL;
1605}
1606
1607
1608
1609/**
1610 * Whether to call rtPathGlobExecRecursiveVarExp for the next component.
1611 *
1612 * @returns true / false.
1613 * @param pGlob The glob instance data.
1614 * @param offPath The next path offset/length.
1615 * @param iComp The next component.
1616 */
1617DECLINLINE(bool) rtPathGlobExecIsExpVar(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp)
1618{
1619 return pGlob->aComps[iComp].fExpVariable
1620 && ( !(pGlob->fFlags & RTPATHGLOB_F_IGNORE_CASE)
1621 || (offPath ? !RTFsIsCaseSensitive(pGlob->szPath) : !RTFsIsCaseSensitive(".")) );
1622}
1623
1624/**
1625 * Whether to call rtPathGlobExecRecursivePlainText for the next component.
1626 *
1627 * @returns true / false.
1628 * @param pGlob The glob instance data.
1629 * @param offPath The next path offset/length.
1630 * @param iComp The next component.
1631 */
1632DECLINLINE(bool) rtPathGlobExecIsPlainText(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp)
1633{
1634 return pGlob->aComps[iComp].fPlain
1635 && ( !(pGlob->fFlags & RTPATHGLOB_F_IGNORE_CASE)
1636 || (offPath ? !RTFsIsCaseSensitive(pGlob->szPath) : !RTFsIsCaseSensitive(".")) );
1637}
1638
1639
1640/**
1641 * Helper for rtPathGlobExecRecursiveVarExp and rtPathGlobExecRecursivePlainText
1642 * that compares a file mode mask with dir/no-dir wishes of the caller.
1643 *
1644 * @returns true if match, false if not.
1645 * @param pGlob The glob instance data.
1646 * @param fMode The file mode (only the type is used).
1647 */
1648DECLINLINE(bool) rtPathGlobExecIsMatchFinalWithFileMode(PRTPATHGLOB pGlob, RTFMODE fMode)
1649{
1650 if (!(pGlob->fFlags & (RTPATHGLOB_F_NO_DIRS | RTPATHGLOB_F_ONLY_DIRS)))
1651 return true;
1652 return RT_BOOL(pGlob->fFlags & RTPATHGLOB_F_ONLY_DIRS) == RTFS_IS_DIRECTORY(pGlob->u.ObjInfo.Attr.fMode);
1653}
1654
1655
1656/**
1657 * Recursive globbing - star-star mode.
1658 *
1659 * @returns IPRT status code.
1660 * @retval VINF_CALLBACK_RETURN is used to implement RTPATHGLOB_F_FIRST_ONLY.
1661 *
1662 * @param pGlob The glob instance data.
1663 * @param offPath The current path offset/length.
1664 * @param iStarStarComp The star-star component index.
1665 * @param offStarStarPath The offset of the star-star component in the
1666 * pattern path.
1667 */
1668DECL_NO_INLINE(static, int) rtPathGlobExecRecursiveStarStar(PRTPATHGLOB pGlob, size_t offPath, uint32_t iStarStarComp,
1669 size_t offStarStarPath)
1670{
1671 /** @todo implement multi subdir matching. */
1672 return VERR_PATH_MATCH_FEATURE_NOT_IMPLEMENTED;
1673}
1674
1675
1676
1677/**
1678 * Recursive globbing - variable expansion optimization.
1679 *
1680 * @returns IPRT status code.
1681 * @retval VINF_CALLBACK_RETURN is used to implement RTPATHGLOB_F_FIRST_ONLY.
1682 *
1683 * @param pGlob The glob instance data.
1684 * @param offPath The current path offset/length.
1685 * @param iComp The current component.
1686 */
1687DECL_NO_INLINE(static, int) rtPathGlobExecRecursiveVarExp(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp)
1688{
1689 Assert(iComp < pGlob->pParsed->cComps);
1690 Assert(pGlob->szPath[offPath] == '\0');
1691 Assert(pGlob->aComps[iComp].fExpVariable);
1692 Assert(!pGlob->aComps[iComp].fPlain);
1693 Assert(!pGlob->aComps[iComp].fStarStar);
1694 Assert(rtPathGlobExecIsExpVar(pGlob, offPath, iComp));
1695
1696 /*
1697 * Fish the variable index out of the first matching instruction.
1698 */
1699 Assert( pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg].enmOpCode
1700 == RTPATHMATCHOP_VARIABLE_VALUE_CMP
1701 || pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg].enmOpCode
1702 == RTPATHMATCHOP_VARIABLE_VALUE_ICMP);
1703 uint16_t const iVar = pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg].uOp2;
1704
1705 /*
1706 * Enumerate all the variable, giving them the plain text treatment.
1707 */
1708 for (uint32_t iItem = 0; iItem < RTPATHMATCH_MAX_VAR_ITEMS; iItem++)
1709 {
1710 size_t cch;
1711 int rcVar = g_aVariables[iVar].pfnQuery(iItem, &pGlob->szPath[offPath], sizeof(pGlob->szPath) - offPath, &cch,
1712 &pGlob->MatchCache);
1713 if (RT_SUCCESS(rcVar))
1714 {
1715 Assert(pGlob->szPath[offPath + cch] == '\0');
1716
1717 int rc = RTPathQueryInfoEx(pGlob->szPath, &pGlob->u.ObjInfo, RTFSOBJATTRADD_NOTHING, RTPATH_F_FOLLOW_LINK);
1718 if (RT_SUCCESS(rc))
1719 {
1720 if (pGlob->aComps[iComp].fFinal)
1721 {
1722 if (rtPathGlobExecIsMatchFinalWithFileMode(pGlob, pGlob->u.ObjInfo.Attr.fMode))
1723 {
1724 rc = rtPathGlobAddResult(pGlob, cch,
1725 (pGlob->u.ObjInfo.Attr.fMode & RTFS_TYPE_MASK)
1726 >> RTFS_TYPE_DIRENTRYTYPE_SHIFT);
1727 if (rc != VINF_SUCCESS)
1728 return rc;
1729 }
1730 }
1731 else if (RTFS_IS_DIRECTORY(pGlob->u.ObjInfo.Attr.fMode))
1732 {
1733 Assert(pGlob->aComps[iComp].fDir);
1734 cch = RTPathEnsureTrailingSeparator(pGlob->szPath, sizeof(pGlob->szPath));
1735 if (cch > 0)
1736 {
1737 if (rtPathGlobExecIsExpVar(pGlob, cch, iComp + 1))
1738 rc = rtPathGlobExecRecursiveVarExp(pGlob, cch, iComp + 1);
1739 else if (rtPathGlobExecIsPlainText(pGlob, cch, iComp + 1))
1740 rc = rtPathGlobExecRecursivePlainText(pGlob, cch, iComp + 1);
1741 else if (pGlob->aComps[pGlob->iFirstComp].fStarStar)
1742 rc = rtPathGlobExecRecursiveStarStar(pGlob, cch, iComp + 1, cch);
1743 else
1744 rc = rtPathGlobExecRecursiveGeneric(pGlob, cch, iComp + 1);
1745 if (rc != VINF_SUCCESS)
1746 return rc;
1747 }
1748 else
1749 pGlob->cPathOverflows++;
1750 }
1751 }
1752 /* else: file doesn't exist or something else is wrong, ignore this. */
1753 if (rcVar == VINF_EOF)
1754 return VINF_SUCCESS;
1755 }
1756 else if (rcVar == VERR_EOF)
1757 return VINF_SUCCESS;
1758 else if (rcVar != VERR_TRY_AGAIN)
1759 {
1760 Assert(rcVar == VERR_BUFFER_OVERFLOW);
1761 pGlob->cPathOverflows++;
1762 }
1763 }
1764 AssertFailedReturn(VINF_SUCCESS); /* Too many items returned, probably buggy query method. */
1765}
1766
1767
1768/**
1769 * Recursive globbing - plain text optimization.
1770 *
1771 * @returns IPRT status code.
1772 * @retval VINF_CALLBACK_RETURN is used to implement RTPATHGLOB_F_FIRST_ONLY.
1773 *
1774 * @param pGlob The glob instance data.
1775 * @param offPath The current path offset/length.
1776 * @param iComp The current component.
1777 */
1778DECL_NO_INLINE(static, int) rtPathGlobExecRecursivePlainText(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp)
1779{
1780 /*
1781 * Instead of recursing, we loop thru adjacent plain text components.
1782 */
1783 for (;;)
1784 {
1785 /*
1786 * Preconditions.
1787 */
1788 Assert(iComp < pGlob->pParsed->cComps);
1789 Assert(pGlob->szPath[offPath] == '\0');
1790 Assert(pGlob->aComps[iComp].fPlain);
1791 Assert(!pGlob->aComps[iComp].fExpVariable);
1792 Assert(!pGlob->aComps[iComp].fStarStar);
1793 Assert(rtPathGlobExecIsPlainText(pGlob, offPath, iComp));
1794 Assert(pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg].enmOpCode
1795 == RTPATHMATCHOP_STRCMP
1796 || pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg].enmOpCode
1797 == RTPATHMATCHOP_STRICMP);
1798
1799 /*
1800 * Add the plain text component to the path.
1801 */
1802 size_t const cch = pGlob->pParsed->aComps[iComp].cch;
1803 if (cch + pGlob->aComps[iComp].fDir < sizeof(pGlob->szPath) - offPath)
1804 {
1805 memcpy(&pGlob->szPath[offPath], &pGlob->pszPattern[pGlob->pParsed->aComps[iComp].off], cch);
1806 offPath += cch;
1807 pGlob->szPath[offPath] = '\0';
1808
1809 /*
1810 * Check if it exists.
1811 */
1812 int rc = RTPathQueryInfoEx(pGlob->szPath, &pGlob->u.ObjInfo, RTFSOBJATTRADD_NOTHING, RTPATH_F_FOLLOW_LINK);
1813 if (RT_SUCCESS(rc))
1814 {
1815 if (pGlob->aComps[iComp].fFinal)
1816 {
1817 if (rtPathGlobExecIsMatchFinalWithFileMode(pGlob, pGlob->u.ObjInfo.Attr.fMode))
1818 return rtPathGlobAddResult(pGlob, offPath,
1819 (pGlob->u.ObjInfo.Attr.fMode & RTFS_TYPE_MASK)
1820 >> RTFS_TYPE_DIRENTRYTYPE_SHIFT);
1821 break;
1822 }
1823
1824 if (RTFS_IS_DIRECTORY(pGlob->u.ObjInfo.Attr.fMode))
1825 {
1826 Assert(pGlob->aComps[iComp].fDir);
1827 pGlob->szPath[offPath++] = RTPATH_SLASH;
1828 pGlob->szPath[offPath] = '\0';
1829
1830 iComp++;
1831 if (rtPathGlobExecIsExpVar(pGlob, offPath, iComp))
1832 return rtPathGlobExecRecursiveVarExp(pGlob, offPath, iComp);
1833 if (!rtPathGlobExecIsPlainText(pGlob, offPath, iComp))
1834 return rtPathGlobExecRecursiveGeneric(pGlob, offPath, iComp);
1835 if (pGlob->aComps[pGlob->iFirstComp].fStarStar)
1836 return rtPathGlobExecRecursiveStarStar(pGlob, offPath, iComp, offPath);
1837
1838 /* Continue with the next plain text component. */
1839 continue;
1840 }
1841 }
1842 /* else: file doesn't exist or something else is wrong, ignore this. */
1843 }
1844 else
1845 pGlob->cPathOverflows++;
1846 break;
1847 }
1848 return VINF_SUCCESS;
1849}
1850
1851
1852/**
1853 * Recursive globbing - generic.
1854 *
1855 * @returns IPRT status code.
1856 * @retval VINF_CALLBACK_RETURN is used to implement RTPATHGLOB_F_FIRST_ONLY.
1857 *
1858 * @param pGlob The glob instance data.
1859 * @param offPath The current path offset/length.
1860 * @param iComp The current component.
1861 */
1862DECL_NO_INLINE(static, int) rtPathGlobExecRecursiveGeneric(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp)
1863{
1864 /*
1865 * Enumerate entire directory and match each entry.
1866 */
1867 PRTDIR hDir;
1868 int rc = RTDirOpen(&hDir, offPath ? pGlob->szPath : ".");
1869 if (RT_SUCCESS(rc))
1870 {
1871 for (;;)
1872 {
1873 size_t cch = sizeof(pGlob->u);
1874 rc = RTDirRead(hDir, &pGlob->u.DirEntry, &cch);
1875 if (RT_SUCCESS(rc))
1876 {
1877 if (pGlob->aComps[iComp].fFinal)
1878 {
1879 /*
1880 * Final component: Check if it matches the current pattern.
1881 */
1882 if ( !(pGlob->fFlags & (RTPATHGLOB_F_NO_DIRS | RTPATHGLOB_F_ONLY_DIRS))
1883 || RT_BOOL(pGlob->fFlags & RTPATHGLOB_F_ONLY_DIRS)
1884 == (pGlob->u.DirEntry.enmType == RTDIRENTRYTYPE_DIRECTORY)
1885 || pGlob->u.DirEntry.enmType == RTDIRENTRYTYPE_UNKNOWN)
1886 {
1887 rc = rtPathMatchExec(pGlob->u.DirEntry.szName, pGlob->u.DirEntry.cbName,
1888 &pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg],
1889 &pGlob->MatchCache);
1890 if (RT_SUCCESS(rc))
1891 {
1892 /* Construct the result. */
1893 if ( pGlob->u.DirEntry.enmType != RTDIRENTRYTYPE_UNKNOWN
1894 || !(pGlob->fFlags & (RTPATHGLOB_F_NO_DIRS | RTPATHGLOB_F_ONLY_DIRS)) )
1895 rc = rtPathGlobAddResult2(pGlob, offPath, pGlob->u.DirEntry.szName, pGlob->u.DirEntry.cbName,
1896 (uint8_t)pGlob->u.DirEntry.enmType);
1897 else
1898 {
1899 rc = rtPathGlobAlmostAddResult(pGlob, offPath,
1900 pGlob->u.DirEntry.szName, pGlob->u.DirEntry.cbName,
1901 (uint8_t)RTDIRENTRYTYPE_UNKNOWN);
1902 if (RT_SUCCESS(rc))
1903 {
1904 RTDirQueryUnknownType((*pGlob->ppNext)->szPath, false /*fFollowSymlinks*/,
1905 &pGlob->u.DirEntry.enmType);
1906 if ( RT_BOOL(pGlob->fFlags & RTPATHGLOB_F_ONLY_DIRS)
1907 == (pGlob->u.DirEntry.enmType == RTDIRENTRYTYPE_DIRECTORY))
1908 rtPathGlobCommitResult(pGlob, (uint8_t)pGlob->u.DirEntry.enmType);
1909 else
1910 rtPathGlobRollbackResult(pGlob);
1911 }
1912 }
1913 if (rc != VINF_SUCCESS)
1914 break;
1915 }
1916 else
1917 {
1918 AssertMsgBreak(rc == VERR_MISMATCH, ("%Rrc\n", rc));
1919 rc = VINF_SUCCESS;
1920 }
1921 }
1922 }
1923 /*
1924 * Intermediate component: Directories only.
1925 */
1926 else if ( pGlob->u.DirEntry.enmType == RTDIRENTRYTYPE_DIRECTORY
1927 || pGlob->u.DirEntry.enmType == RTDIRENTRYTYPE_UNKNOWN)
1928 {
1929 rc = rtPathMatchExec(pGlob->u.DirEntry.szName, pGlob->u.DirEntry.cbName,
1930 &pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg],
1931 &pGlob->MatchCache);
1932 if (RT_SUCCESS(rc))
1933 {
1934 /* Recurse down into the alleged directory. */
1935 cch = offPath + pGlob->u.DirEntry.cbName;
1936 if (cch + 1 < sizeof(pGlob->szPath))
1937 {
1938 memcpy(&pGlob->szPath[offPath], pGlob->u.DirEntry.szName, pGlob->u.DirEntry.cbName);
1939 pGlob->szPath[cch++] = RTPATH_SLASH;
1940 pGlob->szPath[cch] = '\0';
1941
1942 if (rtPathGlobExecIsExpVar(pGlob, cch, iComp + 1))
1943 rc = rtPathGlobExecRecursiveVarExp(pGlob, cch, iComp + 1);
1944 else if (rtPathGlobExecIsPlainText(pGlob, cch, iComp + 1))
1945 rc = rtPathGlobExecRecursivePlainText(pGlob, cch, iComp + 1);
1946 else if (pGlob->aComps[pGlob->iFirstComp].fStarStar)
1947 rc = rtPathGlobExecRecursiveStarStar(pGlob, cch, iComp + 1, cch);
1948 else
1949 rc = rtPathGlobExecRecursiveGeneric(pGlob, cch, iComp + 1);
1950 if (rc != VINF_SUCCESS)
1951 return rc;
1952 }
1953 else
1954 pGlob->cPathOverflows++;
1955 }
1956 else
1957 {
1958 AssertMsgBreak(rc == VERR_MISMATCH, ("%Rrc\n", rc));
1959 rc = VINF_SUCCESS;
1960 }
1961 }
1962 }
1963 /*
1964 * RTDirRead failure.
1965 */
1966 else
1967 {
1968 /* The end? */
1969 if (rc == VERR_NO_MORE_FILES)
1970 rc = VINF_SUCCESS;
1971 /* Try skip the entry if we end up with an overflow (szPath can't hold it either then). */
1972 else if (rc == VERR_BUFFER_OVERFLOW)
1973 {
1974 pGlob->cPathOverflows++;
1975 rc = rtPathGlobSkipDirEntry(hDir, cch);
1976 if (RT_SUCCESS(rc))
1977 continue;
1978 }
1979 /* else: Any other error is unexpected and should be reported. */
1980 break;
1981 }
1982 }
1983
1984 RTDirClose(hDir);
1985 }
1986 /* Directory doesn't exist or something else is wrong, ignore this. */
1987 else
1988 rc = VINF_SUCCESS;
1989 return rc;
1990}
1991
1992
1993/**
1994 * Executes a glob search.
1995 *
1996 * @returns IPRT status code.
1997 * @param pGlob The glob instance data.
1998 */
1999static int rtPathGlobExec(PRTPATHGLOB pGlob)
2000{
2001 Assert(pGlob->offFirstPath < sizeof(pGlob->szPath));
2002 Assert(pGlob->szPath[pGlob->offFirstPath] == '\0');
2003
2004 int rc;
2005 if (RT_LIKELY(pGlob->iFirstComp < pGlob->pParsed->cComps))
2006 {
2007 /*
2008 * Call the appropriate function.
2009 */
2010 if (rtPathGlobExecIsExpVar(pGlob, pGlob->offFirstPath, pGlob->iFirstComp))
2011 rc = rtPathGlobExecRecursiveVarExp(pGlob, pGlob->offFirstPath, pGlob->iFirstComp);
2012 else if (rtPathGlobExecIsPlainText(pGlob, pGlob->offFirstPath, pGlob->iFirstComp))
2013 rc = rtPathGlobExecRecursivePlainText(pGlob, pGlob->offFirstPath, pGlob->iFirstComp);
2014 else if (pGlob->aComps[pGlob->iFirstComp].fStarStar)
2015 rc = rtPathGlobExecRecursiveStarStar(pGlob, pGlob->offFirstPath, pGlob->iFirstComp, pGlob->offFirstPath);
2016 else
2017 rc = rtPathGlobExecRecursiveGeneric(pGlob, pGlob->offFirstPath, pGlob->iFirstComp);
2018 }
2019 else
2020 {
2021 /*
2022 * Special case where we only have a root component or tilde expansion.
2023 */
2024 Assert(pGlob->offFirstPath > 0);
2025 rc = RTPathQueryInfoEx(pGlob->szPath, &pGlob->u.ObjInfo, RTFSOBJATTRADD_NOTHING, RTPATH_F_FOLLOW_LINK);
2026 if ( RT_SUCCESS(rc)
2027 && rtPathGlobExecIsMatchFinalWithFileMode(pGlob, pGlob->u.ObjInfo.Attr.fMode))
2028 rc = rtPathGlobAddResult(pGlob, pGlob->offFirstPath,
2029 (pGlob->u.ObjInfo.Attr.fMode & RTFS_TYPE_MASK) >> RTFS_TYPE_DIRENTRYTYPE_SHIFT);
2030 else
2031 rc = VINF_SUCCESS;
2032 }
2033
2034 /*
2035 * Adjust the status code. Check for results, hide RTPATHGLOB_F_FIRST_ONLY
2036 * status code, and add warning if necessary.
2037 */
2038 if (pGlob->cResults > 0)
2039 {
2040 if (rc == VINF_CALLBACK_RETURN)
2041 rc = VINF_SUCCESS;
2042 if (rc == VINF_SUCCESS)
2043 {
2044 if (pGlob->cPathOverflows > 0)
2045 rc = VINF_BUFFER_OVERFLOW;
2046 }
2047 }
2048 else
2049 rc = VERR_FILE_NOT_FOUND;
2050
2051 return rc;
2052}
2053
2054
2055RTDECL(int) RTPathGlob(const char *pszPattern, uint32_t fFlags, PPCRTPATHGLOBENTRY ppHead, uint32_t *pcResults)
2056{
2057 /*
2058 * Input validation.
2059 */
2060 AssertPtrReturn(ppHead, VERR_INVALID_POINTER);
2061 *ppHead = NULL;
2062 if (pcResults)
2063 {
2064 AssertPtrReturn(pcResults, VERR_INVALID_POINTER);
2065 *pcResults = 0;
2066 }
2067 AssertPtrReturn(pszPattern, VERR_INVALID_POINTER);
2068 AssertReturn(!(fFlags & ~RTPATHGLOB_F_MASK), VERR_INVALID_FLAGS);
2069 AssertReturn((fFlags & (RTPATHGLOB_F_NO_DIRS | RTPATHGLOB_F_ONLY_DIRS)) != (RTPATHGLOB_F_NO_DIRS | RTPATHGLOB_F_ONLY_DIRS),
2070 VERR_INVALID_FLAGS);
2071
2072 /*
2073 * Parse the path.
2074 */
2075 size_t cbParsed = RT_OFFSETOF(RTPATHPARSED, aComps[1]); /** @todo 16 after testing */
2076 PRTPATHPARSED pParsed = (PRTPATHPARSED)RTMemTmpAlloc(cbParsed);
2077 AssertReturn(pParsed, VERR_NO_MEMORY);
2078 int rc = RTPathParse(pszPattern, pParsed, cbParsed, RTPATH_STR_F_STYLE_HOST);
2079 if (rc == VERR_BUFFER_OVERFLOW)
2080 {
2081 cbParsed = RT_OFFSETOF(RTPATHPARSED, aComps[pParsed->cComps + 1]);
2082 RTMemTmpFree(pParsed);
2083 pParsed = (PRTPATHPARSED)RTMemTmpAlloc(cbParsed);
2084 AssertReturn(pParsed, VERR_NO_MEMORY);
2085
2086 rc = RTPathParse(pszPattern, pParsed, cbParsed, RTPATH_STR_F_STYLE_HOST);
2087 }
2088 if (RT_SUCCESS(rc))
2089 {
2090 /*
2091 * Check dir slash vs. only/not dir flag.
2092 */
2093 if ( !(fFlags & RTPATHGLOB_F_NO_DIRS)
2094 || ( !(pParsed->fProps & RTPATH_PROP_DIR_SLASH)
2095 && ( !(pParsed->fProps & (RTPATH_PROP_ROOT_SLASH | RTPATH_PROP_UNC))
2096 || pParsed->cComps > 1) ) )
2097 {
2098 if (pParsed->fProps & RTPATH_PROP_DIR_SLASH)
2099 fFlags |= RTPATHGLOB_F_ONLY_DIRS;
2100
2101 /*
2102 * Allocate and initialize the glob state data structure.
2103 */
2104 size_t cbGlob = RT_OFFSETOF(RTPATHGLOB, aComps[pParsed->cComps + 1]);
2105 PRTPATHGLOB pGlob = (PRTPATHGLOB)RTMemTmpAllocZ(cbGlob);
2106 if (pGlob)
2107 {
2108 pGlob->pszPattern = pszPattern;
2109 pGlob->fFlags = fFlags;
2110 pGlob->pParsed = pParsed;
2111 pGlob->ppNext = &pGlob->pHead;
2112 rc = rtPathGlobParse(pGlob, pszPattern, pParsed, fFlags);
2113 if (RT_SUCCESS(rc))
2114 {
2115 /*
2116 * Execute the search.
2117 */
2118 rc = rtPathGlobExec(pGlob);
2119 if (RT_SUCCESS(rc))
2120 {
2121 *ppHead = pGlob->pHead;
2122 if (pcResults)
2123 *pcResults = pGlob->cResults;
2124 }
2125 else
2126 RTPathGlobFree(pGlob->pHead);
2127 }
2128
2129 RTMemTmpFree(pGlob->MatchInstrAlloc.paInstructions);
2130 RTMemTmpFree(pGlob);
2131 }
2132 else
2133 rc = VERR_NO_MEMORY;
2134 }
2135 else
2136 rc = VERR_NOT_FOUND;
2137 }
2138 RTMemTmpFree(pParsed);
2139 return rc;
2140
2141
2142}
2143
2144
2145RTDECL(void) RTPathGlobFree(PCRTPATHGLOBENTRY pHead)
2146{
2147 PRTPATHGLOBENTRY pCur = (PRTPATHGLOBENTRY)pHead;
2148 while (pCur)
2149 {
2150 PRTPATHGLOBENTRY pNext = pCur->pNext;
2151 pCur->pNext = NULL;
2152 RTMemFree(pCur);
2153 pCur = pNext;
2154 }
2155}
2156
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette