VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/path/RTPathGlob.cpp@ 72186

Last change on this file since 72186 was 69753, checked in by vboxsync, 7 years ago

iprt/dir: Morphing PRTDIR into a handle named RTDIR. (Been wanting to correct this for years. Don't know why I made it a pointer rather than an abstract handle like everything else.)

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 83.8 KB
Line 
1/* $Id: RTPathGlob.cpp 69753 2017-11-19 14:27:58Z vboxsync $ */
2/** @file
3 * IPRT - RTPathGlob
4 */
5
6/*
7 * Copyright (C) 2006-2017 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*********************************************************************************************************************************
29* Header Files *
30*********************************************************************************************************************************/
31#include "internal/iprt.h"
32#include <iprt/path.h>
33
34#include <iprt/asm.h>
35#include <iprt/assert.h>
36#include <iprt/buildconfig.h>
37#include <iprt/ctype.h>
38#include <iprt/dir.h>
39#include <iprt/env.h>
40#include <iprt/err.h>
41#include <iprt/mem.h>
42#include <iprt/string.h>
43#include <iprt/uni.h>
44
45#if defined(RT_OS_WINDOWS)
46# include <iprt/win/windows.h>
47# include "../../r3/win/internal-r3-win.h"
48
49#elif defined(RT_OS_OS2)
50# define INCL_BASE
51# include <os2.h>
52# undef RT_MAX /* collision */
53
54#endif
55
56
57/*********************************************************************************************************************************
58* Defined Constants And Macros *
59*********************************************************************************************************************************/
/** Maximum number of results. */
#define RTPATHGLOB_MAX_RESULTS          _32K
/** Maximum number of zero-or-more wildcards in a pattern.
 * This limits stack usage and recursion depth, as well as execution time. */
#define RTPATHMATCH_MAX_ZERO_OR_MORE    24
/** Maximum number of variable items, i.e. the upper bound on how many value
 * items are queried from a single variable before matching gives up. */
#define RTPATHMATCH_MAX_VAR_ITEMS       _4K
67
68
69
70/*********************************************************************************************************************************
71* Structures and Typedefs *
72*********************************************************************************************************************************/
/**
 * Matching operation.
 *
 * These are the opcodes of the small matching program that is executed
 * against a single path component (see RTPATHMATCHCORE).
 */
typedef enum RTPATHMATCHOP
{
    RTPATHMATCHOP_INVALID = 0,
    /** EOS: Returns a match if at end of string. */
    RTPATHMATCHOP_RETURN_MATCH_IF_AT_END,
    /** Asterisk: Returns a match (trailing asterisk). */
    RTPATHMATCHOP_RETURN_MATCH,
    /** Asterisk: Returns a match (just asterisk), unless it's '.' or '..'. */
    RTPATHMATCHOP_RETURN_MATCH_EXCEPT_DOT_AND_DOTDOT,
    /** Plain text: Case sensitive string compare. */
    RTPATHMATCHOP_STRCMP,
    /** Plain text: Case insensitive string compare. */
    RTPATHMATCHOP_STRICMP,
    /** Question marks: Skips exactly one code point. */
    RTPATHMATCHOP_SKIP_ONE_CODEPOINT,
    /** Question marks: Skips exactly RTPATHMATCHCORE::cch code points. */
    RTPATHMATCHOP_SKIP_MULTIPLE_CODEPOINTS,
    /** Char set: Requires the next codepoint to be in the ASCII-7 set defined by
     *  RTPATHMATCHCORE::pch & RTPATHMATCHCORE::cch.  No ranges. */
    RTPATHMATCHOP_CODEPOINT_IN_SET_ASCII7,
    /** Char set: Requires the next codepoint to not be in the ASCII-7 set defined
     *  by RTPATHMATCHCORE::pch & RTPATHMATCHCORE::cch.  No ranges. */
    RTPATHMATCHOP_CODEPOINT_NOT_IN_SET_ASCII7,
    /** Char set: Requires the next codepoint to be in the extended set defined by
     *  RTPATHMATCHCORE::pch & RTPATHMATCHCORE::cch.  Ranges, UTF-8. */
    RTPATHMATCHOP_CODEPOINT_IN_SET_EXTENDED,
    /** Char set: Requires the next codepoint to not be in the extended set defined
     *  by RTPATHMATCHCORE::pch & RTPATHMATCHCORE::cch.  Ranges, UTF-8. */
    RTPATHMATCHOP_CODEPOINT_NOT_IN_SET_EXTENDED,
    /** Variable: Case sensitive variable value compare, RTPATHMATCHCORE::uOp2 is
     *  the variable table index. */
    RTPATHMATCHOP_VARIABLE_VALUE_CMP,
    /** Variable: Case insensitive variable value compare, RTPATHMATCHCORE::uOp2 is
     *  the variable table index. */
    RTPATHMATCHOP_VARIABLE_VALUE_ICMP,
    /** Asterisk: Match zero or more code points, there must be at least
     * RTPATHMATCHCORE::cch code points after it. */
    RTPATHMATCHOP_ZERO_OR_MORE,
    /** Asterisk: Match zero or more code points, there must be at least
     * RTPATHMATCHCORE::cch code points after it, unless it's '.' or '..'. */
    RTPATHMATCHOP_ZERO_OR_MORE_EXCEPT_DOT_AND_DOTDOT,
    /** End of valid operations. */
    RTPATHMATCHOP_END
} RTPATHMATCHOP;
120
/**
 * Matching instruction.
 *
 * One element of a matching program; how the operands are interpreted depends
 * on the opcode (see RTPATHMATCHOP).
 */
typedef struct RTPATHMATCHCORE
{
    /** The action to take. */
    RTPATHMATCHOP       enmOpCode;
    /** Generic value operand (e.g. the variable table index for the variable
     *  compare opcodes). */
    uint16_t            uOp2;
    /** Generic length operand. */
    uint16_t            cch;
    /** Generic string pointer operand. */
    const char         *pch;
} RTPATHMATCHCORE;
/** Pointer to a matching instruction. */
typedef RTPATHMATCHCORE *PRTPATHMATCHCORE;
/** Pointer to a const matching instruction. */
typedef RTPATHMATCHCORE const *PCRTPATHMATCHCORE;
139
/**
 * Path matching instruction allocator.
 */
typedef struct RTPATHMATCHALLOC
{
    /** Allocated array of instructions. */
    PRTPATHMATCHCORE    paInstructions;
    /** Index of the next free entry in paInstructions. */
    uint32_t            iNext;
    /** Number of instructions allocated. */
    uint32_t            cAllocated;
} RTPATHMATCHALLOC;
/** Pointer to a matching instruction allocator. */
typedef RTPATHMATCHALLOC *PRTPATHMATCHALLOC;
154
/**
 * Path matching cache, mainly intended for variables like the PATH.
 *
 * Currently only a placeholder; the query callbacks receive a pointer to it
 * but do not use it yet.
 */
typedef struct RTPATHMATCHCACHE
{
    /** @todo optimize later. */
    uint32_t            iNothingYet;
} RTPATHMATCHCACHE;
/** Pointer to a path matching cache. */
typedef RTPATHMATCHCACHE *PRTPATHMATCHCACHE;
165
166
167
/**
 * Parsed path entry.
 *
 * Per-component information about the pattern, packed into 32 bits.
 */
typedef struct RTPATHGLOBPPE
{
    /** Normal: Index into RTPATHGLOB::MatchInstrAlloc.paInstructions. */
    uint32_t            iMatchProg : 16;
    /** Set if this is a normal entry which is matched using iMatchProg. */
    uint32_t            fNormal : 1;
    /** !fNormal: Plain name that can be dealt with without enumerating the
     * whole directory, unless of course the file system is case sensitive and
     * the globbing isn't (that needs figuring out on a per directory basis). */
    uint32_t            fPlain : 1;
    /** !fNormal: Match zero or more subdirectories. */
    uint32_t            fStarStar : 1;
    /** !fNormal: The whole component is a variable expansion. */
    uint32_t            fExpVariable : 1;

    /** Filter: Set if it only matches directories. */
    uint32_t            fDir : 1;
    /** Set if it's the final component. */
    uint32_t            fFinal : 1;

    /** Unused bits. */
    uint32_t            fReserved : 2+8;
} RTPATHGLOBPPE;
193
194
/**
 * Path globbing state.
 *
 * Holds the path buffer being assembled, scratch buffers, the result list,
 * the matching instructions and the per-component pattern information.
 */
typedef struct RTPATHGLOB
{
    /** Path buffer. */
    char                szPath[RTPATH_MAX];
    /** Temporary buffers. */
    union
    {
        /** File system object info structure. */
        RTFSOBJINFO     ObjInfo;
        /** Directory entry buffer. */
        RTDIRENTRY      DirEntry;
        /** Padding the buffer to an unreasonably large size. */
        uint8_t         abPadding[RTPATH_MAX + sizeof(RTDIRENTRY)];
    } u;


    /** Where to insert the next one.*/
    PRTPATHGLOBENTRY   *ppNext;
    /** The head pointer. */
    PRTPATHGLOBENTRY    pHead;
    /** Result count. */
    uint32_t            cResults;
    /** Counts path overflows. */
    uint32_t            cPathOverflows;
    /** The input flags. */
    uint32_t            fFlags;
    /** Matching instruction allocator. */
    RTPATHMATCHALLOC    MatchInstrAlloc;
    /** Matching state. */
    RTPATHMATCHCACHE    MatchCache;

    /** The pattern string. */
    const char         *pszPattern;
    /** The parsed path. */
    PRTPATHPARSED       pParsed;
    /** The component to start with. */
    uint16_t            iFirstComp;
    /** The corresponding path offset (previous components already present). */
    uint16_t            offFirstPath;
    /** Path component information we need.
     * NOTE(review): declared with one element; presumably the structure is
     * over-allocated to hold one entry per pattern component - confirm against
     * the allocation code. */
    RTPATHGLOBPPE       aComps[1];
} RTPATHGLOB;
/** Pointer to the path globbing state. */
typedef RTPATHGLOB *PRTPATHGLOB;
238
239
/**
 * Matching variable lookup table.
 * Currently so small we don't bother sorting it and doing binary lookups.
 */
typedef struct RTPATHMATCHVAR
{
    /** The variable name. */
    const char     *pszName;
    /** The variable name length. */
    uint16_t        cchName;
    /** Only available as the very first component. */
    bool            fFirstOnly;

    /**
     * Queries a given variable value.
     *
     * @returns IPRT status code.
     * @retval  VERR_BUFFER_OVERFLOW
     * @retval  VERR_TRY_AGAIN if the caller should skip this value item and try the
     *          next one instead (e.g. env var not present).
     * @retval  VINF_EOF when retrieving the last one, if possible.
     * @retval  VERR_EOF when @a iItem is past the item space.
     *
     * @param   iItem       The variable value item to retrieve. (A variable may
     *                      have more than one value, e.g. 'BothProgramFile' on a
     *                      64-bit system or 'Path'.)
     * @param   pszBuf      Where to return the value.
     * @param   cbBuf       The buffer size.
     * @param   pcchValue   Where to return the length of the return string.
     * @param   pCache      Pointer to the path matching cache.  May speed up
     *                      enumerating PATH items and similar.
     */
    DECLCALLBACKMEMBER(int, pfnQuery)(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue, PRTPATHMATCHCACHE pCache);

    /**
     * Matching method, optional.
     *
     * When NULL, matching is done by enumerating the values via pfnQuery.
     *
     * @returns IPRT status code.
     * @retval  VINF_SUCCESS on match.
     * @retval  VERR_MISMATCH on mismatch.
     *
     * @param   pchMatch    String to match with (not terminated).
     * @param   cchMatch    The length of what we match with.
     * @param   fIgnoreCase Whether to ignore case or not when comparing.
     * @param   pcchMatched Where to return the length of the match (value length).
     */
    DECLCALLBACKMEMBER(int, pfnMatch)(const char *pchMatch, size_t cchMatch, bool fIgnoreCase, size_t *pcchMatched);

} RTPATHMATCHVAR;
289
290
291/*********************************************************************************************************************************
292* Internal Functions *
293*********************************************************************************************************************************/
/* Forward declarations of the recursive glob execution workers; their
   definitions are not part of the section documented here. */
static int rtPathGlobExecRecursiveStarStar(PRTPATHGLOB pGlob, size_t offPath, uint32_t iStarStarComp, size_t offStarStarPath);
static int rtPathGlobExecRecursiveVarExp(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp);
static int rtPathGlobExecRecursivePlainText(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp);
static int rtPathGlobExecRecursiveGeneric(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp);
298
299
/**
 * Implements the two variable access functions for a simple one value variable.
 *
 * Generates rtPathVarQuery_<a_Name> and rtPathVarMatch_<a_Name>.
 *
 * The query function returns the value for item 0 (VINF_EOF as it is the only
 * one), VERR_BUFFER_OVERFLOW if it does not fit, and VERR_EOF for any other
 * item.
 *
 * The match function checks whether the input starts with the value.  The
 * value must therefore not be longer than the input (cchValue <= cchMatch);
 * the caller advances the input by the returned match length, so reporting a
 * match longer than the input would run past the end of it.
 *
 * @param   a_Name          The variable name (function name suffix).
 * @param   a_GetStrExpr    Expression yielding the zero terminated value.
 */
#define RTPATHMATCHVAR_SIMPLE(a_Name, a_GetStrExpr) \
    static DECLCALLBACK(int) RT_CONCAT(rtPathVarQuery_,a_Name)(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue, \
                                                               PRTPATHMATCHCACHE pCache) \
    { \
        if (iItem == 0) \
        { \
            const char *pszValue = a_GetStrExpr; \
            size_t      cchValue = strlen(pszValue); \
            if (cchValue + 1 <= cbBuf) \
            { \
                memcpy(pszBuf, pszValue, cchValue + 1); \
                *pcchValue = cchValue; \
                return VINF_EOF; \
            } \
            return VERR_BUFFER_OVERFLOW; \
        } \
        NOREF(pCache);\
        return VERR_EOF; \
    } \
    static DECLCALLBACK(int) RT_CONCAT(rtPathVarMatch_,a_Name)(const char *pchMatch, size_t cchMatch, bool fIgnoreCase, \
                                                               size_t *pcchMatched) \
    { \
        const char *pszValue = a_GetStrExpr; \
        size_t      cchValue = strlen(pszValue); \
        if (   cchValue <= cchMatch \
            && (  !fIgnoreCase \
                ? memcmp(pszValue, pchMatch, cchValue) == 0 \
                : RTStrNICmp(pszValue, pchMatch, cchValue) == 0) ) \
        { \
            *pcchMatched = cchValue; \
            return VINF_SUCCESS; \
        } \
        return VERR_MISMATCH; \
    } \
    typedef int RT_CONCAT(DummyColonType_,a_Name)
338
/**
 * Implements mapping a glob variable to an environment variable.
 *
 * Generates rtPathVarQuery_<a_Name> and rtPathVarMatch_<a_Name>.
 *
 * The query function returns the environment variable value for item 0
 * (VINF_EOF), VERR_EOF if the variable is not set or for any other item, and
 * passes on other RTEnvGetEx failures.
 *
 * The match function checks whether the input starts with the value.  The
 * value must therefore not be longer than the input (cchValue <= cchMatch);
 * the caller advances the input by the returned match length.
 *
 * @param   a_Name          The variable name (function name suffix).
 * @param   a_pszEnvVar     The environment variable name.
 * @param   a_cbMaxValue    The max expected value size (stack buffer size used
 *                          by the match function).
 */
#define RTPATHMATCHVAR_SIMPLE_ENVVAR(a_Name, a_pszEnvVar, a_cbMaxValue) \
    static DECLCALLBACK(int) RT_CONCAT(rtPathVarQuery_,a_Name)(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue, \
                                                               PRTPATHMATCHCACHE pCache) \
    { \
        if (iItem == 0) \
        { \
            int rc = RTEnvGetEx(RTENV_DEFAULT, a_pszEnvVar, pszBuf, cbBuf, pcchValue); \
            if (RT_SUCCESS(rc)) \
                return VINF_EOF; \
            if (rc != VERR_ENV_VAR_NOT_FOUND) \
                return rc; \
        } \
        NOREF(pCache);\
        return VERR_EOF; \
    } \
    static DECLCALLBACK(int) RT_CONCAT(rtPathVarMatch_,a_Name)(const char *pchMatch, size_t cchMatch, bool fIgnoreCase, \
                                                               size_t *pcchMatched) \
    { \
        char   szValue[a_cbMaxValue]; \
        size_t cchValue; \
        int rc = RTEnvGetEx(RTENV_DEFAULT, a_pszEnvVar, szValue, sizeof(szValue), &cchValue); \
        if (   RT_SUCCESS(rc) \
            && cchValue <= cchMatch \
            && (  !fIgnoreCase \
                ? memcmp(szValue, pchMatch, cchValue) == 0 \
                : RTStrNICmp(szValue, pchMatch, cchValue) == 0) ) \
        { \
            *pcchMatched = cchValue; \
            return VINF_SUCCESS; \
        } \
        return VERR_MISMATCH; \
    } \
    typedef int RT_CONCAT(DummyColonType_,a_Name)
375
/**
 * Implements mapping a glob variable to multiple environment variable values.
 *
 * Generates rtPathVarQuery_<a_Name> and rtPathVarMatch_<a_Name>.
 *
 * The query function maps item N to the N-th environment variable in the
 * table: VINF_SUCCESS / VINF_EOF (last table entry) on success,
 * VERR_TRY_AGAIN if that variable is not set, VERR_EOF past the table end.
 *
 * The match function tries each variable in turn and succeeds on the first
 * value the input starts with.  A value must not be longer than the input
 * (cchValue <= cchMatch); the caller advances the input by the returned match
 * length.
 *
 * @param   a_Name          The variable name.
 * @param   a_apszVarNames  Assumes to be a global variable that RT_ELEMENTS
 *                          works correctly on.
 * @param   a_cbMaxValue    The max expected value size.
 */
#define RTPATHMATCHVAR_MULTIPLE_ENVVARS(a_Name, a_apszVarNames, a_cbMaxValue) \
    static DECLCALLBACK(int) RT_CONCAT(rtPathVarQuery_,a_Name)(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue, \
                                                               PRTPATHMATCHCACHE pCache) \
    { \
        if (iItem < RT_ELEMENTS(a_apszVarNames)) \
        { \
            int rc = RTEnvGetEx(RTENV_DEFAULT, a_apszVarNames[iItem], pszBuf, cbBuf, pcchValue); \
            if (RT_SUCCESS(rc)) \
                return iItem + 1 == RT_ELEMENTS(a_apszVarNames) ? VINF_EOF : VINF_SUCCESS; \
            if (rc == VERR_ENV_VAR_NOT_FOUND) \
                rc = VERR_TRY_AGAIN; \
            return rc; \
        } \
        NOREF(pCache);\
        return VERR_EOF; \
    } \
    static DECLCALLBACK(int) RT_CONCAT(rtPathVarMatch_,a_Name)(const char *pchMatch, size_t cchMatch, bool fIgnoreCase, \
                                                               size_t *pcchMatched) \
    { \
        for (uint32_t iItem = 0; iItem < RT_ELEMENTS(a_apszVarNames); iItem++) \
        { \
            char   szValue[a_cbMaxValue]; \
            size_t cchValue; \
            int rc = RTEnvGetEx(RTENV_DEFAULT, a_apszVarNames[iItem], szValue, sizeof(szValue), &cchValue);\
            if (   RT_SUCCESS(rc) \
                && cchValue <= cchMatch \
                && (  !fIgnoreCase \
                    ? memcmp(szValue, pchMatch, cchValue) == 0 \
                    : RTStrNICmp(szValue, pchMatch, cchValue) == 0) ) \
            { \
                *pcchMatched = cchValue; \
                return VINF_SUCCESS; \
            } \
        } \
        return VERR_MISMATCH; \
    } \
    typedef int RT_CONCAT(DummyColonType_,a_Name)
421
422
/* Instantiate the query + match function pairs referenced by g_aVariables. */

/* Build configuration: target architecture string and ARCH_BITS as a string. */
RTPATHMATCHVAR_SIMPLE(Arch, RTBldCfgTargetArch());
RTPATHMATCHVAR_SIMPLE(Bits, RT_XSTR(ARCH_BITS));
#ifdef RT_OS_WINDOWS
/* Windows: variables mapped directly onto a single environment variable. */
RTPATHMATCHVAR_SIMPLE_ENVVAR(WinAppData, "AppData", RTPATH_MAX);
RTPATHMATCHVAR_SIMPLE_ENVVAR(WinProgramData, "ProgramData", RTPATH_MAX);
RTPATHMATCHVAR_SIMPLE_ENVVAR(WinProgramFiles, "ProgramFiles", RTPATH_MAX);
RTPATHMATCHVAR_SIMPLE_ENVVAR(WinCommonProgramFiles, "CommonProgramFiles", RTPATH_MAX);
# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
RTPATHMATCHVAR_SIMPLE_ENVVAR(WinOtherProgramFiles, "ProgramFiles(x86)", RTPATH_MAX);
RTPATHMATCHVAR_SIMPLE_ENVVAR(WinOtherCommonProgramFiles, "CommonProgramFiles(x86)", RTPATH_MAX);
# else
#  error "Port ME!"
# endif
/* Environment variables enumerated by the 'AllProgramFiles' variable; the
   (x86) entry is only included in AMD64 builds. */
static const char * const a_apszWinProgramFilesVars[] =
{
    "ProgramFiles",
# ifdef RT_ARCH_AMD64
    "ProgramFiles(x86)",
# endif
};
RTPATHMATCHVAR_MULTIPLE_ENVVARS(WinAllProgramFiles, a_apszWinProgramFilesVars, RTPATH_MAX);
/* Environment variables enumerated by the 'AllCommonProgramFiles' variable;
   the (x86) entry is only included in AMD64 builds. */
static const char * const a_apszWinCommonProgramFilesVars[] =
{
    "CommonProgramFiles",
# ifdef RT_ARCH_AMD64
    "CommonProgramFiles(x86)",
# endif
};
RTPATHMATCHVAR_MULTIPLE_ENVVARS(WinAllCommonProgramFiles, a_apszWinCommonProgramFilesVars, RTPATH_MAX);
#endif
453
454
455/**
456 * @interface_method_impl{RTPATHMATCHVAR,pfnQuery, Enumerates the PATH}
457 */
458static DECLCALLBACK(int) rtPathVarQuery_Path(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue,
459 PRTPATHMATCHCACHE pCache)
460{
461 RT_NOREF_PV(pCache);
462
463 /*
464 * Query the PATH value.
465 */
466/** @todo cache this in pCache with iItem and offset. */
467 char *pszPathFree = NULL;
468 char *pszPath = pszBuf;
469 size_t cchActual;
470 const char *pszVarNm = "PATH";
471 int rc = RTEnvGetEx(RTENV_DEFAULT, pszVarNm, pszPath, cbBuf, &cchActual);
472#ifdef RT_OS_WINDOWS
473 if (rc == VERR_ENV_VAR_NOT_FOUND)
474 rc = RTEnvGetEx(RTENV_DEFAULT, pszVarNm = "Path", pszPath, cbBuf, &cchActual);
475#endif
476 if (rc == VERR_BUFFER_OVERFLOW)
477 {
478 for (uint32_t iTry = 0; iTry < 10; iTry++)
479 {
480 size_t cbPathBuf = RT_ALIGN_Z(cchActual + 1 + 64 * iTry, 64);
481 pszPathFree = (char *)RTMemTmpAlloc(cbPathBuf);
482 rc = RTEnvGetEx(RTENV_DEFAULT, pszVarNm, pszPathFree, cbPathBuf, &cchActual);
483 if (RT_SUCCESS(rc))
484 break;
485 RTMemTmpFree(pszPathFree);
486 AssertReturn(cchActual >= cbPathBuf, VERR_INTERNAL_ERROR_3);
487 }
488 pszPath = pszPathFree;
489 }
490
491 /*
492 * Spool forward to the given PATH item.
493 */
494 rc = VERR_EOF;
495#if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2)
496 const char chSep = ';';
497#else
498 const char chSep = ':';
499#endif
500 while (*pszPath != '\0')
501 {
502 char *pchSep = strchr(pszPath, chSep);
503
504 /* We ignore empty strings, which is probably not entirely correct,
505 but works better on DOS based system with many entries added
506 without checking whether there is a trailing separator or not.
507 Thus, the current directory is only searched if a '.' is present
508 in the PATH. */
509 if (pchSep == pszPath)
510 pszPath++;
511 else if (iItem > 0)
512 {
513 /* If we didn't find a separator, the item doesn't exists. Quit. */
514 if (!pchSep)
515 break;
516
517 pszPath = pchSep + 1;
518 iItem--;
519 }
520 else
521 {
522 /* We've reached the item we wanted. */
523 size_t cchComp = pchSep ? pchSep - pszPath : strlen(pszPath);
524 if (cchComp < cbBuf)
525 {
526 if (pszBuf != pszPath)
527 memmove(pszBuf, pszPath, cchComp);
528 pszBuf[cchComp] = '\0';
529 rc = pchSep ? VINF_SUCCESS : VINF_EOF;
530 }
531 else
532 rc = VERR_BUFFER_OVERFLOW;
533 *pcchValue = cchComp;
534 break;
535 }
536 }
537
538 if (pszPathFree)
539 RTMemTmpFree(pszPathFree);
540 return rc;
541}
542
543
#if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2)
/**
 * @interface_method_impl{RTPATHMATCHVAR,pfnQuery,
 *      The system drive letter + colon.}.
 *
 * Only item 0 exists.  On success the two character string "X:" is returned
 * together with VINF_EOF.  VERR_INTERNAL_ERROR_4 is returned if the drive
 * cannot be determined.
 */
static DECLCALLBACK(int) rtPathVarQuery_DosSystemDrive(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue,
                                                       PRTPATHMATCHCACHE pCache)
{
    RT_NOREF_PV(pCache);

    if (iItem == 0)
    {
        AssertReturn(cbBuf >= 3, VERR_BUFFER_OVERFLOW);

# ifdef RT_OS_WINDOWS
        /* Query into a properly typed and sized local buffer rather than
           reinterpreting the caller's char buffer as UTF-16.  A return value
           that is not smaller than the buffer size means the buffer was too
           small and its content must not be used. */
        AssertReturn(g_pfnGetSystemWindowsDirectoryW, VERR_SYMBOL_NOT_FOUND);
        RTUTF16 wszTmp[MAX_PATH];
        UINT    cwc = g_pfnGetSystemWindowsDirectoryW(wszTmp, MAX_PATH);
        if (cwc >= 2 && cwc < MAX_PATH)
        {
            RTUTF16 wcDrive = wszTmp[0];
            if (   RT_C_IS_ALPHA(wcDrive)
                && wszTmp[1] == ':')
            {
                pszBuf[0] = (char)wcDrive;
                pszBuf[1] = ':';
                pszBuf[2] = '\0';
                *pcchValue = 2;
                return VINF_EOF;
            }
        }
# else
        ULONG  ulDrive = ~(ULONG)0;
        APIRET rc = DosQuerySysInfo(QSV_BOOT_DRIVE, QSV_BOOT_DRIVE, &ulDrive, sizeof(ulDrive));
        ulDrive--; /* 1 = 'A' */
        /* ulDrive is now a zero based drive index, so the valid range is
           0..25 ('A'..'Z').  Zero wraps around above and is rejected too. */
        if (   rc == NO_ERROR
            && ulDrive <= (ULONG)('Z' - 'A'))
        {
            pszBuf[0] = (char)ulDrive + 'A';
            pszBuf[1] = ':';
            pszBuf[2] = '\0';
            *pcchValue = 2;
            return VINF_EOF;
        }
# endif
        return VERR_INTERNAL_ERROR_4;
    }
    return VERR_EOF;
}
#endif
596
597
#ifdef RT_OS_WINDOWS
/**
 * @interface_method_impl{RTPATHMATCHVAR,pfnQuery,
 *      The system root directory (C:\Windows).}.
 *
 * Only item 0 exists.  The directory is queried as UTF-16 and converted to
 * UTF-8 into @a pszBuf; the conversion status is returned.
 * VERR_BUFFER_OVERFLOW is returned if the directory does not fit into the
 * MAX_PATH sized query buffer.
 */
static DECLCALLBACK(int) rtPathVarQuery_WinSystemRoot(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue,
                                                      PRTPATHMATCHCACHE pCache)
{
    RT_NOREF_PV(pCache);

    if (iItem == 0)
    {
        Assert(pszBuf); Assert(cbBuf);
        AssertReturn(g_pfnGetSystemWindowsDirectoryW, VERR_SYMBOL_NOT_FOUND);
        RTUTF16 wszSystemRoot[MAX_PATH];
        UINT cchSystemRoot = g_pfnGetSystemWindowsDirectoryW(wszSystemRoot, MAX_PATH);

        /* A return value that is not smaller than the buffer size is the
           required size, not a length; the buffer content is unusable then. */
        if (cchSystemRoot >= MAX_PATH)
            return VERR_BUFFER_OVERFLOW;
        if (cchSystemRoot > 0)
            return RTUtf16ToUtf8Ex(wszSystemRoot, cchSystemRoot, &pszBuf, cbBuf, pcchValue);
        return RTErrConvertFromWin32(GetLastError());
    }
    return VERR_EOF;
}
#endif
621
/* The variable implementation macros are not needed beyond this point. */
#undef RTPATHMATCHVAR_SIMPLE
#undef RTPATHMATCHVAR_SIMPLE_ENVVAR
#undef RTPATHMATCHVAR_MULTIPLE_ENVVARS
625
/**
 * Variables.
 *
 * Each entry gives: name and name length (RT_STR_TUPLE), whether the variable
 * is only valid as the very first component, the query function, and the
 * optional match function.  Entries with a NULL match function are matched
 * by enumerating their values through the query function.
 */
static RTPATHMATCHVAR const g_aVariables[] =
{
    { RT_STR_TUPLE("Arch"),                     false,  rtPathVarQuery_Arch, rtPathVarMatch_Arch },
    { RT_STR_TUPLE("Bits"),                     false,  rtPathVarQuery_Bits, rtPathVarMatch_Bits },
    { RT_STR_TUPLE("Path"),                     true,   rtPathVarQuery_Path, NULL },
#if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2)
    { RT_STR_TUPLE("SystemDrive"),              true,   rtPathVarQuery_DosSystemDrive, NULL },
#endif
#ifdef RT_OS_WINDOWS
    { RT_STR_TUPLE("SystemRoot"),               true,   rtPathVarQuery_WinSystemRoot, NULL },
    { RT_STR_TUPLE("AppData"),                  true,   rtPathVarQuery_WinAppData, rtPathVarMatch_WinAppData },
    { RT_STR_TUPLE("ProgramData"),              true,   rtPathVarQuery_WinProgramData, rtPathVarMatch_WinProgramData },
    { RT_STR_TUPLE("ProgramFiles"),             true,   rtPathVarQuery_WinProgramFiles, rtPathVarMatch_WinProgramFiles },
    { RT_STR_TUPLE("OtherProgramFiles"),        true,   rtPathVarQuery_WinOtherProgramFiles, rtPathVarMatch_WinOtherProgramFiles },
    { RT_STR_TUPLE("AllProgramFiles"),          true,   rtPathVarQuery_WinAllProgramFiles, rtPathVarMatch_WinAllProgramFiles },
    { RT_STR_TUPLE("CommonProgramFiles"),       true,   rtPathVarQuery_WinCommonProgramFiles, rtPathVarMatch_WinCommonProgramFiles },
    { RT_STR_TUPLE("OtherCommonProgramFiles"),  true,   rtPathVarQuery_WinOtherCommonProgramFiles, rtPathVarMatch_WinOtherCommonProgramFiles },
    { RT_STR_TUPLE("AllCommonProgramFiles"),    true,   rtPathVarQuery_WinAllCommonProgramFiles, rtPathVarMatch_WinAllCommonProgramFiles },
#endif
};
649
650
651
652/**
653 * Handles a complicated set.
654 *
655 * A complicated set is either using ranges, character classes or code points
656 * outside the ASCII-7 range.
657 *
658 * @returns VINF_SUCCESS or VERR_MISMATCH. May also return UTF-8 decoding
659 * errors as well as VERR_PATH_MATCH_FEATURE_NOT_IMPLEMENTED.
660 *
661 * @param ucInput The input code point to match with.
662 * @param pchSet The start of the set specification (after caret).
663 * @param cchSet The length of the set specification.
664 */
665static int rtPathMatchExecExtendedSet(RTUNICP ucInput, const char *pchSet, size_t cchSet)
666{
667 while (cchSet > 0)
668 {
669 RTUNICP ucSet;
670 int rc = RTStrGetCpNEx(&pchSet, &cchSet, &ucSet);
671 AssertRCReturn(rc, rc);
672
673 /*
674 * Check for character class, collating symbol and equvalence class.
675 */
676 if (ucSet == '[' && cchSet > 0)
677 {
678 char chNext = *pchSet;
679 if (chNext == ':')
680 {
681#define CHECK_CHAR_CLASS(a_szClassNm, a_BoolTestExpr) \
682 if ( cchSet >= sizeof(a_szClassNm) \
683 && memcmp(pchSet, a_szClassNm "]", sizeof(a_szClassNm)) == 0) \
684 { \
685 if (a_BoolTestExpr) \
686 return VINF_SUCCESS; \
687 pchSet += sizeof(a_szClassNm); \
688 cchSet -= sizeof(a_szClassNm); \
689 continue; \
690 } do { } while (0)
691
692 CHECK_CHAR_CLASS(":alpha:", RTUniCpIsAlphabetic(ucInput));
693 CHECK_CHAR_CLASS(":alnum:", RTUniCpIsAlphabetic(ucInput) || RTUniCpIsDecDigit(ucInput)); /** @todo figure what's correct here and fix uni.h */
694 CHECK_CHAR_CLASS(":blank:", ucInput == ' ' || ucInput == '\t');
695 CHECK_CHAR_CLASS(":cntrl:", ucInput < 31 || ucInput == 127);
696 CHECK_CHAR_CLASS(":digit:", RTUniCpIsDecDigit(ucInput));
697 CHECK_CHAR_CLASS(":lower:", RTUniCpIsLower(ucInput));
698 CHECK_CHAR_CLASS(":print:", RTUniCpIsAlphabetic(ucInput) || (RT_C_IS_PRINT(ucInput) && ucInput < 127)); /** @todo fixme*/
699 CHECK_CHAR_CLASS(":punct:", RT_C_IS_PRINT(ucInput) && ucInput < 127); /** @todo fixme*/
700 CHECK_CHAR_CLASS(":space:", RTUniCpIsSpace(ucInput));
701 CHECK_CHAR_CLASS(":upper:", RTUniCpIsUpper(ucInput));
702 CHECK_CHAR_CLASS(":xdigit:", RTUniCpIsHexDigit(ucInput));
703 AssertMsgFailedReturn(("Unknown or malformed char class: '%.*s'\n", cchSet + 1, pchSet - 1),
704 VERR_PATH_GLOB_UNKNOWN_CHAR_CLASS);
705#undef CHECK_CHAR_CLASS
706 }
707 /** @todo implement collating symbol and equvalence class. */
708 else if (chNext == '=' || chNext == '.')
709 AssertFailedReturn(VERR_PATH_MATCH_FEATURE_NOT_IMPLEMENTED);
710 }
711
712 /*
713 * Check for range (leading or final dash does not constitute a range).
714 */
715 if (cchSet > 1 && *pchSet == '-')
716 {
717 pchSet++; /* skip dash */
718 cchSet--;
719
720 RTUNICP ucSet2;
721 rc = RTStrGetCpNEx(&pchSet, &cchSet, &ucSet2);
722 AssertRCReturn(rc, rc);
723 Assert(ucSet < ucSet2);
724 if (ucInput >= ucSet && ucInput <= ucSet2)
725 return VINF_SUCCESS;
726 }
727 /*
728 * Single char comparison.
729 */
730 else if (ucInput == ucSet)
731 return VINF_SUCCESS;
732 }
733 return VERR_MISMATCH;
734}
735
736
737/**
738 * Variable matching fallback using the query function.
739 *
740 * This must not be inlined as it consuming a lot of stack! Which is why it's
741 * placed a couple of functions away from the recursive rtPathExecMatch.
742 *
743 * @returns VINF_SUCCESS or VERR_MISMATCH.
744 * @param pchInput The current input position.
745 * @param cchInput The amount of input left..
746 * @param idxVar The variable table index.
747 * @param fIgnoreCase Whether to ignore case when comparing.
748 * @param pcchMatched Where to return how much we actually matched up.
749 * @param pCache Pointer to the path matching cache.
750 */
751DECL_NO_INLINE(static, int) rtPathMatchExecVariableFallback(const char *pchInput, size_t cchInput, uint16_t idxVar,
752 bool fIgnoreCase, size_t *pcchMatched, PRTPATHMATCHCACHE pCache)
753{
754 for (uint32_t iItem = 0; iItem < RTPATHMATCH_MAX_VAR_ITEMS; iItem++)
755 {
756 char szValue[RTPATH_MAX];
757 size_t cchValue;
758 int rc = g_aVariables[idxVar].pfnQuery(iItem, szValue, sizeof(szValue), &cchValue, pCache);
759 if (RT_SUCCESS(rc))
760 {
761 if (cchValue <= cchInput)
762 {
763 if ( !fIgnoreCase
764 ? memcmp(pchInput, szValue, cchValue) == 0
765 : RTStrNICmp(pchInput, szValue, cchValue) == 0)
766 {
767 *pcchMatched = cchValue;
768 return VINF_SUCCESS;
769 }
770 }
771 if (rc == VINF_EOF)
772 return VERR_MISMATCH;
773 }
774 else if (rc == VERR_EOF)
775 return VERR_MISMATCH;
776 else
777 Assert(rc == VERR_BUFFER_OVERFLOW || rc == VERR_TRY_AGAIN);
778 }
779 AssertFailed();
780 return VERR_MISMATCH;
781}
782
783
784/**
785 * Variable matching worker.
786 *
787 * @returns VINF_SUCCESS or VERR_MISMATCH.
788 * @param pchInput The current input position.
789 * @param cchInput The amount of input left..
790 * @param idxVar The variable table index.
791 * @param fIgnoreCase Whether to ignore case when comparing.
792 * @param pcchMatched Where to return how much we actually matched up.
793 * @param pCache Pointer to the path matching cache.
794 */
795static int rtPathMatchExecVariable(const char *pchInput, size_t cchInput, uint16_t idxVar,
796 bool fIgnoreCase, size_t *pcchMatched, PRTPATHMATCHCACHE pCache)
797{
798 Assert(idxVar < RT_ELEMENTS(g_aVariables));
799 if (g_aVariables[idxVar].pfnMatch)
800 return g_aVariables[idxVar].pfnMatch(pchInput, cchInput, fIgnoreCase, pcchMatched);
801 return rtPathMatchExecVariableFallback(pchInput, cchInput, idxVar, fIgnoreCase, pcchMatched, pCache);
802}
803
804
805/**
806 * Variable matching worker.
807 *
808 * @returns VINF_SUCCESS or VERR_MISMATCH.
809 * @param pchInput The current input position.
810 * @param cchInput The amount of input left..
811 * @param pProg The first matching program instruction.
812 * @param pCache Pointer to the path matching cache.
813 */
814static int rtPathMatchExec(const char *pchInput, size_t cchInput, PCRTPATHMATCHCORE pProg, PRTPATHMATCHCACHE pCache)
815{
816 for (;;)
817 {
818 switch (pProg->enmOpCode)
819 {
820 case RTPATHMATCHOP_RETURN_MATCH_IF_AT_END:
821 return cchInput == 0 ? VINF_SUCCESS : VERR_MISMATCH;
822
823 case RTPATHMATCHOP_RETURN_MATCH:
824 return VINF_SUCCESS;
825
826 case RTPATHMATCHOP_RETURN_MATCH_EXCEPT_DOT_AND_DOTDOT:
827 if ( cchInput > 2
828 || cchInput < 1
829 || pchInput[0] != '.'
830 || (cchInput == 2 && pchInput[1] != '.') )
831 return VINF_SUCCESS;
832 return VERR_MISMATCH;
833
834 case RTPATHMATCHOP_STRCMP:
835 if (pProg->cch > cchInput)
836 return VERR_MISMATCH;
837 if (memcmp(pchInput, pProg->pch, pProg->cch) != 0)
838 return VERR_MISMATCH;
839 cchInput -= pProg->cch;
840 pchInput += pProg->cch;
841 break;
842
843 case RTPATHMATCHOP_STRICMP:
844 if (pProg->cch > cchInput)
845 return VERR_MISMATCH;
846 if (RTStrNICmp(pchInput, pProg->pch, pProg->cch) != 0)
847 return VERR_MISMATCH;
848 cchInput -= pProg->cch;
849 pchInput += pProg->cch;
850 break;
851
852 case RTPATHMATCHOP_SKIP_ONE_CODEPOINT:
853 {
854 if (cchInput == 0)
855 return VERR_MISMATCH;
856 RTUNICP ucInputIgnore;
857 int rc = RTStrGetCpNEx(&pchInput, &cchInput, &ucInputIgnore);
858 AssertRCReturn(rc, rc);
859 break;
860 }
861
862 case RTPATHMATCHOP_SKIP_MULTIPLE_CODEPOINTS:
863 {
864 uint16_t cCpsLeft = pProg->cch;
865 Assert(cCpsLeft > 1);
866 if (cCpsLeft > cchInput)
867 return VERR_MISMATCH;
868 while (cCpsLeft-- > 0)
869 {
870 RTUNICP ucInputIgnore;
871 int rc = RTStrGetCpNEx(&pchInput, &cchInput, &ucInputIgnore);
872 if (RT_FAILURE(rc))
873 return rc == VERR_END_OF_STRING ? VERR_MISMATCH : rc;
874 }
875 break;
876 }
877
878 case RTPATHMATCHOP_CODEPOINT_IN_SET_ASCII7:
879 {
880 if (cchInput == 0)
881 return VERR_MISMATCH;
882 RTUNICP ucInput;
883 int rc = RTStrGetCpNEx(&pchInput, &cchInput, &ucInput);
884 AssertRCReturn(rc, rc);
885 if (ucInput >= 0x80)
886 return VERR_MISMATCH;
887 if (memchr(pProg->pch, (char)ucInput, pProg->cch) == NULL)
888 return VERR_MISMATCH;
889 break;
890 }
891
892 case RTPATHMATCHOP_CODEPOINT_NOT_IN_SET_ASCII7:
893 {
894 if (cchInput == 0)
895 return VERR_MISMATCH;
896 RTUNICP ucInput;
897 int rc = RTStrGetCpNEx(&pchInput, &cchInput, &ucInput);
898 AssertRCReturn(rc, rc);
899 if (ucInput >= 0x80)
900 break;
901 if (memchr(pProg->pch, (char)ucInput, pProg->cch) != NULL)
902 return VERR_MISMATCH;
903 break;
904 }
905
906 case RTPATHMATCHOP_CODEPOINT_IN_SET_EXTENDED:
907 {
908 if (cchInput == 0)
909 return VERR_MISMATCH;
910 RTUNICP ucInput;
911 int rc = RTStrGetCpNEx(&pchInput, &cchInput, &ucInput);
912 AssertRCReturn(rc, rc);
913 rc = rtPathMatchExecExtendedSet(ucInput, pProg->pch, pProg->cch);
914 if (rc == VINF_SUCCESS)
915 break;
916 return rc;
917 }
918
919 case RTPATHMATCHOP_CODEPOINT_NOT_IN_SET_EXTENDED:
920 {
921 if (cchInput == 0)
922 return VERR_MISMATCH;
923 RTUNICP ucInput;
924 int rc = RTStrGetCpNEx(&pchInput, &cchInput, &ucInput);
925 AssertRCReturn(rc, rc);
926 rc = rtPathMatchExecExtendedSet(ucInput, pProg->pch, pProg->cch);
927 if (rc == VERR_MISMATCH)
928 break;
929 if (rc == VINF_SUCCESS)
930 rc = VERR_MISMATCH;
931 return rc;
932 }
933
934 case RTPATHMATCHOP_VARIABLE_VALUE_CMP:
935 case RTPATHMATCHOP_VARIABLE_VALUE_ICMP:
936 {
937 size_t cchMatched = 0;
938 int rc = rtPathMatchExecVariable(pchInput, cchInput, pProg->uOp2,
939 pProg->enmOpCode == RTPATHMATCHOP_VARIABLE_VALUE_ICMP, &cchMatched, pCache);
940 if (rc == VINF_SUCCESS)
941 {
942 pchInput += cchMatched;
943 cchInput -= cchMatched;
944 break;
945 }
946 return rc;
947 }
948
949 /*
950 * This is the expensive one. It always completes the program.
951 */
952 case RTPATHMATCHOP_ZERO_OR_MORE:
953 {
954 if (cchInput < pProg->cch)
955 return VERR_MISMATCH;
956 size_t cchMatched = cchInput - pProg->cch;
957 do
958 {
959 int rc = rtPathMatchExec(&pchInput[cchMatched], cchInput - cchMatched, pProg + 1, pCache);
960 if (RT_SUCCESS(rc))
961 return rc;
962 } while (cchMatched-- > 0);
963 return VERR_MISMATCH;
964 }
965
966 /*
967 * Variant of the above that doesn't match '.' and '..' entries.
968 */
969 case RTPATHMATCHOP_ZERO_OR_MORE_EXCEPT_DOT_AND_DOTDOT:
970 {
971 if (cchInput < pProg->cch)
972 return VERR_MISMATCH;
973 if ( cchInput <= 2
974 && cchInput > 0
975 && pchInput[0] == '.'
976 && (cchInput == 1 || pchInput[1] == '.') )
977 return VERR_MISMATCH;
978 size_t cchMatched = cchInput - pProg->cch;
979 do
980 {
981 int rc = rtPathMatchExec(&pchInput[cchMatched], cchInput - cchMatched, pProg + 1, pCache);
982 if (RT_SUCCESS(rc))
983 return rc;
984 } while (cchMatched-- > 0);
985 return VERR_MISMATCH;
986 }
987
988 default:
989 AssertMsgFailedReturn(("enmOpCode=%d\n", pProg->enmOpCode), VERR_INTERNAL_ERROR_3);
990 }
991
992 pProg++;
993 }
994}
995
996
997
998
999/**
1000 * Compiles a path matching program.
1001 *
1002 * @returns IPRT status code.
1003 * @param pchPattern The pattern to compile.
1004 * @param cchPattern The length of the pattern.
1005 * @param fIgnoreCase Whether to ignore case or not when doing the
1006 * actual matching later on.
1007 * @param pAllocator Pointer to the instruction allocator & result
1008 * array. The compiled "program" starts at
1009 * PRTPATHMATCHALLOC::paInstructions[PRTPATHMATCHALLOC::iNext]
1010 * (input iNext value).
1011 *
1012 * @todo Expose this matching code and also use it for RTDirOpenFiltered
1013 */
1014static int rtPathMatchCompile(const char *pchPattern, size_t cchPattern, bool fIgnoreCase, PRTPATHMATCHALLOC pAllocator)
1015{
1016 /** @todo PORTME: big endian. */
1017 static const uint8_t s_bmMetaChars[256/8] =
1018 {
1019 0x00, 0x00, 0x00, 0x00, /* 0 thru 31 */
1020 0x10, 0x04, 0x00, 0x80, /* 32 thru 63 */
1021 0x00, 0x00, 0x00, 0x08, /* 64 thru 95 */
1022 0x00, 0x00, 0x00, 0x00, /* 96 thru 127 */
1023 /* UTF-8 multibyte: */
1024 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1025 };
1026 Assert(ASMBitTest(s_bmMetaChars, '$')); AssertCompile('$' == 0x24 /*36*/);
1027 Assert(ASMBitTest(s_bmMetaChars, '*')); AssertCompile('*' == 0x2a /*42*/);
1028 Assert(ASMBitTest(s_bmMetaChars, '?')); AssertCompile('?' == 0x3f /*63*/);
1029 Assert(ASMBitTest(s_bmMetaChars, '[')); AssertCompile('[' == 0x5b /*91*/);
1030
1031 /*
1032 * For checking for the first instruction.
1033 */
1034 uint16_t const iFirst = pAllocator->iNext;
1035
1036 /*
1037 * This is for tracking zero-or-more instructions and for calculating
1038 * the minimum amount of input required for it to be considered.
1039 */
1040 uint16_t aiZeroOrMore[RTPATHMATCH_MAX_ZERO_OR_MORE];
1041 uint8_t cZeroOrMore = 0;
1042 size_t offInput = 0;
1043
1044 /*
1045 * Loop thru the pattern and translate it into string matching instructions.
1046 */
1047 for (;;)
1048 {
1049 /*
1050 * Allocate the next instruction.
1051 */
1052 if (pAllocator->iNext >= pAllocator->cAllocated)
1053 {
1054 uint32_t cNew = pAllocator->cAllocated ? pAllocator->cAllocated * 2 : 2;
1055 void *pvNew = RTMemRealloc(pAllocator->paInstructions, cNew * sizeof(pAllocator->paInstructions[0]));
1056 AssertReturn(pvNew, VERR_NO_MEMORY);
1057 pAllocator->paInstructions = (PRTPATHMATCHCORE)pvNew;
1058 pAllocator->cAllocated = cNew;
1059 }
1060 PRTPATHMATCHCORE pInstr = &pAllocator->paInstructions[pAllocator->iNext++];
1061 pInstr->pch = pchPattern;
1062 pInstr->cch = 0;
1063 pInstr->uOp2 = 0;
1064
1065 /*
1066 * Special case: End of pattern.
1067 */
1068 if (!cchPattern)
1069 {
1070 pInstr->enmOpCode = RTPATHMATCHOP_RETURN_MATCH_IF_AT_END;
1071 break;
1072 }
1073
1074 /*
1075 * Parse the next bit of the pattern.
1076 */
1077 char ch = *pchPattern;
1078 if (ASMBitTest(s_bmMetaChars, (uint8_t)ch))
1079 {
1080 /*
1081 * Zero or more characters wildcard.
1082 */
1083 if (ch == '*')
1084 {
1085 /* Skip extra asterisks. */
1086 do
1087 {
1088 cchPattern--;
1089 pchPattern++;
1090 } while (cchPattern > 0 && *pchPattern == '*');
1091
1092 /* There is a special optimization for trailing '*'. */
1093 pInstr->cch = 1;
1094 if (cchPattern == 0)
1095 {
1096 pInstr->enmOpCode = iFirst + 1U == pAllocator->iNext
1097 ? RTPATHMATCHOP_RETURN_MATCH_EXCEPT_DOT_AND_DOTDOT : RTPATHMATCHOP_RETURN_MATCH;
1098 break;
1099 }
1100
1101 pInstr->enmOpCode = iFirst + 1U == pAllocator->iNext
1102 ? RTPATHMATCHOP_ZERO_OR_MORE_EXCEPT_DOT_AND_DOTDOT : RTPATHMATCHOP_ZERO_OR_MORE;
1103 pInstr->uOp2 = (uint16_t)offInput;
1104 AssertReturn(cZeroOrMore < RT_ELEMENTS(aiZeroOrMore), VERR_OUT_OF_RANGE);
1105 aiZeroOrMore[cZeroOrMore] = (uint16_t)(pInstr - pAllocator->paInstructions);
1106
1107 /* cchInput unchanged, zero-or-more matches. */
1108 continue;
1109 }
1110
1111 /*
1112 * Single character wildcard.
1113 */
1114 if (ch == '?')
1115 {
1116 /* Count them if more. */
1117 uint16_t cchQms = 1;
1118 while (cchQms < cchPattern && pchPattern[cchQms] == '?')
1119 cchQms++;
1120
1121 pInstr->cch = cchQms;
1122 pInstr->enmOpCode = cchQms == 1 ? RTPATHMATCHOP_SKIP_ONE_CODEPOINT : RTPATHMATCHOP_SKIP_MULTIPLE_CODEPOINTS;
1123
1124 cchPattern -= cchQms;
1125 pchPattern += cchQms;
1126 offInput += cchQms;
1127 continue;
1128 }
1129
1130 /*
1131 * Character in set.
1132 *
1133 * Note that we skip the first char in the set as that is the only place
1134 * ']' can be placed if one desires to explicitly include it in the set.
1135 * To make life a bit more interesting, [:class:] is allowed inside the
1136 * set, so we have to do the counting game to find the end.
1137 */
1138 if (ch == '[')
1139 {
1140 if ( cchPattern > 2
1141 && (const char *)memchr(pchPattern + 2, ']', cchPattern) != NULL)
1142 {
1143
1144 /* Check for not-in. */
1145 bool fInverted = false;
1146 size_t offStart = 1;
1147 if (pchPattern[offStart] == '^')
1148 {
1149 fInverted = true;
1150 offStart++;
1151 }
1152
1153 /* Special case for ']' as the first char, it doesn't indicate closing then. */
1154 size_t off = offStart;
1155 if (pchPattern[off] == ']')
1156 off++;
1157
1158 bool fExtended = false;
1159 while (off < cchPattern)
1160 {
1161 ch = pchPattern[off++];
1162 if (ch == '[')
1163 {
1164 if (off < cchPattern)
1165 {
1166 char chOpen = pchPattern[off];
1167 if ( chOpen == ':'
1168 || chOpen == '='
1169 || chOpen == '.')
1170 {
1171 off++;
1172 const char *pchFound = (const char *)memchr(&pchPattern[off], ']', cchPattern - off);
1173 if ( pchFound
1174 && pchFound[-1] == chOpen)
1175 {
1176 fExtended = true;
1177 off = pchFound - pchPattern + 1;
1178 }
1179 else
1180 AssertFailed();
1181 }
1182 }
1183 }
1184 /* Check for closing. */
1185 else if (ch == ']')
1186 break;
1187 /* Check for range expression, promote to extended if this happens. */
1188 else if ( ch == '-'
1189 && off != offStart + 1
1190 && off < cchPattern
1191 && pchPattern[off] != ']')
1192 fExtended = true;
1193 /* UTF-8 multibyte chars forces us to use the extended version too. */
1194 else if ((uint8_t)ch >= 0x80)
1195 fExtended = true;
1196 }
1197
1198 if (ch == ']')
1199 {
1200 pInstr->pch = &pchPattern[offStart];
1201 pInstr->cch = (uint16_t)(off - offStart - 1);
1202 if (!fExtended)
1203 pInstr->enmOpCode = !fInverted
1204 ? RTPATHMATCHOP_CODEPOINT_IN_SET_ASCII7 : RTPATHMATCHOP_CODEPOINT_NOT_IN_SET_ASCII7;
1205 else
1206 pInstr->enmOpCode = !fInverted
1207 ? RTPATHMATCHOP_CODEPOINT_IN_SET_EXTENDED
1208 : RTPATHMATCHOP_CODEPOINT_NOT_IN_SET_EXTENDED;
1209 pchPattern += off;
1210 cchPattern -= off;
1211 offInput += 1;
1212 continue;
1213 }
1214
1215 /* else: invalid, treat it as */
1216 AssertFailed();
1217 }
1218 }
1219 /*
1220 * Variable matching.
1221 */
1222 else if (ch == '$')
1223 {
1224 const char *pchFound;
1225 if ( cchPattern > 3
1226 && pchPattern[1] == '{'
1227 && (pchFound = (const char *)memchr(pchPattern + 2, '}', cchPattern)) != NULL
1228 && pchFound != &pchPattern[2])
1229 {
1230 /* skip to the variable name. */
1231 pchPattern += 2;
1232 cchPattern -= 2;
1233 size_t cchVarNm = pchFound - pchPattern;
1234
1235 /* Look it up. */
1236 uint32_t iVar;
1237 for (iVar = 0; iVar < RT_ELEMENTS(g_aVariables); iVar++)
1238 if ( g_aVariables[iVar].cchName == cchVarNm
1239 && memcmp(g_aVariables[iVar].pszName, pchPattern, cchVarNm) == 0)
1240 break;
1241 if (iVar < RT_ELEMENTS(g_aVariables))
1242 {
1243 pInstr->uOp2 = (uint16_t)iVar;
1244 pInstr->enmOpCode = !fIgnoreCase ? RTPATHMATCHOP_VARIABLE_VALUE_CMP : RTPATHMATCHOP_VARIABLE_VALUE_ICMP;
1245 pInstr->pch = pchPattern; /* not necessary */
1246 pInstr->cch = (uint16_t)cchPattern; /* ditto */
1247 pchPattern += cchVarNm + 1;
1248 cchPattern -= cchVarNm + 1;
1249 AssertMsgReturn(!g_aVariables[iVar].fFirstOnly || iFirst + 1U == pAllocator->iNext,
1250 ("Glob variable '%s' should be first\n", g_aVariables[iVar].pszName),
1251 VERR_PATH_MATCH_VARIABLE_MUST_BE_FIRST);
1252 /* cchInput unchanged, value can be empty. */
1253 continue;
1254 }
1255 AssertMsgFailedReturn(("Unknown path matching variable '%.*s'\n", cchVarNm, pchPattern),
1256 VERR_PATH_MATCH_UNKNOWN_VARIABLE);
1257 }
1258 }
1259 else
1260 AssertFailedReturn(VERR_INTERNAL_ERROR_2); /* broken bitmap / compiler codeset */
1261 }
1262
1263 /*
1264 * Plain text. Look for the next meta char.
1265 */
1266 uint32_t cchPlain = 1;
1267 while (cchPlain < cchPattern)
1268 {
1269 ch = pchPattern[cchPlain];
1270 if (!ASMBitTest(s_bmMetaChars, (uint8_t)ch))
1271 { /* probable */ }
1272 else if ( ch == '?'
1273 || ch == '*')
1274 break;
1275 else if (ch == '$')
1276 {
1277 const char *pchFound;
1278 if ( cchPattern > cchPlain + 3
1279 && pchPattern[cchPlain + 1] == '{'
1280 && (pchFound = (const char *)memchr(&pchPattern[cchPlain + 2], '}', cchPattern - cchPlain - 2)) != NULL
1281 && pchFound != &pchPattern[cchPlain + 2])
1282 break;
1283 }
1284 else if (ch == '[')
1285 {
1286 /* We don't put a lot of effort into getting this 100% right here,
1287 no point it complicating things for malformed expressions. */
1288 if ( cchPattern > cchPlain + 2
1289 && memchr(&pchPattern[cchPlain + 2], ']', cchPattern - cchPlain - 1) != NULL)
1290 break;
1291 }
1292 else
1293 AssertFailedReturn(VERR_INTERNAL_ERROR_2); /* broken bitmap / compiler codeset */
1294 cchPlain++;
1295 }
1296 pInstr->enmOpCode = !fIgnoreCase ? RTPATHMATCHOP_STRCMP : RTPATHMATCHOP_STRICMP;
1297 pInstr->cch = cchPlain;
1298 Assert(pInstr->pch == pchPattern);
1299 Assert(pInstr->uOp2 == 0);
1300 pchPattern += cchPlain;
1301 cchPattern -= cchPlain;
1302 offInput += cchPlain;
1303 }
1304
1305 /*
1306 * Optimize zero-or-more matching.
1307 */
1308 while (cZeroOrMore-- > 0)
1309 {
1310 PRTPATHMATCHCORE pInstr = &pAllocator->paInstructions[aiZeroOrMore[cZeroOrMore]];
1311 pInstr->uOp2 = (uint16_t)(offInput - pInstr->uOp2);
1312 }
1313
1314 /** @todo It's possible to use offInput to inject a instruction for checking
1315 * minimum input length at the start of the program. Not sure it's
1316 * worth it though, unless it's long a complicated expression... */
1317 return VINF_SUCCESS;
1318}
1319
1320
1321/**
1322 * Parses the glob pattern.
1323 *
1324 * This compiles filename matching programs for each component and determins the
1325 * optimal search strategy for them.
1326 *
1327 * @returns IPRT status code.
1328 * @param pGlob The glob instance data.
1329 * @param pszPattern The pattern to parse.
1330 * @param pParsed The RTPathParse output for the pattern.
1331 * @param fFlags The glob flags (same as pGlob->fFlags).
1332 */
1333static int rtPathGlobParse(PRTPATHGLOB pGlob, const char *pszPattern, PRTPATHPARSED pParsed, uint32_t fFlags)
1334{
1335 AssertReturn(pParsed->cComps > 0, VERR_INVALID_PARAMETER); /* shouldn't happen */
1336 uint32_t iComp = 0;
1337
1338 /*
1339 * If we've got a rootspec, mark it as plain. On platforms with
1340 * drive letter and/or UNC we don't allow wildcards or such in
1341 * the drive letter spec or UNC server name. (At least not yet.)
1342 */
1343 if (RTPATH_PROP_HAS_ROOT_SPEC(pParsed->fProps))
1344 {
1345 AssertReturn(pParsed->aComps[0].cch < sizeof(pGlob->szPath) - 1, VERR_FILENAME_TOO_LONG);
1346 memcpy(pGlob->szPath, &pszPattern[pParsed->aComps[0].off], pParsed->aComps[0].cch);
1347 pGlob->offFirstPath = pParsed->aComps[0].cch;
1348 pGlob->iFirstComp = iComp = 1;
1349 }
1350 else
1351 {
1352 const char * const pszComp = &pszPattern[pParsed->aComps[0].off];
1353
1354 /*
1355 * The tilde is only applicable to the first component, expand it
1356 * immediately.
1357 */
1358 if ( *pszComp == '~'
1359 && !(fFlags & RTPATHGLOB_F_NO_TILDE))
1360 {
1361 if (pParsed->aComps[0].cch == 1)
1362 {
1363 int rc = RTPathUserHome(pGlob->szPath, sizeof(pGlob->szPath) - 1);
1364 AssertRCReturn(rc, rc);
1365 }
1366 else
1367 AssertMsgFailedReturn(("'%.*s' is not supported yet\n", pszComp, pParsed->aComps[0].cch),
1368 VERR_PATH_MATCH_FEATURE_NOT_IMPLEMENTED);
1369 pGlob->offFirstPath = (uint32_t)RTPathEnsureTrailingSeparator(pGlob->szPath, sizeof(pGlob->szPath));
1370 pGlob->iFirstComp = iComp = 1;
1371 }
1372 }
1373
1374 /*
1375 * Process the other components.
1376 */
1377 bool fStarStar = false;
1378 for (; iComp < pParsed->cComps; iComp++)
1379 {
1380 const char *pszComp = &pszPattern[pParsed->aComps[iComp].off];
1381 uint16_t cchComp = pParsed->aComps[iComp].cch;
1382 Assert(pGlob->aComps[iComp].fNormal == false);
1383
1384 pGlob->aComps[iComp].fDir = iComp + 1 < pParsed->cComps || (fFlags & RTPATHGLOB_F_ONLY_DIRS);
1385 if ( cchComp != 2
1386 || pszComp[0] != '*'
1387 || pszComp[1] != '*'
1388 || (fFlags & RTPATHGLOB_F_NO_STARSTAR) )
1389 {
1390 /* Compile the pattern. */
1391 uint16_t const iMatchProg = pGlob->MatchInstrAlloc.iNext;
1392 pGlob->aComps[iComp].iMatchProg = iMatchProg;
1393 int rc = rtPathMatchCompile(pszComp, cchComp, RT_BOOL(fFlags & RTPATHGLOB_F_IGNORE_CASE),
1394 &pGlob->MatchInstrAlloc);
1395 if (RT_FAILURE(rc))
1396 return rc;
1397
1398 /* Check for plain text as well as full variable matching (not applicable after '**'). */
1399 uint16_t const cInstructions = pGlob->MatchInstrAlloc.iNext - iMatchProg;
1400 if ( cInstructions == 2
1401 && !fStarStar
1402 && pGlob->MatchInstrAlloc.paInstructions[iMatchProg + 1].enmOpCode == RTPATHMATCHOP_RETURN_MATCH_IF_AT_END)
1403 {
1404 if ( pGlob->MatchInstrAlloc.paInstructions[iMatchProg].enmOpCode == RTPATHMATCHOP_STRCMP
1405 || pGlob->MatchInstrAlloc.paInstructions[iMatchProg].enmOpCode == RTPATHMATCHOP_STRICMP)
1406 pGlob->aComps[iComp].fPlain = true;
1407 else if ( pGlob->MatchInstrAlloc.paInstructions[iMatchProg].enmOpCode == RTPATHMATCHOP_VARIABLE_VALUE_CMP
1408 || pGlob->MatchInstrAlloc.paInstructions[iMatchProg].enmOpCode == RTPATHMATCHOP_VARIABLE_VALUE_ICMP)
1409 {
1410 pGlob->aComps[iComp].fExpVariable = true;
1411 AssertMsgReturn( iComp == 0
1412 || !g_aVariables[pGlob->MatchInstrAlloc.paInstructions[iMatchProg].uOp2].fFirstOnly,
1413 ("Glob variable '%.*s' can only be used as the path component.\n", cchComp, pszComp),
1414 VERR_PATH_MATCH_VARIABLE_MUST_BE_FIRST);
1415 }
1416 else
1417 pGlob->aComps[iComp].fNormal = true;
1418 }
1419 else
1420 pGlob->aComps[iComp].fNormal = true;
1421 }
1422 else
1423 {
1424 /* Recursive "**" matching. */
1425 pGlob->aComps[iComp].fNormal = false;
1426 pGlob->aComps[iComp].fStarStar = true;
1427 AssertReturn(!fStarStar, VERR_PATH_MATCH_FEATURE_NOT_IMPLEMENTED); /** @todo implement multiple '**' sequences in a pattern. */
1428 fStarStar = true;
1429 }
1430 }
1431 pGlob->aComps[pParsed->cComps - 1].fFinal = true;
1432
1433 return VINF_SUCCESS;
1434}
1435
1436
1437/**
1438 * This is for skipping overly long directories entries.
1439 *
1440 * Since our directory entry buffer can hold filenames of RTPATH_MAX bytes, we
1441 * can safely skip filenames that are longer. There are very few file systems
1442 * that can actually store filenames longer than 255 bytes at time of coding
1443 * (2015-09), and extremely few which can exceed 4096 (RTPATH_MAX) bytes.
1444 *
1445 * @returns IPRT status code.
1446 * @param hDir The directory handle.
1447 * @param cbNeeded The required entry size.
1448 */
1449DECL_NO_INLINE(static, int) rtPathGlobSkipDirEntry(RTDIR hDir, size_t cbNeeded)
1450{
1451 int rc = VERR_BUFFER_OVERFLOW;
1452 cbNeeded = RT_ALIGN_Z(cbNeeded, 16);
1453 PRTDIRENTRY pDirEntry = (PRTDIRENTRY)RTMemTmpAlloc(cbNeeded);
1454 if (pDirEntry)
1455 {
1456 rc = RTDirRead(hDir, pDirEntry, &cbNeeded);
1457 RTMemTmpFree(pDirEntry);
1458 }
1459 return rc;
1460}
1461
1462
1463/**
1464 * Adds a result.
1465 *
1466 * @returns IPRT status code.
1467 * @retval VINF_CALLBACK_RETURN if we can stop searching.
1468 *
1469 * @param pGlob The glob instance data.
1470 * @param cchPath The number of bytes to add from pGlob->szPath.
1471 * @param uType The RTDIRENTRYTYPE value.
1472 */
1473DECL_NO_INLINE(static, int) rtPathGlobAddResult(PRTPATHGLOB pGlob, size_t cchPath, uint8_t uType)
1474{
1475 if (pGlob->cResults < RTPATHGLOB_MAX_RESULTS)
1476 {
1477 PRTPATHGLOBENTRY pEntry = (PRTPATHGLOBENTRY)RTMemAlloc(RT_OFFSETOF(RTPATHGLOBENTRY, szPath[cchPath + 1]));
1478 if (pEntry)
1479 {
1480 pEntry->uType = uType;
1481 pEntry->cchPath = (uint16_t)cchPath;
1482 memcpy(pEntry->szPath, pGlob->szPath, cchPath);
1483 pEntry->szPath[cchPath] = '\0';
1484
1485 pEntry->pNext = NULL;
1486 *pGlob->ppNext = pEntry;
1487 pGlob->ppNext = &pEntry->pNext;
1488 pGlob->cResults++;
1489
1490 if (!(pGlob->fFlags & RTPATHGLOB_F_FIRST_ONLY))
1491 return VINF_SUCCESS;
1492 return VINF_CALLBACK_RETURN;
1493 }
1494 return VERR_NO_MEMORY;
1495 }
1496 return VERR_TOO_MUCH_DATA;
1497}
1498
1499
1500/**
1501 * Adds a result, constructing the path from two string.
1502 *
1503 * @returns IPRT status code.
1504 * @retval VINF_CALLBACK_RETURN if we can stop searching.
1505 *
1506 * @param pGlob The glob instance data.
1507 * @param cchPath The number of bytes to add from pGlob->szPath.
1508 * @param pchName The string (usual filename) to append to the szPath.
1509 * @param cchName The length of the string to append.
1510 * @param uType The RTDIRENTRYTYPE value.
1511 */
1512DECL_NO_INLINE(static, int) rtPathGlobAddResult2(PRTPATHGLOB pGlob, size_t cchPath, const char *pchName, size_t cchName,
1513 uint8_t uType)
1514{
1515 if (pGlob->cResults < RTPATHGLOB_MAX_RESULTS)
1516 {
1517 PRTPATHGLOBENTRY pEntry = (PRTPATHGLOBENTRY)RTMemAlloc(RT_OFFSETOF(RTPATHGLOBENTRY, szPath[cchPath + cchName + 1]));
1518 if (pEntry)
1519 {
1520 pEntry->uType = uType;
1521 pEntry->cchPath = (uint16_t)(cchPath + cchName);
1522 memcpy(pEntry->szPath, pGlob->szPath, cchPath);
1523 memcpy(&pEntry->szPath[cchPath], pchName, cchName);
1524 pEntry->szPath[cchPath + cchName] = '\0';
1525
1526 pEntry->pNext = NULL;
1527 *pGlob->ppNext = pEntry;
1528 pGlob->ppNext = &pEntry->pNext;
1529 pGlob->cResults++;
1530
1531 if (!(pGlob->fFlags & RTPATHGLOB_F_FIRST_ONLY))
1532 return VINF_SUCCESS;
1533 return VINF_CALLBACK_RETURN;
1534 }
1535 return VERR_NO_MEMORY;
1536 }
1537 return VERR_TOO_MUCH_DATA;
1538}
1539
1540
1541/**
1542 * Prepares a result, constructing the path from two string.
1543 *
1544 * The caller must call either rtPathGlobCommitResult or
1545 * rtPathGlobRollbackResult to complete the operation.
1546 *
1547 * @returns IPRT status code.
1548 * @retval VINF_CALLBACK_RETURN if we can stop searching.
1549 *
1550 * @param pGlob The glob instance data.
1551 * @param cchPath The number of bytes to add from pGlob->szPath.
1552 * @param pchName The string (usual filename) to append to the szPath.
1553 * @param cchName The length of the string to append.
1554 * @param uType The RTDIRENTRYTYPE value.
1555 */
1556DECL_NO_INLINE(static, int) rtPathGlobAlmostAddResult(PRTPATHGLOB pGlob, size_t cchPath, const char *pchName, size_t cchName,
1557 uint8_t uType)
1558{
1559 if (pGlob->cResults < RTPATHGLOB_MAX_RESULTS)
1560 {
1561 PRTPATHGLOBENTRY pEntry = (PRTPATHGLOBENTRY)RTMemAlloc(RT_OFFSETOF(RTPATHGLOBENTRY, szPath[cchPath + cchName + 1]));
1562 if (pEntry)
1563 {
1564 pEntry->uType = uType;
1565 pEntry->cchPath = (uint16_t)(cchPath + cchName);
1566 memcpy(pEntry->szPath, pGlob->szPath, cchPath);
1567 memcpy(&pEntry->szPath[cchPath], pchName, cchName);
1568 pEntry->szPath[cchPath + cchName] = '\0';
1569
1570 pEntry->pNext = NULL;
1571 *pGlob->ppNext = pEntry;
1572 /* Note! We don't update ppNext here, that is done in rtPathGlobCommitResult. */
1573
1574 if (!(pGlob->fFlags & RTPATHGLOB_F_FIRST_ONLY))
1575 return VINF_SUCCESS;
1576 return VINF_CALLBACK_RETURN;
1577 }
1578 return VERR_NO_MEMORY;
1579 }
1580 return VERR_TOO_MUCH_DATA;
1581}
1582
1583
1584/**
1585 * Commits a pending result from rtPathGlobAlmostAddResult.
1586 *
1587 * @param pGlob The glob instance data.
1588 * @param uType The RTDIRENTRYTYPE value.
1589 */
1590static void rtPathGlobCommitResult(PRTPATHGLOB pGlob, uint8_t uType)
1591{
1592 PRTPATHGLOBENTRY pEntry = *pGlob->ppNext;
1593 AssertPtr(pEntry);
1594 pEntry->uType = uType;
1595 pGlob->ppNext = &pEntry->pNext;
1596 pGlob->cResults++;
1597}
1598
1599
1600/**
1601 * Rolls back a pending result from rtPathGlobAlmostAddResult.
1602 *
1603 * @param pGlob The glob instance data.
1604 */
1605static void rtPathGlobRollbackResult(PRTPATHGLOB pGlob)
1606{
1607 PRTPATHGLOBENTRY pEntry = *pGlob->ppNext;
1608 AssertPtr(pEntry);
1609 RTMemFree(pEntry);
1610 *pGlob->ppNext = NULL;
1611}
1612
1613
1614
1615/**
1616 * Whether to call rtPathGlobExecRecursiveVarExp for the next component.
1617 *
1618 * @returns true / false.
1619 * @param pGlob The glob instance data.
1620 * @param offPath The next path offset/length.
1621 * @param iComp The next component.
1622 */
1623DECLINLINE(bool) rtPathGlobExecIsExpVar(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp)
1624{
1625 return pGlob->aComps[iComp].fExpVariable
1626 && ( !(pGlob->fFlags & RTPATHGLOB_F_IGNORE_CASE)
1627 || (offPath ? !RTFsIsCaseSensitive(pGlob->szPath) : !RTFsIsCaseSensitive(".")) );
1628}
1629
1630/**
1631 * Whether to call rtPathGlobExecRecursivePlainText for the next component.
1632 *
1633 * @returns true / false.
1634 * @param pGlob The glob instance data.
1635 * @param offPath The next path offset/length.
1636 * @param iComp The next component.
1637 */
1638DECLINLINE(bool) rtPathGlobExecIsPlainText(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp)
1639{
1640 return pGlob->aComps[iComp].fPlain
1641 && ( !(pGlob->fFlags & RTPATHGLOB_F_IGNORE_CASE)
1642 || (offPath ? !RTFsIsCaseSensitive(pGlob->szPath) : !RTFsIsCaseSensitive(".")) );
1643}
1644
1645
1646/**
1647 * Helper for rtPathGlobExecRecursiveVarExp and rtPathGlobExecRecursivePlainText
1648 * that compares a file mode mask with dir/no-dir wishes of the caller.
1649 *
1650 * @returns true if match, false if not.
1651 * @param pGlob The glob instance data.
1652 * @param fMode The file mode (only the type is used).
1653 */
1654DECLINLINE(bool) rtPathGlobExecIsMatchFinalWithFileMode(PRTPATHGLOB pGlob, RTFMODE fMode)
1655{
1656 if (!(pGlob->fFlags & (RTPATHGLOB_F_NO_DIRS | RTPATHGLOB_F_ONLY_DIRS)))
1657 return true;
1658 return RT_BOOL(pGlob->fFlags & RTPATHGLOB_F_ONLY_DIRS) == RTFS_IS_DIRECTORY(fMode);
1659}
1660
1661
1662/**
1663 * Recursive globbing - star-star mode.
1664 *
1665 * @returns IPRT status code.
1666 * @retval VINF_CALLBACK_RETURN is used to implement RTPATHGLOB_F_FIRST_ONLY.
1667 *
1668 * @param pGlob The glob instance data.
1669 * @param offPath The current path offset/length.
1670 * @param iStarStarComp The star-star component index.
1671 * @param offStarStarPath The offset of the star-star component in the
1672 * pattern path.
1673 */
1674DECL_NO_INLINE(static, int) rtPathGlobExecRecursiveStarStar(PRTPATHGLOB pGlob, size_t offPath, uint32_t iStarStarComp,
1675 size_t offStarStarPath)
1676{
1677 /** @todo implement multi subdir matching. */
1678 RT_NOREF_PV(pGlob);
1679 RT_NOREF_PV(offPath);
1680 RT_NOREF_PV(iStarStarComp);
1681 RT_NOREF_PV(offStarStarPath);
1682 return VERR_PATH_MATCH_FEATURE_NOT_IMPLEMENTED;
1683}
1684
1685
1686
1687/**
1688 * Recursive globbing - variable expansion optimization.
1689 *
1690 * @returns IPRT status code.
1691 * @retval VINF_CALLBACK_RETURN is used to implement RTPATHGLOB_F_FIRST_ONLY.
1692 *
1693 * @param pGlob The glob instance data.
1694 * @param offPath The current path offset/length.
1695 * @param iComp The current component.
1696 */
1697DECL_NO_INLINE(static, int) rtPathGlobExecRecursiveVarExp(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp)
1698{
1699 Assert(iComp < pGlob->pParsed->cComps);
1700 Assert(pGlob->szPath[offPath] == '\0');
1701 Assert(pGlob->aComps[iComp].fExpVariable);
1702 Assert(!pGlob->aComps[iComp].fPlain);
1703 Assert(!pGlob->aComps[iComp].fStarStar);
1704 Assert(rtPathGlobExecIsExpVar(pGlob, offPath, iComp));
1705
1706 /*
1707 * Fish the variable index out of the first matching instruction.
1708 */
1709 Assert( pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg].enmOpCode
1710 == RTPATHMATCHOP_VARIABLE_VALUE_CMP
1711 || pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg].enmOpCode
1712 == RTPATHMATCHOP_VARIABLE_VALUE_ICMP);
1713 uint16_t const iVar = pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg].uOp2;
1714
1715 /*
1716 * Enumerate all the variable, giving them the plain text treatment.
1717 */
1718 for (uint32_t iItem = 0; iItem < RTPATHMATCH_MAX_VAR_ITEMS; iItem++)
1719 {
1720 size_t cch;
1721 int rcVar = g_aVariables[iVar].pfnQuery(iItem, &pGlob->szPath[offPath], sizeof(pGlob->szPath) - offPath, &cch,
1722 &pGlob->MatchCache);
1723 if (RT_SUCCESS(rcVar))
1724 {
1725 Assert(pGlob->szPath[offPath + cch] == '\0');
1726
1727 int rc = RTPathQueryInfoEx(pGlob->szPath, &pGlob->u.ObjInfo, RTFSOBJATTRADD_NOTHING, RTPATH_F_FOLLOW_LINK);
1728 if (RT_SUCCESS(rc))
1729 {
1730 if (pGlob->aComps[iComp].fFinal)
1731 {
1732 if (rtPathGlobExecIsMatchFinalWithFileMode(pGlob, pGlob->u.ObjInfo.Attr.fMode))
1733 {
1734 rc = rtPathGlobAddResult(pGlob, cch,
1735 (pGlob->u.ObjInfo.Attr.fMode & RTFS_TYPE_MASK)
1736 >> RTFS_TYPE_DIRENTRYTYPE_SHIFT);
1737 if (rc != VINF_SUCCESS)
1738 return rc;
1739 }
1740 }
1741 else if (RTFS_IS_DIRECTORY(pGlob->u.ObjInfo.Attr.fMode))
1742 {
1743 Assert(pGlob->aComps[iComp].fDir);
1744 cch = RTPathEnsureTrailingSeparator(pGlob->szPath, sizeof(pGlob->szPath));
1745 if (cch > 0)
1746 {
1747 if (rtPathGlobExecIsExpVar(pGlob, cch, iComp + 1))
1748 rc = rtPathGlobExecRecursiveVarExp(pGlob, cch, iComp + 1);
1749 else if (rtPathGlobExecIsPlainText(pGlob, cch, iComp + 1))
1750 rc = rtPathGlobExecRecursivePlainText(pGlob, cch, iComp + 1);
1751 else if (pGlob->aComps[pGlob->iFirstComp].fStarStar)
1752 rc = rtPathGlobExecRecursiveStarStar(pGlob, cch, iComp + 1, cch);
1753 else
1754 rc = rtPathGlobExecRecursiveGeneric(pGlob, cch, iComp + 1);
1755 if (rc != VINF_SUCCESS)
1756 return rc;
1757 }
1758 else
1759 pGlob->cPathOverflows++;
1760 }
1761 }
1762 /* else: file doesn't exist or something else is wrong, ignore this. */
1763 if (rcVar == VINF_EOF)
1764 return VINF_SUCCESS;
1765 }
1766 else if (rcVar == VERR_EOF)
1767 return VINF_SUCCESS;
1768 else if (rcVar != VERR_TRY_AGAIN)
1769 {
1770 Assert(rcVar == VERR_BUFFER_OVERFLOW);
1771 pGlob->cPathOverflows++;
1772 }
1773 }
1774 AssertFailedReturn(VINF_SUCCESS); /* Too many items returned, probably buggy query method. */
1775}
1776
1777
1778/**
1779 * Recursive globbing - plain text optimization.
1780 *
1781 * @returns IPRT status code.
1782 * @retval VINF_CALLBACK_RETURN is used to implement RTPATHGLOB_F_FIRST_ONLY.
1783 *
1784 * @param pGlob The glob instance data.
1785 * @param offPath The current path offset/length.
1786 * @param iComp The current component.
1787 */
1788DECL_NO_INLINE(static, int) rtPathGlobExecRecursivePlainText(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp)
1789{
1790 /*
1791 * Instead of recursing, we loop thru adjacent plain text components.
1792 */
1793 for (;;)
1794 {
1795 /*
1796 * Preconditions.
1797 */
1798 Assert(iComp < pGlob->pParsed->cComps);
1799 Assert(pGlob->szPath[offPath] == '\0');
1800 Assert(pGlob->aComps[iComp].fPlain);
1801 Assert(!pGlob->aComps[iComp].fExpVariable);
1802 Assert(!pGlob->aComps[iComp].fStarStar);
1803 Assert(rtPathGlobExecIsPlainText(pGlob, offPath, iComp));
1804 Assert(pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg].enmOpCode
1805 == RTPATHMATCHOP_STRCMP
1806 || pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg].enmOpCode
1807 == RTPATHMATCHOP_STRICMP);
1808
1809 /*
1810 * Add the plain text component to the path.
1811 */
1812 size_t const cch = pGlob->pParsed->aComps[iComp].cch;
1813 if (cch + pGlob->aComps[iComp].fDir < sizeof(pGlob->szPath) - offPath)
1814 {
1815 memcpy(&pGlob->szPath[offPath], &pGlob->pszPattern[pGlob->pParsed->aComps[iComp].off], cch);
1816 offPath += cch;
1817 pGlob->szPath[offPath] = '\0';
1818
1819 /*
1820 * Check if it exists.
1821 */
1822 int rc = RTPathQueryInfoEx(pGlob->szPath, &pGlob->u.ObjInfo, RTFSOBJATTRADD_NOTHING, RTPATH_F_FOLLOW_LINK);
1823 if (RT_SUCCESS(rc))
1824 {
1825 if (pGlob->aComps[iComp].fFinal)
1826 {
1827 if (rtPathGlobExecIsMatchFinalWithFileMode(pGlob, pGlob->u.ObjInfo.Attr.fMode))
1828 return rtPathGlobAddResult(pGlob, offPath,
1829 (pGlob->u.ObjInfo.Attr.fMode & RTFS_TYPE_MASK)
1830 >> RTFS_TYPE_DIRENTRYTYPE_SHIFT);
1831 break;
1832 }
1833
1834 if (RTFS_IS_DIRECTORY(pGlob->u.ObjInfo.Attr.fMode))
1835 {
1836 Assert(pGlob->aComps[iComp].fDir);
1837 pGlob->szPath[offPath++] = RTPATH_SLASH;
1838 pGlob->szPath[offPath] = '\0';
1839
1840 iComp++;
1841 if (rtPathGlobExecIsExpVar(pGlob, offPath, iComp))
1842 return rtPathGlobExecRecursiveVarExp(pGlob, offPath, iComp);
1843 if (!rtPathGlobExecIsPlainText(pGlob, offPath, iComp))
1844 return rtPathGlobExecRecursiveGeneric(pGlob, offPath, iComp);
1845 if (pGlob->aComps[pGlob->iFirstComp].fStarStar)
1846 return rtPathGlobExecRecursiveStarStar(pGlob, offPath, iComp, offPath);
1847
1848 /* Continue with the next plain text component. */
1849 continue;
1850 }
1851 }
1852 /* else: file doesn't exist or something else is wrong, ignore this. */
1853 }
1854 else
1855 pGlob->cPathOverflows++;
1856 break;
1857 }
1858 return VINF_SUCCESS;
1859}
1860
1861
1862/**
1863 * Recursive globbing - generic.
1864 *
1865 * @returns IPRT status code.
1866 * @retval VINF_CALLBACK_RETURN is used to implement RTPATHGLOB_F_FIRST_ONLY.
1867 *
1868 * @param pGlob The glob instance data.
1869 * @param offPath The current path offset/length.
1870 * @param iComp The current component.
1871 */
1872DECL_NO_INLINE(static, int) rtPathGlobExecRecursiveGeneric(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp)
1873{
1874 /*
1875 * Enumerate entire directory and match each entry.
1876 */
1877 RTDIR hDir;
1878 int rc = RTDirOpen(&hDir, offPath ? pGlob->szPath : ".");
1879 if (RT_SUCCESS(rc))
1880 {
1881 for (;;)
1882 {
1883 size_t cch = sizeof(pGlob->u);
1884 rc = RTDirRead(hDir, &pGlob->u.DirEntry, &cch);
1885 if (RT_SUCCESS(rc))
1886 {
1887 if (pGlob->aComps[iComp].fFinal)
1888 {
1889 /*
1890 * Final component: Check if it matches the current pattern.
1891 */
1892 if ( !(pGlob->fFlags & (RTPATHGLOB_F_NO_DIRS | RTPATHGLOB_F_ONLY_DIRS))
1893 || RT_BOOL(pGlob->fFlags & RTPATHGLOB_F_ONLY_DIRS)
1894 == (pGlob->u.DirEntry.enmType == RTDIRENTRYTYPE_DIRECTORY)
1895 || pGlob->u.DirEntry.enmType == RTDIRENTRYTYPE_UNKNOWN)
1896 {
1897 rc = rtPathMatchExec(pGlob->u.DirEntry.szName, pGlob->u.DirEntry.cbName,
1898 &pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg],
1899 &pGlob->MatchCache);
1900 if (RT_SUCCESS(rc))
1901 {
1902 /* Construct the result. */
1903 if ( pGlob->u.DirEntry.enmType != RTDIRENTRYTYPE_UNKNOWN
1904 || !(pGlob->fFlags & (RTPATHGLOB_F_NO_DIRS | RTPATHGLOB_F_ONLY_DIRS)) )
1905 rc = rtPathGlobAddResult2(pGlob, offPath, pGlob->u.DirEntry.szName, pGlob->u.DirEntry.cbName,
1906 (uint8_t)pGlob->u.DirEntry.enmType);
1907 else
1908 {
1909 rc = rtPathGlobAlmostAddResult(pGlob, offPath,
1910 pGlob->u.DirEntry.szName, pGlob->u.DirEntry.cbName,
1911 (uint8_t)RTDIRENTRYTYPE_UNKNOWN);
1912 if (RT_SUCCESS(rc))
1913 {
1914 RTDirQueryUnknownType((*pGlob->ppNext)->szPath, false /*fFollowSymlinks*/,
1915 &pGlob->u.DirEntry.enmType);
1916 if ( RT_BOOL(pGlob->fFlags & RTPATHGLOB_F_ONLY_DIRS)
1917 == (pGlob->u.DirEntry.enmType == RTDIRENTRYTYPE_DIRECTORY))
1918 rtPathGlobCommitResult(pGlob, (uint8_t)pGlob->u.DirEntry.enmType);
1919 else
1920 rtPathGlobRollbackResult(pGlob);
1921 }
1922 }
1923 if (rc != VINF_SUCCESS)
1924 break;
1925 }
1926 else
1927 {
1928 AssertMsgBreak(rc == VERR_MISMATCH, ("%Rrc\n", rc));
1929 rc = VINF_SUCCESS;
1930 }
1931 }
1932 }
1933 /*
1934 * Intermediate component: Directories only.
1935 */
1936 else if ( pGlob->u.DirEntry.enmType == RTDIRENTRYTYPE_DIRECTORY
1937 || pGlob->u.DirEntry.enmType == RTDIRENTRYTYPE_UNKNOWN)
1938 {
1939 rc = rtPathMatchExec(pGlob->u.DirEntry.szName, pGlob->u.DirEntry.cbName,
1940 &pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg],
1941 &pGlob->MatchCache);
1942 if (RT_SUCCESS(rc))
1943 {
1944 /* Recurse down into the alleged directory. */
1945 cch = offPath + pGlob->u.DirEntry.cbName;
1946 if (cch + 1 < sizeof(pGlob->szPath))
1947 {
1948 memcpy(&pGlob->szPath[offPath], pGlob->u.DirEntry.szName, pGlob->u.DirEntry.cbName);
1949 pGlob->szPath[cch++] = RTPATH_SLASH;
1950 pGlob->szPath[cch] = '\0';
1951
1952 if (rtPathGlobExecIsExpVar(pGlob, cch, iComp + 1))
1953 rc = rtPathGlobExecRecursiveVarExp(pGlob, cch, iComp + 1);
1954 else if (rtPathGlobExecIsPlainText(pGlob, cch, iComp + 1))
1955 rc = rtPathGlobExecRecursivePlainText(pGlob, cch, iComp + 1);
1956 else if (pGlob->aComps[pGlob->iFirstComp].fStarStar)
1957 rc = rtPathGlobExecRecursiveStarStar(pGlob, cch, iComp + 1, cch);
1958 else
1959 rc = rtPathGlobExecRecursiveGeneric(pGlob, cch, iComp + 1);
1960 if (rc != VINF_SUCCESS)
1961 return rc;
1962 }
1963 else
1964 pGlob->cPathOverflows++;
1965 }
1966 else
1967 {
1968 AssertMsgBreak(rc == VERR_MISMATCH, ("%Rrc\n", rc));
1969 rc = VINF_SUCCESS;
1970 }
1971 }
1972 }
1973 /*
1974 * RTDirRead failure.
1975 */
1976 else
1977 {
1978 /* The end? */
1979 if (rc == VERR_NO_MORE_FILES)
1980 rc = VINF_SUCCESS;
1981 /* Try skip the entry if we end up with an overflow (szPath can't hold it either then). */
1982 else if (rc == VERR_BUFFER_OVERFLOW)
1983 {
1984 pGlob->cPathOverflows++;
1985 rc = rtPathGlobSkipDirEntry(hDir, cch);
1986 if (RT_SUCCESS(rc))
1987 continue;
1988 }
1989 /* else: Any other error is unexpected and should be reported. */
1990 break;
1991 }
1992 }
1993
1994 RTDirClose(hDir);
1995 }
1996 /* Directory doesn't exist or something else is wrong, ignore this. */
1997 else
1998 rc = VINF_SUCCESS;
1999 return rc;
2000}
2001
2002
2003/**
2004 * Executes a glob search.
2005 *
2006 * @returns IPRT status code.
2007 * @param pGlob The glob instance data.
2008 */
2009static int rtPathGlobExec(PRTPATHGLOB pGlob)
2010{
2011 Assert(pGlob->offFirstPath < sizeof(pGlob->szPath));
2012 Assert(pGlob->szPath[pGlob->offFirstPath] == '\0');
2013
2014 int rc;
2015 if (RT_LIKELY(pGlob->iFirstComp < pGlob->pParsed->cComps))
2016 {
2017 /*
2018 * Call the appropriate function.
2019 */
2020 if (rtPathGlobExecIsExpVar(pGlob, pGlob->offFirstPath, pGlob->iFirstComp))
2021 rc = rtPathGlobExecRecursiveVarExp(pGlob, pGlob->offFirstPath, pGlob->iFirstComp);
2022 else if (rtPathGlobExecIsPlainText(pGlob, pGlob->offFirstPath, pGlob->iFirstComp))
2023 rc = rtPathGlobExecRecursivePlainText(pGlob, pGlob->offFirstPath, pGlob->iFirstComp);
2024 else if (pGlob->aComps[pGlob->iFirstComp].fStarStar)
2025 rc = rtPathGlobExecRecursiveStarStar(pGlob, pGlob->offFirstPath, pGlob->iFirstComp, pGlob->offFirstPath);
2026 else
2027 rc = rtPathGlobExecRecursiveGeneric(pGlob, pGlob->offFirstPath, pGlob->iFirstComp);
2028 }
2029 else
2030 {
2031 /*
2032 * Special case where we only have a root component or tilde expansion.
2033 */
2034 Assert(pGlob->offFirstPath > 0);
2035 rc = RTPathQueryInfoEx(pGlob->szPath, &pGlob->u.ObjInfo, RTFSOBJATTRADD_NOTHING, RTPATH_F_FOLLOW_LINK);
2036 if ( RT_SUCCESS(rc)
2037 && rtPathGlobExecIsMatchFinalWithFileMode(pGlob, pGlob->u.ObjInfo.Attr.fMode))
2038 rc = rtPathGlobAddResult(pGlob, pGlob->offFirstPath,
2039 (pGlob->u.ObjInfo.Attr.fMode & RTFS_TYPE_MASK) >> RTFS_TYPE_DIRENTRYTYPE_SHIFT);
2040 else
2041 rc = VINF_SUCCESS;
2042 }
2043
2044 /*
2045 * Adjust the status code. Check for results, hide RTPATHGLOB_F_FIRST_ONLY
2046 * status code, and add warning if necessary.
2047 */
2048 if (pGlob->cResults > 0)
2049 {
2050 if (rc == VINF_CALLBACK_RETURN)
2051 rc = VINF_SUCCESS;
2052 if (rc == VINF_SUCCESS)
2053 {
2054 if (pGlob->cPathOverflows > 0)
2055 rc = VINF_BUFFER_OVERFLOW;
2056 }
2057 }
2058 else
2059 rc = VERR_FILE_NOT_FOUND;
2060
2061 return rc;
2062}
2063
2064
2065RTDECL(int) RTPathGlob(const char *pszPattern, uint32_t fFlags, PPCRTPATHGLOBENTRY ppHead, uint32_t *pcResults)
2066{
2067 /*
2068 * Input validation.
2069 */
2070 AssertPtrReturn(ppHead, VERR_INVALID_POINTER);
2071 *ppHead = NULL;
2072 if (pcResults)
2073 {
2074 AssertPtrReturn(pcResults, VERR_INVALID_POINTER);
2075 *pcResults = 0;
2076 }
2077 AssertPtrReturn(pszPattern, VERR_INVALID_POINTER);
2078 AssertReturn(!(fFlags & ~RTPATHGLOB_F_MASK), VERR_INVALID_FLAGS);
2079 AssertReturn((fFlags & (RTPATHGLOB_F_NO_DIRS | RTPATHGLOB_F_ONLY_DIRS)) != (RTPATHGLOB_F_NO_DIRS | RTPATHGLOB_F_ONLY_DIRS),
2080 VERR_INVALID_FLAGS);
2081
2082 /*
2083 * Parse the path.
2084 */
2085 size_t cbParsed = RT_OFFSETOF(RTPATHPARSED, aComps[1]); /** @todo 16 after testing */
2086 PRTPATHPARSED pParsed = (PRTPATHPARSED)RTMemTmpAlloc(cbParsed);
2087 AssertReturn(pParsed, VERR_NO_MEMORY);
2088 int rc = RTPathParse(pszPattern, pParsed, cbParsed, RTPATH_STR_F_STYLE_HOST);
2089 if (rc == VERR_BUFFER_OVERFLOW)
2090 {
2091 cbParsed = RT_OFFSETOF(RTPATHPARSED, aComps[pParsed->cComps + 1]);
2092 RTMemTmpFree(pParsed);
2093 pParsed = (PRTPATHPARSED)RTMemTmpAlloc(cbParsed);
2094 AssertReturn(pParsed, VERR_NO_MEMORY);
2095
2096 rc = RTPathParse(pszPattern, pParsed, cbParsed, RTPATH_STR_F_STYLE_HOST);
2097 }
2098 if (RT_SUCCESS(rc))
2099 {
2100 /*
2101 * Check dir slash vs. only/not dir flag.
2102 */
2103 if ( !(fFlags & RTPATHGLOB_F_NO_DIRS)
2104 || ( !(pParsed->fProps & RTPATH_PROP_DIR_SLASH)
2105 && ( !(pParsed->fProps & (RTPATH_PROP_ROOT_SLASH | RTPATH_PROP_UNC))
2106 || pParsed->cComps > 1) ) )
2107 {
2108 if (pParsed->fProps & RTPATH_PROP_DIR_SLASH)
2109 fFlags |= RTPATHGLOB_F_ONLY_DIRS;
2110
2111 /*
2112 * Allocate and initialize the glob state data structure.
2113 */
2114 size_t cbGlob = RT_OFFSETOF(RTPATHGLOB, aComps[pParsed->cComps + 1]);
2115 PRTPATHGLOB pGlob = (PRTPATHGLOB)RTMemTmpAllocZ(cbGlob);
2116 if (pGlob)
2117 {
2118 pGlob->pszPattern = pszPattern;
2119 pGlob->fFlags = fFlags;
2120 pGlob->pParsed = pParsed;
2121 pGlob->ppNext = &pGlob->pHead;
2122 rc = rtPathGlobParse(pGlob, pszPattern, pParsed, fFlags);
2123 if (RT_SUCCESS(rc))
2124 {
2125 /*
2126 * Execute the search.
2127 */
2128 rc = rtPathGlobExec(pGlob);
2129 if (RT_SUCCESS(rc))
2130 {
2131 *ppHead = pGlob->pHead;
2132 if (pcResults)
2133 *pcResults = pGlob->cResults;
2134 }
2135 else
2136 RTPathGlobFree(pGlob->pHead);
2137 }
2138
2139 RTMemTmpFree(pGlob->MatchInstrAlloc.paInstructions);
2140 RTMemTmpFree(pGlob);
2141 }
2142 else
2143 rc = VERR_NO_MEMORY;
2144 }
2145 else
2146 rc = VERR_NOT_FOUND;
2147 }
2148 RTMemTmpFree(pParsed);
2149 return rc;
2150
2151
2152}
2153
2154
2155RTDECL(void) RTPathGlobFree(PCRTPATHGLOBENTRY pHead)
2156{
2157 PRTPATHGLOBENTRY pCur = (PRTPATHGLOBENTRY)pHead;
2158 while (pCur)
2159 {
2160 PRTPATHGLOBENTRY pNext = pCur->pNext;
2161 pCur->pNext = NULL;
2162 RTMemFree(pCur);
2163 pCur = pNext;
2164 }
2165}
2166
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette