Changeset 41624 in vbox
- Timestamp:
- Jun 8, 2012 3:46:26 PM (13 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/bldprogs/filesplitter.cpp
r35945 r41624 1 /* $Id$ */ 1 2 /** @file 2 * File splitter : splits a text file according to ###### markers in it.3 * File splitter - Splits a text file according to ###### markers in it. 3 4 */ 4 5 5 6 /* 6 * Copyright (C) 2006-20 09Oracle Corporation7 * Copyright (C) 2006-2012 Oracle Corporation 7 8 * 8 9 * This file is part of VirtualBox Open Source Edition (OSE), as … … 15 16 */ 16 17 18 19 /******************************************************************************* 20 * Header Files * 21 *******************************************************************************/ 17 22 #include <sys/types.h> 18 23 #include <sys/stat.h> 19 24 #include <stdio.h> 20 #include <string.h>21 25 #include <stdlib.h> 22 23 static unsigned long lineNumber(const char *pStr, const char *pPos) 26 #include <errno.h> 27 28 #include <iprt/string.h> 29 #include <iprt/stdarg.h> 30 31 32 /** 33 * Calculates the line number for a file position. 34 * 35 * @returns Line number. 36 * @param pcszContent The file content. 37 * @param pcszPos The current position. 38 */ 39 static unsigned long lineNumber(const char *pcszContent, const char *pcszPos) 24 40 { 25 41 unsigned long cLine = 0; 26 while (*pStr && pStr < pPos) 27 { 28 pStr = strchr(pStr, '\n'); 29 if (!pStr) 42 while ( *pcszContent 43 && (uintptr_t)pcszContent < (uintptr_t)pcszPos) 44 { 45 pcszContent = strchr(pcszContent, '\n'); 46 if (!pcszContent) 30 47 break; 31 48 ++cLine; 32 ++p Str;49 ++pcszContent; 33 50 } 34 51 … … 36 53 } 37 54 55 56 /** 57 * Writes an error message. 58 * 59 * @returns RTEXITCODE_FAILURE. 60 * @param pcszFormat Error message. 61 * @param ... Format argument referenced in the message. 62 */ 63 static int printErr(const char *pcszFormat, ...) 64 { 65 va_list va; 66 67 fprintf(stderr, "filesplitter: "); 68 va_start(va, pcszFormat); 69 vfprintf(stderr, pcszFormat, va); 70 va_end(va); 71 72 return RTEXITCODE_FAILURE; 73 } 74 75 76 /** 77 * Reads in a file. 78 * 79 * @returns Exit code. 80 * @param pcszFile The path to the file. 81 * @param ppszFile Where to return the buffer. 82 * @param pcchFile Where to return the file size. 83 */ 84 static int readFile(const char *pcszFile, char **ppszFile, size_t *pcchFile) 85 { 86 FILE *pFile; 87 struct stat FileStat; 88 int rc; 89 90 if (stat(pcszFile, &FileStat)) 91 return printErr("Failed to stat \"%s\": %s\n", pcszFile, strerror(errno)); 92 93 pFile = fopen(pcszFile, "r"); 94 if (!pFile) 95 return printErr("Failed to open \"%s\": %s\n", pcszFile, strerror(errno)); 96 97 *ppszFile = (char *)malloc(FileStat.st_size + 1); 98 if (*ppszFile) 99 { 100 errno = 0; 101 size_t cbRead = fread(*ppszFile, 1, FileStat.st_size, pFile); 102 if ( cbRead <= (size_t)FileStat.st_size 103 && (cbRead > 0 || !ferror(pFile)) ) 104 { 105 if (ftell(pFile) == FileStat.st_size) /* (\r\n vs \n in the DOS world) */ 106 { 107 (*ppszFile)[cbRead] = '\0'; 108 if (pcchFile) 109 *pcchFile = (size_t)cbRead; 110 111 fclose(pFile); 112 return 0; 113 } 114 } 115 116 rc = printErr("Error reading \"%s\": %s\n", pcszFile, strerror(errno)); 117 free(*ppszFile); 118 *ppszFile = NULL; 119 } 120 else 121 rc = printErr("Failed to allocate %lu bytes\n", (unsigned long)(FileStat.st_size + 1)); 122 fclose(pFile); 123 return rc; 124 } 125 126 127 /** 128 * Checks whether the sub-file already exists and has the exact 129 * same content. 130 * 131 * @returns @c true if the existing file matches exactly, otherwise @c false. 132 * @param pcszFilename The path to the file. 133 * @param pcszSubContent The content to write. 134 * @param cchSubContent The length of the content. 135 */ 136 static bool compareSubFile(const char *pcszFilename, const char *pcszSubContent, size_t cchSubContent) 137 { 138 struct stat FileStat; 139 FILE *pFile; 140 if (stat(pcszFilename, &FileStat)) 141 return false; 142 if ((size_t)FileStat.st_size < cchSubContent) 143 return false; 144 145 size_t cchExisting; 146 char *pszExisting; 147 int rc = readFile(pcszFilename, &pszExisting, &cchExisting); 148 if (rc) 149 return false; 150 151 bool fRc = cchExisting == cchSubContent 152 && !memcmp(pcszSubContent, pszExisting, cchSubContent); 153 free(pszExisting); 154 155 return fRc; 156 } 157 158 159 /** 160 * Writes out a sub-file. 161 * 162 * @returns exit code. 163 * @param pcszFilename The path to the sub-file. 164 * @param pcszSubContent The content of the file. 165 * @param cchSubContent The size of the content. 166 */ 167 static int writeSubFile(const char *pcszFilename, const char *pcszSubContent, size_t cchSubContent) 168 { 169 FILE *pFile = fopen(pcszFilename, "w"); 170 if (!pFile) 171 return printErr("Failed to open \"%s\" for writing: %s\n", pcszFilename, strerror(errno)); 172 173 errno = 0; 174 int rc = 0; 175 if (fwrite(pcszSubContent, cchSubContent, 1, pFile) != 1) 176 rc = printErr("Error writing \"%s\": %s\n", pcszFilename, strerror(errno)); 177 178 errno = 0; 179 int rc2 = fclose(pFile); 180 if (rc2 == EOF) 181 rc = printErr("Error closing \"%s\": %s\n", pcszFilename, strerror(errno)); 182 return rc; 183 } 184 185 186 /** 187 * Does the actual file splitting. 188 * 189 * @returns exit code. 190 * @param pcszOutDir Path to the output directory. 191 * @param pcszContent The content to split up. 192 */ 193 static int splitFile(const char *pcszOutDir, const char *pcszContent) 194 { 195 static char const s_szBeginMarker[] = "\n// ##### BEGINFILE \""; 196 static char const s_szEndMarker[] = "\n// ##### ENDFILE"; 197 const size_t cchBeginMarker = sizeof(s_szBeginMarker) - 1; 198 const char *pcszSearch = pcszContent; 199 size_t const cchOutDir = strlen(pcszOutDir); 200 unsigned long cFilesWritten = 0; 201 unsigned long cFilesUnchanged = 0; 202 int rc = 0; 203 204 do 205 { 206 /* find begin marker */ 207 const char *pcszBegin = strstr(pcszSearch, s_szBeginMarker); 208 if (!pcszBegin) 209 break; 210 211 /* find line after begin marker */ 212 const char *pcszLineAfterBegin = strchr(pcszBegin + cchBeginMarker, '\n'); 213 if (!pcszLineAfterBegin) 214 return printErr("No newline after begin-file marker found.\n"); 215 ++pcszLineAfterBegin; 216 217 /* find filename end quote in begin marker line */ 218 const char *pcszStartFilename = pcszBegin + cchBeginMarker; 219 const char *pcszEndQuote = (const char *)memchr(pcszStartFilename, '\"', pcszLineAfterBegin - pcszStartFilename); 220 if (!pcszEndQuote) 221 return printErr("Can't parse filename after begin-file marker (line %lu).\n", 222 lineNumber(pcszContent, s_szBeginMarker)); 223 224 /* find end marker */ 225 const char *pcszEnd = strstr(pcszLineAfterBegin, s_szEndMarker); 226 if (!pcszEnd) 227 return printErr("No matching end-line marker for begin-file marker found (line %lu).\n", 228 lineNumber(pcszContent, s_szBeginMarker)); 229 230 /* construct output filename */ 231 size_t cchFilename = pcszEndQuote - pcszStartFilename; 232 char *pszFilename = (char *)malloc(cchOutDir + 1 + cchFilename + 1); 233 if (!pszFilename) 234 return printErr("Can't allocate memory for filename.\n"); 235 236 memcpy(pszFilename, pcszOutDir, cchOutDir); 237 pszFilename[cchOutDir] = '/'; 238 memcpy(pszFilename + cchOutDir + 1, pcszStartFilename, cchFilename); 239 pszFilename[cchFilename + 1 + cchOutDir] = '\0'; 240 241 /* Write the file only if necessary. */ 242 if (compareSubFile(pszFilename, pcszLineAfterBegin, pcszEnd - pcszLineAfterBegin)) 243 cFilesUnchanged++; 244 else 245 { 246 rc = writeSubFile(pszFilename, pcszLineAfterBegin, pcszEnd - pcszLineAfterBegin); 247 cFilesWritten++; 248 } 249 250 free(pszFilename); 251 252 pcszSearch = pcszEnd; 253 } while (rc == 0 && pcszSearch); 254 255 printf("filesplitter: Out of %lu files: %lu rewritten, %lu unchanged. (%s)\n", 256 cFilesWritten + cFilesUnchanged, cFilesWritten, cFilesUnchanged, pcszOutDir); 257 return rc; 258 } 259 260 38 261 int main(int argc, char *argv[]) 39 262 { 40 263 int rc = 0; 41 const char *pcszBeginMarker = "\n// ##### BEGINFILE \""; 42 const char *pcszEndMarker = "\n// ##### ENDFILE"; 43 const size_t cbBeginMarker = strlen(pcszBeginMarker); 44 FILE *pFileIn = NULL; 45 char *pBuffer = NULL; 46 47 do 48 { 49 if (argc != 3) 264 265 if (argc == 3) 266 { 267 struct stat DirStat; 268 if (stat(argv[2], &DirStat) == 0 269 && S_ISDIR(DirStat.st_mode)) 50 270 { 51 fprintf(stderr, "filesplitter: Must be started with exactly two arguments,\n" 52 "1) the input file and 2) the directory where to put the output files\n"); 53 rc = 2; 54 break; 271 char *pszContent; 272 rc = readFile(argv[1], &pszContent, NULL); 273 if (!rc) 274 { 275 rc = splitFile(argv[2], pszContent); 276 free(pszContent); 277 } 55 278 } 56 57 struct stat lStat; 58 if ( stat(argv[2], &lStat) != 0 59 || (lStat.st_mode & S_IFDIR) != S_IFDIR) 60 { 61 fprintf(stderr, "filesplitter: Given argument \"%s\" is not a valid directory.\n", argv[2]); 62 rc = 2; 63 break; 64 } 65 66 if ( stat(argv[1], &lStat) 67 || !(pFileIn = fopen(argv[1], "r"))) 68 { 69 fprintf(stderr, "filesplitter: Cannot open file \"%s\" for reading.\n", argv[1]); 70 rc = 2; 71 break; 72 } 73 74 if (!(pBuffer = (char*)malloc(lStat.st_size + 1))) 75 { 76 fprintf(stderr, "filesplitter: Failed to allocate %ld bytes.\n", (long)lStat.st_size); 77 rc = 2; 78 break; 79 } 80 81 if (fread(pBuffer, 1, lStat.st_size, pFileIn) != (size_t)lStat.st_size) 82 { 83 fprintf(stderr, "filesplitter: Failed to read %ld bytes from input file.\n", (long)lStat.st_size); 84 rc = 2; 85 break; 86 } 87 pBuffer[lStat.st_size] = '\0'; 88 89 const char *pSearch = pBuffer; 90 unsigned long cFiles = 0; 91 size_t cbDirName = strlen(argv[2]); 92 93 do 94 { 95 /* find begin marker */ 96 const char *pBegin = strstr(pSearch, pcszBeginMarker); 97 if (!pBegin) 98 break; 99 100 /* find line after begin marker */ 101 const char *pLineAfterBegin = strchr(pBegin + cbBeginMarker, '\n'); 102 if (!pLineAfterBegin) 103 { 104 fprintf(stderr, "filesplitter: No newline after begin-file marker found.\n"); 105 rc = 2; 106 break; 107 } 108 ++pLineAfterBegin; 109 110 /* find second quote in begin marker line */ 111 const char *pSecondQuote = strchr(pBegin + cbBeginMarker, '\"'); 112 if ( !pSecondQuote 113 || pSecondQuote >= pLineAfterBegin) 114 { 115 fprintf(stderr, "filesplitter: Can't parse filename after begin-file marker (line %lu).\n", lineNumber(pBuffer, pcszBeginMarker)); 116 rc = 2; 117 break; 118 } 119 120 /* find end marker */ 121 const char *pEnd = strstr(pLineAfterBegin, pcszEndMarker); 122 if (!pEnd) 123 { 124 fprintf(stderr, "filesplitter: No matching end-line marker for begin-file marker found (line %lu).\n", lineNumber(pBuffer, pcszBeginMarker)); 125 rc = 2; 126 break; 127 } 128 129 /* construct output filename */ 130 char *pszFilename; 131 size_t cbFilename; 132 cbFilename = pSecondQuote - (pBegin + cbBeginMarker); 133 if (!(pszFilename = (char*)malloc(cbDirName + 1 + cbFilename + 1))) 134 { 135 fprintf(stderr, "filesplitter: Can't allocate memory for filename.\n"); 136 rc = 2; 137 break; 138 } 139 memcpy(pszFilename, argv[2], cbDirName); 140 pszFilename[cbDirName] = '/'; 141 memcpy(pszFilename + cbDirName + 1, pBegin + cbBeginMarker, cbFilename); 142 pszFilename[cbFilename + 1 + cbDirName] = '\0'; 143 144 /* create output file and write file contents */ 145 FILE *pFileOut; 146 if (!(pFileOut = fopen(pszFilename, "w"))) 147 { 148 fprintf(stderr, "filesplitter: Failed to open file \"%s\" for writing\n", pszFilename); 149 rc = 2; 150 } 151 else 152 { 153 size_t cbFile = pEnd - pLineAfterBegin; 154 if (fwrite(pLineAfterBegin, 1, cbFile, pFileOut) != cbFile) 155 { 156 fprintf(stderr, "filesplitter: Failed to write %ld bytes to file \"%s\"\n", (long)cbFile, pszFilename); 157 rc = 2; 158 } 159 160 fclose(pFileOut); 161 162 if (!rc) 163 { 164 ++cFiles; 165 pSearch = strchr(pEnd, '\n'); 166 } 167 } 168 169 free(pszFilename); 170 171 if (rc) 172 break; 173 174 } while (pSearch); 175 176 printf("filesplitter: Created %lu files.\n", cFiles); 177 } while (0); 178 179 if (pBuffer) 180 free(pBuffer); 181 if (pFileIn) 182 fclose(pFileIn); 183 184 return rc; 185 } 279 else 280 rc = printErr("Given argument \"%s\" is not a valid directory.\n", argv[2]); 281 } 282 else 283 rc = printErr("Must be started with exactly two arguments,\n" 284 "1) the input file and 2) the directory where to put the output files\n"); 285 return rc; 286 }
Note:
See TracChangeset
for help on using the changeset viewer.