1 | /* $Id: dos2unix.c 3114 2017-10-29 18:02:04Z bird $ */
|
---|
2 | /** @file
|
---|
3 | * dos2unix - Line ending conversion routines.
|
---|
4 | */
|
---|
5 |
|
---|
6 | /*
|
---|
7 | * Copyright (c) 2017 knut st. osmundsen <[email protected]>
|
---|
8 | *
|
---|
9 | * Permission is hereby granted, free of charge, to any person obtaining a
|
---|
10 | * copy of this software and associated documentation files (the "Software"),
|
---|
11 | * to deal in the Software without restriction, including without limitation
|
---|
12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
---|
13 | * and/or sell copies of the Software, and to permit persons to whom the
|
---|
14 | * Software is furnished to do so, subject to the following conditions:
|
---|
15 | *
|
---|
16 | * The above copyright notice and this permission notice shall be included
|
---|
17 | * in all copies or substantial portions of the Software.
|
---|
18 | *
|
---|
19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
---|
20 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
---|
21 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
---|
22 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
---|
23 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
---|
24 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
---|
25 | * IN THE SOFTWARE.
|
---|
26 | *
|
---|
27 | * Alternatively, the content of this file may be used under the terms of the
|
---|
28 | * GPL version 2 or later, or LGPL version 2.1 or later.
|
---|
29 | */
|
---|
30 |
|
---|
31 |
|
---|
32 | /*********************************************************************************************************************************
|
---|
33 | * Header Files *
|
---|
34 | *********************************************************************************************************************************/
|
---|
35 | #include "dos2unix.h"
|
---|
36 | #include <k/kDefs.h>
|
---|
37 | #include <errno.h>
|
---|
38 | #include <fcntl.h>
|
---|
39 | #if K_OS == K_OS_WINDOWS
|
---|
40 | # include <io.h>
|
---|
41 | #else
|
---|
42 | # include <unistd.h>
|
---|
43 | #endif
|
---|
44 | #include <assert.h>
|
---|
45 |
|
---|
/* Make sure O_BINARY is defined after the includes above: use _O_BINARY when
   that exists, otherwise 0 so OR-ing it into the open() flags has no effect. */
46 | #ifndef O_BINARY
|
---|
47 | # ifdef _O_BINARY
|
---|
48 | # define O_BINARY _O_BINARY
|
---|
49 | # else
|
---|
50 | # define O_BINARY 0
|
---|
51 | # endif
|
---|
52 | #endif
|
---|
53 |
|
---|
54 |
|
---|
55 | /*********************************************************************************************************************************
|
---|
56 | * Defined Constants And Macros *
|
---|
57 | *********************************************************************************************************************************/
|
---|
/** Size in bytes of the on-stack read buffer declared by dos2unix_analyze_fd. */
58 | #define STACK_BUF_SIZE 0x20000
|
---|
59 |
|
---|
/** Byte value of the line feed (LF) character. */
60 | #define DOS2UNIX_LF 0x0a
|
---|
/** Byte value of the carriage return (CR) character. */
61 | #define DOS2UNIX_CR 0x0d
|
---|
62 |
|
---|
63 |
|
---|
64 |
|
---|
65 | /**
|
---|
66 | * Does a line ending analysis of the given file.
|
---|
67 | *
|
---|
68 | * @returns 0 on success, errno value on open or read error.
|
---|
69 | * @param pszFilename The path to the file
|
---|
70 | * @param pfStyle Where to return the DOS2UNIX_STYLE_XXX and
|
---|
71 | * DOS2UNIX_F_XXX flags.
|
---|
72 | * @param pcDosEols Where to return the number of DOS end-of-line
|
---|
73 | * sequences found. Optional.
|
---|
74 | * @param pcUnixEols Where to return the number of UNIX end-of-line
|
---|
75 | * sequences found.
|
---|
76 | */
|
---|
77 | int dos2unix_analyze_file(const char *pszFilename, KU32 *pfStyle, KSIZE *pcDosEols, KSIZE *pcUnixEols)
|
---|
78 | {
|
---|
79 | int iRet = 0;
|
---|
80 | int fd = open(pszFilename, O_RDONLY | O_BINARY);
|
---|
81 | if (fd >= 0)
|
---|
82 | {
|
---|
83 | iRet = dos2unix_analyze_fd(fd, pfStyle, pcDosEols, pcUnixEols);
|
---|
84 | close(fd);
|
---|
85 | }
|
---|
86 | else
|
---|
87 | {
|
---|
88 | iRet = errno;
|
---|
89 | *pfStyle = DOS2UNIX_STYLE_NONE;
|
---|
90 | if (pcUnixEols)
|
---|
91 | *pcUnixEols = 0;
|
---|
92 | if (pcDosEols)
|
---|
93 | *pcDosEols = 0;
|
---|
94 | }
|
---|
95 | return iRet;
|
---|
96 | }
|
---|
97 |
|
---|
98 | /**
|
---|
99 | * Does a line ending analysis of the given file descriptor.
|
---|
100 | *
|
---|
101 | * @returns 0 on success, errno value on open or read error.
|
---|
102 | * @param fd The file descriptor to analyze. Caller must
|
---|
103 | * place this as the desired position.
|
---|
104 | * @param pfStyle Where to return the DOS2UNIX_STYLE_XXX and
|
---|
105 | * DOS2UNIX_F_XXX flags.
|
---|
106 | * @param pcDosEols Where to return the number of DOS end-of-line
|
---|
107 | * sequences found. Optional.
|
---|
108 | * @param pcUnixEols Where to return the number of UNIX end-of-line
|
---|
109 | * sequences found.
|
---|
110 | */
|
---|
111 | int dos2unix_analyze_fd(int fd, KU32 *pfStyle, KSIZE *pcDosEols, KSIZE *pcUnixEols)
|
---|
112 | {
|
---|
113 | KSIZE cUnixEols = 0;
|
---|
114 | KSIZE cDosEols = 0;
|
---|
115 | KSIZE cLoneCrs = 0;
|
---|
116 | KBOOL fPendingCr = K_FALSE;
|
---|
117 | int iRet = 0;
|
---|
118 |
|
---|
119 | /*
|
---|
120 | * Do the analysis.
|
---|
121 | */
|
---|
122 | *pfStyle = DOS2UNIX_STYLE_NONE;
|
---|
123 | for (;;)
|
---|
124 | {
|
---|
125 | char achBuf[STACK_BUF_SIZE];
|
---|
126 | int cchRead = read(fd, achBuf, sizeof(achBuf));
|
---|
127 | if (cchRead > 0)
|
---|
128 | {
|
---|
129 | int off = 0;
|
---|
130 | if (fPendingCr)
|
---|
131 | {
|
---|
132 | if (achBuf[0] == DOS2UNIX_LF)
|
---|
133 | {
|
---|
134 | off++;
|
---|
135 | cDosEols++;
|
---|
136 | }
|
---|
137 | else
|
---|
138 | cLoneCrs++;
|
---|
139 | fPendingCr = K_FALSE;
|
---|
140 | }
|
---|
141 |
|
---|
142 | while (off < cchRead)
|
---|
143 | {
|
---|
144 | char ch = achBuf[off++];
|
---|
145 | if ((unsigned char)ch > (unsigned char)DOS2UNIX_CR)
|
---|
146 | { /* likely */ }
|
---|
147 | else if (ch == DOS2UNIX_CR)
|
---|
148 | {
|
---|
149 | if (off < cchRead && achBuf[off] == DOS2UNIX_CR)
|
---|
150 | cDosEols++;
|
---|
151 | else
|
---|
152 | {
|
---|
153 | fPendingCr = K_TRUE;
|
---|
154 | while (off < cchRead)
|
---|
155 | {
|
---|
156 | ch = achBuf[off++];
|
---|
157 | if (ch != DOS2UNIX_CR)
|
---|
158 | {
|
---|
159 | if (ch == DOS2UNIX_LF)
|
---|
160 | cDosEols++;
|
---|
161 | else
|
---|
162 | cLoneCrs++;
|
---|
163 | fPendingCr = K_FALSE;
|
---|
164 | break;
|
---|
165 | }
|
---|
166 | cLoneCrs++;
|
---|
167 | }
|
---|
168 | }
|
---|
169 | }
|
---|
170 | else if (ch == DOS2UNIX_LF)
|
---|
171 | cUnixEols++;
|
---|
172 | else if (ch == '\0')
|
---|
173 | *pfStyle |= DOS2UNIX_F_BINARY;
|
---|
174 | }
|
---|
175 | }
|
---|
176 | else
|
---|
177 | {
|
---|
178 | if (cchRead < 0)
|
---|
179 | iRet = errno;
|
---|
180 | if (fPendingCr)
|
---|
181 | cLoneCrs++;
|
---|
182 | break;
|
---|
183 | }
|
---|
184 | }
|
---|
185 |
|
---|
186 | /*
|
---|
187 | * Set return values.
|
---|
188 | */
|
---|
189 | if (cUnixEols > 0 && cDosEols == 0)
|
---|
190 | *pfStyle |= DOS2UNIX_STYLE_UNIX;
|
---|
191 | else if (cDosEols > 0 && cUnixEols == 0)
|
---|
192 | *pfStyle |= DOS2UNIX_STYLE_DOS;
|
---|
193 | else if (cDosEols != 0 && cUnixEols != 0)
|
---|
194 | *pfStyle |= DOS2UNIX_STYLE_MIXED;
|
---|
195 | if (pcUnixEols)
|
---|
196 | *pcUnixEols = cUnixEols;
|
---|
197 | if (pcDosEols)
|
---|
198 | *pcDosEols = cDosEols;
|
---|
199 |
|
---|
200 | return iRet;
|
---|
201 | }
|
---|
202 |
|
---|
203 |
|
---|
204 | /**
|
---|
205 | * Converts a buffer to unix line (LF) endings.
|
---|
206 | *
|
---|
207 | * @retval K_TRUE if pending CR. The caller must handle this case.
|
---|
208 | * @retval K_FALSE if no pending CR.
|
---|
209 | *
|
---|
210 | * @param pchSrc The input buffer.
|
---|
211 | * @param cchSrc Number of characters to convert from the input
|
---|
212 | * buffer.
|
---|
213 | * @param pchDst The output buffer. This must be at least as big as
|
---|
214 | * the input. It is okay if this overlaps with the
|
---|
215 | * source buffer, as long as this is at the same or a
|
---|
216 | * lower address.
|
---|
217 | * @param pcchDst Where to return the number of characters in the
|
---|
218 | * output buffer.
|
---|
219 | */
|
---|
220 | KBOOL dos2unix_convert_to_unix(const char *pchSrc, KSIZE cchSrc, char *pchDst, KSIZE *pcchDst)
|
---|
221 | {
|
---|
222 | KSIZE offDst = 0;
|
---|
223 | while (cchSrc-- > 0)
|
---|
224 | {
|
---|
225 | char ch = *pchSrc++;
|
---|
226 | if ((unsigned char)ch != (unsigned char)DOS2UNIX_CR)
|
---|
227 | pchDst[offDst++] = ch;
|
---|
228 | else if (cchSrc > 0 && *pchSrc == DOS2UNIX_LF)
|
---|
229 | {
|
---|
230 | pchDst[offDst++] = DOS2UNIX_LF;
|
---|
231 | cchSrc--;
|
---|
232 | pchSrc++;
|
---|
233 | }
|
---|
234 | else if (cchSrc == 0)
|
---|
235 | {
|
---|
236 | *pcchDst = offDst;
|
---|
237 | return K_TRUE;
|
---|
238 | }
|
---|
239 | else
|
---|
240 | pchDst[offDst++] = ch;
|
---|
241 | }
|
---|
242 |
|
---|
243 | *pcchDst = offDst;
|
---|
244 | return K_FALSE;
|
---|
245 | }
|
---|
246 |
|
---|
247 |
|
---|
248 | /**
|
---|
249 | * Converts a buffer to DOS (CRLF) endings.
|
---|
250 | *
|
---|
251 | * @retval K_TRUE if pending CR. The caller must handle this case.
|
---|
252 | * @retval K_FALSE if no pending CR.
|
---|
253 | *
|
---|
254 | * @param pchSrc The input buffer.
|
---|
255 | * @param cchSrc Number of characters to convert from the input
|
---|
256 | * buffer.
|
---|
257 | * @param pchDst The output buffer. This must be at least _twice_ as
|
---|
258 | * big as the input. It is okay if the top half of the
|
---|
259 | * buffer overlaps with the source buffer.
|
---|
260 | * @param pcchDst Where to return the number of characters in the
|
---|
261 | * output buffer.
|
---|
262 | */
|
---|
263 | KBOOL dos2unix_convert_to_dos(const char *pchSrc, KSIZE cchSrc, char *pchDst, KSIZE *pcchDst)
|
---|
264 | {
|
---|
265 | KSIZE offDst = 0;
|
---|
266 | while (cchSrc-- > 0)
|
---|
267 | {
|
---|
268 | char ch = *pchSrc++;
|
---|
269 | if ((unsigned char)ch > (unsigned char)DOS2UNIX_CR)
|
---|
270 | pchDst[offDst++] = ch;
|
---|
271 | else if (ch == DOS2UNIX_CR)
|
---|
272 | {
|
---|
273 | /* We treat CR kind of like an escape character. */
|
---|
274 | do
|
---|
275 | {
|
---|
276 | if (cchSrc > 0)
|
---|
277 | {
|
---|
278 | pchDst[offDst++] = ch;
|
---|
279 | cchSrc--;
|
---|
280 | ch = *pchSrc++;
|
---|
281 | }
|
---|
282 | else
|
---|
283 | {
|
---|
284 | *pcchDst = offDst;
|
---|
285 | return K_TRUE;
|
---|
286 | }
|
---|
287 | } while (ch == DOS2UNIX_CR);
|
---|
288 | pchDst[offDst++] = ch;
|
---|
289 | }
|
---|
290 | else if (ch == DOS2UNIX_LF)
|
---|
291 | {
|
---|
292 | pchDst[offDst++] = DOS2UNIX_CR;
|
---|
293 | pchDst[offDst++] = DOS2UNIX_LF;
|
---|
294 | }
|
---|
295 | else
|
---|
296 | pchDst[offDst++] = ch;
|
---|
297 | }
|
---|
298 |
|
---|
299 | *pcchDst = offDst;
|
---|
300 | return K_FALSE;
|
---|
301 | }
|
---|
302 |
|
---|