VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/crypto/pemfile-read.cpp@ 106518

Last change on this file since 106518 was 106061, checked in by vboxsync, 4 months ago

Copyright year updates by scm.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 23.5 KB
Line 
1/* $Id: pemfile-read.cpp 106061 2024-09-16 14:03:52Z vboxsync $ */
2/** @file
3 * IPRT - Crypto - PEM file reader.
4 *
5 * See RFC-1421 for the original ideas for the format, but keep in mind
6 * that the format was hijacked and put to different uses. We're aiming at
7 * dealing with the different uses rather than anything email related here.
8 */
9
10/*
11 * Copyright (C) 2006-2024 Oracle and/or its affiliates.
12 *
13 * This file is part of VirtualBox base platform packages, as
14 * available from https://www.virtualbox.org.
15 *
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation, in version 3 of the
19 * License.
20 *
21 * This program is distributed in the hope that it will be useful, but
22 * WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 * General Public License for more details.
25 *
26 * You should have received a copy of the GNU General Public License
27 * along with this program; if not, see <https://www.gnu.org/licenses>.
28 *
29 * The contents of this file may alternatively be used under the terms
30 * of the Common Development and Distribution License Version 1.0
31 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
32 * in the VirtualBox distribution, in which case the provisions of the
33 * CDDL are applicable instead of those of the GPL.
34 *
35 * You may elect to license modified versions of this file under the
36 * terms and conditions of either the GPL or the CDDL or both.
37 *
38 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
39 */
40
41
42/*********************************************************************************************************************************
43* Header Files *
44*********************************************************************************************************************************/
45#include "internal/iprt.h"
46#include <iprt/crypto/pem.h>
47
48#include <iprt/asm.h>
49#include <iprt/base64.h>
50#include <iprt/ctype.h>
51#include <iprt/err.h>
52#include <iprt/mem.h>
53#include <iprt/memsafer.h>
54#include <iprt/file.h>
55#include <iprt/string.h>
56
57
58
59/**
60 * Looks for a PEM-like marker.
61 *
62 * @returns true if found, false if not.
63 * @param pbContent Start of the content to search thru.
64 * @param cbContent The size of the content to search.
65 * @param offStart The offset into pbContent to start searching.
66 * @param pszLeadWord The lead word (BEGIN/END).
67 * @param cchLeadWord The length of the lead word.
68 * @param paMarkers Pointer to an array of markers.
69 * @param cMarkers Number of markers in the array.
70 * @param ppMatch Where to return the pointer to the matching
71 * marker. Optional.
72 * @param poffBegin Where to return the start offset of the marker.
73 * Optional.
74 * @param poffEnd Where to return the end offset of the marker
75 * (trailing whitespace and newlines will be
76 * skipped). Optional.
77 */
78static bool rtCrPemFindMarker(uint8_t const *pbContent, size_t cbContent, size_t offStart,
79 const char *pszLeadWord, size_t cchLeadWord, PCRTCRPEMMARKER paMarkers, size_t cMarkers,
80 PCRTCRPEMMARKER *ppMatch, size_t *poffBegin, size_t *poffEnd)
81{
82 /* Remember the start of the content for the purpose of calculating offsets. */
83 uint8_t const * const pbStart = pbContent;
84
85 /* Skip adhead by offStart */
86 if (offStart >= cbContent)
87 return false;
88 pbContent += offStart;
89 cbContent -= offStart;
90
91 /*
92 * Search the content.
93 */
94 while (cbContent > 6)
95 {
96 /*
97 * Look for dashes.
98 */
99 uint8_t const *pbStartSearch = pbContent;
100 pbContent = (uint8_t const *)memchr(pbContent, '-', cbContent);
101 if (!pbContent)
102 break;
103
104 cbContent -= pbContent - pbStartSearch;
105 if (cbContent < 6)
106 break;
107
108 /*
109 * There must be at least three to interest us.
110 */
111 if ( pbContent[1] == '-'
112 && pbContent[2] == '-')
113 {
114 unsigned cDashes = 3;
115 while (cDashes < cbContent && pbContent[cDashes] == '-')
116 cDashes++;
117
118 if (poffBegin)
119 *poffBegin = pbContent - pbStart;
120 cbContent -= cDashes;
121 pbContent += cDashes;
122
123 /*
124 * Match lead word.
125 */
126 if ( cbContent > cchLeadWord
127 && memcmp(pbContent, pszLeadWord, cchLeadWord) == 0
128 && RT_C_IS_BLANK(pbContent[cchLeadWord]) )
129 {
130 pbContent += cchLeadWord;
131 cbContent -= cchLeadWord;
132 while (cbContent > 0 && RT_C_IS_BLANK(*pbContent))
133 {
134 pbContent++;
135 cbContent--;
136 }
137
138 /*
139 * Match one of the specified markers.
140 */
141 uint8_t const *pbSavedContent = pbContent;
142 size_t const cbSavedContent = cbContent;
143 for (uint32_t iMarker = 0; iMarker < cMarkers; iMarker++)
144 {
145 pbContent = pbSavedContent;
146 cbContent = cbSavedContent;
147
148 uint32_t cWords = paMarkers[iMarker].cWords;
149 PCRTCRPEMMARKERWORD pWord = paMarkers[iMarker].paWords;
150 while (cWords > 0)
151 {
152 uint32_t const cchWord = pWord->cchWord;
153 if (cbContent <= cchWord)
154 break;
155 if (memcmp(pbContent, pWord->pszWord, cchWord))
156 break;
157 pbContent += cchWord;
158 cbContent -= cchWord;
159
160 if (!cbContent)
161 break;
162 if (RT_C_IS_BLANK(*pbContent))
163 do
164 {
165 pbContent++;
166 cbContent--;
167 } while (cbContent > 0 && RT_C_IS_BLANK(*pbContent));
168 else if (cWords > 1 || pbContent[0] != '-')
169 break;
170
171 cWords--;
172 if (cWords == 0)
173 {
174 /*
175 * If there are three or more dashes following now, we've got a hit.
176 */
177 if ( cbContent > 3
178 && pbContent[0] == '-'
179 && pbContent[1] == '-'
180 && pbContent[2] == '-')
181 {
182 cDashes = 3;
183 while (cDashes < cbContent && pbContent[cDashes] == '-')
184 cDashes++;
185 cbContent -= cDashes;
186 pbContent += cDashes;
187
188 /*
189 * Skip spaces and newline.
190 */
191 while (cbContent > 0 && RT_C_IS_SPACE(*pbContent))
192 pbContent++, cbContent--;
193 if (poffEnd)
194 *poffEnd = pbContent - pbStart;
195 if (ppMatch)
196 *ppMatch = &paMarkers[iMarker];
197 return true;
198 }
199 break;
200 }
201 pWord++;
202 } /* for each word in marker. */
203 } /* for each marker. */
204 }
205 }
206 else
207 {
208 pbContent++;
209 cbContent--;
210 }
211 }
212
213 return false;
214}
215
216
217static bool rtCrPemFindMarkerSection(uint8_t const *pbContent, size_t cbContent, size_t offStart,
218 PCRTCRPEMMARKER paMarkers, size_t cMarkers,
219 PCRTCRPEMMARKER *ppMatch, size_t *poffBegin, size_t *poffEnd, size_t *poffResume)
220{
221 /** @todo Detect BEGIN / END mismatch. */
222 PCRTCRPEMMARKER pMatch;
223 if (rtCrPemFindMarker(pbContent, cbContent, offStart, "BEGIN", 5, paMarkers, cMarkers,
224 &pMatch, NULL /*poffStart*/, poffBegin))
225 {
226 if (rtCrPemFindMarker(pbContent, cbContent, *poffBegin, "END", 3, pMatch, 1,
227 NULL /*ppMatch*/, poffEnd, poffResume))
228 {
229 *ppMatch = pMatch;
230 return true;
231 }
232 }
233 *ppMatch = NULL;
234 return false;
235}
236
237
238/**
239 * Parses any fields the message may contain.
240 *
241 * @retval VINF_SUCCESS
242 * @retval VERR_NO_MEMORY
243 * @retval VERR_CR_MALFORMED_PEM_HEADER
244 *
245 * @param pSection The current section, where we will attach a list of
246 * fields to the pFieldHead member.
247 * @param pbContent The content of the PEM message being parsed.
248 * @param cbContent The length of the PEM message.
249 * @param pcbFields Where to return the length of the header fields we found.
250 */
251static int rtCrPemProcessFields(PRTCRPEMSECTION pSection, uint8_t const *pbContent, size_t cbContent, size_t *pcbFields)
252{
253 uint8_t const * const pbContentStart = pbContent;
254
255 /*
256 * Work the encapulated header protion field by field.
257 *
258 * This is optional, so currently we don't throw errors here but leave that
259 * to when we work the text portion with the base64 decoder. Also, as a reader
260 * we don't go all pedanic on confirming to specification (RFC-1421), especially
261 * given that it's used for crypto certificates, keys and the like not email. :-)
262 */
263 PCRTCRPEMFIELD *ppNext = &pSection->pFieldHead;
264 while (cbContent > 0)
265 {
266 /* Just look for a colon first. */
267 const uint8_t *pbColon = (const uint8_t *)memchr(pbContent, ':', cbContent);
268 if (!pbColon)
269 break;
270 size_t offColon = pbColon - pbContent;
271
272 /* Check that the colon is within the first line. */
273 if (!memchr(pbContent, '\n', cbContent - offColon))
274 return VERR_CR_MALFORMED_PEM_HEADER;
275
276 /* Skip leading spaces (there shouldn't be any, but just in case). */
277 while (RT_C_IS_BLANK(*pbContent) && /*paranoia:*/ offColon > 0)
278 {
279 offColon--;
280 cbContent--;
281 pbContent++;
282 }
283
284 /* There shouldn't be any spaces before the colon, but just in case */
285 size_t cchName = offColon;
286 while (cchName > 0 && RT_C_IS_BLANK(pbContent[cchName - 1]))
287 cchName--;
288
289 /* Skip leading value spaces (there typically is at least one). */
290 size_t offValue = offColon + 1;
291 while (offValue < cbContent && RT_C_IS_BLANK(pbContent[offValue]))
292 offValue++;
293
294 /* Find the newline the field value ends with and where the next iteration should start later on. */
295 size_t cbLeft;
296 uint8_t const *pbNext = (uint8_t const *)memchr(&pbContent[offValue], '\n', cbContent - offValue);
297 while ( pbNext
298 && (cbLeft = pbNext - pbContent) < cbContent
299 && RT_C_IS_BLANK(pbNext[1]) /* next line must start with a space or tab */)
300 pbNext = (uint8_t const *)memchr(&pbNext[1], '\n', cbLeft - 1);
301
302 size_t cchValue;
303 if (pbNext)
304 {
305 cchValue = pbNext - &pbContent[offValue];
306 if (cchValue > 0 && pbNext[-1] == '\r')
307 cchValue--;
308 pbNext++;
309 }
310 else
311 {
312 cchValue = cbContent - offValue;
313 pbNext = &pbContent[cbContent];
314 }
315
316 /* Strip trailing spaces. */
317 while (cchValue > 0 && RT_C_IS_BLANK(pbContent[offValue + cchValue - 1]))
318 cchValue--;
319
320 /*
321 * Allocate a field instance.
322 *
323 * Note! We don't consider field data sensitive at the moment. This
324 * mainly because the fields are chiefly used to indicate the
325 * encryption parameters to the body.
326 */
327 PRTCRPEMFIELD pNewField = (PRTCRPEMFIELD)RTMemAllocZVar(sizeof(*pNewField) + cchName + 1 + cchValue + 1);
328 if (!pNewField)
329 return VERR_NO_MEMORY;
330 pNewField->cchName = cchName;
331 pNewField->cchValue = cchValue;
332 memcpy(pNewField->szName, pbContent, cchName);
333 pNewField->szName[cchName] = '\0';
334 char *pszDst = (char *)memcpy(&pNewField->szName[cchName + 1], &pbContent[offValue], cchValue);
335 pNewField->pszValue = pszDst;
336 pszDst[cchValue] = '\0';
337 pNewField->pNext = NULL;
338
339 *ppNext = pNewField;
340 ppNext = &pNewField->pNext;
341
342 /*
343 * Advance past the field.
344 */
345 cbContent -= pbNext - pbContent;
346 pbContent = pbNext;
347 }
348
349 /*
350 * Skip blank line(s) before the body.
351 */
352 while (cbContent >= 1)
353 {
354 size_t cbSkip;
355 if (pbContent[0] == '\n')
356 cbSkip = 1;
357 else if ( pbContent[0] == '\r'
358 && cbContent >= 2
359 && pbContent[1] == '\n')
360 cbSkip = 2;
361 else
362 break;
363 pbContent += cbSkip;
364 cbContent -= cbSkip;
365 }
366
367 *pcbFields = pbContent - pbContentStart;
368 return VINF_SUCCESS;
369}
370
371
372/**
373 * Does the decoding of a PEM-like data blob after it has been located.
374 *
375 * @returns IPRT status ocde
376 * @param pbContent The start of the PEM-like content (text).
377 * @param cbContent The max size of the PEM-like content.
378 * @param fSensitive Set if the safer allocator should be used.
379 * @param ppvDecoded Where to return a heap block containing the
380 * decoded content.
381 * @param pcbDecoded Where to return the size of the decoded content.
382 */
383static int rtCrPemDecodeBase64(uint8_t const *pbContent, size_t cbContent, bool fSensitive,
384 void **ppvDecoded, size_t *pcbDecoded)
385{
386 ssize_t cbDecoded = RTBase64DecodedSizeEx((const char *)pbContent, cbContent, NULL);
387 if (cbDecoded < 0)
388 return VERR_INVALID_BASE64_ENCODING;
389
390 *pcbDecoded = cbDecoded;
391 void *pvDecoded = !fSensitive ? RTMemAlloc(cbDecoded) : RTMemSaferAllocZ(cbDecoded);
392 if (!pvDecoded)
393 return VERR_NO_MEMORY;
394
395 size_t cbActual;
396 int rc = RTBase64DecodeEx((const char *)pbContent, cbContent, pvDecoded, cbDecoded, &cbActual, NULL);
397 if (RT_SUCCESS(rc))
398 {
399 if (cbActual == (size_t)cbDecoded)
400 {
401 *ppvDecoded = pvDecoded;
402 return VINF_SUCCESS;
403 }
404
405 rc = VERR_INTERNAL_ERROR_3;
406 }
407 if (!fSensitive)
408 RTMemFree(pvDecoded);
409 else
410 RTMemSaferFree(pvDecoded, cbDecoded);
411 return rc;
412}
413
414
415/**
416 * Checks if the content of a file looks to be binary or not.
417 *
418 * @returns true if likely to be binary, false if not binary.
419 * @param pbFile The file bytes to scan.
420 * @param cbFile The number of bytes.
421 * @param fFlags RTCRPEMREADFILE_F_XXX
422 */
423static bool rtCrPemIsBinaryBlob(uint8_t const *pbFile, size_t cbFile, uint32_t fFlags)
424{
425 if (fFlags & RTCRPEMREADFILE_F_ONLY_PEM)
426 return false;
427
428 /*
429 * Well formed PEM files should probably only contain 7-bit ASCII and
430 * restrict thenselfs to the following control characters:
431 * tab, newline, return, form feed
432 *
433 * However, if we want to read PEM files which contains human readable
434 * certificate details before or after each base-64 section, we can't stick
435 * to 7-bit ASCII. We could say it must be UTF-8, but that's probably to
436 * limited as well. So, we'll settle for detecting binary files by control
437 * characters alone (safe enough for DER encoded stuff, I think).
438 */
439 while (cbFile-- > 0)
440 {
441 uint8_t const b = *pbFile++;
442 if (b < 32 && b != '\t' && b != '\n' && b != '\r' && b != '\f')
443 {
444 /* Ignore EOT (4), SUB (26) and NUL (0) at the end of the file. */
445 if ( (b == 4 || b == 26)
446 && ( cbFile == 0
447 || ( cbFile == 1
448 && *pbFile == '\0')))
449 return false;
450
451 if (b == 0 && cbFile == 0)
452 return false;
453
454 return true;
455 }
456 }
457 return false;
458}
459
460
461RTDECL(int) RTCrPemFreeSections(PCRTCRPEMSECTION pSectionHead)
462{
463 while (pSectionHead != NULL)
464 {
465 PRTCRPEMSECTION pFree = (PRTCRPEMSECTION)pSectionHead;
466 pSectionHead = pSectionHead->pNext;
467 ASMCompilerBarrier(); /* paranoia */
468
469 if (pFree->pbData)
470 {
471 if (!pFree->fSensitive)
472 RTMemFree(pFree->pbData);
473 else
474 RTMemSaferFree(pFree->pbData, pFree->cbData);
475 pFree->pbData = NULL;
476 pFree->cbData = 0;
477 }
478
479 PRTCRPEMFIELD pField = (PRTCRPEMFIELD)pFree->pFieldHead;
480 if (pField)
481 {
482 pFree->pFieldHead = NULL;
483 do
484 {
485 PRTCRPEMFIELD pFreeField = pField;
486 pField = (PRTCRPEMFIELD)pField->pNext;
487 ASMCompilerBarrier(); /* paranoia */
488
489 pFreeField->pszValue = NULL;
490 RTMemFree(pFreeField);
491 } while (pField);
492 }
493
494 RTMemFree(pFree);
495 }
496 return VINF_SUCCESS;
497}
498
499
500RTDECL(int) RTCrPemParseContent(void const *pvContent, size_t cbContent, uint32_t fFlags,
501 PCRTCRPEMMARKER paMarkers, size_t cMarkers,
502 PCRTCRPEMSECTION *ppSectionHead, PRTERRINFO pErrInfo)
503{
504 RT_NOREF_PV(pErrInfo);
505
506 /*
507 * Input validation.
508 */
509 AssertPtr(ppSectionHead);
510 *ppSectionHead = NULL;
511 AssertReturn(cbContent, VINF_EOF);
512 AssertPtr(pvContent);
513 AssertPtr(paMarkers);
514 AssertReturn(!(fFlags & ~RTCRPEMREADFILE_F_VALID_MASK), VERR_INVALID_FLAGS);
515
516 /*
517 * Pre-allocate a section.
518 */
519 int rc = VINF_SUCCESS;
520 PRTCRPEMSECTION pSection = (PRTCRPEMSECTION)RTMemAllocZ(sizeof(*pSection));
521 if (pSection)
522 {
523 bool const fSensitive = RT_BOOL(fFlags & RTCRPEMREADFILE_F_SENSITIVE);
524
525 /*
526 * Try locate the first section.
527 */
528 uint8_t const *pbContent = (uint8_t const *)pvContent;
529 size_t offBegin, offEnd, offResume;
530 PCRTCRPEMMARKER pMatch;
531 if ( !rtCrPemIsBinaryBlob(pbContent, cbContent, fFlags)
532 && rtCrPemFindMarkerSection(pbContent, cbContent, 0 /*offStart*/, paMarkers, cMarkers,
533 &pMatch, &offBegin, &offEnd, &offResume) )
534 {
535 PCRTCRPEMSECTION *ppNext = ppSectionHead;
536 for (;;)
537 {
538 //pSection->pNext = NULL;
539 pSection->pMarker = pMatch;
540 //pSection->pbData = NULL;
541 //pSection->cbData = 0;
542 //pSection->pFieldHead = NULL;
543 pSection->fSensitive = fSensitive;
544
545 *ppNext = pSection;
546 ppNext = &pSection->pNext;
547
548 /*
549 * Decode the section.
550 */
551 size_t cbFields = 0;
552 int rc2 = rtCrPemProcessFields(pSection, pbContent + offBegin, offEnd - offBegin, &cbFields);
553 offBegin += cbFields;
554 if (RT_SUCCESS(rc2))
555 rc2 = rtCrPemDecodeBase64(pbContent + offBegin, offEnd - offBegin, fSensitive,
556 (void **)&pSection->pbData, &pSection->cbData);
557 if (RT_FAILURE(rc2))
558 {
559 pSection->pbData = NULL;
560 pSection->cbData = 0;
561 if ( rc2 == VERR_INVALID_BASE64_ENCODING
562 && (fFlags & RTCRPEMREADFILE_F_CONTINUE_ON_ENCODING_ERROR))
563 rc = -rc2;
564 else
565 {
566 rc = rc2;
567 break;
568 }
569 }
570
571 /*
572 * More sections?
573 */
574 if ( offResume + 12 >= cbContent
575 || offResume >= cbContent
576 || !rtCrPemFindMarkerSection(pbContent, cbContent, offResume, paMarkers, cMarkers,
577 &pMatch, &offBegin, &offEnd, &offResume) )
578 break; /* No. */
579
580 /* Ok, allocate a new record for it. */
581 pSection = (PRTCRPEMSECTION)RTMemAllocZ(sizeof(*pSection));
582 if (RT_UNLIKELY(!pSection))
583 {
584 rc = VERR_NO_MEMORY;
585 break;
586 }
587 }
588 if (RT_SUCCESS(rc))
589 return rc;
590
591 RTCrPemFreeSections(*ppSectionHead);
592 }
593 else
594 {
595 if (!(fFlags & RTCRPEMREADFILE_F_ONLY_PEM))
596 {
597 /*
598 * No PEM section found. Return the whole file as one binary section.
599 */
600 //pSection->pNext = NULL;
601 //pSection->pMarker = NULL;
602 //pSection->pFieldHead = NULL;
603 pSection->cbData = cbContent;
604 pSection->fSensitive = fSensitive;
605 if (!fSensitive)
606 pSection->pbData = (uint8_t *)RTMemDup(pbContent, cbContent);
607 else
608 {
609 pSection->pbData = (uint8_t *)RTMemSaferAllocZ(cbContent);
610 if (pSection->pbData)
611 memcpy(pSection->pbData, pbContent, cbContent);
612 }
613 if (pSection->pbData)
614 {
615 *ppSectionHead = pSection;
616 return VINF_SUCCESS;
617 }
618
619 rc = VERR_NO_MEMORY;
620 }
621 else
622 rc = VWRN_NOT_FOUND;
623 RTMemFree(pSection);
624 }
625 }
626 else
627 rc = VERR_NO_MEMORY;
628 *ppSectionHead = NULL;
629 return rc;
630}
631
632
633RTDECL(int) RTCrPemReadFile(const char *pszFilename, uint32_t fFlags, PCRTCRPEMMARKER paMarkers, size_t cMarkers,
634 PCRTCRPEMSECTION *ppSectionHead, PRTERRINFO pErrInfo)
635{
636 *ppSectionHead = NULL;
637 AssertReturn(!(fFlags & ~RTCRPEMREADFILE_F_VALID_MASK), VERR_INVALID_FLAGS);
638
639 size_t cbContent;
640 void *pvContent;
641 int rc = RTFileReadAllEx(pszFilename, 0, 64U*_1M, RTFILE_RDALL_O_DENY_WRITE, &pvContent, &cbContent);
642 if (RT_SUCCESS(rc))
643 {
644 rc = RTCrPemParseContent(pvContent, cbContent, fFlags, paMarkers, cMarkers, ppSectionHead, pErrInfo);
645 if (fFlags & RTCRPEMREADFILE_F_SENSITIVE)
646 RTMemWipeThoroughly(pvContent, cbContent, 3);
647 RTFileReadAllFree(pvContent, cbContent);
648 }
649 else
650 rc = RTErrInfoSetF(pErrInfo, rc, "RTFileReadAllEx failed with %Rrc on '%s'", rc, pszFilename);
651 return rc;
652}
653
654
655RTDECL(const char *) RTCrPemFindFirstSectionInContent(void const *pvContent, size_t cbContent,
656 PCRTCRPEMMARKER paMarkers, size_t cMarkers)
657{
658 size_t offBegin;
659 if (rtCrPemFindMarker((uint8_t *)pvContent, cbContent, 0, "BEGIN", 5, paMarkers, cMarkers, NULL, &offBegin, NULL))
660 return (const char *)pvContent + offBegin;
661 return NULL;
662}
663
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette