VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/linux/SUPLib-linux.cpp@ 105680

Last change on this file since 105680 was 98103, checked in by vboxsync, 23 months ago

Copyright year updates by scm.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 12.3 KB
Line 
1/* $Id: SUPLib-linux.cpp 98103 2023-01-17 14:15:46Z vboxsync $ */
2/** @file
3 * VirtualBox Support Library - GNU/Linux specific parts.
4 */
5
6/*
7 * Copyright (C) 2006-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * The contents of this file may alternatively be used under the terms
26 * of the Common Development and Distribution License Version 1.0
27 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28 * in the VirtualBox distribution, in which case the provisions of the
29 * CDDL are applicable instead of those of the GPL.
30 *
31 * You may elect to license modified versions of this file under the
32 * terms and conditions of either the GPL or the CDDL or both.
33 *
34 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35 */
36
37
38/*********************************************************************************************************************************
39* Header Files *
40*********************************************************************************************************************************/
41#define LOG_GROUP LOG_GROUP_SUP
42#ifdef IN_SUP_HARDENED_R3
43# undef DEBUG /* Warning: disables RT_STRICT */
44# undef RT_STRICT
45# ifndef LOG_DISABLED
46# define LOG_DISABLED
47# endif
48# define RTLOG_REL_DISABLED
49# include <iprt/log.h>
50#endif
51
52#include <sys/fcntl.h>
53#include <sys/ioctl.h>
54#include <sys/mman.h>
55#include <errno.h>
56#include <unistd.h>
57#include <stdlib.h>
58#include <malloc.h>
59
60#include <VBox/log.h>
61#include <VBox/sup.h>
62#include <iprt/path.h>
63#include <iprt/assert.h>
64#include <VBox/types.h>
65#include <iprt/string.h>
66#include <iprt/system.h>
67#include <VBox/err.h>
68#include <VBox/param.h>
69#include "../SUPLibInternal.h"
70#include "../SUPDrvIOC.h"
71
72
73/*********************************************************************************************************************************
74* Defined Constants And Macros *
75*********************************************************************************************************************************/
/** Path of the support driver device node opened when SUPR3INIT_F_UNRESTRICTED is requested. */
#define DEVICE_NAME_SYS     "/dev/vboxdrv"
/** Path of the support driver device node opened otherwise. */
#define DEVICE_NAME_USR     "/dev/vboxdrvu"

/* Provide MADV_DONTFORK ourselves when the system headers do not define it. */
#ifndef MADV_DONTFORK
# define MADV_DONTFORK      10
#endif
85
86
87
88DECLHIDDEN(int) suplibOsInit(PSUPLIBDATA pThis, bool fPreInited, uint32_t fFlags, SUPINITOP *penmWhat, PRTERRINFO pErrInfo)
89{
90 RT_NOREF2(penmWhat, pErrInfo);
91
92 /*
93 * Nothing to do if pre-inited.
94 */
95 if (fPreInited)
96 return VINF_SUCCESS;
97 Assert(pThis->hDevice == (intptr_t)NIL_RTFILE);
98
99 /*
100 * Check if madvise works.
101 */
102 void *pv = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
103 if (pv == MAP_FAILED)
104 return VERR_NO_MEMORY;
105 pThis->fSysMadviseWorks = (0 == madvise(pv, PAGE_SIZE, MADV_DONTFORK));
106 munmap(pv, PAGE_SIZE);
107
108 /*
109 * Driverless?
110 */
111 if (fFlags & SUPR3INIT_F_DRIVERLESS)
112 {
113 pThis->fDriverless = true;
114 return VINF_SUCCESS;
115 }
116
117 /*
118 * Try open the device.
119 */
120 const char *pszDeviceNm = fFlags & SUPR3INIT_F_UNRESTRICTED ? DEVICE_NAME_SYS : DEVICE_NAME_USR;
121 int hDevice = open(pszDeviceNm, O_RDWR, 0);
122 if (hDevice < 0)
123 {
124 /*
125 * Try load the device.
126 */
127 hDevice = open(pszDeviceNm, O_RDWR, 0);
128 if (hDevice < 0)
129 {
130 int rc;
131 switch (errno)
132 {
133 case ENXIO: /* see man 2 open, ENODEV is actually a kernel bug */
134 case ENODEV: rc = VERR_VM_DRIVER_LOAD_ERROR; break;
135 case EPERM:
136 case EACCES: rc = VERR_VM_DRIVER_NOT_ACCESSIBLE; break;
137 case ENOENT: rc = VERR_VM_DRIVER_NOT_INSTALLED; break;
138 default: rc = VERR_VM_DRIVER_OPEN_ERROR; break;
139 }
140 if (fFlags & SUPR3INIT_F_DRIVERLESS_MASK)
141 {
142 LogRel(("Failed to open \"%s\", errno=%d, rc=%Rrc - Switching to driverless mode.\n", pszDeviceNm, errno, rc));
143 pThis->fDriverless = true;
144 return VINF_SUCCESS;
145 }
146 LogRel(("Failed to open \"%s\", errno=%d, rc=%Rrc\n", pszDeviceNm, errno, rc));
147 return rc;
148 }
149 }
150
151 /*
152 * Mark the file handle close on exec.
153 */
154 if (fcntl(hDevice, F_SETFD, FD_CLOEXEC) == -1)
155 {
156 close(hDevice);
157#ifdef IN_SUP_HARDENED_R3
158 return VERR_INTERNAL_ERROR;
159#else
160 return RTErrConvertFromErrno(errno);
161#endif
162 }
163
164 /*
165 * We're done.
166 */
167 pThis->hDevice = hDevice;
168 pThis->fUnrestricted = RT_BOOL(fFlags & SUPR3INIT_F_UNRESTRICTED);
169 return VINF_SUCCESS;
170}
171
172
173DECLHIDDEN(int) suplibOsTerm(PSUPLIBDATA pThis)
174{
175 /*
176 * Close the device if it's actually open.
177 */
178 if (pThis->hDevice != (intptr_t)NIL_RTFILE)
179 {
180 if (close(pThis->hDevice))
181 AssertFailed();
182 pThis->hDevice = (intptr_t)NIL_RTFILE;
183 }
184
185 return 0;
186}
187
188
189#ifndef IN_SUP_HARDENED_R3
190
191DECLHIDDEN(int) suplibOsInstall(void)
192{
193 // nothing to do on Linux
194 return VERR_NOT_IMPLEMENTED;
195}
196
197
198DECLHIDDEN(int) suplibOsUninstall(void)
199{
200 // nothing to do on Linux
201 return VERR_NOT_IMPLEMENTED;
202}
203
204
205DECLHIDDEN(int) suplibOsIOCtl(PSUPLIBDATA pThis, uintptr_t uFunction, void *pvReq, size_t cbReq)
206{
207 AssertMsg(pThis->hDevice != (intptr_t)NIL_RTFILE, ("SUPLIB not initiated successfully!\n"));
208 NOREF(cbReq);
209
210 /*
211 * Issue device iocontrol.
212 */
213 if (RT_LIKELY(ioctl(pThis->hDevice, uFunction, pvReq) >= 0))
214 return VINF_SUCCESS;
215
216 /* This is the reverse operation of the one found in SUPDrv-linux.c */
217 switch (errno)
218 {
219 case EACCES: return VERR_GENERAL_FAILURE;
220 case EINVAL: return VERR_INVALID_PARAMETER;
221 case EILSEQ: return VERR_INVALID_MAGIC;
222 case ENXIO: return VERR_INVALID_HANDLE;
223 case EFAULT: return VERR_INVALID_POINTER;
224 case ENOLCK: return VERR_LOCK_FAILED;
225 case EEXIST: return VERR_ALREADY_LOADED;
226 case EPERM: return VERR_PERMISSION_DENIED;
227 case ENOSYS: return VERR_VERSION_MISMATCH;
228 case 1000: return VERR_IDT_FAILED;
229 }
230
231 return RTErrConvertFromErrno(errno);
232}
233
234
235DECLHIDDEN(int) suplibOsIOCtlFast(PSUPLIBDATA pThis, uintptr_t uFunction, uintptr_t idCpu)
236{
237 int rc = ioctl(pThis->hDevice, uFunction, idCpu);
238 if (rc == -1)
239 rc = -errno;
240 return rc;
241}
242
243
244DECLHIDDEN(int) suplibOsPageAlloc(PSUPLIBDATA pThis, size_t cPages, uint32_t fFlags, void **ppvPages)
245{
246 /*
247 * If large pages are requested, try use the MAP_HUGETBL flags. This takes
248 * pages from the reserved huge page pool (see sysctl vm.nr_hugepages) and
249 * is typically not configured. Also, when the pool is exhausted we get
250 * ENOMEM back at us. So, when it fails try again w/o MAP_HUGETLB.
251 */
252 int fMmap = MAP_PRIVATE | MAP_ANONYMOUS;
253#ifdef MAP_HUGETLB
254 if ((fFlags & SUP_PAGE_ALLOC_F_LARGE_PAGES) && !(cPages & 511))
255 fMmap |= MAP_HUGETLB;
256#endif
257
258 size_t cbMmap = cPages << PAGE_SHIFT;
259 if ( !pThis->fSysMadviseWorks
260 && (fFlags & (SUP_PAGE_ALLOC_F_FOR_LOCKING | SUP_PAGE_ALLOC_F_LARGE_PAGES)) == SUP_PAGE_ALLOC_F_FOR_LOCKING)
261 cbMmap += PAGE_SIZE * 2;
262
263 uint8_t *pbPages = (uint8_t *)mmap(NULL, cbMmap, PROT_READ | PROT_WRITE, fMmap, -1, 0);
264#ifdef MAP_HUGETLB
265 if (pbPages == MAP_FAILED && (fMmap & MAP_HUGETLB))
266 {
267 /* Try again without MAP_HUGETLB if mmap fails: */
268 fMmap &= ~MAP_HUGETLB;
269 if (!pThis->fSysMadviseWorks && (fFlags & SUP_PAGE_ALLOC_F_FOR_LOCKING))
270 cbMmap = (cPages + 2) << PAGE_SHIFT;
271 pbPages = (uint8_t *)mmap(NULL, cbMmap, PROT_READ | PROT_WRITE, fMmap, -1, 0);
272 }
273#endif
274 if (pbPages != MAP_FAILED)
275 {
276 if ( !(fFlags & SUP_PAGE_ALLOC_F_FOR_LOCKING)
277 || pThis->fSysMadviseWorks
278#ifdef MAP_HUGETLB
279 || (fMmap & MAP_HUGETLB)
280#endif
281 )
282 {
283 /*
284 * It is not fatal if we fail here but a forked child (e.g. the ALSA sound server)
285 * could crash. Linux < 2.6.16 does not implement madvise(MADV_DONTFORK) but the
286 * kernel seems to split bigger VMAs and that is all that we want -- later we set the
287 * VM_DONTCOPY attribute in supdrvOSLockMemOne().
288 */
289 if ( madvise(pbPages, cbMmap, MADV_DONTFORK)
290#ifdef MAP_HUGETLB
291 && !(fMmap & MAP_HUGETLB)
292#endif
293 )
294 LogRel(("SUPLib: madvise %p-%p failed\n", pbPages, cbMmap));
295
296#ifdef MADV_HUGEPAGE
297 /*
298 * Try enable transparent huge pages for the allocation if desired
299 * and we weren't able to use MAP_HUGETBL above.
300 * Note! KVM doesn't seem to benefit much from this.
301 */
302 if ( !(fMmap & MAP_HUGETLB)
303 && (fFlags & SUP_PAGE_ALLOC_F_LARGE_PAGES)
304 && !(cPages & 511)) /** @todo PORTME: x86 assumption */
305 madvise(pbPages, cbMmap, MADV_HUGEPAGE);
306#endif
307 }
308 else
309 {
310 /*
311 * madvise(MADV_DONTFORK) is not available (most probably Linux 2.4). Enclose any
312 * mmapped region by two unmapped pages to guarantee that there is exactly one VM
313 * area struct of the very same size as the mmap area.
314 */
315 mprotect(pbPages, PAGE_SIZE, PROT_NONE);
316 mprotect(pbPages + cbMmap - PAGE_SIZE, PAGE_SIZE, PROT_NONE);
317 pbPages += PAGE_SHIFT;
318 }
319
320 /** @todo Dunno why we do this, really. It's a waste of time. Maybe it was
321 * to try make sure the pages were allocated or something before we locked them,
322 * so I qualified it with SUP_PAGE_ALLOC_F_FOR_LOCKING (unused) for now... */
323 if (fFlags & SUP_PAGE_ALLOC_F_FOR_LOCKING)
324 memset(pbPages, 0, cPages << PAGE_SHIFT);
325
326 *ppvPages = pbPages;
327 return VINF_SUCCESS;
328 }
329 return VERR_NO_MEMORY;
330}
331
332
333DECLHIDDEN(int) suplibOsPageFree(PSUPLIBDATA pThis, void *pvPages, size_t cPages)
334{
335 NOREF(pThis);
336 munmap(pvPages, cPages << PAGE_SHIFT);
337 return VINF_SUCCESS;
338}
339
340
341/**
342 * Check if the host kernel supports VT-x or not.
343 *
344 * Older Linux kernels clear the VMXE bit in the CR4 register (function
345 * tlb_flush_all()) leading to a host kernel panic.
346 *
347 * @returns VBox status code (no info).
348 * @param ppszWhy Where to return explanatory message.
349 */
350DECLHIDDEN(int) suplibOsQueryVTxSupported(const char **ppszWhy)
351{
352 char szBuf[256];
353 int rc = RTSystemQueryOSInfo(RTSYSOSINFO_RELEASE, szBuf, sizeof(szBuf));
354 if (RT_SUCCESS(rc))
355 {
356 char *pszNext;
357 uint32_t uA, uB, uC;
358
359 rc = RTStrToUInt32Ex(szBuf, &pszNext, 10, &uA);
360 if ( RT_SUCCESS(rc)
361 && *pszNext == '.')
362 {
363 /*
364 * new version number scheme starting with Linux 3.0
365 */
366 if (uA >= 3)
367 return VINF_SUCCESS;
368 rc = RTStrToUInt32Ex(pszNext+1, &pszNext, 10, &uB);
369 if ( RT_SUCCESS(rc)
370 && *pszNext == '.')
371 {
372 rc = RTStrToUInt32Ex(pszNext+1, &pszNext, 10, &uC);
373 if (RT_SUCCESS(rc))
374 {
375 uint32_t uLinuxVersion = (uA << 16) + (uB << 8) + uC;
376 if (uLinuxVersion >= (2 << 16) + (6 << 8) + 13)
377 return VINF_SUCCESS;
378 }
379 }
380 }
381 }
382
383 *ppszWhy = "Linux 2.6.13 or newer required!";
384 return VERR_SUPDRV_KERNEL_TOO_OLD_FOR_VTX;
385}
386
387#endif /* !IN_SUP_HARDENED_R3 */
388
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette