VirtualBox

source: vbox/trunk/src/VBox/Devices/Network/SrvIntNetR0.cpp@ 62557

Last change on this file since 62557 was 62511, checked in by vboxsync, 9 years ago

(C) 2016

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 241.5 KB
Line 
1/* $Id: SrvIntNetR0.cpp 62511 2016-07-22 19:12:58Z vboxsync $ */
2/** @file
3 * Internal networking - The ring 0 service.
4 *
5 * @remarks No lazy code changes. If you don't understand exactly what you're
6 * doing, get an understanding or forget it.
7 * All changes shall be reviewed by bird before commit. If not around,
8 * email and let Frank and/or Klaus OK the changes before committing.
9 */
10
11/*
12 * Copyright (C) 2006-2016 Oracle Corporation
13 *
14 * This file is part of VirtualBox Open Source Edition (OSE), as
15 * available from http://www.virtualbox.org. This file is free software;
16 * you can redistribute it and/or modify it under the terms of the GNU
17 * General Public License (GPL) as published by the Free Software
18 * Foundation, in version 2 as it comes in the "COPYING" file of the
19 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
20 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
21 */
22
23
24/*********************************************************************************************************************************
25* Header Files *
26*********************************************************************************************************************************/
27#define LOG_GROUP LOG_GROUP_SRV_INTNET
28#include <VBox/intnet.h>
29#include <VBox/intnetinline.h>
30#include <VBox/vmm/pdmnetinline.h>
31#include <VBox/sup.h>
32#include <VBox/vmm/pdm.h>
33#include <VBox/log.h>
34
35#include <iprt/asm.h>
36#include <iprt/assert.h>
37#include <iprt/handletable.h>
38#include <iprt/mp.h>
39#include <iprt/mem.h>
40#include <iprt/net.h>
41#include <iprt/semaphore.h>
42#include <iprt/spinlock.h>
43#include <iprt/string.h>
44#include <iprt/thread.h>
45#include <iprt/time.h>
46
47
48/*********************************************************************************************************************************
49* Defined Constants And Macros *
50*********************************************************************************************************************************/
51/** @def INTNET_WITH_DHCP_SNOOPING
52 * Enables DHCP snooping when in shared-mac-on-the-wire mode. */
53#define INTNET_WITH_DHCP_SNOOPING
54
55/** The maximum number of interfaces in a network. */
56#define INTNET_MAX_IFS (1023 + 1 + 16)
57
58/** The number of entries to grow the destination tables with. */
59#if 0
60# define INTNET_GROW_DSTTAB_SIZE 16
61#else
62# define INTNET_GROW_DSTTAB_SIZE 1
63#endif
64
65/** The wakeup bit in the INTNETIF::cBusy and INTNETTRUNKIF::cBusy counters. */
66#define INTNET_BUSY_WAKEUP_MASK RT_BIT_32(30)
67
68
69/*********************************************************************************************************************************
70* Structures and Typedefs *
71*********************************************************************************************************************************/
72/**
73 * MAC address lookup table entry.
74 */
75typedef struct INTNETMACTABENTRY
76{
77 /** The MAC address of this entry. */
78 RTMAC MacAddr;
79 /** Whether it is effectively in promiscuous mode. */
80 bool fPromiscuousEff;
81 /** Whether it is promiscuous and should see unrelated trunk traffic. */
82 bool fPromiscuousSeeTrunk;
83 /** Whether it is active.
84 * We ignore the entry if this is clear and may end up sending packets addressed
85 * to this interface onto the trunk. The reasoning for this is that this could
86 * be the interface of a VM that just has been teleported to a different host. */
87 bool fActive;
88 /** Pointer to the network interface. */
89 struct INTNETIF *pIf;
90} INTNETMACTABENTRY;
91/** Pointer to a MAC address lookup table entry. */
92typedef INTNETMACTABENTRY *PINTNETMACTABENTRY;
93
94/**
95 * MAC address lookup table.
96 *
97 * @todo Having this in a separate structure didn't work out as well as it
98 * should. Consider merging it into INTNETNETWORK.
99 */
100typedef struct INTNETMACTAB
101{
102 /** The current number of entries. */
103 uint32_t cEntries;
104 /** The number of entries we've allocated space for. */
105 uint32_t cEntriesAllocated;
106 /** Table entries. */
107 PINTNETMACTABENTRY paEntries;
108
109 /** The number of interface entries currently in promiscuous mode. */
110 uint32_t cPromiscuousEntries;
111 /** The number of interface entries currently in promiscuous mode that
112 * shall not see unrelated trunk traffic. */
113 uint32_t cPromiscuousNoTrunkEntries;
114
115 /** The host MAC address (reported). */
116 RTMAC HostMac;
117 /** The effective host promiscuous setting (reported). */
118 bool fHostPromiscuousEff;
119 /** The real host promiscuous setting (reported). */
120 bool fHostPromiscuousReal;
121 /** Whether the host is active. */
122 bool fHostActive;
123
124 /** Whether the wire is effectively promiscuous (config). */
125 bool fWirePromiscuousEff;
126 /** Whether the wire is promiscuous (config).
127 * (Shadows INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE in
128 * INTNETNETWORK::fFlags.) */
129 bool fWirePromiscuousReal;
130 /** Whether the wire is active. */
131 bool fWireActive;
132
133 /** Pointer to the trunk interface. */
134 struct INTNETTRUNKIF *pTrunk;
135} INTNETMACTAB;
136/** Pointer to a MAC address lookup table. */
137typedef INTNETMACTAB *PINTNETMACTAB;
138
139/**
140 * Destination table.
 *
 * Holds the trunk destination (if any) plus a variable number of destination
 * interfaces; aIfs is declared with one element but is used as a variable
 * sized trailing array.
141 */
142typedef struct INTNETDSTTAB
143{
144 /** The trunk destinations (non-zero when the trunk is a destination, see pTrunk). */
145 uint32_t fTrunkDst;
146 /** Pointer to the trunk interface (referenced) if fTrunkDst is non-zero. */
147 struct INTNETTRUNKIF *pTrunk;
148 /** The number of destination interfaces, i.e. valid entries in aIfs. */
149 uint32_t cIfs;
150 /** The interfaces (referenced). Variable sized array. */
151 struct
152 {
153 /** The destination interface. */
154 struct INTNETIF *pIf;
155 /** Whether to replace the destination MAC address.
156 * This is used when sharing MAC address with the host on the wire(less). */
157 bool fReplaceDstMac;
158 } aIfs[1];
159} INTNETDSTTAB;
160/** Pointer to a destination table. */
161typedef INTNETDSTTAB *PINTNETDSTTAB;
162/** Pointer to a const destination table. */
163typedef INTNETDSTTAB const *PCINTNETDSTTAB;
164
165/**
166 * An address together with its address type.
167 */
168typedef struct INTNETADDR
169{
170 /** The address type, telling how Addr is to be interpreted. */
171 INTNETADDRTYPE enmType;
172 /** The address. */
173 RTNETADDRU Addr;
174} INTNETADDR;
175/** Pointer to an address. */
176typedef INTNETADDR *PINTNETADDR;
177/** Pointer to a const address. */
178typedef INTNETADDR const *PCINTNETADDR;
179
180
181/**
182 * Address cache for a specific network layer.
 *
 * All counters are 8-bit, so a cache can describe at most 255 entries.
183 */
184typedef struct INTNETADDRCACHE
185{
186 /** Pointer to the table of addresses. */
187 uint8_t *pbEntries;
188 /** The number of valid address entries. */
189 uint8_t cEntries;
190 /** The number of allocated address entries. */
191 uint8_t cEntriesAlloc;
192 /** The address size. */
193 uint8_t cbAddress;
194 /** The size of an entry. */
195 uint8_t cbEntry;
196} INTNETADDRCACHE;
197/** Pointer to an address cache. */
198typedef INTNETADDRCACHE *PINTNETADDRCACHE;
199/** Pointer to a const address cache. */
200typedef INTNETADDRCACHE const *PCINTNETADDRCACHE;
201
202
203/**
204 * A network interface.
205 *
206 * Unless explicitly stated, all members are protected by the network semaphore.
207 */
208typedef struct INTNETIF
209{
210 /** The MAC address.
211 * This is shadowed by INTNETMACTABENTRY::MacAddr. */
212 RTMAC MacAddr;
213 /** Set if the INTNETIF::MacAddr member has been explicitly set. */
214 bool fMacSet;
215 /** Tracks the desired promiscuous setting of the interface. */
216 bool fPromiscuousReal;
217 /** Whether the interface is active or not.
218 * This is shadowed by INTNETMACTABENTRY::fActive. */
219 bool fActive;
220 /** Whether someone has indicated that the end is nigh by means of IntNetR0IfAbortWait. */
221 bool volatile fNoMoreWaits;
222 /** The flags specified when opening this interface. */
223 uint32_t fOpenFlags;
224 /** Number of yields done to try to make the interface read pending data.
225 * We will stop yielding when this reaches a threshold assuming that the VM is
226 * paused or that it simply isn't worth all the delay. It is cleared when a
227 * successful send has been done. */
228 uint32_t cYields;
229 /** Pointer to the current exchange buffer (ring-0). */
230 PINTNETBUF pIntBuf;
231 /** Pointer to ring-3 mapping of the current exchange buffer. */
232 R3PTRTYPE(PINTNETBUF) pIntBufR3;
233 /** Pointer to the default exchange buffer for the interface. */
234 PINTNETBUF pIntBufDefault;
235 /** Pointer to ring-3 mapping of the default exchange buffer. */
236 R3PTRTYPE(PINTNETBUF) pIntBufDefaultR3;
237 /** Event semaphore which a receiver/consumer thread will sleep on while
238 * waiting for data to arrive. */
239 RTSEMEVENT volatile hRecvEvent;
240 /** Number of threads sleeping on the event semaphore. */
241 uint32_t volatile cSleepers;
242 /** The interface handle.
243 * When this is INTNET_HANDLE_INVALID a sleeper which is waking up
244 * should return with the appropriate error condition. */
245 INTNETIFHANDLE volatile hIf;
246 /** The native handle of the destructor thread. This is NIL_RTNATIVETHREAD when
247 * the object is valid and set when intnetR0IfDestruct is in progress. This is
248 * used to cover an unlikely (impossible?) race between SUPDRVSESSION cleanup
249 * and lingering threads waiting for recv or similar. */
250 RTNATIVETHREAD volatile hDestructorThread;
251 /** Pointer to the network this interface is connected to.
252 * This is protected by the INTNET::hMtxCreateOpenDestroy. */
253 struct INTNETNETWORK *pNetwork;
254 /** The session this interface is associated with. */
255 PSUPDRVSESSION pSession;
256 /** The SUPR0 object id. */
257 void *pvObj;
258 /** The network layer address cache. (Indexed by type, 0 entry isn't used.)
259 * This is protected by the address spinlock of the network. */
260 INTNETADDRCACHE aAddrCache[kIntNetAddrType_End];
261 /** Spinlock protecting the input (producer) side of the receive ring. */
262 RTSPINLOCK hRecvInSpinlock;
263 /** Busy count for tracking destination table references and active sends.
264 * Usually incremented while owning the switch table spinlock. The 30th bit
265 * (INTNET_BUSY_WAKEUP_MASK) is used to indicate wakeup. */
266 uint32_t volatile cBusy;
267 /** The preallocated destination table.
268 * This is NULL when it's in use as a precaution against unserialized
269 * transmitting. This is grown when new interfaces are added to the network. */
270 PINTNETDSTTAB volatile pDstTab;
271 /** Pointer to the trunk's per interface data. Can be NULL. */
272 void *pvIfData;
273 /** Header buffer for when we're carving GSO frames. */
274 uint8_t abGsoHdrs[256];
275} INTNETIF;
276/** Pointer to an internal network interface. */
277typedef INTNETIF *PINTNETIF;
278
279
280/**
281 * A trunk interface.
282 */
283typedef struct INTNETTRUNKIF
284{
285 /** The port interface we present to the component.
 * Must remain the first member: INTNET_SWITCHPORT_2_TRUNKIF converts a
 * pointer to it into a PINTNETTRUNKIF with a plain cast. */
286 INTNETTRUNKSWPORT SwitchPort;
287 /** The port interface we get from the component. */
288 PINTNETTRUNKIFPORT pIfPort;
289 /** Pointer to the network we're connected to.
290 * This may be NULL if we're orphaned? */
291 struct INTNETNETWORK *pNetwork;
292 /** The current MAC address for the interface. (reported)
293 * Updated while owning the switch table spinlock. */
294 RTMAC MacAddr;
295 /** Whether to supply physical addresses with the outbound SGs. (reported) */
296 bool fPhysSG;
297 /** Explicit alignment. */
298 bool fUnused;
299 /** Busy count for tracking destination table references and active sends.
300 * Usually incremented while owning the switch table spinlock. The 30th bit
301 * (INTNET_BUSY_WAKEUP_MASK) is used to indicate wakeup. */
302 uint32_t volatile cBusy;
303 /** Mask of destinations that pfnXmit can cope with disabled preemption for. */
304 uint32_t fNoPreemptDsts;
305 /** The GSO capabilities of the wire destination. (reported) */
306 uint32_t fWireGsoCapabilites;
307 /** The GSO capabilities of the host destination. (reported)
308 * This is a bit map where each bit represents the GSO type with the same
309 * number. */
310 uint32_t fHostGsoCapabilites;
311 /** The destination table spinlock, interrupt safe.
312 * Protects apTaskDstTabs and apIntDstTabs. */
313 RTSPINLOCK hDstTabSpinlock;
314 /** The number of entries in apIntDstTabs. */
315 uint32_t cIntDstTabs;
316 /** The task time destination tables.
317 * @remarks intnetR0NetworkEnsureTabSpace and others ASSUMES this immediately
318 * precedes apIntDstTabs so that these two tables can be used as one
319 * contiguous one. */
320 PINTNETDSTTAB apTaskDstTabs[2];
321 /** The interrupt / disabled-preemption time destination tables.
322 * This is a variable sized array. */
323 PINTNETDSTTAB apIntDstTabs[1];
324} INTNETTRUNKIF;
325/** Pointer to a trunk interface. */
326typedef INTNETTRUNKIF *PINTNETTRUNKIF;
327
328/** Converts a pointer to INTNETTRUNKIF::SwitchPort to a PINTNETTRUNKIF. */
329#define INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort) ((PINTNETTRUNKIF)(pSwitchPort))
330
331
332/**
333 * Internal representation of a network.
334 */
335typedef struct INTNETNETWORK
336{
337 /** The next network in the chain.
338 * This is protected by the INTNET::hMtxCreateOpenDestroy. */
339 struct INTNETNETWORK *pNext;
340
341 /** The spinlock protecting MacTab, aAddrBlacklist and INTNETIF::aAddrCache.
342 * Interrupt safe. */
343 RTSPINLOCK hAddrSpinlock;
344 /** MAC address table.
345 * This doubles as interface collection. */
346 INTNETMACTAB MacTab;
347
348 /** The network layer address cache. (Indexed by type, 0 entry isn't used.)
349 * Contains host addresses. We don't let guests spoof them. */
350 INTNETADDRCACHE aAddrBlacklist[kIntNetAddrType_End];
351
352 /** Wait for an interface to stop being busy so it can be removed or have its
353 * destination table replaced. We have to wait upon this while owning the
354 * network mutex. Will only ever have one waiter because of the big mutex. */
355 RTSEMEVENT hEvtBusyIf;
356 /** Pointer to the instance data. */
357 struct INTNET *pIntNet;
358 /** The SUPR0 object id. */
359 void *pvObj;
360 /** Pointer to the temporary buffer that is used when snooping fragmented packets.
361 * This is allocated after this structure if we're sharing the MAC address with
362 * the host. The buffer is INTNETNETWORK_TMP_SIZE big and aligned on a 64-byte boundary. */
363 uint8_t *pbTmp;
364 /** Network creation flags (INTNET_OPEN_FLAGS_*). */
365 uint32_t fFlags;
366 /** Any restrictive policies required as a minimum by some interface.
367 * (INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES) */
368 uint32_t fMinFlags;
369 /** The number of active interfaces (excluding the trunk). */
370 uint32_t cActiveIFs;
371 /** The length of the network name. */
372 uint8_t cchName;
373 /** The network name. */
374 char szName[INTNET_MAX_NETWORK_NAME];
375 /** The trunk type. */
376 INTNETTRUNKTYPE enmTrunkType;
377 /** The trunk name. */
378 char szTrunk[INTNET_MAX_TRUNK_NAME];
379} INTNETNETWORK;
380/** Pointer to an internal network. */
381typedef INTNETNETWORK *PINTNETNETWORK;
382/** Pointer to a const internal network. */
383typedef const INTNETNETWORK *PCINTNETNETWORK;
384
385/** The size of the buffer INTNETNETWORK::pbTmp points at. */
386#define INTNETNETWORK_TMP_SIZE 2048
387
388
389/**
390 * Internal networking instance.
391 */
392typedef struct INTNET
393{
394 /** Magic number (INTNET_MAGIC). */
395 uint32_t volatile u32Magic;
396 /** Mutex protecting the creation, opening and destruction of both networks and
397 * interfaces. (This means all operations affecting the pNetworks list.) */
398 RTSEMMUTEX hMtxCreateOpenDestroy;
399 /** List of networks. Protected by INTNET::hMtxCreateOpenDestroy.
 * (This comment used to name INTNET::Spinlock, but no such member exists.) */
400 PINTNETNETWORK volatile pNetworks;
401 /** Handle table for the interfaces. */
402 RTHANDLETABLE hHtIfs;
403} INTNET;
404/** Pointer to an internal network ring-0 instance. */
405typedef struct INTNET *PINTNET;
406
407/** Magic number for the internal network instance data (Hayao Miyazaki). */
408#define INTNET_MAGIC UINT32_C(0x19410105)
409
410
411/*********************************************************************************************************************************
412* Global Variables *
413*********************************************************************************************************************************/
414/** Pointer to the internal network instance data. */
415static PINTNET volatile g_pIntNet = NULL;
416
/** Open network policy flag descriptor: one restrictive/relaxed flag pair
 * together with the flag that marks the setting as fixed. */
417static const struct INTNETOPENNETWORKFLAGS
418{
419 uint32_t fRestrictive; /**< The restrictive flag (deny/disabled). */
420 uint32_t fRelaxed; /**< The relaxed flag (allow/enabled). */
421 uint32_t fFixed; /**< The config-fixed flag. */
422 uint32_t fPair; /**< The pair of restrictive and relaxed flags (fRestrictive | fRelaxed). */
423}
424/** Open network policy flags relating to the network. */
425g_afIntNetOpenNetworkNetFlags[] =
426{
427 { INTNET_OPEN_FLAGS_ACCESS_RESTRICTED, INTNET_OPEN_FLAGS_ACCESS_PUBLIC, INTNET_OPEN_FLAGS_ACCESS_FIXED, INTNET_OPEN_FLAGS_ACCESS_RESTRICTED | INTNET_OPEN_FLAGS_ACCESS_PUBLIC },
428 { INTNET_OPEN_FLAGS_PROMISC_DENY_CLIENTS, INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS, INTNET_OPEN_FLAGS_PROMISC_FIXED, INTNET_OPEN_FLAGS_PROMISC_DENY_CLIENTS | INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS },
429 { INTNET_OPEN_FLAGS_PROMISC_DENY_TRUNK_HOST, INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST, INTNET_OPEN_FLAGS_PROMISC_FIXED, INTNET_OPEN_FLAGS_PROMISC_DENY_TRUNK_HOST | INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST },
430 { INTNET_OPEN_FLAGS_PROMISC_DENY_TRUNK_WIRE, INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE, INTNET_OPEN_FLAGS_PROMISC_FIXED, INTNET_OPEN_FLAGS_PROMISC_DENY_TRUNK_WIRE | INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE },
431 { INTNET_OPEN_FLAGS_TRUNK_HOST_DISABLED, INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED, INTNET_OPEN_FLAGS_TRUNK_FIXED, INTNET_OPEN_FLAGS_TRUNK_HOST_DISABLED | INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED },
432 { INTNET_OPEN_FLAGS_TRUNK_HOST_CHASTE_MODE, INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE, INTNET_OPEN_FLAGS_TRUNK_FIXED, INTNET_OPEN_FLAGS_TRUNK_HOST_CHASTE_MODE | INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE },
433 { INTNET_OPEN_FLAGS_TRUNK_WIRE_DISABLED, INTNET_OPEN_FLAGS_TRUNK_WIRE_ENABLED, INTNET_OPEN_FLAGS_TRUNK_FIXED, INTNET_OPEN_FLAGS_TRUNK_WIRE_DISABLED | INTNET_OPEN_FLAGS_TRUNK_WIRE_ENABLED },
434 { INTNET_OPEN_FLAGS_TRUNK_WIRE_CHASTE_MODE, INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE, INTNET_OPEN_FLAGS_TRUNK_FIXED, INTNET_OPEN_FLAGS_TRUNK_WIRE_CHASTE_MODE | INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE },
435},
436/** Open network policy flags relating to the new interface. */
437g_afIntNetOpenNetworkIfFlags[] =
438{
439 { INTNET_OPEN_FLAGS_IF_PROMISC_DENY, INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW, INTNET_OPEN_FLAGS_IF_FIXED, INTNET_OPEN_FLAGS_IF_PROMISC_DENY | INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW },
440 { INTNET_OPEN_FLAGS_IF_PROMISC_NO_TRUNK, INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK, INTNET_OPEN_FLAGS_IF_FIXED, INTNET_OPEN_FLAGS_IF_PROMISC_NO_TRUNK | INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK },
441};
442
443
444/*********************************************************************************************************************************
445* Forward Declarations *
446*********************************************************************************************************************************/
447static void intnetR0TrunkIfDestroy(PINTNETTRUNKIF pThis, PINTNETNETWORK pNetwork);
448
449
450/**
451 * Checks if a pointer belongs to the list of known networks without
452 * accessing memory it points to.
453 *
454 * @returns true, if such network is in the list.
455 * @param pIntNet The pointer to the internal network instance (global).
456 * @param pNetwork The pointer that must be validated.
457 */
458DECLINLINE(bool) intnetR0NetworkIsValid(PINTNET pIntNet, PINTNETNETWORK pNetwork)
459{
460 for (PINTNETNETWORK pCurr = pIntNet->pNetworks; pCurr; pCurr = pCurr->pNext)
461 if (pCurr == pNetwork)
462 return true;
463 return false;
464}
465
466
467/**
468 * Worker for intnetR0SgWritePart that deals with the case where the
469 * request doesn't fit into the first segment.
470 *
471 * @returns true, unless the request or SG invalid.
472 * @param pSG The SG list to write to.
473 * @param off Where to start writing (offset into the SG).
474 * @param cb How much to write.
475 * @param pvBuf The buffer to containing the bits to write.
476 */
477static bool intnetR0SgWritePartSlow(PCINTNETSG pSG, uint32_t off, uint32_t cb, void const *pvBuf)
478{
479 if (RT_UNLIKELY(off + cb > pSG->cbTotal))
480 return false;
481
482 /*
483 * Skip ahead to the segment where off starts.
484 */
485 unsigned const cSegs = pSG->cSegsUsed; Assert(cSegs == pSG->cSegsUsed);
486 unsigned iSeg = 0;
487 while (off > pSG->aSegs[iSeg].cb)
488 {
489 off -= pSG->aSegs[iSeg++].cb;
490 AssertReturn(iSeg < cSegs, false);
491 }
492
493 /*
494 * Copy the data, hoping that it's all from one segment...
495 */
496 uint32_t cbCanCopy = pSG->aSegs[iSeg].cb - off;
497 if (cbCanCopy >= cb)
498 memcpy((uint8_t *)pSG->aSegs[iSeg].pv + off, pvBuf, cb);
499 else
500 {
501 /* copy the portion in the current segment. */
502 memcpy((uint8_t *)pSG->aSegs[iSeg].pv + off, pvBuf, cbCanCopy);
503 cb -= cbCanCopy;
504
505 /* copy the portions in the other segments. */
506 do
507 {
508 pvBuf = (uint8_t const *)pvBuf + cbCanCopy;
509 iSeg++;
510 AssertReturn(iSeg < cSegs, false);
511
512 cbCanCopy = RT_MIN(cb, pSG->aSegs[iSeg].cb);
513 memcpy(pSG->aSegs[iSeg].pv, pvBuf, cbCanCopy);
514
515 cb -= cbCanCopy;
516 } while (cb > 0);
517 }
518
519 return true;
520}
521
522
523/**
524 * Writes to a part of an SG.
525 *
526 * @returns true on success, false on failure (out of bounds).
527 * @param pSG The SG list to write to.
528 * @param off Where to start writing (offset into the SG).
529 * @param cb How much to write.
530 * @param pvBuf The buffer to containing the bits to write.
531 */
532DECLINLINE(bool) intnetR0SgWritePart(PCINTNETSG pSG, uint32_t off, uint32_t cb, void const *pvBuf)
533{
534 Assert(off + cb > off);
535
536 /* The optimized case. */
537 if (RT_LIKELY( pSG->cSegsUsed == 1
538 || pSG->aSegs[0].cb >= off + cb))
539 {
540 Assert(pSG->cbTotal == pSG->aSegs[0].cb);
541 memcpy((uint8_t *)pSG->aSegs[0].pv + off, pvBuf, cb);
542 return true;
543 }
544 return intnetR0SgWritePartSlow(pSG, off, cb, pvBuf);
545}
546
547
548/**
549 * Reads a byte from a SG list.
550 *
551 * @returns The byte on success. 0xff on failure.
552 * @param pSG The SG list to read.
553 * @param off The offset (into the SG) off the byte.
554 */
555DECLINLINE(uint8_t) intnetR0SgReadByte(PCINTNETSG pSG, uint32_t off)
556{
557 if (RT_LIKELY(pSG->aSegs[0].cb > off))
558 return ((uint8_t const *)pSG->aSegs[0].pv)[off];
559
560 off -= pSG->aSegs[0].cb;
561 unsigned const cSegs = pSG->cSegsUsed; Assert(cSegs == pSG->cSegsUsed);
562 for (unsigned iSeg = 1; iSeg < cSegs; iSeg++)
563 {
564 if (pSG->aSegs[iSeg].cb > off)
565 return ((uint8_t const *)pSG->aSegs[iSeg].pv)[off];
566 off -= pSG->aSegs[iSeg].cb;
567 }
568 return false;
569}
570
571
572/**
573 * Worker for intnetR0SgReadPart that deals with the case where the
574 * requested data isn't in the first segment.
575 *
576 * @returns true, unless the SG is invalid.
577 * @param pSG The SG list to read.
578 * @param off Where to start reading (offset into the SG).
579 * @param cb How much to read.
580 * @param pvBuf The buffer to read into.
581 */
582static bool intnetR0SgReadPartSlow(PCINTNETSG pSG, uint32_t off, uint32_t cb, void *pvBuf)
583{
584 if (RT_UNLIKELY(off + cb > pSG->cbTotal))
585 return false;
586
587 /*
588 * Skip ahead to the segment where off starts.
589 */
590 unsigned const cSegs = pSG->cSegsUsed; Assert(cSegs == pSG->cSegsUsed);
591 unsigned iSeg = 0;
592 while (off > pSG->aSegs[iSeg].cb)
593 {
594 off -= pSG->aSegs[iSeg++].cb;
595 AssertReturn(iSeg < cSegs, false);
596 }
597
598 /*
599 * Copy the data, hoping that it's all from one segment...
600 */
601 uint32_t cbCanCopy = pSG->aSegs[iSeg].cb - off;
602 if (cbCanCopy >= cb)
603 memcpy(pvBuf, (uint8_t const *)pSG->aSegs[iSeg].pv + off, cb);
604 else
605 {
606 /* copy the portion in the current segment. */
607 memcpy(pvBuf, (uint8_t const *)pSG->aSegs[iSeg].pv + off, cbCanCopy);
608 cb -= cbCanCopy;
609
610 /* copy the portions in the other segments. */
611 do
612 {
613 pvBuf = (uint8_t *)pvBuf + cbCanCopy;
614 iSeg++;
615 AssertReturn(iSeg < cSegs, false);
616
617 cbCanCopy = RT_MIN(cb, pSG->aSegs[iSeg].cb);
618 memcpy(pvBuf, (uint8_t const *)pSG->aSegs[iSeg].pv, cbCanCopy);
619
620 cb -= cbCanCopy;
621 } while (cb > 0);
622 }
623
624 return true;
625}
626
627
628/**
629 * Reads a part of an SG into a buffer.
630 *
631 * @returns true on success, false on failure (out of bounds).
632 * @param pSG The SG list to read.
633 * @param off Where to start reading (offset into the SG).
634 * @param cb How much to read.
635 * @param pvBuf The buffer to read into.
636 */
637DECLINLINE(bool) intnetR0SgReadPart(PCINTNETSG pSG, uint32_t off, uint32_t cb, void *pvBuf)
638{
639 Assert(off + cb > off);
640
641 /* The optimized case. */
642 if (RT_LIKELY( pSG->cSegsUsed == 1
643 || pSG->aSegs[0].cb >= off + cb))
644 {
645 Assert(pSG->cbTotal == pSG->aSegs[0].cb);
646 memcpy(pvBuf, (uint8_t const *)pSG->aSegs[0].pv + off, cb);
647 return true;
648 }
649 return intnetR0SgReadPartSlow(pSG, off, cb, pvBuf);
650}
651
652
653/**
654 * Wait for a busy counter to reach zero.
655 *
 * Sets INTNET_BUSY_WAKEUP_MASK in the counter so that intnetR0BusyDec signals
 * INTNETNETWORK::hEvtBusyIf when the count drops to just the wakeup bit, then
 * waits on that semaphore (30 second timeout per iteration) until the counter
 * equals the wakeup bit and atomically resets it to zero.
 *
656 * @param pNetwork The network.
657 * @param pcBusy The busy counter.
658 */
659static void intnetR0BusyWait(PINTNETNETWORK pNetwork, uint32_t volatile *pcBusy)
660{
 /* Nothing to wait for if the counter is already zero. */
661 if (ASMAtomicReadU32(pcBusy) == 0)
662 return;
663
664 /*
665 * We have to be a bit cautious here so we don't destroy the network or the
666 * semaphore before intnetR0BusyDec has signalled us.
667 */
668
669 /* Reset the semaphore and flip the wakeup bit. */
670 RTSemEventWait(pNetwork->hEvtBusyIf, 0); /* clear it */
671 uint32_t cCurBusy = ASMAtomicReadU32(pcBusy);
672 do
673 {
 /* The count may have reached zero in the meantime; the wakeup bit has
 not been set yet, so there is nothing to undo. */
674 if (cCurBusy == 0)
675 return;
676 AssertMsg(!(cCurBusy & INTNET_BUSY_WAKEUP_MASK), ("%#x\n", cCurBusy));
677 AssertMsg((cCurBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cCurBusy));
 /* Retry with the refreshed cCurBusy until the wakeup bit has been OR'ed in. */
678 } while (!ASMAtomicCmpXchgExU32(pcBusy, cCurBusy | INTNET_BUSY_WAKEUP_MASK, cCurBusy, &cCurBusy));
679
680 /* Wait for the count to reach zero. */
681 do
682 {
 /* The wait status is deliberately ignored; the counter is re-read after
 every wakeup or timeout. */
683 int rc2 = RTSemEventWait(pNetwork->hEvtBusyIf, 30000); NOREF(rc2);
684 //AssertMsg(RT_SUCCESS(rc2), ("rc=%Rrc *pcBusy=%#x (%#x)\n", rc2, ASMAtomicReadU32(pcBusy), cCurBusy ));
685 cCurBusy = ASMAtomicReadU32(pcBusy);
686 AssertMsg((cCurBusy & INTNET_BUSY_WAKEUP_MASK), ("%#x\n", cCurBusy));
687 AssertMsg((cCurBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cCurBusy));
 /* Done only when nothing but the wakeup bit is left and it could be cleared atomically. */
688 } while ( cCurBusy != INTNET_BUSY_WAKEUP_MASK
689 || !ASMAtomicCmpXchgU32(pcBusy, 0, INTNET_BUSY_WAKEUP_MASK));
690}
691
692
693/**
694 * Decrements the busy counter and maybe wakes up any threads waiting for it to
695 * reach zero.
696 *
697 * @param pNetwork The network.
698 * @param pcBusy The busy counter.
699 */
700DECLINLINE(void) intnetR0BusyDec(PINTNETNETWORK pNetwork, uint32_t volatile *pcBusy)
701{
702 uint32_t cNewBusy = ASMAtomicDecU32(pcBusy);
703 if (RT_UNLIKELY( cNewBusy == INTNET_BUSY_WAKEUP_MASK
704 && pNetwork))
705 RTSemEventSignal(pNetwork->hEvtBusyIf);
706 AssertMsg((cNewBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cNewBusy));
707}
708
709
710/**
711 * Increments the busy count of the specified interface.
712 *
713 * The caller must own the MAC address table spinlock.
714 *
715 * @param pIf The interface.
716 */
717DECLINLINE(void) intnetR0BusyDecIf(PINTNETIF pIf)
718{
719 intnetR0BusyDec(pIf->pNetwork, &pIf->cBusy);
720}
721
722
723/**
724 * Increments the busy count of the specified interface.
725 *
726 * The caller must own the MAC address table spinlock or an explicity reference.
727 *
728 * @param pTrunk The trunk.
729 */
730DECLINLINE(void) intnetR0BusyDecTrunk(PINTNETTRUNKIF pTrunk)
731{
732 if (pTrunk)
733 intnetR0BusyDec(pTrunk->pNetwork, &pTrunk->cBusy);
734}
735
736
737/**
738 * Increments the busy count of the specified interface.
739 *
740 * The caller must own the MAC address table spinlock or an explicity reference.
741 *
742 * @param pIf The interface.
743 */
744DECLINLINE(void) intnetR0BusyIncIf(PINTNETIF pIf)
745{
746 uint32_t cNewBusy = ASMAtomicIncU32(&pIf->cBusy);
747 AssertMsg((cNewBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cNewBusy));
748 NOREF(cNewBusy);
749}
750
751
752/**
753 * Increments the busy count of the specified interface.
754 *
755 * The caller must own the MAC address table spinlock or an explicity reference.
756 *
757 * @param pTrunk The trunk.
758 */
759DECLINLINE(void) intnetR0BusyIncTrunk(PINTNETTRUNKIF pTrunk)
760{
761 if (!pTrunk) return;
762 uint32_t cNewBusy = ASMAtomicIncU32(&pTrunk->cBusy);
763 AssertMsg((cNewBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cNewBusy));
764 NOREF(cNewBusy);
765}
766
767
768/**
769 * Retain an interface.
770 *
771 * @returns VBox status code, can assume success in most situations.
772 * @param pIf The interface instance.
773 * @param pSession The current session.
774 */
775DECLINLINE(int) intnetR0IfRetain(PINTNETIF pIf, PSUPDRVSESSION pSession)
776{
777 Assert(pIf->hDestructorThread == NIL_RTNATIVETHREAD);
778
779 int rc = SUPR0ObjAddRefEx(pIf->pvObj, pSession, true /* fNoBlocking */);
780 AssertRCReturn(rc, rc);
781
782 return VINF_SUCCESS;
783}
784
785
786/**
787 * Release an interface previously retained by intnetR0IfRetain or
788 * by handle lookup/freeing.
789 *
790 * @returns true if destroyed, false if not.
791 * @param pIf The interface instance.
792 * @param pSession The current session.
793 */
794DECLINLINE(bool) intnetR0IfRelease(PINTNETIF pIf, PSUPDRVSESSION pSession)
795{
796 Assert(pIf->hDestructorThread == NIL_RTNATIVETHREAD);
797
798 int rc = SUPR0ObjRelease(pIf->pvObj, pSession);
799 AssertRC(rc);
800
801 return rc == VINF_OBJECT_DESTROYED;
802}
803
804
805/**
806 * RTHandleCreateEx callback that retains an object in the
807 * handle table before returning it.
808 *
809 * (Avoids racing the freeing of the handle.)
810 *
811 * @returns VBox status code.
812 * @param hHandleTable The handle table (ignored).
813 * @param pvObj The object (INTNETIF).
814 * @param pvCtx The context (SUPDRVSESSION).
815 * @param pvUser The user context (ignored).
816 */
817static DECLCALLBACK(int) intnetR0IfRetainHandle(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser)
818{
819 NOREF(pvUser);
820 NOREF(hHandleTable);
821
822 PINTNETIF pIf = (PINTNETIF)pvObj;
823 RTNATIVETHREAD hDtorThrd;
824 ASMAtomicUoReadHandle(&pIf->hDestructorThread, &hDtorThrd);
825 if (hDtorThrd == NIL_RTNATIVETHREAD)
826 return intnetR0IfRetain(pIf, (PSUPDRVSESSION)pvCtx);
827
828 /* Allow intnetR0IfDestruct to call RTHandleTableFreeWithCtx to free
829 the handle, but not even think about retaining a referenceas we don't
830 want to confuse SUPDrv and risk having the destructor called twice. */
831 if (hDtorThrd == RTThreadNativeSelf())
832 return VINF_SUCCESS;
833
834 return VERR_SEM_DESTROYED;
835}
836
837
838
839/**
840 * Checks if the interface has a usable MAC address or not.
841 *
842 * @returns true if MacAddr is usable, false if not.
843 * @param pIf The interface.
844 */
845DECL_FORCE_INLINE(bool) intnetR0IfHasMacAddr(PINTNETIF pIf)
846{
847 return pIf->fMacSet || !(pIf->MacAddr.au8[0] & 1);
848}
849
850
851/**
852 * Locates the MAC address table entry for the given interface.
853 *
854 * The caller holds the MAC address table spinlock, obviously.
855 *
856 * @returns Pointer to the entry on if found, NULL if not.
857 * @param pNetwork The network.
858 * @param pIf The interface.
859 */
860DECLINLINE(PINTNETMACTABENTRY) intnetR0NetworkFindMacAddrEntry(PINTNETNETWORK pNetwork, PINTNETIF pIf)
861{
862 uint32_t iIf = pNetwork->MacTab.cEntries;
863 while (iIf-- > 0)
864 {
865 if (pNetwork->MacTab.paEntries[iIf].pIf == pIf)
866 return &pNetwork->MacTab.paEntries[iIf];
867 }
868 return NULL;
869}
870
871
872/**
873 * Checks if the IPv6 address is a good interface address.
874 * @returns true/false.
875 * @param addr The address, network endian.
876 */
877DECLINLINE(bool) intnetR0IPv6AddrIsGood(RTNETADDRIPV6 addr)
878{
879 return !( ( addr.QWords.qw0 == 0 && addr.QWords.qw1 == 0) /* :: */
880 || ( (addr.Words.w0 & RT_H2BE_U16(0xff00)) == RT_H2BE_U16(0xff00)) /* multicast */
881 || ( addr.Words.w0 == 0 && addr.Words.w1 == 0
882 && addr.Words.w2 == 0 && addr.Words.w3 == 0
883 && addr.Words.w4 == 0 && addr.Words.w5 == 0
884 && addr.Words.w6 == 0 && addr.Words.w7 == RT_H2BE_U16(0x0001))); /* ::1 */
885}
886
887
888/**
889 * Checks if the IPv4 address is a broadcast address.
890 * @returns true/false.
891 * @param Addr The address, network endian.
892 */
893DECLINLINE(bool) intnetR0IPv4AddrIsBroadcast(RTNETADDRIPV4 Addr)
894{
895 /* Just check for 255.255.255.255 atm. */
896 return Addr.u == UINT32_MAX;
897}
898
899
900/**
901 * Checks if the IPv4 address is a good interface address.
902 * @returns true/false.
903 * @param Addr The address, network endian.
904 */
905DECLINLINE(bool) intnetR0IPv4AddrIsGood(RTNETADDRIPV4 Addr)
906{
907 /* Usual suspects. */
908 if ( Addr.u == UINT32_MAX /* 255.255.255.255 - broadcast. */
909 || Addr.au8[0] == 0) /* Current network, can be used as source address. */
910 return false;
911
912 /* Unusual suspects. */
913 if (RT_UNLIKELY( Addr.au8[0] == 127 /* Loopback */
914 || (Addr.au8[0] & 0xf0) == 224 /* Multicast */
915 ))
916 return false;
917 return true;
918}
919
920
921/**
922 * Gets the address size of a network layer type.
923 *
924 * @returns size in bytes.
925 * @param enmType The type.
926 */
927DECLINLINE(uint8_t) intnetR0AddrSize(INTNETADDRTYPE enmType)
928{
929 switch (enmType)
930 {
931 case kIntNetAddrType_IPv4: return 4;
932 case kIntNetAddrType_IPv6: return 16;
933 case kIntNetAddrType_IPX: return 4 + 6;
934 default: AssertFailedReturn(0);
935 }
936}
937
938
939/**
940 * Compares two address to see if they are equal, assuming naturally align structures.
941 *
942 * @returns true if equal, false if not.
943 * @param pAddr1 The first address.
944 * @param pAddr2 The second address.
945 * @param cbAddr The address size.
946 */
947DECLINLINE(bool) intnetR0AddrUIsEqualEx(PCRTNETADDRU pAddr1, PCRTNETADDRU pAddr2, uint8_t const cbAddr)
948{
949 switch (cbAddr)
950 {
951 case 4: /* IPv4 */
952 return pAddr1->au32[0] == pAddr2->au32[0];
953 case 16: /* IPv6 */
954 return pAddr1->au64[0] == pAddr2->au64[0]
955 && pAddr1->au64[1] == pAddr2->au64[1];
956 case 10: /* IPX */
957 return pAddr1->au64[0] == pAddr2->au64[0]
958 && pAddr1->au16[4] == pAddr2->au16[4];
959 default:
960 AssertFailedReturn(false);
961 }
962}
963
964
965/**
966 * Worker for intnetR0IfAddrCacheLookup that performs the lookup
967 * in the remaining cache entries after the caller has check the
968 * most likely ones.
969 *
970 * @returns -1 if not found, the index of the cache entry if found.
971 * @param pCache The cache.
972 * @param pAddr The address.
973 * @param cbAddr The address size (optimization).
974 */
975static int intnetR0IfAddrCacheLookupSlow(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
976{
977 unsigned i = pCache->cEntries - 2;
978 uint8_t const *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
979 while (i >= 1)
980 {
981 if (intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr))
982 return i;
983 pbEntry -= pCache->cbEntry;
984 i--;
985 }
986
987 return -1;
988}
989
990/**
991 * Lookup an address in a cache without any expectations.
992 *
993 * @returns -1 if not found, the index of the cache entry if found.
994 * @param pCache The cache.
995 * @param pAddr The address.
996 * @param cbAddr The address size (optimization).
997 */
998DECLINLINE(int) intnetR0IfAddrCacheLookup(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
999{
1000 Assert(pCache->cbAddress == cbAddr);
1001
1002 /*
1003 * The optimized case is when there is one cache entry and
1004 * it doesn't match.
1005 */
1006 unsigned i = pCache->cEntries;
1007 if ( i > 0
1008 && intnetR0AddrUIsEqualEx((PCRTNETADDRU)pCache->pbEntries, pAddr, cbAddr))
1009 return 0;
1010 if (i <= 1)
1011 return -1;
1012
1013 /*
1014 * Check the last entry.
1015 */
1016 i--;
1017 if (intnetR0AddrUIsEqualEx((PCRTNETADDRU)(pCache->pbEntries + pCache->cbEntry * i), pAddr, cbAddr))
1018 return i;
1019 if (i <= 1)
1020 return -1;
1021
1022 return intnetR0IfAddrCacheLookupSlow(pCache, pAddr, cbAddr);
1023}
1024
1025
1026/** Same as intnetR0IfAddrCacheLookup except we expect the address to be present already. */
1027DECLINLINE(int) intnetR0IfAddrCacheLookupLikely(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
1028{
1029 /** @todo implement this. */
1030 return intnetR0IfAddrCacheLookup(pCache, pAddr, cbAddr);
1031}
1032
1033
1034/**
1035 * Worker for intnetR0IfAddrCacheLookupUnlikely that performs
1036 * the lookup in the remaining cache entries after the caller
1037 * has check the most likely ones.
1038 *
1039 * The routine is expecting not to find the address.
1040 *
1041 * @returns -1 if not found, the index of the cache entry if found.
1042 * @param pCache The cache.
1043 * @param pAddr The address.
1044 * @param cbAddr The address size (optimization).
1045 */
1046static int intnetR0IfAddrCacheInCacheUnlikelySlow(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
1047{
1048 /*
1049 * Perform a full table lookup.
1050 */
1051 unsigned i = pCache->cEntries - 2;
1052 uint8_t const *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
1053 while (i >= 1)
1054 {
1055 if (RT_UNLIKELY(intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr)))
1056 return i;
1057 pbEntry -= pCache->cbEntry;
1058 i--;
1059 }
1060
1061 return -1;
1062}
1063
1064
1065/**
1066 * Lookup an address in a cache expecting not to find it.
1067 *
1068 * @returns -1 if not found, the index of the cache entry if found.
1069 * @param pCache The cache.
1070 * @param pAddr The address.
1071 * @param cbAddr The address size (optimization).
1072 */
1073DECLINLINE(int) intnetR0IfAddrCacheLookupUnlikely(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
1074{
1075 Assert(pCache->cbAddress == cbAddr);
1076
1077 /*
1078 * The optimized case is when there is one cache entry and
1079 * it doesn't match.
1080 */
1081 unsigned i = pCache->cEntries;
1082 if (RT_UNLIKELY( i > 0
1083 && intnetR0AddrUIsEqualEx((PCRTNETADDRU)pCache->pbEntries, pAddr, cbAddr)))
1084 return 0;
1085 if (RT_LIKELY(i <= 1))
1086 return -1;
1087
1088 /*
1089 * Then check the last entry and return if there are just two cache entries.
1090 */
1091 i--;
1092 if (RT_UNLIKELY(intnetR0AddrUIsEqualEx((PCRTNETADDRU)(pCache->pbEntries + pCache->cbEntry * i), pAddr, cbAddr)))
1093 return i;
1094 if (i <= 1)
1095 return -1;
1096
1097 return intnetR0IfAddrCacheInCacheUnlikelySlow(pCache, pAddr, cbAddr);
1098}
1099
1100
1101/**
1102 * Deletes a specific cache entry.
1103 *
1104 * Worker for intnetR0NetworkAddrCacheDelete and intnetR0NetworkAddrCacheDeleteMinusIf.
1105 *
1106 * @param pIf The interface (for logging).
1107 * @param pCache The cache.
1108 * @param iEntry The entry to delete.
1109 * @param pszMsg Log message.
1110 */
1111static void intnetR0IfAddrCacheDeleteIt(PINTNETIF pIf, PINTNETADDRCACHE pCache, int iEntry, const char *pszMsg)
1112{
1113 AssertReturnVoid(iEntry < pCache->cEntries);
1114 AssertReturnVoid(iEntry >= 0);
1115#ifdef LOG_ENABLED
1116 INTNETADDRTYPE enmAddrType = (INTNETADDRTYPE)(uintptr_t)(pCache - &pIf->aAddrCache[0]);
1117 PCRTNETADDRU pAddr = (PCRTNETADDRU)(pCache->pbEntries + iEntry * pCache->cbEntry);
1118 switch (enmAddrType)
1119 {
1120 case kIntNetAddrType_IPv4:
1121 Log(("intnetR0IfAddrCacheDeleteIt: hIf=%#x MAC=%.6Rhxs IPv4 deleted #%d %RTnaipv4 %s\n",
1122 pIf->hIf, &pIf->MacAddr, iEntry, pAddr->IPv4, pszMsg));
1123 break;
1124 case kIntNetAddrType_IPv6:
1125 Log(("intnetR0IfAddrCacheDeleteIt: hIf=%#x MAC=%.6Rhxs IPv6 deleted #%d %RTnaipv6 %s\n",
1126 pIf->hIf, &pIf->MacAddr, iEntry, &pAddr->IPv6, pszMsg));
1127 break;
1128 default:
1129 Log(("intnetR0IfAddrCacheDeleteIt: hIf=%RX32 MAC=%.6Rhxs type=%d #%d %.*Rhxs %s\n",
1130 pIf->hIf, &pIf->MacAddr, enmAddrType, iEntry, pCache->cbAddress, pAddr, pszMsg));
1131 break;
1132 }
1133#endif
1134
1135 pCache->cEntries--;
1136 if (iEntry < pCache->cEntries)
1137 memmove(pCache->pbEntries + iEntry * pCache->cbEntry,
1138 pCache->pbEntries + (iEntry + 1) * pCache->cbEntry,
1139 (pCache->cEntries - iEntry) * pCache->cbEntry);
1140}
1141
1142
1143/**
1144 * Deletes an address from the cache, assuming it isn't actually in the cache.
1145 *
1146 * May or may not own the spinlock when calling this.
1147 *
1148 * @param pIf The interface (for logging).
1149 * @param pCache The cache.
1150 * @param pAddr The address.
1151 * @param cbAddr The address size (optimization).
1152 */
1153DECLINLINE(void) intnetR0IfAddrCacheDelete(PINTNETIF pIf, PINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr, const char *pszMsg)
1154{
1155 int i = intnetR0IfAddrCacheLookup(pCache, pAddr, cbAddr);
1156 if (RT_UNLIKELY(i >= 0))
1157 intnetR0IfAddrCacheDeleteIt(pIf, pCache, i, pszMsg);
1158}
1159
1160
1161/**
1162 * Deletes the address from all the interface caches.
1163 *
1164 * This is used to remove stale entries that has been reassigned to
1165 * other machines on the network.
1166 *
1167 * @param pNetwork The network.
1168 * @param pAddr The address.
1169 * @param enmType The address type.
1170 * @param cbAddr The address size (optimization).
1171 * @param pszMsg Log message.
1172 */
1173DECLINLINE(void) intnetR0NetworkAddrCacheDeleteLocked(PINTNETNETWORK pNetwork,
1174 PCRTNETADDRU pAddr, INTNETADDRTYPE enmType,
1175 uint8_t const cbAddr,
1176 const char *pszMsg)
1177{
1178 uint32_t iIf = pNetwork->MacTab.cEntries;
1179 while (iIf--)
1180 {
1181 PINTNETIF pIf = pNetwork->MacTab.paEntries[iIf].pIf;
1182
1183 int i = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmType], pAddr, cbAddr);
1184 if (RT_UNLIKELY(i >= 0))
1185 intnetR0IfAddrCacheDeleteIt(pIf, &pIf->aAddrCache[enmType], i, pszMsg);
1186 }
1187}
1188
1189
1190/**
1191 * Deletes the address from all the interface caches.
1192 *
1193 * This is used to remove stale entries that has been reassigned to
1194 * other machines on the network.
1195 *
1196 * @param pNetwork The network.
1197 * @param pAddr The address.
1198 * @param enmType The address type.
1199 * @param cbAddr The address size (optimization).
1200 * @param pszMsg Log message.
1201 */
1202DECLINLINE(void) intnetR0NetworkAddrCacheDelete(PINTNETNETWORK pNetwork, PCRTNETADDRU pAddr, INTNETADDRTYPE const enmType,
1203 uint8_t const cbAddr, const char *pszMsg)
1204{
1205 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1206
1207 intnetR0NetworkAddrCacheDeleteLocked(pNetwork, pAddr, enmType, cbAddr, pszMsg);
1208
1209 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1210}
1211
1212
1213/**
1214 * Deletes the address from all the interface caches except the specified one.
1215 *
1216 * This is used to remove stale entries that has been reassigned to
1217 * other machines on the network.
1218 *
1219 * @param pNetwork The network.
1220 * @param pAddr The address.
1221 * @param enmType The address type.
1222 * @param cbAddr The address size (optimization).
1223 * @param pszMsg Log message.
1224 */
1225DECLINLINE(void) intnetR0NetworkAddrCacheDeleteMinusIf(PINTNETNETWORK pNetwork, PINTNETIF pIfSender, PCRTNETADDRU pAddr,
1226 INTNETADDRTYPE const enmType, uint8_t const cbAddr, const char *pszMsg)
1227{
1228 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1229
1230 uint32_t iIf = pNetwork->MacTab.cEntries;
1231 while (iIf--)
1232 {
1233 PINTNETIF pIf = pNetwork->MacTab.paEntries[iIf].pIf;
1234 if (pIf != pIfSender)
1235 {
1236 int i = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmType], pAddr, cbAddr);
1237 if (RT_UNLIKELY(i >= 0))
1238 intnetR0IfAddrCacheDeleteIt(pIf, &pIf->aAddrCache[enmType], i, pszMsg);
1239 }
1240 }
1241
1242 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1243}
1244
1245
1246/**
1247 * Lookup an address on the network, returning the (first) interface having it
1248 * in its address cache.
1249 *
1250 * @returns Pointer to the interface on success, NULL if not found. The caller
1251 * must release the interface by calling intnetR0BusyDecIf.
1252 * @param pNetwork The network.
1253 * @param pAddr The address to lookup.
1254 * @param enmType The address type.
1255 * @param cbAddr The size of the address.
1256 */
1257DECLINLINE(PINTNETIF) intnetR0NetworkAddrCacheLookupIf(PINTNETNETWORK pNetwork, PCRTNETADDRU pAddr, INTNETADDRTYPE const enmType, uint8_t const cbAddr)
1258{
1259 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1260
1261 uint32_t iIf = pNetwork->MacTab.cEntries;
1262 while (iIf--)
1263 {
1264 PINTNETIF pIf = pNetwork->MacTab.paEntries[iIf].pIf;
1265 int i = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmType], pAddr, cbAddr);
1266 if (i >= 0)
1267 {
1268 intnetR0BusyIncIf(pIf);
1269 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1270 return pIf;
1271 }
1272 }
1273
1274 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1275 return NULL;
1276}
1277
1278
1279/**
1280 * Look up specified address in the network's blacklist.
1281 *
1282 * @param pNetwork The network.
1283 * @param enmType The address type.
1284 * @param pAddr The address.
1285 */
1286static bool intnetR0NetworkBlacklistLookup(PINTNETNETWORK pNetwork,
1287 PCRTNETADDRU pAddr, INTNETADDRTYPE enmType)
1288{
1289 PINTNETADDRCACHE pCache = &pNetwork->aAddrBlacklist[enmType];
1290
1291 if (RT_UNLIKELY(pCache->cEntriesAlloc == 0))
1292 return false;
1293
1294 const uint8_t cbAddr = pCache->cbAddress;
1295 Assert(cbAddr == intnetR0AddrSize(enmType));
1296
1297 for (unsigned i = 0; i < pCache->cEntries; ++i)
1298 {
1299 uint8_t *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
1300 if (intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr))
1301 return true;
1302 }
1303
1304 return false;
1305}
1306
1307
1308/**
1309 * Deletes specified address from network's blacklist.
1310 *
1311 * @param pNetwork The network.
1312 * @param enmType The address type.
1313 * @param pAddr The address.
1314 */
1315static void intnetR0NetworkBlacklistDelete(PINTNETNETWORK pNetwork,
1316 PCRTNETADDRU pAddr, INTNETADDRTYPE enmType)
1317{
1318 PINTNETADDRCACHE pCache = &pNetwork->aAddrBlacklist[enmType];
1319
1320 if (RT_UNLIKELY(pCache->cEntriesAlloc == 0))
1321 return;
1322
1323 const uint8_t cbAddr = pCache->cbAddress;
1324 Assert(cbAddr == intnetR0AddrSize(enmType));
1325
1326 for (unsigned i = 0; i < pCache->cEntries; ++i)
1327 {
1328 uint8_t *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
1329 if (!intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr))
1330 continue;
1331
1332 --pCache->cEntries;
1333 memmove(pCache->pbEntries + i * pCache->cbEntry,
1334 pCache->pbEntries + (i + 1) * pCache->cbEntry,
1335 (pCache->cEntries - i) * pCache->cbEntry);
1336 return;
1337 }
1338}
1339
1340
1341/**
1342 * Adds specified address from network's blacklist.
1343 *
1344 * @param pNetwork The network.
1345 * @param enmType The address type.
1346 * @param pAddr The address.
1347 */
1348static void intnetR0NetworkBlacklistAdd(PINTNETNETWORK pNetwork,
1349 PCRTNETADDRU pAddr, INTNETADDRTYPE enmType)
1350{
1351 PINTNETADDRCACHE pCache = &pNetwork->aAddrBlacklist[enmType];
1352
1353 if (RT_UNLIKELY(pCache->cEntriesAlloc == 0))
1354 return;
1355
1356 const uint8_t cbAddr = pCache->cbAddress;
1357 Assert(cbAddr == intnetR0AddrSize(enmType));
1358
1359 /* lookup */
1360 for (unsigned i = 0; i < pCache->cEntries; ++i)
1361 {
1362 uint8_t *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
1363 if (RT_UNLIKELY(intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr)))
1364 return; /* already exists */
1365 }
1366
1367 if (pCache->cEntries >= pCache->cEntriesAlloc)
1368 {
1369 /* shift */
1370 memmove(pCache->pbEntries, pCache->pbEntries + pCache->cbEntry,
1371 pCache->cbEntry * (pCache->cEntries - 1));
1372 --pCache->cEntries;
1373 }
1374
1375 Assert(pCache->cEntries < pCache->cEntriesAlloc);
1376
1377 /* push */
1378 uint8_t *pbEntry = pCache->pbEntries + pCache->cEntries * pCache->cbEntry;
1379 memcpy(pbEntry, pAddr, cbAddr);
1380 memset(pbEntry + pCache->cbAddress, '\0', pCache->cbEntry - cbAddr);
1381 ++pCache->cEntries;
1382
1383 Assert(pCache->cEntries <= pCache->cEntriesAlloc);
1384}
1385
1386
1387/**
1388 * Adds an address to the cache, the caller is responsible for making sure it's
1389 * not already in the cache.
1390 *
1391 * The caller must not
1392 *
1393 * @param pIf The interface (for logging).
1394 * @param pCache The address cache.
1395 * @param pAddr The address.
1396 * @param pszMsg log message.
1397 */
1398static void intnetR0IfAddrCacheAddIt(PINTNETIF pIf, INTNETADDRTYPE enmAddrType, PCRTNETADDRU pAddr,
1399 const char *pszMsg)
1400{
1401 PINTNETNETWORK pNetwork = pIf->pNetwork;
1402 AssertReturnVoid(pNetwork);
1403
1404 PINTNETADDRCACHE pCache = &pIf->aAddrCache[enmAddrType];
1405
1406#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
1407 const uint8_t cbAddr = pCache->cbAddress;
1408 Assert(cbAddr == intnetR0AddrSize(enmAddrType));
1409#endif
1410
1411 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1412
1413 bool fBlacklisted = intnetR0NetworkBlacklistLookup(pNetwork, pAddr, enmAddrType);
1414 if (fBlacklisted)
1415 {
1416 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1417
1418#ifdef LOG_ENABLED
1419 switch (enmAddrType)
1420 {
1421 case kIntNetAddrType_IPv4:
1422 Log(("%s: spoofing attempt for %RTnaipv4\n",
1423 __FUNCTION__, pAddr->IPv4));
1424 break;
1425 case kIntNetAddrType_IPv6:
1426 Log(("%s: spoofing attempt for %RTnaipv6\n",
1427 __FUNCTION__, &pAddr->IPv6));
1428 break;
1429 default:
1430 Log(("%s: spoofing attempt for %.*Rhxs (type %d)\n",
1431 __FUNCTION__, cbAddr, pAddr, enmAddrType));
1432 break;
1433 }
1434#endif
1435 return;
1436 }
1437
1438 if (RT_UNLIKELY(!pCache->cEntriesAlloc))
1439 {
1440 /* This shouldn't happen*/
1441 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1442 return;
1443 }
1444
1445 /* When the table is full, drop the older entry (FIFO). Do proper ageing? */
1446 if (pCache->cEntries >= pCache->cEntriesAlloc)
1447 {
1448 Log(("intnetR0IfAddrCacheAddIt: type=%d replacing %.*Rhxs\n",
1449 (int)(uintptr_t)(pCache - &pIf->aAddrCache[0]), pCache->cbAddress, pCache->pbEntries));
1450 memmove(pCache->pbEntries, pCache->pbEntries + pCache->cbEntry, pCache->cbEntry * (pCache->cEntries - 1));
1451 pCache->cEntries--;
1452 Assert(pCache->cEntries < pCache->cEntriesAlloc);
1453 }
1454
1455 /*
1456 * Add the new entry to the end of the array.
1457 */
1458 uint8_t *pbEntry = pCache->pbEntries + pCache->cEntries * pCache->cbEntry;
1459 memcpy(pbEntry, pAddr, pCache->cbAddress);
1460 memset(pbEntry + pCache->cbAddress, '\0', pCache->cbEntry - pCache->cbAddress);
1461
1462#ifdef LOG_ENABLED
1463 switch (enmAddrType)
1464 {
1465 case kIntNetAddrType_IPv4:
1466 Log(("intnetR0IfAddrCacheAddIt: hIf=%#x MAC=%.6Rhxs IPv4 added #%d %RTnaipv4 %s\n",
1467 pIf->hIf, &pIf->MacAddr, pCache->cEntries, pAddr->IPv4, pszMsg));
1468 break;
1469 case kIntNetAddrType_IPv6:
1470 Log(("intnetR0IfAddrCacheAddIt: hIf=%#x MAC=%.6Rhxs IPv6 added #%d %RTnaipv6 %s\n",
1471 pIf->hIf, &pIf->MacAddr, pCache->cEntries, &pAddr->IPv6, pszMsg));
1472 break;
1473 default:
1474 Log(("intnetR0IfAddrCacheAddIt: hIf=%#x MAC=%.6Rhxs type=%d added #%d %.*Rhxs %s\n",
1475 pIf->hIf, &pIf->MacAddr, enmAddrType, pCache->cEntries, pCache->cbAddress, pAddr, pszMsg));
1476 break;
1477 }
1478#endif
1479 pCache->cEntries++;
1480 Assert(pCache->cEntries <= pCache->cEntriesAlloc);
1481
1482 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1483}
1484
1485
1486/**
1487 * A intnetR0IfAddrCacheAdd worker that performs the rest of the lookup.
1488 *
1489 * @param pIf The interface (for logging).
1490 * @param pCache The address cache.
1491 * @param pAddr The address.
1492 * @param cbAddr The size of the address (optimization).
1493 * @param pszMsg Log message.
1494 */
1495static void intnetR0IfAddrCacheAddSlow(PINTNETIF pIf, INTNETADDRTYPE enmAddrType, PCRTNETADDRU pAddr,
1496 const char *pszMsg)
1497{
1498 PINTNETADDRCACHE pCache = &pIf->aAddrCache[enmAddrType];
1499
1500 const uint8_t cbAddr = pCache->cbAddress;
1501 Assert(cbAddr == intnetR0AddrSize(enmAddrType));
1502
1503 /*
1504 * Check all but the first and last entries, the caller
1505 * has already checked those.
1506 */
1507 int i = pCache->cEntries - 2;
1508 uint8_t const *pbEntry = pCache->pbEntries + pCache->cbEntry;
1509 while (i >= 1)
1510 {
1511 if (RT_LIKELY(intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr)))
1512 return;
1513 pbEntry += pCache->cbEntry;
1514 i--;
1515 }
1516
1517 /*
1518 * Not found, add it.
1519 */
1520 intnetR0IfAddrCacheAddIt(pIf, enmAddrType, pAddr, pszMsg);
1521}
1522
1523
1524/**
1525 * Adds an address to the cache if it's not already there.
1526 *
1527 * Must not own any spinlocks when calling this function.
1528 *
1529 * @param pIf The interface (for logging).
1530 * @param pCache The address cache.
1531 * @param pAddr The address.
1532 * @param cbAddr The size of the address (optimization).
1533 * @param pszMsg Log message.
1534 */
1535DECLINLINE(void) intnetR0IfAddrCacheAdd(PINTNETIF pIf, INTNETADDRTYPE enmAddrType, PCRTNETADDRU pAddr,
1536 const char *pszMsg)
1537{
1538 PINTNETADDRCACHE pCache = &pIf->aAddrCache[enmAddrType];
1539
1540 const uint8_t cbAddr = pCache->cbAddress;
1541 Assert(cbAddr == intnetR0AddrSize(enmAddrType));
1542
1543 /*
1544 * The optimized case is when the address the first or last cache entry.
1545 */
1546 unsigned i = pCache->cEntries;
1547 if (RT_LIKELY( i > 0
1548 && ( intnetR0AddrUIsEqualEx((PCRTNETADDRU)pCache->pbEntries, pAddr, cbAddr)
1549 || (i > 1
1550 && intnetR0AddrUIsEqualEx((PCRTNETADDRU)(pCache->pbEntries + pCache->cbEntry * (i-1)), pAddr, cbAddr))) ))
1551 return;
1552
1553 intnetR0IfAddrCacheAddSlow(pIf, enmAddrType, pAddr, pszMsg);
1554}
1555
1556
1557/**
1558 * Destroys the specified address cache.
1559 * @param pCache The address cache.
1560 */
1561static void intnetR0IfAddrCacheDestroy(PINTNETADDRCACHE pCache)
1562{
1563 void *pvFree = pCache->pbEntries;
1564 pCache->pbEntries = NULL;
1565 pCache->cEntries = 0;
1566 pCache->cEntriesAlloc = 0;
1567 RTMemFree(pvFree);
1568}
1569
1570
1571/**
1572 * Initialize the address cache for the specified address type.
1573 *
1574 * The cache storage is preallocated and fixed size so that we can handle
1575 * inserts from problematic contexts.
1576 *
1577 * @returns VINF_SUCCESS or VERR_NO_MEMORY.
1578 * @param pCache The cache to initialize.
1579 * @param enmAddrType The address type.
1580 * @param fEnabled Whether the address cache is enabled or not.
1581 */
1582static int intnetR0IfAddrCacheInit(PINTNETADDRCACHE pCache, INTNETADDRTYPE enmAddrType, bool fEnabled)
1583{
1584 pCache->cEntries = 0;
1585 pCache->cbAddress = intnetR0AddrSize(enmAddrType);
1586 pCache->cbEntry = RT_ALIGN(pCache->cbAddress, 4);
1587 if (fEnabled)
1588 {
1589 pCache->cEntriesAlloc = 32;
1590 pCache->pbEntries = (uint8_t *)RTMemAllocZ(pCache->cEntriesAlloc * pCache->cbEntry);
1591 if (!pCache->pbEntries)
1592 return VERR_NO_MEMORY;
1593 }
1594 else
1595 {
1596 pCache->cEntriesAlloc = 0;
1597 pCache->pbEntries = NULL;
1598 }
1599 return VINF_SUCCESS;
1600}
1601
1602
1603/**
1604 * Is it a multicast or broadcast MAC address?
1605 *
1606 * @returns true if multicast, false if not.
1607 * @param pMacAddr The address to inspect.
1608 */
1609DECL_FORCE_INLINE(bool) intnetR0IsMacAddrMulticast(PCRTMAC pMacAddr)
1610{
1611 return !!(pMacAddr->au8[0] & 0x01);
1612}
1613
1614
1615/**
1616 * Is it a dummy MAC address?
1617 *
1618 * We use dummy MAC addresses for interfaces which we don't know the MAC
1619 * address of because they haven't sent anything (learning) or explicitly set
1620 * it.
1621 *
1622 * @returns true if dummy, false if not.
1623 * @param pMacAddr The address to inspect.
1624 */
1625DECL_FORCE_INLINE(bool) intnetR0IsMacAddrDummy(PCRTMAC pMacAddr)
1626{
1627 /* The dummy address are broadcast addresses, don't bother check it all. */
1628 return pMacAddr->au16[0] == 0xffff;
1629}
1630
1631
1632/**
1633 * Compares two MAC addresses.
1634 *
1635 * @returns true if equal, false if not.
1636 * @param pDstAddr1 Address 1.
1637 * @param pDstAddr2 Address 2.
1638 */
1639DECL_FORCE_INLINE(bool) intnetR0AreMacAddrsEqual(PCRTMAC pDstAddr1, PCRTMAC pDstAddr2)
1640{
1641 return pDstAddr1->au16[2] == pDstAddr2->au16[2]
1642 && pDstAddr1->au16[1] == pDstAddr2->au16[1]
1643 && pDstAddr1->au16[0] == pDstAddr2->au16[0];
1644}
1645
1646
1647/**
1648 * Switch a unicast frame based on the network layer address (OSI level 3) and
1649 * return a destination table.
1650 *
1651 * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
1652 * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
1653 * @param pNetwork The network to switch on.
1654 * @param pDstMacAddr The destination MAC address.
1655 * @param enmL3AddrType The level-3 destination address type.
1656 * @param pL3Addr The level-3 destination address.
1657 * @param cbL3Addr The size of the level-3 destination address.
1658 * @param fSrc The frame source (INTNETTRUNKDIR_WIRE).
1659 * @param pDstTab The destination output table.
1660 */
1661static INTNETSWDECISION intnetR0NetworkSwitchLevel3(PINTNETNETWORK pNetwork, PCRTMAC pDstMacAddr,
1662 INTNETADDRTYPE enmL3AddrType, PCRTNETADDRU pL3Addr, uint8_t cbL3Addr,
1663 uint32_t fSrc, PINTNETDSTTAB pDstTab)
1664{
1665 Assert(fSrc == INTNETTRUNKDIR_WIRE);
1666
1667 /*
1668 * Grab the spinlock first and do the switching.
1669 */
1670 PINTNETMACTAB pTab = &pNetwork->MacTab;
1671 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1672
1673 pDstTab->fTrunkDst = 0;
1674 pDstTab->pTrunk = 0;
1675 pDstTab->cIfs = 0;
1676
1677 /* Find exactly matching or promiscuous interfaces. */
1678 uint32_t cExactHits = 0;
1679 uint32_t iIfMac = pTab->cEntries;
1680 while (iIfMac-- > 0)
1681 {
1682 if (pTab->paEntries[iIfMac].fActive)
1683 {
1684 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1685 bool fExact = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmL3AddrType], pL3Addr, cbL3Addr) >= 0;
1686 if (fExact || pTab->paEntries[iIfMac].fPromiscuousSeeTrunk)
1687 {
1688 cExactHits += fExact;
1689
1690 uint32_t iIfDst = pDstTab->cIfs++;
1691 pDstTab->aIfs[iIfDst].pIf = pIf;
1692 pDstTab->aIfs[iIfDst].fReplaceDstMac = fExact;
1693 intnetR0BusyIncIf(pIf);
1694
1695 if (fExact)
1696 pDstMacAddr = &pIf->MacAddr; /* Avoids duplicates being sent to the host. */
1697 }
1698 }
1699 }
1700
1701 /* Network only promicuous mode ifs should see related trunk traffic. */
1702 if ( cExactHits
1703 && fSrc
1704 && pNetwork->MacTab.cPromiscuousNoTrunkEntries)
1705 {
1706 iIfMac = pTab->cEntries;
1707 while (iIfMac-- > 0)
1708 {
1709 if ( pTab->paEntries[iIfMac].fActive
1710 && pTab->paEntries[iIfMac].fPromiscuousEff
1711 && !pTab->paEntries[iIfMac].fPromiscuousSeeTrunk)
1712 {
1713 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1714 if (intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmL3AddrType], pL3Addr, cbL3Addr) < 0)
1715 {
1716 uint32_t iIfDst = pDstTab->cIfs++;
1717 pDstTab->aIfs[iIfDst].pIf = pIf;
1718 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
1719 intnetR0BusyIncIf(pIf);
1720 }
1721 }
1722 }
1723 }
1724
1725 /* Does it match the host, or is the host promiscuous? */
1726 if (pTab->fHostActive)
1727 {
1728 bool fExact = intnetR0AreMacAddrsEqual(&pTab->HostMac, pDstMacAddr);
1729 if ( fExact
1730 || intnetR0IsMacAddrDummy(&pTab->HostMac)
1731 || pTab->fHostPromiscuousEff)
1732 {
1733 cExactHits += fExact;
1734 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
1735 }
1736 }
1737
1738 /* Hit the wire if there are no exact matches or if it's in promiscuous mode. */
1739 if (pTab->fWireActive && (!cExactHits || pTab->fWirePromiscuousEff))
1740 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
1741 pDstTab->fTrunkDst &= ~fSrc;
1742 if (pDstTab->fTrunkDst)
1743 {
1744 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
1745 pDstTab->pTrunk = pTrunk;
1746 intnetR0BusyIncTrunk(pTrunk);
1747 }
1748
1749 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1750 return pDstTab->cIfs
1751 ? (!pDstTab->fTrunkDst ? INTNETSWDECISION_INTNET : INTNETSWDECISION_BROADCAST)
1752 : (!pDstTab->fTrunkDst ? INTNETSWDECISION_DROP : INTNETSWDECISION_TRUNK);
1753}
1754
1755
1756/**
1757 * Pre-switch a unicast MAC address.
1758 *
1759 * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
1760 * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
1761 * @param pNetwork The network to switch on.
1762 * @param fSrc The frame source.
1763 * @param pSrcAddr The source address of the frame.
1764 * @param pDstAddr The destination address of the frame.
1765 */
1766static INTNETSWDECISION intnetR0NetworkPreSwitchUnicast(PINTNETNETWORK pNetwork, uint32_t fSrc, PCRTMAC pSrcAddr,
1767 PCRTMAC pDstAddr)
1768{
1769 Assert(!intnetR0IsMacAddrMulticast(pDstAddr));
1770 Assert(fSrc);
1771
1772 /*
1773 * Grab the spinlock first and do the switching.
1774 */
1775 INTNETSWDECISION enmSwDecision = INTNETSWDECISION_BROADCAST;
1776 PINTNETMACTAB pTab = &pNetwork->MacTab;
1777 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1778
1779 /* Iterate the internal network interfaces and look for matching source and
1780 destination addresses. */
1781 uint32_t iIfMac = pTab->cEntries;
1782 while (iIfMac-- > 0)
1783 {
1784 if (pTab->paEntries[iIfMac].fActive)
1785 {
1786 /* Unknown interface address? */
1787 if (intnetR0IsMacAddrDummy(&pTab->paEntries[iIfMac].MacAddr))
1788 break;
1789
1790 /* Paranoia - this shouldn't happen, right? */
1791 if ( pSrcAddr
1792 && intnetR0AreMacAddrsEqual(&pTab->paEntries[iIfMac].MacAddr, pSrcAddr))
1793 break;
1794
1795 /* Exact match? */
1796 if (intnetR0AreMacAddrsEqual(&pTab->paEntries[iIfMac].MacAddr, pDstAddr))
1797 {
1798 enmSwDecision = pTab->fHostPromiscuousEff && fSrc == INTNETTRUNKDIR_WIRE
1799 ? INTNETSWDECISION_BROADCAST
1800 : INTNETSWDECISION_INTNET;
1801 break;
1802 }
1803 }
1804 }
1805
1806 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1807 return enmSwDecision;
1808}
1809
1810
1811/**
1812 * Switch a unicast MAC address and return a destination table.
1813 *
1814 * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
1815 * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
1816 * @param pNetwork The network to switch on.
1817 * @param fSrc The frame source.
1818 * @param pIfSender The sender interface, NULL if trunk. Used to
1819 * prevent sending an echo to the sender.
1820 * @param pDstAddr The destination address of the frame.
1821 * @param pDstTab The destination output table.
1822 */
1823static INTNETSWDECISION intnetR0NetworkSwitchUnicast(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETIF pIfSender,
1824 PCRTMAC pDstAddr, PINTNETDSTTAB pDstTab)
1825{
1826 AssertPtr(pDstTab);
1827 Assert(!intnetR0IsMacAddrMulticast(pDstAddr));
1828
1829 /*
1830 * Grab the spinlock first and do the switching.
1831 */
1832 PINTNETMACTAB pTab = &pNetwork->MacTab;
1833 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1834
1835 pDstTab->fTrunkDst = 0;
1836 pDstTab->pTrunk = 0;
1837 pDstTab->cIfs = 0;
1838
1839 /* Find exactly matching or promiscuous interfaces. */
1840 uint32_t cExactHits = 0;
1841 uint32_t iIfMac = pTab->cEntries;
1842 while (iIfMac-- > 0)
1843 {
1844 if (pTab->paEntries[iIfMac].fActive)
1845 {
1846 bool fExact = intnetR0AreMacAddrsEqual(&pTab->paEntries[iIfMac].MacAddr, pDstAddr);
1847 if ( fExact
1848 || intnetR0IsMacAddrDummy(&pTab->paEntries[iIfMac].MacAddr)
1849 || ( pTab->paEntries[iIfMac].fPromiscuousSeeTrunk
1850 || (!fSrc && pTab->paEntries[iIfMac].fPromiscuousEff) )
1851 )
1852 {
1853 cExactHits += fExact;
1854
1855 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1856 if (RT_LIKELY(pIf != pIfSender)) /* paranoia */
1857 {
1858 uint32_t iIfDst = pDstTab->cIfs++;
1859 pDstTab->aIfs[iIfDst].pIf = pIf;
1860 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
1861 intnetR0BusyIncIf(pIf);
1862 }
1863 }
1864 }
1865 }
1866
1867 /* Network only promicuous mode ifs should see related trunk traffic. */
1868 if ( cExactHits
1869 && fSrc
1870 && pNetwork->MacTab.cPromiscuousNoTrunkEntries)
1871 {
1872 iIfMac = pTab->cEntries;
1873 while (iIfMac-- > 0)
1874 {
1875 if ( pTab->paEntries[iIfMac].fPromiscuousEff
1876 && !pTab->paEntries[iIfMac].fPromiscuousSeeTrunk
1877 && pTab->paEntries[iIfMac].fActive
1878 && !intnetR0AreMacAddrsEqual(&pTab->paEntries[iIfMac].MacAddr, pDstAddr)
1879 && !intnetR0IsMacAddrDummy(&pTab->paEntries[iIfMac].MacAddr) )
1880 {
1881 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1882 uint32_t iIfDst = pDstTab->cIfs++;
1883 pDstTab->aIfs[iIfDst].pIf = pIf;
1884 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
1885 intnetR0BusyIncIf(pIf);
1886 }
1887 }
1888 }
1889
1890 /* Does it match the host, or is the host promiscuous? */
1891 if ( fSrc != INTNETTRUNKDIR_HOST
1892 && pTab->fHostActive)
1893 {
1894 bool fExact = intnetR0AreMacAddrsEqual(&pTab->HostMac, pDstAddr);
1895 if ( fExact
1896 || intnetR0IsMacAddrDummy(&pTab->HostMac)
1897 || pTab->fHostPromiscuousEff)
1898 {
1899 cExactHits += fExact;
1900 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
1901 }
1902 }
1903
1904 /* Hit the wire if there are no exact matches or if it's in promiscuous mode. */
1905 if ( fSrc != INTNETTRUNKDIR_WIRE
1906 && pTab->fWireActive
1907 && (!cExactHits || pTab->fWirePromiscuousEff)
1908 )
1909 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
1910
1911 /* Grab the trunk if we're sending to it. */
1912 if (pDstTab->fTrunkDst)
1913 {
1914 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
1915 pDstTab->pTrunk = pTrunk;
1916 intnetR0BusyIncTrunk(pTrunk);
1917 }
1918
1919 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1920 return pDstTab->cIfs
1921 ? (!pDstTab->fTrunkDst ? INTNETSWDECISION_INTNET : INTNETSWDECISION_BROADCAST)
1922 : (!pDstTab->fTrunkDst ? INTNETSWDECISION_DROP : INTNETSWDECISION_TRUNK);
1923}
1924
1925
1926/**
1927 * Create a destination table for a broadcast frame.
1928 *
1929 * @returns INTNETSWDECISION_BROADCAST.
1930 * @param pNetwork The network to switch on.
1931 * @param fSrc The frame source.
1932 * @param pIfSender The sender interface, NULL if trunk. Used to
1933 * prevent sending an echo to the sender.
1934 * @param pDstTab The destination output table.
1935 */
1936static INTNETSWDECISION intnetR0NetworkSwitchBroadcast(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETIF pIfSender,
1937 PINTNETDSTTAB pDstTab)
1938{
1939 AssertPtr(pDstTab);
1940
1941 /*
1942 * Grab the spinlock first and record all active interfaces.
1943 */
1944 PINTNETMACTAB pTab = &pNetwork->MacTab;
1945 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1946
1947 pDstTab->fTrunkDst = 0;
1948 pDstTab->pTrunk = 0;
1949 pDstTab->cIfs = 0;
1950
1951 /* Regular interfaces. */
1952 uint32_t iIfMac = pTab->cEntries;
1953 while (iIfMac-- > 0)
1954 {
1955 if (pTab->paEntries[iIfMac].fActive)
1956 {
1957 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1958 if (pIf != pIfSender)
1959 {
1960 uint32_t iIfDst = pDstTab->cIfs++;
1961 pDstTab->aIfs[iIfDst].pIf = pIf;
1962 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
1963 intnetR0BusyIncIf(pIf);
1964 }
1965 }
1966 }
1967
1968 /* The trunk interface. */
1969 if (pTab->fHostActive)
1970 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
1971 if (pTab->fWireActive)
1972 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
1973 pDstTab->fTrunkDst &= ~fSrc;
1974 if (pDstTab->fTrunkDst)
1975 {
1976 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
1977 pDstTab->pTrunk = pTrunk;
1978 intnetR0BusyIncTrunk(pTrunk);
1979 }
1980
1981 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1982 return INTNETSWDECISION_BROADCAST;
1983}
1984
1985
1986/**
1987 * Create a destination table with the trunk and any promiscuous interfaces.
1988 *
1989 * This is only used in a fallback case of the level-3 switching, so we can
1990 * assume the wire as source and skip the sender interface filtering.
1991 *
1992 * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
1993 * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
1994 * @param pNetwork The network to switch on.
1995 * @param fSrc The frame source.
1996 * @param pDstTab The destination output table.
1997 */
1998static INTNETSWDECISION intnetR0NetworkSwitchTrunkAndPromisc(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETDSTTAB pDstTab)
1999{
2000 Assert(fSrc == INTNETTRUNKDIR_WIRE);
2001
2002 /*
2003 * Grab the spinlock first and do the switching.
2004 */
2005 PINTNETMACTAB pTab = &pNetwork->MacTab;
2006 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
2007
2008 pDstTab->fTrunkDst = 0;
2009 pDstTab->pTrunk = 0;
2010 pDstTab->cIfs = 0;
2011
2012 /* Find promiscuous interfaces. */
2013 uint32_t iIfMac = pTab->cEntries;
2014 while (iIfMac-- > 0)
2015 {
2016 if ( pTab->paEntries[iIfMac].fActive
2017 && ( pTab->paEntries[iIfMac].fPromiscuousSeeTrunk
2018 || (!fSrc && pTab->paEntries[iIfMac].fPromiscuousEff) )
2019 )
2020 {
2021 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
2022 uint32_t iIfDst = pDstTab->cIfs++;
2023 pDstTab->aIfs[iIfDst].pIf = pIf;
2024 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
2025 intnetR0BusyIncIf(pIf);
2026 }
2027 }
2028
2029 /* The trunk interface. */
2030 if (pTab->fHostActive)
2031 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
2032 if (pTab->fWireActive)
2033 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
2034 pDstTab->fTrunkDst &= ~fSrc;
2035 if (pDstTab->fTrunkDst)
2036 {
2037 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
2038 pDstTab->pTrunk = pTrunk;
2039 intnetR0BusyIncTrunk(pTrunk);
2040 }
2041
2042 RTSpinlockRelease(pNetwork->hAddrSpinlock);
2043 return !pDstTab->cIfs
2044 ? (!pDstTab->fTrunkDst ? INTNETSWDECISION_DROP : INTNETSWDECISION_TRUNK)
2045 : (!pDstTab->fTrunkDst ? INTNETSWDECISION_INTNET : INTNETSWDECISION_BROADCAST);
2046}
2047
2048
2049/**
2050 * Create a destination table for a trunk frame.
2051 *
2052 * @returns INTNETSWDECISION_BROADCAST.
2053 * @param pNetwork The network to switch on.
2054 * @param fSrc The frame source.
2055 * @param pDstTab The destination output table.
2056 */
2057static INTNETSWDECISION intnetR0NetworkSwitchTrunk(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETDSTTAB pDstTab)
2058{
2059 AssertPtr(pDstTab);
2060
2061 /*
2062 * Grab the spinlock first and record all active interfaces.
2063 */
2064 PINTNETMACTAB pTab= &pNetwork->MacTab;
2065 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
2066
2067 pDstTab->fTrunkDst = 0;
2068 pDstTab->pTrunk = 0;
2069 pDstTab->cIfs = 0;
2070
2071 /* The trunk interface. */
2072 if (pTab->fHostActive)
2073 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
2074 if (pTab->fWireActive)
2075 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
2076 pDstTab->fTrunkDst &= ~fSrc;
2077 if (pDstTab->fTrunkDst)
2078 {
2079 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
2080 pDstTab->pTrunk = pTrunk;
2081 intnetR0BusyIncTrunk(pTrunk);
2082 }
2083
2084 RTSpinlockRelease(pNetwork->hAddrSpinlock);
2085 return pDstTab->fTrunkDst ? INTNETSWDECISION_TRUNK : INTNETSWDECISION_DROP;
2086}
2087
2088
2089/**
2090 * Wrapper around RTMemAlloc for allocating a destination table.
2091 *
2092 * @returns VINF_SUCCESS or VERR_NO_MEMORY.
2093 * @param cEntries The size given as an entry count.
2094 * @param ppDstTab Where to store the pointer (always).
2095 */
2096DECLINLINE(int) intnetR0AllocDstTab(uint32_t cEntries, PINTNETDSTTAB *ppDstTab)
2097{
2098 PINTNETDSTTAB pDstTab;
2099 *ppDstTab = pDstTab = (PINTNETDSTTAB)RTMemAlloc(RT_OFFSETOF(INTNETDSTTAB, aIfs[cEntries]));
2100 if (RT_UNLIKELY(!pDstTab))
2101 return VERR_NO_MEMORY;
2102 return VINF_SUCCESS;
2103}
2104
2105
2106/**
2107 * Ensures that there is space for another interface in the MAC address lookup
2108 * table as well as all the destination tables.
2109 *
2110 * The caller must own the create/open/destroy mutex.
2111 *
2112 * @returns VINF_SUCCESS, VERR_NO_MEMORY or VERR_OUT_OF_RANGE.
2113 * @param pNetwork The network to operate on.
2114 */
2115static int intnetR0NetworkEnsureTabSpace(PINTNETNETWORK pNetwork)
2116{
2117 /*
2118 * The cEntries and cEntriesAllocated members are only updated while
2119 * owning the big mutex, so we only need the spinlock when doing the
2120 * actual table replacing.
2121 */
2122 PINTNETMACTAB pTab = &pNetwork->MacTab;
2123 int rc = VINF_SUCCESS;
2124 AssertReturn(pTab->cEntries <= pTab->cEntriesAllocated, VERR_INTERNAL_ERROR_2);
2125 if (pTab->cEntries + 1 > pTab->cEntriesAllocated)
2126 {
2127 uint32_t const cAllocated = pTab->cEntriesAllocated + INTNET_GROW_DSTTAB_SIZE;
2128 if (cAllocated <= INTNET_MAX_IFS)
2129 {
2130 /*
2131 * Resize the destination tables first, this can be kind of tedious.
2132 */
2133 for (uint32_t i = 0; i < pTab->cEntries; i++)
2134 {
2135 PINTNETIF pIf = pTab->paEntries[i].pIf; AssertPtr(pIf);
2136 PINTNETDSTTAB pNew;
2137 rc = intnetR0AllocDstTab(cAllocated, &pNew);
2138 if (RT_FAILURE(rc))
2139 break;
2140
2141 for (;;)
2142 {
2143 PINTNETDSTTAB pOld = pIf->pDstTab;
2144 if ( pOld
2145 && ASMAtomicCmpXchgPtr(&pIf->pDstTab, pNew, pOld))
2146 {
2147 RTMemFree(pOld);
2148 break;
2149 }
2150 intnetR0BusyWait(pNetwork, &pIf->cBusy);
2151 }
2152 }
2153
2154 /*
2155 * The trunk.
2156 */
2157 if ( RT_SUCCESS(rc)
2158 && pNetwork->MacTab.pTrunk)
2159 {
2160 AssertCompileAdjacentMembers(INTNETTRUNKIF, apTaskDstTabs, apIntDstTabs);
2161 PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
2162 PINTNETDSTTAB * const ppEndDstTab = &pTrunk->apIntDstTabs[pTrunk->cIntDstTabs];
2163 for (PINTNETDSTTAB *ppDstTab = &pTrunk->apTaskDstTabs[0];
2164 ppDstTab != ppEndDstTab && RT_SUCCESS(rc);
2165 ppDstTab++)
2166 {
2167 PINTNETDSTTAB pNew;
2168 rc = intnetR0AllocDstTab(cAllocated, &pNew);
2169 if (RT_FAILURE(rc))
2170 break;
2171
2172 for (;;)
2173 {
2174 RTSpinlockAcquire(pTrunk->hDstTabSpinlock);
2175 void *pvOld = *ppDstTab;
2176 if (pvOld)
2177 *ppDstTab = pNew;
2178 RTSpinlockRelease(pTrunk->hDstTabSpinlock);
2179 if (pvOld)
2180 {
2181 RTMemFree(pvOld);
2182 break;
2183 }
2184 intnetR0BusyWait(pNetwork, &pTrunk->cBusy);
2185 }
2186 }
2187 }
2188
2189 /*
2190 * The MAC Address table itself.
2191 */
2192 if (RT_SUCCESS(rc))
2193 {
2194 PINTNETMACTABENTRY paNew = (PINTNETMACTABENTRY)RTMemAlloc(sizeof(INTNETMACTABENTRY) * cAllocated);
2195 if (paNew)
2196 {
2197 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
2198
2199 PINTNETMACTABENTRY paOld = pTab->paEntries;
2200 uint32_t i = pTab->cEntries;
2201 while (i-- > 0)
2202 {
2203 paNew[i] = paOld[i];
2204
2205 paOld[i].fActive = false;
2206 paOld[i].pIf = NULL;
2207 }
2208
2209 pTab->paEntries = paNew;
2210 pTab->cEntriesAllocated = cAllocated;
2211
2212 RTSpinlockRelease(pNetwork->hAddrSpinlock);
2213
2214 RTMemFree(paOld);
2215 }
2216 else
2217 rc = VERR_NO_MEMORY;
2218 }
2219 }
2220 else
2221 rc = VERR_OUT_OF_RANGE;
2222 }
2223 return rc;
2224}
2225
2226
2227
2228
2229#ifdef INTNET_WITH_DHCP_SNOOPING
2230
2231/**
2232 * Snoops IP assignments and releases from the DHCPv4 traffic.
2233 *
2234 * The caller is responsible for making sure this traffic between the
2235 * BOOTPS and BOOTPC ports and validate the IP header. The UDP packet
2236 * need not be validated beyond the ports.
2237 *
2238 * @param pNetwork The network this frame was seen on.
2239 * @param pIpHdr Pointer to a valid IP header. This is for pseudo
2240 * header validation, so only the minimum header size
2241 * needs to be available and valid here.
2242 * @param pUdpHdr Pointer to the UDP header in the frame.
2243 * @param cbUdpPkt What's left of the frame when starting at the UDP header.
2244 * @param fGso Set if this is a GSO frame, clear if regular.
2245 */
2246static void intnetR0NetworkSnoopDhcp(PINTNETNETWORK pNetwork, PCRTNETIPV4 pIpHdr, PCRTNETUDP pUdpHdr, uint32_t cbUdpPkt)
2247{
2248 /*
2249 * Check if the DHCP message is valid and get the type.
2250 */
2251 if (!RTNetIPv4IsUDPValid(pIpHdr, pUdpHdr, pUdpHdr + 1, cbUdpPkt, true /*fCheckSum*/))
2252 {
2253 Log6(("Bad UDP packet\n"));
2254 return;
2255 }
2256 PCRTNETBOOTP pDhcp = (PCRTNETBOOTP)(pUdpHdr + 1);
2257 uint8_t MsgType;
2258 if (!RTNetIPv4IsDHCPValid(pUdpHdr, pDhcp, cbUdpPkt - sizeof(*pUdpHdr), &MsgType))
2259 {
2260 Log6(("Bad DHCP packet\n"));
2261 return;
2262 }
2263
2264#ifdef LOG_ENABLED
2265 /*
2266 * Log it.
2267 */
2268 const char *pszType = "unknown";
2269 switch (MsgType)
2270 {
2271 case RTNET_DHCP_MT_DISCOVER: pszType = "discover"; break;
2272 case RTNET_DHCP_MT_OFFER: pszType = "offer"; break;
2273 case RTNET_DHCP_MT_REQUEST: pszType = "request"; break;
2274 case RTNET_DHCP_MT_DECLINE: pszType = "decline"; break;
2275 case RTNET_DHCP_MT_ACK: pszType = "ack"; break;
2276 case RTNET_DHCP_MT_NAC: pszType = "nac"; break;
2277 case RTNET_DHCP_MT_RELEASE: pszType = "release"; break;
2278 case RTNET_DHCP_MT_INFORM: pszType = "inform"; break;
2279 }
2280 Log6(("DHCP msg: %d (%s) client %.6Rhxs ciaddr=%d.%d.%d.%d yiaddr=%d.%d.%d.%d\n", MsgType, pszType, &pDhcp->bp_chaddr,
2281 pDhcp->bp_ciaddr.au8[0], pDhcp->bp_ciaddr.au8[1], pDhcp->bp_ciaddr.au8[2], pDhcp->bp_ciaddr.au8[3],
2282 pDhcp->bp_yiaddr.au8[0], pDhcp->bp_yiaddr.au8[1], pDhcp->bp_yiaddr.au8[2], pDhcp->bp_yiaddr.au8[3]));
2283#endif /* LOG_EANBLED */
2284
2285 /*
2286 * Act upon the message.
2287 */
2288 switch (MsgType)
2289 {
2290#if 0
2291 case RTNET_DHCP_MT_REQUEST:
2292 /** @todo Check for valid non-broadcast requests w/ IP for any of the MACs we
2293 * know, and add the IP to the cache. */
2294 break;
2295#endif
2296
2297
2298 /*
2299 * Lookup the interface by its MAC address and insert the IPv4 address into the cache.
2300 * Delete the old client address first, just in case it changed in a renewal.
2301 */
2302 case RTNET_DHCP_MT_ACK:
2303 if (intnetR0IPv4AddrIsGood(pDhcp->bp_yiaddr))
2304 {
2305 PINTNETIF pMatchingIf = NULL;
2306 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
2307
2308 uint32_t iIf = pNetwork->MacTab.cEntries;
2309 while (iIf-- > 0)
2310 {
2311 PINTNETIF pCur = pNetwork->MacTab.paEntries[iIf].pIf;
2312 if ( intnetR0IfHasMacAddr(pCur)
2313 && !memcmp(&pCur->MacAddr, &pDhcp->bp_chaddr, sizeof(RTMAC)))
2314 {
2315 intnetR0IfAddrCacheDelete(pCur, &pCur->aAddrCache[kIntNetAddrType_IPv4],
2316 (PCRTNETADDRU)&pDhcp->bp_ciaddr, sizeof(RTNETADDRIPV4), "DHCP_MT_ACK");
2317 if (!pMatchingIf)
2318 {
2319 pMatchingIf = pCur;
2320 intnetR0BusyIncIf(pMatchingIf);
2321 }
2322 }
2323 }
2324
2325 RTSpinlockRelease(pNetwork->hAddrSpinlock);
2326
2327 if (pMatchingIf)
2328 {
2329 intnetR0IfAddrCacheAdd(pMatchingIf, kIntNetAddrType_IPv4,
2330 (PCRTNETADDRU)&pDhcp->bp_yiaddr, "DHCP_MT_ACK");
2331 intnetR0BusyDecIf(pMatchingIf);
2332 }
2333 }
2334 return;
2335
2336
2337 /*
2338 * Lookup the interface by its MAC address and remove the IPv4 address(es) from the cache.
2339 */
2340 case RTNET_DHCP_MT_RELEASE:
2341 {
2342 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
2343
2344 uint32_t iIf = pNetwork->MacTab.cEntries;
2345 while (iIf-- > 0)
2346 {
2347 PINTNETIF pCur = pNetwork->MacTab.paEntries[iIf].pIf;
2348 if ( intnetR0IfHasMacAddr(pCur)
2349 && !memcmp(&pCur->MacAddr, &pDhcp->bp_chaddr, sizeof(RTMAC)))
2350 {
2351 intnetR0IfAddrCacheDelete(pCur, &pCur->aAddrCache[kIntNetAddrType_IPv4],
2352 (PCRTNETADDRU)&pDhcp->bp_ciaddr, sizeof(RTNETADDRIPV4), "DHCP_MT_RELEASE");
2353 intnetR0IfAddrCacheDelete(pCur, &pCur->aAddrCache[kIntNetAddrType_IPv4],
2354 (PCRTNETADDRU)&pDhcp->bp_yiaddr, sizeof(RTNETADDRIPV4), "DHCP_MT_RELEASE");
2355 }
2356 }
2357
2358 RTSpinlockRelease(pNetwork->hAddrSpinlock);
2359 break;
2360 }
2361 }
2362
2363}
2364
2365
2366/**
2367 * Worker for intnetR0TrunkIfSnoopAddr that takes care of what
2368 * is likely to be a DHCP message.
2369 *
2370 * The caller has already check that the UDP source and destination ports
2371 * are BOOTPS or BOOTPC.
2372 *
2373 * @param pNetwork The network this frame was seen on.
2374 * @param pSG The gather list for the frame.
2375 */
2376static void intnetR0TrunkIfSnoopDhcp(PINTNETNETWORK pNetwork, PCINTNETSG pSG)
2377{
2378 /*
2379 * Get a pointer to a linear copy of the full packet, using the
2380 * temporary buffer if necessary.
2381 */
2382 PCRTNETIPV4 pIpHdr = (PCRTNETIPV4)((PCRTNETETHERHDR)pSG->aSegs[0].pv + 1);
2383 uint32_t cbPacket = pSG->cbTotal - sizeof(RTNETETHERHDR);
2384 if (pSG->cSegsUsed > 1)
2385 {
2386 cbPacket = RT_MIN(cbPacket, INTNETNETWORK_TMP_SIZE);
2387 Log6(("intnetR0TrunkIfSnoopDhcp: Copying IPv4/UDP/DHCP pkt %u\n", cbPacket));
2388 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), cbPacket, pNetwork->pbTmp))
2389 return;
2390 //pSG->fFlags |= INTNETSG_FLAGS_PKT_CP_IN_TMP;
2391 pIpHdr = (PCRTNETIPV4)pNetwork->pbTmp;
2392 }
2393
2394 /*
2395 * Validate the IP header and find the UDP packet.
2396 */
2397 if (!RTNetIPv4IsHdrValid(pIpHdr, cbPacket, pSG->cbTotal - sizeof(RTNETETHERHDR), true /*fChecksum*/))
2398 {
2399 Log(("intnetR0TrunkIfSnoopDhcp: bad ip header\n"));
2400 return;
2401 }
2402 uint32_t cbIpHdr = pIpHdr->ip_hl * 4;
2403
2404 /*
2405 * Hand it over to the common DHCP snooper.
2406 */
2407 intnetR0NetworkSnoopDhcp(pNetwork, pIpHdr, (PCRTNETUDP)((uintptr_t)pIpHdr + cbIpHdr), cbPacket - cbIpHdr);
2408}
2409
2410#endif /* INTNET_WITH_DHCP_SNOOPING */
2411
2412
2413/**
2414 * Snoops up source addresses from ARP requests and purge these from the address
2415 * caches.
2416 *
2417 * The purpose of this purging is to get rid of stale addresses.
2418 *
2419 * @param pNetwork The network this frame was seen on.
2420 * @param pSG The gather list for the frame.
2421 */
2422static void intnetR0TrunkIfSnoopArp(PINTNETNETWORK pNetwork, PCINTNETSG pSG)
2423{
2424 /*
2425 * Check the minimum size first.
2426 */
2427 if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + sizeof(RTNETARPIPV4)))
2428 return;
2429
2430 /*
2431 * Copy to temporary buffer if necessary.
2432 */
2433 uint32_t cbPacket = RT_MIN(pSG->cbTotal, sizeof(RTNETARPIPV4));
2434 PCRTNETARPIPV4 pArpIPv4 = (PCRTNETARPIPV4)((uintptr_t)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
2435 if ( pSG->cSegsUsed != 1
2436 && pSG->aSegs[0].cb < cbPacket)
2437 {
2438 if ( (pSG->fFlags & (INTNETSG_FLAGS_ARP_IPV4 | INTNETSG_FLAGS_PKT_CP_IN_TMP))
2439 != (INTNETSG_FLAGS_ARP_IPV4 | INTNETSG_FLAGS_PKT_CP_IN_TMP)
2440 && !intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), cbPacket, pNetwork->pbTmp))
2441 return;
2442 pArpIPv4 = (PCRTNETARPIPV4)pNetwork->pbTmp;
2443 }
2444
2445 /*
2446 * Ignore packets which doesn't interest us or we perceive as malformed.
2447 */
2448 if (RT_UNLIKELY( pArpIPv4->Hdr.ar_hlen != sizeof(RTMAC)
2449 || pArpIPv4->Hdr.ar_plen != sizeof(RTNETADDRIPV4)
2450 || pArpIPv4->Hdr.ar_htype != RT_H2BE_U16(RTNET_ARP_ETHER)
2451 || pArpIPv4->Hdr.ar_ptype != RT_H2BE_U16(RTNET_ETHERTYPE_IPV4)))
2452 return;
2453 uint16_t ar_oper = RT_H2BE_U16(pArpIPv4->Hdr.ar_oper);
2454 if (RT_UNLIKELY( ar_oper != RTNET_ARPOP_REQUEST
2455 && ar_oper != RTNET_ARPOP_REPLY))
2456 {
2457 Log6(("ts-ar: op=%#x\n", ar_oper));
2458 return;
2459 }
2460
2461 /*
2462 * Delete the source address if it's OK.
2463 */
2464 if ( !intnetR0IsMacAddrMulticast(&pArpIPv4->ar_sha)
2465 && ( pArpIPv4->ar_sha.au16[0]
2466 || pArpIPv4->ar_sha.au16[1]
2467 || pArpIPv4->ar_sha.au16[2])
2468 && intnetR0IPv4AddrIsGood(pArpIPv4->ar_spa))
2469 {
2470 Log6(("ts-ar: %d.%d.%d.%d / %.6Rhxs\n", pArpIPv4->ar_spa.au8[0], pArpIPv4->ar_spa.au8[1],
2471 pArpIPv4->ar_spa.au8[2], pArpIPv4->ar_spa.au8[3], &pArpIPv4->ar_sha));
2472 intnetR0NetworkAddrCacheDelete(pNetwork, (PCRTNETADDRU)&pArpIPv4->ar_spa,
2473 kIntNetAddrType_IPv4, sizeof(pArpIPv4->ar_spa), "tif/arp");
2474 }
2475}
2476
2477
2478#ifdef INTNET_WITH_DHCP_SNOOPING
2479/**
2480 * Snoop up addresses from ARP and DHCP traffic from frames coming
2481 * over the trunk connection.
2482 *
2483 * The caller is responsible for do some basic filtering before calling
2484 * this function.
2485 * For IPv4 this means checking against the minimum DHCPv4 frame size.
2486 *
2487 * @param pNetwork The network.
2488 * @param pSG The SG list for the frame.
2489 * @param EtherType The Ethertype of the frame.
2490 */
2491static void intnetR0TrunkIfSnoopAddr(PINTNETNETWORK pNetwork, PCINTNETSG pSG, uint16_t EtherType)
2492{
2493 switch (EtherType)
2494 {
2495 case RTNET_ETHERTYPE_IPV4:
2496 {
2497 uint32_t cbIpHdr;
2498 uint8_t b;
2499
2500 Assert(pSG->cbTotal >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN);
2501 if (pSG->aSegs[0].cb >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN)
2502 {
2503 /* check if the protocol is UDP */
2504 PCRTNETIPV4 pIpHdr = (PCRTNETIPV4)((uint8_t const *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
2505 if (pIpHdr->ip_p != RTNETIPV4_PROT_UDP)
2506 return;
2507
2508 /* get the TCP header length */
2509 cbIpHdr = pIpHdr->ip_hl * 4;
2510 }
2511 else
2512 {
2513 /* check if the protocol is UDP */
2514 if ( intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPV4, ip_p))
2515 != RTNETIPV4_PROT_UDP)
2516 return;
2517
2518 /* get the TCP header length */
2519 b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + 0); /* (IPv4 first byte, a bitfield) */
2520 cbIpHdr = (b & 0x0f) * 4;
2521 }
2522 if (cbIpHdr < RTNETIPV4_MIN_LEN)
2523 return;
2524
2525 /* compare the ports. */
2526 if (pSG->aSegs[0].cb >= sizeof(RTNETETHERHDR) + cbIpHdr + RTNETUDP_MIN_LEN)
2527 {
2528 PCRTNETUDP pUdpHdr = (PCRTNETUDP)((uint8_t const *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR) + cbIpHdr);
2529 if ( ( RT_BE2H_U16(pUdpHdr->uh_sport) != RTNETIPV4_PORT_BOOTPS
2530 && RT_BE2H_U16(pUdpHdr->uh_dport) != RTNETIPV4_PORT_BOOTPS)
2531 || ( RT_BE2H_U16(pUdpHdr->uh_dport) != RTNETIPV4_PORT_BOOTPC
2532 && RT_BE2H_U16(pUdpHdr->uh_sport) != RTNETIPV4_PORT_BOOTPC))
2533 return;
2534 }
2535 else
2536 {
2537 /* get the lower byte of the UDP source port number. */
2538 b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_OFFSETOF(RTNETUDP, uh_sport) + 1);
2539 if ( b != RTNETIPV4_PORT_BOOTPS
2540 && b != RTNETIPV4_PORT_BOOTPC)
2541 return;
2542 uint8_t SrcPort = b;
2543 b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_OFFSETOF(RTNETUDP, uh_sport));
2544 if (b)
2545 return;
2546
2547 /* get the lower byte of the UDP destination port number. */
2548 b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_OFFSETOF(RTNETUDP, uh_dport) + 1);
2549 if ( b != RTNETIPV4_PORT_BOOTPS
2550 && b != RTNETIPV4_PORT_BOOTPC)
2551 return;
2552 if (b == SrcPort)
2553 return;
2554 b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_OFFSETOF(RTNETUDP, uh_dport));
2555 if (b)
2556 return;
2557 }
2558 intnetR0TrunkIfSnoopDhcp(pNetwork, pSG);
2559 break;
2560 }
2561
2562 case RTNET_ETHERTYPE_ARP:
2563 intnetR0TrunkIfSnoopArp(pNetwork, pSG);
2564 break;
2565 }
2566}
2567#endif /* INTNET_WITH_DHCP_SNOOPING */
2568
2569/**
2570 * Deals with an IPv6 packet.
2571 *
2572 * This will fish out the source IP address and add it to the cache.
2573 * Then it will look for DHCPRELEASE requests (?) and anything else
2574 * that we might find useful later.
2575 *
2576 * @param pIf The interface that's sending the frame.
2577 * @param pIpHdr Pointer to the IPv4 header in the frame.
2578 * @param cbPacket The size of the packet, or more correctly the
2579 * size of the frame without the ethernet header.
2580 * @param fGso Set if this is a GSO frame, clear if regular.
2581 */
2582static void intnetR0IfSnoopIPv6SourceAddr(PINTNETIF pIf, PCRTNETIPV6 pIpHdr, uint32_t cbPacket, bool fGso)
2583{
2584 NOREF(fGso);
2585
2586 /*
2587 * Check the header size first to prevent access invalid data.
2588 */
2589 if (cbPacket < RTNETIPV6_MIN_LEN)
2590 return;
2591
2592 /*
2593 * If the source address is good (not multicast) and
2594 * not already in the address cache of the sender, add it.
2595 */
2596 RTNETADDRU Addr;
2597 Addr.IPv6 = pIpHdr->ip6_src;
2598
2599 if ( intnetR0IPv6AddrIsGood(Addr.IPv6) && (pIpHdr->ip6_hlim == 0xff)
2600 && intnetR0IfAddrCacheLookupLikely(&pIf->aAddrCache[kIntNetAddrType_IPv6], &Addr, sizeof(Addr.IPv6)) < 0)
2601 {
2602 intnetR0IfAddrCacheAdd(pIf, kIntNetAddrType_IPv6, &Addr, "if/ipv6");
2603 }
2604}
2605
2606
2607/**
2608 * Deals with an IPv4 packet.
2609 *
2610 * This will fish out the source IP address and add it to the cache.
2611 * Then it will look for DHCPRELEASE requests (?) and anything else
2612 * that we might find useful later.
2613 *
2614 * @param pIf The interface that's sending the frame.
2615 * @param pIpHdr Pointer to the IPv4 header in the frame.
2616 * @param cbPacket The size of the packet, or more correctly the
2617 * size of the frame without the ethernet header.
2618 * @param fGso Set if this is a GSO frame, clear if regular.
2619 */
2620static void intnetR0IfSnoopIPv4SourceAddr(PINTNETIF pIf, PCRTNETIPV4 pIpHdr, uint32_t cbPacket, bool fGso)
2621{
2622 /*
2623 * Check the header size first to prevent access invalid data.
2624 */
2625 if (cbPacket < RTNETIPV4_MIN_LEN)
2626 return;
2627 uint32_t cbHdr = (uint32_t)pIpHdr->ip_hl * 4;
2628 if ( cbHdr < RTNETIPV4_MIN_LEN
2629 || cbPacket < cbHdr)
2630 return;
2631
2632 /*
2633 * If the source address is good (not broadcast or my network) and
2634 * not already in the address cache of the sender, add it. Validate
2635 * the IP header before adding it.
2636 */
2637 bool fValidatedIpHdr = false;
2638 RTNETADDRU Addr;
2639 Addr.IPv4 = pIpHdr->ip_src;
2640 if ( intnetR0IPv4AddrIsGood(Addr.IPv4)
2641 && intnetR0IfAddrCacheLookupLikely(&pIf->aAddrCache[kIntNetAddrType_IPv4], &Addr, sizeof(Addr.IPv4)) < 0)
2642 {
2643 if (!RTNetIPv4IsHdrValid(pIpHdr, cbPacket, cbPacket, !fGso /*fChecksum*/))
2644 {
2645 Log(("intnetR0IfSnoopIPv4SourceAddr: bad ip header\n"));
2646 return;
2647 }
2648
2649 intnetR0IfAddrCacheAddIt(pIf, kIntNetAddrType_IPv4, &Addr, "if/ipv4");
2650 fValidatedIpHdr = true;
2651 }
2652
2653#ifdef INTNET_WITH_DHCP_SNOOPING
2654 /*
2655 * Check for potential DHCP packets.
2656 */
2657 if ( pIpHdr->ip_p == RTNETIPV4_PROT_UDP /* DHCP is UDP. */
2658 && cbPacket >= cbHdr + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN /* Min DHCP packet len. */
2659 && !fGso) /* GSO is not applicable to DHCP traffic. */
2660 {
2661 PCRTNETUDP pUdpHdr = (PCRTNETUDP)((uint8_t const *)pIpHdr + cbHdr);
2662 if ( ( RT_BE2H_U16(pUdpHdr->uh_dport) == RTNETIPV4_PORT_BOOTPS
2663 || RT_BE2H_U16(pUdpHdr->uh_sport) == RTNETIPV4_PORT_BOOTPS)
2664 && ( RT_BE2H_U16(pUdpHdr->uh_sport) == RTNETIPV4_PORT_BOOTPC
2665 || RT_BE2H_U16(pUdpHdr->uh_dport) == RTNETIPV4_PORT_BOOTPC))
2666 {
2667 if ( fValidatedIpHdr
2668 || RTNetIPv4IsHdrValid(pIpHdr, cbPacket, cbPacket, !fGso /*fChecksum*/))
2669 intnetR0NetworkSnoopDhcp(pIf->pNetwork, pIpHdr, pUdpHdr, cbPacket - cbHdr);
2670 else
2671 Log(("intnetR0IfSnoopIPv4SourceAddr: bad ip header (dhcp)\n"));
2672 }
2673 }
2674#endif /* INTNET_WITH_DHCP_SNOOPING */
2675}
2676
2677
2678/**
2679 * Snoop up source addresses from an ARP request or reply.
2680 *
2681 * @param pIf The interface that's sending the frame.
2682 * @param pHdr The ARP header.
2683 * @param cbPacket The size of the packet (might be larger than the ARP
2684 * request 'cause of min ethernet frame size).
2685 * @param pfSgFlags Pointer to the SG flags. This is used to tag the packet so we
2686 * don't have to repeat the frame parsing in intnetR0TrunkIfSend.
2687 */
2688static void intnetR0IfSnoopArpAddr(PINTNETIF pIf, PCRTNETARPIPV4 pArpIPv4, uint32_t cbPacket, uint16_t *pfSgFlags)
2689{
2690 /*
2691 * Ignore packets which doesn't interest us or we perceive as malformed.
2692 */
2693 if (RT_UNLIKELY(cbPacket < sizeof(RTNETARPIPV4)))
2694 return;
2695 if (RT_UNLIKELY( pArpIPv4->Hdr.ar_hlen != sizeof(RTMAC)
2696 || pArpIPv4->Hdr.ar_plen != sizeof(RTNETADDRIPV4)
2697 || pArpIPv4->Hdr.ar_htype != RT_H2BE_U16(RTNET_ARP_ETHER)
2698 || pArpIPv4->Hdr.ar_ptype != RT_H2BE_U16(RTNET_ETHERTYPE_IPV4)))
2699 return;
2700 uint16_t ar_oper = RT_H2BE_U16(pArpIPv4->Hdr.ar_oper);
2701 if (RT_UNLIKELY( ar_oper != RTNET_ARPOP_REQUEST
2702 && ar_oper != RTNET_ARPOP_REPLY))
2703 {
2704 Log6(("ar_oper=%#x\n", ar_oper));
2705 return;
2706 }
2707
2708 /*
2709 * Tag the SG as ARP IPv4 for later editing, then check for addresses
2710 * which can be removed or added to the address cache of the sender.
2711 */
2712 *pfSgFlags |= INTNETSG_FLAGS_ARP_IPV4;
2713
2714 if ( ar_oper == RTNET_ARPOP_REPLY
2715 && !intnetR0IsMacAddrMulticast(&pArpIPv4->ar_tha)
2716 && ( pArpIPv4->ar_tha.au16[0]
2717 || pArpIPv4->ar_tha.au16[1]
2718 || pArpIPv4->ar_tha.au16[2])
2719 && intnetR0IPv4AddrIsGood(pArpIPv4->ar_tpa))
2720 intnetR0IfAddrCacheDelete(pIf, &pIf->aAddrCache[kIntNetAddrType_IPv4],
2721 (PCRTNETADDRU)&pArpIPv4->ar_tpa, sizeof(RTNETADDRIPV4), "if/arp");
2722
2723 if ( !memcmp(&pArpIPv4->ar_sha, &pIf->MacAddr, sizeof(RTMAC))
2724 && intnetR0IPv4AddrIsGood(pArpIPv4->ar_spa))
2725 {
2726 intnetR0IfAddrCacheAdd(pIf, kIntNetAddrType_IPv4, (PCRTNETADDRU)&pArpIPv4->ar_spa, "if/arp");
2727 }
2728}
2729
2730
2731
2732/**
2733 * Checks packets send by a normal interface for new network
2734 * layer addresses.
2735 *
2736 * @param pIf The interface that's sending the frame.
2737 * @param pbFrame The frame.
2738 * @param cbFrame The size of the frame.
2739 * @param fGso Set if this is a GSO frame, clear if regular.
2740 * @param pfSgFlags Pointer to the SG flags. This is used to tag the packet so we
2741 * don't have to repeat the frame parsing in intnetR0TrunkIfSend.
2742 */
2743static void intnetR0IfSnoopAddr(PINTNETIF pIf, uint8_t const *pbFrame, uint32_t cbFrame, bool fGso, uint16_t *pfSgFlags)
2744{
2745 /*
2746 * Fish out the ethertype and look for stuff we can handle.
2747 */
2748 if (cbFrame <= sizeof(RTNETETHERHDR))
2749 return;
2750 cbFrame -= sizeof(RTNETETHERHDR);
2751
2752 uint16_t EtherType = RT_H2BE_U16(((PCRTNETETHERHDR)pbFrame)->EtherType);
2753 switch (EtherType)
2754 {
2755 case RTNET_ETHERTYPE_IPV4:
2756 intnetR0IfSnoopIPv4SourceAddr(pIf, (PCRTNETIPV4)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, fGso);
2757 break;
2758
2759 case RTNET_ETHERTYPE_IPV6:
2760 intnetR0IfSnoopIPv6SourceAddr(pIf, (PCRTNETIPV6)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, fGso);
2761 break;
2762
2763#if 0 /** @todo IntNet: implement IPX for wireless MAC sharing? */
2764 case RTNET_ETHERTYPE_IPX_1:
2765 case RTNET_ETHERTYPE_IPX_2:
2766 case RTNET_ETHERTYPE_IPX_3:
2767 intnetR0IfSnoopIpxSourceAddr(pIf, (PCINTNETIPX)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, pfSgFlags);
2768 break;
2769#endif
2770 case RTNET_ETHERTYPE_ARP:
2771 intnetR0IfSnoopArpAddr(pIf, (PCRTNETARPIPV4)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, pfSgFlags);
2772 break;
2773 }
2774}
2775
2776
2777/**
2778 * Writes a frame packet to the ring buffer.
2779 *
2780 * @returns VBox status code.
2781 * @param pBuf The buffer.
2782 * @param pRingBuf The ring buffer to read from.
2783 * @param pSG The gather list.
2784 * @param pNewDstMac Set the destination MAC address to the address if specified.
2785 */
2786static int intnetR0RingWriteFrame(PINTNETRINGBUF pRingBuf, PCINTNETSG pSG, PCRTMAC pNewDstMac)
2787{
2788 PINTNETHDR pHdr = NULL; /* shut up gcc*/
2789 void *pvDst = NULL; /* ditto */
2790 int rc;
2791 if (pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
2792 rc = IntNetRingAllocateFrame(pRingBuf, pSG->cbTotal, &pHdr, &pvDst);
2793 else
2794 rc = IntNetRingAllocateGsoFrame(pRingBuf, pSG->cbTotal, &pSG->GsoCtx, &pHdr, &pvDst);
2795 if (RT_SUCCESS(rc))
2796 {
2797 IntNetSgRead(pSG, pvDst);
2798 if (pNewDstMac)
2799 ((PRTNETETHERHDR)pvDst)->DstMac = *pNewDstMac;
2800
2801 IntNetRingCommitFrame(pRingBuf, pHdr);
2802 return VINF_SUCCESS;
2803 }
2804 return rc;
2805}
2806
2807
2808/**
2809 * Sends a frame to a specific interface.
2810 *
2811 * @param pIf The interface.
2812 * @param pIfSender The interface sending the frame. This is NULL if it's the trunk.
2813 * @param pSG The gather buffer which data is being sent to the interface.
2814 * @param pNewDstMac Set the destination MAC address to the address if specified.
2815 */
2816static void intnetR0IfSend(PINTNETIF pIf, PINTNETIF pIfSender, PINTNETSG pSG, PCRTMAC pNewDstMac)
2817{
2818 /*
2819 * Grab the receive/producer lock and copy over the frame.
2820 */
2821 RTSpinlockAcquire(pIf->hRecvInSpinlock);
2822 int rc = intnetR0RingWriteFrame(&pIf->pIntBuf->Recv, pSG, pNewDstMac);
2823 RTSpinlockRelease(pIf->hRecvInSpinlock);
2824 if (RT_SUCCESS(rc))
2825 {
2826 pIf->cYields = 0;
2827 RTSemEventSignal(pIf->hRecvEvent);
2828 return;
2829 }
2830
2831 Log(("intnetR0IfSend: overflow cb=%d hIf=%RX32\n", pSG->cbTotal, pIf->hIf));
2832
2833 /*
2834 * Scheduling hack, for unicore machines primarily.
2835 */
2836 if ( pIf->fActive
2837 && pIf->cYields < 4 /* just twice */
2838 && pIfSender /* but not if it's from the trunk */
2839 && RTThreadPreemptIsEnabled(NIL_RTTHREAD)
2840 )
2841 {
2842 unsigned cYields = 2;
2843 while (--cYields > 0)
2844 {
2845 RTSemEventSignal(pIf->hRecvEvent);
2846 RTThreadYield();
2847
2848 RTSpinlockAcquire(pIf->hRecvInSpinlock);
2849 rc = intnetR0RingWriteFrame(&pIf->pIntBuf->Recv, pSG, pNewDstMac);
2850 RTSpinlockRelease(pIf->hRecvInSpinlock);
2851 if (RT_SUCCESS(rc))
2852 {
2853 STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatYieldsOk);
2854 RTSemEventSignal(pIf->hRecvEvent);
2855 return;
2856 }
2857 pIf->cYields++;
2858 }
2859 STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatYieldsNok);
2860 }
2861
2862 /* ok, the frame is lost. */
2863 STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatLost);
2864 RTSemEventSignal(pIf->hRecvEvent);
2865}
2866
2867
2868/**
2869 * Fallback path that does the GSO segmenting before passing the frame on to the
2870 * trunk interface.
2871 *
2872 * The caller holds the trunk lock.
2873 *
2874 * @param pThis The trunk.
2875 * @param pIfSender The IF sending the frame.
2876 * @param pSG Pointer to the gather list.
2877 * @param fDst The destination flags.
2878 */
2879static int intnetR0TrunkIfSendGsoFallback(PINTNETTRUNKIF pThis, PINTNETIF pIfSender, PINTNETSG pSG, uint32_t fDst)
2880{
2881 /*
2882 * Since we're only using this for GSO frame coming from the internal
2883 * network interfaces and never the trunk, we can assume there is only
2884 * one segment. This simplifies the code quite a bit.
2885 */
2886 Assert(PDMNetGsoIsValid(&pSG->GsoCtx, sizeof(pSG->GsoCtx), pSG->cbTotal));
2887 AssertReturn(pSG->cSegsUsed == 1, VERR_INTERNAL_ERROR_4);
2888
2889 union
2890 {
2891 uint8_t abBuf[sizeof(INTNETSG) + sizeof(INTNETSEG)];
2892 INTNETSG SG;
2893 } u;
2894
2895 /** @todo We have to adjust MSS so it does not exceed the value configured for
2896 * the host's interface.
2897 */
2898
2899 /*
2900 * Carve out the frame segments with the header and frame in different
2901 * scatter / gather segments.
2902 */
2903 uint32_t const cSegs = PDMNetGsoCalcSegmentCount(&pSG->GsoCtx, pSG->cbTotal);
2904 for (uint32_t iSeg = 0; iSeg < cSegs; iSeg++)
2905 {
2906 uint32_t cbSegPayload, cbSegHdrs;
2907 uint32_t offSegPayload = PDMNetGsoCarveSegment(&pSG->GsoCtx, (uint8_t *)pSG->aSegs[0].pv, pSG->cbTotal, iSeg, cSegs,
2908 pIfSender->abGsoHdrs, &cbSegHdrs, &cbSegPayload);
2909
2910 IntNetSgInitTempSegs(&u.SG, cbSegHdrs + cbSegPayload, 2, 2);
2911 u.SG.aSegs[0].Phys = NIL_RTHCPHYS;
2912 u.SG.aSegs[0].pv = pIfSender->abGsoHdrs;
2913 u.SG.aSegs[0].cb = cbSegHdrs;
2914 u.SG.aSegs[1].Phys = NIL_RTHCPHYS;
2915 u.SG.aSegs[1].pv = (uint8_t *)pSG->aSegs[0].pv + offSegPayload;
2916 u.SG.aSegs[1].cb = (uint32_t)cbSegPayload;
2917
2918 int rc = pThis->pIfPort->pfnXmit(pThis->pIfPort, pIfSender->pvIfData, &u.SG, fDst);
2919 if (RT_FAILURE(rc))
2920 return rc;
2921 }
2922 return VINF_SUCCESS;
2923}
2924
2925
2926/**
2927 * Checks if any of the given trunk destinations can handle this kind of GSO SG.
2928 *
2929 * @returns true if it can, false if it cannot.
2930 * @param pThis The trunk.
2931 * @param pSG The scatter / gather buffer.
2932 * @param fDst The destination mask.
2933 */
2934DECLINLINE(bool) intnetR0TrunkIfCanHandleGsoFrame(PINTNETTRUNKIF pThis, PINTNETSG pSG, uint32_t fDst)
2935{
2936 uint8_t u8Type = pSG->GsoCtx.u8Type;
2937 AssertReturn(u8Type < 32, false); /* paranoia */
2938 uint32_t fMask = RT_BIT_32(u8Type);
2939
2940 if (fDst == INTNETTRUNKDIR_HOST)
2941 return !!(pThis->fHostGsoCapabilites & fMask);
2942 if (fDst == INTNETTRUNKDIR_WIRE)
2943 return !!(pThis->fWireGsoCapabilites & fMask);
2944 Assert(fDst == (INTNETTRUNKDIR_WIRE | INTNETTRUNKDIR_HOST));
2945 return !!(pThis->fHostGsoCapabilites & pThis->fWireGsoCapabilites & fMask);
2946}
2947
2948
2949/**
2950 * Calculates the checksum of a full ipv6 frame.
2951 *
2952 * @returns 16-bit hecksum value.
2953 * @param pIpHdr The IPv6 header (network endian (big)).
2954 * @param bProtocol The protocol number. This can be the same as the
2955 * ip6_nxt field, but doesn't need to be.
2956 * @param cbPkt The packet size (host endian of course). This can
2957 * be the same as the ip6_plen field, but as with @a
2958 * bProtocol it won't be when extension headers are
2959 * present. For UDP this will be uh_ulen converted to
2960 * host endian.
2961 */
2962static uint16_t computeIPv6FullChecksum(PCRTNETIPV6 pIpHdr)
2963{
2964 uint16_t const *data;
2965 int len = RT_BE2H_U16(pIpHdr->ip6_plen);
2966 uint32_t sum = RTNetIPv6PseudoChecksum(pIpHdr);
2967
2968 /* add the payload */
2969 data = (uint16_t *) (pIpHdr + 1);
2970 while(len > 1)
2971 {
2972 sum += *(data);
2973 data++;
2974 len -= 2;
2975 }
2976
2977 if(len > 0)
2978 sum += *((uint8_t *) data);
2979
2980 while(sum >> 16)
2981 sum = (sum & 0xffff) + (sum >> 16);
2982
2983 return (uint16_t) ~sum;
2984}
2985
2986
2987/**
2988 * Rewrite VM MAC address with shared host MAC address inside IPv6
2989 * Neighbor Discovery datagrams.
2990 */
2991static void intnetR0TrunkSharedMacEditIPv6FromIntNet(PINTNETTRUNKIF pThis, PINTNETIF pIfSender,
2992 PRTNETETHERHDR pEthHdr, uint32_t cb)
2993{
2994 if (RT_UNLIKELY(cb < sizeof(*pEthHdr)))
2995 return;
2996
2997 /* have IPv6 header */
2998 PRTNETIPV6 pIPv6 = (PRTNETIPV6)(pEthHdr + 1);
2999 cb -= sizeof(*pEthHdr);
3000 if (RT_UNLIKELY(cb < sizeof(*pIPv6)))
3001 return;
3002
3003 if ( pIPv6->ip6_nxt != RTNETIPV6_PROT_ICMPV6
3004 || pIPv6->ip6_hlim != 0xff)
3005 return;
3006
3007 PRTNETICMPV6HDR pICMPv6 = (PRTNETICMPV6HDR)(pIPv6 + 1);
3008 cb -= sizeof(*pIPv6);
3009 if (RT_UNLIKELY(cb < sizeof(*pICMPv6)))
3010 return;
3011
3012 uint32_t hdrlen = 0;
3013 uint8_t llaopt = RTNETIPV6_ICMP_ND_SLLA_OPT;
3014
3015 uint8_t type = pICMPv6->icmp6_type;
3016 switch (type)
3017 {
3018 case RTNETIPV6_ICMP_TYPE_RS:
3019 hdrlen = 8;
3020 break;
3021
3022 case RTNETIPV6_ICMP_TYPE_RA:
3023 hdrlen = 16;
3024 break;
3025
3026 case RTNETIPV6_ICMP_TYPE_NS:
3027 hdrlen = 24;
3028 break;
3029
3030 case RTNETIPV6_ICMP_TYPE_NA:
3031 hdrlen = 24;
3032 llaopt = RTNETIPV6_ICMP_ND_TLLA_OPT;
3033 break;
3034
3035 default:
3036 return;
3037 }
3038
3039 AssertReturnVoid(hdrlen > 0);
3040 if (RT_UNLIKELY(cb < hdrlen))
3041 return;
3042
3043 if (RT_UNLIKELY(pICMPv6->icmp6_code != 0))
3044 return;
3045
3046 PRTNETNDP_LLA_OPT pLLAOpt = NULL;
3047 char *pOpt = (char *)pICMPv6 + hdrlen;
3048 cb -= hdrlen;
3049
3050 while (cb >= 8)
3051 {
3052 uint8_t opt = ((uint8_t *)pOpt)[0];
3053 uint32_t optlen = (uint32_t)((uint8_t *)pOpt)[1] * 8;
3054 if (RT_UNLIKELY(cb < optlen))
3055 return;
3056
3057 if (opt == llaopt)
3058 {
3059 if (RT_UNLIKELY(optlen != 8))
3060 return;
3061 pLLAOpt = (PRTNETNDP_LLA_OPT)pOpt;
3062 break;
3063 }
3064
3065 pOpt += optlen;
3066 cb -= optlen;
3067 }
3068
3069 if (pLLAOpt == NULL)
3070 return;
3071
3072 if (memcmp(&pLLAOpt->lla, &pIfSender->MacAddr, sizeof(RTMAC)) != 0)
3073 return;
3074
3075 /* overwrite VM's MAC with host's MAC */
3076 pLLAOpt->lla = pThis->MacAddr;
3077
3078 /* recompute the checksum */
3079 pICMPv6->icmp6_cksum = 0;
3080 pICMPv6->icmp6_cksum = computeIPv6FullChecksum(pIPv6);
3081}
3082
3083
3084/**
3085 * Sends a frame down the trunk.
3086 *
3087 * @param pThis The trunk.
3088 * @param pNetwork The network the frame is being sent to.
3089 * @param pIfSender The IF sending the frame. Used for MAC address
3090 * checks in shared MAC mode.
3091 * @param fDst The destination flags.
3092 * @param pSG Pointer to the gather list.
3093 */
3094static void intnetR0TrunkIfSend(PINTNETTRUNKIF pThis, PINTNETNETWORK pNetwork, PINTNETIF pIfSender,
3095 uint32_t fDst, PINTNETSG pSG)
3096{
3097 /*
3098 * Quick sanity check.
3099 */
3100 AssertPtr(pThis);
3101 AssertPtr(pNetwork);
3102 AssertPtr(pIfSender);
3103 AssertPtr(pSG);
3104 Assert(fDst);
3105 AssertReturnVoid(pThis->pIfPort);
3106
3107 /*
3108 * Edit the frame if we're sharing the MAC address with the host on the wire.
3109 *
3110 * If the frame is headed for both the host and the wire, we'll have to send
3111 * it to the host before making any modifications, and force the OS specific
3112 * backend to copy it. We do this by marking it as TEMP (which is always the
3113 * case right now).
3114 */
3115 if ( (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
3116 && (fDst & INTNETTRUNKDIR_WIRE))
3117 {
3118 /*
3119 * Dispatch it to the host before making changes.
3120 */
3121 if (fDst & INTNETTRUNKDIR_HOST)
3122 {
3123 Assert(pSG->fFlags & INTNETSG_FLAGS_TEMP); /* make sure copy is forced */
3124 intnetR0TrunkIfSend(pThis, pNetwork, pIfSender, INTNETTRUNKDIR_HOST, pSG);
3125 fDst &= ~INTNETTRUNKDIR_HOST;
3126 }
3127
3128 /*
3129 * Edit the source address so that it it's the same as the host.
3130 */
3131 /* ASSUME frame from IntNetR0IfSend! */
3132 AssertReturnVoid(pSG->cSegsUsed == 1);
3133 AssertReturnVoid(pSG->cbTotal >= sizeof(RTNETETHERHDR));
3134 AssertReturnVoid(pIfSender);
3135 PRTNETETHERHDR pEthHdr = (PRTNETETHERHDR)pSG->aSegs[0].pv;
3136
3137 pEthHdr->SrcMac = pThis->MacAddr;
3138
3139 /*
3140 * Deal with tags from the snooping phase.
3141 */
3142 if (pSG->fFlags & INTNETSG_FLAGS_ARP_IPV4)
3143 {
3144 /*
3145 * APR IPv4: replace hardware (MAC) addresses because these end up
3146 * in ARP caches. So, if we don't the other machines will
3147 * send the packets to the MAC address of the guest
3148 * instead of the one of the host, which won't work on
3149 * wireless of course...
3150 */
3151 PRTNETARPIPV4 pArp = (PRTNETARPIPV4)(pEthHdr + 1);
3152 if (!memcmp(&pArp->ar_sha, &pIfSender->MacAddr, sizeof(RTMAC)))
3153 {
3154 Log6(("tw: ar_sha %.6Rhxs -> %.6Rhxs\n", &pArp->ar_sha, &pThis->MacAddr));
3155 pArp->ar_sha = pThis->MacAddr;
3156 }
3157 if (!memcmp(&pArp->ar_tha, &pIfSender->MacAddr, sizeof(RTMAC))) /* just in case... */
3158 {
3159 Log6(("tw: ar_tha %.6Rhxs -> %.6Rhxs\n", &pArp->ar_tha, &pThis->MacAddr));
3160 pArp->ar_tha = pThis->MacAddr;
3161 }
3162 }
3163 else if (pEthHdr->EtherType == RT_H2N_U16_C(RTNET_ETHERTYPE_IPV6))
3164 {
3165 intnetR0TrunkSharedMacEditIPv6FromIntNet(pThis, pIfSender, pEthHdr, pSG->cbTotal);
3166 }
3167 }
3168
3169 /*
3170 * Send the frame, handling the GSO fallback.
3171 *
3172 * Note! The trunk implementation will re-check that the trunk is active
3173 * before sending, so we don't have to duplicate that effort here.
3174 */
3175 STAM_REL_PROFILE_START(&pIfSender->pIntBuf->StatSend2, a);
3176 int rc;
3177 if ( pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID
3178 || intnetR0TrunkIfCanHandleGsoFrame(pThis, pSG, fDst) )
3179 rc = pThis->pIfPort->pfnXmit(pThis->pIfPort, pIfSender->pvIfData, pSG, fDst);
3180 else
3181 rc = intnetR0TrunkIfSendGsoFallback(pThis, pIfSender, pSG, fDst);
3182 STAM_REL_PROFILE_STOP(&pIfSender->pIntBuf->StatSend2, a);
3183
3184 /** @todo failure statistics? */
3185 Log2(("intnetR0TrunkIfSend: %Rrc fDst=%d\n", rc, fDst)); NOREF(rc);
3186}
3187
3188
3189/**
3190 * Detect broadcasts packaged as unicast and convert them back to broadcast.
3191 *
3192 * WiFi routers try to use ethernet unicast instead of broadcast or
3193 * multicast when possible. Look inside the packet and fix up
3194 * ethernet destination to be proper broadcast or multicast if
3195 * necessary.
3196 *
3197 * @returns true broadcast (pEthHdr & pSG are modified), false if not.
3198 * @param pNetwork The network the frame is being sent to.
3199 * @param pSG Pointer to the gather list for the frame. The
3200 * ethernet destination address is modified when
3201 * returning true.
3202 * @param pEthHdr Pointer to the ethernet header. The ethernet
3203 * destination address is modified when returning true.
3204 */
3205static bool intnetR0NetworkSharedMacDetectAndFixBroadcast(PINTNETNETWORK pNetwork, PINTNETSG pSG, PRTNETETHERHDR pEthHdr)
3206{
3207 NOREF(pNetwork);
3208
3209 switch (pEthHdr->EtherType)
3210 {
3211 case RT_H2N_U16_C(RTNET_ETHERTYPE_ARP):
3212 {
3213 uint16_t ar_oper;
3214 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETARPHDR, ar_oper),
3215 sizeof(ar_oper), &ar_oper))
3216 return false;
3217
3218 if (ar_oper == RT_H2N_U16_C(RTNET_ARPOP_REQUEST))
3219 {
3220 /* change to broadcast */
3221 pEthHdr->DstMac.au16[0] = 0xffff;
3222 pEthHdr->DstMac.au16[1] = 0xffff;
3223 pEthHdr->DstMac.au16[2] = 0xffff;
3224 }
3225 else
3226 return false;
3227 break;
3228 }
3229
3230 case RT_H2N_U16_C(RTNET_ETHERTYPE_IPV4):
3231 {
3232 RTNETADDRIPV4 ip_dst;
3233 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPV4, ip_dst),
3234 sizeof(ip_dst), &ip_dst))
3235 return false;
3236
3237 if (ip_dst.u == 0xffffffff) /* 255.255.255.255? */
3238 {
3239 /* change to broadcast */
3240 pEthHdr->DstMac.au16[0] = 0xffff;
3241 pEthHdr->DstMac.au16[1] = 0xffff;
3242 pEthHdr->DstMac.au16[2] = 0xffff;
3243 }
3244 else if ((ip_dst.au8[0] & 0xf0) == 0xe0) /* IPv4 multicast? */
3245 {
3246 /* change to 01:00:5e:xx:xx:xx multicast ... */
3247 pEthHdr->DstMac.au8[0] = 0x01;
3248 pEthHdr->DstMac.au8[1] = 0x00;
3249 pEthHdr->DstMac.au8[2] = 0x5e;
3250 /* ... with lower 23 bits from the multicast IP address */
3251 pEthHdr->DstMac.au8[3] = ip_dst.au8[1] & 0x7f;
3252 pEthHdr->DstMac.au8[4] = ip_dst.au8[2];
3253 pEthHdr->DstMac.au8[5] = ip_dst.au8[3];
3254 }
3255 else
3256 return false;
3257 break;
3258 }
3259
3260 case RT_H2N_U16_C(RTNET_ETHERTYPE_IPV6):
3261 {
3262 RTNETADDRIPV6 ip6_dst;
3263 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPV6, ip6_dst),
3264 sizeof(ip6_dst), &ip6_dst))
3265 return false;
3266
3267 if (ip6_dst.au8[0] == 0xff) /* IPv6 multicast? */
3268 {
3269 pEthHdr->DstMac.au16[0] = 0x3333;
3270 pEthHdr->DstMac.au16[1] = ip6_dst.au16[6];
3271 pEthHdr->DstMac.au16[2] = ip6_dst.au16[7];
3272 }
3273 else
3274 return false;
3275 break;
3276 }
3277
3278 default:
3279 return false;
3280 }
3281
3282
3283 /*
3284 * Update ethernet destination in the segment.
3285 */
3286 intnetR0SgWritePart(pSG, RT_OFFSETOF(RTNETETHERHDR, DstMac), sizeof(pEthHdr->DstMac), &pEthHdr->DstMac);
3287
3288 return true;
3289}
3290
3291
3292/**
3293 * Snoops a multicast ICMPv6 ND DAD from the wire via the trunk connection.
3294 *
3295 * @param pNetwork The network the frame is being sent to.
3296 * @param pSG Pointer to the gather list for the frame.
3297 * @param pEthHdr Pointer to the ethernet header.
3298 */
3299static void intnetR0NetworkSnoopNAFromWire(PINTNETNETWORK pNetwork, PINTNETSG pSG, PRTNETETHERHDR pEthHdr)
3300{
3301 NOREF(pEthHdr);
3302
3303 /*
3304 * Check the minimum size and get a linear copy of the thing to work on,
3305 * using the temporary buffer if necessary.
3306 */
3307 if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + sizeof(RTNETIPV6) +
3308 sizeof(RTNETNDP)))
3309 return;
3310 PRTNETIPV6 pIPv6 = (PRTNETIPV6)((uint8_t *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
3311 if ( pSG->cSegsUsed != 1
3312 && pSG->aSegs[0].cb < sizeof(RTNETETHERHDR) + sizeof(RTNETIPV6) +
3313 sizeof(RTNETNDP))
3314 {
3315 Log6(("fw: Copying IPv6 pkt %u\n", sizeof(RTNETIPV6)));
3316 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), sizeof(RTNETIPV6)
3317 + sizeof(RTNETNDP), pNetwork->pbTmp))
3318 return;
3319 pSG->fFlags |= INTNETSG_FLAGS_PKT_CP_IN_TMP;
3320 pIPv6 = (PRTNETIPV6)pNetwork->pbTmp;
3321 }
3322
3323 PCRTNETNDP pNd = (PCRTNETNDP) (pIPv6 + 1);
3324
3325 /*
3326 * a multicast NS with :: as source address means a DAD packet.
3327 * if it comes from the wire and we have the DAD'd address in our cache,
3328 * flush the entry as the address is being acquired by someone else on
3329 * the network.
3330 */
3331 if ( pIPv6->ip6_hlim == 0xff
3332 && pIPv6->ip6_nxt == RTNETIPV6_PROT_ICMPV6
3333 && pNd->Hdr.icmp6_type == RTNETIPV6_ICMP_TYPE_NS
3334 && pNd->Hdr.icmp6_code == 0
3335 && pIPv6->ip6_src.QWords.qw0 == 0
3336 && pIPv6->ip6_src.QWords.qw1 == 0)
3337 {
3338
3339 intnetR0NetworkAddrCacheDelete(pNetwork, (PCRTNETADDRU) &pNd->target_address,
3340 kIntNetAddrType_IPv6, sizeof(RTNETADDRIPV6), "tif/ip6");
3341 }
3342}
3343/**
3344 * Edits an ARP packet arriving from the wire via the trunk connection.
3345 *
3346 * @param pNetwork The network the frame is being sent to.
3347 * @param pSG Pointer to the gather list for the frame.
3348 * The flags and data content may be updated.
3349 * @param pEthHdr Pointer to the ethernet header. This may also be
3350 * updated if it's a unicast...
3351 */
3352static void intnetR0NetworkEditArpFromWire(PINTNETNETWORK pNetwork, PINTNETSG pSG, PRTNETETHERHDR pEthHdr)
3353{
3354 /*
3355 * Check the minimum size and get a linear copy of the thing to work on,
3356 * using the temporary buffer if necessary.
3357 */
3358 if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + sizeof(RTNETARPIPV4)))
3359 return;
3360 PRTNETARPIPV4 pArpIPv4 = (PRTNETARPIPV4)((uint8_t *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
3361 if ( pSG->cSegsUsed != 1
3362 && pSG->aSegs[0].cb < sizeof(RTNETETHERHDR) + sizeof(RTNETARPIPV4))
3363 {
3364 Log6(("fw: Copying ARP pkt %u\n", sizeof(RTNETARPIPV4)));
3365 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), sizeof(RTNETARPIPV4), pNetwork->pbTmp))
3366 return;
3367 pSG->fFlags |= INTNETSG_FLAGS_PKT_CP_IN_TMP;
3368 pArpIPv4 = (PRTNETARPIPV4)pNetwork->pbTmp;
3369 }
3370
3371 /*
3372 * Ignore packets which doesn't interest us or we perceive as malformed.
3373 */
3374 if (RT_UNLIKELY( pArpIPv4->Hdr.ar_hlen != sizeof(RTMAC)
3375 || pArpIPv4->Hdr.ar_plen != sizeof(RTNETADDRIPV4)
3376 || pArpIPv4->Hdr.ar_htype != RT_H2BE_U16(RTNET_ARP_ETHER)
3377 || pArpIPv4->Hdr.ar_ptype != RT_H2BE_U16(RTNET_ETHERTYPE_IPV4)))
3378 return;
3379 uint16_t ar_oper = RT_H2BE_U16(pArpIPv4->Hdr.ar_oper);
3380 if (RT_UNLIKELY( ar_oper != RTNET_ARPOP_REQUEST
3381 && ar_oper != RTNET_ARPOP_REPLY))
3382 {
3383 Log6(("ar_oper=%#x\n", ar_oper));
3384 return;
3385 }
3386
3387 /* Tag it as ARP IPv4. */
3388 pSG->fFlags |= INTNETSG_FLAGS_ARP_IPV4;
3389
3390 /*
3391 * The thing we're interested in here is a reply to a query made by a guest
3392 * since we modified the MAC in the initial request the guest made.
3393 */
3394 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
3395 RTMAC MacAddrTrunk;
3396 if (pNetwork->MacTab.pTrunk)
3397 MacAddrTrunk = pNetwork->MacTab.pTrunk->MacAddr;
3398 else
3399 memset(&MacAddrTrunk, 0, sizeof(MacAddrTrunk));
3400 RTSpinlockRelease(pNetwork->hAddrSpinlock);
3401 if ( ar_oper == RTNET_ARPOP_REPLY
3402 && !memcmp(&pArpIPv4->ar_tha, &MacAddrTrunk, sizeof(RTMAC)))
3403 {
3404 PINTNETIF pIf = intnetR0NetworkAddrCacheLookupIf(pNetwork, (PCRTNETADDRU)&pArpIPv4->ar_tpa,
3405 kIntNetAddrType_IPv4, sizeof(pArpIPv4->ar_tpa));
3406 if (pIf)
3407 {
3408 Log6(("fw: ar_tha %.6Rhxs -> %.6Rhxs\n", &pArpIPv4->ar_tha, &pIf->MacAddr));
3409 pArpIPv4->ar_tha = pIf->MacAddr;
3410 if (!memcmp(&pEthHdr->DstMac, &MacAddrTrunk, sizeof(RTMAC)))
3411 {
3412 Log6(("fw: DstMac %.6Rhxs -> %.6Rhxs\n", &pEthHdr->DstMac, &pIf->MacAddr));
3413 pEthHdr->DstMac = pIf->MacAddr;
3414 if ((void *)pEthHdr != pSG->aSegs[0].pv)
3415 intnetR0SgWritePart(pSG, RT_OFFSETOF(RTNETETHERHDR, DstMac), sizeof(RTMAC), &pIf->MacAddr);
3416 }
3417 intnetR0BusyDecIf(pIf);
3418
3419 /* Write back the packet if we've been making changes to a buffered copy. */
3420 if (pSG->fFlags & INTNETSG_FLAGS_PKT_CP_IN_TMP)
3421 intnetR0SgWritePart(pSG, sizeof(RTNETETHERHDR), sizeof(PRTNETARPIPV4), pArpIPv4);
3422 }
3423 }
3424}
3425
3426
3427/**
3428 * Detects and edits an DHCP packet arriving from the internal net.
3429 *
3430 * @param pNetwork The network the frame is being sent to.
3431 * @param pSG Pointer to the gather list for the frame.
3432 * The flags and data content may be updated.
3433 * @param pEthHdr Pointer to the ethernet header. This may also be
3434 * updated if it's a unicast...
3435 */
3436static void intnetR0NetworkEditDhcpFromIntNet(PINTNETNETWORK pNetwork, PINTNETSG pSG, PRTNETETHERHDR pEthHdr)
3437{
3438 NOREF(pEthHdr);
3439
3440 /*
3441 * Check the minimum size and get a linear copy of the thing to work on,
3442 * using the temporary buffer if necessary.
3443 */
3444 if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN))
3445 return;
3446 /*
3447 * Get a pointer to a linear copy of the full packet, using the
3448 * temporary buffer if necessary.
3449 */
3450 PCRTNETIPV4 pIpHdr = (PCRTNETIPV4)((PCRTNETETHERHDR)pSG->aSegs[0].pv + 1);
3451 uint32_t cbPacket = pSG->cbTotal - sizeof(RTNETETHERHDR);
3452 if (pSG->cSegsUsed > 1)
3453 {
3454 cbPacket = RT_MIN(cbPacket, INTNETNETWORK_TMP_SIZE);
3455 Log6(("intnetR0NetworkEditDhcpFromIntNet: Copying IPv4/UDP/DHCP pkt %u\n", cbPacket));
3456 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), cbPacket, pNetwork->pbTmp))
3457 return;
3458 //pSG->fFlags |= INTNETSG_FLAGS_PKT_CP_IN_TMP;
3459 pIpHdr = (PCRTNETIPV4)pNetwork->pbTmp;
3460 }
3461
3462 /*
3463 * Validate the IP header and find the UDP packet.
3464 */
3465 if (!RTNetIPv4IsHdrValid(pIpHdr, cbPacket, pSG->cbTotal - sizeof(RTNETETHERHDR), true /*fCheckSum*/))
3466 {
3467 Log6(("intnetR0NetworkEditDhcpFromIntNet: bad ip header\n"));
3468 return;
3469 }
3470 size_t cbIpHdr = pIpHdr->ip_hl * 4;
3471 if ( pIpHdr->ip_p != RTNETIPV4_PROT_UDP /* DHCP is UDP. */
3472 || cbPacket < cbIpHdr + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN) /* Min DHCP packet len */
3473 return;
3474
3475 size_t cbUdpPkt = cbPacket - cbIpHdr;
3476 PCRTNETUDP pUdpHdr = (PCRTNETUDP)((uintptr_t)pIpHdr + cbIpHdr);
3477 /* We are only interested in DHCP packets coming from client to server. */
3478 if ( RT_BE2H_U16(pUdpHdr->uh_dport) != RTNETIPV4_PORT_BOOTPS
3479 || RT_BE2H_U16(pUdpHdr->uh_sport) != RTNETIPV4_PORT_BOOTPC)
3480 return;
3481
3482 /*
3483 * Check if the DHCP message is valid and get the type.
3484 */
3485 if (!RTNetIPv4IsUDPValid(pIpHdr, pUdpHdr, pUdpHdr + 1, cbUdpPkt, true /*fCheckSum*/))
3486 {
3487 Log6(("intnetR0NetworkEditDhcpFromIntNet: Bad UDP packet\n"));
3488 return;
3489 }
3490 PCRTNETBOOTP pDhcp = (PCRTNETBOOTP)(pUdpHdr + 1);
3491 uint8_t bMsgType;
3492 if (!RTNetIPv4IsDHCPValid(pUdpHdr, pDhcp, cbUdpPkt - sizeof(*pUdpHdr), &bMsgType))
3493 {
3494 Log6(("intnetR0NetworkEditDhcpFromIntNet: Bad DHCP packet\n"));
3495 return;
3496 }
3497
3498 switch (bMsgType)
3499 {
3500 case RTNET_DHCP_MT_DISCOVER:
3501 case RTNET_DHCP_MT_REQUEST:
3502 /*
3503 * Must set the broadcast flag or we won't catch the respons.
3504 */
3505 if (!(pDhcp->bp_flags & RT_H2BE_U16_C(RTNET_DHCP_FLAG_BROADCAST)))
3506 {
3507 Log6(("intnetR0NetworkEditDhcpFromIntNet: Setting broadcast flag in DHCP %#x, previously %x\n",
3508 bMsgType, pDhcp->bp_flags));
3509
3510 /* Patch flags */
3511 uint16_t uFlags = pDhcp->bp_flags | RT_H2BE_U16_C(RTNET_DHCP_FLAG_BROADCAST);
3512 intnetR0SgWritePart(pSG, (uintptr_t)&pDhcp->bp_flags - (uintptr_t)pIpHdr + sizeof(RTNETETHERHDR), sizeof(uFlags), &uFlags);
3513
3514 /* Patch UDP checksum */
3515 if (pUdpHdr->uh_sum != 0)
3516 {
3517 uint32_t uChecksum = (uint32_t)~pUdpHdr->uh_sum + RT_H2BE_U16_C(RTNET_DHCP_FLAG_BROADCAST);
3518 while (uChecksum >> 16)
3519 uChecksum = (uChecksum >> 16) + (uChecksum & 0xFFFF);
3520 uChecksum = ~uChecksum;
3521 intnetR0SgWritePart(pSG,
3522 (uintptr_t)&pUdpHdr->uh_sum - (uintptr_t)pIpHdr + sizeof(RTNETETHERHDR),
3523 sizeof(pUdpHdr->uh_sum),
3524 &uChecksum);
3525 }
3526 }
3527
3528#ifdef RT_OS_DARWIN
3529 /*
3530 * Work around little endian checksum issue in mac os x 10.7.0 GM.
3531 */
3532 if ( pIpHdr->ip_tos
3533 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_WORKAROUND_1))
3534 {
3535 /* Patch it. */
3536 uint8_t uTos = pIpHdr->ip_tos;
3537 uint8_t uZero = 0;
3538 intnetR0SgWritePart(pSG, sizeof(RTNETETHERHDR) + 1, sizeof(uZero), &uZero);
3539
3540 /* Patch the IP header checksum. */
3541 uint32_t uChecksum = (uint32_t)~pIpHdr->ip_sum - (uTos << 8);
3542 while (uChecksum >> 16)
3543 uChecksum = (uChecksum >> 16) + (uChecksum & 0xFFFF);
3544 uChecksum = ~uChecksum;
3545
3546 Log(("intnetR0NetworkEditDhcpFromIntNet: cleared ip_tos (was %#04x); ip_sum=%#06x -> %#06x\n",
3547 uTos, RT_BE2H_U16(pIpHdr->ip_sum), RT_BE2H_U16(uChecksum) ));
3548 intnetR0SgWritePart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPV4, ip_sum),
3549 sizeof(pIpHdr->ip_sum), &uChecksum);
3550 }
3551#endif
3552 break;
3553 }
3554}
3555
3556
3557/**
3558 * Checks if the callers context is okay for sending to the specified
3559 * destinations.
3560 *
3561 * @returns true if it's okay, false if it isn't.
3562 * @param pNetwork The network.
3563 * @param pIfSender The interface sending or NULL if it's the trunk.
3564 * @param pDstTab The destination table.
3565 */
3566DECLINLINE(bool) intnetR0NetworkIsContextOk(PINTNETNETWORK pNetwork, PINTNETIF pIfSender, PCINTNETDSTTAB pDstTab)
3567{
3568 NOREF(pNetwork);
3569
3570 /* Sending to the trunk is the problematic path. If the trunk is the
3571 sender we won't be sending to it, so no problem..
3572 Note! fTrunkDst may be set event if if the trunk is the sender. */
3573 if (!pIfSender)
3574 return true;
3575
3576 uint32_t const fTrunkDst = pDstTab->fTrunkDst;
3577 if (!fTrunkDst)
3578 return true;
3579
3580 /* ASSUMES: that the trunk won't change its report while we're checking. */
3581 PINTNETTRUNKIF pTrunk = pDstTab->pTrunk;
3582 if (pTrunk && (fTrunkDst & pTrunk->fNoPreemptDsts) == fTrunkDst)
3583 return true;
3584
3585 /* ASSUMES: That a preemption test detects HM contexts. (Will work on
3586 non-preemptive systems as well.) */
3587 if (RTThreadPreemptIsEnabled(NIL_RTTHREAD))
3588 return true;
3589 return false;
3590}
3591
3592
3593/**
3594 * Checks if the callers context is okay for doing a broadcast given the
3595 * specified source.
3596 *
3597 * @returns true if it's okay, false if it isn't.
3598 * @param pNetwork The network.
3599 * @param fSrc The source of the packet. (0 (intnet),
3600 * INTNETTRUNKDIR_HOST or INTNETTRUNKDIR_WIRE).
3601 */
3602DECLINLINE(bool) intnetR0NetworkIsContextOkForBroadcast(PINTNETNETWORK pNetwork, uint32_t fSrc)
3603{
3604 /* Sending to the trunk is the problematic path. If the trunk is the
3605 sender we won't be sending to it, so no problem. */
3606 if (fSrc)
3607 return true;
3608
3609 /* ASSUMES: That a preemption test detects HM contexts. (Will work on
3610 non-preemptive systems as well.) */
3611 if (RTThreadPreemptIsEnabled(NIL_RTTHREAD))
3612 return true;
3613
3614 /* PARANOIA: Grab the spinlock to make sure the trunk structure cannot be
3615 freed while we're touching it. */
3616 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
3617 PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
3618
3619 bool fRc = !pTrunk
3620 || pTrunk->fNoPreemptDsts == (INTNETTRUNKDIR_HOST | INTNETTRUNKDIR_WIRE)
3621 || ( (!pNetwork->MacTab.fHostActive || (pTrunk->fNoPreemptDsts & INTNETTRUNKDIR_HOST) )
3622 && (!pNetwork->MacTab.fWireActive || (pTrunk->fNoPreemptDsts & INTNETTRUNKDIR_WIRE) ) );
3623
3624 RTSpinlockRelease(pNetwork->hAddrSpinlock);
3625
3626 return fRc;
3627}
3628
3629
3630/**
3631 * Check context, edit, snoop and switch a broadcast frame when sharing MAC
3632 * address on the wire.
3633 *
3634 * The caller must hold at least one interface on the network busy to prevent it
3635 * from destructing beath us.
3636 *
3637 * @param pNetwork The network the frame is being sent to.
3638 * @param fSrc The source of the packet. (0 (intnet),
3639 * INTNETTRUNKDIR_HOST or INTNETTRUNKDIR_WIRE).
3640 * @param pIfSender The sender interface, NULL if trunk. Used to
3641 * prevent sending an echo to the sender.
3642 * @param pSG Pointer to the gather list.
3643 * @param pEthHdr Pointer to the ethernet header.
3644 * @param pDstTab The destination output table.
3645 */
3646static INTNETSWDECISION intnetR0NetworkSharedMacFixAndSwitchBroadcast(PINTNETNETWORK pNetwork,
3647 uint32_t fSrc, PINTNETIF pIfSender,
3648 PINTNETSG pSG, PRTNETETHERHDR pEthHdr,
3649 PINTNETDSTTAB pDstTab)
3650{
3651 /*
3652 * Before doing any work here, we need to figure out if we can handle it
3653 * in the current context. The restrictions are solely on the trunk.
3654 *
3655 * Note! Since at least one interface is busy, there won't be any changes
3656 * to the parameters here (unless the trunk changes its capability
3657 * report, which it shouldn't).
3658 */
3659 if (!intnetR0NetworkIsContextOkForBroadcast(pNetwork, fSrc))
3660 return INTNETSWDECISION_BAD_CONTEXT;
3661
3662 /*
3663 * Check for ICMPv6 Neighbor Advertisements coming from the trunk.
3664 * If we see an advertisement for an IP in our cache, we can safely remove
3665 * it as the IP has probably moved.
3666 */
3667 if ( (fSrc & INTNETTRUNKDIR_WIRE)
3668 && RT_BE2H_U16(pEthHdr->EtherType) == RTNET_ETHERTYPE_IPV6
3669 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
3670 intnetR0NetworkSnoopNAFromWire(pNetwork, pSG, pEthHdr);
3671
3672
3673 /*
3674 * Check for ARP packets from the wire since we'll have to make
3675 * modification to them if we're sharing the MAC address with the host.
3676 */
3677 if ( (fSrc & INTNETTRUNKDIR_WIRE)
3678 && RT_BE2H_U16(pEthHdr->EtherType) == RTNET_ETHERTYPE_ARP
3679 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
3680 intnetR0NetworkEditArpFromWire(pNetwork, pSG, pEthHdr);
3681
3682 /*
3683 * Check for DHCP packets from the internal net since we'll have to set
3684 * broadcast flag in DHCP requests if we're sharing the MAC address with
3685 * the host. GSO is not applicable to DHCP traffic.
3686 */
3687 if ( !fSrc
3688 && RT_BE2H_U16(pEthHdr->EtherType) == RTNET_ETHERTYPE_IPV4
3689 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
3690 intnetR0NetworkEditDhcpFromIntNet(pNetwork, pSG, pEthHdr);
3691
3692 /*
3693 * Snoop address info from packet originating from the trunk connection.
3694 */
3695 if (fSrc)
3696 {
3697#ifdef INTNET_WITH_DHCP_SNOOPING
3698 uint16_t EtherType = RT_BE2H_U16(pEthHdr->EtherType);
3699 if ( ( EtherType == RTNET_ETHERTYPE_IPV4 /* for DHCP */
3700 && pSG->cbTotal >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN
3701 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID )
3702 || (pSG->fFlags & INTNETSG_FLAGS_ARP_IPV4) )
3703 intnetR0TrunkIfSnoopAddr(pNetwork, pSG, EtherType);
3704#else
3705 if (pSG->fFlags & INTNETSG_FLAGS_ARP_IPV4)
3706 intnetR0TrunkIfSnoopArp(pNetwork, pSG);
3707#endif
3708 }
3709
3710 /*
3711 * Create the broadcast destination table.
3712 */
3713 return intnetR0NetworkSwitchBroadcast(pNetwork, fSrc, pIfSender, pDstTab);
3714}
3715
3716
3717/**
3718 * Check context, snoop and switch a unicast frame using the network layer
3719 * address of the link layer one (when sharing MAC address on the wire).
3720 *
3721 * This function is only used for frames coming from the wire (trunk).
3722 *
3723 * @returns true if it's addressed to someone on the network, otherwise false.
3724 * @param pNetwork The network the frame is being sent to.
3725 * @param pSG Pointer to the gather list.
3726 * @param pEthHdr Pointer to the ethernet header.
3727 * @param pDstTab The destination output table.
3728 */
3729static INTNETSWDECISION intnetR0NetworkSharedMacFixAndSwitchUnicast(PINTNETNETWORK pNetwork, PINTNETSG pSG,
3730 PRTNETETHERHDR pEthHdr, PINTNETDSTTAB pDstTab)
3731{
3732 /*
3733 * Extract the network address from the packet.
3734 */
3735 RTNETADDRU Addr;
3736 INTNETADDRTYPE enmAddrType;
3737 uint8_t cbAddr;
3738 switch (RT_BE2H_U16(pEthHdr->EtherType))
3739 {
3740 case RTNET_ETHERTYPE_IPV4:
3741 if (RT_UNLIKELY(!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPV4, ip_dst), sizeof(Addr.IPv4), &Addr)))
3742 {
3743 Log(("intnetshareduni: failed to read ip_dst! cbTotal=%#x\n", pSG->cbTotal));
3744 return intnetR0NetworkSwitchTrunk(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
3745 }
3746 enmAddrType = kIntNetAddrType_IPv4;
3747 cbAddr = sizeof(Addr.IPv4);
3748 Log6(("intnetshareduni: IPv4 %d.%d.%d.%d\n", Addr.au8[0], Addr.au8[1], Addr.au8[2], Addr.au8[3]));
3749 break;
3750
3751 case RTNET_ETHERTYPE_IPV6:
3752 if (RT_UNLIKELY(!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPV6, ip6_dst), sizeof(Addr.IPv6), &Addr)))
3753 {
3754 Log(("intnetshareduni: failed to read ip6_dst! cbTotal=%#x\n", pSG->cbTotal));
3755 return intnetR0NetworkSwitchTrunk(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
3756 }
3757 enmAddrType = kIntNetAddrType_IPv6;
3758 cbAddr = sizeof(Addr.IPv6);
3759 break;
3760#if 0 /** @todo IntNet: implement IPX for wireless MAC sharing? */
3761 case RTNET_ETHERTYPE_IPX_1:
3762 case RTNET_ETHERTYPE_IPX_2:
3763 case RTNET_ETHERTYPE_IPX_3:
3764 if (RT_UNLIKELY(!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPX, ipx_dstnet), sizeof(Addr.IPX), &Addr)))
3765 {
3766 Log(("intnetshareduni: failed to read ipx_dstnet! cbTotal=%#x\n", pSG->cbTotal));
3767 return intnetR0NetworkSwitchTrunk(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
3768 }
3769 enmAddrType = kIntNetAddrType_IPX;
3770 cbAddr = sizeof(Addr.IPX);
3771 break;
3772#endif
3773
3774 /*
3775 * Treat ARP as broadcast (it shouldn't end up here normally,
3776 * so it goes last in the switch).
3777 */
3778 case RTNET_ETHERTYPE_ARP:
3779 Log6(("intnetshareduni: ARP\n"));
3780 /** @todo revisit this broadcasting of unicast ARP frames! */
3781 return intnetR0NetworkSharedMacFixAndSwitchBroadcast(pNetwork, INTNETTRUNKDIR_WIRE, NULL, pSG, pEthHdr, pDstTab);
3782
3783 /*
3784 * Unknown packets are sent to the trunk and any promiscuous interfaces.
3785 */
3786 default:
3787 {
3788 Log6(("intnetshareduni: unknown ethertype=%#x\n", RT_BE2H_U16(pEthHdr->EtherType)));
3789 return intnetR0NetworkSwitchTrunkAndPromisc(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
3790 }
3791 }
3792
3793 /*
3794 * Do level-3 switching.
3795 */
3796 INTNETSWDECISION enmSwDecision = intnetR0NetworkSwitchLevel3(pNetwork, &pEthHdr->DstMac,
3797 enmAddrType, &Addr, cbAddr,
3798 INTNETTRUNKDIR_WIRE, pDstTab);
3799
3800#ifdef INTNET_WITH_DHCP_SNOOPING
3801 /*
3802 * Perform DHCP snooping. GSO is not applicable to DHCP traffic
3803 */
3804 if ( enmAddrType == kIntNetAddrType_IPv4
3805 && pSG->cbTotal >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN
3806 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
3807 intnetR0TrunkIfSnoopAddr(pNetwork, pSG, RT_BE2H_U16(pEthHdr->EtherType));
3808#endif /* INTNET_WITH_DHCP_SNOOPING */
3809
3810 return enmSwDecision;
3811}
3812
3813
3814/**
3815 * Release all the interfaces in the destination table when we realize that
3816 * we're in a context where we cannot get the job done.
3817 *
3818 * @param pNetwork The network.
3819 * @param pDstTab The destination table.
3820 */
3821static void intnetR0NetworkReleaseDstTab(PINTNETNETWORK pNetwork, PINTNETDSTTAB pDstTab)
3822{
3823 /* The trunk interface. */
3824 if (pDstTab->fTrunkDst)
3825 {
3826 PINTNETTRUNKIF pTrunk = pDstTab->pTrunk;
3827 if (pTrunk)
3828 intnetR0BusyDec(pNetwork, &pTrunk->cBusy);
3829 pDstTab->pTrunk = NULL;
3830 pDstTab->fTrunkDst = 0;
3831 }
3832
3833 /* Regular interfaces. */
3834 uint32_t iIf = pDstTab->cIfs;
3835 while (iIf-- > 0)
3836 {
3837 PINTNETIF pIf = pDstTab->aIfs[iIf].pIf;
3838 intnetR0BusyDecIf(pIf);
3839 pDstTab->aIfs[iIf].pIf = NULL;
3840 }
3841 pDstTab->cIfs = 0;
3842}
3843
3844
3845/**
3846 * Deliver the frame to the interfaces specified in the destination table.
3847 *
3848 * @param pNetwork The network.
3849 * @param pDstTab The destination table.
3850 * @param pSG The frame to send.
3851 * @param pIfSender The sender interface. NULL if it originated via
3852 * the trunk.
3853 */
3854static void intnetR0NetworkDeliver(PINTNETNETWORK pNetwork, PINTNETDSTTAB pDstTab, PINTNETSG pSG, PINTNETIF pIfSender)
3855{
3856 /*
3857 * Do the interfaces first before sending it to the wire and risk having to
3858 * modify it.
3859 */
3860 uint32_t iIf = pDstTab->cIfs;
3861 while (iIf-- > 0)
3862 {
3863 PINTNETIF pIf = pDstTab->aIfs[iIf].pIf;
3864 intnetR0IfSend(pIf, pIfSender, pSG,
3865 pDstTab->aIfs[iIf].fReplaceDstMac ? &pIf->MacAddr: NULL);
3866 intnetR0BusyDecIf(pIf);
3867 pDstTab->aIfs[iIf].pIf = NULL;
3868 }
3869 pDstTab->cIfs = 0;
3870
3871 /*
3872 * Send to the trunk.
3873 *
3874 * Note! The switching functions will include the trunk even when the frame
3875 * source is the trunk. This is because we need it to figure out
3876 * whether the other half of the trunk should see the frame or not
3877 * and let the caller know.
3878 *
3879 * So, we'll ignore trunk sends here if the frame origin is
3880 * INTNETTRUNKSWPORT::pfnRecv.
3881 */
3882 if (pDstTab->fTrunkDst)
3883 {
3884 PINTNETTRUNKIF pTrunk = pDstTab->pTrunk;
3885 if (pTrunk)
3886 {
3887 if (pIfSender)
3888 intnetR0TrunkIfSend(pTrunk, pNetwork, pIfSender, pDstTab->fTrunkDst, pSG);
3889 intnetR0BusyDec(pNetwork, &pTrunk->cBusy);
3890 }
3891 pDstTab->pTrunk = NULL;
3892 pDstTab->fTrunkDst = 0;
3893 }
3894}
3895
3896
3897/**
3898 * Sends a frame.
3899 *
3900 * This function will distribute the frame to the interfaces it is addressed to.
3901 * It will also update the MAC address of the sender.
3902 *
3903 * The caller must own the network mutex.
3904 *
3905 * @returns The switching decision.
3906 * @param pNetwork The network the frame is being sent to.
3907 * @param pIfSender The interface sending the frame. This is NULL if it's the trunk.
3908 * @param fSrc The source flags. This 0 if it's not from the trunk.
3909 * @param pSG Pointer to the gather list.
3910 * @param pDstTab The destination table to use.
3911 */
3912static INTNETSWDECISION intnetR0NetworkSend(PINTNETNETWORK pNetwork, PINTNETIF pIfSender, uint32_t fSrc,
3913 PINTNETSG pSG, PINTNETDSTTAB pDstTab)
3914{
3915 /*
3916 * Assert reality.
3917 */
3918 AssertPtr(pNetwork);
3919 AssertPtrNull(pIfSender);
3920 Assert(pIfSender ? fSrc == 0 : fSrc != 0);
3921 Assert(!pIfSender || pNetwork == pIfSender->pNetwork);
3922 AssertPtr(pSG);
3923 Assert(pSG->cSegsUsed >= 1);
3924 Assert(pSG->cSegsUsed <= pSG->cSegsAlloc);
3925 if (pSG->cbTotal < sizeof(RTNETETHERHDR))
3926 return INTNETSWDECISION_INVALID;
3927
3928 /*
3929 * Get the ethernet header (might theoretically involve multiple segments).
3930 */
3931 RTNETETHERHDR EthHdr;
3932 if (pSG->aSegs[0].cb >= sizeof(EthHdr))
3933 EthHdr = *(PCRTNETETHERHDR)pSG->aSegs[0].pv;
3934 else if (!intnetR0SgReadPart(pSG, 0, sizeof(EthHdr), &EthHdr))
3935 return INTNETSWDECISION_INVALID;
3936 if ( (EthHdr.DstMac.au8[0] == 0x08 && EthHdr.DstMac.au8[1] == 0x00 && EthHdr.DstMac.au8[2] == 0x27)
3937 || (EthHdr.SrcMac.au8[0] == 0x08 && EthHdr.SrcMac.au8[1] == 0x00 && EthHdr.SrcMac.au8[2] == 0x27)
3938 || (EthHdr.DstMac.au8[0] == 0x00 && EthHdr.DstMac.au8[1] == 0x16 && EthHdr.DstMac.au8[2] == 0xcb)
3939 || (EthHdr.SrcMac.au8[0] == 0x00 && EthHdr.SrcMac.au8[1] == 0x16 && EthHdr.SrcMac.au8[2] == 0xcb)
3940 || EthHdr.DstMac.au8[0] == 0xff
3941 || EthHdr.SrcMac.au8[0] == 0xff)
3942 Log2(("D=%.6Rhxs S=%.6Rhxs T=%04x f=%x z=%x\n",
3943 &EthHdr.DstMac, &EthHdr.SrcMac, RT_BE2H_U16(EthHdr.EtherType), fSrc, pSG->cbTotal));
3944
3945 /*
3946 * Learn the MAC address of the sender. No re-learning as the interface
3947 * user will normally tell us the right MAC address.
3948 *
3949 * Note! We don't notify the trunk about these mainly because of the
3950 * problematic contexts we might be called in.
3951 */
3952 if (RT_UNLIKELY( pIfSender
3953 && !pIfSender->fMacSet
3954 && memcmp(&EthHdr.SrcMac, &pIfSender->MacAddr, sizeof(pIfSender->MacAddr))
3955 && !intnetR0IsMacAddrMulticast(&EthHdr.SrcMac)
3956 ))
3957 {
3958 Log2(("IF MAC: %.6Rhxs -> %.6Rhxs\n", &pIfSender->MacAddr, &EthHdr.SrcMac));
3959 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
3960
3961 PINTNETMACTABENTRY pIfEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIfSender);
3962 if (pIfEntry)
3963 pIfEntry->MacAddr = EthHdr.SrcMac;
3964 pIfSender->MacAddr = EthHdr.SrcMac;
3965
3966 RTSpinlockRelease(pNetwork->hAddrSpinlock);
3967 }
3968
3969 /*
3970 * Deal with MAC address sharing as that may required editing of the
3971 * packets before we dispatch them anywhere.
3972 */
3973 INTNETSWDECISION enmSwDecision;
3974 if (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
3975 {
3976 if (intnetR0IsMacAddrMulticast(&EthHdr.DstMac))
3977 enmSwDecision = intnetR0NetworkSharedMacFixAndSwitchBroadcast(pNetwork, fSrc, pIfSender, pSG, &EthHdr, pDstTab);
3978 else if (fSrc & INTNETTRUNKDIR_WIRE)
3979 {
3980 if (intnetR0NetworkSharedMacDetectAndFixBroadcast(pNetwork, pSG, &EthHdr))
3981 enmSwDecision = intnetR0NetworkSharedMacFixAndSwitchBroadcast(pNetwork, fSrc, pIfSender, pSG, &EthHdr, pDstTab);
3982 else
3983 enmSwDecision = intnetR0NetworkSharedMacFixAndSwitchUnicast(pNetwork, pSG, &EthHdr, pDstTab);
3984 }
3985 else
3986 enmSwDecision = intnetR0NetworkSwitchUnicast(pNetwork, fSrc, pIfSender, &EthHdr.DstMac, pDstTab);
3987 }
3988 else if (intnetR0IsMacAddrMulticast(&EthHdr.DstMac))
3989 enmSwDecision = intnetR0NetworkSwitchBroadcast(pNetwork, fSrc, pIfSender, pDstTab);
3990 else
3991 enmSwDecision = intnetR0NetworkSwitchUnicast(pNetwork, fSrc, pIfSender, &EthHdr.DstMac, pDstTab);
3992
3993 /*
3994 * Deliver to the destinations if we can.
3995 */
3996 if (enmSwDecision != INTNETSWDECISION_BAD_CONTEXT)
3997 {
3998 if (intnetR0NetworkIsContextOk(pNetwork, pIfSender, pDstTab))
3999 intnetR0NetworkDeliver(pNetwork, pDstTab, pSG, pIfSender);
4000 else
4001 {
4002 intnetR0NetworkReleaseDstTab(pNetwork, pDstTab);
4003 enmSwDecision = INTNETSWDECISION_BAD_CONTEXT;
4004 }
4005 }
4006
4007 return enmSwDecision;
4008}
4009
4010
4011/**
4012 * Sends one or more frames.
4013 *
4014 * The function will first the frame which is passed as the optional arguments
4015 * pvFrame and cbFrame. These are optional since it also possible to chain
4016 * together one or more frames in the send buffer which the function will
4017 * process after considering it's arguments.
4018 *
4019 * The caller is responsible for making sure that there are no concurrent calls
4020 * to this method (with the same handle).
4021 *
4022 * @returns VBox status code.
4023 * @param hIf The interface handle.
4024 * @param pSession The caller's session.
4025 */
4026INTNETR0DECL(int) IntNetR0IfSend(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession)
4027{
4028 Log5(("IntNetR0IfSend: hIf=%RX32\n", hIf));
4029
4030 /*
4031 * Validate input and translate the handle.
4032 */
4033 PINTNET pIntNet = g_pIntNet;
4034 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4035 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4036
4037 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4038 if (!pIf)
4039 return VERR_INVALID_HANDLE;
4040 STAM_REL_PROFILE_START(&pIf->pIntBuf->StatSend1, a);
4041
4042 /*
4043 * Make sure we've got a network.
4044 */
4045 int rc = VINF_SUCCESS;
4046 intnetR0BusyIncIf(pIf);
4047 PINTNETNETWORK pNetwork = pIf->pNetwork;
4048 if (RT_LIKELY(pNetwork))
4049 {
4050 /*
4051 * Grab the destination table.
4052 */
4053 PINTNETDSTTAB pDstTab = ASMAtomicXchgPtrT(&pIf->pDstTab, NULL, PINTNETDSTTAB);
4054 if (RT_LIKELY(pDstTab))
4055 {
4056 /*
4057 * Process the send buffer.
4058 */
4059 INTNETSWDECISION enmSwDecision = INTNETSWDECISION_BROADCAST;
4060 INTNETSG Sg; /** @todo this will have to be changed if we're going to use async sending
4061 * with buffer sharing for some OS or service. Darwin copies everything so
4062 * I won't bother allocating and managing SGs right now. Sorry. */
4063 PINTNETHDR pHdr;
4064 while ((pHdr = IntNetRingGetNextFrameToRead(&pIf->pIntBuf->Send)) != NULL)
4065 {
4066 uint8_t const u8Type = pHdr->u8Type;
4067 if (u8Type == INTNETHDR_TYPE_FRAME)
4068 {
4069 /* Send regular frame. */
4070 void *pvCurFrame = IntNetHdrGetFramePtr(pHdr, pIf->pIntBuf);
4071 IntNetSgInitTemp(&Sg, pvCurFrame, pHdr->cbFrame);
4072 if (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
4073 intnetR0IfSnoopAddr(pIf, (uint8_t *)pvCurFrame, pHdr->cbFrame, false /*fGso*/, (uint16_t *)&Sg.fFlags);
4074 enmSwDecision = intnetR0NetworkSend(pNetwork, pIf, 0 /*fSrc*/, &Sg, pDstTab);
4075 }
4076 else if (u8Type == INTNETHDR_TYPE_GSO)
4077 {
4078 /* Send GSO frame if sane. */
4079 PPDMNETWORKGSO pGso = IntNetHdrGetGsoContext(pHdr, pIf->pIntBuf);
4080 uint32_t cbFrame = pHdr->cbFrame - sizeof(*pGso);
4081 if (RT_LIKELY(PDMNetGsoIsValid(pGso, pHdr->cbFrame, cbFrame)))
4082 {
4083 void *pvCurFrame = pGso + 1;
4084 IntNetSgInitTempGso(&Sg, pvCurFrame, cbFrame, pGso);
4085 if (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
4086 intnetR0IfSnoopAddr(pIf, (uint8_t *)pvCurFrame, cbFrame, true /*fGso*/, (uint16_t *)&Sg.fFlags);
4087 enmSwDecision = intnetR0NetworkSend(pNetwork, pIf, 0 /*fSrc*/, &Sg, pDstTab);
4088 }
4089 else
4090 {
4091 STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatBadFrames); /* ignore */
4092 enmSwDecision = INTNETSWDECISION_DROP;
4093 }
4094 }
4095 /* Unless it's a padding frame, we're getting babble from the producer. */
4096 else
4097 {
4098 if (u8Type != INTNETHDR_TYPE_PADDING)
4099 STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatBadFrames); /* ignore */
4100 enmSwDecision = INTNETSWDECISION_DROP;
4101 }
4102 if (enmSwDecision == INTNETSWDECISION_BAD_CONTEXT)
4103 {
4104 rc = VERR_TRY_AGAIN;
4105 break;
4106 }
4107
4108 /* Skip to the next frame. */
4109 IntNetRingSkipFrame(&pIf->pIntBuf->Send);
4110 }
4111
4112 /*
4113 * Put back the destination table.
4114 */
4115 Assert(!pIf->pDstTab);
4116 ASMAtomicWritePtr(&pIf->pDstTab, pDstTab);
4117 }
4118 else
4119 rc = VERR_INTERNAL_ERROR_4;
4120 }
4121 else
4122 rc = VERR_INTERNAL_ERROR_3;
4123
4124 /*
4125 * Release the interface.
4126 */
4127 intnetR0BusyDecIf(pIf);
4128 STAM_REL_PROFILE_STOP(&pIf->pIntBuf->StatSend1, a);
4129 intnetR0IfRelease(pIf, pSession);
4130 return rc;
4131}
4132
4133
4134/**
4135 * VMMR0 request wrapper for IntNetR0IfSend.
4136 *
4137 * @returns see IntNetR0IfSend.
4138 * @param pSession The caller's session.
4139 * @param pReq The request packet.
4140 */
4141INTNETR0DECL(int) IntNetR0IfSendReq(PSUPDRVSESSION pSession, PINTNETIFSENDREQ pReq)
4142{
4143 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4144 return VERR_INVALID_PARAMETER;
4145 return IntNetR0IfSend(pReq->hIf, pSession);
4146}
4147
4148
4149/**
4150 * Maps the default buffer into ring 3.
4151 *
4152 * @returns VBox status code.
4153 * @param hIf The interface handle.
4154 * @param pSession The caller's session.
4155 * @param ppRing3Buf Where to store the address of the ring-3 mapping
4156 * (optional).
4157 * @param ppRing0Buf Where to store the address of the ring-0 mapping
4158 * (optional).
4159 */
4160INTNETR0DECL(int) IntNetR0IfGetBufferPtrs(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession,
4161 R3PTRTYPE(PINTNETBUF) *ppRing3Buf, R0PTRTYPE(PINTNETBUF) *ppRing0Buf)
4162{
4163 LogFlow(("IntNetR0IfGetBufferPtrs: hIf=%RX32 ppRing3Buf=%p ppRing0Buf=%p\n", hIf, ppRing3Buf, ppRing0Buf));
4164
4165 /*
4166 * Validate input.
4167 */
4168 PINTNET pIntNet = g_pIntNet;
4169 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4170 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4171
4172 AssertPtrNullReturn(ppRing3Buf, VERR_INVALID_PARAMETER);
4173 AssertPtrNullReturn(ppRing0Buf, VERR_INVALID_PARAMETER);
4174 if (ppRing3Buf)
4175 *ppRing3Buf = 0;
4176 if (ppRing0Buf)
4177 *ppRing0Buf = 0;
4178
4179 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4180 if (!pIf)
4181 return VERR_INVALID_HANDLE;
4182
4183 /*
4184 * ASSUMES that only the process that created an interface can use it.
4185 * ASSUMES that we created the ring-3 mapping when selecting or
4186 * allocating the buffer.
4187 */
4188 int rc = RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
4189 if (RT_SUCCESS(rc))
4190 {
4191 if (ppRing3Buf)
4192 *ppRing3Buf = pIf->pIntBufR3;
4193 if (ppRing0Buf)
4194 *ppRing0Buf = (R0PTRTYPE(PINTNETBUF))pIf->pIntBuf; /* tstIntNetR0 mess */
4195
4196 rc = RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
4197 }
4198
4199 intnetR0IfRelease(pIf, pSession);
4200 LogFlow(("IntNetR0IfGetBufferPtrs: returns %Rrc *ppRing3Buf=%p *ppRing0Buf=%p\n",
4201 rc, ppRing3Buf ? *ppRing3Buf : NIL_RTR3PTR, ppRing0Buf ? *ppRing0Buf : NIL_RTR0PTR));
4202 return rc;
4203}
4204
4205
4206/**
4207 * VMMR0 request wrapper for IntNetR0IfGetBufferPtrs.
4208 *
4209 * @returns see IntNetR0IfGetRing3Buffer.
4210 * @param pSession The caller's session.
4211 * @param pReq The request packet.
4212 */
4213INTNETR0DECL(int) IntNetR0IfGetBufferPtrsReq(PSUPDRVSESSION pSession, PINTNETIFGETBUFFERPTRSREQ pReq)
4214{
4215 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4216 return VERR_INVALID_PARAMETER;
4217 return IntNetR0IfGetBufferPtrs(pReq->hIf, pSession, &pReq->pRing3Buf, &pReq->pRing0Buf);
4218}
4219
4220
#if 0
/**
 * Gets the physical addresses of the default interface buffer.
 *
 * Unimplemented and compiled out: the body validates the input, takes and
 * releases the network mutex, and then returns VERR_NOT_IMPLEMENTED.
 *
 * NOTE(review): the body uses pSession, which is not a parameter of this
 * function, so it would not compile if enabled as is.
 *
 * @returns VBox status code (currently always VERR_NOT_IMPLEMENTED once the
 *          input has been validated).
 * @param   hIf         The interface handle.
 * @param   paPages     Where to store the addresses. (The reserved fields will be set to zero.)
 * @param   cPages      Number of entries in the paPages array.
 */
INTNETR0DECL(int) IntNetR0IfGetPhysBuffer(INTNETIFHANDLE hIf, PSUPPAGE paPages, unsigned cPages)
{
    /*
     * Check the instance and the page array, then translate the handle.
     */
    PINTNET pIntNet = g_pIntNet;
    AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
    AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);

    AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
    AssertPtrReturn((uint8_t *)&paPages[cPages] - 1, VERR_INVALID_PARAMETER);
    PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
    if (!pIf)
        return VERR_INVALID_HANDLE;

    /*
     * Take the lock and get the data.
     * ASSUMES that the handle isn't closed while we're here.
     */
    int rc = RTSemFastMutexRequest(pIf->pNetwork->FastMutex);
    if (RT_SUCCESS(rc))
    {
        /** @todo make a SUPR0 api for obtaining the array. SUPR0/IPRT is keeping track of everything, there
         * is no need for any extra bookkeeping here.. */

        rc = RTSemFastMutexRelease(pIf->pNetwork->FastMutex);
    }
    intnetR0IfRelease(pIf, pSession);
    return VERR_NOT_IMPLEMENTED;
}
#endif
4261
4262
4263/**
4264 * Sets the promiscuous mode property of an interface.
4265 *
4266 * @returns VBox status code.
4267 * @param hIf The interface handle.
4268 * @param pSession The caller's session.
4269 * @param fPromiscuous Set if the interface should be in promiscuous mode, clear if not.
4270 */
4271INTNETR0DECL(int) IntNetR0IfSetPromiscuousMode(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, bool fPromiscuous)
4272{
4273 LogFlow(("IntNetR0IfSetPromiscuousMode: hIf=%RX32 fPromiscuous=%d\n", hIf, fPromiscuous));
4274
4275 /*
4276 * Validate & translate input.
4277 */
4278 PINTNET pIntNet = g_pIntNet;
4279 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4280 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4281
4282 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4283 if (!pIf)
4284 {
4285 Log(("IntNetR0IfSetPromiscuousMode: returns VERR_INVALID_HANDLE\n"));
4286 return VERR_INVALID_HANDLE;
4287 }
4288
4289 /*
4290 * Get the network, take the address spinlock, and make the change.
4291 * Paranoia^2: Mark ourselves busy to prevent anything from being destroyed.
4292 */
4293 int rc = VINF_SUCCESS;
4294 intnetR0BusyIncIf(pIf);
4295 PINTNETNETWORK pNetwork = pIf->pNetwork;
4296 if (pNetwork)
4297 {
4298 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
4299
4300 if (pIf->fPromiscuousReal != fPromiscuous)
4301 {
4302 const bool fPromiscuousEff = fPromiscuous
4303 && (pIf->fOpenFlags & INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW)
4304 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS);
4305 Log(("IntNetR0IfSetPromiscuousMode: hIf=%RX32: Changed from %d -> %d (%d)\n",
4306 hIf, !fPromiscuous, !!fPromiscuous, fPromiscuousEff));
4307
4308 pIf->fPromiscuousReal = fPromiscuous;
4309
4310 PINTNETMACTABENTRY pEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIf); Assert(pEntry);
4311 if (RT_LIKELY(pEntry))
4312 {
4313 if (pEntry->fPromiscuousEff)
4314 {
4315 pNetwork->MacTab.cPromiscuousEntries--;
4316 if (!pEntry->fPromiscuousSeeTrunk)
4317 pNetwork->MacTab.cPromiscuousNoTrunkEntries--;
4318 Assert(pNetwork->MacTab.cPromiscuousEntries < pNetwork->MacTab.cEntries);
4319 Assert(pNetwork->MacTab.cPromiscuousNoTrunkEntries < pNetwork->MacTab.cEntries);
4320 }
4321
4322 pEntry->fPromiscuousEff = fPromiscuousEff;
4323 pEntry->fPromiscuousSeeTrunk = fPromiscuousEff
4324 && (pIf->fOpenFlags & INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK);
4325
4326 if (pEntry->fPromiscuousEff)
4327 {
4328 pNetwork->MacTab.cPromiscuousEntries++;
4329 if (!pEntry->fPromiscuousSeeTrunk)
4330 pNetwork->MacTab.cPromiscuousNoTrunkEntries++;
4331 }
4332 Assert(pNetwork->MacTab.cPromiscuousEntries <= pNetwork->MacTab.cEntries);
4333 Assert(pNetwork->MacTab.cPromiscuousNoTrunkEntries <= pNetwork->MacTab.cEntries);
4334 }
4335 }
4336
4337 RTSpinlockRelease(pNetwork->hAddrSpinlock);
4338 }
4339 else
4340 rc = VERR_WRONG_ORDER;
4341
4342 intnetR0BusyDecIf(pIf);
4343 intnetR0IfRelease(pIf, pSession);
4344 return rc;
4345}
4346
4347
4348/**
4349 * VMMR0 request wrapper for IntNetR0IfSetPromiscuousMode.
4350 *
4351 * @returns see IntNetR0IfSetPromiscuousMode.
4352 * @param pSession The caller's session.
4353 * @param pReq The request packet.
4354 */
4355INTNETR0DECL(int) IntNetR0IfSetPromiscuousModeReq(PSUPDRVSESSION pSession, PINTNETIFSETPROMISCUOUSMODEREQ pReq)
4356{
4357 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4358 return VERR_INVALID_PARAMETER;
4359 return IntNetR0IfSetPromiscuousMode(pReq->hIf, pSession, pReq->fPromiscuous);
4360}
4361
4362
4363/**
4364 * Sets the MAC address of an interface.
4365 *
4366 * @returns VBox status code.
4367 * @param hIf The interface handle.
4368 * @param pSession The caller's session.
4369 * @param pMAC The new MAC address.
4370 */
4371INTNETR0DECL(int) IntNetR0IfSetMacAddress(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, PCRTMAC pMac)
4372{
4373 LogFlow(("IntNetR0IfSetMacAddress: hIf=%RX32 pMac=%p:{%.6Rhxs}\n", hIf, pMac, pMac));
4374
4375 /*
4376 * Validate & translate input.
4377 */
4378 PINTNET pIntNet = g_pIntNet;
4379 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4380 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4381
4382 AssertPtrReturn(pMac, VERR_INVALID_PARAMETER);
4383 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4384 if (!pIf)
4385 {
4386 Log(("IntNetR0IfSetMacAddress: returns VERR_INVALID_HANDLE\n"));
4387 return VERR_INVALID_HANDLE;
4388 }
4389
4390 /*
4391 * Get the network, take the address spinlock, and make the change.
4392 * Paranoia^2: Mark ourselves busy to prevent anything from being destroyed.
4393 */
4394 int rc = VINF_SUCCESS;
4395 intnetR0BusyIncIf(pIf);
4396 PINTNETNETWORK pNetwork = pIf->pNetwork;
4397 if (pNetwork)
4398 {
4399 PINTNETTRUNKIF pTrunk = NULL;
4400
4401 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
4402
4403 if (memcmp(&pIf->MacAddr, pMac, sizeof(pIf->MacAddr)))
4404 {
4405 Log(("IntNetR0IfSetMacAddress: hIf=%RX32: Changed from %.6Rhxs -> %.6Rhxs\n",
4406 hIf, &pIf->MacAddr, pMac));
4407
4408 /* Update the two copies. */
4409 PINTNETMACTABENTRY pEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIf); Assert(pEntry);
4410 if (RT_LIKELY(pEntry))
4411 pEntry->MacAddr = *pMac;
4412 pIf->MacAddr = *pMac;
4413 pIf->fMacSet = true;
4414
4415 /* Grab a busy reference to the trunk so we release the lock before notifying it. */
4416 pTrunk = pNetwork->MacTab.pTrunk;
4417 if (pTrunk)
4418 intnetR0BusyIncTrunk(pTrunk);
4419 }
4420
4421 RTSpinlockRelease(pNetwork->hAddrSpinlock);
4422
4423 if (pTrunk)
4424 {
4425 Log(("IntNetR0IfSetMacAddress: pfnNotifyMacAddress hIf=%RX32\n", hIf));
4426 PINTNETTRUNKIFPORT pIfPort = pTrunk->pIfPort;
4427 if (pIfPort)
4428 pIfPort->pfnNotifyMacAddress(pIfPort, pIf->pvIfData, pMac);
4429 intnetR0BusyDecTrunk(pTrunk);
4430 }
4431 }
4432 else
4433 rc = VERR_WRONG_ORDER;
4434
4435 intnetR0BusyDecIf(pIf);
4436 intnetR0IfRelease(pIf, pSession);
4437 return rc;
4438}
4439
4440
4441/**
4442 * VMMR0 request wrapper for IntNetR0IfSetMacAddress.
4443 *
4444 * @returns see IntNetR0IfSetMacAddress.
4445 * @param pSession The caller's session.
4446 * @param pReq The request packet.
4447 */
4448INTNETR0DECL(int) IntNetR0IfSetMacAddressReq(PSUPDRVSESSION pSession, PINTNETIFSETMACADDRESSREQ pReq)
4449{
4450 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4451 return VERR_INVALID_PARAMETER;
4452 return IntNetR0IfSetMacAddress(pReq->hIf, pSession, &pReq->Mac);
4453}
4454
4455
4456/**
4457 * Worker for intnetR0IfSetActive and intnetR0IfDestruct.
4458 *
4459 * This function will update the active interface count on the network and
4460 * activate or deactivate the trunk connection if necessary.
4461 *
4462 * The call must own the giant lock (we cannot take it here).
4463 *
4464 * @returns VBox status code.
4465 * @param pNetwork The network.
4466 * @param fIf The interface.
4467 * @param fActive What to do.
4468 */
4469static int intnetR0NetworkSetIfActive(PINTNETNETWORK pNetwork, PINTNETIF pIf, bool fActive)
4470{
4471 /* quick sanity check */
4472 AssertPtr(pNetwork);
4473 AssertPtr(pIf);
4474
4475 /*
4476 * The address spinlock of the network protects the variables, while the
4477 * big lock protects the calling of pfnSetState. Grab both lock at once
4478 * to save us the extra hassle.
4479 */
4480 PINTNETTRUNKIF pTrunk = NULL;
4481 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
4482
4483 /*
4484 * Do the update.
4485 */
4486 if (pIf->fActive != fActive)
4487 {
4488 PINTNETMACTABENTRY pEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIf); Assert(pEntry);
4489 if (RT_LIKELY(pEntry))
4490 {
4491 pEntry->fActive = fActive;
4492 pIf->fActive = fActive;
4493
4494 if (fActive)
4495 {
4496 pNetwork->cActiveIFs++;
4497 if (pNetwork->cActiveIFs == 1)
4498 {
4499 pTrunk = pNetwork->MacTab.pTrunk;
4500 if (pTrunk)
4501 {
4502 pNetwork->MacTab.fHostActive = RT_BOOL(pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED);
4503 pNetwork->MacTab.fWireActive = RT_BOOL(pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_WIRE_ENABLED);
4504 }
4505 }
4506 }
4507 else
4508 {
4509 pNetwork->cActiveIFs--;
4510 if (pNetwork->cActiveIFs == 0)
4511 {
4512 pTrunk = pNetwork->MacTab.pTrunk;
4513 pNetwork->MacTab.fHostActive = false;
4514 pNetwork->MacTab.fWireActive = false;
4515 }
4516 }
4517 }
4518 }
4519
4520 RTSpinlockRelease(pNetwork->hAddrSpinlock);
4521
4522 /*
4523 * Tell the trunk if necessary.
4524 * The wait for !busy is for the Solaris streams trunk driver (mostly).
4525 */
4526 if (pTrunk && pTrunk->pIfPort)
4527 {
4528 if (!fActive)
4529 intnetR0BusyWait(pNetwork, &pTrunk->cBusy);
4530
4531 pTrunk->pIfPort->pfnSetState(pTrunk->pIfPort, fActive ? INTNETTRUNKIFSTATE_ACTIVE : INTNETTRUNKIFSTATE_INACTIVE);
4532 }
4533
4534 return VINF_SUCCESS;
4535}
4536
4537
4538/**
4539 * Sets the active property of an interface.
4540 *
4541 * @returns VBox status code.
4542 * @param hIf The interface handle.
4543 * @param pSession The caller's session.
4544 * @param fActive The new state.
4545 */
4546INTNETR0DECL(int) IntNetR0IfSetActive(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, bool fActive)
4547{
4548 LogFlow(("IntNetR0IfSetActive: hIf=%RX32 fActive=%RTbool\n", hIf, fActive));
4549
4550 /*
4551 * Validate & translate input.
4552 */
4553 PINTNET pIntNet = g_pIntNet;
4554 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4555 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4556
4557 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4558 if (!pIf)
4559 {
4560 Log(("IntNetR0IfSetActive: returns VERR_INVALID_HANDLE\n"));
4561 return VERR_INVALID_HANDLE;
4562 }
4563
4564 /*
4565 * Hand it to the network since it might involve the trunk and things are
4566 * tricky there wrt to locking order.
4567 *
4568 * 1. We take the giant lock here. This makes sure nobody is re-enabling
4569 * the network while we're pausing it and vice versa. This also enables
4570 * us to wait for the network to become idle before telling the trunk.
4571 * (Important on Solaris.)
4572 *
4573 * 2. For paranoid reasons, we grab a busy reference to the calling
4574 * interface. This is totally unnecessary but should hurt (when done
4575 * after grabbing the giant lock).
4576 */
4577 int rc = RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
4578 if (RT_SUCCESS(rc))
4579 {
4580 intnetR0BusyIncIf(pIf);
4581
4582 PINTNETNETWORK pNetwork = pIf->pNetwork;
4583 if (pNetwork)
4584 rc = intnetR0NetworkSetIfActive(pNetwork, pIf, fActive);
4585 else
4586 rc = VERR_WRONG_ORDER;
4587
4588 intnetR0BusyDecIf(pIf);
4589 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
4590 }
4591
4592 intnetR0IfRelease(pIf, pSession);
4593 LogFlow(("IntNetR0IfSetActive: returns %Rrc\n", rc));
4594 return rc;
4595}
4596
4597
4598/**
4599 * VMMR0 request wrapper for IntNetR0IfSetActive.
4600 *
4601 * @returns see IntNetR0IfSetActive.
4602 * @param pIntNet The internal networking instance.
4603 * @param pSession The caller's session.
4604 * @param pReq The request packet.
4605 */
4606INTNETR0DECL(int) IntNetR0IfSetActiveReq(PSUPDRVSESSION pSession, PINTNETIFSETACTIVEREQ pReq)
4607{
4608 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4609 return VERR_INVALID_PARAMETER;
4610 return IntNetR0IfSetActive(pReq->hIf, pSession, pReq->fActive);
4611}
4612
4613
4614/**
4615 * Wait for the interface to get signaled.
4616 * The interface will be signaled when is put into the receive buffer.
4617 *
4618 * @returns VBox status code.
4619 * @param hIf The interface handle.
4620 * @param pSession The caller's session.
4621 * @param cMillies Number of milliseconds to wait. RT_INDEFINITE_WAIT should be
4622 * used if indefinite wait is desired.
4623 */
4624INTNETR0DECL(int) IntNetR0IfWait(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, uint32_t cMillies)
4625{
4626 Log4(("IntNetR0IfWait: hIf=%RX32 cMillies=%u\n", hIf, cMillies));
4627
4628 /*
4629 * Get and validate essential handles.
4630 */
4631 PINTNET pIntNet = g_pIntNet;
4632 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4633 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4634
4635 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4636 if (!pIf)
4637 {
4638 Log(("IntNetR0IfWait: returns VERR_INVALID_HANDLE\n"));
4639 return VERR_INVALID_HANDLE;
4640 }
4641
4642 const RTSEMEVENT hRecvEvent = pIf->hRecvEvent;
4643 const bool fNoMoreWaits = ASMAtomicUoReadBool(&pIf->fNoMoreWaits);
4644 RTNATIVETHREAD hDtorThrd;
4645 ASMAtomicReadHandle(&pIf->hDestructorThread, &hDtorThrd);
4646 if (hDtorThrd != NIL_RTNATIVETHREAD)
4647 {
4648 /* See IntNetR0IfAbortWait for an explanation of hDestructorThread. */
4649 Log(("IntNetR0IfWait: returns VERR_SEM_DESTROYED\n"));
4650 return VERR_SEM_DESTROYED;
4651 }
4652
4653 /* Check whether further waits have been barred by IntNetR0IfAbortWait. */
4654 int rc;
4655 if ( !fNoMoreWaits
4656 && hRecvEvent != NIL_RTSEMEVENT)
4657 {
4658 /*
4659 * It is tempting to check if there is data to be read here,
4660 * but the problem with such an approach is that it will cause
4661 * one unnecessary supervisor->user->supervisor trip. There is
4662 * already a slight risk for such, so no need to increase it.
4663 */
4664
4665 /*
4666 * Increment the number of waiters before starting the wait.
4667 * Upon wakeup we must assert reality, checking that we're not
4668 * already destroyed or in the process of being destroyed. This
4669 * code must be aligned with the waiting code in intnetR0IfDestruct.
4670 */
4671 ASMAtomicIncU32(&pIf->cSleepers);
4672 rc = RTSemEventWaitNoResume(hRecvEvent, cMillies);
4673 if (pIf->hRecvEvent == hRecvEvent)
4674 {
4675 ASMAtomicDecU32(&pIf->cSleepers);
4676 ASMAtomicReadHandle(&pIf->hDestructorThread, &hDtorThrd);
4677 if (hDtorThrd == NIL_RTNATIVETHREAD)
4678 {
4679 if (intnetR0IfRelease(pIf, pSession))
4680 rc = VERR_SEM_DESTROYED;
4681 }
4682 else
4683 rc = VERR_SEM_DESTROYED;
4684 }
4685 else
4686 rc = VERR_SEM_DESTROYED;
4687 }
4688 else
4689 {
4690 rc = VERR_SEM_DESTROYED;
4691 intnetR0IfRelease(pIf, pSession);
4692 }
4693
4694 Log4(("IntNetR0IfWait: returns %Rrc\n", rc));
4695 return rc;
4696}
4697
4698
4699/**
4700 * VMMR0 request wrapper for IntNetR0IfWait.
4701 *
4702 * @returns see IntNetR0IfWait.
4703 * @param pSession The caller's session.
4704 * @param pReq The request packet.
4705 */
4706INTNETR0DECL(int) IntNetR0IfWaitReq(PSUPDRVSESSION pSession, PINTNETIFWAITREQ pReq)
4707{
4708 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4709 return VERR_INVALID_PARAMETER;
4710 return IntNetR0IfWait(pReq->hIf, pSession, pReq->cMillies);
4711}
4712
4713
4714/**
4715 * Wake up any threads waiting on the interface.
4716 *
4717 * @returns VBox status code.
4718 * @param hIf The interface handle.
4719 * @param pSession The caller's session.
4720 * @param fNoMoreWaits When set, no more waits are permitted.
4721 */
4722INTNETR0DECL(int) IntNetR0IfAbortWait(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, bool fNoMoreWaits)
4723{
4724 Log4(("IntNetR0IfAbortWait: hIf=%RX32 fNoMoreWaits=%RTbool\n", hIf, fNoMoreWaits));
4725
4726 /*
4727 * Get and validate essential handles.
4728 */
4729 PINTNET pIntNet = g_pIntNet;
4730 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4731 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4732
4733 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4734 if (!pIf)
4735 {
4736 Log(("IntNetR0IfAbortWait: returns VERR_INVALID_HANDLE\n"));
4737 return VERR_INVALID_HANDLE;
4738 }
4739
4740 const RTSEMEVENT hRecvEvent = pIf->hRecvEvent;
4741 RTNATIVETHREAD hDtorThrd;
4742 ASMAtomicReadHandle(&pIf->hDestructorThread, &hDtorThrd);
4743 if (hDtorThrd != NIL_RTNATIVETHREAD)
4744 {
4745 /* This can only happen if we for some reason race SUPDRVSESSION cleanup,
4746 i.e. the object count is set to zero without yet having removed it from
4747 the object table, so we got a spurious "reference". We must drop that
4748 reference and let the destructor get on with its work. (Not entirely sure
4749 if this is practically possible on any of the platforms, i.e. whether it's
4750 we can actually close a SUPDrv handle/descriptor with active threads still
4751 in NtDeviceIoControlFile/ioctl, but better safe than sorry.) */
4752 Log(("IntNetR0IfAbortWait: returns VERR_SEM_DESTROYED\n"));
4753 return VERR_SEM_DESTROYED;
4754 }
4755
4756 /* a bit of paranoia */
4757 int rc = VINF_SUCCESS;
4758 if (hRecvEvent != NIL_RTSEMEVENT)
4759 {
4760 /*
4761 * Set fNoMoreWaits if requested to do so and then wake up all the sleeping
4762 * threads (usually just one). We leave the semaphore in the signalled
4763 * state so the next caller will return immediately.
4764 */
4765 if (fNoMoreWaits)
4766 ASMAtomicWriteBool(&pIf->fNoMoreWaits, true);
4767
4768 uint32_t cSleepers = ASMAtomicReadU32(&pIf->cSleepers) + 1;
4769 while (cSleepers-- > 0)
4770 {
4771 int rc2 = RTSemEventSignal(pIf->hRecvEvent);
4772 AssertRC(rc2);
4773 }
4774 }
4775 else
4776 rc = VERR_SEM_DESTROYED;
4777
4778 intnetR0IfRelease(pIf, pSession);
4779
4780 Log4(("IntNetR0IfWait: returns %Rrc\n", VINF_SUCCESS));
4781 return VINF_SUCCESS;
4782}
4783
4784
4785/**
4786 * VMMR0 request wrapper for IntNetR0IfAbortWait.
4787 *
4788 * @returns see IntNetR0IfWait.
4789 * @param pSession The caller's session.
4790 * @param pReq The request packet.
4791 */
4792INTNETR0DECL(int) IntNetR0IfAbortWaitReq(PSUPDRVSESSION pSession, PINTNETIFABORTWAITREQ pReq)
4793{
4794 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4795 return VERR_INVALID_PARAMETER;
4796 return IntNetR0IfAbortWait(pReq->hIf, pSession, pReq->fNoMoreWaits);
4797}
4798
4799
4800/**
4801 * Close an interface.
4802 *
4803 * @returns VBox status code.
4804 * @param pIntNet The instance handle.
4805 * @param hIf The interface handle.
4806 * @param pSession The caller's session.
4807 */
4808INTNETR0DECL(int) IntNetR0IfClose(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession)
4809{
4810 LogFlow(("IntNetR0IfClose: hIf=%RX32\n", hIf));
4811
4812 /*
4813 * Validate and free the handle.
4814 */
4815 PINTNET pIntNet = g_pIntNet;
4816 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4817 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4818
4819 PINTNETIF pIf = (PINTNETIF)RTHandleTableFreeWithCtx(pIntNet->hHtIfs, hIf, pSession);
4820 if (!pIf)
4821 return VERR_INVALID_HANDLE;
4822
4823 /* Mark the handle as freed so intnetR0IfDestruct won't free it again. */
4824 ASMAtomicWriteU32(&pIf->hIf, INTNET_HANDLE_INVALID);
4825
4826 /*
4827 * Signal the event semaphore to wake up any threads in IntNetR0IfWait
4828 * and give them a moment to get out and release the interface.
4829 */
4830 uint32_t i = pIf->cSleepers;
4831 while (i-- > 0)
4832 {
4833 RTSemEventSignal(pIf->hRecvEvent);
4834 RTThreadYield();
4835 }
4836 RTSemEventSignal(pIf->hRecvEvent);
4837
4838 /*
4839 * Release the references to the interface object (handle + free lookup).
4840 */
4841 void *pvObj = pIf->pvObj;
4842 intnetR0IfRelease(pIf, pSession); /* (RTHandleTableFreeWithCtx) */
4843
4844 int rc = SUPR0ObjRelease(pvObj, pSession);
4845 LogFlow(("IntNetR0IfClose: returns %Rrc\n", rc));
4846 return rc;
4847}
4848
4849
4850/**
4851 * VMMR0 request wrapper for IntNetR0IfCloseReq.
4852 *
4853 * @returns see IntNetR0IfClose.
4854 * @param pSession The caller's session.
4855 * @param pReq The request packet.
4856 */
4857INTNETR0DECL(int) IntNetR0IfCloseReq(PSUPDRVSESSION pSession, PINTNETIFCLOSEREQ pReq)
4858{
4859 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4860 return VERR_INVALID_PARAMETER;
4861 return IntNetR0IfClose(pReq->hIf, pSession);
4862}
4863
4864
4865/**
4866 * Interface destructor callback.
4867 * This is called for reference counted objectes when the count reaches 0.
4868 *
4869 * @param pvObj The object pointer.
4870 * @param pvUser1 Pointer to the interface.
4871 * @param pvUser2 Pointer to the INTNET instance data.
4872 */
4873static DECLCALLBACK(void) intnetR0IfDestruct(void *pvObj, void *pvUser1, void *pvUser2)
4874{
4875 PINTNETIF pIf = (PINTNETIF)pvUser1;
4876 PINTNET pIntNet = (PINTNET)pvUser2;
4877 Log(("intnetR0IfDestruct: pvObj=%p pIf=%p pIntNet=%p hIf=%RX32\n", pvObj, pIf, pIntNet, pIf->hIf));
4878
4879 /*
4880 * For paranoid reasons we must now mark the interface as destroyed.
4881 * This is so that any waiting threads can take evasive action (kind
4882 * of theoretical case), and we can reject everyone else referencing
4883 * the object via the handle table before we get around to removing it.
4884 */
4885 ASMAtomicWriteHandle(&pIf->hDestructorThread, RTThreadNativeSelf());
4886
4887 /*
4888 * We grab the INTNET create/open/destroy semaphore to make sure nobody is
4889 * adding or removing interfaces while we're in here.
4890 */
4891 RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
4892
4893 /*
4894 * Delete the interface handle so the object no longer can be used.
4895 * (Can happen if the client didn't close its session.)
4896 */
4897 INTNETIFHANDLE hIf = ASMAtomicXchgU32(&pIf->hIf, INTNET_HANDLE_INVALID);
4898 if (hIf != INTNET_HANDLE_INVALID)
4899 {
4900 void *pvObj2 = RTHandleTableFreeWithCtx(pIntNet->hHtIfs, hIf, pIf->pSession); NOREF(pvObj2);
4901 AssertMsg(pvObj2 == pIf, ("%p, %p, hIf=%RX32 pSession=%p\n", pvObj2, pIf, hIf, pIf->pSession));
4902 }
4903
4904 /*
4905 * If we've got a network deactivate and detach ourselves from it. Because
4906 * of cleanup order we might have been orphaned by the network destructor.
4907 */
4908 PINTNETNETWORK pNetwork = pIf->pNetwork;
4909 if (pNetwork)
4910 {
4911 /* set inactive. */
4912 intnetR0NetworkSetIfActive(pNetwork, pIf, false /*fActive*/);
4913
4914 /* remove ourselves from the switch table. */
4915 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
4916
4917 uint32_t iIf = pNetwork->MacTab.cEntries;
4918 while (iIf-- > 0)
4919 if (pNetwork->MacTab.paEntries[iIf].pIf == pIf)
4920 {
4921 if (pNetwork->MacTab.paEntries[iIf].fPromiscuousEff)
4922 {
4923 pNetwork->MacTab.cPromiscuousEntries--;
4924 if (!pNetwork->MacTab.paEntries[iIf].fPromiscuousSeeTrunk)
4925 pNetwork->MacTab.cPromiscuousNoTrunkEntries--;
4926 }
4927 Assert(pNetwork->MacTab.cPromiscuousEntries < pNetwork->MacTab.cEntries);
4928 Assert(pNetwork->MacTab.cPromiscuousNoTrunkEntries < pNetwork->MacTab.cEntries);
4929
4930 if (iIf + 1 < pNetwork->MacTab.cEntries)
4931 memmove(&pNetwork->MacTab.paEntries[iIf],
4932 &pNetwork->MacTab.paEntries[iIf + 1],
4933 (pNetwork->MacTab.cEntries - iIf - 1) * sizeof(pNetwork->MacTab.paEntries[0]));
4934 pNetwork->MacTab.cEntries--;
4935 break;
4936 }
4937
4938 /* recalc the min flags. */
4939 if (pIf->fOpenFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES)
4940 {
4941 uint32_t fMinFlags = 0;
4942 iIf = pNetwork->MacTab.cEntries;
4943 while (iIf-- > 0)
4944 {
4945 PINTNETIF pIf2 = pNetwork->MacTab.paEntries[iIf].pIf;
4946 if ( pIf2 /* paranoia */
4947 && (pIf2->fOpenFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES))
4948 fMinFlags |= pIf2->fOpenFlags & INTNET_OPEN_FLAGS_STRICT_MASK;
4949 }
4950 pNetwork->fMinFlags = fMinFlags;
4951 }
4952
4953 PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
4954
4955 RTSpinlockRelease(pNetwork->hAddrSpinlock);
4956
4957 /* Notify the trunk about the interface being destroyed. */
4958 if (pTrunk && pTrunk->pIfPort)
4959 pTrunk->pIfPort->pfnDisconnectInterface(pTrunk->pIfPort, pIf->pvIfData);
4960
4961 /* Wait for the interface to quiesce while we still can. */
4962 intnetR0BusyWait(pNetwork, &pIf->cBusy);
4963
4964 /* Release our reference to the network. */
4965 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
4966 pIf->pNetwork = NULL;
4967 RTSpinlockRelease(pNetwork->hAddrSpinlock);
4968
4969 SUPR0ObjRelease(pNetwork->pvObj, pIf->pSession);
4970 }
4971
4972 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
4973
4974 /*
4975 * Wakeup anyone waiting on this interface. (Kind of unlikely, but perhaps
4976 * not quite impossible.)
4977 *
4978 * We *must* make sure they have woken up properly and realized
4979 * that the interface is no longer valid.
4980 */
4981 if (pIf->hRecvEvent != NIL_RTSEMEVENT)
4982 {
4983 RTSEMEVENT hRecvEvent = pIf->hRecvEvent;
4984 unsigned cMaxWait = 0x1000;
4985 while (pIf->cSleepers && cMaxWait-- > 0)
4986 {
4987 RTSemEventSignal(hRecvEvent);
4988 RTThreadYield();
4989 }
4990 if (pIf->cSleepers)
4991 {
4992 RTThreadSleep(1);
4993
4994 cMaxWait = pIf->cSleepers;
4995 while (pIf->cSleepers && cMaxWait-- > 0)
4996 {
4997 RTSemEventSignal(hRecvEvent);
4998 RTThreadSleep(10);
4999 }
5000 }
5001
5002 RTSemEventDestroy(hRecvEvent);
5003 pIf->hRecvEvent = NIL_RTSEMEVENT;
5004 }
5005
5006 /*
5007 * Unmap user buffer.
5008 */
5009 if (pIf->pIntBuf != pIf->pIntBufDefault)
5010 {
5011 /** @todo user buffer */
5012 }
5013
5014 /*
5015 * Unmap and Free the default buffer.
5016 */
5017 if (pIf->pIntBufDefault)
5018 {
5019 SUPR0MemFree(pIf->pSession, (RTHCUINTPTR)pIf->pIntBufDefault);
5020 pIf->pIntBufDefault = NULL;
5021 pIf->pIntBufDefaultR3 = 0;
5022 pIf->pIntBuf = NULL;
5023 pIf->pIntBufR3 = 0;
5024 }
5025
5026 /*
5027 * Free remaining resources
5028 */
5029 RTSpinlockDestroy(pIf->hRecvInSpinlock);
5030 pIf->hRecvInSpinlock = NIL_RTSPINLOCK;
5031
5032 RTMemFree(pIf->pDstTab);
5033 pIf->pDstTab = NULL;
5034
5035 for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End; i++)
5036 intnetR0IfAddrCacheDestroy(&pIf->aAddrCache[i]);
5037
5038 pIf->pvObj = NULL;
5039 RTMemFree(pIf);
5040}
5041
5042
5043/**
5044 * Creates a new network interface.
5045 *
5046 * The call must have opened the network for the new interface and is
5047 * responsible for closing it on failure. On success it must leave the network
5048 * opened so the interface destructor can close it.
5049 *
5050 * @returns VBox status code.
5051 * @param pNetwork The network, referenced. The reference is consumed on
5052 * success.
5053 * @param pSession The session handle.
5054 * @param cbSend The size of the send buffer.
5055 * @param cbRecv The size of the receive buffer.
5056 * @param fFlags The open network flags.
5057 * @param phIf Where to store the interface handle.
5058 */
5059static int intnetR0NetworkCreateIf(PINTNETNETWORK pNetwork, PSUPDRVSESSION pSession,
5060 unsigned cbSend, unsigned cbRecv, uint32_t fFlags,
5061 PINTNETIFHANDLE phIf)
5062{
5063 LogFlow(("intnetR0NetworkCreateIf: pNetwork=%p pSession=%p cbSend=%u cbRecv=%u fFlags=%#x phIf=%p\n",
5064 pNetwork, pSession, cbSend, cbRecv, fFlags, phIf));
5065
5066 /*
5067 * Assert input.
5068 */
5069 AssertPtr(pNetwork);
5070 AssertPtr(phIf);
5071
5072 /*
5073 * Adjust the flags with defaults for the interface policies.
5074 * Note: Main restricts promiscuous mode per interface.
5075 */
5076 uint32_t const fDefFlags = INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW
5077 | INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK;
5078 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkIfFlags); i++)
5079 if (!(fFlags & g_afIntNetOpenNetworkIfFlags[i].fPair))
5080 fFlags |= g_afIntNetOpenNetworkIfFlags[i].fPair & fDefFlags;
5081
5082 /*
5083 * Make sure that all destination tables as well as the have space of
5084 */
5085 int rc = intnetR0NetworkEnsureTabSpace(pNetwork);
5086 if (RT_FAILURE(rc))
5087 return rc;
5088
5089 /*
5090 * Allocate the interface and initialize it.
5091 */
5092 PINTNETIF pIf = (PINTNETIF)RTMemAllocZ(sizeof(*pIf));
5093 if (!pIf)
5094 return VERR_NO_MEMORY;
5095
5096 memset(&pIf->MacAddr, 0xff, sizeof(pIf->MacAddr)); /* broadcast */
5097 //pIf->fMacSet = false;
5098 //pIf->fPromiscuousReal = false;
5099 //pIf->fActive = false;
5100 //pIf->fNoMoreWaits = false;
5101 pIf->fOpenFlags = fFlags;
5102 //pIf->cYields = 0;
5103 //pIf->pIntBuf = 0;
5104 //pIf->pIntBufR3 = NIL_RTR3PTR;
5105 //pIf->pIntBufDefault = 0;
5106 //pIf->pIntBufDefaultR3 = NIL_RTR3PTR;
5107 pIf->hRecvEvent = NIL_RTSEMEVENT;
5108 //pIf->cSleepers = 0;
5109 pIf->hIf = INTNET_HANDLE_INVALID;
5110 pIf->hDestructorThread = NIL_RTNATIVETHREAD;
5111 pIf->pNetwork = pNetwork;
5112 pIf->pSession = pSession;
5113 //pIf->pvObj = NULL;
5114 //pIf->aAddrCache = {0};
5115 pIf->hRecvInSpinlock = NIL_RTSPINLOCK;
5116 pIf->cBusy = 0;
5117 //pIf->pDstTab = NULL;
5118 //pIf->pvIfData = NULL;
5119
5120 for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End && RT_SUCCESS(rc); i++)
5121 rc = intnetR0IfAddrCacheInit(&pIf->aAddrCache[i], (INTNETADDRTYPE)i,
5122 !!(pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE));
5123 if (RT_SUCCESS(rc))
5124 rc = intnetR0AllocDstTab(pNetwork->MacTab.cEntriesAllocated, (PINTNETDSTTAB *)&pIf->pDstTab);
5125 if (RT_SUCCESS(rc))
5126 rc = RTSemEventCreate((PRTSEMEVENT)&pIf->hRecvEvent);
5127 if (RT_SUCCESS(rc))
5128 rc = RTSpinlockCreate(&pIf->hRecvInSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "hRecvInSpinlock");
5129 if (RT_SUCCESS(rc))
5130 {
5131 /*
5132 * Create the default buffer.
5133 */
5134 /** @todo adjust with minimums and apply defaults here. */
5135 cbRecv = RT_ALIGN(RT_MAX(cbRecv, sizeof(INTNETHDR) * 4), INTNETRINGBUF_ALIGNMENT);
5136 cbSend = RT_ALIGN(RT_MAX(cbSend, sizeof(INTNETHDR) * 4), INTNETRINGBUF_ALIGNMENT);
5137 const unsigned cbBuf = RT_ALIGN(sizeof(*pIf->pIntBuf), INTNETRINGBUF_ALIGNMENT) + cbRecv + cbSend;
5138 rc = SUPR0MemAlloc(pIf->pSession, cbBuf, (PRTR0PTR)&pIf->pIntBufDefault, (PRTR3PTR)&pIf->pIntBufDefaultR3);
5139 if (RT_SUCCESS(rc))
5140 {
5141 ASMMemZero32(pIf->pIntBufDefault, cbBuf); /** @todo I thought I specified these buggers as clearing the memory... */
5142
5143 pIf->pIntBuf = pIf->pIntBufDefault;
5144 pIf->pIntBufR3 = pIf->pIntBufDefaultR3;
5145 IntNetBufInit(pIf->pIntBuf, cbBuf, cbRecv, cbSend);
5146
5147 /*
5148 * Register the interface with the session and create a handle for it.
5149 */
5150 pIf->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_INTERNAL_NETWORK_INTERFACE,
5151 intnetR0IfDestruct, pIf, pNetwork->pIntNet);
5152 if (pIf->pvObj)
5153 {
5154 rc = RTHandleTableAllocWithCtx(pNetwork->pIntNet->hHtIfs, pIf, pSession, (uint32_t *)&pIf->hIf);
5155 if (RT_SUCCESS(rc))
5156 {
5157 /*
5158 * Finally add the interface to the network, consuming the
5159 * network reference of the caller.
5160 */
5161 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5162
5163 uint32_t iIf = pNetwork->MacTab.cEntries;
5164 Assert(iIf + 1 <= pNetwork->MacTab.cEntriesAllocated);
5165
5166 pNetwork->MacTab.paEntries[iIf].MacAddr = pIf->MacAddr;
5167 pNetwork->MacTab.paEntries[iIf].fActive = false;
5168 pNetwork->MacTab.paEntries[iIf].fPromiscuousEff = false;
5169 pNetwork->MacTab.paEntries[iIf].fPromiscuousSeeTrunk = false;
5170 pNetwork->MacTab.paEntries[iIf].pIf = pIf;
5171
5172 pNetwork->MacTab.cEntries = iIf + 1;
5173 pIf->pNetwork = pNetwork;
5174
5175 /*
5176 * Grab a busy reference (paranoia) to the trunk before releasing
5177 * the spinlock and then notify it about the new interface.
5178 */
5179 PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
5180 if (pTrunk)
5181 intnetR0BusyIncTrunk(pTrunk);
5182
5183 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5184
5185 if (pTrunk)
5186 {
5187 Log(("intnetR0NetworkCreateIf: pfnConnectInterface hIf=%RX32\n", pIf->hIf));
5188 if (pTrunk->pIfPort)
5189 rc = pTrunk->pIfPort->pfnConnectInterface(pTrunk->pIfPort, pIf, &pIf->pvIfData);
5190 intnetR0BusyDecTrunk(pTrunk);
5191 }
5192 if (RT_SUCCESS(rc))
5193 {
5194 /*
5195 * We're good!
5196 */
5197 *phIf = pIf->hIf;
5198 Log(("intnetR0NetworkCreateIf: returns VINF_SUCCESS *phIf=%RX32 cbSend=%u cbRecv=%u cbBuf=%u\n",
5199 *phIf, pIf->pIntBufDefault->cbSend, pIf->pIntBufDefault->cbRecv, pIf->pIntBufDefault->cbBuf));
5200 return VINF_SUCCESS;
5201 }
5202 }
5203
5204 SUPR0ObjAddRef(pNetwork->pvObj, pSession);
5205 SUPR0ObjRelease(pIf->pvObj, pSession);
5206 LogFlow(("intnetR0NetworkCreateIf: returns %Rrc\n", rc));
5207 return rc;
5208 }
5209
5210 /* clean up */
5211 SUPR0MemFree(pIf->pSession, (RTHCUINTPTR)pIf->pIntBufDefault);
5212 pIf->pIntBufDefault = NULL;
5213 pIf->pIntBuf = NULL;
5214 }
5215 }
5216
5217 RTSpinlockDestroy(pIf->hRecvInSpinlock);
5218 pIf->hRecvInSpinlock = NIL_RTSPINLOCK;
5219 RTSemEventDestroy(pIf->hRecvEvent);
5220 pIf->hRecvEvent = NIL_RTSEMEVENT;
5221 RTMemFree(pIf->pDstTab);
5222 for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End; i++)
5223 intnetR0IfAddrCacheDestroy(&pIf->aAddrCache[i]);
5224 RTMemFree(pIf);
5225 LogFlow(("intnetR0NetworkCreateIf: returns %Rrc\n", rc));
5226 return rc;
5227}
5228
5229
5230/** @copydoc INTNETTRUNKSWPORT::pfnSetSGPhys */
5231static DECLCALLBACK(bool) intnetR0TrunkIfPortSetSGPhys(PINTNETTRUNKSWPORT pSwitchPort, bool fEnable)
5232{
5233 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5234 AssertMsgFailed(("Not implemented because it wasn't required on Darwin\n"));
5235 return ASMAtomicXchgBool(&pThis->fPhysSG, fEnable);
5236}
5237
5238
5239/** @copydoc INTNETTRUNKSWPORT::pfnReportMacAddress */
5240static DECLCALLBACK(void) intnetR0TrunkIfPortReportMacAddress(PINTNETTRUNKSWPORT pSwitchPort, PCRTMAC pMacAddr)
5241{
5242 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5243
5244 /*
5245 * Get the network instance and grab the address spinlock before making
5246 * any changes.
5247 */
5248 intnetR0BusyIncTrunk(pThis);
5249 PINTNETNETWORK pNetwork = pThis->pNetwork;
5250 if (pNetwork)
5251 {
5252 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5253
5254 pNetwork->MacTab.HostMac = *pMacAddr;
5255 pThis->MacAddr = *pMacAddr;
5256
5257 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5258 }
5259 else
5260 pThis->MacAddr = *pMacAddr;
5261 intnetR0BusyDecTrunk(pThis);
5262}
5263
5264
5265/** @copydoc INTNETTRUNKSWPORT::pfnReportPromiscuousMode */
5266static DECLCALLBACK(void) intnetR0TrunkIfPortReportPromiscuousMode(PINTNETTRUNKSWPORT pSwitchPort, bool fPromiscuous)
5267{
5268 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5269
5270 /*
5271 * Get the network instance and grab the address spinlock before making
5272 * any changes.
5273 */
5274 intnetR0BusyIncTrunk(pThis);
5275 PINTNETNETWORK pNetwork = pThis->pNetwork;
5276 if (pNetwork)
5277 {
5278 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5279
5280 pNetwork->MacTab.fHostPromiscuousReal = fPromiscuous
5281 || (pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE);
5282 pNetwork->MacTab.fHostPromiscuousEff = pNetwork->MacTab.fHostPromiscuousReal
5283 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST);
5284
5285 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5286 }
5287 intnetR0BusyDecTrunk(pThis);
5288}
5289
5290
5291/** @copydoc INTNETTRUNKSWPORT::pfnReportGsoCapabilities */
5292static DECLCALLBACK(void) intnetR0TrunkIfPortReportGsoCapabilities(PINTNETTRUNKSWPORT pSwitchPort,
5293 uint32_t fGsoCapabilities, uint32_t fDst)
5294{
5295 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5296
5297 for (unsigned iBit = PDMNETWORKGSOTYPE_END; iBit < 32; iBit++)
5298 Assert(!(fGsoCapabilities & RT_BIT_32(iBit)));
5299 Assert(!(fDst & ~INTNETTRUNKDIR_VALID_MASK));
5300 Assert(fDst);
5301
5302 if (fDst & INTNETTRUNKDIR_HOST)
5303 pThis->fHostGsoCapabilites = fGsoCapabilities;
5304
5305 if (fDst & INTNETTRUNKDIR_WIRE)
5306 pThis->fWireGsoCapabilites = fGsoCapabilities;
5307}
5308
5309
5310/** @copydoc INTNETTRUNKSWPORT::pfnReportNoPreemptDsts */
5311static DECLCALLBACK(void) intnetR0TrunkIfPortReportNoPreemptDsts(PINTNETTRUNKSWPORT pSwitchPort, uint32_t fNoPreemptDsts)
5312{
5313 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5314 Assert(!(fNoPreemptDsts & ~INTNETTRUNKDIR_VALID_MASK));
5315
5316 pThis->fNoPreemptDsts = fNoPreemptDsts;
5317}
5318
5319
5320/** @copydoc INTNETTRUNKSWPORT::pfnDisconnect */
5321static DECLCALLBACK(void) intnetR0TrunkIfPortDisconnect(PINTNETTRUNKSWPORT pSwitchPort, PINTNETTRUNKIFPORT pIfPort,
5322 PFNINTNETTRUNKIFPORTRELEASEBUSY pfnReleaseBusy)
5323{
5324 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5325
5326 /*
5327 * The caller has marked the trunk instance busy on his side before making
5328 * the call (see method docs) to let us safely grab the network and internal
5329 * network instance pointers without racing the network destruction code
5330 * (intnetR0TrunkIfDestroy (called by intnetR0TrunkIfDestroy) will wait for
5331 * the interface to stop being busy before setting pNetwork to NULL and
5332 * freeing up the resources).
5333 */
5334 PINTNETNETWORK pNetwork = pThis->pNetwork;
5335 if (pNetwork)
5336 {
5337 PINTNET pIntNet = pNetwork->pIntNet;
5338 Assert(pNetwork->pIntNet);
5339
5340 /*
5341 * We must decrease the callers busy count here to prevent deadlocking
5342 * when requesting the big mutex ownership. This will of course
5343 * unblock anyone stuck in intnetR0TrunkIfDestroy doing pfnWaitForIdle
5344 * (the other deadlock party), so we have to revalidate the network
5345 * pointer after taking ownership of the big mutex.
5346 */
5347 pfnReleaseBusy(pIfPort);
5348
5349 RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
5350
5351 if (intnetR0NetworkIsValid(pIntNet, pNetwork))
5352 {
5353 Assert(pNetwork->MacTab.pTrunk == pThis); /* Must be valid as long as tehre are no concurrent calls to this method. */
5354 Assert(pThis->pIfPort == pIfPort); /* Ditto */
5355
5356 /*
5357 * Disconnect the trunk and destroy it, similar to what is done int
5358 * intnetR0NetworkDestruct.
5359 */
5360 pIfPort->pfnSetState(pIfPort, INTNETTRUNKIFSTATE_DISCONNECTING);
5361
5362 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5363 pNetwork->MacTab.pTrunk = NULL;
5364 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5365
5366 intnetR0TrunkIfDestroy(pThis, pNetwork);
5367 }
5368
5369 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
5370 }
5371 /*
5372 * We must always release the busy reference.
5373 */
5374 else
5375 pfnReleaseBusy(pIfPort);
5376}
5377
5378
5379/** @copydoc INTNETTRUNKSWPORT::pfnPreRecv */
5380static DECLCALLBACK(INTNETSWDECISION) intnetR0TrunkIfPortPreRecv(PINTNETTRUNKSWPORT pSwitchPort,
5381 void const *pvSrc, size_t cbSrc, uint32_t fSrc)
5382{
5383 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5384
5385 /* assert some sanity */
5386 AssertPtr(pvSrc);
5387 AssertReturn(cbSrc >= 6, INTNETSWDECISION_BROADCAST);
5388 Assert(fSrc);
5389
5390 /*
5391 * Mark the trunk as busy, make sure we've got a network and that there are
5392 * some active interfaces around.
5393 */
5394 INTNETSWDECISION enmSwDecision = INTNETSWDECISION_TRUNK;
5395 intnetR0BusyIncTrunk(pThis);
5396 PINTNETNETWORK pNetwork = pThis->pNetwork;
5397 if (RT_LIKELY( pNetwork
5398 && pNetwork->cActiveIFs > 0 ))
5399 {
5400 /*
5401 * Lazy bird! No pre-switching of multicast and shared-MAC-on-wire.
5402 */
5403 PCRTNETETHERHDR pEthHdr = (PCRTNETETHERHDR)pvSrc;
5404 if (intnetR0IsMacAddrMulticast(&pEthHdr->DstMac))
5405 enmSwDecision = INTNETSWDECISION_BROADCAST;
5406 else if ( fSrc == INTNETTRUNKDIR_WIRE
5407 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE))
5408 enmSwDecision = INTNETSWDECISION_BROADCAST;
5409 else
5410 enmSwDecision = intnetR0NetworkPreSwitchUnicast(pNetwork,
5411 fSrc,
5412 cbSrc >= 12 ? &pEthHdr->SrcMac : NULL,
5413 &pEthHdr->DstMac);
5414 }
5415
5416 intnetR0BusyDecTrunk(pThis);
5417 return enmSwDecision;
5418}
5419
5420
5421/** @copydoc INTNETTRUNKSWPORT::pfnRecv */
5422static DECLCALLBACK(bool) intnetR0TrunkIfPortRecv(PINTNETTRUNKSWPORT pSwitchPort, void *pvIf, PINTNETSG pSG, uint32_t fSrc)
5423{
5424 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5425
5426 /* assert some sanity */
5427 AssertPtr(pSG);
5428 Assert(fSrc);
5429 NOREF(pvIf); /* later */
5430
5431 /*
5432 * Mark the trunk as busy, make sure we've got a network and that there are
5433 * some active interfaces around.
5434 */
5435 bool fRc = false /* don't drop it */;
5436 intnetR0BusyIncTrunk(pThis);
5437 PINTNETNETWORK pNetwork = pThis->pNetwork;
5438 if (RT_LIKELY( pNetwork
5439 && pNetwork->cActiveIFs > 0 ))
5440 {
5441 /*
5442 * Grab or allocate a destination table.
5443 */
5444 bool const fIntCtx = RTThreadPreemptIsEnabled(NIL_RTTHREAD) || RTThreadIsInInterrupt(NIL_RTTHREAD);
5445 unsigned iDstTab = 0;
5446 PINTNETDSTTAB pDstTab = NULL;
5447 RTSpinlockAcquire(pThis->hDstTabSpinlock);
5448 if (fIntCtx)
5449 {
5450 /* Interrupt or restricted context. */
5451 iDstTab = RTMpCpuIdToSetIndex(RTMpCpuId());
5452 iDstTab %= pThis->cIntDstTabs;
5453 pDstTab = pThis->apIntDstTabs[iDstTab];
5454 if (RT_LIKELY(pDstTab))
5455 pThis->apIntDstTabs[iDstTab] = NULL;
5456 else
5457 {
5458 iDstTab = pThis->cIntDstTabs;
5459 while (iDstTab-- > 0)
5460 {
5461 pDstTab = pThis->apIntDstTabs[iDstTab];
5462 if (pDstTab)
5463 {
5464 pThis->apIntDstTabs[iDstTab] = NULL;
5465 break;
5466 }
5467 }
5468 }
5469 RTSpinlockRelease(pThis->hDstTabSpinlock);
5470 Assert(!pDstTab || iDstTab < pThis->cIntDstTabs);
5471 }
5472 else
5473 {
5474 /* Task context, fallback is to allocate a table. */
5475 AssertCompile(RT_ELEMENTS(pThis->apTaskDstTabs) == 2); /* for loop rollout */
5476 pDstTab = pThis->apIntDstTabs[iDstTab = 0];
5477 if (!pDstTab)
5478 pDstTab = pThis->apIntDstTabs[iDstTab = 1];
5479 if (pDstTab)
5480 {
5481 pThis->apIntDstTabs[iDstTab] = NULL;
5482 RTSpinlockRelease(pThis->hDstTabSpinlock);
5483 Assert(iDstTab < RT_ELEMENTS(pThis->apTaskDstTabs));
5484 }
5485 else
5486 {
5487 RTSpinlockRelease(pThis->hDstTabSpinlock);
5488 intnetR0AllocDstTab(pNetwork->MacTab.cEntriesAllocated, &pDstTab);
5489 iDstTab = 65535;
5490 }
5491 }
5492 if (RT_LIKELY(pDstTab))
5493 {
5494 /*
5495 * Finally, get down to business of sending the frame.
5496 */
5497 INTNETSWDECISION enmSwDecision = intnetR0NetworkSend(pNetwork, NULL, fSrc, pSG, pDstTab);
5498 AssertMsg(enmSwDecision != INTNETSWDECISION_BAD_CONTEXT, ("fSrc=%#x fTrunkDst=%#x hdr=%.14Rhxs\n", fSrc, pDstTab->fTrunkDst, pSG->aSegs[0].pv));
5499 if (enmSwDecision == INTNETSWDECISION_INTNET)
5500 fRc = true; /* drop it */
5501
5502 /*
5503 * Free the destination table.
5504 */
5505 if (iDstTab == 65535)
5506 RTMemFree(pDstTab);
5507 else
5508 {
5509 RTSpinlockAcquire(pThis->hDstTabSpinlock);
5510 if (fIntCtx && !pThis->apIntDstTabs[iDstTab])
5511 pThis->apIntDstTabs[iDstTab] = pDstTab;
5512 else if (!fIntCtx && !pThis->apTaskDstTabs[iDstTab])
5513 pThis->apTaskDstTabs[iDstTab] = pDstTab;
5514 else
5515 {
5516 /* this shouldn't happen! */
5517 PINTNETDSTTAB *papDstTabs = fIntCtx ? &pThis->apIntDstTabs[0] : &pThis->apTaskDstTabs[0];
5518 iDstTab = fIntCtx ? pThis->cIntDstTabs : RT_ELEMENTS(pThis->apTaskDstTabs);
5519 while (iDstTab-- > 0)
5520 if (!papDstTabs[iDstTab])
5521 {
5522 papDstTabs[iDstTab] = pDstTab;
5523 break;
5524 }
5525 }
5526 RTSpinlockRelease(pThis->hDstTabSpinlock);
5527 Assert(iDstTab < RT_MAX(RT_ELEMENTS(pThis->apTaskDstTabs), pThis->cIntDstTabs));
5528 }
5529 }
5530 }
5531
5532 intnetR0BusyDecTrunk(pThis);
5533 return fRc;
5534}
5535
5536
5537/** @copydoc INTNETTRUNKSWPORT::pfnSGRetain */
5538static DECLCALLBACK(void) intnetR0TrunkIfPortSGRetain(PINTNETTRUNKSWPORT pSwitchPort, PINTNETSG pSG)
5539{
5540 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5541 PINTNETNETWORK pNetwork = pThis->pNetwork;
5542
5543 /* assert some sanity */
5544 AssertPtrReturnVoid(pNetwork);
5545 AssertReturnVoid(pNetwork->hEvtBusyIf != NIL_RTSEMEVENT);
5546 AssertPtr(pSG);
5547 Assert(pSG->cUsers > 0 && pSG->cUsers < 256);
5548
5549 /* do it. */
5550 ++pSG->cUsers;
5551}
5552
5553
5554/** @copydoc INTNETTRUNKSWPORT::pfnSGRelease */
5555static DECLCALLBACK(void) intnetR0TrunkIfPortSGRelease(PINTNETTRUNKSWPORT pSwitchPort, PINTNETSG pSG)
5556{
5557 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5558 PINTNETNETWORK pNetwork = pThis->pNetwork;
5559
5560 /* assert some sanity */
5561 AssertPtrReturnVoid(pNetwork);
5562 AssertReturnVoid(pNetwork->hEvtBusyIf != NIL_RTSEMEVENT);
5563 AssertPtr(pSG);
5564 Assert(pSG->cUsers > 0);
5565
5566 /*
5567 * Free it?
5568 */
5569 if (!--pSG->cUsers)
5570 {
5571 /** @todo later */
5572 }
5573}
5574
5575
5576/** @copydoc INTNETTRUNKSWPORT::pfnNotifyHostAddress */
5577static DECLCALLBACK(void) intnetR0NetworkNotifyHostAddress(PINTNETTRUNKSWPORT pSwitchPort,
5578 bool fAdded,
5579 INTNETADDRTYPE enmType, const void *pvAddr)
5580{
5581 PINTNETTRUNKIF pTrunkIf = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5582 PINTNETNETWORK pNetwork = pTrunkIf->pNetwork;
5583 PCRTNETADDRU pAddr = (PCRTNETADDRU)pvAddr;
5584 uint8_t cbAddr;
5585
5586 if (enmType == kIntNetAddrType_IPv4)
5587 {
5588 Log(("%s: %s %RTnaipv4\n",
5589 __FUNCTION__, (fAdded ? "add" : "del"),
5590 pAddr->IPv4));
5591 cbAddr = 4;
5592 }
5593 else if (enmType == kIntNetAddrType_IPv6)
5594 {
5595 Log(("%s: %s %RTnaipv6\n",
5596 __FUNCTION__, (fAdded ? "add" : "del"),
5597 pAddr));
5598 cbAddr = 16;
5599 }
5600 else
5601 {
5602 Log(("%s: unexpected address type %d\n", __FUNCTION__, enmType));
5603 return;
5604 }
5605
5606 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5607 if (fAdded) /* one of host interfaces got a new address */
5608 {
5609 /* blacklist it to prevent spoofing by guests */
5610 intnetR0NetworkBlacklistAdd(pNetwork, pAddr, enmType);
5611
5612 /* kick out any guest that uses it */
5613 intnetR0NetworkAddrCacheDeleteLocked(pNetwork, pAddr, enmType, cbAddr, "tif/host");
5614 }
5615 else /* address deleted from one of host interfaces */
5616 {
5617 /* stop blacklisting it, guests may use it now */
5618 intnetR0NetworkBlacklistDelete(pNetwork, pAddr, enmType);
5619 }
5620 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5621}
5622
5623
/**
 * Shutdown the trunk interface.
 *
 * Waits for the trunk's interface port (if there is one) to become idle,
 * disconnects and releases it, and then frees the destination tables and the
 * trunk structure itself.  A NULL @a pThis is silently ignored.
 *
 * @param   pThis       The trunk.  Freed by this function.
 * @param   pNetwork    The network the trunk belongs to (only its name is
 *                      used here, for release logging).
 *
 * @remarks The caller must hold the global lock.
 */
static void intnetR0TrunkIfDestroy(PINTNETTRUNKIF pThis, PINTNETNETWORK pNetwork)
{
    /* assert sanity */
    if (!pThis)
        return;
    AssertPtr(pThis);
    Assert(pThis->pNetwork == pNetwork);
    AssertPtrNull(pThis->pIfPort);

    /*
     * The interface has already been deactivated, we just need to wait for
     * it to become idle before we can disconnect and release it.
     */
    PINTNETTRUNKIFPORT pIfPort = pThis->pIfPort;
    if (pIfPort)
    {
        /* unset it */
        pThis->pIfPort = NULL;

        /* wait in portions so we can complain every now and then. */
        uint64_t StartTS = RTTimeSystemNanoTS();
        int rc = pIfPort->pfnWaitForIdle(pIfPort, 10*1000);
        if (RT_FAILURE(rc))
        {
            LogRel(("intnet: '%s' didn't become idle in %RU64 ns (%Rrc).\n",
                    pNetwork->szName, RTTimeSystemNanoTS() - StartTS, rc));
            Assert(rc == VERR_TIMEOUT);
            /* First stage: keep retrying until 30 seconds have passed since StartTS. */
            while (   RT_FAILURE(rc)
                   && RTTimeSystemNanoTS() - StartTS < UINT64_C(30000000000)) /* 30 sec */
                rc = pIfPort->pfnWaitForIdle(pIfPort, 10*1000);
            if (rc == VERR_TIMEOUT)
            {
                LogRel(("intnet: '%s' didn't become idle in %RU64 ns (%Rrc).\n",
                        pNetwork->szName, RTTimeSystemNanoTS() - StartTS, rc));
                /* Second stage: longer waits, until 360 seconds have passed since StartTS. */
                while (   rc == VERR_TIMEOUT
                       && RTTimeSystemNanoTS() - StartTS < UINT64_C(360000000000)) /* 360 sec */
                    rc = pIfPort->pfnWaitForIdle(pIfPort, 30*1000);
                if (RT_FAILURE(rc))
                {
                    LogRel(("intnet: '%s' didn't become idle in %RU64 ns (%Rrc), giving up.\n",
                            pNetwork->szName, RTTimeSystemNanoTS() - StartTS, rc));
                    AssertRC(rc);
                }
            }
        }

        /* disconnect & release it. (Done even if the port never reported idle.) */
        pIfPort->pfnDisconnectAndRelease(pIfPort);
    }

    /*
     * Free up the resources.
     */
    pThis->pNetwork = NULL; /* Must not be cleared while busy, see intnetR0TrunkIfPortDisconnect. */
    RTSpinlockDestroy(pThis->hDstTabSpinlock);
    for (unsigned i = 0; i < RT_ELEMENTS(pThis->apTaskDstTabs); i++)
    {
        Assert(pThis->apTaskDstTabs[i]);
        RTMemFree(pThis->apTaskDstTabs[i]);
        pThis->apTaskDstTabs[i] = NULL;
    }
    for (unsigned i = 0; i < pThis->cIntDstTabs; i++)
    {
        Assert(pThis->apIntDstTabs[i]);
        RTMemFree(pThis->apIntDstTabs[i]);
        pThis->apIntDstTabs[i] = NULL;
    }
    RTMemFree(pThis);
}
5701
5702
/**
 * Creates the trunk connection (if any).
 *
 * For trunk types that need no trunk component nothing is created and
 * VINF_SUCCESS is returned right away.  Otherwise the trunk structure and its
 * destination tables are allocated, the switch port callback table is filled
 * in, the trunk related MacTab members of the network are initialized, and
 * (ring-0 builds only) the trunk factory is queried and asked to create and
 * connect the trunk.  On any failure everything allocated here is freed again
 * and pNetwork->MacTab.pTrunk is reset to NULL.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success, or when the trunk type requires no trunk.
 * @retval  VERR_NOT_IMPLEMENTED for a trunk type not handled by the switch.
 * @retval  VERR_NO_MEMORY if the trunk structure could not be allocated.
 * @retval  VERR_NOT_SUPPORTED when built without IN_RING0 (testcase).
 *
 * @param   pNetwork    The newly created network.
 * @param   pSession    The session handle.
 */
static int intnetR0NetworkCreateTrunkIf(PINTNETNETWORK pNetwork, PSUPDRVSESSION pSession)
{
    const char *pszName;
    switch (pNetwork->enmTrunkType)
    {
        /*
         * The 'None' case, simple.
         */
        case kIntNetTrunkType_None:
        case kIntNetTrunkType_WhateverNone:
#ifdef VBOX_WITH_NAT_SERVICE
            /*
             * Well, here we don't want to load anything special,
             * just communicate between processes via internal network.
             */
        case kIntNetTrunkType_SrvNat:
#endif
            return VINF_SUCCESS;

        /* Can't happen, but makes GCC happy. */
        default:
            return VERR_NOT_IMPLEMENTED;

        /*
         * Translate enum to component factory name.
         */
        case kIntNetTrunkType_NetFlt:
            pszName = "VBoxNetFlt";
            break;
        case kIntNetTrunkType_NetAdp:
#if defined(RT_OS_DARWIN) && !defined(VBOXNETADP_DO_NOT_USE_NETFLT)
            pszName = "VBoxNetFlt";
#else /* VBOXNETADP_DO_NOT_USE_NETFLT */
            pszName = "VBoxNetAdp";
#endif /* VBOXNETADP_DO_NOT_USE_NETFLT */
            break;
#ifndef VBOX_WITH_NAT_SERVICE
        case kIntNetTrunkType_SrvNat:
            pszName = "VBoxSrvNat";
            break;
#endif
    }

    /*
     * Allocate the trunk interface and associated destination tables.
     *
     * We take a very optimistic view on the parallelism of the host
     * network stack and NIC driver.  So, we allocate one table for each
     * possible CPU to deal with interrupt time requests and one for task
     * time calls.
     */
    RTCPUID         cCpus = RTMpGetCount(); Assert(cCpus > 0);
    PINTNETTRUNKIF  pTrunk = (PINTNETTRUNKIF)RTMemAllocZ(RT_OFFSETOF(INTNETTRUNKIF, apIntDstTabs[cCpus]));
    if (!pTrunk)
        return VERR_NO_MEMORY;

    Assert(pNetwork->MacTab.cEntriesAllocated > 0);
    int rc = VINF_SUCCESS;
    /* Set the count up front so the cleanup loop at the bottom covers every
       slot; slots never allocated are still NULL from RTMemAllocZ. */
    pTrunk->cIntDstTabs = cCpus;
    for (unsigned i = 0; i < cCpus && RT_SUCCESS(rc); i++)
        rc = intnetR0AllocDstTab(pNetwork->MacTab.cEntriesAllocated, &pTrunk->apIntDstTabs[i]);
    for (unsigned i = 0; i < RT_ELEMENTS(pTrunk->apTaskDstTabs) && RT_SUCCESS(rc); i++)
        rc = intnetR0AllocDstTab(pNetwork->MacTab.cEntriesAllocated, &pTrunk->apTaskDstTabs[i]);

    if (RT_SUCCESS(rc))
    {
        pTrunk->SwitchPort.u32Version               = INTNETTRUNKSWPORT_VERSION;
        pTrunk->SwitchPort.pfnPreRecv               = intnetR0TrunkIfPortPreRecv;
        pTrunk->SwitchPort.pfnRecv                  = intnetR0TrunkIfPortRecv;
        pTrunk->SwitchPort.pfnSGRetain              = intnetR0TrunkIfPortSGRetain;
        pTrunk->SwitchPort.pfnSGRelease             = intnetR0TrunkIfPortSGRelease;
        pTrunk->SwitchPort.pfnSetSGPhys             = intnetR0TrunkIfPortSetSGPhys;
        pTrunk->SwitchPort.pfnReportMacAddress      = intnetR0TrunkIfPortReportMacAddress;
        pTrunk->SwitchPort.pfnReportPromiscuousMode = intnetR0TrunkIfPortReportPromiscuousMode;
        pTrunk->SwitchPort.pfnReportGsoCapabilities = intnetR0TrunkIfPortReportGsoCapabilities;
        pTrunk->SwitchPort.pfnReportNoPreemptDsts   = intnetR0TrunkIfPortReportNoPreemptDsts;
        /* The host address notification callback is only installed for
           shared-MAC-on-wire networks; otherwise it stays NULL. */
        if (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
            pTrunk->SwitchPort.pfnNotifyHostAddress = intnetR0NetworkNotifyHostAddress;
        pTrunk->SwitchPort.pfnDisconnect            = intnetR0TrunkIfPortDisconnect;
        pTrunk->SwitchPort.u32VersionEnd            = INTNETTRUNKSWPORT_VERSION;
        //pTrunk->pIfPort                 = NULL;
        pTrunk->pNetwork                = pNetwork;
        /* Start out with the all-ones (broadcast) MAC address. */
        pTrunk->MacAddr.au8[0]          = 0xff;
        pTrunk->MacAddr.au8[1]          = 0xff;
        pTrunk->MacAddr.au8[2]          = 0xff;
        pTrunk->MacAddr.au8[3]          = 0xff;
        pTrunk->MacAddr.au8[4]          = 0xff;
        pTrunk->MacAddr.au8[5]          = 0xff;
        //pTrunk->fPhysSG                 = false;
        //pTrunk->fUnused                 = false;
        //pTrunk->cBusy                   = 0;
        //pTrunk->fNoPreemptDsts          = 0;
        //pTrunk->fWireGsoCapabilites     = 0;
        //pTrunk->fHostGsoCapabilites     = 0;
        //pTrunk->abGsoHdrs               = {0};
        pTrunk->hDstTabSpinlock         = NIL_RTSPINLOCK;
        //pTrunk->apTaskDstTabs           = above;
        //pTrunk->cIntDstTabs             = above;
        //pTrunk->apIntDstTabs            = above;

        /*
         * Create the lock (we've NIL'ed the members above to simplify cleanup).
         */
        rc = RTSpinlockCreate(&pTrunk->hDstTabSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "hDstTabSpinlock");
        if (RT_SUCCESS(rc))
        {
            /*
             * There are a couple of bits in MacTab as well pertaining to the
             * trunk.  We have to set this before it's reported.
             *
             * Note! We don't need to lock the MacTab here - creation time.
             */
            pNetwork->MacTab.pTrunk               = pTrunk;
            pNetwork->MacTab.HostMac              = pTrunk->MacAddr;
            pNetwork->MacTab.fHostPromiscuousReal = false;
            pNetwork->MacTab.fHostPromiscuousEff  = (pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE)
                                                 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST);
            pNetwork->MacTab.fHostActive          = false;
            pNetwork->MacTab.fWirePromiscuousReal = RT_BOOL(pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE);
            pNetwork->MacTab.fWirePromiscuousEff  = pNetwork->MacTab.fWirePromiscuousReal
                                                 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE);
            pNetwork->MacTab.fWireActive          = false;

#ifdef IN_RING0 /* (testcase is ring-3) */
            /*
             * Query the factory we want, then use it to create and connect the trunk.
             */
            PINTNETTRUNKFACTORY pTrunkFactory = NULL;
            rc = SUPR0ComponentQueryFactory(pSession, pszName, INTNETTRUNKFACTORY_UUID_STR, (void **)&pTrunkFactory);
            if (RT_SUCCESS(rc))
            {
                rc = pTrunkFactory->pfnCreateAndConnect(pTrunkFactory,
                                                        pNetwork->szTrunk,
                                                        &pTrunk->SwitchPort,
                                                        pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE
                                                        ? INTNETTRUNKFACTORY_FLAG_NO_PROMISC
                                                        : 0,
                                                        &pTrunk->pIfPort);
                /* The factory is released regardless of the outcome. */
                pTrunkFactory->pfnRelease(pTrunkFactory);
                if (RT_SUCCESS(rc))
                {
                    Assert(pTrunk->pIfPort);

                    Log(("intnetR0NetworkCreateTrunkIf: VINF_SUCCESS - pszName=%s szTrunk=%s%s Network=%s\n",
                         pszName, pNetwork->szTrunk, pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE ? " shared-mac" : "", pNetwork->szName));
                    return VINF_SUCCESS;
                }
            }
#else  /* IN_RING3 */
            NOREF(pSession);
            rc = VERR_NOT_SUPPORTED;
#endif /* IN_RING3 */

            /* Failure: undo the MacTab hookup before the trunk is freed below. */
            pNetwork->MacTab.pTrunk = NULL;
        }

        /* bail out and clean up. */
        RTSpinlockDestroy(pTrunk->hDstTabSpinlock);
    }

    /* Common failure tail: free whatever destination tables got allocated and
       the trunk structure itself. */
    for (unsigned i = 0; i < RT_ELEMENTS(pTrunk->apTaskDstTabs); i++)
        RTMemFree(pTrunk->apTaskDstTabs[i]);
    for (unsigned i = 0; i < pTrunk->cIntDstTabs; i++)
        RTMemFree(pTrunk->apIntDstTabs[i]);
    RTMemFree(pTrunk);

    LogFlow(("intnetR0NetworkCreateTrunkIf: %Rrc - pszName=%s szTrunk=%s Network=%s\n",
             rc, pszName, pNetwork->szTrunk, pNetwork->szName));
    return rc;
}
5881
5882
5883
/**
 * Object destructor callback.
 * This is called for reference counted objects when the count reaches 0.
 *
 * While holding INTNET::hMtxCreateOpenDestroy it tells the trunk (if any)
 * that it is being disconnected, deactivates the remaining interfaces, waits
 * for the trunk and the interfaces to become idle, orphans the interfaces,
 * destroys the trunk, unlinks the network from INTNET::pNetworks and finally
 * frees the network and its resources.
 *
 * @param   pvObj       The object pointer.
 * @param   pvUser1     Pointer to the network.
 * @param   pvUser2     Pointer to the INTNET instance data.
 */
static DECLCALLBACK(void) intnetR0NetworkDestruct(void *pvObj, void *pvUser1, void *pvUser2)
{
    PINTNETNETWORK  pNetwork = (PINTNETNETWORK)pvUser1;
    PINTNET         pIntNet = (PINTNET)pvUser2;
    Log(("intnetR0NetworkDestruct: pvObj=%p pNetwork=%p pIntNet=%p %s\n", pvObj, pNetwork, pIntNet, pNetwork->szName));
    Assert(pNetwork->pIntNet == pIntNet);

    /* Take the big create/open/destroy sem. */
    RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);

    /*
     * Tell the trunk, if present, that we're about to disconnect it and wish
     * no further calls from it.
     */
    PINTNETTRUNKIF  pTrunk = pNetwork->MacTab.pTrunk;
    if (pTrunk)
        pTrunk->pIfPort->pfnSetState(pTrunk->pIfPort, INTNETTRUNKIFSTATE_DISCONNECTING);

    /*
     * Deactivate and orphan any remaining interfaces and wait for them to idle.
     *
     * Note! Normally there are no more interfaces at this point, however, when
     *       supdrvCloseSession / supdrvCleanupSession release the objects the
     *       order is undefined. So, it's quite possible that the network will
     *       be dereferenced and destroyed before the interfaces.
     */
    RTSpinlockAcquire(pNetwork->hAddrSpinlock);

    uint32_t iIf = pNetwork->MacTab.cEntries;
    while (iIf-- > 0)
    {
        pNetwork->MacTab.paEntries[iIf].fActive = false;
        pNetwork->MacTab.paEntries[iIf].pIf->fActive = false;
    }

    pNetwork->MacTab.fHostActive = false;
    pNetwork->MacTab.fWireActive = false;

    RTSpinlockRelease(pNetwork->hAddrSpinlock);

    /* Wait for all the interfaces to quiesce.  (Interfaces cannot be
       removed / added since we're holding the big lock.) */
    if (pTrunk)
        intnetR0BusyWait(pNetwork, &pTrunk->cBusy);

    iIf = pNetwork->MacTab.cEntries;
    while (iIf-- > 0)
        intnetR0BusyWait(pNetwork, &pNetwork->MacTab.paEntries[iIf].pIf->cBusy);

    /* Orphan the interfaces (not trunk).  Don't bother with calling
       pfnDisconnectInterface here since the networking is going away. */
    RTSpinlockAcquire(pNetwork->hAddrSpinlock);
    while ((iIf = pNetwork->MacTab.cEntries) > 0)
    {
        PINTNETIF pIf = pNetwork->MacTab.paEntries[iIf - 1].pIf;
        /* The spinlock is dropped around the busy wait and retaken afterwards. */
        RTSpinlockRelease(pNetwork->hAddrSpinlock);

        intnetR0BusyWait(pNetwork, &pIf->cBusy);

        RTSpinlockAcquire(pNetwork->hAddrSpinlock);
        /* NOTE(review): the entry is only orphaned (and cEntries decremented)
           when pIf->cBusy is non-zero, right after intnetR0BusyWait was asked
           to wait on that very counter.  If that wait leaves the counter at
           zero this loop cannot make progress; the condition looks like it
           may have been meant to be !pIf->cBusy - confirm against
           intnetR0BusyWait before touching it. */
        if (    iIf == pNetwork->MacTab.cEntries /* paranoia */
            &&  pIf->cBusy)
        {
            pIf->pNetwork = NULL;
            pNetwork->MacTab.cEntries--;
        }
    }

    /*
     * Zap the trunk pointer while we still own the spinlock, destroy the
     * trunk after we've left it.  Note that this might take a while...
     */
    pNetwork->MacTab.pTrunk = NULL;

    RTSpinlockRelease(pNetwork->hAddrSpinlock);

    if (pTrunk)
        intnetR0TrunkIfDestroy(pTrunk, pNetwork);

    /*
     * Unlink the network.
     * Note that it needn't be in the list if we failed during creation.
     */
    PINTNETNETWORK pPrev = pIntNet->pNetworks;
    if (pPrev == pNetwork)
        pIntNet->pNetworks = pNetwork->pNext;
    else
    {
        for (; pPrev; pPrev = pPrev->pNext)
            if (pPrev->pNext == pNetwork)
            {
                pPrev->pNext = pNetwork->pNext;
                break;
            }
    }
    pNetwork->pNext = NULL;
    pNetwork->pvObj = NULL;

    /*
     * Free resources.
     */
    RTSemEventDestroy(pNetwork->hEvtBusyIf);
    pNetwork->hEvtBusyIf = NIL_RTSEMEVENT;
    RTSpinlockDestroy(pNetwork->hAddrSpinlock);
    pNetwork->hAddrSpinlock = NIL_RTSPINLOCK;
    RTMemFree(pNetwork->MacTab.paEntries);
    pNetwork->MacTab.paEntries = NULL;
    for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End; i++)
        intnetR0IfAddrCacheDestroy(&pNetwork->aAddrBlacklist[i]);
    RTMemFree(pNetwork);

    /* Release the create/destroy sem.  (Uses the local pIntNet since pNetwork
       has just been freed.) */
    RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
}
6006
6007
6008/**
6009 * Checks if the open network flags are compatible.
6010 *
6011 * @returns VBox status code.
6012 * @param pNetwork The network.
6013 * @param fFlags The open network flags.
6014 */
6015static int intnetR0CheckOpenNetworkFlags(PINTNETNETWORK pNetwork, uint32_t fFlags)
6016{
6017 uint32_t const fNetFlags = pNetwork->fFlags;
6018
6019 if ( (fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
6020 ^ (fNetFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE))
6021 return VERR_INTNET_INCOMPATIBLE_FLAGS;
6022
6023 if (fFlags & INTNET_OPEN_FLAGS_REQUIRE_EXACT)
6024 {
6025 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
6026 if ( (fFlags & g_afIntNetOpenNetworkNetFlags[i].fPair)
6027 && (fFlags & g_afIntNetOpenNetworkNetFlags[i].fPair)
6028 != (fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fPair) )
6029 return VERR_INTNET_INCOMPATIBLE_FLAGS;
6030 }
6031
6032 if (fFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES)
6033 {
6034 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
6035 if ( (fFlags & g_afIntNetOpenNetworkNetFlags[i].fRestrictive)
6036 && !(fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fRestrictive)
6037 && (fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fFixed) )
6038 return VERR_INTNET_INCOMPATIBLE_FLAGS;
6039 }
6040
6041 return VINF_SUCCESS;
6042}
6043
6044
6045/**
6046 * Adapts flag changes on network opening.
6047 *
6048 * @returns VBox status code.
6049 * @param pNetwork The network.
6050 * @param fFlags The open network flags.
6051 */
6052static int intnetR0AdaptOpenNetworkFlags(PINTNETNETWORK pNetwork, uint32_t fFlags)
6053{
6054 /*
6055 * Upgrade the minimum policy flags.
6056 */
6057 uint32_t fNetMinFlags = pNetwork->fMinFlags;
6058 Assert(!(fNetMinFlags & INTNET_OPEN_FLAGS_RELAXED_MASK));
6059 if (fFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES)
6060 {
6061 fNetMinFlags |= fFlags & INTNET_OPEN_FLAGS_STRICT_MASK;
6062 if (fNetMinFlags != pNetwork->fMinFlags)
6063 {
6064 LogRel(("INTNET: %s - min flags changed %#x -> %#x\n", pNetwork->szName, pNetwork->fMinFlags, fNetMinFlags));
6065 pNetwork->fMinFlags = fNetMinFlags;
6066 }
6067 }
6068
6069 /*
6070 * Calculate the new network flags.
6071 * (Depends on fNetMinFlags being recalculated first.)
6072 */
6073 uint32_t fNetFlags = pNetwork->fFlags;
6074
6075 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
6076 {
6077 Assert(fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fPair);
6078 Assert(!(fNetMinFlags & g_afIntNetOpenNetworkNetFlags[i].fRelaxed));
6079
6080 if (!(fFlags & g_afIntNetOpenNetworkNetFlags[i].fPair))
6081 continue;
6082 if (fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fFixed)
6083 continue;
6084
6085 if ( (fNetMinFlags & g_afIntNetOpenNetworkNetFlags[i].fRestrictive)
6086 || (fFlags & g_afIntNetOpenNetworkNetFlags[i].fRestrictive) )
6087 {
6088 fNetFlags &= ~g_afIntNetOpenNetworkNetFlags[i].fPair;
6089 fNetFlags |= g_afIntNetOpenNetworkNetFlags[i].fRestrictive;
6090 }
6091 else if (!(fFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES))
6092 {
6093 fNetFlags &= ~g_afIntNetOpenNetworkNetFlags[i].fPair;
6094 fNetFlags |= g_afIntNetOpenNetworkNetFlags[i].fRelaxed;
6095 }
6096 }
6097
6098 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
6099 {
6100 Assert(fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fPair);
6101 fNetFlags |= fFlags & g_afIntNetOpenNetworkNetFlags[i].fFixed;
6102 }
6103
6104 /*
6105 * Apply the flags if they changed.
6106 */
6107 uint32_t const fOldNetFlags = pNetwork->fFlags;
6108 if (fOldNetFlags != fNetFlags)
6109 {
6110 LogRel(("INTNET: %s - flags changed %#x -> %#x\n", pNetwork->szName, fOldNetFlags, fNetFlags));
6111
6112 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
6113
6114 pNetwork->fFlags = fNetFlags;
6115
6116 /* Recalculate some derived switcher variables. */
6117 bool fActiveTrunk = pNetwork->MacTab.pTrunk
6118 && pNetwork->cActiveIFs > 0;
6119 pNetwork->MacTab.fHostActive = fActiveTrunk
6120 && (fNetFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED);
6121 pNetwork->MacTab.fHostPromiscuousEff = ( pNetwork->MacTab.fHostPromiscuousReal
6122 || (fNetFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE))
6123 && (fNetFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST);
6124
6125 pNetwork->MacTab.fWireActive = fActiveTrunk
6126 && (fNetFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED);
6127 pNetwork->MacTab.fWirePromiscuousReal= RT_BOOL(fNetFlags & INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE);
6128 pNetwork->MacTab.fWirePromiscuousEff = pNetwork->MacTab.fWirePromiscuousReal
6129 && (fNetFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE);
6130
6131 if ((fOldNetFlags ^ fNetFlags) & INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS)
6132 {
6133 pNetwork->MacTab.cPromiscuousEntries = 0;
6134 pNetwork->MacTab.cPromiscuousNoTrunkEntries = 0;
6135
6136 uint32_t iIf = pNetwork->MacTab.cEntries;
6137 while (iIf-- > 0)
6138 {
6139 PINTNETMACTABENTRY pEntry = &pNetwork->MacTab.paEntries[iIf];
6140 PINTNETIF pIf2 = pEntry->pIf;
6141 if ( pIf2 /* paranoia */
6142 && pIf2->fPromiscuousReal)
6143 {
6144 bool fPromiscuousEff = (fNetFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS)
6145 && (pIf2->fOpenFlags & INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW);
6146 pEntry->fPromiscuousEff = fPromiscuousEff;
6147 pEntry->fPromiscuousSeeTrunk = fPromiscuousEff
6148 && (pIf2->fOpenFlags & INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK);
6149
6150 if (pEntry->fPromiscuousEff)
6151 {
6152 pNetwork->MacTab.cPromiscuousEntries++;
6153 if (!pEntry->fPromiscuousSeeTrunk)
6154 pNetwork->MacTab.cPromiscuousNoTrunkEntries++;
6155 }
6156 }
6157 }
6158 }
6159
6160 RTSpinlockRelease(pNetwork->hAddrSpinlock);
6161 }
6162
6163 return VINF_SUCCESS;
6164}
6165
6166
6167/**
6168 * Opens an existing network.
6169 *
6170 * The call must own the INTNET::hMtxCreateOpenDestroy.
6171 *
6172 * @returns VBox status code.
6173 * @param pIntNet The instance data.
6174 * @param pSession The current session.
6175 * @param pszNetwork The network name. This has a valid length.
6176 * @param enmTrunkType The trunk type.
6177 * @param pszTrunk The trunk name. Its meaning is specific to the type.
6178 * @param fFlags Flags, see INTNET_OPEN_FLAGS_*.
6179 * @param ppNetwork Where to store the pointer to the network on success.
6180 */
6181static int intnetR0OpenNetwork(PINTNET pIntNet, PSUPDRVSESSION pSession, const char *pszNetwork, INTNETTRUNKTYPE enmTrunkType,
6182 const char *pszTrunk, uint32_t fFlags, PINTNETNETWORK *ppNetwork)
6183{
6184 LogFlow(("intnetR0OpenNetwork: pIntNet=%p pSession=%p pszNetwork=%p:{%s} enmTrunkType=%d pszTrunk=%p:{%s} fFlags=%#x ppNetwork=%p\n",
6185 pIntNet, pSession, pszNetwork, pszNetwork, enmTrunkType, pszTrunk, pszTrunk, fFlags, ppNetwork));
6186
6187 /* just pro forma validation, the caller is internal. */
6188 AssertPtr(pIntNet);
6189 AssertPtr(pSession);
6190 AssertPtr(pszNetwork);
6191 Assert(enmTrunkType > kIntNetTrunkType_Invalid && enmTrunkType < kIntNetTrunkType_End);
6192 AssertPtr(pszTrunk);
6193 Assert(!(fFlags & ~INTNET_OPEN_FLAGS_MASK));
6194 AssertPtr(ppNetwork);
6195 *ppNetwork = NULL;
6196
6197 /*
6198 * Search networks by name.
6199 */
6200 PINTNETNETWORK pCur;
6201 uint8_t cchName = (uint8_t)strlen(pszNetwork);
6202 Assert(cchName && cchName < sizeof(pCur->szName)); /* caller ensures this */
6203
6204 pCur = pIntNet->pNetworks;
6205 while (pCur)
6206 {
6207 if ( pCur->cchName == cchName
6208 && !memcmp(pCur->szName, pszNetwork, cchName))
6209 {
6210 /*
6211 * Found the network, now check that we have the same ideas
6212 * about the trunk setup and security.
6213 */
6214 int rc;
6215 if ( enmTrunkType == kIntNetTrunkType_WhateverNone
6216#ifdef VBOX_WITH_NAT_SERVICE
6217 || enmTrunkType == kIntNetTrunkType_SrvNat /* @todo: what does it mean */
6218#endif
6219 || ( pCur->enmTrunkType == enmTrunkType
6220 && !strcmp(pCur->szTrunk, pszTrunk)))
6221 {
6222 rc = intnetR0CheckOpenNetworkFlags(pCur, fFlags);
6223 if (RT_SUCCESS(rc))
6224 {
6225 /*
6226 * Increment the reference and check that the session
6227 * can access this network.
6228 */
6229 rc = SUPR0ObjAddRef(pCur->pvObj, pSession);
6230 if (RT_SUCCESS(rc))
6231 {
6232 if (pCur->fFlags & INTNET_OPEN_FLAGS_ACCESS_RESTRICTED)
6233 rc = SUPR0ObjVerifyAccess(pCur->pvObj, pSession, pCur->szName);
6234 if (RT_SUCCESS(rc))
6235 *ppNetwork = pCur;
6236 else
6237 SUPR0ObjRelease(pCur->pvObj, pSession);
6238 }
6239 else if (rc == VERR_WRONG_ORDER)
6240 rc = VERR_NOT_FOUND; /* destruction race, pretend the other isn't there. */
6241 }
6242 }
6243 else
6244 {
6245 rc = VERR_INTNET_INCOMPATIBLE_TRUNK;
6246 LogRel(("intnetR0OpenNetwork failed. rc=%Rrc pCur->szTrunk=%s pszTrunk=%s pCur->enmTrunkType=%d enmTrunkType=%d\n",
6247 rc, pCur->szTrunk, pszTrunk, pCur->enmTrunkType, enmTrunkType));
6248 }
6249
6250 LogFlow(("intnetR0OpenNetwork: returns %Rrc *ppNetwork=%p\n", rc, *ppNetwork));
6251 return rc;
6252 }
6253
6254 pCur = pCur->pNext;
6255 }
6256
6257 LogFlow(("intnetR0OpenNetwork: returns VERR_NOT_FOUND\n"));
6258 return VERR_NOT_FOUND;
6259}
6260
6261
6262/**
6263 * Creates a new network.
6264 *
6265 * The call must own the INTNET::hMtxCreateOpenDestroy and has already attempted
6266 * opening the network and found it to be non-existing.
6267 *
6268 * @returns VBox status code.
6269 * @param pIntNet The instance data.
6270 * @param pSession The session handle.
6271 * @param pszNetwork The name of the network. This must be at least one character long and no longer
6272 * than the INTNETNETWORK::szName.
6273 * @param enmTrunkType The trunk type.
6274 * @param pszTrunk The trunk name. Its meaning is specific to the type.
6275 * @param fFlags Flags, see INTNET_OPEN_FLAGS_*.
6276 * @param ppNetwork Where to store the network. In the case of failure
6277 * whatever is returned here should be dereferenced
6278 * outside the INTNET::hMtxCreateOpenDestroy.
6279 */
6280static int intnetR0CreateNetwork(PINTNET pIntNet, PSUPDRVSESSION pSession, const char *pszNetwork, INTNETTRUNKTYPE enmTrunkType,
6281 const char *pszTrunk, uint32_t fFlags, PINTNETNETWORK *ppNetwork)
6282{
6283 LogFlow(("intnetR0CreateNetwork: pIntNet=%p pSession=%p pszNetwork=%p:{%s} enmTrunkType=%d pszTrunk=%p:{%s} fFlags=%#x ppNetwork=%p\n",
6284 pIntNet, pSession, pszNetwork, pszNetwork, enmTrunkType, pszTrunk, pszTrunk, fFlags, ppNetwork));
6285
6286 /* just pro forma validation, the caller is internal. */
6287 AssertPtr(pIntNet);
6288 AssertPtr(pSession);
6289 AssertPtr(pszNetwork);
6290 Assert(enmTrunkType > kIntNetTrunkType_Invalid && enmTrunkType < kIntNetTrunkType_End);
6291 AssertPtr(pszTrunk);
6292 Assert(!(fFlags & ~INTNET_OPEN_FLAGS_MASK));
6293 AssertPtr(ppNetwork);
6294
6295 *ppNetwork = NULL;
6296
6297 /*
6298 * Adjust the flags with defaults for the network policies.
6299 * Note: Main restricts promiscuous mode on the per interface level.
6300 */
6301 fFlags &= ~( INTNET_OPEN_FLAGS_IF_FIXED
6302 | INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW
6303 | INTNET_OPEN_FLAGS_IF_PROMISC_DENY
6304 | INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK
6305 | INTNET_OPEN_FLAGS_IF_PROMISC_NO_TRUNK
6306 | INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES
6307 | INTNET_OPEN_FLAGS_REQUIRE_EXACT);
6308 uint32_t fDefFlags = INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS
6309 | INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST
6310 | INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE
6311 | INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED
6312 | INTNET_OPEN_FLAGS_TRUNK_HOST_CHASTE_MODE
6313 | INTNET_OPEN_FLAGS_TRUNK_WIRE_ENABLED
6314 | INTNET_OPEN_FLAGS_TRUNK_WIRE_CHASTE_MODE;
6315 if ( enmTrunkType == kIntNetTrunkType_WhateverNone
6316#ifdef VBOX_WITH_NAT_SERVICE
6317 || enmTrunkType == kIntNetTrunkType_SrvNat /* simialar security */
6318#endif
6319 || enmTrunkType == kIntNetTrunkType_None)
6320 fDefFlags |= INTNET_OPEN_FLAGS_ACCESS_RESTRICTED;
6321 else
6322 fDefFlags |= INTNET_OPEN_FLAGS_ACCESS_PUBLIC;
6323 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
6324 if (!(fFlags & g_afIntNetOpenNetworkNetFlags[i].fPair))
6325 fFlags |= g_afIntNetOpenNetworkNetFlags[i].fPair & fDefFlags;
6326
6327 /*
6328 * Allocate and initialize.
6329 */
6330 size_t cb = sizeof(INTNETNETWORK);
6331 if (fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
6332 cb += INTNETNETWORK_TMP_SIZE + 64;
6333 PINTNETNETWORK pNetwork = (PINTNETNETWORK)RTMemAllocZ(cb);
6334 if (!pNetwork)
6335 return VERR_NO_MEMORY;
6336 //pNetwork->pNext = NULL;
6337 //pNetwork->pIfs = NULL;
6338 pNetwork->hAddrSpinlock = NIL_RTSPINLOCK;
6339 pNetwork->MacTab.cEntries = 0;
6340 pNetwork->MacTab.cEntriesAllocated = INTNET_GROW_DSTTAB_SIZE;
6341 //pNetwork->MacTab.cPromiscuousEntries = 0;
6342 //pNetwork->MacTab.cPromiscuousNoTrunkEntries = 0;
6343 pNetwork->MacTab.paEntries = NULL;
6344 pNetwork->MacTab.fHostPromiscuousReal = false;
6345 pNetwork->MacTab.fHostPromiscuousEff = false;
6346 pNetwork->MacTab.fHostActive = false;
6347 pNetwork->MacTab.fWirePromiscuousReal = false;
6348 pNetwork->MacTab.fWirePromiscuousEff = false;
6349 pNetwork->MacTab.fWireActive = false;
6350 pNetwork->MacTab.pTrunk = NULL;
6351 pNetwork->hEvtBusyIf = NIL_RTSEMEVENT;
6352 pNetwork->pIntNet = pIntNet;
6353 //pNetwork->pvObj = NULL;
6354 if (fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
6355 pNetwork->pbTmp = RT_ALIGN_PT(pNetwork + 1, 64, uint8_t *);
6356 //else
6357 // pNetwork->pbTmp = NULL;
6358 pNetwork->fFlags = fFlags;
6359 //pNetwork->fMinFlags = 0;
6360 //pNetwork->cActiveIFs = 0;
6361 size_t cchName = strlen(pszNetwork);
6362 pNetwork->cchName = (uint8_t)cchName;
6363 Assert(cchName && cchName < sizeof(pNetwork->szName)); /* caller's responsibility. */
6364 memcpy(pNetwork->szName, pszNetwork, cchName); /* '\0' at courtesy of alloc. */
6365 pNetwork->enmTrunkType = enmTrunkType;
6366 Assert(strlen(pszTrunk) < sizeof(pNetwork->szTrunk)); /* caller's responsibility. */
6367 strcpy(pNetwork->szTrunk, pszTrunk);
6368
6369 /*
6370 * Create the semaphore, spinlock and allocate the interface table.
6371 */
6372 int rc = RTSemEventCreate(&pNetwork->hEvtBusyIf);
6373 if (RT_SUCCESS(rc))
6374 rc = RTSpinlockCreate(&pNetwork->hAddrSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "hAddrSpinlock");
6375 if (RT_SUCCESS(rc))
6376 {
6377 pNetwork->MacTab.paEntries = (PINTNETMACTABENTRY)RTMemAlloc(sizeof(INTNETMACTABENTRY) * pNetwork->MacTab.cEntriesAllocated);
6378 if (!pNetwork->MacTab.paEntries)
6379 rc = VERR_NO_MEMORY;
6380 }
6381 if (RT_SUCCESS(rc))
6382 {
6383 for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End && RT_SUCCESS(rc); i++)
6384 rc = intnetR0IfAddrCacheInit(&pNetwork->aAddrBlacklist[i], (INTNETADDRTYPE)i,
6385 !!(pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE));
6386 }
6387 if (RT_SUCCESS(rc))
6388 {
6389 /*
6390 * Register the object in the current session and link it into the network list.
6391 */
6392 pNetwork->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_INTERNAL_NETWORK, intnetR0NetworkDestruct, pNetwork, pIntNet);
6393 if (pNetwork->pvObj)
6394 {
6395 pNetwork->pNext = pIntNet->pNetworks;
6396 pIntNet->pNetworks = pNetwork;
6397
6398 /*
6399 * Check if the current session is actually allowed to create and
6400 * open the network. It is possible to implement network name
6401 * based policies and these must be checked now. SUPR0ObjRegister
6402 * does no such checks.
6403 */
6404 rc = SUPR0ObjVerifyAccess(pNetwork->pvObj, pSession, pNetwork->szName);
6405 if (RT_SUCCESS(rc))
6406 {
6407 /*
6408 * Connect the trunk.
6409 */
6410 rc = intnetR0NetworkCreateTrunkIf(pNetwork, pSession);
6411 if (RT_SUCCESS(rc))
6412 {
6413 *ppNetwork = pNetwork;
6414 LogFlow(("intnetR0CreateNetwork: returns VINF_SUCCESS *ppNetwork=%p\n", pNetwork));
6415 return VINF_SUCCESS;
6416 }
6417 }
6418
6419 SUPR0ObjRelease(pNetwork->pvObj, pSession);
6420 LogFlow(("intnetR0CreateNetwork: returns %Rrc\n", rc));
6421 return rc;
6422 }
6423
6424 /* cleanup */
6425 rc = VERR_NO_MEMORY;
6426 }
6427
6428 RTSemEventDestroy(pNetwork->hEvtBusyIf);
6429 pNetwork->hEvtBusyIf = NIL_RTSEMEVENT;
6430 RTSpinlockDestroy(pNetwork->hAddrSpinlock);
6431 pNetwork->hAddrSpinlock = NIL_RTSPINLOCK;
6432 RTMemFree(pNetwork->MacTab.paEntries);
6433 pNetwork->MacTab.paEntries = NULL;
6434 RTMemFree(pNetwork);
6435
6436 LogFlow(("intnetR0CreateNetwork: returns %Rrc\n", rc));
6437 return rc;
6438}
6439
6440
6441/**
6442 * Opens a network interface and connects it to the specified network.
6443 *
6444 * @returns VBox status code.
6445 * @param pSession The session handle.
6446 * @param pszNetwork The network name.
6447 * @param enmTrunkType The trunk type.
6448 * @param pszTrunk The trunk name. Its meaning is specific to the type.
6449 * @param fFlags Flags, see INTNET_OPEN_FLAGS_*.
6450 * @param fRestrictAccess Whether new participants should be subjected to access check or not.
6451 * @param cbSend The send buffer size.
6452 * @param cbRecv The receive buffer size.
6453 * @param phIf Where to store the handle to the network interface.
6454 */
6455INTNETR0DECL(int) IntNetR0Open(PSUPDRVSESSION pSession, const char *pszNetwork,
6456 INTNETTRUNKTYPE enmTrunkType, const char *pszTrunk, uint32_t fFlags,
6457 uint32_t cbSend, uint32_t cbRecv, PINTNETIFHANDLE phIf)
6458{
6459 LogFlow(("IntNetR0Open: pSession=%p pszNetwork=%p:{%s} enmTrunkType=%d pszTrunk=%p:{%s} fFlags=%#x cbSend=%u cbRecv=%u phIf=%p\n",
6460 pSession, pszNetwork, pszNetwork, enmTrunkType, pszTrunk, pszTrunk, fFlags, cbSend, cbRecv, phIf));
6461
6462 /*
6463 * Validate input.
6464 */
6465 PINTNET pIntNet = g_pIntNet;
6466 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
6467 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
6468
6469 AssertPtrReturn(pszNetwork, VERR_INVALID_PARAMETER);
6470 const char *pszNetworkEnd = RTStrEnd(pszNetwork, INTNET_MAX_NETWORK_NAME);
6471 AssertReturn(pszNetworkEnd, VERR_INVALID_PARAMETER);
6472 size_t cchNetwork = pszNetworkEnd - pszNetwork;
6473 AssertReturn(cchNetwork, VERR_INVALID_PARAMETER);
6474
6475 if (pszTrunk)
6476 {
6477 AssertPtrReturn(pszTrunk, VERR_INVALID_PARAMETER);
6478 const char *pszTrunkEnd = RTStrEnd(pszTrunk, INTNET_MAX_TRUNK_NAME);
6479 AssertReturn(pszTrunkEnd, VERR_INVALID_PARAMETER);
6480 }
6481 else
6482 pszTrunk = "";
6483
6484 AssertMsgReturn(enmTrunkType > kIntNetTrunkType_Invalid && enmTrunkType < kIntNetTrunkType_End,
6485 ("%d\n", enmTrunkType), VERR_INVALID_PARAMETER);
6486 switch (enmTrunkType)
6487 {
6488 case kIntNetTrunkType_None:
6489 case kIntNetTrunkType_WhateverNone:
6490#ifdef VBOX_WITH_NAT_SERVICE
6491 case kIntNetTrunkType_SrvNat:
6492#endif
6493 if (*pszTrunk)
6494 return VERR_INVALID_PARAMETER;
6495 break;
6496
6497 case kIntNetTrunkType_NetFlt:
6498 case kIntNetTrunkType_NetAdp:
6499 if (!*pszTrunk)
6500 return VERR_INVALID_PARAMETER;
6501 break;
6502
6503 default:
6504 return VERR_NOT_IMPLEMENTED;
6505 }
6506
6507 AssertMsgReturn(!(fFlags & ~INTNET_OPEN_FLAGS_MASK), ("%#x\n", fFlags), VERR_INVALID_PARAMETER);
6508 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
6509 AssertMsgReturn((fFlags & g_afIntNetOpenNetworkNetFlags[i].fPair) != g_afIntNetOpenNetworkNetFlags[i].fPair,
6510 ("%#x (%#x)\n", fFlags, g_afIntNetOpenNetworkNetFlags[i].fPair), VERR_INVALID_PARAMETER);
6511 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkIfFlags); i++)
6512 AssertMsgReturn((fFlags & g_afIntNetOpenNetworkIfFlags[i].fPair) != g_afIntNetOpenNetworkIfFlags[i].fPair,
6513 ("%#x (%#x)\n", fFlags, g_afIntNetOpenNetworkIfFlags[i].fPair), VERR_INVALID_PARAMETER);
6514 AssertPtrReturn(phIf, VERR_INVALID_PARAMETER);
6515
6516 /*
6517 * Acquire the mutex to serialize open/create/close.
6518 */
6519 int rc = RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
6520 if (RT_FAILURE(rc))
6521 return rc;
6522
6523 /*
6524 * Try open / create the network and create an interface on it for the
6525 * caller to use.
6526 */
6527 PINTNETNETWORK pNetwork = NULL;
6528 rc = intnetR0OpenNetwork(pIntNet, pSession, pszNetwork, enmTrunkType, pszTrunk, fFlags, &pNetwork);
6529 if (RT_SUCCESS(rc))
6530 {
6531 rc = intnetR0NetworkCreateIf(pNetwork, pSession, cbSend, cbRecv, fFlags, phIf);
6532 if (RT_SUCCESS(rc))
6533 {
6534 intnetR0AdaptOpenNetworkFlags(pNetwork, fFlags);
6535 rc = VINF_ALREADY_INITIALIZED;
6536 }
6537 else
6538 SUPR0ObjRelease(pNetwork->pvObj, pSession);
6539 }
6540 else if (rc == VERR_NOT_FOUND)
6541 {
6542 rc = intnetR0CreateNetwork(pIntNet, pSession, pszNetwork, enmTrunkType, pszTrunk, fFlags, &pNetwork);
6543 if (RT_SUCCESS(rc))
6544 {
6545 rc = intnetR0NetworkCreateIf(pNetwork, pSession, cbSend, cbRecv, fFlags, phIf);
6546 if (RT_FAILURE(rc))
6547 SUPR0ObjRelease(pNetwork->pvObj, pSession);
6548 }
6549 }
6550
6551 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
6552 LogFlow(("IntNetR0Open: return %Rrc *phIf=%RX32\n", rc, *phIf));
6553 return rc;
6554}
6555
6556
6557/**
6558 * VMMR0 request wrapper for IntNetR0Open.
6559 *
6560 * @returns see GMMR0MapUnmapChunk.
6561 * @param pSession The caller's session.
6562 * @param pReq The request packet.
6563 */
6564INTNETR0DECL(int) IntNetR0OpenReq(PSUPDRVSESSION pSession, PINTNETOPENREQ pReq)
6565{
6566 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
6567 return VERR_INVALID_PARAMETER;
6568 return IntNetR0Open(pSession, &pReq->szNetwork[0], pReq->enmTrunkType, pReq->szTrunk,
6569 pReq->fFlags, pReq->cbSend, pReq->cbRecv, &pReq->hIf);
6570}
6571
6572
6573/**
6574 * Count the internal networks.
6575 *
6576 * This is mainly for providing the testcase with some introspection to validate
6577 * behavior when closing interfaces.
6578 *
6579 * @returns The number of networks.
6580 */
6581INTNETR0DECL(uint32_t) IntNetR0GetNetworkCount(void)
6582{
6583 /*
6584 * Grab the instance.
6585 */
6586 PINTNET pIntNet = g_pIntNet;
6587 if (!pIntNet)
6588 return 0;
6589 AssertPtrReturn(pIntNet, 0);
6590 AssertReturn(pIntNet->u32Magic == INTNET_MAGIC, 0);
6591
6592 /*
6593 * Grab the mutex and count the networks.
6594 */
6595 int rc = RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
6596 if (RT_FAILURE(rc))
6597 return 0;
6598
6599 uint32_t cNetworks = 0;
6600 for (PINTNETNETWORK pCur = pIntNet->pNetworks; pCur; pCur = pCur->pNext)
6601 cNetworks++;
6602
6603 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
6604
6605 return cNetworks;
6606}
6607
6608
6609
6610/**
6611 * Destroys an instance of the Ring-0 internal networking service.
6612 */
6613INTNETR0DECL(void) IntNetR0Term(void)
6614{
6615 LogFlow(("IntNetR0Term:\n"));
6616
6617 /*
6618 * Zap the global pointer and validate it.
6619 */
6620 PINTNET pIntNet = g_pIntNet;
6621 g_pIntNet = NULL;
6622 if (!pIntNet)
6623 return;
6624 AssertPtrReturnVoid(pIntNet);
6625 AssertReturnVoid(pIntNet->u32Magic == INTNET_MAGIC);
6626
6627 /*
6628 * There is not supposed to be any networks hanging around at this time.
6629 */
6630 AssertReturnVoid(ASMAtomicCmpXchgU32(&pIntNet->u32Magic, ~INTNET_MAGIC, INTNET_MAGIC));
6631 Assert(pIntNet->pNetworks == NULL);
6632 if (pIntNet->hMtxCreateOpenDestroy != NIL_RTSEMMUTEX)
6633 {
6634 RTSemMutexDestroy(pIntNet->hMtxCreateOpenDestroy);
6635 pIntNet->hMtxCreateOpenDestroy = NIL_RTSEMMUTEX;
6636 }
6637 if (pIntNet->hHtIfs != NIL_RTHANDLETABLE)
6638 {
6639 /** @todo does it make sense to have a deleter here? */
6640 RTHandleTableDestroy(pIntNet->hHtIfs, NULL, NULL);
6641 pIntNet->hHtIfs = NIL_RTHANDLETABLE;
6642 }
6643
6644 RTMemFree(pIntNet);
6645}
6646
6647
6648/**
6649 * Initializes the internal network ring-0 service.
6650 *
6651 * @returns VBox status code.
6652 */
6653INTNETR0DECL(int) IntNetR0Init(void)
6654{
6655 LogFlow(("IntNetR0Init:\n"));
6656 int rc = VERR_NO_MEMORY;
6657 PINTNET pIntNet = (PINTNET)RTMemAllocZ(sizeof(*pIntNet));
6658 if (pIntNet)
6659 {
6660 //pIntNet->pNetworks = NULL;
6661
6662 rc = RTSemMutexCreate(&pIntNet->hMtxCreateOpenDestroy);
6663 if (RT_SUCCESS(rc))
6664 {
6665 rc = RTHandleTableCreateEx(&pIntNet->hHtIfs, RTHANDLETABLE_FLAGS_LOCKED | RTHANDLETABLE_FLAGS_CONTEXT,
6666 UINT32_C(0x8ffe0000), 4096, intnetR0IfRetainHandle, NULL);
6667 if (RT_SUCCESS(rc))
6668 {
6669 pIntNet->u32Magic = INTNET_MAGIC;
6670 g_pIntNet = pIntNet;
6671 LogFlow(("IntNetR0Init: returns VINF_SUCCESS pIntNet=%p\n", pIntNet));
6672 return VINF_SUCCESS;
6673 }
6674
6675 RTSemMutexDestroy(pIntNet->hMtxCreateOpenDestroy);
6676 }
6677 RTMemFree(pIntNet);
6678 }
6679 LogFlow(("IntNetR0Init: returns %Rrc\n", rc));
6680 return rc;
6681}
6682
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette