VirtualBox

source: vbox/trunk/src/VBox/Devices/Network/SrvIntNetR0.cpp@ 69303

Last change on this file since 69303 was 65698, checked in by vboxsync, 8 years ago

NetAdp/win (bugref:8488) standalone implementation, does not require NetLwf to be installed.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 242.0 KB
Line 
1/* $Id: SrvIntNetR0.cpp 65698 2017-02-09 12:28:01Z vboxsync $ */
2/** @file
3 * Internal networking - The ring 0 service.
4 *
5 * @remarks No lazy code changes. If you don't understand exactly what you're
6 * doing, get an understanding or forget it.
7 * All changes shall be reviewed by bird before commit. If not around,
8 * email and let Frank and/or Klaus OK the changes before committing.
9 */
10
11/*
12 * Copyright (C) 2006-2016 Oracle Corporation
13 *
14 * This file is part of VirtualBox Open Source Edition (OSE), as
15 * available from http://www.virtualbox.org. This file is free software;
16 * you can redistribute it and/or modify it under the terms of the GNU
17 * General Public License (GPL) as published by the Free Software
18 * Foundation, in version 2 as it comes in the "COPYING" file of the
19 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
20 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
21 */
22
23
24/*********************************************************************************************************************************
25* Header Files *
26*********************************************************************************************************************************/
27#define LOG_GROUP LOG_GROUP_SRV_INTNET
28#include <VBox/intnet.h>
29#include <VBox/intnetinline.h>
30#include <VBox/vmm/pdmnetinline.h>
31#include <VBox/sup.h>
32#include <VBox/vmm/pdm.h>
33#include <VBox/log.h>
34
35#include <iprt/asm.h>
36#include <iprt/assert.h>
37#include <iprt/handletable.h>
38#include <iprt/mp.h>
39#include <iprt/mem.h>
40#include <iprt/net.h>
41#include <iprt/semaphore.h>
42#include <iprt/spinlock.h>
43#include <iprt/string.h>
44#include <iprt/thread.h>
45#include <iprt/time.h>
46
47
48/*********************************************************************************************************************************
49* Defined Constants And Macros *
50*********************************************************************************************************************************/
/** @def INTNET_WITH_DHCP_SNOOPING
 * Turns on DHCP snooping for the shared-mac-on-the-wire mode. */
#define INTNET_WITH_DHCP_SNOOPING

/** Upper limit on the number of interfaces a single network can have. */
#define INTNET_MAX_IFS                      (1023 + 1 + 16)

/** How many entries the destination tables are grown by at a time.
 * (The 16 entry variant is compiled out.) */
#if 0
# define INTNET_GROW_DSTTAB_SIZE            16
#else
# define INTNET_GROW_DSTTAB_SIZE            1
#endif

/** The wakeup bit in the INTNETIF::cBusy and INTNETTRUNKIF::cBusy counters. */
#define INTNET_BUSY_WAKEUP_MASK             RT_BIT_32(30)
67
68
69/*********************************************************************************************************************************
70* Structures and Typedefs *
71*********************************************************************************************************************************/
72/**
73 * MAC address lookup table entry.
74 */
75typedef struct INTNETMACTABENTRY
76{
77 /** The MAC address of this entry. */
78 RTMAC MacAddr;
79 /** Is it is effectively promiscuous mode. */
80 bool fPromiscuousEff;
81 /** Is it promiscuous and should it see unrelated trunk traffic. */
82 bool fPromiscuousSeeTrunk;
83 /** Is it active.
84 * We ignore the entry if this is clear and may end up sending packets addressed
85 * to this interface onto the trunk. The reasoning for this is that this could
86 * be the interface of a VM that just has been teleported to a different host. */
87 bool fActive;
88 /** Pointer to the network interface. */
89 struct INTNETIF *pIf;
90} INTNETMACTABENTRY;
91/** Pointer to a MAC address lookup table entry. */
92typedef INTNETMACTABENTRY *PINTNETMACTABENTRY;
93
94/**
95 * MAC address lookup table.
96 *
97 * @todo Having this in a separate structure didn't work out as well as it
98 * should. Consider merging it into INTNETNETWORK.
99 */
100typedef struct INTNETMACTAB
101{
102 /** The current number of entries. */
103 uint32_t cEntries;
104 /** The number of entries we've allocated space for. */
105 uint32_t cEntriesAllocated;
106 /** Table entries. */
107 PINTNETMACTABENTRY paEntries;
108
109 /** The number of interface entries currently in promicuous mode. */
110 uint32_t cPromiscuousEntries;
111 /** The number of interface entries currently in promicuous mode that
112 * shall not see unrelated trunk traffic. */
113 uint32_t cPromiscuousNoTrunkEntries;
114
115 /** The host MAC address (reported). */
116 RTMAC HostMac;
117 /** The effective host promiscuous setting (reported). */
118 bool fHostPromiscuousEff;
119 /** The real host promiscuous setting (reported). */
120 bool fHostPromiscuousReal;
121 /** Whether the host is active. */
122 bool fHostActive;
123
124 /** Whether the wire is promiscuous (config). */
125 bool fWirePromiscuousEff;
126 /** Whether the wire is promiscuous (config).
127 * (Shadows INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE in
128 * INTNETNETWORK::fFlags.) */
129 bool fWirePromiscuousReal;
130 /** Whether the wire is active. */
131 bool fWireActive;
132
133 /** Pointer to the trunk interface. */
134 struct INTNETTRUNKIF *pTrunk;
135} INTNETMACTAB;
136/** Pointer to a MAC address . */
137typedef INTNETMACTAB *PINTNETMACTAB;
138
/**
 * A destination table.
 */
typedef struct INTNETDSTTAB
{
    /** The trunk destinations. */
    uint32_t                fTrunkDst;
    /** The trunk interface (referenced) when fTrunkDst is non-zero. */
    struct INTNETTRUNKIF   *pTrunk;
    /** Number of destination interfaces in aIfs. */
    uint32_t                cIfs;
    /** The destination interfaces (referenced).  Variable sized array. */
    struct
    {
        /** The interface to deliver to. */
        struct INTNETIF    *pIf;
        /** Whether the destination MAC address must be replaced.
         * Used when the MAC address is shared with the host on the wire(less). */
        bool                fReplaceDstMac;
    } aIfs[1];
} INTNETDSTTAB;
/** Pointer to a destination table. */
typedef INTNETDSTTAB *PINTNETDSTTAB;
/** Pointer to a const destination table. */
typedef INTNETDSTTAB const *PCINTNETDSTTAB;
164
165/**
166 * Address and type.
167 */
168typedef struct INTNETADDR
169{
170 /** The address type. */
171 INTNETADDRTYPE enmType;
172 /** The address. */
173 RTNETADDRU Addr;
174} INTNETADDR;
175/** Pointer to an address. */
176typedef INTNETADDR *PINTNETADDR;
177/** Pointer to a const address. */
178typedef INTNETADDR const *PCINTNETADDR;
179
180
/**
 * Cache of addresses for one specific network layer.
 */
typedef struct INTNETADDRCACHE
{
    /** The address table. */
    uint8_t            *pbEntries;
    /** How many of the entries hold a valid address. */
    uint8_t             cEntries;
    /** How many entries have been allocated. */
    uint8_t             cEntriesAlloc;
    /** Size of an address. */
    uint8_t             cbAddress;
    /** Size of a table entry. */
    uint8_t             cbEntry;
} INTNETADDRCACHE;
/** Pointer to an address cache. */
typedef INTNETADDRCACHE *PINTNETADDRCACHE;
/** Pointer to a const address cache. */
typedef INTNETADDRCACHE const *PCINTNETADDRCACHE;
201
202
203/**
204 * A network interface.
205 *
206 * Unless explicitly stated, all members are protect by the network semaphore.
207 */
208typedef struct INTNETIF
209{
210 /** The MAC address.
211 * This is shadowed by INTNETMACTABENTRY::MacAddr. */
212 RTMAC MacAddr;
213 /** Set if the INTNET::MacAddr member has been explicitly set. */
214 bool fMacSet;
215 /** Tracks the desired promiscuous setting of the interface. */
216 bool fPromiscuousReal;
217 /** Whether the interface is active or not.
218 * This is shadowed by INTNETMACTABENTRY::fActive. */
219 bool fActive;
220 /** Whether someone has indicated that the end is nigh by means of IntNetR0IfAbortWait. */
221 bool volatile fNoMoreWaits;
222 /** The flags specified when opening this interface. */
223 uint32_t fOpenFlags;
224 /** Number of yields done to try make the interface read pending data.
225 * We will stop yielding when this reaches a threshold assuming that the VM is
226 * paused or that it simply isn't worth all the delay. It is cleared when a
227 * successful send has been done. */
228 uint32_t cYields;
229 /** Pointer to the current exchange buffer (ring-0). */
230 PINTNETBUF pIntBuf;
231 /** Pointer to ring-3 mapping of the current exchange buffer. */
232 R3PTRTYPE(PINTNETBUF) pIntBufR3;
233 /** Pointer to the default exchange buffer for the interface. */
234 PINTNETBUF pIntBufDefault;
235 /** Pointer to ring-3 mapping of the default exchange buffer. */
236 R3PTRTYPE(PINTNETBUF) pIntBufDefaultR3;
237 /** Event semaphore which a receiver/consumer thread will sleep on while
238 * waiting for data to arrive. */
239 RTSEMEVENT volatile hRecvEvent;
240 /** Number of threads sleeping on the event semaphore. */
241 uint32_t volatile cSleepers;
242 /** The interface handle.
243 * When this is INTNET_HANDLE_INVALID a sleeper which is waking up
244 * should return with the appropriate error condition. */
245 INTNETIFHANDLE volatile hIf;
246 /** The native handle of the destructor thread. This is NIL_RTNATIVETHREAD when
247 * the object is valid and set when intnetR0IfDestruct is in progress. This is
248 * used to cover an unlikely (impossible?) race between SUPDRVSESSION cleanup
249 * and lingering threads waiting for recv or similar. */
250 RTNATIVETHREAD volatile hDestructorThread;
251 /** Pointer to the network this interface is connected to.
252 * This is protected by the INTNET::hMtxCreateOpenDestroy. */
253 struct INTNETNETWORK *pNetwork;
254 /** The session this interface is associated with. */
255 PSUPDRVSESSION pSession;
256 /** The SUPR0 object id. */
257 void *pvObj;
258 /** The network layer address cache. (Indexed by type, 0 entry isn't used.)
259 * This is protected by the address spinlock of the network. */
260 INTNETADDRCACHE aAddrCache[kIntNetAddrType_End];
261 /** Spinlock protecting the input (producer) side of the receive ring. */
262 RTSPINLOCK hRecvInSpinlock;
263 /** Busy count for tracking destination table references and active sends.
264 * Usually incremented while owning the switch table spinlock. The 30th bit
265 * is used to indicate wakeup. */
266 uint32_t volatile cBusy;
267 /** The preallocated destination table.
268 * This is NULL when it's in use as a precaution against unserialized
269 * transmitting. This is grown when new interfaces are added to the network. */
270 PINTNETDSTTAB volatile pDstTab;
271 /** Pointer to the trunk's per interface data. Can be NULL. */
272 void *pvIfData;
273 /** Header buffer for when we're carving GSO frames. */
274 uint8_t abGsoHdrs[256];
275} INTNETIF;
276/** Pointer to an internal network interface. */
277typedef INTNETIF *PINTNETIF;
278
279
280/**
281 * A trunk interface.
282 */
283typedef struct INTNETTRUNKIF
284{
285 /** The port interface we present to the component. */
286 INTNETTRUNKSWPORT SwitchPort;
287 /** The port interface we get from the component. */
288 PINTNETTRUNKIFPORT pIfPort;
289 /** Pointer to the network we're connect to.
290 * This may be NULL if we're orphaned? */
291 struct INTNETNETWORK *pNetwork;
292 /** The current MAC address for the interface. (reported)
293 * Updated while owning the switch table spinlock. */
294 RTMAC MacAddr;
295 /** Whether to supply physical addresses with the outbound SGs. (reported) */
296 bool fPhysSG;
297 /** Explicit alignment. */
298 bool fUnused;
299 /** Busy count for tracking destination table references and active sends.
300 * Usually incremented while owning the switch table spinlock. The 30th bit
301 * is used to indicate wakeup. */
302 uint32_t volatile cBusy;
303 /** Mask of destinations that pfnXmit cope with disabled preemption for. */
304 uint32_t fNoPreemptDsts;
305 /** The GSO capabilities of the wire destination. (reported) */
306 uint32_t fWireGsoCapabilites;
307 /** The GSO capabilities of the host destination. (reported)
308 * This is as bit map where each bit represents the GSO type with the same
309 * number. */
310 uint32_t fHostGsoCapabilites;
311 /** The destination table spinlock, interrupt safe.
312 * Protects apTaskDstTabs and apIntDstTabs. */
313 RTSPINLOCK hDstTabSpinlock;
314 /** The number of entries in apIntDstTabs. */
315 uint32_t cIntDstTabs;
316 /** The task time destination tables.
317 * @remarks intnetR0NetworkEnsureTabSpace and others ASSUMES this immediately
318 * precedes apIntDstTabs so that these two tables can be used as one
319 * contiguous one. */
320 PINTNETDSTTAB apTaskDstTabs[2];
321 /** The interrupt / disabled-preemption time destination tables.
322 * This is a variable sized array. */
323 PINTNETDSTTAB apIntDstTabs[1];
324} INTNETTRUNKIF;
325/** Pointer to a trunk interface. */
326typedef INTNETTRUNKIF *PINTNETTRUNKIF;
327
328/** Converts a pointer to INTNETTRUNKIF::SwitchPort to a PINTNETTRUNKIF. */
329#define INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort) ((PINTNETTRUNKIF)(pSwitchPort))
330
331
332/**
333 * Internal representation of a network.
334 */
335typedef struct INTNETNETWORK
336{
337 /** The Next network in the chain.
338 * This is protected by the INTNET::hMtxCreateOpenDestroy. */
339 struct INTNETNETWORK *pNext;
340
341 /** The spinlock protecting MacTab, aAddrBlacklist and INTNETIF::aAddrCache.
342 * Interrupt safe. */
343 RTSPINLOCK hAddrSpinlock;
344 /** MAC address table.
345 * This doubles as interface collection. */
346 INTNETMACTAB MacTab;
347
348 /** The network layer address cache. (Indexed by type, 0 entry isn't used.
349 * Contains host addresses. We don't let guests spoof them. */
350 INTNETADDRCACHE aAddrBlacklist[kIntNetAddrType_End];
351
352 /** Wait for an interface to stop being busy so it can be removed or have its
353 * destination table replaced. We have to wait upon this while owning the
354 * network mutex. Will only ever have one waiter because of the big mutex. */
355 RTSEMEVENT hEvtBusyIf;
356 /** Pointer to the instance data. */
357 struct INTNET *pIntNet;
358 /** The SUPR0 object id. */
359 void *pvObj;
360 /** Pointer to the temporary buffer that is used when snooping fragmented packets.
361 * This is allocated after this structure if we're sharing the MAC address with
362 * the host. The buffer is INTNETNETWORK_TMP_SIZE big and aligned on a 64-byte boundary. */
363 uint8_t *pbTmp;
364 /** Network creation flags (INTNET_OPEN_FLAGS_*). */
365 uint32_t fFlags;
366 /** Any restrictive policies required as a minimum by some interface.
367 * (INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES) */
368 uint32_t fMinFlags;
369 /** The number of active interfaces (excluding the trunk). */
370 uint32_t cActiveIFs;
371 /** The length of the network name. */
372 uint8_t cchName;
373 /** The network name. */
374 char szName[INTNET_MAX_NETWORK_NAME];
375 /** The trunk type. */
376 INTNETTRUNKTYPE enmTrunkType;
377 /** The trunk name. */
378 char szTrunk[INTNET_MAX_TRUNK_NAME];
379} INTNETNETWORK;
380/** Pointer to an internal network. */
381typedef INTNETNETWORK *PINTNETNETWORK;
382/** Pointer to a const internal network. */
383typedef const INTNETNETWORK *PCINTNETNETWORK;
384
385/** The size of the buffer INTNETNETWORK::pbTmp points at. */
386#define INTNETNETWORK_TMP_SIZE 2048
387
388
389/**
390 * Internal networking instance.
391 */
392typedef struct INTNET
393{
394 /** Magic number (INTNET_MAGIC). */
395 uint32_t volatile u32Magic;
396 /** Mutex protecting the creation, opening and destruction of both networks and
397 * interfaces. (This means all operations affecting the pNetworks list.) */
398 RTSEMMUTEX hMtxCreateOpenDestroy;
399 /** List of networks. Protected by INTNET::Spinlock. */
400 PINTNETNETWORK volatile pNetworks;
401 /** Handle table for the interfaces. */
402 RTHANDLETABLE hHtIfs;
403} INTNET;
404/** Pointer to an internal network ring-0 instance. */
405typedef struct INTNET *PINTNET;
406
407/** Magic number for the internal network instance data (Hayao Miyazaki). */
408#define INTNET_MAGIC UINT32_C(0x19410105)
409
410
411/*********************************************************************************************************************************
412* Global Variables *
413*********************************************************************************************************************************/
414/** Pointer to the internal network instance data. */
415static PINTNET volatile g_pIntNet = NULL;
416
417static const struct INTNETOPENNETWORKFLAGS
418{
419 uint32_t fRestrictive; /**< The restrictive flag (deny/disabled). */
420 uint32_t fRelaxed; /**< The relaxed flag (allow/enabled). */
421 uint32_t fFixed; /**< The config-fixed flag. */
422 uint32_t fPair; /**< The pair of restrictive and relaxed flags. */
423}
424/** Open network policy flags relating to the network. */
425g_afIntNetOpenNetworkNetFlags[] =
426{
427 { INTNET_OPEN_FLAGS_ACCESS_RESTRICTED, INTNET_OPEN_FLAGS_ACCESS_PUBLIC, INTNET_OPEN_FLAGS_ACCESS_FIXED, INTNET_OPEN_FLAGS_ACCESS_RESTRICTED | INTNET_OPEN_FLAGS_ACCESS_PUBLIC },
428 { INTNET_OPEN_FLAGS_PROMISC_DENY_CLIENTS, INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS, INTNET_OPEN_FLAGS_PROMISC_FIXED, INTNET_OPEN_FLAGS_PROMISC_DENY_CLIENTS | INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS },
429 { INTNET_OPEN_FLAGS_PROMISC_DENY_TRUNK_HOST, INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST, INTNET_OPEN_FLAGS_PROMISC_FIXED, INTNET_OPEN_FLAGS_PROMISC_DENY_TRUNK_HOST | INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST },
430 { INTNET_OPEN_FLAGS_PROMISC_DENY_TRUNK_WIRE, INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE, INTNET_OPEN_FLAGS_PROMISC_FIXED, INTNET_OPEN_FLAGS_PROMISC_DENY_TRUNK_WIRE | INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE },
431 { INTNET_OPEN_FLAGS_TRUNK_HOST_DISABLED, INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED, INTNET_OPEN_FLAGS_TRUNK_FIXED, INTNET_OPEN_FLAGS_TRUNK_HOST_DISABLED | INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED },
432 { INTNET_OPEN_FLAGS_TRUNK_HOST_CHASTE_MODE, INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE, INTNET_OPEN_FLAGS_TRUNK_FIXED, INTNET_OPEN_FLAGS_TRUNK_HOST_CHASTE_MODE | INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE },
433 { INTNET_OPEN_FLAGS_TRUNK_WIRE_DISABLED, INTNET_OPEN_FLAGS_TRUNK_WIRE_ENABLED, INTNET_OPEN_FLAGS_TRUNK_FIXED, INTNET_OPEN_FLAGS_TRUNK_WIRE_DISABLED | INTNET_OPEN_FLAGS_TRUNK_WIRE_ENABLED },
434 { INTNET_OPEN_FLAGS_TRUNK_WIRE_CHASTE_MODE, INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE, INTNET_OPEN_FLAGS_TRUNK_FIXED, INTNET_OPEN_FLAGS_TRUNK_WIRE_CHASTE_MODE | INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE },
435},
436/** Open network policy flags relating to the new interface. */
437g_afIntNetOpenNetworkIfFlags[] =
438{
439 { INTNET_OPEN_FLAGS_IF_PROMISC_DENY, INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW, INTNET_OPEN_FLAGS_IF_FIXED, INTNET_OPEN_FLAGS_IF_PROMISC_DENY | INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW },
440 { INTNET_OPEN_FLAGS_IF_PROMISC_NO_TRUNK, INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK, INTNET_OPEN_FLAGS_IF_FIXED, INTNET_OPEN_FLAGS_IF_PROMISC_NO_TRUNK | INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK },
441};
442
443
444/*********************************************************************************************************************************
445* Forward Declarations *
446*********************************************************************************************************************************/
447static void intnetR0TrunkIfDestroy(PINTNETTRUNKIF pThis, PINTNETNETWORK pNetwork);
448
449
450/**
451 * Checks if a pointer belongs to the list of known networks without
452 * accessing memory it points to.
453 *
454 * @returns true, if such network is in the list.
455 * @param pIntNet The pointer to the internal network instance (global).
456 * @param pNetwork The pointer that must be validated.
457 */
458DECLINLINE(bool) intnetR0NetworkIsValid(PINTNET pIntNet, PINTNETNETWORK pNetwork)
459{
460 for (PINTNETNETWORK pCurr = pIntNet->pNetworks; pCurr; pCurr = pCurr->pNext)
461 if (pCurr == pNetwork)
462 return true;
463 return false;
464}
465
466
467/**
468 * Worker for intnetR0SgWritePart that deals with the case where the
469 * request doesn't fit into the first segment.
470 *
471 * @returns true, unless the request or SG invalid.
472 * @param pSG The SG list to write to.
473 * @param off Where to start writing (offset into the SG).
474 * @param cb How much to write.
475 * @param pvBuf The buffer to containing the bits to write.
476 */
477static bool intnetR0SgWritePartSlow(PCINTNETSG pSG, uint32_t off, uint32_t cb, void const *pvBuf)
478{
479 if (RT_UNLIKELY(off + cb > pSG->cbTotal))
480 return false;
481
482 /*
483 * Skip ahead to the segment where off starts.
484 */
485 unsigned const cSegs = pSG->cSegsUsed; Assert(cSegs == pSG->cSegsUsed);
486 unsigned iSeg = 0;
487 while (off > pSG->aSegs[iSeg].cb)
488 {
489 off -= pSG->aSegs[iSeg++].cb;
490 AssertReturn(iSeg < cSegs, false);
491 }
492
493 /*
494 * Copy the data, hoping that it's all from one segment...
495 */
496 uint32_t cbCanCopy = pSG->aSegs[iSeg].cb - off;
497 if (cbCanCopy >= cb)
498 memcpy((uint8_t *)pSG->aSegs[iSeg].pv + off, pvBuf, cb);
499 else
500 {
501 /* copy the portion in the current segment. */
502 memcpy((uint8_t *)pSG->aSegs[iSeg].pv + off, pvBuf, cbCanCopy);
503 cb -= cbCanCopy;
504
505 /* copy the portions in the other segments. */
506 do
507 {
508 pvBuf = (uint8_t const *)pvBuf + cbCanCopy;
509 iSeg++;
510 AssertReturn(iSeg < cSegs, false);
511
512 cbCanCopy = RT_MIN(cb, pSG->aSegs[iSeg].cb);
513 memcpy(pSG->aSegs[iSeg].pv, pvBuf, cbCanCopy);
514
515 cb -= cbCanCopy;
516 } while (cb > 0);
517 }
518
519 return true;
520}
521
522
523/**
524 * Writes to a part of an SG.
525 *
526 * @returns true on success, false on failure (out of bounds).
527 * @param pSG The SG list to write to.
528 * @param off Where to start writing (offset into the SG).
529 * @param cb How much to write.
530 * @param pvBuf The buffer to containing the bits to write.
531 */
532DECLINLINE(bool) intnetR0SgWritePart(PCINTNETSG pSG, uint32_t off, uint32_t cb, void const *pvBuf)
533{
534 Assert(off + cb > off);
535
536 /* The optimized case. */
537 if (RT_LIKELY( pSG->cSegsUsed == 1
538 || pSG->aSegs[0].cb >= off + cb))
539 {
540 Assert(pSG->cbTotal == pSG->aSegs[0].cb);
541 memcpy((uint8_t *)pSG->aSegs[0].pv + off, pvBuf, cb);
542 return true;
543 }
544 return intnetR0SgWritePartSlow(pSG, off, cb, pvBuf);
545}
546
547
548/**
549 * Reads a byte from a SG list.
550 *
551 * @returns The byte on success. 0xff on failure.
552 * @param pSG The SG list to read.
553 * @param off The offset (into the SG) off the byte.
554 */
555DECLINLINE(uint8_t) intnetR0SgReadByte(PCINTNETSG pSG, uint32_t off)
556{
557 if (RT_LIKELY(pSG->aSegs[0].cb > off))
558 return ((uint8_t const *)pSG->aSegs[0].pv)[off];
559
560 off -= pSG->aSegs[0].cb;
561 unsigned const cSegs = pSG->cSegsUsed; Assert(cSegs == pSG->cSegsUsed);
562 for (unsigned iSeg = 1; iSeg < cSegs; iSeg++)
563 {
564 if (pSG->aSegs[iSeg].cb > off)
565 return ((uint8_t const *)pSG->aSegs[iSeg].pv)[off];
566 off -= pSG->aSegs[iSeg].cb;
567 }
568 return false;
569}
570
571
572/**
573 * Worker for intnetR0SgReadPart that deals with the case where the
574 * requested data isn't in the first segment.
575 *
576 * @returns true, unless the SG is invalid.
577 * @param pSG The SG list to read.
578 * @param off Where to start reading (offset into the SG).
579 * @param cb How much to read.
580 * @param pvBuf The buffer to read into.
581 */
582static bool intnetR0SgReadPartSlow(PCINTNETSG pSG, uint32_t off, uint32_t cb, void *pvBuf)
583{
584 if (RT_UNLIKELY(off + cb > pSG->cbTotal))
585 return false;
586
587 /*
588 * Skip ahead to the segment where off starts.
589 */
590 unsigned const cSegs = pSG->cSegsUsed; Assert(cSegs == pSG->cSegsUsed);
591 unsigned iSeg = 0;
592 while (off > pSG->aSegs[iSeg].cb)
593 {
594 off -= pSG->aSegs[iSeg++].cb;
595 AssertReturn(iSeg < cSegs, false);
596 }
597
598 /*
599 * Copy the data, hoping that it's all from one segment...
600 */
601 uint32_t cbCanCopy = pSG->aSegs[iSeg].cb - off;
602 if (cbCanCopy >= cb)
603 memcpy(pvBuf, (uint8_t const *)pSG->aSegs[iSeg].pv + off, cb);
604 else
605 {
606 /* copy the portion in the current segment. */
607 memcpy(pvBuf, (uint8_t const *)pSG->aSegs[iSeg].pv + off, cbCanCopy);
608 cb -= cbCanCopy;
609
610 /* copy the portions in the other segments. */
611 do
612 {
613 pvBuf = (uint8_t *)pvBuf + cbCanCopy;
614 iSeg++;
615 AssertReturn(iSeg < cSegs, false);
616
617 cbCanCopy = RT_MIN(cb, pSG->aSegs[iSeg].cb);
618 memcpy(pvBuf, (uint8_t const *)pSG->aSegs[iSeg].pv, cbCanCopy);
619
620 cb -= cbCanCopy;
621 } while (cb > 0);
622 }
623
624 return true;
625}
626
627
628/**
629 * Reads a part of an SG into a buffer.
630 *
631 * @returns true on success, false on failure (out of bounds).
632 * @param pSG The SG list to read.
633 * @param off Where to start reading (offset into the SG).
634 * @param cb How much to read.
635 * @param pvBuf The buffer to read into.
636 */
637DECLINLINE(bool) intnetR0SgReadPart(PCINTNETSG pSG, uint32_t off, uint32_t cb, void *pvBuf)
638{
639 Assert(off + cb > off);
640
641 /* The optimized case. */
642 if (RT_LIKELY( pSG->cSegsUsed == 1
643 || pSG->aSegs[0].cb >= off + cb))
644 {
645 Assert(pSG->cbTotal == pSG->aSegs[0].cb);
646 memcpy(pvBuf, (uint8_t const *)pSG->aSegs[0].pv + off, cb);
647 return true;
648 }
649 return intnetR0SgReadPartSlow(pSG, off, cb, pvBuf);
650}
651
652
653/**
654 * Wait for a busy counter to reach zero.
655 *
656 * @param pNetwork The network.
657 * @param pcBusy The busy counter.
658 */
659static void intnetR0BusyWait(PINTNETNETWORK pNetwork, uint32_t volatile *pcBusy)
660{
661 if (ASMAtomicReadU32(pcBusy) == 0)
662 return;
663
664 /*
665 * We have to be a bit cautious here so we don't destroy the network or the
666 * semaphore before intnetR0BusyDec has signalled us.
667 */
668
669 /* Reset the semaphore and flip the wakeup bit. */
670 RTSemEventWait(pNetwork->hEvtBusyIf, 0); /* clear it */
671 uint32_t cCurBusy = ASMAtomicReadU32(pcBusy);
672 do
673 {
674 if (cCurBusy == 0)
675 return;
676 AssertMsg(!(cCurBusy & INTNET_BUSY_WAKEUP_MASK), ("%#x\n", cCurBusy));
677 AssertMsg((cCurBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cCurBusy));
678 } while (!ASMAtomicCmpXchgExU32(pcBusy, cCurBusy | INTNET_BUSY_WAKEUP_MASK, cCurBusy, &cCurBusy));
679
680 /* Wait for the count to reach zero. */
681 do
682 {
683 int rc2 = RTSemEventWait(pNetwork->hEvtBusyIf, 30000); NOREF(rc2);
684 //AssertMsg(RT_SUCCESS(rc2), ("rc=%Rrc *pcBusy=%#x (%#x)\n", rc2, ASMAtomicReadU32(pcBusy), cCurBusy ));
685 cCurBusy = ASMAtomicReadU32(pcBusy);
686 AssertMsg((cCurBusy & INTNET_BUSY_WAKEUP_MASK), ("%#x\n", cCurBusy));
687 AssertMsg((cCurBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cCurBusy));
688 } while ( cCurBusy != INTNET_BUSY_WAKEUP_MASK
689 || !ASMAtomicCmpXchgU32(pcBusy, 0, INTNET_BUSY_WAKEUP_MASK));
690}
691
692
693/**
694 * Decrements the busy counter and maybe wakes up any threads waiting for it to
695 * reach zero.
696 *
697 * @param pNetwork The network.
698 * @param pcBusy The busy counter.
699 */
700DECLINLINE(void) intnetR0BusyDec(PINTNETNETWORK pNetwork, uint32_t volatile *pcBusy)
701{
702 uint32_t cNewBusy = ASMAtomicDecU32(pcBusy);
703 if (RT_UNLIKELY( cNewBusy == INTNET_BUSY_WAKEUP_MASK
704 && pNetwork))
705 RTSemEventSignal(pNetwork->hEvtBusyIf);
706 AssertMsg((cNewBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cNewBusy));
707}
708
709
710/**
711 * Increments the busy count of the specified interface.
712 *
713 * The caller must own the MAC address table spinlock.
714 *
715 * @param pIf The interface.
716 */
717DECLINLINE(void) intnetR0BusyDecIf(PINTNETIF pIf)
718{
719 intnetR0BusyDec(pIf->pNetwork, &pIf->cBusy);
720}
721
722
723/**
724 * Increments the busy count of the specified interface.
725 *
726 * The caller must own the MAC address table spinlock or an explicity reference.
727 *
728 * @param pTrunk The trunk.
729 */
730DECLINLINE(void) intnetR0BusyDecTrunk(PINTNETTRUNKIF pTrunk)
731{
732 if (pTrunk)
733 intnetR0BusyDec(pTrunk->pNetwork, &pTrunk->cBusy);
734}
735
736
737/**
738 * Increments the busy count of the specified interface.
739 *
740 * The caller must own the MAC address table spinlock or an explicity reference.
741 *
742 * @param pIf The interface.
743 */
744DECLINLINE(void) intnetR0BusyIncIf(PINTNETIF pIf)
745{
746 uint32_t cNewBusy = ASMAtomicIncU32(&pIf->cBusy);
747 AssertMsg((cNewBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cNewBusy));
748 NOREF(cNewBusy);
749}
750
751
752/**
753 * Increments the busy count of the specified interface.
754 *
755 * The caller must own the MAC address table spinlock or an explicity reference.
756 *
757 * @param pTrunk The trunk.
758 */
759DECLINLINE(void) intnetR0BusyIncTrunk(PINTNETTRUNKIF pTrunk)
760{
761 if (!pTrunk) return;
762 uint32_t cNewBusy = ASMAtomicIncU32(&pTrunk->cBusy);
763 AssertMsg((cNewBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cNewBusy));
764 NOREF(cNewBusy);
765}
766
767
768/**
769 * Retain an interface.
770 *
771 * @returns VBox status code, can assume success in most situations.
772 * @param pIf The interface instance.
773 * @param pSession The current session.
774 */
775DECLINLINE(int) intnetR0IfRetain(PINTNETIF pIf, PSUPDRVSESSION pSession)
776{
777 Assert(pIf->hDestructorThread == NIL_RTNATIVETHREAD);
778
779 int rc = SUPR0ObjAddRefEx(pIf->pvObj, pSession, true /* fNoBlocking */);
780 AssertRCReturn(rc, rc);
781
782 return VINF_SUCCESS;
783}
784
785
786/**
787 * Release an interface previously retained by intnetR0IfRetain or
788 * by handle lookup/freeing.
789 *
790 * @returns true if destroyed, false if not.
791 * @param pIf The interface instance.
792 * @param pSession The current session.
793 */
794DECLINLINE(bool) intnetR0IfRelease(PINTNETIF pIf, PSUPDRVSESSION pSession)
795{
796 Assert(pIf->hDestructorThread == NIL_RTNATIVETHREAD);
797
798 int rc = SUPR0ObjRelease(pIf->pvObj, pSession);
799 AssertRC(rc);
800
801 return rc == VINF_OBJECT_DESTROYED;
802}
803
804
805/**
806 * RTHandleCreateEx callback that retains an object in the
807 * handle table before returning it.
808 *
809 * (Avoids racing the freeing of the handle.)
810 *
811 * @returns VBox status code.
812 * @param hHandleTable The handle table (ignored).
813 * @param pvObj The object (INTNETIF).
814 * @param pvCtx The context (SUPDRVSESSION).
815 * @param pvUser The user context (ignored).
816 */
817static DECLCALLBACK(int) intnetR0IfRetainHandle(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser)
818{
819 NOREF(pvUser);
820 NOREF(hHandleTable);
821
822 PINTNETIF pIf = (PINTNETIF)pvObj;
823 RTNATIVETHREAD hDtorThrd;
824 ASMAtomicUoReadHandle(&pIf->hDestructorThread, &hDtorThrd);
825 if (hDtorThrd == NIL_RTNATIVETHREAD)
826 return intnetR0IfRetain(pIf, (PSUPDRVSESSION)pvCtx);
827
828 /* Allow intnetR0IfDestruct to call RTHandleTableFreeWithCtx to free
829 the handle, but not even think about retaining a referenceas we don't
830 want to confuse SUPDrv and risk having the destructor called twice. */
831 if (hDtorThrd == RTThreadNativeSelf())
832 return VINF_SUCCESS;
833
834 return VERR_SEM_DESTROYED;
835}
836
837
838
839/**
840 * Checks if the interface has a usable MAC address or not.
841 *
842 * @returns true if MacAddr is usable, false if not.
843 * @param pIf The interface.
844 */
845DECL_FORCE_INLINE(bool) intnetR0IfHasMacAddr(PINTNETIF pIf)
846{
847 return pIf->fMacSet || !(pIf->MacAddr.au8[0] & 1);
848}
849
850
851/**
852 * Locates the MAC address table entry for the given interface.
853 *
854 * The caller holds the MAC address table spinlock, obviously.
855 *
856 * @returns Pointer to the entry on if found, NULL if not.
857 * @param pNetwork The network.
858 * @param pIf The interface.
859 */
860DECLINLINE(PINTNETMACTABENTRY) intnetR0NetworkFindMacAddrEntry(PINTNETNETWORK pNetwork, PINTNETIF pIf)
861{
862 uint32_t iIf = pNetwork->MacTab.cEntries;
863 while (iIf-- > 0)
864 {
865 if (pNetwork->MacTab.paEntries[iIf].pIf == pIf)
866 return &pNetwork->MacTab.paEntries[iIf];
867 }
868 return NULL;
869}
870
871
872/**
873 * Checks if the IPv6 address is a good interface address.
874 * @returns true/false.
875 * @param addr The address, network endian.
876 */
877DECLINLINE(bool) intnetR0IPv6AddrIsGood(RTNETADDRIPV6 addr)
878{
879 return !( ( addr.QWords.qw0 == 0 && addr.QWords.qw1 == 0) /* :: */
880 || ( (addr.Words.w0 & RT_H2BE_U16(0xff00)) == RT_H2BE_U16(0xff00)) /* multicast */
881 || ( addr.Words.w0 == 0 && addr.Words.w1 == 0
882 && addr.Words.w2 == 0 && addr.Words.w3 == 0
883 && addr.Words.w4 == 0 && addr.Words.w5 == 0
884 && addr.Words.w6 == 0 && addr.Words.w7 == RT_H2BE_U16(0x0001))); /* ::1 */
885}
886
887
888#if 0 /* unused */
889/**
890 * Checks if the IPv4 address is a broadcast address.
891 * @returns true/false.
892 * @param Addr The address, network endian.
893 */
894DECLINLINE(bool) intnetR0IPv4AddrIsBroadcast(RTNETADDRIPV4 Addr)
895{
896 /* Just check for 255.255.255.255 atm. */
897 return Addr.u == UINT32_MAX;
898}
899#endif /* unused */
900
901
902/**
903 * Checks if the IPv4 address is a good interface address.
904 * @returns true/false.
905 * @param Addr The address, network endian.
906 */
907DECLINLINE(bool) intnetR0IPv4AddrIsGood(RTNETADDRIPV4 Addr)
908{
909 /* Usual suspects. */
910 if ( Addr.u == UINT32_MAX /* 255.255.255.255 - broadcast. */
911 || Addr.au8[0] == 0) /* Current network, can be used as source address. */
912 return false;
913
914 /* Unusual suspects. */
915 if (RT_UNLIKELY( Addr.au8[0] == 127 /* Loopback */
916 || (Addr.au8[0] & 0xf0) == 224 /* Multicast */
917 ))
918 return false;
919 return true;
920}
921
922
923/**
924 * Gets the address size of a network layer type.
925 *
926 * @returns size in bytes.
927 * @param enmType The type.
928 */
929DECLINLINE(uint8_t) intnetR0AddrSize(INTNETADDRTYPE enmType)
930{
931 switch (enmType)
932 {
933 case kIntNetAddrType_IPv4: return 4;
934 case kIntNetAddrType_IPv6: return 16;
935 case kIntNetAddrType_IPX: return 4 + 6;
936 default: AssertFailedReturn(0);
937 }
938}
939
940
941/**
942 * Compares two address to see if they are equal, assuming naturally align structures.
943 *
944 * @returns true if equal, false if not.
945 * @param pAddr1 The first address.
946 * @param pAddr2 The second address.
947 * @param cbAddr The address size.
948 */
949DECLINLINE(bool) intnetR0AddrUIsEqualEx(PCRTNETADDRU pAddr1, PCRTNETADDRU pAddr2, uint8_t const cbAddr)
950{
951 switch (cbAddr)
952 {
953 case 4: /* IPv4 */
954 return pAddr1->au32[0] == pAddr2->au32[0];
955 case 16: /* IPv6 */
956 return pAddr1->au64[0] == pAddr2->au64[0]
957 && pAddr1->au64[1] == pAddr2->au64[1];
958 case 10: /* IPX */
959 return pAddr1->au64[0] == pAddr2->au64[0]
960 && pAddr1->au16[4] == pAddr2->au16[4];
961 default:
962 AssertFailedReturn(false);
963 }
964}
965
966
967/**
968 * Worker for intnetR0IfAddrCacheLookup that performs the lookup
969 * in the remaining cache entries after the caller has check the
970 * most likely ones.
971 *
972 * @returns -1 if not found, the index of the cache entry if found.
973 * @param pCache The cache.
974 * @param pAddr The address.
975 * @param cbAddr The address size (optimization).
976 */
977static int intnetR0IfAddrCacheLookupSlow(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
978{
979 unsigned i = pCache->cEntries - 2;
980 uint8_t const *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
981 while (i >= 1)
982 {
983 if (intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr))
984 return i;
985 pbEntry -= pCache->cbEntry;
986 i--;
987 }
988
989 return -1;
990}
991
992/**
993 * Lookup an address in a cache without any expectations.
994 *
995 * @returns -1 if not found, the index of the cache entry if found.
996 * @param pCache The cache.
997 * @param pAddr The address.
998 * @param cbAddr The address size (optimization).
999 */
1000DECLINLINE(int) intnetR0IfAddrCacheLookup(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
1001{
1002 Assert(pCache->cbAddress == cbAddr);
1003
1004 /*
1005 * The optimized case is when there is one cache entry and
1006 * it doesn't match.
1007 */
1008 unsigned i = pCache->cEntries;
1009 if ( i > 0
1010 && intnetR0AddrUIsEqualEx((PCRTNETADDRU)pCache->pbEntries, pAddr, cbAddr))
1011 return 0;
1012 if (i <= 1)
1013 return -1;
1014
1015 /*
1016 * Check the last entry.
1017 */
1018 i--;
1019 if (intnetR0AddrUIsEqualEx((PCRTNETADDRU)(pCache->pbEntries + pCache->cbEntry * i), pAddr, cbAddr))
1020 return i;
1021 if (i <= 1)
1022 return -1;
1023
1024 return intnetR0IfAddrCacheLookupSlow(pCache, pAddr, cbAddr);
1025}
1026
1027
1028/** Same as intnetR0IfAddrCacheLookup except we expect the address to be present already. */
1029DECLINLINE(int) intnetR0IfAddrCacheLookupLikely(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
1030{
1031 /** @todo implement this. */
1032 return intnetR0IfAddrCacheLookup(pCache, pAddr, cbAddr);
1033}
1034
1035#if 0 /* unused */
1036
1037/**
1038 * Worker for intnetR0IfAddrCacheLookupUnlikely that performs
1039 * the lookup in the remaining cache entries after the caller
1040 * has check the most likely ones.
1041 *
1042 * The routine is expecting not to find the address.
1043 *
1044 * @returns -1 if not found, the index of the cache entry if found.
1045 * @param pCache The cache.
1046 * @param pAddr The address.
1047 * @param cbAddr The address size (optimization).
1048 */
1049static int intnetR0IfAddrCacheInCacheUnlikelySlow(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
1050{
1051 /*
1052 * Perform a full table lookup.
1053 */
1054 unsigned i = pCache->cEntries - 2;
1055 uint8_t const *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
1056 while (i >= 1)
1057 {
1058 if (RT_UNLIKELY(intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr)))
1059 return i;
1060 pbEntry -= pCache->cbEntry;
1061 i--;
1062 }
1063
1064 return -1;
1065}
1066
1067
1068/**
1069 * Lookup an address in a cache expecting not to find it.
1070 *
1071 * @returns -1 if not found, the index of the cache entry if found.
1072 * @param pCache The cache.
1073 * @param pAddr The address.
1074 * @param cbAddr The address size (optimization).
1075 */
1076DECLINLINE(int) intnetR0IfAddrCacheLookupUnlikely(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
1077{
1078 Assert(pCache->cbAddress == cbAddr);
1079
1080 /*
1081 * The optimized case is when there is one cache entry and
1082 * it doesn't match.
1083 */
1084 unsigned i = pCache->cEntries;
1085 if (RT_UNLIKELY( i > 0
1086 && intnetR0AddrUIsEqualEx((PCRTNETADDRU)pCache->pbEntries, pAddr, cbAddr)))
1087 return 0;
1088 if (RT_LIKELY(i <= 1))
1089 return -1;
1090
1091 /*
1092 * Then check the last entry and return if there are just two cache entries.
1093 */
1094 i--;
1095 if (RT_UNLIKELY(intnetR0AddrUIsEqualEx((PCRTNETADDRU)(pCache->pbEntries + pCache->cbEntry * i), pAddr, cbAddr)))
1096 return i;
1097 if (i <= 1)
1098 return -1;
1099
1100 return intnetR0IfAddrCacheInCacheUnlikelySlow(pCache, pAddr, cbAddr);
1101}
1102
1103#endif /* unused */
1104
1105
1106/**
1107 * Deletes a specific cache entry.
1108 *
1109 * Worker for intnetR0NetworkAddrCacheDelete and intnetR0NetworkAddrCacheDeleteMinusIf.
1110 *
1111 * @param pIf The interface (for logging).
1112 * @param pCache The cache.
1113 * @param iEntry The entry to delete.
1114 * @param pszMsg Log message.
1115 */
1116static void intnetR0IfAddrCacheDeleteIt(PINTNETIF pIf, PINTNETADDRCACHE pCache, int iEntry, const char *pszMsg)
1117{
1118 AssertReturnVoid(iEntry < pCache->cEntries);
1119 AssertReturnVoid(iEntry >= 0);
1120#ifdef LOG_ENABLED
1121 INTNETADDRTYPE enmAddrType = (INTNETADDRTYPE)(uintptr_t)(pCache - &pIf->aAddrCache[0]);
1122 PCRTNETADDRU pAddr = (PCRTNETADDRU)(pCache->pbEntries + iEntry * pCache->cbEntry);
1123 switch (enmAddrType)
1124 {
1125 case kIntNetAddrType_IPv4:
1126 Log(("intnetR0IfAddrCacheDeleteIt: hIf=%#x MAC=%.6Rhxs IPv4 deleted #%d %RTnaipv4 %s\n",
1127 pIf->hIf, &pIf->MacAddr, iEntry, pAddr->IPv4, pszMsg));
1128 break;
1129 case kIntNetAddrType_IPv6:
1130 Log(("intnetR0IfAddrCacheDeleteIt: hIf=%#x MAC=%.6Rhxs IPv6 deleted #%d %RTnaipv6 %s\n",
1131 pIf->hIf, &pIf->MacAddr, iEntry, &pAddr->IPv6, pszMsg));
1132 break;
1133 default:
1134 Log(("intnetR0IfAddrCacheDeleteIt: hIf=%RX32 MAC=%.6Rhxs type=%d #%d %.*Rhxs %s\n",
1135 pIf->hIf, &pIf->MacAddr, enmAddrType, iEntry, pCache->cbAddress, pAddr, pszMsg));
1136 break;
1137 }
1138#else
1139 RT_NOREF2(pIf, pszMsg);
1140#endif
1141
1142 pCache->cEntries--;
1143 if (iEntry < pCache->cEntries)
1144 memmove(pCache->pbEntries + iEntry * pCache->cbEntry,
1145 pCache->pbEntries + (iEntry + 1) * pCache->cbEntry,
1146 (pCache->cEntries - iEntry) * pCache->cbEntry);
1147}
1148
1149
1150/**
1151 * Deletes an address from the cache, assuming it isn't actually in the cache.
1152 *
1153 * May or may not own the spinlock when calling this.
1154 *
1155 * @param pIf The interface (for logging).
1156 * @param pCache The cache.
1157 * @param pAddr The address.
1158 * @param cbAddr The address size (optimization).
1159 */
1160DECLINLINE(void) intnetR0IfAddrCacheDelete(PINTNETIF pIf, PINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr, const char *pszMsg)
1161{
1162 int i = intnetR0IfAddrCacheLookup(pCache, pAddr, cbAddr);
1163 if (RT_UNLIKELY(i >= 0))
1164 intnetR0IfAddrCacheDeleteIt(pIf, pCache, i, pszMsg);
1165}
1166
1167
1168/**
1169 * Deletes the address from all the interface caches.
1170 *
1171 * This is used to remove stale entries that has been reassigned to
1172 * other machines on the network.
1173 *
1174 * @param pNetwork The network.
1175 * @param pAddr The address.
1176 * @param enmType The address type.
1177 * @param cbAddr The address size (optimization).
1178 * @param pszMsg Log message.
1179 */
1180DECLINLINE(void) intnetR0NetworkAddrCacheDeleteLocked(PINTNETNETWORK pNetwork,
1181 PCRTNETADDRU pAddr, INTNETADDRTYPE enmType,
1182 uint8_t const cbAddr,
1183 const char *pszMsg)
1184{
1185 uint32_t iIf = pNetwork->MacTab.cEntries;
1186 while (iIf--)
1187 {
1188 PINTNETIF pIf = pNetwork->MacTab.paEntries[iIf].pIf;
1189
1190 int i = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmType], pAddr, cbAddr);
1191 if (RT_UNLIKELY(i >= 0))
1192 intnetR0IfAddrCacheDeleteIt(pIf, &pIf->aAddrCache[enmType], i, pszMsg);
1193 }
1194}
1195
1196
1197/**
1198 * Deletes the address from all the interface caches.
1199 *
1200 * This is used to remove stale entries that has been reassigned to
1201 * other machines on the network.
1202 *
1203 * @param pNetwork The network.
1204 * @param pAddr The address.
1205 * @param enmType The address type.
1206 * @param cbAddr The address size (optimization).
1207 * @param pszMsg Log message.
1208 */
1209DECLINLINE(void) intnetR0NetworkAddrCacheDelete(PINTNETNETWORK pNetwork, PCRTNETADDRU pAddr, INTNETADDRTYPE const enmType,
1210 uint8_t const cbAddr, const char *pszMsg)
1211{
1212 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1213
1214 intnetR0NetworkAddrCacheDeleteLocked(pNetwork, pAddr, enmType, cbAddr, pszMsg);
1215
1216 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1217}
1218
1219
1220#if 0 /* unused */
1221/**
1222 * Deletes the address from all the interface caches except the specified one.
1223 *
1224 * This is used to remove stale entries that has been reassigned to
1225 * other machines on the network.
1226 *
1227 * @param pNetwork The network.
1228 * @param pAddr The address.
1229 * @param enmType The address type.
1230 * @param cbAddr The address size (optimization).
1231 * @param pszMsg Log message.
1232 */
1233DECLINLINE(void) intnetR0NetworkAddrCacheDeleteMinusIf(PINTNETNETWORK pNetwork, PINTNETIF pIfSender, PCRTNETADDRU pAddr,
1234 INTNETADDRTYPE const enmType, uint8_t const cbAddr, const char *pszMsg)
1235{
1236 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1237
1238 uint32_t iIf = pNetwork->MacTab.cEntries;
1239 while (iIf--)
1240 {
1241 PINTNETIF pIf = pNetwork->MacTab.paEntries[iIf].pIf;
1242 if (pIf != pIfSender)
1243 {
1244 int i = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmType], pAddr, cbAddr);
1245 if (RT_UNLIKELY(i >= 0))
1246 intnetR0IfAddrCacheDeleteIt(pIf, &pIf->aAddrCache[enmType], i, pszMsg);
1247 }
1248 }
1249
1250 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1251}
1252#endif /* unused */
1253
1254
1255/**
1256 * Lookup an address on the network, returning the (first) interface having it
1257 * in its address cache.
1258 *
1259 * @returns Pointer to the interface on success, NULL if not found. The caller
1260 * must release the interface by calling intnetR0BusyDecIf.
1261 * @param pNetwork The network.
1262 * @param pAddr The address to lookup.
1263 * @param enmType The address type.
1264 * @param cbAddr The size of the address.
1265 */
1266DECLINLINE(PINTNETIF) intnetR0NetworkAddrCacheLookupIf(PINTNETNETWORK pNetwork, PCRTNETADDRU pAddr, INTNETADDRTYPE const enmType, uint8_t const cbAddr)
1267{
1268 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1269
1270 uint32_t iIf = pNetwork->MacTab.cEntries;
1271 while (iIf--)
1272 {
1273 PINTNETIF pIf = pNetwork->MacTab.paEntries[iIf].pIf;
1274 int i = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmType], pAddr, cbAddr);
1275 if (i >= 0)
1276 {
1277 intnetR0BusyIncIf(pIf);
1278 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1279 return pIf;
1280 }
1281 }
1282
1283 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1284 return NULL;
1285}
1286
1287
1288/**
1289 * Look up specified address in the network's blacklist.
1290 *
1291 * @param pNetwork The network.
1292 * @param enmType The address type.
1293 * @param pAddr The address.
1294 */
1295static bool intnetR0NetworkBlacklistLookup(PINTNETNETWORK pNetwork,
1296 PCRTNETADDRU pAddr, INTNETADDRTYPE enmType)
1297{
1298 PINTNETADDRCACHE pCache = &pNetwork->aAddrBlacklist[enmType];
1299
1300 if (RT_UNLIKELY(pCache->cEntriesAlloc == 0))
1301 return false;
1302
1303 const uint8_t cbAddr = pCache->cbAddress;
1304 Assert(cbAddr == intnetR0AddrSize(enmType));
1305
1306 for (unsigned i = 0; i < pCache->cEntries; ++i)
1307 {
1308 uint8_t *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
1309 if (intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr))
1310 return true;
1311 }
1312
1313 return false;
1314}
1315
1316
1317/**
1318 * Deletes specified address from network's blacklist.
1319 *
1320 * @param pNetwork The network.
1321 * @param enmType The address type.
1322 * @param pAddr The address.
1323 */
1324static void intnetR0NetworkBlacklistDelete(PINTNETNETWORK pNetwork,
1325 PCRTNETADDRU pAddr, INTNETADDRTYPE enmType)
1326{
1327 PINTNETADDRCACHE pCache = &pNetwork->aAddrBlacklist[enmType];
1328
1329 if (RT_UNLIKELY(pCache->cEntriesAlloc == 0))
1330 return;
1331
1332 const uint8_t cbAddr = pCache->cbAddress;
1333 Assert(cbAddr == intnetR0AddrSize(enmType));
1334
1335 for (unsigned i = 0; i < pCache->cEntries; ++i)
1336 {
1337 uint8_t *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
1338 if (!intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr))
1339 continue;
1340
1341 --pCache->cEntries;
1342 memmove(pCache->pbEntries + i * pCache->cbEntry,
1343 pCache->pbEntries + (i + 1) * pCache->cbEntry,
1344 (pCache->cEntries - i) * pCache->cbEntry);
1345 return;
1346 }
1347}
1348
1349
1350/**
1351 * Adds specified address from network's blacklist.
1352 *
1353 * @param pNetwork The network.
1354 * @param enmType The address type.
1355 * @param pAddr The address.
1356 */
1357static void intnetR0NetworkBlacklistAdd(PINTNETNETWORK pNetwork,
1358 PCRTNETADDRU pAddr, INTNETADDRTYPE enmType)
1359{
1360 PINTNETADDRCACHE pCache = &pNetwork->aAddrBlacklist[enmType];
1361
1362 if (RT_UNLIKELY(pCache->cEntriesAlloc == 0))
1363 return;
1364
1365 const uint8_t cbAddr = pCache->cbAddress;
1366 Assert(cbAddr == intnetR0AddrSize(enmType));
1367
1368 /* lookup */
1369 for (unsigned i = 0; i < pCache->cEntries; ++i)
1370 {
1371 uint8_t *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
1372 if (RT_UNLIKELY(intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr)))
1373 return; /* already exists */
1374 }
1375
1376 if (pCache->cEntries >= pCache->cEntriesAlloc)
1377 {
1378 /* shift */
1379 memmove(pCache->pbEntries, pCache->pbEntries + pCache->cbEntry,
1380 pCache->cbEntry * (pCache->cEntries - 1));
1381 --pCache->cEntries;
1382 }
1383
1384 Assert(pCache->cEntries < pCache->cEntriesAlloc);
1385
1386 /* push */
1387 uint8_t *pbEntry = pCache->pbEntries + pCache->cEntries * pCache->cbEntry;
1388 memcpy(pbEntry, pAddr, cbAddr);
1389 memset(pbEntry + pCache->cbAddress, '\0', pCache->cbEntry - cbAddr);
1390 ++pCache->cEntries;
1391
1392 Assert(pCache->cEntries <= pCache->cEntriesAlloc);
1393}
1394
1395
1396/**
1397 * Adds an address to the cache, the caller is responsible for making sure it's
1398 * not already in the cache.
1399 *
1400 * The caller must not
1401 *
1402 * @param pIf The interface (for logging).
1403 * @param pCache The address cache.
1404 * @param pAddr The address.
1405 * @param pszMsg log message.
1406 */
1407static void intnetR0IfAddrCacheAddIt(PINTNETIF pIf, INTNETADDRTYPE enmAddrType, PCRTNETADDRU pAddr,
1408 const char *pszMsg)
1409{
1410 PINTNETNETWORK pNetwork = pIf->pNetwork;
1411 AssertReturnVoid(pNetwork);
1412
1413 PINTNETADDRCACHE pCache = &pIf->aAddrCache[enmAddrType];
1414
1415#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
1416 const uint8_t cbAddr = pCache->cbAddress;
1417 Assert(cbAddr == intnetR0AddrSize(enmAddrType));
1418#endif
1419
1420 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1421
1422 bool fBlacklisted = intnetR0NetworkBlacklistLookup(pNetwork, pAddr, enmAddrType);
1423 if (fBlacklisted)
1424 {
1425 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1426
1427#ifdef LOG_ENABLED
1428 switch (enmAddrType)
1429 {
1430 case kIntNetAddrType_IPv4:
1431 Log(("%s: spoofing attempt for %RTnaipv4\n",
1432 __FUNCTION__, pAddr->IPv4));
1433 break;
1434 case kIntNetAddrType_IPv6:
1435 Log(("%s: spoofing attempt for %RTnaipv6\n",
1436 __FUNCTION__, &pAddr->IPv6));
1437 break;
1438 default:
1439 Log(("%s: spoofing attempt for %.*Rhxs (type %d)\n",
1440 __FUNCTION__, cbAddr, pAddr, enmAddrType));
1441 break;
1442 }
1443#endif
1444 return;
1445 }
1446
1447 if (RT_UNLIKELY(!pCache->cEntriesAlloc))
1448 {
1449 /* This shouldn't happen*/
1450 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1451 return;
1452 }
1453
1454 /* When the table is full, drop the older entry (FIFO). Do proper ageing? */
1455 if (pCache->cEntries >= pCache->cEntriesAlloc)
1456 {
1457 Log(("intnetR0IfAddrCacheAddIt: type=%d replacing %.*Rhxs\n",
1458 (int)(uintptr_t)(pCache - &pIf->aAddrCache[0]), pCache->cbAddress, pCache->pbEntries));
1459 memmove(pCache->pbEntries, pCache->pbEntries + pCache->cbEntry, pCache->cbEntry * (pCache->cEntries - 1));
1460 pCache->cEntries--;
1461 Assert(pCache->cEntries < pCache->cEntriesAlloc);
1462 }
1463
1464 /*
1465 * Add the new entry to the end of the array.
1466 */
1467 uint8_t *pbEntry = pCache->pbEntries + pCache->cEntries * pCache->cbEntry;
1468 memcpy(pbEntry, pAddr, pCache->cbAddress);
1469 memset(pbEntry + pCache->cbAddress, '\0', pCache->cbEntry - pCache->cbAddress);
1470
1471#ifdef LOG_ENABLED
1472 switch (enmAddrType)
1473 {
1474 case kIntNetAddrType_IPv4:
1475 Log(("intnetR0IfAddrCacheAddIt: hIf=%#x MAC=%.6Rhxs IPv4 added #%d %RTnaipv4 %s\n",
1476 pIf->hIf, &pIf->MacAddr, pCache->cEntries, pAddr->IPv4, pszMsg));
1477 break;
1478 case kIntNetAddrType_IPv6:
1479 Log(("intnetR0IfAddrCacheAddIt: hIf=%#x MAC=%.6Rhxs IPv6 added #%d %RTnaipv6 %s\n",
1480 pIf->hIf, &pIf->MacAddr, pCache->cEntries, &pAddr->IPv6, pszMsg));
1481 break;
1482 default:
1483 Log(("intnetR0IfAddrCacheAddIt: hIf=%#x MAC=%.6Rhxs type=%d added #%d %.*Rhxs %s\n",
1484 pIf->hIf, &pIf->MacAddr, enmAddrType, pCache->cEntries, pCache->cbAddress, pAddr, pszMsg));
1485 break;
1486 }
1487#else
1488 RT_NOREF1(pszMsg);
1489#endif
1490 pCache->cEntries++;
1491 Assert(pCache->cEntries <= pCache->cEntriesAlloc);
1492
1493 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1494}
1495
1496
1497/**
1498 * A intnetR0IfAddrCacheAdd worker that performs the rest of the lookup.
1499 *
1500 * @param pIf The interface (for logging).
1501 * @param pCache The address cache.
1502 * @param pAddr The address.
1503 * @param cbAddr The size of the address (optimization).
1504 * @param pszMsg Log message.
1505 */
1506static void intnetR0IfAddrCacheAddSlow(PINTNETIF pIf, INTNETADDRTYPE enmAddrType, PCRTNETADDRU pAddr,
1507 const char *pszMsg)
1508{
1509 PINTNETADDRCACHE pCache = &pIf->aAddrCache[enmAddrType];
1510
1511 const uint8_t cbAddr = pCache->cbAddress;
1512 Assert(cbAddr == intnetR0AddrSize(enmAddrType));
1513
1514 /*
1515 * Check all but the first and last entries, the caller
1516 * has already checked those.
1517 */
1518 int i = pCache->cEntries - 2;
1519 uint8_t const *pbEntry = pCache->pbEntries + pCache->cbEntry;
1520 while (i >= 1)
1521 {
1522 if (RT_LIKELY(intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr)))
1523 return;
1524 pbEntry += pCache->cbEntry;
1525 i--;
1526 }
1527
1528 /*
1529 * Not found, add it.
1530 */
1531 intnetR0IfAddrCacheAddIt(pIf, enmAddrType, pAddr, pszMsg);
1532}
1533
1534
1535/**
1536 * Adds an address to the cache if it's not already there.
1537 *
1538 * Must not own any spinlocks when calling this function.
1539 *
1540 * @param pIf The interface (for logging).
1541 * @param pCache The address cache.
1542 * @param pAddr The address.
1543 * @param cbAddr The size of the address (optimization).
1544 * @param pszMsg Log message.
1545 */
1546DECLINLINE(void) intnetR0IfAddrCacheAdd(PINTNETIF pIf, INTNETADDRTYPE enmAddrType, PCRTNETADDRU pAddr,
1547 const char *pszMsg)
1548{
1549 PINTNETADDRCACHE pCache = &pIf->aAddrCache[enmAddrType];
1550
1551 const uint8_t cbAddr = pCache->cbAddress;
1552 Assert(cbAddr == intnetR0AddrSize(enmAddrType));
1553
1554 /*
1555 * The optimized case is when the address the first or last cache entry.
1556 */
1557 unsigned i = pCache->cEntries;
1558 if (RT_LIKELY( i > 0
1559 && ( intnetR0AddrUIsEqualEx((PCRTNETADDRU)pCache->pbEntries, pAddr, cbAddr)
1560 || (i > 1
1561 && intnetR0AddrUIsEqualEx((PCRTNETADDRU)(pCache->pbEntries + pCache->cbEntry * (i-1)), pAddr, cbAddr))) ))
1562 return;
1563
1564 intnetR0IfAddrCacheAddSlow(pIf, enmAddrType, pAddr, pszMsg);
1565}
1566
1567
1568/**
1569 * Destroys the specified address cache.
1570 * @param pCache The address cache.
1571 */
1572static void intnetR0IfAddrCacheDestroy(PINTNETADDRCACHE pCache)
1573{
1574 void *pvFree = pCache->pbEntries;
1575 pCache->pbEntries = NULL;
1576 pCache->cEntries = 0;
1577 pCache->cEntriesAlloc = 0;
1578 RTMemFree(pvFree);
1579}
1580
1581
1582/**
1583 * Initialize the address cache for the specified address type.
1584 *
1585 * The cache storage is preallocated and fixed size so that we can handle
1586 * inserts from problematic contexts.
1587 *
1588 * @returns VINF_SUCCESS or VERR_NO_MEMORY.
1589 * @param pCache The cache to initialize.
1590 * @param enmAddrType The address type.
1591 * @param fEnabled Whether the address cache is enabled or not.
1592 */
1593static int intnetR0IfAddrCacheInit(PINTNETADDRCACHE pCache, INTNETADDRTYPE enmAddrType, bool fEnabled)
1594{
1595 pCache->cEntries = 0;
1596 pCache->cbAddress = intnetR0AddrSize(enmAddrType);
1597 pCache->cbEntry = RT_ALIGN(pCache->cbAddress, 4);
1598 if (fEnabled)
1599 {
1600 pCache->cEntriesAlloc = 32;
1601 pCache->pbEntries = (uint8_t *)RTMemAllocZ(pCache->cEntriesAlloc * pCache->cbEntry);
1602 if (!pCache->pbEntries)
1603 return VERR_NO_MEMORY;
1604 }
1605 else
1606 {
1607 pCache->cEntriesAlloc = 0;
1608 pCache->pbEntries = NULL;
1609 }
1610 return VINF_SUCCESS;
1611}
1612
1613
1614/**
1615 * Is it a multicast or broadcast MAC address?
1616 *
1617 * @returns true if multicast, false if not.
1618 * @param pMacAddr The address to inspect.
1619 */
1620DECL_FORCE_INLINE(bool) intnetR0IsMacAddrMulticast(PCRTMAC pMacAddr)
1621{
1622 return !!(pMacAddr->au8[0] & 0x01);
1623}
1624
1625
1626/**
1627 * Is it a dummy MAC address?
1628 *
1629 * We use dummy MAC addresses for interfaces which we don't know the MAC
1630 * address of because they haven't sent anything (learning) or explicitly set
1631 * it.
1632 *
1633 * @returns true if dummy, false if not.
1634 * @param pMacAddr The address to inspect.
1635 */
1636DECL_FORCE_INLINE(bool) intnetR0IsMacAddrDummy(PCRTMAC pMacAddr)
1637{
1638 /* The dummy address are broadcast addresses, don't bother check it all. */
1639 return pMacAddr->au16[0] == 0xffff;
1640}
1641
1642
1643/**
1644 * Compares two MAC addresses.
1645 *
1646 * @returns true if equal, false if not.
1647 * @param pDstAddr1 Address 1.
1648 * @param pDstAddr2 Address 2.
1649 */
1650DECL_FORCE_INLINE(bool) intnetR0AreMacAddrsEqual(PCRTMAC pDstAddr1, PCRTMAC pDstAddr2)
1651{
1652 return pDstAddr1->au16[2] == pDstAddr2->au16[2]
1653 && pDstAddr1->au16[1] == pDstAddr2->au16[1]
1654 && pDstAddr1->au16[0] == pDstAddr2->au16[0];
1655}
1656
1657
1658/**
1659 * Switch a unicast frame based on the network layer address (OSI level 3) and
1660 * return a destination table.
1661 *
1662 * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
1663 * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
1664 * @param pNetwork The network to switch on.
1665 * @param pDstMacAddr The destination MAC address.
1666 * @param enmL3AddrType The level-3 destination address type.
1667 * @param pL3Addr The level-3 destination address.
1668 * @param cbL3Addr The size of the level-3 destination address.
1669 * @param fSrc The frame source (INTNETTRUNKDIR_WIRE).
1670 * @param pDstTab The destination output table.
1671 */
1672static INTNETSWDECISION intnetR0NetworkSwitchLevel3(PINTNETNETWORK pNetwork, PCRTMAC pDstMacAddr,
1673 INTNETADDRTYPE enmL3AddrType, PCRTNETADDRU pL3Addr, uint8_t cbL3Addr,
1674 uint32_t fSrc, PINTNETDSTTAB pDstTab)
1675{
1676 Assert(fSrc == INTNETTRUNKDIR_WIRE);
1677
1678 /*
1679 * Grab the spinlock first and do the switching.
1680 */
1681 PINTNETMACTAB pTab = &pNetwork->MacTab;
1682 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1683
1684 pDstTab->fTrunkDst = 0;
1685 pDstTab->pTrunk = 0;
1686 pDstTab->cIfs = 0;
1687
1688 /* Find exactly matching or promiscuous interfaces. */
1689 uint32_t cExactHits = 0;
1690 uint32_t iIfMac = pTab->cEntries;
1691 while (iIfMac-- > 0)
1692 {
1693 if (pTab->paEntries[iIfMac].fActive)
1694 {
1695 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1696 bool fExact = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmL3AddrType], pL3Addr, cbL3Addr) >= 0;
1697 if (fExact || pTab->paEntries[iIfMac].fPromiscuousSeeTrunk)
1698 {
1699 cExactHits += fExact;
1700
1701 uint32_t iIfDst = pDstTab->cIfs++;
1702 pDstTab->aIfs[iIfDst].pIf = pIf;
1703 pDstTab->aIfs[iIfDst].fReplaceDstMac = fExact;
1704 intnetR0BusyIncIf(pIf);
1705
1706 if (fExact)
1707 pDstMacAddr = &pIf->MacAddr; /* Avoids duplicates being sent to the host. */
1708 }
1709 }
1710 }
1711
1712 /* Network only promicuous mode ifs should see related trunk traffic. */
1713 if ( cExactHits
1714 && fSrc
1715 && pNetwork->MacTab.cPromiscuousNoTrunkEntries)
1716 {
1717 iIfMac = pTab->cEntries;
1718 while (iIfMac-- > 0)
1719 {
1720 if ( pTab->paEntries[iIfMac].fActive
1721 && pTab->paEntries[iIfMac].fPromiscuousEff
1722 && !pTab->paEntries[iIfMac].fPromiscuousSeeTrunk)
1723 {
1724 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1725 if (intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmL3AddrType], pL3Addr, cbL3Addr) < 0)
1726 {
1727 uint32_t iIfDst = pDstTab->cIfs++;
1728 pDstTab->aIfs[iIfDst].pIf = pIf;
1729 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
1730 intnetR0BusyIncIf(pIf);
1731 }
1732 }
1733 }
1734 }
1735
1736 /* Does it match the host, or is the host promiscuous? */
1737 if (pTab->fHostActive)
1738 {
1739 bool fExact = intnetR0AreMacAddrsEqual(&pTab->HostMac, pDstMacAddr);
1740 if ( fExact
1741 || intnetR0IsMacAddrDummy(&pTab->HostMac)
1742 || pTab->fHostPromiscuousEff)
1743 {
1744 cExactHits += fExact;
1745 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
1746 }
1747 }
1748
1749 /* Hit the wire if there are no exact matches or if it's in promiscuous mode. */
1750 if (pTab->fWireActive && (!cExactHits || pTab->fWirePromiscuousEff))
1751 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
1752 pDstTab->fTrunkDst &= ~fSrc;
1753 if (pDstTab->fTrunkDst)
1754 {
1755 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
1756 pDstTab->pTrunk = pTrunk;
1757 intnetR0BusyIncTrunk(pTrunk);
1758 }
1759
1760 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1761 return pDstTab->cIfs
1762 ? (!pDstTab->fTrunkDst ? INTNETSWDECISION_INTNET : INTNETSWDECISION_BROADCAST)
1763 : (!pDstTab->fTrunkDst ? INTNETSWDECISION_DROP : INTNETSWDECISION_TRUNK);
1764}
1765
1766
1767/**
1768 * Pre-switch a unicast MAC address.
1769 *
1770 * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
1771 * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
1772 * @param pNetwork The network to switch on.
1773 * @param fSrc The frame source.
1774 * @param pSrcAddr The source address of the frame.
1775 * @param pDstAddr The destination address of the frame.
1776 */
1777static INTNETSWDECISION intnetR0NetworkPreSwitchUnicast(PINTNETNETWORK pNetwork, uint32_t fSrc, PCRTMAC pSrcAddr,
1778 PCRTMAC pDstAddr)
1779{
1780 Assert(!intnetR0IsMacAddrMulticast(pDstAddr));
1781 Assert(fSrc);
1782
1783 /*
1784 * Grab the spinlock first and do the switching.
1785 */
1786 INTNETSWDECISION enmSwDecision = INTNETSWDECISION_BROADCAST;
1787 PINTNETMACTAB pTab = &pNetwork->MacTab;
1788 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1789
1790 /* Iterate the internal network interfaces and look for matching source and
1791 destination addresses. */
1792 uint32_t iIfMac = pTab->cEntries;
1793 while (iIfMac-- > 0)
1794 {
1795 if (pTab->paEntries[iIfMac].fActive)
1796 {
1797 /* Unknown interface address? */
1798 if (intnetR0IsMacAddrDummy(&pTab->paEntries[iIfMac].MacAddr))
1799 break;
1800
1801 /* Paranoia - this shouldn't happen, right? */
1802 if ( pSrcAddr
1803 && intnetR0AreMacAddrsEqual(&pTab->paEntries[iIfMac].MacAddr, pSrcAddr))
1804 break;
1805
1806 /* Exact match? */
1807 if (intnetR0AreMacAddrsEqual(&pTab->paEntries[iIfMac].MacAddr, pDstAddr))
1808 {
1809 enmSwDecision = pTab->fHostPromiscuousEff && fSrc == INTNETTRUNKDIR_WIRE
1810 ? INTNETSWDECISION_BROADCAST
1811 : INTNETSWDECISION_INTNET;
1812 break;
1813 }
1814 }
1815 }
1816
1817 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1818 return enmSwDecision;
1819}
1820
1821
1822/**
1823 * Switch a unicast MAC address and return a destination table.
1824 *
1825 * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
1826 * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
1827 * @param pNetwork The network to switch on.
1828 * @param fSrc The frame source.
1829 * @param pIfSender The sender interface, NULL if trunk. Used to
1830 * prevent sending an echo to the sender.
1831 * @param pDstAddr The destination address of the frame.
1832 * @param pDstTab The destination output table.
1833 */
1834static INTNETSWDECISION intnetR0NetworkSwitchUnicast(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETIF pIfSender,
1835 PCRTMAC pDstAddr, PINTNETDSTTAB pDstTab)
1836{
1837 AssertPtr(pDstTab);
1838 Assert(!intnetR0IsMacAddrMulticast(pDstAddr));
1839
1840 /*
1841 * Grab the spinlock first and do the switching.
1842 */
1843 PINTNETMACTAB pTab = &pNetwork->MacTab;
1844 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1845
1846 pDstTab->fTrunkDst = 0;
1847 pDstTab->pTrunk = 0;
1848 pDstTab->cIfs = 0;
1849
1850 /* Find exactly matching or promiscuous interfaces. */
1851 uint32_t cExactHits = 0;
1852 uint32_t iIfMac = pTab->cEntries;
1853 while (iIfMac-- > 0)
1854 {
1855 if (pTab->paEntries[iIfMac].fActive)
1856 {
1857 bool fExact = intnetR0AreMacAddrsEqual(&pTab->paEntries[iIfMac].MacAddr, pDstAddr);
1858 if ( fExact
1859 || intnetR0IsMacAddrDummy(&pTab->paEntries[iIfMac].MacAddr)
1860 || ( pTab->paEntries[iIfMac].fPromiscuousSeeTrunk
1861 || (!fSrc && pTab->paEntries[iIfMac].fPromiscuousEff) )
1862 )
1863 {
1864 cExactHits += fExact;
1865
1866 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1867 if (RT_LIKELY(pIf != pIfSender)) /* paranoia */
1868 {
1869 uint32_t iIfDst = pDstTab->cIfs++;
1870 pDstTab->aIfs[iIfDst].pIf = pIf;
1871 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
1872 intnetR0BusyIncIf(pIf);
1873 }
1874 }
1875 }
1876 }
1877
1878 /* Network only promicuous mode ifs should see related trunk traffic. */
1879 if ( cExactHits
1880 && fSrc
1881 && pNetwork->MacTab.cPromiscuousNoTrunkEntries)
1882 {
1883 iIfMac = pTab->cEntries;
1884 while (iIfMac-- > 0)
1885 {
1886 if ( pTab->paEntries[iIfMac].fPromiscuousEff
1887 && !pTab->paEntries[iIfMac].fPromiscuousSeeTrunk
1888 && pTab->paEntries[iIfMac].fActive
1889 && !intnetR0AreMacAddrsEqual(&pTab->paEntries[iIfMac].MacAddr, pDstAddr)
1890 && !intnetR0IsMacAddrDummy(&pTab->paEntries[iIfMac].MacAddr) )
1891 {
1892 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1893 uint32_t iIfDst = pDstTab->cIfs++;
1894 pDstTab->aIfs[iIfDst].pIf = pIf;
1895 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
1896 intnetR0BusyIncIf(pIf);
1897 }
1898 }
1899 }
1900
1901 /* Does it match the host, or is the host promiscuous? */
1902 if ( fSrc != INTNETTRUNKDIR_HOST
1903 && pTab->fHostActive)
1904 {
1905 bool fExact = intnetR0AreMacAddrsEqual(&pTab->HostMac, pDstAddr);
1906 if ( fExact
1907 || intnetR0IsMacAddrDummy(&pTab->HostMac)
1908 || pTab->fHostPromiscuousEff)
1909 {
1910 cExactHits += fExact;
1911 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
1912 }
1913 }
1914
1915 /* Hit the wire if there are no exact matches or if it's in promiscuous mode. */
1916 if ( fSrc != INTNETTRUNKDIR_WIRE
1917 && pTab->fWireActive
1918 && (!cExactHits || pTab->fWirePromiscuousEff)
1919 )
1920 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
1921
1922 /* Grab the trunk if we're sending to it. */
1923 if (pDstTab->fTrunkDst)
1924 {
1925 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
1926 pDstTab->pTrunk = pTrunk;
1927 intnetR0BusyIncTrunk(pTrunk);
1928 }
1929
1930 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1931 return pDstTab->cIfs
1932 ? (!pDstTab->fTrunkDst ? INTNETSWDECISION_INTNET : INTNETSWDECISION_BROADCAST)
1933 : (!pDstTab->fTrunkDst ? INTNETSWDECISION_DROP : INTNETSWDECISION_TRUNK);
1934}
1935
1936
1937/**
1938 * Create a destination table for a broadcast frame.
1939 *
1940 * @returns INTNETSWDECISION_BROADCAST.
1941 * @param pNetwork The network to switch on.
1942 * @param fSrc The frame source.
1943 * @param pIfSender The sender interface, NULL if trunk. Used to
1944 * prevent sending an echo to the sender.
1945 * @param pDstTab The destination output table.
1946 */
1947static INTNETSWDECISION intnetR0NetworkSwitchBroadcast(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETIF pIfSender,
1948 PINTNETDSTTAB pDstTab)
1949{
1950 AssertPtr(pDstTab);
1951
1952 /*
1953 * Grab the spinlock first and record all active interfaces.
1954 */
1955 PINTNETMACTAB pTab = &pNetwork->MacTab;
1956 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1957
1958 pDstTab->fTrunkDst = 0;
1959 pDstTab->pTrunk = 0;
1960 pDstTab->cIfs = 0;
1961
1962 /* Regular interfaces. */
1963 uint32_t iIfMac = pTab->cEntries;
1964 while (iIfMac-- > 0)
1965 {
1966 if (pTab->paEntries[iIfMac].fActive)
1967 {
1968 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1969 if (pIf != pIfSender)
1970 {
1971 uint32_t iIfDst = pDstTab->cIfs++;
1972 pDstTab->aIfs[iIfDst].pIf = pIf;
1973 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
1974 intnetR0BusyIncIf(pIf);
1975 }
1976 }
1977 }
1978
1979 /* The trunk interface. */
1980 if (pTab->fHostActive)
1981 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
1982 if (pTab->fWireActive)
1983 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
1984 pDstTab->fTrunkDst &= ~fSrc;
1985 if (pDstTab->fTrunkDst)
1986 {
1987 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
1988 pDstTab->pTrunk = pTrunk;
1989 intnetR0BusyIncTrunk(pTrunk);
1990 }
1991
1992 RTSpinlockRelease(pNetwork->hAddrSpinlock);
1993 return INTNETSWDECISION_BROADCAST;
1994}
1995
1996
1997/**
1998 * Create a destination table with the trunk and any promiscuous interfaces.
1999 *
2000 * This is only used in a fallback case of the level-3 switching, so we can
2001 * assume the wire as source and skip the sender interface filtering.
2002 *
2003 * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
2004 * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
2005 * @param pNetwork The network to switch on.
2006 * @param fSrc The frame source.
2007 * @param pDstTab The destination output table.
2008 */
2009static INTNETSWDECISION intnetR0NetworkSwitchTrunkAndPromisc(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETDSTTAB pDstTab)
2010{
2011 Assert(fSrc == INTNETTRUNKDIR_WIRE);
2012
2013 /*
2014 * Grab the spinlock first and do the switching.
2015 */
2016 PINTNETMACTAB pTab = &pNetwork->MacTab;
2017 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
2018
2019 pDstTab->fTrunkDst = 0;
2020 pDstTab->pTrunk = 0;
2021 pDstTab->cIfs = 0;
2022
2023 /* Find promiscuous interfaces. */
2024 uint32_t iIfMac = pTab->cEntries;
2025 while (iIfMac-- > 0)
2026 {
2027 if ( pTab->paEntries[iIfMac].fActive
2028 && ( pTab->paEntries[iIfMac].fPromiscuousSeeTrunk
2029 || (!fSrc && pTab->paEntries[iIfMac].fPromiscuousEff) )
2030 )
2031 {
2032 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
2033 uint32_t iIfDst = pDstTab->cIfs++;
2034 pDstTab->aIfs[iIfDst].pIf = pIf;
2035 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
2036 intnetR0BusyIncIf(pIf);
2037 }
2038 }
2039
2040 /* The trunk interface. */
2041 if (pTab->fHostActive)
2042 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
2043 if (pTab->fWireActive)
2044 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
2045 pDstTab->fTrunkDst &= ~fSrc;
2046 if (pDstTab->fTrunkDst)
2047 {
2048 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
2049 pDstTab->pTrunk = pTrunk;
2050 intnetR0BusyIncTrunk(pTrunk);
2051 }
2052
2053 RTSpinlockRelease(pNetwork->hAddrSpinlock);
2054 return !pDstTab->cIfs
2055 ? (!pDstTab->fTrunkDst ? INTNETSWDECISION_DROP : INTNETSWDECISION_TRUNK)
2056 : (!pDstTab->fTrunkDst ? INTNETSWDECISION_INTNET : INTNETSWDECISION_BROADCAST);
2057}
2058
2059
2060/**
2061 * Create a destination table for a trunk frame.
2062 *
2063 * @returns INTNETSWDECISION_BROADCAST.
2064 * @param pNetwork The network to switch on.
2065 * @param fSrc The frame source.
2066 * @param pDstTab The destination output table.
2067 */
2068static INTNETSWDECISION intnetR0NetworkSwitchTrunk(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETDSTTAB pDstTab)
2069{
2070 AssertPtr(pDstTab);
2071
2072 /*
2073 * Grab the spinlock first and record all active interfaces.
2074 */
2075 PINTNETMACTAB pTab= &pNetwork->MacTab;
2076 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
2077
2078 pDstTab->fTrunkDst = 0;
2079 pDstTab->pTrunk = 0;
2080 pDstTab->cIfs = 0;
2081
2082 /* The trunk interface. */
2083 if (pTab->fHostActive)
2084 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
2085 if (pTab->fWireActive)
2086 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
2087 pDstTab->fTrunkDst &= ~fSrc;
2088 if (pDstTab->fTrunkDst)
2089 {
2090 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
2091 pDstTab->pTrunk = pTrunk;
2092 intnetR0BusyIncTrunk(pTrunk);
2093 }
2094
2095 RTSpinlockRelease(pNetwork->hAddrSpinlock);
2096 return pDstTab->fTrunkDst ? INTNETSWDECISION_TRUNK : INTNETSWDECISION_DROP;
2097}
2098
2099
2100/**
2101 * Wrapper around RTMemAlloc for allocating a destination table.
2102 *
2103 * @returns VINF_SUCCESS or VERR_NO_MEMORY.
2104 * @param cEntries The size given as an entry count.
2105 * @param ppDstTab Where to store the pointer (always).
2106 */
2107DECLINLINE(int) intnetR0AllocDstTab(uint32_t cEntries, PINTNETDSTTAB *ppDstTab)
2108{
2109 PINTNETDSTTAB pDstTab;
2110 *ppDstTab = pDstTab = (PINTNETDSTTAB)RTMemAlloc(RT_OFFSETOF(INTNETDSTTAB, aIfs[cEntries]));
2111 if (RT_UNLIKELY(!pDstTab))
2112 return VERR_NO_MEMORY;
2113 return VINF_SUCCESS;
2114}
2115
2116
2117/**
2118 * Ensures that there is space for another interface in the MAC address lookup
2119 * table as well as all the destination tables.
2120 *
2121 * The caller must own the create/open/destroy mutex.
2122 *
2123 * @returns VINF_SUCCESS, VERR_NO_MEMORY or VERR_OUT_OF_RANGE.
2124 * @param pNetwork The network to operate on.
2125 */
2126static int intnetR0NetworkEnsureTabSpace(PINTNETNETWORK pNetwork)
2127{
2128 /*
2129 * The cEntries and cEntriesAllocated members are only updated while
2130 * owning the big mutex, so we only need the spinlock when doing the
2131 * actual table replacing.
2132 */
2133 PINTNETMACTAB pTab = &pNetwork->MacTab;
2134 int rc = VINF_SUCCESS;
2135 AssertReturn(pTab->cEntries <= pTab->cEntriesAllocated, VERR_INTERNAL_ERROR_2);
2136 if (pTab->cEntries + 1 > pTab->cEntriesAllocated)
2137 {
2138 uint32_t const cAllocated = pTab->cEntriesAllocated + INTNET_GROW_DSTTAB_SIZE;
2139 if (cAllocated <= INTNET_MAX_IFS)
2140 {
2141 /*
2142 * Resize the destination tables first, this can be kind of tedious.
2143 */
2144 for (uint32_t i = 0; i < pTab->cEntries; i++)
2145 {
2146 PINTNETIF pIf = pTab->paEntries[i].pIf; AssertPtr(pIf);
2147 PINTNETDSTTAB pNew;
2148 rc = intnetR0AllocDstTab(cAllocated, &pNew);
2149 if (RT_FAILURE(rc))
2150 break;
2151
2152 for (;;)
2153 {
2154 PINTNETDSTTAB pOld = pIf->pDstTab;
2155 if ( pOld
2156 && ASMAtomicCmpXchgPtr(&pIf->pDstTab, pNew, pOld))
2157 {
2158 RTMemFree(pOld);
2159 break;
2160 }
2161 intnetR0BusyWait(pNetwork, &pIf->cBusy);
2162 }
2163 }
2164
2165 /*
2166 * The trunk.
2167 */
2168 if ( RT_SUCCESS(rc)
2169 && pNetwork->MacTab.pTrunk)
2170 {
2171 AssertCompileAdjacentMembers(INTNETTRUNKIF, apTaskDstTabs, apIntDstTabs);
2172 PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
2173 PINTNETDSTTAB * const ppEndDstTab = &pTrunk->apIntDstTabs[pTrunk->cIntDstTabs];
2174 for (PINTNETDSTTAB *ppDstTab = &pTrunk->apTaskDstTabs[0];
2175 ppDstTab != ppEndDstTab && RT_SUCCESS(rc);
2176 ppDstTab++)
2177 {
2178 PINTNETDSTTAB pNew;
2179 rc = intnetR0AllocDstTab(cAllocated, &pNew);
2180 if (RT_FAILURE(rc))
2181 break;
2182
2183 for (;;)
2184 {
2185 RTSpinlockAcquire(pTrunk->hDstTabSpinlock);
2186 void *pvOld = *ppDstTab;
2187 if (pvOld)
2188 *ppDstTab = pNew;
2189 RTSpinlockRelease(pTrunk->hDstTabSpinlock);
2190 if (pvOld)
2191 {
2192 RTMemFree(pvOld);
2193 break;
2194 }
2195 intnetR0BusyWait(pNetwork, &pTrunk->cBusy);
2196 }
2197 }
2198 }
2199
2200 /*
2201 * The MAC Address table itself.
2202 */
2203 if (RT_SUCCESS(rc))
2204 {
2205 PINTNETMACTABENTRY paNew = (PINTNETMACTABENTRY)RTMemAlloc(sizeof(INTNETMACTABENTRY) * cAllocated);
2206 if (paNew)
2207 {
2208 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
2209
2210 PINTNETMACTABENTRY paOld = pTab->paEntries;
2211 uint32_t i = pTab->cEntries;
2212 while (i-- > 0)
2213 {
2214 paNew[i] = paOld[i];
2215
2216 paOld[i].fActive = false;
2217 paOld[i].pIf = NULL;
2218 }
2219
2220 pTab->paEntries = paNew;
2221 pTab->cEntriesAllocated = cAllocated;
2222
2223 RTSpinlockRelease(pNetwork->hAddrSpinlock);
2224
2225 RTMemFree(paOld);
2226 }
2227 else
2228 rc = VERR_NO_MEMORY;
2229 }
2230 }
2231 else
2232 rc = VERR_OUT_OF_RANGE;
2233 }
2234 return rc;
2235}
2236
2237
2238
2239
2240#ifdef INTNET_WITH_DHCP_SNOOPING
2241
2242/**
2243 * Snoops IP assignments and releases from the DHCPv4 traffic.
2244 *
2245 * The caller is responsible for making sure this traffic between the
2246 * BOOTPS and BOOTPC ports and validate the IP header. The UDP packet
2247 * need not be validated beyond the ports.
2248 *
2249 * @param pNetwork The network this frame was seen on.
2250 * @param pIpHdr Pointer to a valid IP header. This is for pseudo
2251 * header validation, so only the minimum header size
2252 * needs to be available and valid here.
2253 * @param pUdpHdr Pointer to the UDP header in the frame.
2254 * @param cbUdpPkt What's left of the frame when starting at the UDP header.
2255 * @param fGso Set if this is a GSO frame, clear if regular.
2256 */
2257static void intnetR0NetworkSnoopDhcp(PINTNETNETWORK pNetwork, PCRTNETIPV4 pIpHdr, PCRTNETUDP pUdpHdr, uint32_t cbUdpPkt)
2258{
2259 /*
2260 * Check if the DHCP message is valid and get the type.
2261 */
2262 if (!RTNetIPv4IsUDPValid(pIpHdr, pUdpHdr, pUdpHdr + 1, cbUdpPkt, true /*fCheckSum*/))
2263 {
2264 Log6(("Bad UDP packet\n"));
2265 return;
2266 }
2267 PCRTNETBOOTP pDhcp = (PCRTNETBOOTP)(pUdpHdr + 1);
2268 uint8_t MsgType;
2269 if (!RTNetIPv4IsDHCPValid(pUdpHdr, pDhcp, cbUdpPkt - sizeof(*pUdpHdr), &MsgType))
2270 {
2271 Log6(("Bad DHCP packet\n"));
2272 return;
2273 }
2274
2275#ifdef LOG_ENABLED
2276 /*
2277 * Log it.
2278 */
2279 const char *pszType = "unknown";
2280 switch (MsgType)
2281 {
2282 case RTNET_DHCP_MT_DISCOVER: pszType = "discover"; break;
2283 case RTNET_DHCP_MT_OFFER: pszType = "offer"; break;
2284 case RTNET_DHCP_MT_REQUEST: pszType = "request"; break;
2285 case RTNET_DHCP_MT_DECLINE: pszType = "decline"; break;
2286 case RTNET_DHCP_MT_ACK: pszType = "ack"; break;
2287 case RTNET_DHCP_MT_NAC: pszType = "nac"; break;
2288 case RTNET_DHCP_MT_RELEASE: pszType = "release"; break;
2289 case RTNET_DHCP_MT_INFORM: pszType = "inform"; break;
2290 }
2291 Log6(("DHCP msg: %d (%s) client %.6Rhxs ciaddr=%d.%d.%d.%d yiaddr=%d.%d.%d.%d\n", MsgType, pszType, &pDhcp->bp_chaddr,
2292 pDhcp->bp_ciaddr.au8[0], pDhcp->bp_ciaddr.au8[1], pDhcp->bp_ciaddr.au8[2], pDhcp->bp_ciaddr.au8[3],
2293 pDhcp->bp_yiaddr.au8[0], pDhcp->bp_yiaddr.au8[1], pDhcp->bp_yiaddr.au8[2], pDhcp->bp_yiaddr.au8[3]));
2294#endif /* LOG_EANBLED */
2295
2296 /*
2297 * Act upon the message.
2298 */
2299 switch (MsgType)
2300 {
2301#if 0
2302 case RTNET_DHCP_MT_REQUEST:
2303 /** @todo Check for valid non-broadcast requests w/ IP for any of the MACs we
2304 * know, and add the IP to the cache. */
2305 break;
2306#endif
2307
2308
2309 /*
2310 * Lookup the interface by its MAC address and insert the IPv4 address into the cache.
2311 * Delete the old client address first, just in case it changed in a renewal.
2312 */
2313 case RTNET_DHCP_MT_ACK:
2314 if (intnetR0IPv4AddrIsGood(pDhcp->bp_yiaddr))
2315 {
2316 PINTNETIF pMatchingIf = NULL;
2317 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
2318
2319 uint32_t iIf = pNetwork->MacTab.cEntries;
2320 while (iIf-- > 0)
2321 {
2322 PINTNETIF pCur = pNetwork->MacTab.paEntries[iIf].pIf;
2323 if ( intnetR0IfHasMacAddr(pCur)
2324 && !memcmp(&pCur->MacAddr, &pDhcp->bp_chaddr, sizeof(RTMAC)))
2325 {
2326 intnetR0IfAddrCacheDelete(pCur, &pCur->aAddrCache[kIntNetAddrType_IPv4],
2327 (PCRTNETADDRU)&pDhcp->bp_ciaddr, sizeof(RTNETADDRIPV4), "DHCP_MT_ACK");
2328 if (!pMatchingIf)
2329 {
2330 pMatchingIf = pCur;
2331 intnetR0BusyIncIf(pMatchingIf);
2332 }
2333 }
2334 }
2335
2336 RTSpinlockRelease(pNetwork->hAddrSpinlock);
2337
2338 if (pMatchingIf)
2339 {
2340 intnetR0IfAddrCacheAdd(pMatchingIf, kIntNetAddrType_IPv4,
2341 (PCRTNETADDRU)&pDhcp->bp_yiaddr, "DHCP_MT_ACK");
2342 intnetR0BusyDecIf(pMatchingIf);
2343 }
2344 }
2345 return;
2346
2347
2348 /*
2349 * Lookup the interface by its MAC address and remove the IPv4 address(es) from the cache.
2350 */
2351 case RTNET_DHCP_MT_RELEASE:
2352 {
2353 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
2354
2355 uint32_t iIf = pNetwork->MacTab.cEntries;
2356 while (iIf-- > 0)
2357 {
2358 PINTNETIF pCur = pNetwork->MacTab.paEntries[iIf].pIf;
2359 if ( intnetR0IfHasMacAddr(pCur)
2360 && !memcmp(&pCur->MacAddr, &pDhcp->bp_chaddr, sizeof(RTMAC)))
2361 {
2362 intnetR0IfAddrCacheDelete(pCur, &pCur->aAddrCache[kIntNetAddrType_IPv4],
2363 (PCRTNETADDRU)&pDhcp->bp_ciaddr, sizeof(RTNETADDRIPV4), "DHCP_MT_RELEASE");
2364 intnetR0IfAddrCacheDelete(pCur, &pCur->aAddrCache[kIntNetAddrType_IPv4],
2365 (PCRTNETADDRU)&pDhcp->bp_yiaddr, sizeof(RTNETADDRIPV4), "DHCP_MT_RELEASE");
2366 }
2367 }
2368
2369 RTSpinlockRelease(pNetwork->hAddrSpinlock);
2370 break;
2371 }
2372 }
2373
2374}
2375
2376
2377/**
2378 * Worker for intnetR0TrunkIfSnoopAddr that takes care of what
2379 * is likely to be a DHCP message.
2380 *
2381 * The caller has already check that the UDP source and destination ports
2382 * are BOOTPS or BOOTPC.
2383 *
2384 * @param pNetwork The network this frame was seen on.
2385 * @param pSG The gather list for the frame.
2386 */
2387static void intnetR0TrunkIfSnoopDhcp(PINTNETNETWORK pNetwork, PCINTNETSG pSG)
2388{
2389 /*
2390 * Get a pointer to a linear copy of the full packet, using the
2391 * temporary buffer if necessary.
2392 */
2393 PCRTNETIPV4 pIpHdr = (PCRTNETIPV4)((PCRTNETETHERHDR)pSG->aSegs[0].pv + 1);
2394 uint32_t cbPacket = pSG->cbTotal - sizeof(RTNETETHERHDR);
2395 if (pSG->cSegsUsed > 1)
2396 {
2397 cbPacket = RT_MIN(cbPacket, INTNETNETWORK_TMP_SIZE);
2398 Log6(("intnetR0TrunkIfSnoopDhcp: Copying IPv4/UDP/DHCP pkt %u\n", cbPacket));
2399 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), cbPacket, pNetwork->pbTmp))
2400 return;
2401 //pSG->fFlags |= INTNETSG_FLAGS_PKT_CP_IN_TMP;
2402 pIpHdr = (PCRTNETIPV4)pNetwork->pbTmp;
2403 }
2404
2405 /*
2406 * Validate the IP header and find the UDP packet.
2407 */
2408 if (!RTNetIPv4IsHdrValid(pIpHdr, cbPacket, pSG->cbTotal - sizeof(RTNETETHERHDR), true /*fChecksum*/))
2409 {
2410 Log(("intnetR0TrunkIfSnoopDhcp: bad ip header\n"));
2411 return;
2412 }
2413 uint32_t cbIpHdr = pIpHdr->ip_hl * 4;
2414
2415 /*
2416 * Hand it over to the common DHCP snooper.
2417 */
2418 intnetR0NetworkSnoopDhcp(pNetwork, pIpHdr, (PCRTNETUDP)((uintptr_t)pIpHdr + cbIpHdr), cbPacket - cbIpHdr);
2419}
2420
2421#endif /* INTNET_WITH_DHCP_SNOOPING */
2422
2423
2424/**
2425 * Snoops up source addresses from ARP requests and purge these from the address
2426 * caches.
2427 *
2428 * The purpose of this purging is to get rid of stale addresses.
2429 *
2430 * @param pNetwork The network this frame was seen on.
2431 * @param pSG The gather list for the frame.
2432 */
2433static void intnetR0TrunkIfSnoopArp(PINTNETNETWORK pNetwork, PCINTNETSG pSG)
2434{
2435 /*
2436 * Check the minimum size first.
2437 */
2438 if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + sizeof(RTNETARPIPV4)))
2439 return;
2440
2441 /*
2442 * Copy to temporary buffer if necessary.
2443 */
2444 uint32_t cbPacket = RT_MIN(pSG->cbTotal, sizeof(RTNETARPIPV4));
2445 PCRTNETARPIPV4 pArpIPv4 = (PCRTNETARPIPV4)((uintptr_t)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
2446 if ( pSG->cSegsUsed != 1
2447 && pSG->aSegs[0].cb < cbPacket)
2448 {
2449 if ( (pSG->fFlags & (INTNETSG_FLAGS_ARP_IPV4 | INTNETSG_FLAGS_PKT_CP_IN_TMP))
2450 != (INTNETSG_FLAGS_ARP_IPV4 | INTNETSG_FLAGS_PKT_CP_IN_TMP)
2451 && !intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), cbPacket, pNetwork->pbTmp))
2452 return;
2453 pArpIPv4 = (PCRTNETARPIPV4)pNetwork->pbTmp;
2454 }
2455
2456 /*
2457 * Ignore packets which doesn't interest us or we perceive as malformed.
2458 */
2459 if (RT_UNLIKELY( pArpIPv4->Hdr.ar_hlen != sizeof(RTMAC)
2460 || pArpIPv4->Hdr.ar_plen != sizeof(RTNETADDRIPV4)
2461 || pArpIPv4->Hdr.ar_htype != RT_H2BE_U16(RTNET_ARP_ETHER)
2462 || pArpIPv4->Hdr.ar_ptype != RT_H2BE_U16(RTNET_ETHERTYPE_IPV4)))
2463 return;
2464 uint16_t ar_oper = RT_H2BE_U16(pArpIPv4->Hdr.ar_oper);
2465 if (RT_UNLIKELY( ar_oper != RTNET_ARPOP_REQUEST
2466 && ar_oper != RTNET_ARPOP_REPLY))
2467 {
2468 Log6(("ts-ar: op=%#x\n", ar_oper));
2469 return;
2470 }
2471
2472 /*
2473 * Delete the source address if it's OK.
2474 */
2475 if ( !intnetR0IsMacAddrMulticast(&pArpIPv4->ar_sha)
2476 && ( pArpIPv4->ar_sha.au16[0]
2477 || pArpIPv4->ar_sha.au16[1]
2478 || pArpIPv4->ar_sha.au16[2])
2479 && intnetR0IPv4AddrIsGood(pArpIPv4->ar_spa))
2480 {
2481 Log6(("ts-ar: %d.%d.%d.%d / %.6Rhxs\n", pArpIPv4->ar_spa.au8[0], pArpIPv4->ar_spa.au8[1],
2482 pArpIPv4->ar_spa.au8[2], pArpIPv4->ar_spa.au8[3], &pArpIPv4->ar_sha));
2483 intnetR0NetworkAddrCacheDelete(pNetwork, (PCRTNETADDRU)&pArpIPv4->ar_spa,
2484 kIntNetAddrType_IPv4, sizeof(pArpIPv4->ar_spa), "tif/arp");
2485 }
2486}
2487
2488
2489#ifdef INTNET_WITH_DHCP_SNOOPING
2490/**
2491 * Snoop up addresses from ARP and DHCP traffic from frames coming
2492 * over the trunk connection.
2493 *
2494 * The caller is responsible for do some basic filtering before calling
2495 * this function.
2496 * For IPv4 this means checking against the minimum DHCPv4 frame size.
2497 *
2498 * @param pNetwork The network.
2499 * @param pSG The SG list for the frame.
2500 * @param EtherType The Ethertype of the frame.
2501 */
2502static void intnetR0TrunkIfSnoopAddr(PINTNETNETWORK pNetwork, PCINTNETSG pSG, uint16_t EtherType)
2503{
2504 switch (EtherType)
2505 {
2506 case RTNET_ETHERTYPE_IPV4:
2507 {
2508 uint32_t cbIpHdr;
2509 uint8_t b;
2510
2511 Assert(pSG->cbTotal >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN);
2512 if (pSG->aSegs[0].cb >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN)
2513 {
2514 /* check if the protocol is UDP */
2515 PCRTNETIPV4 pIpHdr = (PCRTNETIPV4)((uint8_t const *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
2516 if (pIpHdr->ip_p != RTNETIPV4_PROT_UDP)
2517 return;
2518
2519 /* get the TCP header length */
2520 cbIpHdr = pIpHdr->ip_hl * 4;
2521 }
2522 else
2523 {
2524 /* check if the protocol is UDP */
2525 if ( intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPV4, ip_p))
2526 != RTNETIPV4_PROT_UDP)
2527 return;
2528
2529 /* get the TCP header length */
2530 b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + 0); /* (IPv4 first byte, a bitfield) */
2531 cbIpHdr = (b & 0x0f) * 4;
2532 }
2533 if (cbIpHdr < RTNETIPV4_MIN_LEN)
2534 return;
2535
2536 /* compare the ports. */
2537 if (pSG->aSegs[0].cb >= sizeof(RTNETETHERHDR) + cbIpHdr + RTNETUDP_MIN_LEN)
2538 {
2539 PCRTNETUDP pUdpHdr = (PCRTNETUDP)((uint8_t const *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR) + cbIpHdr);
2540 if ( ( RT_BE2H_U16(pUdpHdr->uh_sport) != RTNETIPV4_PORT_BOOTPS
2541 && RT_BE2H_U16(pUdpHdr->uh_dport) != RTNETIPV4_PORT_BOOTPS)
2542 || ( RT_BE2H_U16(pUdpHdr->uh_dport) != RTNETIPV4_PORT_BOOTPC
2543 && RT_BE2H_U16(pUdpHdr->uh_sport) != RTNETIPV4_PORT_BOOTPC))
2544 return;
2545 }
2546 else
2547 {
2548 /* get the lower byte of the UDP source port number. */
2549 b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_OFFSETOF(RTNETUDP, uh_sport) + 1);
2550 if ( b != RTNETIPV4_PORT_BOOTPS
2551 && b != RTNETIPV4_PORT_BOOTPC)
2552 return;
2553 uint8_t SrcPort = b;
2554 b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_OFFSETOF(RTNETUDP, uh_sport));
2555 if (b)
2556 return;
2557
2558 /* get the lower byte of the UDP destination port number. */
2559 b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_OFFSETOF(RTNETUDP, uh_dport) + 1);
2560 if ( b != RTNETIPV4_PORT_BOOTPS
2561 && b != RTNETIPV4_PORT_BOOTPC)
2562 return;
2563 if (b == SrcPort)
2564 return;
2565 b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_OFFSETOF(RTNETUDP, uh_dport));
2566 if (b)
2567 return;
2568 }
2569 intnetR0TrunkIfSnoopDhcp(pNetwork, pSG);
2570 break;
2571 }
2572
2573 case RTNET_ETHERTYPE_ARP:
2574 intnetR0TrunkIfSnoopArp(pNetwork, pSG);
2575 break;
2576 }
2577}
2578#endif /* INTNET_WITH_DHCP_SNOOPING */
2579
2580/**
2581 * Deals with an IPv6 packet.
2582 *
2583 * This will fish out the source IP address and add it to the cache.
2584 * Then it will look for DHCPRELEASE requests (?) and anything else
2585 * that we might find useful later.
2586 *
2587 * @param pIf The interface that's sending the frame.
2588 * @param pIpHdr Pointer to the IPv4 header in the frame.
2589 * @param cbPacket The size of the packet, or more correctly the
2590 * size of the frame without the ethernet header.
2591 * @param fGso Set if this is a GSO frame, clear if regular.
2592 */
2593static void intnetR0IfSnoopIPv6SourceAddr(PINTNETIF pIf, PCRTNETIPV6 pIpHdr, uint32_t cbPacket, bool fGso)
2594{
2595 NOREF(fGso);
2596
2597 /*
2598 * Check the header size first to prevent access invalid data.
2599 */
2600 if (cbPacket < RTNETIPV6_MIN_LEN)
2601 return;
2602
2603 /*
2604 * If the source address is good (not multicast) and
2605 * not already in the address cache of the sender, add it.
2606 */
2607 RTNETADDRU Addr;
2608 Addr.IPv6 = pIpHdr->ip6_src;
2609
2610 if ( intnetR0IPv6AddrIsGood(Addr.IPv6) && (pIpHdr->ip6_hlim == 0xff)
2611 && intnetR0IfAddrCacheLookupLikely(&pIf->aAddrCache[kIntNetAddrType_IPv6], &Addr, sizeof(Addr.IPv6)) < 0)
2612 {
2613 intnetR0IfAddrCacheAdd(pIf, kIntNetAddrType_IPv6, &Addr, "if/ipv6");
2614 }
2615}
2616
2617
2618/**
2619 * Deals with an IPv4 packet.
2620 *
2621 * This will fish out the source IP address and add it to the cache.
2622 * Then it will look for DHCPRELEASE requests (?) and anything else
2623 * that we might find useful later.
2624 *
2625 * @param pIf The interface that's sending the frame.
2626 * @param pIpHdr Pointer to the IPv4 header in the frame.
2627 * @param cbPacket The size of the packet, or more correctly the
2628 * size of the frame without the ethernet header.
2629 * @param fGso Set if this is a GSO frame, clear if regular.
2630 */
2631static void intnetR0IfSnoopIPv4SourceAddr(PINTNETIF pIf, PCRTNETIPV4 pIpHdr, uint32_t cbPacket, bool fGso)
2632{
2633 /*
2634 * Check the header size first to prevent access invalid data.
2635 */
2636 if (cbPacket < RTNETIPV4_MIN_LEN)
2637 return;
2638 uint32_t cbHdr = (uint32_t)pIpHdr->ip_hl * 4;
2639 if ( cbHdr < RTNETIPV4_MIN_LEN
2640 || cbPacket < cbHdr)
2641 return;
2642
2643 /*
2644 * If the source address is good (not broadcast or my network) and
2645 * not already in the address cache of the sender, add it. Validate
2646 * the IP header before adding it.
2647 */
2648 bool fValidatedIpHdr = false;
2649 RTNETADDRU Addr;
2650 Addr.IPv4 = pIpHdr->ip_src;
2651 if ( intnetR0IPv4AddrIsGood(Addr.IPv4)
2652 && intnetR0IfAddrCacheLookupLikely(&pIf->aAddrCache[kIntNetAddrType_IPv4], &Addr, sizeof(Addr.IPv4)) < 0)
2653 {
2654 if (!RTNetIPv4IsHdrValid(pIpHdr, cbPacket, cbPacket, !fGso /*fChecksum*/))
2655 {
2656 Log(("intnetR0IfSnoopIPv4SourceAddr: bad ip header\n"));
2657 return;
2658 }
2659
2660 intnetR0IfAddrCacheAddIt(pIf, kIntNetAddrType_IPv4, &Addr, "if/ipv4");
2661 fValidatedIpHdr = true;
2662 }
2663
2664#ifdef INTNET_WITH_DHCP_SNOOPING
2665 /*
2666 * Check for potential DHCP packets.
2667 */
2668 if ( pIpHdr->ip_p == RTNETIPV4_PROT_UDP /* DHCP is UDP. */
2669 && cbPacket >= cbHdr + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN /* Min DHCP packet len. */
2670 && !fGso) /* GSO is not applicable to DHCP traffic. */
2671 {
2672 PCRTNETUDP pUdpHdr = (PCRTNETUDP)((uint8_t const *)pIpHdr + cbHdr);
2673 if ( ( RT_BE2H_U16(pUdpHdr->uh_dport) == RTNETIPV4_PORT_BOOTPS
2674 || RT_BE2H_U16(pUdpHdr->uh_sport) == RTNETIPV4_PORT_BOOTPS)
2675 && ( RT_BE2H_U16(pUdpHdr->uh_sport) == RTNETIPV4_PORT_BOOTPC
2676 || RT_BE2H_U16(pUdpHdr->uh_dport) == RTNETIPV4_PORT_BOOTPC))
2677 {
2678 if ( fValidatedIpHdr
2679 || RTNetIPv4IsHdrValid(pIpHdr, cbPacket, cbPacket, !fGso /*fChecksum*/))
2680 intnetR0NetworkSnoopDhcp(pIf->pNetwork, pIpHdr, pUdpHdr, cbPacket - cbHdr);
2681 else
2682 Log(("intnetR0IfSnoopIPv4SourceAddr: bad ip header (dhcp)\n"));
2683 }
2684 }
2685#endif /* INTNET_WITH_DHCP_SNOOPING */
2686}
2687
2688
2689/**
2690 * Snoop up source addresses from an ARP request or reply.
2691 *
2692 * @param pIf The interface that's sending the frame.
2693 * @param pHdr The ARP header.
2694 * @param cbPacket The size of the packet (might be larger than the ARP
2695 * request 'cause of min ethernet frame size).
2696 * @param pfSgFlags Pointer to the SG flags. This is used to tag the packet so we
2697 * don't have to repeat the frame parsing in intnetR0TrunkIfSend.
2698 */
2699static void intnetR0IfSnoopArpAddr(PINTNETIF pIf, PCRTNETARPIPV4 pArpIPv4, uint32_t cbPacket, uint16_t *pfSgFlags)
2700{
2701 /*
2702 * Ignore packets which doesn't interest us or we perceive as malformed.
2703 */
2704 if (RT_UNLIKELY(cbPacket < sizeof(RTNETARPIPV4)))
2705 return;
2706 if (RT_UNLIKELY( pArpIPv4->Hdr.ar_hlen != sizeof(RTMAC)
2707 || pArpIPv4->Hdr.ar_plen != sizeof(RTNETADDRIPV4)
2708 || pArpIPv4->Hdr.ar_htype != RT_H2BE_U16(RTNET_ARP_ETHER)
2709 || pArpIPv4->Hdr.ar_ptype != RT_H2BE_U16(RTNET_ETHERTYPE_IPV4)))
2710 return;
2711 uint16_t ar_oper = RT_H2BE_U16(pArpIPv4->Hdr.ar_oper);
2712 if (RT_UNLIKELY( ar_oper != RTNET_ARPOP_REQUEST
2713 && ar_oper != RTNET_ARPOP_REPLY))
2714 {
2715 Log6(("ar_oper=%#x\n", ar_oper));
2716 return;
2717 }
2718
2719 /*
2720 * Tag the SG as ARP IPv4 for later editing, then check for addresses
2721 * which can be removed or added to the address cache of the sender.
2722 */
2723 *pfSgFlags |= INTNETSG_FLAGS_ARP_IPV4;
2724
2725 if ( ar_oper == RTNET_ARPOP_REPLY
2726 && !intnetR0IsMacAddrMulticast(&pArpIPv4->ar_tha)
2727 && ( pArpIPv4->ar_tha.au16[0]
2728 || pArpIPv4->ar_tha.au16[1]
2729 || pArpIPv4->ar_tha.au16[2])
2730 && intnetR0IPv4AddrIsGood(pArpIPv4->ar_tpa))
2731 intnetR0IfAddrCacheDelete(pIf, &pIf->aAddrCache[kIntNetAddrType_IPv4],
2732 (PCRTNETADDRU)&pArpIPv4->ar_tpa, sizeof(RTNETADDRIPV4), "if/arp");
2733
2734 if ( !memcmp(&pArpIPv4->ar_sha, &pIf->MacAddr, sizeof(RTMAC))
2735 && intnetR0IPv4AddrIsGood(pArpIPv4->ar_spa))
2736 {
2737 intnetR0IfAddrCacheAdd(pIf, kIntNetAddrType_IPv4, (PCRTNETADDRU)&pArpIPv4->ar_spa, "if/arp");
2738 }
2739}
2740
2741
2742
2743/**
2744 * Checks packets send by a normal interface for new network
2745 * layer addresses.
2746 *
2747 * @param pIf The interface that's sending the frame.
2748 * @param pbFrame The frame.
2749 * @param cbFrame The size of the frame.
2750 * @param fGso Set if this is a GSO frame, clear if regular.
2751 * @param pfSgFlags Pointer to the SG flags. This is used to tag the packet so we
2752 * don't have to repeat the frame parsing in intnetR0TrunkIfSend.
2753 */
2754static void intnetR0IfSnoopAddr(PINTNETIF pIf, uint8_t const *pbFrame, uint32_t cbFrame, bool fGso, uint16_t *pfSgFlags)
2755{
2756 /*
2757 * Fish out the ethertype and look for stuff we can handle.
2758 */
2759 if (cbFrame <= sizeof(RTNETETHERHDR))
2760 return;
2761 cbFrame -= sizeof(RTNETETHERHDR);
2762
2763 uint16_t EtherType = RT_H2BE_U16(((PCRTNETETHERHDR)pbFrame)->EtherType);
2764 switch (EtherType)
2765 {
2766 case RTNET_ETHERTYPE_IPV4:
2767 intnetR0IfSnoopIPv4SourceAddr(pIf, (PCRTNETIPV4)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, fGso);
2768 break;
2769
2770 case RTNET_ETHERTYPE_IPV6:
2771 intnetR0IfSnoopIPv6SourceAddr(pIf, (PCRTNETIPV6)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, fGso);
2772 break;
2773
2774#if 0 /** @todo IntNet: implement IPX for wireless MAC sharing? */
2775 case RTNET_ETHERTYPE_IPX_1:
2776 case RTNET_ETHERTYPE_IPX_2:
2777 case RTNET_ETHERTYPE_IPX_3:
2778 intnetR0IfSnoopIpxSourceAddr(pIf, (PCINTNETIPX)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, pfSgFlags);
2779 break;
2780#endif
2781 case RTNET_ETHERTYPE_ARP:
2782 intnetR0IfSnoopArpAddr(pIf, (PCRTNETARPIPV4)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, pfSgFlags);
2783 break;
2784 }
2785}
2786
2787
2788/**
2789 * Writes a frame packet to the ring buffer.
2790 *
2791 * @returns VBox status code.
2792 * @param pBuf The buffer.
2793 * @param pRingBuf The ring buffer to read from.
2794 * @param pSG The gather list.
2795 * @param pNewDstMac Set the destination MAC address to the address if specified.
2796 */
2797static int intnetR0RingWriteFrame(PINTNETRINGBUF pRingBuf, PCINTNETSG pSG, PCRTMAC pNewDstMac)
2798{
2799 PINTNETHDR pHdr = NULL; /* shut up gcc*/
2800 void *pvDst = NULL; /* ditto */
2801 int rc;
2802 if (pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
2803 rc = IntNetRingAllocateFrame(pRingBuf, pSG->cbTotal, &pHdr, &pvDst);
2804 else
2805 rc = IntNetRingAllocateGsoFrame(pRingBuf, pSG->cbTotal, &pSG->GsoCtx, &pHdr, &pvDst);
2806 if (RT_SUCCESS(rc))
2807 {
2808 IntNetSgRead(pSG, pvDst);
2809 if (pNewDstMac)
2810 ((PRTNETETHERHDR)pvDst)->DstMac = *pNewDstMac;
2811
2812 IntNetRingCommitFrame(pRingBuf, pHdr);
2813 return VINF_SUCCESS;
2814 }
2815 return rc;
2816}
2817
2818
2819/**
2820 * Sends a frame to a specific interface.
2821 *
2822 * @param pIf The interface.
2823 * @param pIfSender The interface sending the frame. This is NULL if it's the trunk.
2824 * @param pSG The gather buffer which data is being sent to the interface.
2825 * @param pNewDstMac Set the destination MAC address to the address if specified.
2826 */
2827static void intnetR0IfSend(PINTNETIF pIf, PINTNETIF pIfSender, PINTNETSG pSG, PCRTMAC pNewDstMac)
2828{
2829 /*
2830 * Grab the receive/producer lock and copy over the frame.
2831 */
2832 RTSpinlockAcquire(pIf->hRecvInSpinlock);
2833 int rc = intnetR0RingWriteFrame(&pIf->pIntBuf->Recv, pSG, pNewDstMac);
2834 RTSpinlockRelease(pIf->hRecvInSpinlock);
2835 if (RT_SUCCESS(rc))
2836 {
2837 pIf->cYields = 0;
2838 RTSemEventSignal(pIf->hRecvEvent);
2839 return;
2840 }
2841
2842 Log(("intnetR0IfSend: overflow cb=%d hIf=%RX32\n", pSG->cbTotal, pIf->hIf));
2843
2844 /*
2845 * Scheduling hack, for unicore machines primarily.
2846 */
2847 if ( pIf->fActive
2848 && pIf->cYields < 4 /* just twice */
2849 && pIfSender /* but not if it's from the trunk */
2850 && RTThreadPreemptIsEnabled(NIL_RTTHREAD)
2851 )
2852 {
2853 unsigned cYields = 2;
2854 while (--cYields > 0)
2855 {
2856 RTSemEventSignal(pIf->hRecvEvent);
2857 RTThreadYield();
2858
2859 RTSpinlockAcquire(pIf->hRecvInSpinlock);
2860 rc = intnetR0RingWriteFrame(&pIf->pIntBuf->Recv, pSG, pNewDstMac);
2861 RTSpinlockRelease(pIf->hRecvInSpinlock);
2862 if (RT_SUCCESS(rc))
2863 {
2864 STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatYieldsOk);
2865 RTSemEventSignal(pIf->hRecvEvent);
2866 return;
2867 }
2868 pIf->cYields++;
2869 }
2870 STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatYieldsNok);
2871 }
2872
2873 /* ok, the frame is lost. */
2874 STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatLost);
2875 RTSemEventSignal(pIf->hRecvEvent);
2876}
2877
2878
2879/**
2880 * Fallback path that does the GSO segmenting before passing the frame on to the
2881 * trunk interface.
2882 *
2883 * The caller holds the trunk lock.
2884 *
2885 * @param pThis The trunk.
2886 * @param pIfSender The IF sending the frame.
2887 * @param pSG Pointer to the gather list.
2888 * @param fDst The destination flags.
2889 */
2890static int intnetR0TrunkIfSendGsoFallback(PINTNETTRUNKIF pThis, PINTNETIF pIfSender, PINTNETSG pSG, uint32_t fDst)
2891{
2892 /*
2893 * Since we're only using this for GSO frame coming from the internal
2894 * network interfaces and never the trunk, we can assume there is only
2895 * one segment. This simplifies the code quite a bit.
2896 */
2897 Assert(PDMNetGsoIsValid(&pSG->GsoCtx, sizeof(pSG->GsoCtx), pSG->cbTotal));
2898 AssertReturn(pSG->cSegsUsed == 1, VERR_INTERNAL_ERROR_4);
2899
2900 union
2901 {
2902 uint8_t abBuf[sizeof(INTNETSG) + sizeof(INTNETSEG)];
2903 INTNETSG SG;
2904 } u;
2905
2906 /** @todo We have to adjust MSS so it does not exceed the value configured for
2907 * the host's interface.
2908 */
2909
2910 /*
2911 * Carve out the frame segments with the header and frame in different
2912 * scatter / gather segments.
2913 */
2914 uint32_t const cSegs = PDMNetGsoCalcSegmentCount(&pSG->GsoCtx, pSG->cbTotal);
2915 for (uint32_t iSeg = 0; iSeg < cSegs; iSeg++)
2916 {
2917 uint32_t cbSegPayload, cbSegHdrs;
2918 uint32_t offSegPayload = PDMNetGsoCarveSegment(&pSG->GsoCtx, (uint8_t *)pSG->aSegs[0].pv, pSG->cbTotal, iSeg, cSegs,
2919 pIfSender->abGsoHdrs, &cbSegHdrs, &cbSegPayload);
2920
2921 IntNetSgInitTempSegs(&u.SG, cbSegHdrs + cbSegPayload, 2, 2);
2922 u.SG.aSegs[0].Phys = NIL_RTHCPHYS;
2923 u.SG.aSegs[0].pv = pIfSender->abGsoHdrs;
2924 u.SG.aSegs[0].cb = cbSegHdrs;
2925 u.SG.aSegs[1].Phys = NIL_RTHCPHYS;
2926 u.SG.aSegs[1].pv = (uint8_t *)pSG->aSegs[0].pv + offSegPayload;
2927 u.SG.aSegs[1].cb = (uint32_t)cbSegPayload;
2928
2929 int rc = pThis->pIfPort->pfnXmit(pThis->pIfPort, pIfSender->pvIfData, &u.SG, fDst);
2930 if (RT_FAILURE(rc))
2931 return rc;
2932 }
2933 return VINF_SUCCESS;
2934}
2935
2936
2937/**
2938 * Checks if any of the given trunk destinations can handle this kind of GSO SG.
2939 *
2940 * @returns true if it can, false if it cannot.
2941 * @param pThis The trunk.
2942 * @param pSG The scatter / gather buffer.
2943 * @param fDst The destination mask.
2944 */
2945DECLINLINE(bool) intnetR0TrunkIfCanHandleGsoFrame(PINTNETTRUNKIF pThis, PINTNETSG pSG, uint32_t fDst)
2946{
2947 uint8_t u8Type = pSG->GsoCtx.u8Type;
2948 AssertReturn(u8Type < 32, false); /* paranoia */
2949 uint32_t fMask = RT_BIT_32(u8Type);
2950
2951 if (fDst == INTNETTRUNKDIR_HOST)
2952 return !!(pThis->fHostGsoCapabilites & fMask);
2953 if (fDst == INTNETTRUNKDIR_WIRE)
2954 return !!(pThis->fWireGsoCapabilites & fMask);
2955 Assert(fDst == (INTNETTRUNKDIR_WIRE | INTNETTRUNKDIR_HOST));
2956 return !!(pThis->fHostGsoCapabilites & pThis->fWireGsoCapabilites & fMask);
2957}
2958
2959
2960/**
2961 * Calculates the checksum of a full ipv6 frame.
2962 *
2963 * @returns 16-bit hecksum value.
2964 * @param pIpHdr The IPv6 header (network endian (big)).
2965 * @param bProtocol The protocol number. This can be the same as the
2966 * ip6_nxt field, but doesn't need to be.
2967 * @param cbPkt The packet size (host endian of course). This can
2968 * be the same as the ip6_plen field, but as with @a
2969 * bProtocol it won't be when extension headers are
2970 * present. For UDP this will be uh_ulen converted to
2971 * host endian.
2972 */
2973static uint16_t computeIPv6FullChecksum(PCRTNETIPV6 pIpHdr)
2974{
2975 uint16_t const *data;
2976 int len = RT_BE2H_U16(pIpHdr->ip6_plen);
2977 uint32_t sum = RTNetIPv6PseudoChecksum(pIpHdr);
2978
2979 /* add the payload */
2980 data = (uint16_t *) (pIpHdr + 1);
2981 while(len > 1)
2982 {
2983 sum += *(data);
2984 data++;
2985 len -= 2;
2986 }
2987
2988 if(len > 0)
2989 sum += *((uint8_t *) data);
2990
2991 while(sum >> 16)
2992 sum = (sum & 0xffff) + (sum >> 16);
2993
2994 return (uint16_t) ~sum;
2995}
2996
2997
2998/**
2999 * Rewrite VM MAC address with shared host MAC address inside IPv6
3000 * Neighbor Discovery datagrams.
3001 */
3002static void intnetR0TrunkSharedMacEditIPv6FromIntNet(PINTNETTRUNKIF pThis, PINTNETIF pIfSender,
3003 PRTNETETHERHDR pEthHdr, uint32_t cb)
3004{
3005 if (RT_UNLIKELY(cb < sizeof(*pEthHdr)))
3006 return;
3007
3008 /* have IPv6 header */
3009 PRTNETIPV6 pIPv6 = (PRTNETIPV6)(pEthHdr + 1);
3010 cb -= sizeof(*pEthHdr);
3011 if (RT_UNLIKELY(cb < sizeof(*pIPv6)))
3012 return;
3013
3014 if ( pIPv6->ip6_nxt != RTNETIPV6_PROT_ICMPV6
3015 || pIPv6->ip6_hlim != 0xff)
3016 return;
3017
3018 PRTNETICMPV6HDR pICMPv6 = (PRTNETICMPV6HDR)(pIPv6 + 1);
3019 cb -= sizeof(*pIPv6);
3020 if (RT_UNLIKELY(cb < sizeof(*pICMPv6)))
3021 return;
3022
3023 uint32_t hdrlen = 0;
3024 uint8_t llaopt = RTNETIPV6_ICMP_ND_SLLA_OPT;
3025
3026 uint8_t type = pICMPv6->icmp6_type;
3027 switch (type)
3028 {
3029 case RTNETIPV6_ICMP_TYPE_RS:
3030 hdrlen = 8;
3031 break;
3032
3033 case RTNETIPV6_ICMP_TYPE_RA:
3034 hdrlen = 16;
3035 break;
3036
3037 case RTNETIPV6_ICMP_TYPE_NS:
3038 hdrlen = 24;
3039 break;
3040
3041 case RTNETIPV6_ICMP_TYPE_NA:
3042 hdrlen = 24;
3043 llaopt = RTNETIPV6_ICMP_ND_TLLA_OPT;
3044 break;
3045
3046 default:
3047 return;
3048 }
3049
3050 AssertReturnVoid(hdrlen > 0);
3051 if (RT_UNLIKELY(cb < hdrlen))
3052 return;
3053
3054 if (RT_UNLIKELY(pICMPv6->icmp6_code != 0))
3055 return;
3056
3057 PRTNETNDP_LLA_OPT pLLAOpt = NULL;
3058 char *pOpt = (char *)pICMPv6 + hdrlen;
3059 cb -= hdrlen;
3060
3061 while (cb >= 8)
3062 {
3063 uint8_t opt = ((uint8_t *)pOpt)[0];
3064 uint32_t optlen = (uint32_t)((uint8_t *)pOpt)[1] * 8;
3065 if (RT_UNLIKELY(cb < optlen))
3066 return;
3067
3068 if (opt == llaopt)
3069 {
3070 if (RT_UNLIKELY(optlen != 8))
3071 return;
3072 pLLAOpt = (PRTNETNDP_LLA_OPT)pOpt;
3073 break;
3074 }
3075
3076 pOpt += optlen;
3077 cb -= optlen;
3078 }
3079
3080 if (pLLAOpt == NULL)
3081 return;
3082
3083 if (memcmp(&pLLAOpt->lla, &pIfSender->MacAddr, sizeof(RTMAC)) != 0)
3084 return;
3085
3086 /* overwrite VM's MAC with host's MAC */
3087 pLLAOpt->lla = pThis->MacAddr;
3088
3089 /* recompute the checksum */
3090 pICMPv6->icmp6_cksum = 0;
3091 pICMPv6->icmp6_cksum = computeIPv6FullChecksum(pIPv6);
3092}
3093
3094
3095/**
3096 * Sends a frame down the trunk.
3097 *
3098 * @param pThis The trunk.
3099 * @param pNetwork The network the frame is being sent to.
3100 * @param pIfSender The IF sending the frame. Used for MAC address
3101 * checks in shared MAC mode.
3102 * @param fDst The destination flags.
3103 * @param pSG Pointer to the gather list.
3104 */
3105static void intnetR0TrunkIfSend(PINTNETTRUNKIF pThis, PINTNETNETWORK pNetwork, PINTNETIF pIfSender,
3106 uint32_t fDst, PINTNETSG pSG)
3107{
3108 /*
3109 * Quick sanity check.
3110 */
3111 AssertPtr(pThis);
3112 AssertPtr(pNetwork);
3113 AssertPtr(pIfSender);
3114 AssertPtr(pSG);
3115 Assert(fDst);
3116 AssertReturnVoid(pThis->pIfPort);
3117
3118 /*
3119 * Edit the frame if we're sharing the MAC address with the host on the wire.
3120 *
3121 * If the frame is headed for both the host and the wire, we'll have to send
3122 * it to the host before making any modifications, and force the OS specific
3123 * backend to copy it. We do this by marking it as TEMP (which is always the
3124 * case right now).
3125 */
3126 if ( (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
3127 && (fDst & INTNETTRUNKDIR_WIRE))
3128 {
3129 /*
3130 * Dispatch it to the host before making changes.
3131 */
3132 if (fDst & INTNETTRUNKDIR_HOST)
3133 {
3134 Assert(pSG->fFlags & INTNETSG_FLAGS_TEMP); /* make sure copy is forced */
3135 intnetR0TrunkIfSend(pThis, pNetwork, pIfSender, INTNETTRUNKDIR_HOST, pSG);
3136 fDst &= ~INTNETTRUNKDIR_HOST;
3137 }
3138
3139 /*
3140 * Edit the source address so that it it's the same as the host.
3141 */
3142 /* ASSUME frame from IntNetR0IfSend! */
3143 AssertReturnVoid(pSG->cSegsUsed == 1);
3144 AssertReturnVoid(pSG->cbTotal >= sizeof(RTNETETHERHDR));
3145 AssertReturnVoid(pIfSender);
3146 PRTNETETHERHDR pEthHdr = (PRTNETETHERHDR)pSG->aSegs[0].pv;
3147
3148 pEthHdr->SrcMac = pThis->MacAddr;
3149
3150 /*
3151 * Deal with tags from the snooping phase.
3152 */
3153 if (pSG->fFlags & INTNETSG_FLAGS_ARP_IPV4)
3154 {
3155 /*
3156 * APR IPv4: replace hardware (MAC) addresses because these end up
3157 * in ARP caches. So, if we don't the other machines will
3158 * send the packets to the MAC address of the guest
3159 * instead of the one of the host, which won't work on
3160 * wireless of course...
3161 */
3162 PRTNETARPIPV4 pArp = (PRTNETARPIPV4)(pEthHdr + 1);
3163 if (!memcmp(&pArp->ar_sha, &pIfSender->MacAddr, sizeof(RTMAC)))
3164 {
3165 Log6(("tw: ar_sha %.6Rhxs -> %.6Rhxs\n", &pArp->ar_sha, &pThis->MacAddr));
3166 pArp->ar_sha = pThis->MacAddr;
3167 }
3168 if (!memcmp(&pArp->ar_tha, &pIfSender->MacAddr, sizeof(RTMAC))) /* just in case... */
3169 {
3170 Log6(("tw: ar_tha %.6Rhxs -> %.6Rhxs\n", &pArp->ar_tha, &pThis->MacAddr));
3171 pArp->ar_tha = pThis->MacAddr;
3172 }
3173 }
3174 else if (pEthHdr->EtherType == RT_H2N_U16_C(RTNET_ETHERTYPE_IPV6))
3175 {
3176 intnetR0TrunkSharedMacEditIPv6FromIntNet(pThis, pIfSender, pEthHdr, pSG->cbTotal);
3177 }
3178 }
3179
3180 /*
3181 * Send the frame, handling the GSO fallback.
3182 *
3183 * Note! The trunk implementation will re-check that the trunk is active
3184 * before sending, so we don't have to duplicate that effort here.
3185 */
3186 STAM_REL_PROFILE_START(&pIfSender->pIntBuf->StatSend2, a);
3187 int rc;
3188 if ( pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID
3189 || intnetR0TrunkIfCanHandleGsoFrame(pThis, pSG, fDst) )
3190 rc = pThis->pIfPort->pfnXmit(pThis->pIfPort, pIfSender->pvIfData, pSG, fDst);
3191 else
3192 rc = intnetR0TrunkIfSendGsoFallback(pThis, pIfSender, pSG, fDst);
3193 STAM_REL_PROFILE_STOP(&pIfSender->pIntBuf->StatSend2, a);
3194
3195 /** @todo failure statistics? */
3196 Log2(("intnetR0TrunkIfSend: %Rrc fDst=%d\n", rc, fDst)); NOREF(rc);
3197}
3198
3199
3200/**
3201 * Detect broadcasts packaged as unicast and convert them back to broadcast.
3202 *
3203 * WiFi routers try to use ethernet unicast instead of broadcast or
3204 * multicast when possible. Look inside the packet and fix up
3205 * ethernet destination to be proper broadcast or multicast if
3206 * necessary.
3207 *
3208 * @returns true broadcast (pEthHdr & pSG are modified), false if not.
3209 * @param pNetwork The network the frame is being sent to.
3210 * @param pSG Pointer to the gather list for the frame. The
3211 * ethernet destination address is modified when
3212 * returning true.
3213 * @param pEthHdr Pointer to the ethernet header. The ethernet
3214 * destination address is modified when returning true.
3215 */
3216static bool intnetR0NetworkSharedMacDetectAndFixBroadcast(PINTNETNETWORK pNetwork, PINTNETSG pSG, PRTNETETHERHDR pEthHdr)
3217{
3218 NOREF(pNetwork);
3219
3220 switch (pEthHdr->EtherType)
3221 {
3222 case RT_H2N_U16_C(RTNET_ETHERTYPE_ARP):
3223 {
3224 uint16_t ar_oper;
3225 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETARPHDR, ar_oper),
3226 sizeof(ar_oper), &ar_oper))
3227 return false;
3228
3229 if (ar_oper == RT_H2N_U16_C(RTNET_ARPOP_REQUEST))
3230 {
3231 /* change to broadcast */
3232 pEthHdr->DstMac.au16[0] = 0xffff;
3233 pEthHdr->DstMac.au16[1] = 0xffff;
3234 pEthHdr->DstMac.au16[2] = 0xffff;
3235 }
3236 else
3237 return false;
3238 break;
3239 }
3240
3241 case RT_H2N_U16_C(RTNET_ETHERTYPE_IPV4):
3242 {
3243 RTNETADDRIPV4 ip_dst;
3244 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPV4, ip_dst),
3245 sizeof(ip_dst), &ip_dst))
3246 return false;
3247
3248 if (ip_dst.u == 0xffffffff) /* 255.255.255.255? */
3249 {
3250 /* change to broadcast */
3251 pEthHdr->DstMac.au16[0] = 0xffff;
3252 pEthHdr->DstMac.au16[1] = 0xffff;
3253 pEthHdr->DstMac.au16[2] = 0xffff;
3254 }
3255 else if ((ip_dst.au8[0] & 0xf0) == 0xe0) /* IPv4 multicast? */
3256 {
3257 /* change to 01:00:5e:xx:xx:xx multicast ... */
3258 pEthHdr->DstMac.au8[0] = 0x01;
3259 pEthHdr->DstMac.au8[1] = 0x00;
3260 pEthHdr->DstMac.au8[2] = 0x5e;
3261 /* ... with lower 23 bits from the multicast IP address */
3262 pEthHdr->DstMac.au8[3] = ip_dst.au8[1] & 0x7f;
3263 pEthHdr->DstMac.au8[4] = ip_dst.au8[2];
3264 pEthHdr->DstMac.au8[5] = ip_dst.au8[3];
3265 }
3266 else
3267 return false;
3268 break;
3269 }
3270
3271 case RT_H2N_U16_C(RTNET_ETHERTYPE_IPV6):
3272 {
3273 RTNETADDRIPV6 ip6_dst;
3274 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPV6, ip6_dst),
3275 sizeof(ip6_dst), &ip6_dst))
3276 return false;
3277
3278 if (ip6_dst.au8[0] == 0xff) /* IPv6 multicast? */
3279 {
3280 pEthHdr->DstMac.au16[0] = 0x3333;
3281 pEthHdr->DstMac.au16[1] = ip6_dst.au16[6];
3282 pEthHdr->DstMac.au16[2] = ip6_dst.au16[7];
3283 }
3284 else
3285 return false;
3286 break;
3287 }
3288
3289 default:
3290 return false;
3291 }
3292
3293
3294 /*
3295 * Update ethernet destination in the segment.
3296 */
3297 intnetR0SgWritePart(pSG, RT_OFFSETOF(RTNETETHERHDR, DstMac), sizeof(pEthHdr->DstMac), &pEthHdr->DstMac);
3298
3299 return true;
3300}
3301
3302
3303/**
3304 * Snoops a multicast ICMPv6 ND DAD from the wire via the trunk connection.
3305 *
3306 * @param pNetwork The network the frame is being sent to.
3307 * @param pSG Pointer to the gather list for the frame.
3308 * @param pEthHdr Pointer to the ethernet header.
3309 */
3310static void intnetR0NetworkSnoopNAFromWire(PINTNETNETWORK pNetwork, PINTNETSG pSG, PRTNETETHERHDR pEthHdr)
3311{
3312 NOREF(pEthHdr);
3313
3314 /*
3315 * Check the minimum size and get a linear copy of the thing to work on,
3316 * using the temporary buffer if necessary.
3317 */
3318 if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + sizeof(RTNETIPV6) +
3319 sizeof(RTNETNDP)))
3320 return;
3321 PRTNETIPV6 pIPv6 = (PRTNETIPV6)((uint8_t *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
3322 if ( pSG->cSegsUsed != 1
3323 && pSG->aSegs[0].cb < sizeof(RTNETETHERHDR) + sizeof(RTNETIPV6) +
3324 sizeof(RTNETNDP))
3325 {
3326 Log6(("fw: Copying IPv6 pkt %u\n", sizeof(RTNETIPV6)));
3327 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), sizeof(RTNETIPV6)
3328 + sizeof(RTNETNDP), pNetwork->pbTmp))
3329 return;
3330 pSG->fFlags |= INTNETSG_FLAGS_PKT_CP_IN_TMP;
3331 pIPv6 = (PRTNETIPV6)pNetwork->pbTmp;
3332 }
3333
3334 PCRTNETNDP pNd = (PCRTNETNDP) (pIPv6 + 1);
3335
3336 /*
3337 * a multicast NS with :: as source address means a DAD packet.
3338 * if it comes from the wire and we have the DAD'd address in our cache,
3339 * flush the entry as the address is being acquired by someone else on
3340 * the network.
3341 */
3342 if ( pIPv6->ip6_hlim == 0xff
3343 && pIPv6->ip6_nxt == RTNETIPV6_PROT_ICMPV6
3344 && pNd->Hdr.icmp6_type == RTNETIPV6_ICMP_TYPE_NS
3345 && pNd->Hdr.icmp6_code == 0
3346 && pIPv6->ip6_src.QWords.qw0 == 0
3347 && pIPv6->ip6_src.QWords.qw1 == 0)
3348 {
3349
3350 intnetR0NetworkAddrCacheDelete(pNetwork, (PCRTNETADDRU) &pNd->target_address,
3351 kIntNetAddrType_IPv6, sizeof(RTNETADDRIPV6), "tif/ip6");
3352 }
3353}
3354/**
3355 * Edits an ARP packet arriving from the wire via the trunk connection.
3356 *
3357 * @param pNetwork The network the frame is being sent to.
3358 * @param pSG Pointer to the gather list for the frame.
3359 * The flags and data content may be updated.
3360 * @param pEthHdr Pointer to the ethernet header. This may also be
3361 * updated if it's a unicast...
3362 */
3363static void intnetR0NetworkEditArpFromWire(PINTNETNETWORK pNetwork, PINTNETSG pSG, PRTNETETHERHDR pEthHdr)
3364{
3365 /*
3366 * Check the minimum size and get a linear copy of the thing to work on,
3367 * using the temporary buffer if necessary.
3368 */
3369 if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + sizeof(RTNETARPIPV4)))
3370 return;
3371 PRTNETARPIPV4 pArpIPv4 = (PRTNETARPIPV4)((uint8_t *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
3372 if ( pSG->cSegsUsed != 1
3373 && pSG->aSegs[0].cb < sizeof(RTNETETHERHDR) + sizeof(RTNETARPIPV4))
3374 {
3375 Log6(("fw: Copying ARP pkt %u\n", sizeof(RTNETARPIPV4)));
3376 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), sizeof(RTNETARPIPV4), pNetwork->pbTmp))
3377 return;
3378 pSG->fFlags |= INTNETSG_FLAGS_PKT_CP_IN_TMP;
3379 pArpIPv4 = (PRTNETARPIPV4)pNetwork->pbTmp;
3380 }
3381
3382 /*
3383 * Ignore packets which doesn't interest us or we perceive as malformed.
3384 */
3385 if (RT_UNLIKELY( pArpIPv4->Hdr.ar_hlen != sizeof(RTMAC)
3386 || pArpIPv4->Hdr.ar_plen != sizeof(RTNETADDRIPV4)
3387 || pArpIPv4->Hdr.ar_htype != RT_H2BE_U16(RTNET_ARP_ETHER)
3388 || pArpIPv4->Hdr.ar_ptype != RT_H2BE_U16(RTNET_ETHERTYPE_IPV4)))
3389 return;
3390 uint16_t ar_oper = RT_H2BE_U16(pArpIPv4->Hdr.ar_oper);
3391 if (RT_UNLIKELY( ar_oper != RTNET_ARPOP_REQUEST
3392 && ar_oper != RTNET_ARPOP_REPLY))
3393 {
3394 Log6(("ar_oper=%#x\n", ar_oper));
3395 return;
3396 }
3397
3398 /* Tag it as ARP IPv4. */
3399 pSG->fFlags |= INTNETSG_FLAGS_ARP_IPV4;
3400
3401 /*
3402 * The thing we're interested in here is a reply to a query made by a guest
3403 * since we modified the MAC in the initial request the guest made.
3404 */
3405 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
3406 RTMAC MacAddrTrunk;
3407 if (pNetwork->MacTab.pTrunk)
3408 MacAddrTrunk = pNetwork->MacTab.pTrunk->MacAddr;
3409 else
3410 memset(&MacAddrTrunk, 0, sizeof(MacAddrTrunk));
3411 RTSpinlockRelease(pNetwork->hAddrSpinlock);
3412 if ( ar_oper == RTNET_ARPOP_REPLY
3413 && !memcmp(&pArpIPv4->ar_tha, &MacAddrTrunk, sizeof(RTMAC)))
3414 {
3415 PINTNETIF pIf = intnetR0NetworkAddrCacheLookupIf(pNetwork, (PCRTNETADDRU)&pArpIPv4->ar_tpa,
3416 kIntNetAddrType_IPv4, sizeof(pArpIPv4->ar_tpa));
3417 if (pIf)
3418 {
3419 Log6(("fw: ar_tha %.6Rhxs -> %.6Rhxs\n", &pArpIPv4->ar_tha, &pIf->MacAddr));
3420 pArpIPv4->ar_tha = pIf->MacAddr;
3421 if (!memcmp(&pEthHdr->DstMac, &MacAddrTrunk, sizeof(RTMAC)))
3422 {
3423 Log6(("fw: DstMac %.6Rhxs -> %.6Rhxs\n", &pEthHdr->DstMac, &pIf->MacAddr));
3424 pEthHdr->DstMac = pIf->MacAddr;
3425 if ((void *)pEthHdr != pSG->aSegs[0].pv)
3426 intnetR0SgWritePart(pSG, RT_OFFSETOF(RTNETETHERHDR, DstMac), sizeof(RTMAC), &pIf->MacAddr);
3427 }
3428 intnetR0BusyDecIf(pIf);
3429
3430 /* Write back the packet if we've been making changes to a buffered copy. */
3431 if (pSG->fFlags & INTNETSG_FLAGS_PKT_CP_IN_TMP)
3432 intnetR0SgWritePart(pSG, sizeof(RTNETETHERHDR), sizeof(PRTNETARPIPV4), pArpIPv4);
3433 }
3434 }
3435}
3436
3437
3438/**
3439 * Detects and edits an DHCP packet arriving from the internal net.
3440 *
3441 * @param pNetwork The network the frame is being sent to.
3442 * @param pSG Pointer to the gather list for the frame.
3443 * The flags and data content may be updated.
3444 * @param pEthHdr Pointer to the ethernet header. This may also be
3445 * updated if it's a unicast...
3446 */
3447static void intnetR0NetworkEditDhcpFromIntNet(PINTNETNETWORK pNetwork, PINTNETSG pSG, PRTNETETHERHDR pEthHdr)
3448{
3449 NOREF(pEthHdr);
3450
3451 /*
3452 * Check the minimum size and get a linear copy of the thing to work on,
3453 * using the temporary buffer if necessary.
3454 */
3455 if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN))
3456 return;
3457 /*
3458 * Get a pointer to a linear copy of the full packet, using the
3459 * temporary buffer if necessary.
3460 */
3461 PCRTNETIPV4 pIpHdr = (PCRTNETIPV4)((PCRTNETETHERHDR)pSG->aSegs[0].pv + 1);
3462 uint32_t cbPacket = pSG->cbTotal - sizeof(RTNETETHERHDR);
3463 if (pSG->cSegsUsed > 1)
3464 {
3465 cbPacket = RT_MIN(cbPacket, INTNETNETWORK_TMP_SIZE);
3466 Log6(("intnetR0NetworkEditDhcpFromIntNet: Copying IPv4/UDP/DHCP pkt %u\n", cbPacket));
3467 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), cbPacket, pNetwork->pbTmp))
3468 return;
3469 //pSG->fFlags |= INTNETSG_FLAGS_PKT_CP_IN_TMP;
3470 pIpHdr = (PCRTNETIPV4)pNetwork->pbTmp;
3471 }
3472
3473 /*
3474 * Validate the IP header and find the UDP packet.
3475 */
3476 if (!RTNetIPv4IsHdrValid(pIpHdr, cbPacket, pSG->cbTotal - sizeof(RTNETETHERHDR), true /*fCheckSum*/))
3477 {
3478 Log6(("intnetR0NetworkEditDhcpFromIntNet: bad ip header\n"));
3479 return;
3480 }
3481 size_t cbIpHdr = pIpHdr->ip_hl * 4;
3482 if ( pIpHdr->ip_p != RTNETIPV4_PROT_UDP /* DHCP is UDP. */
3483 || cbPacket < cbIpHdr + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN) /* Min DHCP packet len */
3484 return;
3485
3486 size_t cbUdpPkt = cbPacket - cbIpHdr;
3487 PCRTNETUDP pUdpHdr = (PCRTNETUDP)((uintptr_t)pIpHdr + cbIpHdr);
3488 /* We are only interested in DHCP packets coming from client to server. */
3489 if ( RT_BE2H_U16(pUdpHdr->uh_dport) != RTNETIPV4_PORT_BOOTPS
3490 || RT_BE2H_U16(pUdpHdr->uh_sport) != RTNETIPV4_PORT_BOOTPC)
3491 return;
3492
3493 /*
3494 * Check if the DHCP message is valid and get the type.
3495 */
3496 if (!RTNetIPv4IsUDPValid(pIpHdr, pUdpHdr, pUdpHdr + 1, cbUdpPkt, true /*fCheckSum*/))
3497 {
3498 Log6(("intnetR0NetworkEditDhcpFromIntNet: Bad UDP packet\n"));
3499 return;
3500 }
3501 PCRTNETBOOTP pDhcp = (PCRTNETBOOTP)(pUdpHdr + 1);
3502 uint8_t bMsgType;
3503 if (!RTNetIPv4IsDHCPValid(pUdpHdr, pDhcp, cbUdpPkt - sizeof(*pUdpHdr), &bMsgType))
3504 {
3505 Log6(("intnetR0NetworkEditDhcpFromIntNet: Bad DHCP packet\n"));
3506 return;
3507 }
3508
3509 switch (bMsgType)
3510 {
3511 case RTNET_DHCP_MT_DISCOVER:
3512 case RTNET_DHCP_MT_REQUEST:
3513 /*
3514 * Must set the broadcast flag or we won't catch the respons.
3515 */
3516 if (!(pDhcp->bp_flags & RT_H2BE_U16_C(RTNET_DHCP_FLAG_BROADCAST)))
3517 {
3518 Log6(("intnetR0NetworkEditDhcpFromIntNet: Setting broadcast flag in DHCP %#x, previously %x\n",
3519 bMsgType, pDhcp->bp_flags));
3520
3521 /* Patch flags */
3522 uint16_t uFlags = pDhcp->bp_flags | RT_H2BE_U16_C(RTNET_DHCP_FLAG_BROADCAST);
3523 intnetR0SgWritePart(pSG, (uintptr_t)&pDhcp->bp_flags - (uintptr_t)pIpHdr + sizeof(RTNETETHERHDR), sizeof(uFlags), &uFlags);
3524
3525 /* Patch UDP checksum */
3526 if (pUdpHdr->uh_sum != 0)
3527 {
3528 uint32_t uChecksum = (uint32_t)~pUdpHdr->uh_sum + RT_H2BE_U16_C(RTNET_DHCP_FLAG_BROADCAST);
3529 while (uChecksum >> 16)
3530 uChecksum = (uChecksum >> 16) + (uChecksum & 0xFFFF);
3531 uChecksum = ~uChecksum;
3532 intnetR0SgWritePart(pSG,
3533 (uintptr_t)&pUdpHdr->uh_sum - (uintptr_t)pIpHdr + sizeof(RTNETETHERHDR),
3534 sizeof(pUdpHdr->uh_sum),
3535 &uChecksum);
3536 }
3537 }
3538
3539#ifdef RT_OS_DARWIN
3540 /*
3541 * Work around little endian checksum issue in mac os x 10.7.0 GM.
3542 */
3543 if ( pIpHdr->ip_tos
3544 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_WORKAROUND_1))
3545 {
3546 /* Patch it. */
3547 uint8_t uTos = pIpHdr->ip_tos;
3548 uint8_t uZero = 0;
3549 intnetR0SgWritePart(pSG, sizeof(RTNETETHERHDR) + 1, sizeof(uZero), &uZero);
3550
3551 /* Patch the IP header checksum. */
3552 uint32_t uChecksum = (uint32_t)~pIpHdr->ip_sum - (uTos << 8);
3553 while (uChecksum >> 16)
3554 uChecksum = (uChecksum >> 16) + (uChecksum & 0xFFFF);
3555 uChecksum = ~uChecksum;
3556
3557 Log(("intnetR0NetworkEditDhcpFromIntNet: cleared ip_tos (was %#04x); ip_sum=%#06x -> %#06x\n",
3558 uTos, RT_BE2H_U16(pIpHdr->ip_sum), RT_BE2H_U16(uChecksum) ));
3559 intnetR0SgWritePart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPV4, ip_sum),
3560 sizeof(pIpHdr->ip_sum), &uChecksum);
3561 }
3562#endif
3563 break;
3564 }
3565}
3566
3567
3568/**
3569 * Checks if the callers context is okay for sending to the specified
3570 * destinations.
3571 *
3572 * @returns true if it's okay, false if it isn't.
3573 * @param pNetwork The network.
3574 * @param pIfSender The interface sending or NULL if it's the trunk.
3575 * @param pDstTab The destination table.
3576 */
3577DECLINLINE(bool) intnetR0NetworkIsContextOk(PINTNETNETWORK pNetwork, PINTNETIF pIfSender, PCINTNETDSTTAB pDstTab)
3578{
3579 NOREF(pNetwork);
3580
3581 /* Sending to the trunk is the problematic path. If the trunk is the
3582 sender we won't be sending to it, so no problem..
3583 Note! fTrunkDst may be set event if if the trunk is the sender. */
3584 if (!pIfSender)
3585 return true;
3586
3587 uint32_t const fTrunkDst = pDstTab->fTrunkDst;
3588 if (!fTrunkDst)
3589 return true;
3590
3591 /* ASSUMES: that the trunk won't change its report while we're checking. */
3592 PINTNETTRUNKIF pTrunk = pDstTab->pTrunk;
3593 if (pTrunk && (fTrunkDst & pTrunk->fNoPreemptDsts) == fTrunkDst)
3594 return true;
3595
3596 /* ASSUMES: That a preemption test detects HM contexts. (Will work on
3597 non-preemptive systems as well.) */
3598 if (RTThreadPreemptIsEnabled(NIL_RTTHREAD))
3599 return true;
3600 return false;
3601}
3602
3603
3604/**
3605 * Checks if the callers context is okay for doing a broadcast given the
3606 * specified source.
3607 *
3608 * @returns true if it's okay, false if it isn't.
3609 * @param pNetwork The network.
3610 * @param fSrc The source of the packet. (0 (intnet),
3611 * INTNETTRUNKDIR_HOST or INTNETTRUNKDIR_WIRE).
3612 */
3613DECLINLINE(bool) intnetR0NetworkIsContextOkForBroadcast(PINTNETNETWORK pNetwork, uint32_t fSrc)
3614{
3615 /* Sending to the trunk is the problematic path. If the trunk is the
3616 sender we won't be sending to it, so no problem. */
3617 if (fSrc)
3618 return true;
3619
3620 /* ASSUMES: That a preemption test detects HM contexts. (Will work on
3621 non-preemptive systems as well.) */
3622 if (RTThreadPreemptIsEnabled(NIL_RTTHREAD))
3623 return true;
3624
3625 /* PARANOIA: Grab the spinlock to make sure the trunk structure cannot be
3626 freed while we're touching it. */
3627 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
3628 PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
3629
3630 bool fRc = !pTrunk
3631 || pTrunk->fNoPreemptDsts == (INTNETTRUNKDIR_HOST | INTNETTRUNKDIR_WIRE)
3632 || ( (!pNetwork->MacTab.fHostActive || (pTrunk->fNoPreemptDsts & INTNETTRUNKDIR_HOST) )
3633 && (!pNetwork->MacTab.fWireActive || (pTrunk->fNoPreemptDsts & INTNETTRUNKDIR_WIRE) ) );
3634
3635 RTSpinlockRelease(pNetwork->hAddrSpinlock);
3636
3637 return fRc;
3638}
3639
3640
3641/**
3642 * Check context, edit, snoop and switch a broadcast frame when sharing MAC
3643 * address on the wire.
3644 *
3645 * The caller must hold at least one interface on the network busy to prevent it
3646 * from destructing beath us.
3647 *
3648 * @param pNetwork The network the frame is being sent to.
3649 * @param fSrc The source of the packet. (0 (intnet),
3650 * INTNETTRUNKDIR_HOST or INTNETTRUNKDIR_WIRE).
3651 * @param pIfSender The sender interface, NULL if trunk. Used to
3652 * prevent sending an echo to the sender.
3653 * @param pSG Pointer to the gather list.
3654 * @param pEthHdr Pointer to the ethernet header.
3655 * @param pDstTab The destination output table.
3656 */
3657static INTNETSWDECISION intnetR0NetworkSharedMacFixAndSwitchBroadcast(PINTNETNETWORK pNetwork,
3658 uint32_t fSrc, PINTNETIF pIfSender,
3659 PINTNETSG pSG, PRTNETETHERHDR pEthHdr,
3660 PINTNETDSTTAB pDstTab)
3661{
3662 /*
3663 * Before doing any work here, we need to figure out if we can handle it
3664 * in the current context. The restrictions are solely on the trunk.
3665 *
3666 * Note! Since at least one interface is busy, there won't be any changes
3667 * to the parameters here (unless the trunk changes its capability
3668 * report, which it shouldn't).
3669 */
3670 if (!intnetR0NetworkIsContextOkForBroadcast(pNetwork, fSrc))
3671 return INTNETSWDECISION_BAD_CONTEXT;
3672
3673 /*
3674 * Check for ICMPv6 Neighbor Advertisements coming from the trunk.
3675 * If we see an advertisement for an IP in our cache, we can safely remove
3676 * it as the IP has probably moved.
3677 */
3678 if ( (fSrc & INTNETTRUNKDIR_WIRE)
3679 && RT_BE2H_U16(pEthHdr->EtherType) == RTNET_ETHERTYPE_IPV6
3680 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
3681 intnetR0NetworkSnoopNAFromWire(pNetwork, pSG, pEthHdr);
3682
3683
3684 /*
3685 * Check for ARP packets from the wire since we'll have to make
3686 * modification to them if we're sharing the MAC address with the host.
3687 */
3688 if ( (fSrc & INTNETTRUNKDIR_WIRE)
3689 && RT_BE2H_U16(pEthHdr->EtherType) == RTNET_ETHERTYPE_ARP
3690 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
3691 intnetR0NetworkEditArpFromWire(pNetwork, pSG, pEthHdr);
3692
3693 /*
3694 * Check for DHCP packets from the internal net since we'll have to set
3695 * broadcast flag in DHCP requests if we're sharing the MAC address with
3696 * the host. GSO is not applicable to DHCP traffic.
3697 */
3698 if ( !fSrc
3699 && RT_BE2H_U16(pEthHdr->EtherType) == RTNET_ETHERTYPE_IPV4
3700 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
3701 intnetR0NetworkEditDhcpFromIntNet(pNetwork, pSG, pEthHdr);
3702
3703 /*
3704 * Snoop address info from packet originating from the trunk connection.
3705 */
3706 if (fSrc)
3707 {
3708#ifdef INTNET_WITH_DHCP_SNOOPING
3709 uint16_t EtherType = RT_BE2H_U16(pEthHdr->EtherType);
3710 if ( ( EtherType == RTNET_ETHERTYPE_IPV4 /* for DHCP */
3711 && pSG->cbTotal >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN
3712 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID )
3713 || (pSG->fFlags & INTNETSG_FLAGS_ARP_IPV4) )
3714 intnetR0TrunkIfSnoopAddr(pNetwork, pSG, EtherType);
3715#else
3716 if (pSG->fFlags & INTNETSG_FLAGS_ARP_IPV4)
3717 intnetR0TrunkIfSnoopArp(pNetwork, pSG);
3718#endif
3719 }
3720
3721 /*
3722 * Create the broadcast destination table.
3723 */
3724 return intnetR0NetworkSwitchBroadcast(pNetwork, fSrc, pIfSender, pDstTab);
3725}
3726
3727
3728/**
3729 * Check context, snoop and switch a unicast frame using the network layer
3730 * address of the link layer one (when sharing MAC address on the wire).
3731 *
3732 * This function is only used for frames coming from the wire (trunk).
3733 *
3734 * @returns true if it's addressed to someone on the network, otherwise false.
3735 * @param pNetwork The network the frame is being sent to.
3736 * @param pSG Pointer to the gather list.
3737 * @param pEthHdr Pointer to the ethernet header.
3738 * @param pDstTab The destination output table.
3739 */
3740static INTNETSWDECISION intnetR0NetworkSharedMacFixAndSwitchUnicast(PINTNETNETWORK pNetwork, PINTNETSG pSG,
3741 PRTNETETHERHDR pEthHdr, PINTNETDSTTAB pDstTab)
3742{
3743 /*
3744 * Extract the network address from the packet.
3745 */
3746 RTNETADDRU Addr;
3747 INTNETADDRTYPE enmAddrType;
3748 uint8_t cbAddr;
3749 switch (RT_BE2H_U16(pEthHdr->EtherType))
3750 {
3751 case RTNET_ETHERTYPE_IPV4:
3752 if (RT_UNLIKELY(!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPV4, ip_dst), sizeof(Addr.IPv4), &Addr)))
3753 {
3754 Log(("intnetshareduni: failed to read ip_dst! cbTotal=%#x\n", pSG->cbTotal));
3755 return intnetR0NetworkSwitchTrunk(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
3756 }
3757 enmAddrType = kIntNetAddrType_IPv4;
3758 cbAddr = sizeof(Addr.IPv4);
3759 Log6(("intnetshareduni: IPv4 %d.%d.%d.%d\n", Addr.au8[0], Addr.au8[1], Addr.au8[2], Addr.au8[3]));
3760 break;
3761
3762 case RTNET_ETHERTYPE_IPV6:
3763 if (RT_UNLIKELY(!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPV6, ip6_dst), sizeof(Addr.IPv6), &Addr)))
3764 {
3765 Log(("intnetshareduni: failed to read ip6_dst! cbTotal=%#x\n", pSG->cbTotal));
3766 return intnetR0NetworkSwitchTrunk(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
3767 }
3768 enmAddrType = kIntNetAddrType_IPv6;
3769 cbAddr = sizeof(Addr.IPv6);
3770 break;
3771#if 0 /** @todo IntNet: implement IPX for wireless MAC sharing? */
3772 case RTNET_ETHERTYPE_IPX_1:
3773 case RTNET_ETHERTYPE_IPX_2:
3774 case RTNET_ETHERTYPE_IPX_3:
3775 if (RT_UNLIKELY(!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPX, ipx_dstnet), sizeof(Addr.IPX), &Addr)))
3776 {
3777 Log(("intnetshareduni: failed to read ipx_dstnet! cbTotal=%#x\n", pSG->cbTotal));
3778 return intnetR0NetworkSwitchTrunk(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
3779 }
3780 enmAddrType = kIntNetAddrType_IPX;
3781 cbAddr = sizeof(Addr.IPX);
3782 break;
3783#endif
3784
3785 /*
3786 * Treat ARP as broadcast (it shouldn't end up here normally,
3787 * so it goes last in the switch).
3788 */
3789 case RTNET_ETHERTYPE_ARP:
3790 Log6(("intnetshareduni: ARP\n"));
3791 /** @todo revisit this broadcasting of unicast ARP frames! */
3792 return intnetR0NetworkSharedMacFixAndSwitchBroadcast(pNetwork, INTNETTRUNKDIR_WIRE, NULL, pSG, pEthHdr, pDstTab);
3793
3794 /*
3795 * Unknown packets are sent to the trunk and any promiscuous interfaces.
3796 */
3797 default:
3798 {
3799 Log6(("intnetshareduni: unknown ethertype=%#x\n", RT_BE2H_U16(pEthHdr->EtherType)));
3800 return intnetR0NetworkSwitchTrunkAndPromisc(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
3801 }
3802 }
3803
3804 /*
3805 * Do level-3 switching.
3806 */
3807 INTNETSWDECISION enmSwDecision = intnetR0NetworkSwitchLevel3(pNetwork, &pEthHdr->DstMac,
3808 enmAddrType, &Addr, cbAddr,
3809 INTNETTRUNKDIR_WIRE, pDstTab);
3810
3811#ifdef INTNET_WITH_DHCP_SNOOPING
3812 /*
3813 * Perform DHCP snooping. GSO is not applicable to DHCP traffic
3814 */
3815 if ( enmAddrType == kIntNetAddrType_IPv4
3816 && pSG->cbTotal >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN
3817 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
3818 intnetR0TrunkIfSnoopAddr(pNetwork, pSG, RT_BE2H_U16(pEthHdr->EtherType));
3819#endif /* INTNET_WITH_DHCP_SNOOPING */
3820
3821 return enmSwDecision;
3822}
3823
3824
3825/**
3826 * Release all the interfaces in the destination table when we realize that
3827 * we're in a context where we cannot get the job done.
3828 *
3829 * @param pNetwork The network.
3830 * @param pDstTab The destination table.
3831 */
3832static void intnetR0NetworkReleaseDstTab(PINTNETNETWORK pNetwork, PINTNETDSTTAB pDstTab)
3833{
3834 /* The trunk interface. */
3835 if (pDstTab->fTrunkDst)
3836 {
3837 PINTNETTRUNKIF pTrunk = pDstTab->pTrunk;
3838 if (pTrunk)
3839 intnetR0BusyDec(pNetwork, &pTrunk->cBusy);
3840 pDstTab->pTrunk = NULL;
3841 pDstTab->fTrunkDst = 0;
3842 }
3843
3844 /* Regular interfaces. */
3845 uint32_t iIf = pDstTab->cIfs;
3846 while (iIf-- > 0)
3847 {
3848 PINTNETIF pIf = pDstTab->aIfs[iIf].pIf;
3849 intnetR0BusyDecIf(pIf);
3850 pDstTab->aIfs[iIf].pIf = NULL;
3851 }
3852 pDstTab->cIfs = 0;
3853}
3854
3855
3856/**
3857 * Deliver the frame to the interfaces specified in the destination table.
3858 *
3859 * @param pNetwork The network.
3860 * @param pDstTab The destination table.
3861 * @param pSG The frame to send.
3862 * @param pIfSender The sender interface. NULL if it originated via
3863 * the trunk.
3864 */
3865static void intnetR0NetworkDeliver(PINTNETNETWORK pNetwork, PINTNETDSTTAB pDstTab, PINTNETSG pSG, PINTNETIF pIfSender)
3866{
3867 /*
3868 * Do the interfaces first before sending it to the wire and risk having to
3869 * modify it.
3870 */
3871 uint32_t iIf = pDstTab->cIfs;
3872 while (iIf-- > 0)
3873 {
3874 PINTNETIF pIf = pDstTab->aIfs[iIf].pIf;
3875 intnetR0IfSend(pIf, pIfSender, pSG,
3876 pDstTab->aIfs[iIf].fReplaceDstMac ? &pIf->MacAddr: NULL);
3877 intnetR0BusyDecIf(pIf);
3878 pDstTab->aIfs[iIf].pIf = NULL;
3879 }
3880 pDstTab->cIfs = 0;
3881
3882 /*
3883 * Send to the trunk.
3884 *
3885 * Note! The switching functions will include the trunk even when the frame
3886 * source is the trunk. This is because we need it to figure out
3887 * whether the other half of the trunk should see the frame or not
3888 * and let the caller know.
3889 *
3890 * So, we'll ignore trunk sends here if the frame origin is
3891 * INTNETTRUNKSWPORT::pfnRecv.
3892 */
3893 if (pDstTab->fTrunkDst)
3894 {
3895 PINTNETTRUNKIF pTrunk = pDstTab->pTrunk;
3896 if (pTrunk)
3897 {
3898 if (pIfSender)
3899 intnetR0TrunkIfSend(pTrunk, pNetwork, pIfSender, pDstTab->fTrunkDst, pSG);
3900 intnetR0BusyDec(pNetwork, &pTrunk->cBusy);
3901 }
3902 pDstTab->pTrunk = NULL;
3903 pDstTab->fTrunkDst = 0;
3904 }
3905}
3906
3907
3908/**
3909 * Sends a frame.
3910 *
3911 * This function will distribute the frame to the interfaces it is addressed to.
3912 * It will also update the MAC address of the sender.
3913 *
3914 * The caller must own the network mutex.
3915 *
3916 * @returns The switching decision.
3917 * @param pNetwork The network the frame is being sent to.
3918 * @param pIfSender The interface sending the frame. This is NULL if it's the trunk.
3919 * @param fSrc The source flags. This 0 if it's not from the trunk.
3920 * @param pSG Pointer to the gather list.
3921 * @param pDstTab The destination table to use.
3922 */
3923static INTNETSWDECISION intnetR0NetworkSend(PINTNETNETWORK pNetwork, PINTNETIF pIfSender, uint32_t fSrc,
3924 PINTNETSG pSG, PINTNETDSTTAB pDstTab)
3925{
3926 /*
3927 * Assert reality.
3928 */
3929 AssertPtr(pNetwork);
3930 AssertPtrNull(pIfSender);
3931 Assert(pIfSender ? fSrc == 0 : fSrc != 0);
3932 Assert(!pIfSender || pNetwork == pIfSender->pNetwork);
3933 AssertPtr(pSG);
3934 Assert(pSG->cSegsUsed >= 1);
3935 Assert(pSG->cSegsUsed <= pSG->cSegsAlloc);
3936 if (pSG->cbTotal < sizeof(RTNETETHERHDR))
3937 return INTNETSWDECISION_INVALID;
3938
3939 /*
3940 * Get the ethernet header (might theoretically involve multiple segments).
3941 */
3942 RTNETETHERHDR EthHdr;
3943 if (pSG->aSegs[0].cb >= sizeof(EthHdr))
3944 EthHdr = *(PCRTNETETHERHDR)pSG->aSegs[0].pv;
3945 else if (!intnetR0SgReadPart(pSG, 0, sizeof(EthHdr), &EthHdr))
3946 return INTNETSWDECISION_INVALID;
3947 if ( (EthHdr.DstMac.au8[0] == 0x08 && EthHdr.DstMac.au8[1] == 0x00 && EthHdr.DstMac.au8[2] == 0x27)
3948 || (EthHdr.SrcMac.au8[0] == 0x08 && EthHdr.SrcMac.au8[1] == 0x00 && EthHdr.SrcMac.au8[2] == 0x27)
3949 || (EthHdr.DstMac.au8[0] == 0x00 && EthHdr.DstMac.au8[1] == 0x16 && EthHdr.DstMac.au8[2] == 0xcb)
3950 || (EthHdr.SrcMac.au8[0] == 0x00 && EthHdr.SrcMac.au8[1] == 0x16 && EthHdr.SrcMac.au8[2] == 0xcb)
3951 || EthHdr.DstMac.au8[0] == 0xff
3952 || EthHdr.SrcMac.au8[0] == 0xff)
3953 Log2(("D=%.6Rhxs S=%.6Rhxs T=%04x f=%x z=%x\n",
3954 &EthHdr.DstMac, &EthHdr.SrcMac, RT_BE2H_U16(EthHdr.EtherType), fSrc, pSG->cbTotal));
3955
3956 /*
3957 * Learn the MAC address of the sender. No re-learning as the interface
3958 * user will normally tell us the right MAC address.
3959 *
3960 * Note! We don't notify the trunk about these mainly because of the
3961 * problematic contexts we might be called in.
3962 */
3963 if (RT_UNLIKELY( pIfSender
3964 && !pIfSender->fMacSet
3965 && memcmp(&EthHdr.SrcMac, &pIfSender->MacAddr, sizeof(pIfSender->MacAddr))
3966 && !intnetR0IsMacAddrMulticast(&EthHdr.SrcMac)
3967 ))
3968 {
3969 Log2(("IF MAC: %.6Rhxs -> %.6Rhxs\n", &pIfSender->MacAddr, &EthHdr.SrcMac));
3970 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
3971
3972 PINTNETMACTABENTRY pIfEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIfSender);
3973 if (pIfEntry)
3974 pIfEntry->MacAddr = EthHdr.SrcMac;
3975 pIfSender->MacAddr = EthHdr.SrcMac;
3976
3977 RTSpinlockRelease(pNetwork->hAddrSpinlock);
3978 }
3979
3980 /*
3981 * Deal with MAC address sharing as that may required editing of the
3982 * packets before we dispatch them anywhere.
3983 */
3984 INTNETSWDECISION enmSwDecision;
3985 if (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
3986 {
3987 if (intnetR0IsMacAddrMulticast(&EthHdr.DstMac))
3988 enmSwDecision = intnetR0NetworkSharedMacFixAndSwitchBroadcast(pNetwork, fSrc, pIfSender, pSG, &EthHdr, pDstTab);
3989 else if (fSrc & INTNETTRUNKDIR_WIRE)
3990 {
3991 if (intnetR0NetworkSharedMacDetectAndFixBroadcast(pNetwork, pSG, &EthHdr))
3992 enmSwDecision = intnetR0NetworkSharedMacFixAndSwitchBroadcast(pNetwork, fSrc, pIfSender, pSG, &EthHdr, pDstTab);
3993 else
3994 enmSwDecision = intnetR0NetworkSharedMacFixAndSwitchUnicast(pNetwork, pSG, &EthHdr, pDstTab);
3995 }
3996 else
3997 enmSwDecision = intnetR0NetworkSwitchUnicast(pNetwork, fSrc, pIfSender, &EthHdr.DstMac, pDstTab);
3998 }
3999 else if (intnetR0IsMacAddrMulticast(&EthHdr.DstMac))
4000 enmSwDecision = intnetR0NetworkSwitchBroadcast(pNetwork, fSrc, pIfSender, pDstTab);
4001 else
4002 enmSwDecision = intnetR0NetworkSwitchUnicast(pNetwork, fSrc, pIfSender, &EthHdr.DstMac, pDstTab);
4003
4004 /*
4005 * Deliver to the destinations if we can.
4006 */
4007 if (enmSwDecision != INTNETSWDECISION_BAD_CONTEXT)
4008 {
4009 if (intnetR0NetworkIsContextOk(pNetwork, pIfSender, pDstTab))
4010 intnetR0NetworkDeliver(pNetwork, pDstTab, pSG, pIfSender);
4011 else
4012 {
4013 intnetR0NetworkReleaseDstTab(pNetwork, pDstTab);
4014 enmSwDecision = INTNETSWDECISION_BAD_CONTEXT;
4015 }
4016 }
4017
4018 return enmSwDecision;
4019}
4020
4021
4022/**
4023 * Sends one or more frames.
4024 *
4025 * The function will first the frame which is passed as the optional arguments
4026 * pvFrame and cbFrame. These are optional since it also possible to chain
4027 * together one or more frames in the send buffer which the function will
4028 * process after considering it's arguments.
4029 *
4030 * The caller is responsible for making sure that there are no concurrent calls
4031 * to this method (with the same handle).
4032 *
4033 * @returns VBox status code.
4034 * @param hIf The interface handle.
4035 * @param pSession The caller's session.
4036 */
4037INTNETR0DECL(int) IntNetR0IfSend(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession)
4038{
4039 Log5(("IntNetR0IfSend: hIf=%RX32\n", hIf));
4040
4041 /*
4042 * Validate input and translate the handle.
4043 */
4044 PINTNET pIntNet = g_pIntNet;
4045 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4046 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4047
4048 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4049 if (!pIf)
4050 return VERR_INVALID_HANDLE;
4051 STAM_REL_PROFILE_START(&pIf->pIntBuf->StatSend1, a);
4052
4053 /*
4054 * Make sure we've got a network.
4055 */
4056 int rc = VINF_SUCCESS;
4057 intnetR0BusyIncIf(pIf);
4058 PINTNETNETWORK pNetwork = pIf->pNetwork;
4059 if (RT_LIKELY(pNetwork))
4060 {
4061 /*
4062 * Grab the destination table.
4063 */
4064 PINTNETDSTTAB pDstTab = ASMAtomicXchgPtrT(&pIf->pDstTab, NULL, PINTNETDSTTAB);
4065 if (RT_LIKELY(pDstTab))
4066 {
4067 /*
4068 * Process the send buffer.
4069 */
4070 INTNETSWDECISION enmSwDecision = INTNETSWDECISION_BROADCAST;
4071 INTNETSG Sg; /** @todo this will have to be changed if we're going to use async sending
4072 * with buffer sharing for some OS or service. Darwin copies everything so
4073 * I won't bother allocating and managing SGs right now. Sorry. */
4074 PINTNETHDR pHdr;
4075 while ((pHdr = IntNetRingGetNextFrameToRead(&pIf->pIntBuf->Send)) != NULL)
4076 {
4077 uint8_t const u8Type = pHdr->u8Type;
4078 if (u8Type == INTNETHDR_TYPE_FRAME)
4079 {
4080 /* Send regular frame. */
4081 void *pvCurFrame = IntNetHdrGetFramePtr(pHdr, pIf->pIntBuf);
4082 IntNetSgInitTemp(&Sg, pvCurFrame, pHdr->cbFrame);
4083 if (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
4084 intnetR0IfSnoopAddr(pIf, (uint8_t *)pvCurFrame, pHdr->cbFrame, false /*fGso*/, (uint16_t *)&Sg.fFlags);
4085 enmSwDecision = intnetR0NetworkSend(pNetwork, pIf, 0 /*fSrc*/, &Sg, pDstTab);
4086 }
4087 else if (u8Type == INTNETHDR_TYPE_GSO)
4088 {
4089 /* Send GSO frame if sane. */
4090 PPDMNETWORKGSO pGso = IntNetHdrGetGsoContext(pHdr, pIf->pIntBuf);
4091 uint32_t cbFrame = pHdr->cbFrame - sizeof(*pGso);
4092 if (RT_LIKELY(PDMNetGsoIsValid(pGso, pHdr->cbFrame, cbFrame)))
4093 {
4094 void *pvCurFrame = pGso + 1;
4095 IntNetSgInitTempGso(&Sg, pvCurFrame, cbFrame, pGso);
4096 if (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
4097 intnetR0IfSnoopAddr(pIf, (uint8_t *)pvCurFrame, cbFrame, true /*fGso*/, (uint16_t *)&Sg.fFlags);
4098 enmSwDecision = intnetR0NetworkSend(pNetwork, pIf, 0 /*fSrc*/, &Sg, pDstTab);
4099 }
4100 else
4101 {
4102 STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatBadFrames); /* ignore */
4103 enmSwDecision = INTNETSWDECISION_DROP;
4104 }
4105 }
4106 /* Unless it's a padding frame, we're getting babble from the producer. */
4107 else
4108 {
4109 if (u8Type != INTNETHDR_TYPE_PADDING)
4110 STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatBadFrames); /* ignore */
4111 enmSwDecision = INTNETSWDECISION_DROP;
4112 }
4113 if (enmSwDecision == INTNETSWDECISION_BAD_CONTEXT)
4114 {
4115 rc = VERR_TRY_AGAIN;
4116 break;
4117 }
4118
4119 /* Skip to the next frame. */
4120 IntNetRingSkipFrame(&pIf->pIntBuf->Send);
4121 }
4122
4123 /*
4124 * Put back the destination table.
4125 */
4126 Assert(!pIf->pDstTab);
4127 ASMAtomicWritePtr(&pIf->pDstTab, pDstTab);
4128 }
4129 else
4130 rc = VERR_INTERNAL_ERROR_4;
4131 }
4132 else
4133 rc = VERR_INTERNAL_ERROR_3;
4134
4135 /*
4136 * Release the interface.
4137 */
4138 intnetR0BusyDecIf(pIf);
4139 STAM_REL_PROFILE_STOP(&pIf->pIntBuf->StatSend1, a);
4140 intnetR0IfRelease(pIf, pSession);
4141 return rc;
4142}
4143
4144
4145/**
4146 * VMMR0 request wrapper for IntNetR0IfSend.
4147 *
4148 * @returns see IntNetR0IfSend.
4149 * @param pSession The caller's session.
4150 * @param pReq The request packet.
4151 */
4152INTNETR0DECL(int) IntNetR0IfSendReq(PSUPDRVSESSION pSession, PINTNETIFSENDREQ pReq)
4153{
4154 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4155 return VERR_INVALID_PARAMETER;
4156 return IntNetR0IfSend(pReq->hIf, pSession);
4157}
4158
4159
4160/**
4161 * Maps the default buffer into ring 3.
4162 *
4163 * @returns VBox status code.
4164 * @param hIf The interface handle.
4165 * @param pSession The caller's session.
4166 * @param ppRing3Buf Where to store the address of the ring-3 mapping
4167 * (optional).
4168 * @param ppRing0Buf Where to store the address of the ring-0 mapping
4169 * (optional).
4170 */
4171INTNETR0DECL(int) IntNetR0IfGetBufferPtrs(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession,
4172 R3PTRTYPE(PINTNETBUF) *ppRing3Buf, R0PTRTYPE(PINTNETBUF) *ppRing0Buf)
4173{
4174 LogFlow(("IntNetR0IfGetBufferPtrs: hIf=%RX32 ppRing3Buf=%p ppRing0Buf=%p\n", hIf, ppRing3Buf, ppRing0Buf));
4175
4176 /*
4177 * Validate input.
4178 */
4179 PINTNET pIntNet = g_pIntNet;
4180 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4181 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4182
4183 AssertPtrNullReturn(ppRing3Buf, VERR_INVALID_PARAMETER);
4184 AssertPtrNullReturn(ppRing0Buf, VERR_INVALID_PARAMETER);
4185 if (ppRing3Buf)
4186 *ppRing3Buf = 0;
4187 if (ppRing0Buf)
4188 *ppRing0Buf = 0;
4189
4190 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4191 if (!pIf)
4192 return VERR_INVALID_HANDLE;
4193
4194 /*
4195 * ASSUMES that only the process that created an interface can use it.
4196 * ASSUMES that we created the ring-3 mapping when selecting or
4197 * allocating the buffer.
4198 */
4199 int rc = RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
4200 if (RT_SUCCESS(rc))
4201 {
4202 if (ppRing3Buf)
4203 *ppRing3Buf = pIf->pIntBufR3;
4204 if (ppRing0Buf)
4205 *ppRing0Buf = (R0PTRTYPE(PINTNETBUF))pIf->pIntBuf; /* tstIntNetR0 mess */
4206
4207 rc = RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
4208 }
4209
4210 intnetR0IfRelease(pIf, pSession);
4211 LogFlow(("IntNetR0IfGetBufferPtrs: returns %Rrc *ppRing3Buf=%p *ppRing0Buf=%p\n",
4212 rc, ppRing3Buf ? *ppRing3Buf : NIL_RTR3PTR, ppRing0Buf ? *ppRing0Buf : NIL_RTR0PTR));
4213 return rc;
4214}
4215
4216
4217/**
4218 * VMMR0 request wrapper for IntNetR0IfGetBufferPtrs.
4219 *
4220 * @returns see IntNetR0IfGetRing3Buffer.
4221 * @param pSession The caller's session.
4222 * @param pReq The request packet.
4223 */
4224INTNETR0DECL(int) IntNetR0IfGetBufferPtrsReq(PSUPDRVSESSION pSession, PINTNETIFGETBUFFERPTRSREQ pReq)
4225{
4226 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4227 return VERR_INVALID_PARAMETER;
4228 return IntNetR0IfGetBufferPtrs(pReq->hIf, pSession, &pReq->pRing3Buf, &pReq->pRing0Buf);
4229}
4230
4231
#if 0
/**
 * Gets the physical addresses of the default interface buffer.
 *
 * Disabled code.  The body only validates the input and takes and releases
 * the network mutex; it always reports VERR_NOT_IMPLEMENTED once the handle
 * has been looked up.
 *
 * @returns VBox status code.
 * @param   hIf         The interface handle.
 * @param   paPages     Where to store the addresses. (The reserved fields will be set to zero.)
 * @param   cPages      Number of entries in the paPages array.
 */
INTNETR0DECL(int) IntNetR0IfGetPhysBuffer(INTNETIFHANDLE hIf, PSUPPAGE paPages, unsigned cPages)
{
    /*
     * Validate input.
     */
    PINTNET pIntNet = g_pIntNet;
    AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
    AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);

    AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
    AssertPtrReturn((uint8_t *)&paPages[cPages] - 1, VERR_INVALID_PARAMETER);
    /* NOTE(review): pSession is not declared in this block - to be sorted out should the code ever get enabled. */
    PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
    if (!pIf)
        return VERR_INVALID_HANDLE;

    /*
     * Grab the lock and get the data.
     * ASSUMES that the handle isn't closed while we're here.
     */
    int rc = RTSemFastMutexRequest(pIf->pNetwork->FastMutex);
    if (RT_SUCCESS(rc))
    {
        /** @todo make a SUPR0 api for obtaining the array. SUPR0/IPRT is keeping track of everything, there
         * is no need for any extra bookkeeping here.. */

        rc = RTSemFastMutexRelease(pIf->pNetwork->FastMutex);
    }
    intnetR0IfRelease(pIf, pSession);
    return VERR_NOT_IMPLEMENTED;
}
#endif
4272
4273
4274/**
4275 * Sets the promiscuous mode property of an interface.
4276 *
4277 * @returns VBox status code.
4278 * @param hIf The interface handle.
4279 * @param pSession The caller's session.
4280 * @param fPromiscuous Set if the interface should be in promiscuous mode, clear if not.
4281 */
4282INTNETR0DECL(int) IntNetR0IfSetPromiscuousMode(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, bool fPromiscuous)
4283{
4284 LogFlow(("IntNetR0IfSetPromiscuousMode: hIf=%RX32 fPromiscuous=%d\n", hIf, fPromiscuous));
4285
4286 /*
4287 * Validate & translate input.
4288 */
4289 PINTNET pIntNet = g_pIntNet;
4290 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4291 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4292
4293 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4294 if (!pIf)
4295 {
4296 Log(("IntNetR0IfSetPromiscuousMode: returns VERR_INVALID_HANDLE\n"));
4297 return VERR_INVALID_HANDLE;
4298 }
4299
4300 /*
4301 * Get the network, take the address spinlock, and make the change.
4302 * Paranoia^2: Mark ourselves busy to prevent anything from being destroyed.
4303 */
4304 int rc = VINF_SUCCESS;
4305 intnetR0BusyIncIf(pIf);
4306 PINTNETNETWORK pNetwork = pIf->pNetwork;
4307 if (pNetwork)
4308 {
4309 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
4310
4311 if (pIf->fPromiscuousReal != fPromiscuous)
4312 {
4313 const bool fPromiscuousEff = fPromiscuous
4314 && (pIf->fOpenFlags & INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW)
4315 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS);
4316 Log(("IntNetR0IfSetPromiscuousMode: hIf=%RX32: Changed from %d -> %d (%d)\n",
4317 hIf, !fPromiscuous, !!fPromiscuous, fPromiscuousEff));
4318
4319 pIf->fPromiscuousReal = fPromiscuous;
4320
4321 PINTNETMACTABENTRY pEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIf); Assert(pEntry);
4322 if (RT_LIKELY(pEntry))
4323 {
4324 if (pEntry->fPromiscuousEff)
4325 {
4326 pNetwork->MacTab.cPromiscuousEntries--;
4327 if (!pEntry->fPromiscuousSeeTrunk)
4328 pNetwork->MacTab.cPromiscuousNoTrunkEntries--;
4329 Assert(pNetwork->MacTab.cPromiscuousEntries < pNetwork->MacTab.cEntries);
4330 Assert(pNetwork->MacTab.cPromiscuousNoTrunkEntries < pNetwork->MacTab.cEntries);
4331 }
4332
4333 pEntry->fPromiscuousEff = fPromiscuousEff;
4334 pEntry->fPromiscuousSeeTrunk = fPromiscuousEff
4335 && (pIf->fOpenFlags & INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK);
4336
4337 if (pEntry->fPromiscuousEff)
4338 {
4339 pNetwork->MacTab.cPromiscuousEntries++;
4340 if (!pEntry->fPromiscuousSeeTrunk)
4341 pNetwork->MacTab.cPromiscuousNoTrunkEntries++;
4342 }
4343 Assert(pNetwork->MacTab.cPromiscuousEntries <= pNetwork->MacTab.cEntries);
4344 Assert(pNetwork->MacTab.cPromiscuousNoTrunkEntries <= pNetwork->MacTab.cEntries);
4345 }
4346 }
4347
4348 RTSpinlockRelease(pNetwork->hAddrSpinlock);
4349 }
4350 else
4351 rc = VERR_WRONG_ORDER;
4352
4353 intnetR0BusyDecIf(pIf);
4354 intnetR0IfRelease(pIf, pSession);
4355 return rc;
4356}
4357
4358
4359/**
4360 * VMMR0 request wrapper for IntNetR0IfSetPromiscuousMode.
4361 *
4362 * @returns see IntNetR0IfSetPromiscuousMode.
4363 * @param pSession The caller's session.
4364 * @param pReq The request packet.
4365 */
4366INTNETR0DECL(int) IntNetR0IfSetPromiscuousModeReq(PSUPDRVSESSION pSession, PINTNETIFSETPROMISCUOUSMODEREQ pReq)
4367{
4368 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4369 return VERR_INVALID_PARAMETER;
4370 return IntNetR0IfSetPromiscuousMode(pReq->hIf, pSession, pReq->fPromiscuous);
4371}
4372
4373
4374/**
4375 * Sets the MAC address of an interface.
4376 *
4377 * @returns VBox status code.
4378 * @param hIf The interface handle.
4379 * @param pSession The caller's session.
4380 * @param pMAC The new MAC address.
4381 */
4382INTNETR0DECL(int) IntNetR0IfSetMacAddress(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, PCRTMAC pMac)
4383{
4384 LogFlow(("IntNetR0IfSetMacAddress: hIf=%RX32 pMac=%p:{%.6Rhxs}\n", hIf, pMac, pMac));
4385
4386 /*
4387 * Validate & translate input.
4388 */
4389 PINTNET pIntNet = g_pIntNet;
4390 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4391 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4392
4393 AssertPtrReturn(pMac, VERR_INVALID_PARAMETER);
4394 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4395 if (!pIf)
4396 {
4397 Log(("IntNetR0IfSetMacAddress: returns VERR_INVALID_HANDLE\n"));
4398 return VERR_INVALID_HANDLE;
4399 }
4400
4401 /*
4402 * Get the network, take the address spinlock, and make the change.
4403 * Paranoia^2: Mark ourselves busy to prevent anything from being destroyed.
4404 */
4405 int rc = VINF_SUCCESS;
4406 intnetR0BusyIncIf(pIf);
4407 PINTNETNETWORK pNetwork = pIf->pNetwork;
4408 if (pNetwork)
4409 {
4410 PINTNETTRUNKIF pTrunk = NULL;
4411
4412 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
4413
4414 if (memcmp(&pIf->MacAddr, pMac, sizeof(pIf->MacAddr)))
4415 {
4416 Log(("IntNetR0IfSetMacAddress: hIf=%RX32: Changed from %.6Rhxs -> %.6Rhxs\n",
4417 hIf, &pIf->MacAddr, pMac));
4418
4419 /* Update the two copies. */
4420 PINTNETMACTABENTRY pEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIf); Assert(pEntry);
4421 if (RT_LIKELY(pEntry))
4422 pEntry->MacAddr = *pMac;
4423 pIf->MacAddr = *pMac;
4424 pIf->fMacSet = true;
4425
4426 /* Grab a busy reference to the trunk so we release the lock before notifying it. */
4427 pTrunk = pNetwork->MacTab.pTrunk;
4428 if (pTrunk)
4429 intnetR0BusyIncTrunk(pTrunk);
4430 }
4431
4432 RTSpinlockRelease(pNetwork->hAddrSpinlock);
4433
4434 if (pTrunk)
4435 {
4436 Log(("IntNetR0IfSetMacAddress: pfnNotifyMacAddress hIf=%RX32\n", hIf));
4437 PINTNETTRUNKIFPORT pIfPort = pTrunk->pIfPort;
4438 if (pIfPort)
4439 pIfPort->pfnNotifyMacAddress(pIfPort, pIf->pvIfData, pMac);
4440 intnetR0BusyDecTrunk(pTrunk);
4441 }
4442 }
4443 else
4444 rc = VERR_WRONG_ORDER;
4445
4446 intnetR0BusyDecIf(pIf);
4447 intnetR0IfRelease(pIf, pSession);
4448 return rc;
4449}
4450
4451
4452/**
4453 * VMMR0 request wrapper for IntNetR0IfSetMacAddress.
4454 *
4455 * @returns see IntNetR0IfSetMacAddress.
4456 * @param pSession The caller's session.
4457 * @param pReq The request packet.
4458 */
4459INTNETR0DECL(int) IntNetR0IfSetMacAddressReq(PSUPDRVSESSION pSession, PINTNETIFSETMACADDRESSREQ pReq)
4460{
4461 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4462 return VERR_INVALID_PARAMETER;
4463 return IntNetR0IfSetMacAddress(pReq->hIf, pSession, &pReq->Mac);
4464}
4465
4466
4467/**
4468 * Worker for intnetR0IfSetActive and intnetR0IfDestruct.
4469 *
4470 * This function will update the active interface count on the network and
4471 * activate or deactivate the trunk connection if necessary.
4472 *
4473 * The call must own the giant lock (we cannot take it here).
4474 *
4475 * @returns VBox status code.
4476 * @param pNetwork The network.
4477 * @param fIf The interface.
4478 * @param fActive What to do.
4479 */
4480static int intnetR0NetworkSetIfActive(PINTNETNETWORK pNetwork, PINTNETIF pIf, bool fActive)
4481{
4482 /* quick sanity check */
4483 AssertPtr(pNetwork);
4484 AssertPtr(pIf);
4485
4486 /*
4487 * The address spinlock of the network protects the variables, while the
4488 * big lock protects the calling of pfnSetState. Grab both lock at once
4489 * to save us the extra hassle.
4490 */
4491 PINTNETTRUNKIF pTrunk = NULL;
4492 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
4493
4494 /*
4495 * Do the update.
4496 */
4497 if (pIf->fActive != fActive)
4498 {
4499 PINTNETMACTABENTRY pEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIf); Assert(pEntry);
4500 if (RT_LIKELY(pEntry))
4501 {
4502 pEntry->fActive = fActive;
4503 pIf->fActive = fActive;
4504
4505 if (fActive)
4506 {
4507 pNetwork->cActiveIFs++;
4508 if (pNetwork->cActiveIFs == 1)
4509 {
4510 pTrunk = pNetwork->MacTab.pTrunk;
4511 if (pTrunk)
4512 {
4513 pNetwork->MacTab.fHostActive = RT_BOOL(pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED);
4514 pNetwork->MacTab.fWireActive = RT_BOOL(pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_WIRE_ENABLED);
4515 }
4516 }
4517 }
4518 else
4519 {
4520 pNetwork->cActiveIFs--;
4521 if (pNetwork->cActiveIFs == 0)
4522 {
4523 pTrunk = pNetwork->MacTab.pTrunk;
4524 pNetwork->MacTab.fHostActive = false;
4525 pNetwork->MacTab.fWireActive = false;
4526 }
4527 }
4528 }
4529 }
4530
4531 RTSpinlockRelease(pNetwork->hAddrSpinlock);
4532
4533 /*
4534 * Tell the trunk if necessary.
4535 * The wait for !busy is for the Solaris streams trunk driver (mostly).
4536 */
4537 if (pTrunk && pTrunk->pIfPort)
4538 {
4539 if (!fActive)
4540 intnetR0BusyWait(pNetwork, &pTrunk->cBusy);
4541
4542 pTrunk->pIfPort->pfnSetState(pTrunk->pIfPort, fActive ? INTNETTRUNKIFSTATE_ACTIVE : INTNETTRUNKIFSTATE_INACTIVE);
4543 }
4544
4545 return VINF_SUCCESS;
4546}
4547
4548
4549/**
4550 * Sets the active property of an interface.
4551 *
4552 * @returns VBox status code.
4553 * @param hIf The interface handle.
4554 * @param pSession The caller's session.
4555 * @param fActive The new state.
4556 */
4557INTNETR0DECL(int) IntNetR0IfSetActive(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, bool fActive)
4558{
4559 LogFlow(("IntNetR0IfSetActive: hIf=%RX32 fActive=%RTbool\n", hIf, fActive));
4560
4561 /*
4562 * Validate & translate input.
4563 */
4564 PINTNET pIntNet = g_pIntNet;
4565 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4566 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4567
4568 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4569 if (!pIf)
4570 {
4571 Log(("IntNetR0IfSetActive: returns VERR_INVALID_HANDLE\n"));
4572 return VERR_INVALID_HANDLE;
4573 }
4574
4575 /*
4576 * Hand it to the network since it might involve the trunk and things are
4577 * tricky there wrt to locking order.
4578 *
4579 * 1. We take the giant lock here. This makes sure nobody is re-enabling
4580 * the network while we're pausing it and vice versa. This also enables
4581 * us to wait for the network to become idle before telling the trunk.
4582 * (Important on Solaris.)
4583 *
4584 * 2. For paranoid reasons, we grab a busy reference to the calling
4585 * interface. This is totally unnecessary but should hurt (when done
4586 * after grabbing the giant lock).
4587 */
4588 int rc = RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
4589 if (RT_SUCCESS(rc))
4590 {
4591 intnetR0BusyIncIf(pIf);
4592
4593 PINTNETNETWORK pNetwork = pIf->pNetwork;
4594 if (pNetwork)
4595 rc = intnetR0NetworkSetIfActive(pNetwork, pIf, fActive);
4596 else
4597 rc = VERR_WRONG_ORDER;
4598
4599 intnetR0BusyDecIf(pIf);
4600 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
4601 }
4602
4603 intnetR0IfRelease(pIf, pSession);
4604 LogFlow(("IntNetR0IfSetActive: returns %Rrc\n", rc));
4605 return rc;
4606}
4607
4608
4609/**
4610 * VMMR0 request wrapper for IntNetR0IfSetActive.
4611 *
4612 * @returns see IntNetR0IfSetActive.
4613 * @param pIntNet The internal networking instance.
4614 * @param pSession The caller's session.
4615 * @param pReq The request packet.
4616 */
4617INTNETR0DECL(int) IntNetR0IfSetActiveReq(PSUPDRVSESSION pSession, PINTNETIFSETACTIVEREQ pReq)
4618{
4619 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4620 return VERR_INVALID_PARAMETER;
4621 return IntNetR0IfSetActive(pReq->hIf, pSession, pReq->fActive);
4622}
4623
4624
4625/**
4626 * Wait for the interface to get signaled.
4627 * The interface will be signaled when is put into the receive buffer.
4628 *
4629 * @returns VBox status code.
4630 * @param hIf The interface handle.
4631 * @param pSession The caller's session.
4632 * @param cMillies Number of milliseconds to wait. RT_INDEFINITE_WAIT should be
4633 * used if indefinite wait is desired.
4634 */
4635INTNETR0DECL(int) IntNetR0IfWait(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, uint32_t cMillies)
4636{
4637 Log4(("IntNetR0IfWait: hIf=%RX32 cMillies=%u\n", hIf, cMillies));
4638
4639 /*
4640 * Get and validate essential handles.
4641 */
4642 PINTNET pIntNet = g_pIntNet;
4643 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4644 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4645
4646 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4647 if (!pIf)
4648 {
4649 Log(("IntNetR0IfWait: returns VERR_INVALID_HANDLE\n"));
4650 return VERR_INVALID_HANDLE;
4651 }
4652
4653 const RTSEMEVENT hRecvEvent = pIf->hRecvEvent;
4654 const bool fNoMoreWaits = ASMAtomicUoReadBool(&pIf->fNoMoreWaits);
4655 RTNATIVETHREAD hDtorThrd;
4656 ASMAtomicReadHandle(&pIf->hDestructorThread, &hDtorThrd);
4657 if (hDtorThrd != NIL_RTNATIVETHREAD)
4658 {
4659 /* See IntNetR0IfAbortWait for an explanation of hDestructorThread. */
4660 Log(("IntNetR0IfWait: returns VERR_SEM_DESTROYED\n"));
4661 return VERR_SEM_DESTROYED;
4662 }
4663
4664 /* Check whether further waits have been barred by IntNetR0IfAbortWait. */
4665 int rc;
4666 if ( !fNoMoreWaits
4667 && hRecvEvent != NIL_RTSEMEVENT)
4668 {
4669 /*
4670 * It is tempting to check if there is data to be read here,
4671 * but the problem with such an approach is that it will cause
4672 * one unnecessary supervisor->user->supervisor trip. There is
4673 * already a slight risk for such, so no need to increase it.
4674 */
4675
4676 /*
4677 * Increment the number of waiters before starting the wait.
4678 * Upon wakeup we must assert reality, checking that we're not
4679 * already destroyed or in the process of being destroyed. This
4680 * code must be aligned with the waiting code in intnetR0IfDestruct.
4681 */
4682 ASMAtomicIncU32(&pIf->cSleepers);
4683 rc = RTSemEventWaitNoResume(hRecvEvent, cMillies);
4684 if (pIf->hRecvEvent == hRecvEvent)
4685 {
4686 ASMAtomicDecU32(&pIf->cSleepers);
4687 ASMAtomicReadHandle(&pIf->hDestructorThread, &hDtorThrd);
4688 if (hDtorThrd == NIL_RTNATIVETHREAD)
4689 {
4690 if (intnetR0IfRelease(pIf, pSession))
4691 rc = VERR_SEM_DESTROYED;
4692 }
4693 else
4694 rc = VERR_SEM_DESTROYED;
4695 }
4696 else
4697 rc = VERR_SEM_DESTROYED;
4698 }
4699 else
4700 {
4701 rc = VERR_SEM_DESTROYED;
4702 intnetR0IfRelease(pIf, pSession);
4703 }
4704
4705 Log4(("IntNetR0IfWait: returns %Rrc\n", rc));
4706 return rc;
4707}
4708
4709
4710/**
4711 * VMMR0 request wrapper for IntNetR0IfWait.
4712 *
4713 * @returns see IntNetR0IfWait.
4714 * @param pSession The caller's session.
4715 * @param pReq The request packet.
4716 */
4717INTNETR0DECL(int) IntNetR0IfWaitReq(PSUPDRVSESSION pSession, PINTNETIFWAITREQ pReq)
4718{
4719 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4720 return VERR_INVALID_PARAMETER;
4721 return IntNetR0IfWait(pReq->hIf, pSession, pReq->cMillies);
4722}
4723
4724
4725/**
4726 * Wake up any threads waiting on the interface.
4727 *
4728 * @returns VBox status code.
4729 * @param hIf The interface handle.
4730 * @param pSession The caller's session.
4731 * @param fNoMoreWaits When set, no more waits are permitted.
4732 */
4733INTNETR0DECL(int) IntNetR0IfAbortWait(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, bool fNoMoreWaits)
4734{
4735 Log4(("IntNetR0IfAbortWait: hIf=%RX32 fNoMoreWaits=%RTbool\n", hIf, fNoMoreWaits));
4736
4737 /*
4738 * Get and validate essential handles.
4739 */
4740 PINTNET pIntNet = g_pIntNet;
4741 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4742 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4743
4744 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4745 if (!pIf)
4746 {
4747 Log(("IntNetR0IfAbortWait: returns VERR_INVALID_HANDLE\n"));
4748 return VERR_INVALID_HANDLE;
4749 }
4750
4751 const RTSEMEVENT hRecvEvent = pIf->hRecvEvent;
4752 RTNATIVETHREAD hDtorThrd;
4753 ASMAtomicReadHandle(&pIf->hDestructorThread, &hDtorThrd);
4754 if (hDtorThrd != NIL_RTNATIVETHREAD)
4755 {
4756 /* This can only happen if we for some reason race SUPDRVSESSION cleanup,
4757 i.e. the object count is set to zero without yet having removed it from
4758 the object table, so we got a spurious "reference". We must drop that
4759 reference and let the destructor get on with its work. (Not entirely sure
4760 if this is practically possible on any of the platforms, i.e. whether it's
4761 we can actually close a SUPDrv handle/descriptor with active threads still
4762 in NtDeviceIoControlFile/ioctl, but better safe than sorry.) */
4763 Log(("IntNetR0IfAbortWait: returns VERR_SEM_DESTROYED\n"));
4764 return VERR_SEM_DESTROYED;
4765 }
4766
4767 /* a bit of paranoia */
4768 int rc = VINF_SUCCESS;
4769 if (hRecvEvent != NIL_RTSEMEVENT)
4770 {
4771 /*
4772 * Set fNoMoreWaits if requested to do so and then wake up all the sleeping
4773 * threads (usually just one). We leave the semaphore in the signalled
4774 * state so the next caller will return immediately.
4775 */
4776 if (fNoMoreWaits)
4777 ASMAtomicWriteBool(&pIf->fNoMoreWaits, true);
4778
4779 uint32_t cSleepers = ASMAtomicReadU32(&pIf->cSleepers) + 1;
4780 while (cSleepers-- > 0)
4781 {
4782 int rc2 = RTSemEventSignal(pIf->hRecvEvent);
4783 AssertRC(rc2);
4784 }
4785 }
4786 else
4787 rc = VERR_SEM_DESTROYED;
4788
4789 intnetR0IfRelease(pIf, pSession);
4790
4791 Log4(("IntNetR0IfWait: returns %Rrc\n", VINF_SUCCESS));
4792 return VINF_SUCCESS;
4793}
4794
4795
4796/**
4797 * VMMR0 request wrapper for IntNetR0IfAbortWait.
4798 *
4799 * @returns see IntNetR0IfWait.
4800 * @param pSession The caller's session.
4801 * @param pReq The request packet.
4802 */
4803INTNETR0DECL(int) IntNetR0IfAbortWaitReq(PSUPDRVSESSION pSession, PINTNETIFABORTWAITREQ pReq)
4804{
4805 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4806 return VERR_INVALID_PARAMETER;
4807 return IntNetR0IfAbortWait(pReq->hIf, pSession, pReq->fNoMoreWaits);
4808}
4809
4810
4811/**
4812 * Close an interface.
4813 *
4814 * @returns VBox status code.
4815 * @param pIntNet The instance handle.
4816 * @param hIf The interface handle.
4817 * @param pSession The caller's session.
4818 */
4819INTNETR0DECL(int) IntNetR0IfClose(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession)
4820{
4821 LogFlow(("IntNetR0IfClose: hIf=%RX32\n", hIf));
4822
4823 /*
4824 * Validate and free the handle.
4825 */
4826 PINTNET pIntNet = g_pIntNet;
4827 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4828 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4829
4830 PINTNETIF pIf = (PINTNETIF)RTHandleTableFreeWithCtx(pIntNet->hHtIfs, hIf, pSession);
4831 if (!pIf)
4832 return VERR_INVALID_HANDLE;
4833
4834 /* Mark the handle as freed so intnetR0IfDestruct won't free it again. */
4835 ASMAtomicWriteU32(&pIf->hIf, INTNET_HANDLE_INVALID);
4836
4837 /*
4838 * Signal the event semaphore to wake up any threads in IntNetR0IfWait
4839 * and give them a moment to get out and release the interface.
4840 */
4841 uint32_t i = pIf->cSleepers;
4842 while (i-- > 0)
4843 {
4844 RTSemEventSignal(pIf->hRecvEvent);
4845 RTThreadYield();
4846 }
4847 RTSemEventSignal(pIf->hRecvEvent);
4848
4849 /*
4850 * Release the references to the interface object (handle + free lookup).
4851 */
4852 void *pvObj = pIf->pvObj;
4853 intnetR0IfRelease(pIf, pSession); /* (RTHandleTableFreeWithCtx) */
4854
4855 int rc = SUPR0ObjRelease(pvObj, pSession);
4856 LogFlow(("IntNetR0IfClose: returns %Rrc\n", rc));
4857 return rc;
4858}
4859
4860
4861/**
4862 * VMMR0 request wrapper for IntNetR0IfCloseReq.
4863 *
4864 * @returns see IntNetR0IfClose.
4865 * @param pSession The caller's session.
4866 * @param pReq The request packet.
4867 */
4868INTNETR0DECL(int) IntNetR0IfCloseReq(PSUPDRVSESSION pSession, PINTNETIFCLOSEREQ pReq)
4869{
4870 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4871 return VERR_INVALID_PARAMETER;
4872 return IntNetR0IfClose(pReq->hIf, pSession);
4873}
4874
4875
4876/**
4877 * Interface destructor callback.
4878 * This is called for reference counted objectes when the count reaches 0.
4879 *
4880 * @param pvObj The object pointer.
4881 * @param pvUser1 Pointer to the interface.
4882 * @param pvUser2 Pointer to the INTNET instance data.
4883 */
4884static DECLCALLBACK(void) intnetR0IfDestruct(void *pvObj, void *pvUser1, void *pvUser2)
4885{
4886 PINTNETIF pIf = (PINTNETIF)pvUser1;
4887 PINTNET pIntNet = (PINTNET)pvUser2;
4888 Log(("intnetR0IfDestruct: pvObj=%p pIf=%p pIntNet=%p hIf=%RX32\n", pvObj, pIf, pIntNet, pIf->hIf));
4889 RT_NOREF1(pvObj);
4890
4891 /*
4892 * For paranoid reasons we must now mark the interface as destroyed.
4893 * This is so that any waiting threads can take evasive action (kind
4894 * of theoretical case), and we can reject everyone else referencing
4895 * the object via the handle table before we get around to removing it.
4896 */
4897 ASMAtomicWriteHandle(&pIf->hDestructorThread, RTThreadNativeSelf());
4898
4899 /*
4900 * We grab the INTNET create/open/destroy semaphore to make sure nobody is
4901 * adding or removing interfaces while we're in here.
4902 */
4903 RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
4904
4905 /*
4906 * Delete the interface handle so the object no longer can be used.
4907 * (Can happen if the client didn't close its session.)
4908 */
4909 INTNETIFHANDLE hIf = ASMAtomicXchgU32(&pIf->hIf, INTNET_HANDLE_INVALID);
4910 if (hIf != INTNET_HANDLE_INVALID)
4911 {
4912 void *pvObj2 = RTHandleTableFreeWithCtx(pIntNet->hHtIfs, hIf, pIf->pSession); NOREF(pvObj2);
4913 AssertMsg(pvObj2 == pIf, ("%p, %p, hIf=%RX32 pSession=%p\n", pvObj2, pIf, hIf, pIf->pSession));
4914 }
4915
4916 /*
4917 * If we've got a network deactivate and detach ourselves from it. Because
4918 * of cleanup order we might have been orphaned by the network destructor.
4919 */
4920 PINTNETNETWORK pNetwork = pIf->pNetwork;
4921 if (pNetwork)
4922 {
4923 /* set inactive. */
4924 intnetR0NetworkSetIfActive(pNetwork, pIf, false /*fActive*/);
4925
4926 /* remove ourselves from the switch table. */
4927 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
4928
4929 uint32_t iIf = pNetwork->MacTab.cEntries;
4930 while (iIf-- > 0)
4931 if (pNetwork->MacTab.paEntries[iIf].pIf == pIf)
4932 {
4933 if (pNetwork->MacTab.paEntries[iIf].fPromiscuousEff)
4934 {
4935 pNetwork->MacTab.cPromiscuousEntries--;
4936 if (!pNetwork->MacTab.paEntries[iIf].fPromiscuousSeeTrunk)
4937 pNetwork->MacTab.cPromiscuousNoTrunkEntries--;
4938 }
4939 Assert(pNetwork->MacTab.cPromiscuousEntries < pNetwork->MacTab.cEntries);
4940 Assert(pNetwork->MacTab.cPromiscuousNoTrunkEntries < pNetwork->MacTab.cEntries);
4941
4942 if (iIf + 1 < pNetwork->MacTab.cEntries)
4943 memmove(&pNetwork->MacTab.paEntries[iIf],
4944 &pNetwork->MacTab.paEntries[iIf + 1],
4945 (pNetwork->MacTab.cEntries - iIf - 1) * sizeof(pNetwork->MacTab.paEntries[0]));
4946 pNetwork->MacTab.cEntries--;
4947 break;
4948 }
4949
4950 /* recalc the min flags. */
4951 if (pIf->fOpenFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES)
4952 {
4953 uint32_t fMinFlags = 0;
4954 iIf = pNetwork->MacTab.cEntries;
4955 while (iIf-- > 0)
4956 {
4957 PINTNETIF pIf2 = pNetwork->MacTab.paEntries[iIf].pIf;
4958 if ( pIf2 /* paranoia */
4959 && (pIf2->fOpenFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES))
4960 fMinFlags |= pIf2->fOpenFlags & INTNET_OPEN_FLAGS_STRICT_MASK;
4961 }
4962 pNetwork->fMinFlags = fMinFlags;
4963 }
4964
4965 PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
4966
4967 RTSpinlockRelease(pNetwork->hAddrSpinlock);
4968
4969 /* Notify the trunk about the interface being destroyed. */
4970 if (pTrunk && pTrunk->pIfPort)
4971 pTrunk->pIfPort->pfnDisconnectInterface(pTrunk->pIfPort, pIf->pvIfData);
4972
4973 /* Wait for the interface to quiesce while we still can. */
4974 intnetR0BusyWait(pNetwork, &pIf->cBusy);
4975
4976 /* Release our reference to the network. */
4977 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
4978 pIf->pNetwork = NULL;
4979 RTSpinlockRelease(pNetwork->hAddrSpinlock);
4980
4981 SUPR0ObjRelease(pNetwork->pvObj, pIf->pSession);
4982 }
4983
4984 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
4985
4986 /*
4987 * Wakeup anyone waiting on this interface. (Kind of unlikely, but perhaps
4988 * not quite impossible.)
4989 *
4990 * We *must* make sure they have woken up properly and realized
4991 * that the interface is no longer valid.
4992 */
4993 if (pIf->hRecvEvent != NIL_RTSEMEVENT)
4994 {
4995 RTSEMEVENT hRecvEvent = pIf->hRecvEvent;
4996 unsigned cMaxWait = 0x1000;
4997 while (pIf->cSleepers && cMaxWait-- > 0)
4998 {
4999 RTSemEventSignal(hRecvEvent);
5000 RTThreadYield();
5001 }
5002 if (pIf->cSleepers)
5003 {
5004 RTThreadSleep(1);
5005
5006 cMaxWait = pIf->cSleepers;
5007 while (pIf->cSleepers && cMaxWait-- > 0)
5008 {
5009 RTSemEventSignal(hRecvEvent);
5010 RTThreadSleep(10);
5011 }
5012 }
5013
5014 RTSemEventDestroy(hRecvEvent);
5015 pIf->hRecvEvent = NIL_RTSEMEVENT;
5016 }
5017
5018 /*
5019 * Unmap user buffer.
5020 */
5021 if (pIf->pIntBuf != pIf->pIntBufDefault)
5022 {
5023 /** @todo user buffer */
5024 }
5025
5026 /*
5027 * Unmap and Free the default buffer.
5028 */
5029 if (pIf->pIntBufDefault)
5030 {
5031 SUPR0MemFree(pIf->pSession, (RTHCUINTPTR)pIf->pIntBufDefault);
5032 pIf->pIntBufDefault = NULL;
5033 pIf->pIntBufDefaultR3 = 0;
5034 pIf->pIntBuf = NULL;
5035 pIf->pIntBufR3 = 0;
5036 }
5037
5038 /*
5039 * Free remaining resources
5040 */
5041 RTSpinlockDestroy(pIf->hRecvInSpinlock);
5042 pIf->hRecvInSpinlock = NIL_RTSPINLOCK;
5043
5044 RTMemFree(pIf->pDstTab);
5045 pIf->pDstTab = NULL;
5046
5047 for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End; i++)
5048 intnetR0IfAddrCacheDestroy(&pIf->aAddrCache[i]);
5049
5050 pIf->pvObj = NULL;
5051 RTMemFree(pIf);
5052}
5053
5054
5055/**
5056 * Creates a new network interface.
5057 *
5058 * The call must have opened the network for the new interface and is
5059 * responsible for closing it on failure. On success it must leave the network
5060 * opened so the interface destructor can close it.
5061 *
5062 * @returns VBox status code.
5063 * @param pNetwork The network, referenced. The reference is consumed on
5064 * success.
5065 * @param pSession The session handle.
5066 * @param cbSend The size of the send buffer.
5067 * @param cbRecv The size of the receive buffer.
5068 * @param fFlags The open network flags.
5069 * @param phIf Where to store the interface handle.
5070 */
5071static int intnetR0NetworkCreateIf(PINTNETNETWORK pNetwork, PSUPDRVSESSION pSession,
5072 unsigned cbSend, unsigned cbRecv, uint32_t fFlags,
5073 PINTNETIFHANDLE phIf)
5074{
5075 LogFlow(("intnetR0NetworkCreateIf: pNetwork=%p pSession=%p cbSend=%u cbRecv=%u fFlags=%#x phIf=%p\n",
5076 pNetwork, pSession, cbSend, cbRecv, fFlags, phIf));
5077
5078 /*
5079 * Assert input.
5080 */
5081 AssertPtr(pNetwork);
5082 AssertPtr(phIf);
5083
5084 /*
5085 * Adjust the flags with defaults for the interface policies.
5086 * Note: Main restricts promiscuous mode per interface.
5087 */
5088 uint32_t const fDefFlags = INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW
5089 | INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK;
5090 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkIfFlags); i++)
5091 if (!(fFlags & g_afIntNetOpenNetworkIfFlags[i].fPair))
5092 fFlags |= g_afIntNetOpenNetworkIfFlags[i].fPair & fDefFlags;
5093
5094 /*
5095 * Make sure that all destination tables as well as the have space of
5096 */
5097 int rc = intnetR0NetworkEnsureTabSpace(pNetwork);
5098 if (RT_FAILURE(rc))
5099 return rc;
5100
5101 /*
5102 * Allocate the interface and initialize it.
5103 */
5104 PINTNETIF pIf = (PINTNETIF)RTMemAllocZ(sizeof(*pIf));
5105 if (!pIf)
5106 return VERR_NO_MEMORY;
5107
5108 memset(&pIf->MacAddr, 0xff, sizeof(pIf->MacAddr)); /* broadcast */
5109 //pIf->fMacSet = false;
5110 //pIf->fPromiscuousReal = false;
5111 //pIf->fActive = false;
5112 //pIf->fNoMoreWaits = false;
5113 pIf->fOpenFlags = fFlags;
5114 //pIf->cYields = 0;
5115 //pIf->pIntBuf = 0;
5116 //pIf->pIntBufR3 = NIL_RTR3PTR;
5117 //pIf->pIntBufDefault = 0;
5118 //pIf->pIntBufDefaultR3 = NIL_RTR3PTR;
5119 pIf->hRecvEvent = NIL_RTSEMEVENT;
5120 //pIf->cSleepers = 0;
5121 pIf->hIf = INTNET_HANDLE_INVALID;
5122 pIf->hDestructorThread = NIL_RTNATIVETHREAD;
5123 pIf->pNetwork = pNetwork;
5124 pIf->pSession = pSession;
5125 //pIf->pvObj = NULL;
5126 //pIf->aAddrCache = {0};
5127 pIf->hRecvInSpinlock = NIL_RTSPINLOCK;
5128 pIf->cBusy = 0;
5129 //pIf->pDstTab = NULL;
5130 //pIf->pvIfData = NULL;
5131
5132 for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End && RT_SUCCESS(rc); i++)
5133 rc = intnetR0IfAddrCacheInit(&pIf->aAddrCache[i], (INTNETADDRTYPE)i,
5134 !!(pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE));
5135 if (RT_SUCCESS(rc))
5136 rc = intnetR0AllocDstTab(pNetwork->MacTab.cEntriesAllocated, (PINTNETDSTTAB *)&pIf->pDstTab);
5137 if (RT_SUCCESS(rc))
5138 rc = RTSemEventCreate((PRTSEMEVENT)&pIf->hRecvEvent);
5139 if (RT_SUCCESS(rc))
5140 rc = RTSpinlockCreate(&pIf->hRecvInSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "hRecvInSpinlock");
5141 if (RT_SUCCESS(rc))
5142 {
5143 /*
5144 * Create the default buffer.
5145 */
5146 /** @todo adjust with minimums and apply defaults here. */
5147 cbRecv = RT_ALIGN(RT_MAX(cbRecv, sizeof(INTNETHDR) * 4), INTNETRINGBUF_ALIGNMENT);
5148 cbSend = RT_ALIGN(RT_MAX(cbSend, sizeof(INTNETHDR) * 4), INTNETRINGBUF_ALIGNMENT);
5149 const unsigned cbBuf = RT_ALIGN(sizeof(*pIf->pIntBuf), INTNETRINGBUF_ALIGNMENT) + cbRecv + cbSend;
5150 rc = SUPR0MemAlloc(pIf->pSession, cbBuf, (PRTR0PTR)&pIf->pIntBufDefault, (PRTR3PTR)&pIf->pIntBufDefaultR3);
5151 if (RT_SUCCESS(rc))
5152 {
5153 ASMMemZero32(pIf->pIntBufDefault, cbBuf); /** @todo I thought I specified these buggers as clearing the memory... */
5154
5155 pIf->pIntBuf = pIf->pIntBufDefault;
5156 pIf->pIntBufR3 = pIf->pIntBufDefaultR3;
5157 IntNetBufInit(pIf->pIntBuf, cbBuf, cbRecv, cbSend);
5158
5159 /*
5160 * Register the interface with the session and create a handle for it.
5161 */
5162 pIf->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_INTERNAL_NETWORK_INTERFACE,
5163 intnetR0IfDestruct, pIf, pNetwork->pIntNet);
5164 if (pIf->pvObj)
5165 {
5166 rc = RTHandleTableAllocWithCtx(pNetwork->pIntNet->hHtIfs, pIf, pSession, (uint32_t *)&pIf->hIf);
5167 if (RT_SUCCESS(rc))
5168 {
5169 /*
5170 * Finally add the interface to the network, consuming the
5171 * network reference of the caller.
5172 */
5173 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5174
5175 uint32_t iIf = pNetwork->MacTab.cEntries;
5176 Assert(iIf + 1 <= pNetwork->MacTab.cEntriesAllocated);
5177
5178 pNetwork->MacTab.paEntries[iIf].MacAddr = pIf->MacAddr;
5179 pNetwork->MacTab.paEntries[iIf].fActive = false;
5180 pNetwork->MacTab.paEntries[iIf].fPromiscuousEff = false;
5181 pNetwork->MacTab.paEntries[iIf].fPromiscuousSeeTrunk = false;
5182 pNetwork->MacTab.paEntries[iIf].pIf = pIf;
5183
5184 pNetwork->MacTab.cEntries = iIf + 1;
5185 pIf->pNetwork = pNetwork;
5186
5187 /*
5188 * Grab a busy reference (paranoia) to the trunk before releasing
5189 * the spinlock and then notify it about the new interface.
5190 */
5191 PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
5192 if (pTrunk)
5193 intnetR0BusyIncTrunk(pTrunk);
5194
5195 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5196
5197 if (pTrunk)
5198 {
5199 Log(("intnetR0NetworkCreateIf: pfnConnectInterface hIf=%RX32\n", pIf->hIf));
5200 if (pTrunk->pIfPort)
5201 rc = pTrunk->pIfPort->pfnConnectInterface(pTrunk->pIfPort, pIf, &pIf->pvIfData);
5202 intnetR0BusyDecTrunk(pTrunk);
5203 }
5204 if (RT_SUCCESS(rc))
5205 {
5206 /*
5207 * We're good!
5208 */
5209 *phIf = pIf->hIf;
5210 Log(("intnetR0NetworkCreateIf: returns VINF_SUCCESS *phIf=%RX32 cbSend=%u cbRecv=%u cbBuf=%u\n",
5211 *phIf, pIf->pIntBufDefault->cbSend, pIf->pIntBufDefault->cbRecv, pIf->pIntBufDefault->cbBuf));
5212 return VINF_SUCCESS;
5213 }
5214 }
5215
5216 SUPR0ObjAddRef(pNetwork->pvObj, pSession);
5217 SUPR0ObjRelease(pIf->pvObj, pSession);
5218 LogFlow(("intnetR0NetworkCreateIf: returns %Rrc\n", rc));
5219 return rc;
5220 }
5221
5222 /* clean up */
5223 SUPR0MemFree(pIf->pSession, (RTHCUINTPTR)pIf->pIntBufDefault);
5224 pIf->pIntBufDefault = NULL;
5225 pIf->pIntBuf = NULL;
5226 }
5227 }
5228
5229 RTSpinlockDestroy(pIf->hRecvInSpinlock);
5230 pIf->hRecvInSpinlock = NIL_RTSPINLOCK;
5231 RTSemEventDestroy(pIf->hRecvEvent);
5232 pIf->hRecvEvent = NIL_RTSEMEVENT;
5233 RTMemFree(pIf->pDstTab);
5234 for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End; i++)
5235 intnetR0IfAddrCacheDestroy(&pIf->aAddrCache[i]);
5236 RTMemFree(pIf);
5237 LogFlow(("intnetR0NetworkCreateIf: returns %Rrc\n", rc));
5238 return rc;
5239}
5240
5241
5242/** @interface_method_impl{INTNETTRUNKSWPORT,pfnSetSGPhys} */
5243static DECLCALLBACK(bool) intnetR0TrunkIfPortSetSGPhys(PINTNETTRUNKSWPORT pSwitchPort, bool fEnable)
5244{
5245 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5246 AssertMsgFailed(("Not implemented because it wasn't required on Darwin\n"));
5247 return ASMAtomicXchgBool(&pThis->fPhysSG, fEnable);
5248}
5249
5250
5251/** @interface_method_impl{INTNETTRUNKSWPORT,pfnReportMacAddress} */
5252static DECLCALLBACK(void) intnetR0TrunkIfPortReportMacAddress(PINTNETTRUNKSWPORT pSwitchPort, PCRTMAC pMacAddr)
5253{
5254 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5255
5256 /*
5257 * Get the network instance and grab the address spinlock before making
5258 * any changes.
5259 */
5260 intnetR0BusyIncTrunk(pThis);
5261 PINTNETNETWORK pNetwork = pThis->pNetwork;
5262 if (pNetwork)
5263 {
5264 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5265
5266 pNetwork->MacTab.HostMac = *pMacAddr;
5267 pThis->MacAddr = *pMacAddr;
5268
5269 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5270 }
5271 else
5272 pThis->MacAddr = *pMacAddr;
5273 intnetR0BusyDecTrunk(pThis);
5274}
5275
5276
5277/** @interface_method_impl{INTNETTRUNKSWPORT,pfnReportPromiscuousMode} */
5278static DECLCALLBACK(void) intnetR0TrunkIfPortReportPromiscuousMode(PINTNETTRUNKSWPORT pSwitchPort, bool fPromiscuous)
5279{
5280 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5281
5282 /*
5283 * Get the network instance and grab the address spinlock before making
5284 * any changes.
5285 */
5286 intnetR0BusyIncTrunk(pThis);
5287 PINTNETNETWORK pNetwork = pThis->pNetwork;
5288 if (pNetwork)
5289 {
5290 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5291
5292 pNetwork->MacTab.fHostPromiscuousReal = fPromiscuous
5293 || (pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE);
5294 pNetwork->MacTab.fHostPromiscuousEff = pNetwork->MacTab.fHostPromiscuousReal
5295 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST);
5296
5297 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5298 }
5299 intnetR0BusyDecTrunk(pThis);
5300}
5301
5302
5303/** @interface_method_impl{INTNETTRUNKSWPORT,pfnReportGsoCapabilities} */
5304static DECLCALLBACK(void) intnetR0TrunkIfPortReportGsoCapabilities(PINTNETTRUNKSWPORT pSwitchPort,
5305 uint32_t fGsoCapabilities, uint32_t fDst)
5306{
5307 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5308
5309 for (unsigned iBit = PDMNETWORKGSOTYPE_END; iBit < 32; iBit++)
5310 Assert(!(fGsoCapabilities & RT_BIT_32(iBit)));
5311 Assert(!(fDst & ~INTNETTRUNKDIR_VALID_MASK));
5312 Assert(fDst);
5313
5314 if (fDst & INTNETTRUNKDIR_HOST)
5315 pThis->fHostGsoCapabilites = fGsoCapabilities;
5316
5317 if (fDst & INTNETTRUNKDIR_WIRE)
5318 pThis->fWireGsoCapabilites = fGsoCapabilities;
5319}
5320
5321
5322/** @interface_method_impl{INTNETTRUNKSWPORT,pfnReportNoPreemptDsts} */
5323static DECLCALLBACK(void) intnetR0TrunkIfPortReportNoPreemptDsts(PINTNETTRUNKSWPORT pSwitchPort, uint32_t fNoPreemptDsts)
5324{
5325 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5326 Assert(!(fNoPreemptDsts & ~INTNETTRUNKDIR_VALID_MASK));
5327
5328 pThis->fNoPreemptDsts = fNoPreemptDsts;
5329}
5330
5331
5332/** @interface_method_impl{INTNETTRUNKSWPORT,pfnDisconnect} */
5333static DECLCALLBACK(void) intnetR0TrunkIfPortDisconnect(PINTNETTRUNKSWPORT pSwitchPort, PINTNETTRUNKIFPORT pIfPort,
5334 PFNINTNETTRUNKIFPORTRELEASEBUSY pfnReleaseBusy)
5335{
5336 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5337
5338 /*
5339 * The caller has marked the trunk instance busy on his side before making
5340 * the call (see method docs) to let us safely grab the network and internal
5341 * network instance pointers without racing the network destruction code
5342 * (intnetR0TrunkIfDestroy (called by intnetR0TrunkIfDestroy) will wait for
5343 * the interface to stop being busy before setting pNetwork to NULL and
5344 * freeing up the resources).
5345 */
5346 PINTNETNETWORK pNetwork = pThis->pNetwork;
5347 if (pNetwork)
5348 {
5349 PINTNET pIntNet = pNetwork->pIntNet;
5350 Assert(pNetwork->pIntNet);
5351
5352 /*
5353 * We must decrease the callers busy count here to prevent deadlocking
5354 * when requesting the big mutex ownership. This will of course
5355 * unblock anyone stuck in intnetR0TrunkIfDestroy doing pfnWaitForIdle
5356 * (the other deadlock party), so we have to revalidate the network
5357 * pointer after taking ownership of the big mutex.
5358 */
5359 if (pfnReleaseBusy)
5360 pfnReleaseBusy(pIfPort);
5361
5362 RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
5363
5364 if (intnetR0NetworkIsValid(pIntNet, pNetwork))
5365 {
5366 Assert(pNetwork->MacTab.pTrunk == pThis); /* Must be valid as long as tehre are no concurrent calls to this method. */
5367 Assert(pThis->pIfPort == pIfPort); /* Ditto */
5368
5369 /*
5370 * Disconnect the trunk and destroy it, similar to what is done int
5371 * intnetR0NetworkDestruct.
5372 */
5373 pIfPort->pfnSetState(pIfPort, INTNETTRUNKIFSTATE_DISCONNECTING);
5374
5375 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5376 pNetwork->MacTab.pTrunk = NULL;
5377 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5378
5379 intnetR0TrunkIfDestroy(pThis, pNetwork);
5380 }
5381
5382 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
5383 }
5384 /*
5385 * We must always release the busy reference.
5386 */
5387 else if (pfnReleaseBusy)
5388 pfnReleaseBusy(pIfPort);
5389}
5390
5391
5392/** @interface_method_impl{INTNETTRUNKSWPORT,pfnPreRecv} */
5393static DECLCALLBACK(INTNETSWDECISION) intnetR0TrunkIfPortPreRecv(PINTNETTRUNKSWPORT pSwitchPort,
5394 void const *pvSrc, size_t cbSrc, uint32_t fSrc)
5395{
5396 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5397
5398 /* assert some sanity */
5399 AssertPtr(pvSrc);
5400 AssertReturn(cbSrc >= 6, INTNETSWDECISION_BROADCAST);
5401 Assert(fSrc);
5402
5403 /*
5404 * Mark the trunk as busy, make sure we've got a network and that there are
5405 * some active interfaces around.
5406 */
5407 INTNETSWDECISION enmSwDecision = INTNETSWDECISION_TRUNK;
5408 intnetR0BusyIncTrunk(pThis);
5409 PINTNETNETWORK pNetwork = pThis->pNetwork;
5410 if (RT_LIKELY( pNetwork
5411 && pNetwork->cActiveIFs > 0 ))
5412 {
5413 /*
5414 * Lazy bird! No pre-switching of multicast and shared-MAC-on-wire.
5415 */
5416 PCRTNETETHERHDR pEthHdr = (PCRTNETETHERHDR)pvSrc;
5417 if (intnetR0IsMacAddrMulticast(&pEthHdr->DstMac))
5418 enmSwDecision = INTNETSWDECISION_BROADCAST;
5419 else if ( fSrc == INTNETTRUNKDIR_WIRE
5420 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE))
5421 enmSwDecision = INTNETSWDECISION_BROADCAST;
5422 else
5423 enmSwDecision = intnetR0NetworkPreSwitchUnicast(pNetwork,
5424 fSrc,
5425 cbSrc >= 12 ? &pEthHdr->SrcMac : NULL,
5426 &pEthHdr->DstMac);
5427 }
5428
5429 intnetR0BusyDecTrunk(pThis);
5430 return enmSwDecision;
5431}
5432
5433
5434/** @interface_method_impl{INTNETTRUNKSWPORT,pfnRecv} */
5435static DECLCALLBACK(bool) intnetR0TrunkIfPortRecv(PINTNETTRUNKSWPORT pSwitchPort, void *pvIf, PINTNETSG pSG, uint32_t fSrc)
5436{
5437 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5438
5439 /* assert some sanity */
5440 AssertPtr(pSG);
5441 Assert(fSrc);
5442 NOREF(pvIf); /* later */
5443
5444 /*
5445 * Mark the trunk as busy, make sure we've got a network and that there are
5446 * some active interfaces around.
5447 */
5448 bool fRc = false /* don't drop it */;
5449 intnetR0BusyIncTrunk(pThis);
5450 PINTNETNETWORK pNetwork = pThis->pNetwork;
5451 if (RT_LIKELY( pNetwork
5452 && pNetwork->cActiveIFs > 0 ))
5453 {
5454 /*
5455 * Grab or allocate a destination table.
5456 */
5457 bool const fIntCtx = RTThreadPreemptIsEnabled(NIL_RTTHREAD) || RTThreadIsInInterrupt(NIL_RTTHREAD);
5458 unsigned iDstTab = 0;
5459 PINTNETDSTTAB pDstTab = NULL;
5460 RTSpinlockAcquire(pThis->hDstTabSpinlock);
5461 if (fIntCtx)
5462 {
5463 /* Interrupt or restricted context. */
5464 iDstTab = RTMpCpuIdToSetIndex(RTMpCpuId());
5465 iDstTab %= pThis->cIntDstTabs;
5466 pDstTab = pThis->apIntDstTabs[iDstTab];
5467 if (RT_LIKELY(pDstTab))
5468 pThis->apIntDstTabs[iDstTab] = NULL;
5469 else
5470 {
5471 iDstTab = pThis->cIntDstTabs;
5472 while (iDstTab-- > 0)
5473 {
5474 pDstTab = pThis->apIntDstTabs[iDstTab];
5475 if (pDstTab)
5476 {
5477 pThis->apIntDstTabs[iDstTab] = NULL;
5478 break;
5479 }
5480 }
5481 }
5482 RTSpinlockRelease(pThis->hDstTabSpinlock);
5483 Assert(!pDstTab || iDstTab < pThis->cIntDstTabs);
5484 }
5485 else
5486 {
5487 /* Task context, fallback is to allocate a table. */
5488 AssertCompile(RT_ELEMENTS(pThis->apTaskDstTabs) == 2); /* for loop rollout */
5489 pDstTab = pThis->apIntDstTabs[iDstTab = 0];
5490 if (!pDstTab)
5491 pDstTab = pThis->apIntDstTabs[iDstTab = 1];
5492 if (pDstTab)
5493 {
5494 pThis->apIntDstTabs[iDstTab] = NULL;
5495 RTSpinlockRelease(pThis->hDstTabSpinlock);
5496 Assert(iDstTab < RT_ELEMENTS(pThis->apTaskDstTabs));
5497 }
5498 else
5499 {
5500 RTSpinlockRelease(pThis->hDstTabSpinlock);
5501 intnetR0AllocDstTab(pNetwork->MacTab.cEntriesAllocated, &pDstTab);
5502 iDstTab = 65535;
5503 }
5504 }
5505 if (RT_LIKELY(pDstTab))
5506 {
5507 /*
5508 * Finally, get down to business of sending the frame.
5509 */
5510 INTNETSWDECISION enmSwDecision = intnetR0NetworkSend(pNetwork, NULL, fSrc, pSG, pDstTab);
5511 AssertMsg(enmSwDecision != INTNETSWDECISION_BAD_CONTEXT, ("fSrc=%#x fTrunkDst=%#x hdr=%.14Rhxs\n", fSrc, pDstTab->fTrunkDst, pSG->aSegs[0].pv));
5512 if (enmSwDecision == INTNETSWDECISION_INTNET)
5513 fRc = true; /* drop it */
5514
5515 /*
5516 * Free the destination table.
5517 */
5518 if (iDstTab == 65535)
5519 RTMemFree(pDstTab);
5520 else
5521 {
5522 RTSpinlockAcquire(pThis->hDstTabSpinlock);
5523 if (fIntCtx && !pThis->apIntDstTabs[iDstTab])
5524 pThis->apIntDstTabs[iDstTab] = pDstTab;
5525 else if (!fIntCtx && !pThis->apTaskDstTabs[iDstTab])
5526 pThis->apTaskDstTabs[iDstTab] = pDstTab;
5527 else
5528 {
5529 /* this shouldn't happen! */
5530 PINTNETDSTTAB *papDstTabs = fIntCtx ? &pThis->apIntDstTabs[0] : &pThis->apTaskDstTabs[0];
5531 iDstTab = fIntCtx ? pThis->cIntDstTabs : RT_ELEMENTS(pThis->apTaskDstTabs);
5532 while (iDstTab-- > 0)
5533 if (!papDstTabs[iDstTab])
5534 {
5535 papDstTabs[iDstTab] = pDstTab;
5536 break;
5537 }
5538 }
5539 RTSpinlockRelease(pThis->hDstTabSpinlock);
5540 Assert(iDstTab < RT_MAX(RT_ELEMENTS(pThis->apTaskDstTabs), pThis->cIntDstTabs));
5541 }
5542 }
5543 }
5544
5545 intnetR0BusyDecTrunk(pThis);
5546 return fRc;
5547}
5548
5549
5550/** @interface_method_impl{INTNETTRUNKSWPORT,pfnSGRetain} */
5551static DECLCALLBACK(void) intnetR0TrunkIfPortSGRetain(PINTNETTRUNKSWPORT pSwitchPort, PINTNETSG pSG)
5552{
5553 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5554 PINTNETNETWORK pNetwork = pThis->pNetwork;
5555
5556 /* assert some sanity */
5557 AssertPtrReturnVoid(pNetwork);
5558 AssertReturnVoid(pNetwork->hEvtBusyIf != NIL_RTSEMEVENT);
5559 AssertPtr(pSG);
5560 Assert(pSG->cUsers > 0 && pSG->cUsers < 256);
5561
5562 /* do it. */
5563 ++pSG->cUsers;
5564}
5565
5566
5567/** @interface_method_impl{INTNETTRUNKSWPORT,pfnSGRelease} */
5568static DECLCALLBACK(void) intnetR0TrunkIfPortSGRelease(PINTNETTRUNKSWPORT pSwitchPort, PINTNETSG pSG)
5569{
5570 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5571 PINTNETNETWORK pNetwork = pThis->pNetwork;
5572
5573 /* assert some sanity */
5574 AssertPtrReturnVoid(pNetwork);
5575 AssertReturnVoid(pNetwork->hEvtBusyIf != NIL_RTSEMEVENT);
5576 AssertPtr(pSG);
5577 Assert(pSG->cUsers > 0);
5578
5579 /*
5580 * Free it?
5581 */
5582 if (!--pSG->cUsers)
5583 {
5584 /** @todo later */
5585 }
5586}
5587
5588
5589/** @interface_method_impl{INTNETTRUNKSWPORT,pfnNotifyHostAddress} */
5590static DECLCALLBACK(void) intnetR0NetworkNotifyHostAddress(PINTNETTRUNKSWPORT pSwitchPort,
5591 bool fAdded,
5592 INTNETADDRTYPE enmType, const void *pvAddr)
5593{
5594 PINTNETTRUNKIF pTrunkIf = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5595 PINTNETNETWORK pNetwork = pTrunkIf->pNetwork;
5596 PCRTNETADDRU pAddr = (PCRTNETADDRU)pvAddr;
5597 uint8_t cbAddr;
5598
5599 if (enmType == kIntNetAddrType_IPv4)
5600 {
5601 Log(("%s: %s %RTnaipv4\n",
5602 __FUNCTION__, (fAdded ? "add" : "del"),
5603 pAddr->IPv4));
5604 cbAddr = 4;
5605 }
5606 else if (enmType == kIntNetAddrType_IPv6)
5607 {
5608 Log(("%s: %s %RTnaipv6\n",
5609 __FUNCTION__, (fAdded ? "add" : "del"),
5610 pAddr));
5611 cbAddr = 16;
5612 }
5613 else
5614 {
5615 Log(("%s: unexpected address type %d\n", __FUNCTION__, enmType));
5616 return;
5617 }
5618
5619 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5620 if (fAdded) /* one of host interfaces got a new address */
5621 {
5622 /* blacklist it to prevent spoofing by guests */
5623 intnetR0NetworkBlacklistAdd(pNetwork, pAddr, enmType);
5624
5625 /* kick out any guest that uses it */
5626 intnetR0NetworkAddrCacheDeleteLocked(pNetwork, pAddr, enmType, cbAddr, "tif/host");
5627 }
5628 else /* address deleted from one of host interfaces */
5629 {
5630 /* stop blacklisting it, guests may use it now */
5631 intnetR0NetworkBlacklistDelete(pNetwork, pAddr, enmType);
5632 }
5633 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5634}
5635
5636
5637/**
5638 * Shutdown the trunk interface.
5639 *
5640 * @param pThis The trunk.
5641 * @param pNetworks The network.
5642 *
5643 * @remarks The caller must hold the global lock.
5644 */
5645static void intnetR0TrunkIfDestroy(PINTNETTRUNKIF pThis, PINTNETNETWORK pNetwork)
5646{
5647 /* assert sanity */
5648 if (!pThis)
5649 return;
5650 AssertPtr(pThis);
5651 Assert(pThis->pNetwork == pNetwork);
5652 AssertPtrNull(pThis->pIfPort);
5653
5654 /*
5655 * The interface has already been deactivated, we just to wait for
5656 * it to become idle before we can disconnect and release it.
5657 */
5658 PINTNETTRUNKIFPORT pIfPort = pThis->pIfPort;
5659 if (pIfPort)
5660 {
5661 /* unset it */
5662 pThis->pIfPort = NULL;
5663
5664 /* wait in portions so we can complain every now an then. */
5665 uint64_t StartTS = RTTimeSystemNanoTS();
5666 int rc = pIfPort->pfnWaitForIdle(pIfPort, 10*1000);
5667 if (RT_FAILURE(rc))
5668 {
5669 LogRel(("intnet: '%s' didn't become idle in %RU64 ns (%Rrc).\n",
5670 pNetwork->szName, RTTimeSystemNanoTS() - StartTS, rc));
5671 Assert(rc == VERR_TIMEOUT);
5672 while ( RT_FAILURE(rc)
5673 && RTTimeSystemNanoTS() - StartTS < UINT64_C(30000000000)) /* 30 sec */
5674 rc = pIfPort->pfnWaitForIdle(pIfPort, 10*1000);
5675 if (rc == VERR_TIMEOUT)
5676 {
5677 LogRel(("intnet: '%s' didn't become idle in %RU64 ns (%Rrc).\n",
5678 pNetwork->szName, RTTimeSystemNanoTS() - StartTS, rc));
5679 while ( rc == VERR_TIMEOUT
5680 && RTTimeSystemNanoTS() - StartTS < UINT64_C(360000000000)) /* 360 sec */
5681 rc = pIfPort->pfnWaitForIdle(pIfPort, 30*1000);
5682 if (RT_FAILURE(rc))
5683 {
5684 LogRel(("intnet: '%s' didn't become idle in %RU64 ns (%Rrc), giving up.\n",
5685 pNetwork->szName, RTTimeSystemNanoTS() - StartTS, rc));
5686 AssertRC(rc);
5687 }
5688 }
5689 }
5690
5691 /* disconnect & release it. */
5692 pIfPort->pfnDisconnectAndRelease(pIfPort);
5693 }
5694
5695 /*
5696 * Free up the resources.
5697 */
5698 pThis->pNetwork = NULL; /* Must not be cleared while busy, see intnetR0TrunkIfPortDisconnect. */
5699 RTSpinlockDestroy(pThis->hDstTabSpinlock);
5700 for (unsigned i = 0; i < RT_ELEMENTS(pThis->apTaskDstTabs); i++)
5701 {
5702 Assert(pThis->apTaskDstTabs[i]);
5703 RTMemFree(pThis->apTaskDstTabs[i]);
5704 pThis->apTaskDstTabs[i] = NULL;
5705 }
5706 for (unsigned i = 0; i < pThis->cIntDstTabs; i++)
5707 {
5708 Assert(pThis->apIntDstTabs[i]);
5709 RTMemFree(pThis->apIntDstTabs[i]);
5710 pThis->apIntDstTabs[i] = NULL;
5711 }
5712 RTMemFree(pThis);
5713}
5714
5715
5716/**
5717 * Creates the trunk connection (if any).
5718 *
5719 * @returns VBox status code.
5720 *
5721 * @param pNetwork The newly created network.
5722 * @param pSession The session handle.
5723 */
5724static int intnetR0NetworkCreateTrunkIf(PINTNETNETWORK pNetwork, PSUPDRVSESSION pSession)
5725{
5726 const char *pszName;
5727 switch (pNetwork->enmTrunkType)
5728 {
5729 /*
5730 * The 'None' case, simple.
5731 */
5732 case kIntNetTrunkType_None:
5733 case kIntNetTrunkType_WhateverNone:
5734#ifdef VBOX_WITH_NAT_SERVICE
5735 /*
5736 * Well, here we don't want load anything special,
5737 * just communicate between processes via internal network.
5738 */
5739 case kIntNetTrunkType_SrvNat:
5740#endif
5741 return VINF_SUCCESS;
5742
5743 /* Can't happen, but makes GCC happy. */
5744 default:
5745 return VERR_NOT_IMPLEMENTED;
5746
5747 /*
5748 * Translate enum to component factory name.
5749 */
5750 case kIntNetTrunkType_NetFlt:
5751 pszName = "VBoxNetFlt";
5752 break;
5753 case kIntNetTrunkType_NetAdp:
5754#if defined(RT_OS_DARWIN) && !defined(VBOXNETADP_DO_NOT_USE_NETFLT)
5755 pszName = "VBoxNetFlt";
5756#else /* VBOXNETADP_DO_NOT_USE_NETFLT */
5757 pszName = "VBoxNetAdp";
5758#endif /* VBOXNETADP_DO_NOT_USE_NETFLT */
5759 break;
5760#ifndef VBOX_WITH_NAT_SERVICE
5761 case kIntNetTrunkType_SrvNat:
5762 pszName = "VBoxSrvNat";
5763 break;
5764#endif
5765 }
5766
5767 /*
5768 * Allocate the trunk interface and associated destination tables.
5769 *
5770 * We take a very optimistic view on the parallelism of the host
5771 * network stack and NIC driver. So, we allocate one table for each
5772 * possible CPU to deal with interrupt time requests and one for task
5773 * time calls.
5774 */
5775 RTCPUID cCpus = RTMpGetCount(); Assert(cCpus > 0);
5776 PINTNETTRUNKIF pTrunk = (PINTNETTRUNKIF)RTMemAllocZ(RT_OFFSETOF(INTNETTRUNKIF, apIntDstTabs[cCpus]));
5777 if (!pTrunk)
5778 return VERR_NO_MEMORY;
5779
5780 Assert(pNetwork->MacTab.cEntriesAllocated > 0);
5781 int rc = VINF_SUCCESS;
5782 pTrunk->cIntDstTabs = cCpus;
5783 for (unsigned i = 0; i < cCpus && RT_SUCCESS(rc); i++)
5784 rc = intnetR0AllocDstTab(pNetwork->MacTab.cEntriesAllocated, &pTrunk->apIntDstTabs[i]);
5785 for (unsigned i = 0; i < RT_ELEMENTS(pTrunk->apTaskDstTabs) && RT_SUCCESS(rc); i++)
5786 rc = intnetR0AllocDstTab(pNetwork->MacTab.cEntriesAllocated, &pTrunk->apTaskDstTabs[i]);
5787
5788 if (RT_SUCCESS(rc))
5789 {
5790 pTrunk->SwitchPort.u32Version = INTNETTRUNKSWPORT_VERSION;
5791 pTrunk->SwitchPort.pfnPreRecv = intnetR0TrunkIfPortPreRecv;
5792 pTrunk->SwitchPort.pfnRecv = intnetR0TrunkIfPortRecv;
5793 pTrunk->SwitchPort.pfnSGRetain = intnetR0TrunkIfPortSGRetain;
5794 pTrunk->SwitchPort.pfnSGRelease = intnetR0TrunkIfPortSGRelease;
5795 pTrunk->SwitchPort.pfnSetSGPhys = intnetR0TrunkIfPortSetSGPhys;
5796 pTrunk->SwitchPort.pfnReportMacAddress = intnetR0TrunkIfPortReportMacAddress;
5797 pTrunk->SwitchPort.pfnReportPromiscuousMode = intnetR0TrunkIfPortReportPromiscuousMode;
5798 pTrunk->SwitchPort.pfnReportGsoCapabilities = intnetR0TrunkIfPortReportGsoCapabilities;
5799 pTrunk->SwitchPort.pfnReportNoPreemptDsts = intnetR0TrunkIfPortReportNoPreemptDsts;
5800 if (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
5801 pTrunk->SwitchPort.pfnNotifyHostAddress = intnetR0NetworkNotifyHostAddress;
5802 pTrunk->SwitchPort.pfnDisconnect = intnetR0TrunkIfPortDisconnect;
5803 pTrunk->SwitchPort.u32VersionEnd = INTNETTRUNKSWPORT_VERSION;
5804 //pTrunk->pIfPort = NULL;
5805 pTrunk->pNetwork = pNetwork;
5806 pTrunk->MacAddr.au8[0] = 0xff;
5807 pTrunk->MacAddr.au8[1] = 0xff;
5808 pTrunk->MacAddr.au8[2] = 0xff;
5809 pTrunk->MacAddr.au8[3] = 0xff;
5810 pTrunk->MacAddr.au8[4] = 0xff;
5811 pTrunk->MacAddr.au8[5] = 0xff;
5812 //pTrunk->fPhysSG = false;
5813 //pTrunk->fUnused = false;
5814 //pTrunk->cBusy = 0;
5815 //pTrunk->fNoPreemptDsts = 0;
5816 //pTrunk->fWireGsoCapabilites = 0;
5817 //pTrunk->fHostGsoCapabilites = 0;
5818 //pTrunk->abGsoHdrs = {0};
5819 pTrunk->hDstTabSpinlock = NIL_RTSPINLOCK;
5820 //pTrunk->apTaskDstTabs = above;
5821 //pTrunk->cIntDstTabs = above;
5822 //pTrunk->apIntDstTabs = above;
5823
5824 /*
5825 * Create the lock (we've NIL'ed the members above to simplify cleanup).
5826 */
5827 rc = RTSpinlockCreate(&pTrunk->hDstTabSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "hDstTabSpinlock");
5828 if (RT_SUCCESS(rc))
5829 {
5830 /*
5831 * There are a couple of bits in MacTab as well pertaining to the
5832 * trunk. We have to set this before it's reported.
5833 *
5834 * Note! We don't need to lock the MacTab here - creation time.
5835 */
5836 pNetwork->MacTab.pTrunk = pTrunk;
5837 pNetwork->MacTab.HostMac = pTrunk->MacAddr;
5838 pNetwork->MacTab.fHostPromiscuousReal = false;
5839 pNetwork->MacTab.fHostPromiscuousEff = (pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE)
5840 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST);
5841 pNetwork->MacTab.fHostActive = false;
5842 pNetwork->MacTab.fWirePromiscuousReal = RT_BOOL(pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE);
5843 pNetwork->MacTab.fWirePromiscuousEff = pNetwork->MacTab.fWirePromiscuousReal
5844 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE);
5845 pNetwork->MacTab.fWireActive = false;
5846
5847#ifdef IN_RING0 /* (testcase is ring-3) */
5848 /*
5849 * Query the factory we want, then use it create and connect the trunk.
5850 */
5851 PINTNETTRUNKFACTORY pTrunkFactory = NULL;
5852 rc = SUPR0ComponentQueryFactory(pSession, pszName, INTNETTRUNKFACTORY_UUID_STR, (void **)&pTrunkFactory);
5853 if (RT_SUCCESS(rc))
5854 {
5855 rc = pTrunkFactory->pfnCreateAndConnect(pTrunkFactory,
5856 pNetwork->szTrunk,
5857 &pTrunk->SwitchPort,
5858 pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE
5859 ? INTNETTRUNKFACTORY_FLAG_NO_PROMISC
5860 : 0,
5861 &pTrunk->pIfPort);
5862 pTrunkFactory->pfnRelease(pTrunkFactory);
5863 if (RT_SUCCESS(rc))
5864 {
5865 Assert(pTrunk->pIfPort);
5866
5867 Log(("intnetR0NetworkCreateTrunkIf: VINF_SUCCESS - pszName=%s szTrunk=%s%s Network=%s\n",
5868 pszName, pNetwork->szTrunk, pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE ? " shared-mac" : "", pNetwork->szName));
5869 return VINF_SUCCESS;
5870 }
5871 }
5872#else /* IN_RING3 */
5873 NOREF(pSession);
5874 rc = VERR_NOT_SUPPORTED;
5875#endif /* IN_RING3 */
5876
5877 pNetwork->MacTab.pTrunk = NULL;
5878 }
5879
5880 /* bail out and clean up. */
5881 RTSpinlockDestroy(pTrunk->hDstTabSpinlock);
5882 }
5883
5884 for (unsigned i = 0; i < RT_ELEMENTS(pTrunk->apTaskDstTabs); i++)
5885 RTMemFree(pTrunk->apTaskDstTabs[i]);
5886 for (unsigned i = 0; i < pTrunk->cIntDstTabs; i++)
5887 RTMemFree(pTrunk->apIntDstTabs[i]);
5888 RTMemFree(pTrunk);
5889
5890 LogFlow(("intnetR0NetworkCreateTrunkIf: %Rrc - pszName=%s szTrunk=%s Network=%s\n",
5891 rc, pszName, pNetwork->szTrunk, pNetwork->szName));
5892 return rc;
5893}
5894
5895
5896
5897/**
5898 * Object destructor callback.
5899 * This is called for reference counted objectes when the count reaches 0.
5900 *
5901 * @param pvObj The object pointer.
5902 * @param pvUser1 Pointer to the network.
5903 * @param pvUser2 Pointer to the INTNET instance data.
5904 */
5905static DECLCALLBACK(void) intnetR0NetworkDestruct(void *pvObj, void *pvUser1, void *pvUser2)
5906{
5907 PINTNETNETWORK pNetwork = (PINTNETNETWORK)pvUser1;
5908 PINTNET pIntNet = (PINTNET)pvUser2;
5909 Log(("intnetR0NetworkDestruct: pvObj=%p pNetwork=%p pIntNet=%p %s\n", pvObj, pNetwork, pIntNet, pNetwork->szName));
5910 Assert(pNetwork->pIntNet == pIntNet);
5911 RT_NOREF1(pvObj);
5912
5913 /* Take the big create/open/destroy sem. */
5914 RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
5915
5916 /*
5917 * Tell the trunk, if present, that we're about to disconnect it and wish
5918 * no further calls from it.
5919 */
5920 PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
5921 if (pTrunk)
5922 pTrunk->pIfPort->pfnSetState(pTrunk->pIfPort, INTNETTRUNKIFSTATE_DISCONNECTING);
5923
5924 /*
5925 * Deactivate and orphan any remaining interfaces and wait for them to idle.
5926 *
5927 * Note! Normally there are no more interfaces at this point, however, when
5928 * supdrvCloseSession / supdrvCleanupSession release the objects the
5929 * order is undefined. So, it's quite possible that the network will
5930 * be dereference and destroyed before the interfaces.
5931 */
5932 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5933
5934 uint32_t iIf = pNetwork->MacTab.cEntries;
5935 while (iIf-- > 0)
5936 {
5937 pNetwork->MacTab.paEntries[iIf].fActive = false;
5938 pNetwork->MacTab.paEntries[iIf].pIf->fActive = false;
5939 }
5940
5941 pNetwork->MacTab.fHostActive = false;
5942 pNetwork->MacTab.fWireActive = false;
5943
5944 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5945
5946 /* Wait for all the interfaces to quiesce. (Interfaces cannot be
5947 removed / added since we're holding the big lock.) */
5948 if (pTrunk)
5949 intnetR0BusyWait(pNetwork, &pTrunk->cBusy);
5950
5951 iIf = pNetwork->MacTab.cEntries;
5952 while (iIf-- > 0)
5953 intnetR0BusyWait(pNetwork, &pNetwork->MacTab.paEntries[iIf].pIf->cBusy);
5954
5955 /* Orphan the interfaces (not trunk). Don't bother with calling
5956 pfnDisconnectInterface here since the networking is going away. */
5957 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5958 while ((iIf = pNetwork->MacTab.cEntries) > 0)
5959 {
5960 PINTNETIF pIf = pNetwork->MacTab.paEntries[iIf - 1].pIf;
5961 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5962
5963 intnetR0BusyWait(pNetwork, &pIf->cBusy);
5964
5965 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5966 if ( iIf == pNetwork->MacTab.cEntries /* paranoia */
5967 && pIf->cBusy)
5968 {
5969 pIf->pNetwork = NULL;
5970 pNetwork->MacTab.cEntries--;
5971 }
5972 }
5973
5974 /*
5975 * Zap the trunk pointer while we still own the spinlock, destroy the
5976 * trunk after we've left it. Note that this might take a while...
5977 */
5978 pNetwork->MacTab.pTrunk = NULL;
5979
5980 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5981
5982 if (pTrunk)
5983 intnetR0TrunkIfDestroy(pTrunk, pNetwork);
5984
5985 /*
5986 * Unlink the network.
5987 * Note that it needn't be in the list if we failed during creation.
5988 */
5989 PINTNETNETWORK pPrev = pIntNet->pNetworks;
5990 if (pPrev == pNetwork)
5991 pIntNet->pNetworks = pNetwork->pNext;
5992 else
5993 {
5994 for (; pPrev; pPrev = pPrev->pNext)
5995 if (pPrev->pNext == pNetwork)
5996 {
5997 pPrev->pNext = pNetwork->pNext;
5998 break;
5999 }
6000 }
6001 pNetwork->pNext = NULL;
6002 pNetwork->pvObj = NULL;
6003
6004 /*
6005 * Free resources.
6006 */
6007 RTSemEventDestroy(pNetwork->hEvtBusyIf);
6008 pNetwork->hEvtBusyIf = NIL_RTSEMEVENT;
6009 RTSpinlockDestroy(pNetwork->hAddrSpinlock);
6010 pNetwork->hAddrSpinlock = NIL_RTSPINLOCK;
6011 RTMemFree(pNetwork->MacTab.paEntries);
6012 pNetwork->MacTab.paEntries = NULL;
6013 for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End; i++)
6014 intnetR0IfAddrCacheDestroy(&pNetwork->aAddrBlacklist[i]);
6015 RTMemFree(pNetwork);
6016
6017 /* Release the create/destroy sem. */
6018 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
6019}
6020
6021
6022/**
6023 * Checks if the open network flags are compatible.
6024 *
6025 * @returns VBox status code.
6026 * @param pNetwork The network.
6027 * @param fFlags The open network flags.
6028 */
6029static int intnetR0CheckOpenNetworkFlags(PINTNETNETWORK pNetwork, uint32_t fFlags)
6030{
6031 uint32_t const fNetFlags = pNetwork->fFlags;
6032
6033 if ( (fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
6034 ^ (fNetFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE))
6035 return VERR_INTNET_INCOMPATIBLE_FLAGS;
6036
6037 if (fFlags & INTNET_OPEN_FLAGS_REQUIRE_EXACT)
6038 {
6039 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
6040 if ( (fFlags & g_afIntNetOpenNetworkNetFlags[i].fPair)
6041 && (fFlags & g_afIntNetOpenNetworkNetFlags[i].fPair)
6042 != (fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fPair) )
6043 return VERR_INTNET_INCOMPATIBLE_FLAGS;
6044 }
6045
6046 if (fFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES)
6047 {
6048 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
6049 if ( (fFlags & g_afIntNetOpenNetworkNetFlags[i].fRestrictive)
6050 && !(fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fRestrictive)
6051 && (fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fFixed) )
6052 return VERR_INTNET_INCOMPATIBLE_FLAGS;
6053 }
6054
6055 return VINF_SUCCESS;
6056}
6057
6058
6059/**
6060 * Adapts flag changes on network opening.
6061 *
6062 * @returns VBox status code.
6063 * @param pNetwork The network.
6064 * @param fFlags The open network flags.
6065 */
6066static int intnetR0AdaptOpenNetworkFlags(PINTNETNETWORK pNetwork, uint32_t fFlags)
6067{
6068 /*
6069 * Upgrade the minimum policy flags.
6070 */
6071 uint32_t fNetMinFlags = pNetwork->fMinFlags;
6072 Assert(!(fNetMinFlags & INTNET_OPEN_FLAGS_RELAXED_MASK));
6073 if (fFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES)
6074 {
6075 fNetMinFlags |= fFlags & INTNET_OPEN_FLAGS_STRICT_MASK;
6076 if (fNetMinFlags != pNetwork->fMinFlags)
6077 {
6078 LogRel(("INTNET: %s - min flags changed %#x -> %#x\n", pNetwork->szName, pNetwork->fMinFlags, fNetMinFlags));
6079 pNetwork->fMinFlags = fNetMinFlags;
6080 }
6081 }
6082
6083 /*
6084 * Calculate the new network flags.
6085 * (Depends on fNetMinFlags being recalculated first.)
6086 */
6087 uint32_t fNetFlags = pNetwork->fFlags;
6088
6089 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
6090 {
6091 Assert(fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fPair);
6092 Assert(!(fNetMinFlags & g_afIntNetOpenNetworkNetFlags[i].fRelaxed));
6093
6094 if (!(fFlags & g_afIntNetOpenNetworkNetFlags[i].fPair))
6095 continue;
6096 if (fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fFixed)
6097 continue;
6098
6099 if ( (fNetMinFlags & g_afIntNetOpenNetworkNetFlags[i].fRestrictive)
6100 || (fFlags & g_afIntNetOpenNetworkNetFlags[i].fRestrictive) )
6101 {
6102 fNetFlags &= ~g_afIntNetOpenNetworkNetFlags[i].fPair;
6103 fNetFlags |= g_afIntNetOpenNetworkNetFlags[i].fRestrictive;
6104 }
6105 else if (!(fFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES))
6106 {
6107 fNetFlags &= ~g_afIntNetOpenNetworkNetFlags[i].fPair;
6108 fNetFlags |= g_afIntNetOpenNetworkNetFlags[i].fRelaxed;
6109 }
6110 }
6111
6112 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
6113 {
6114 Assert(fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fPair);
6115 fNetFlags |= fFlags & g_afIntNetOpenNetworkNetFlags[i].fFixed;
6116 }
6117
6118 /*
6119 * Apply the flags if they changed.
6120 */
6121 uint32_t const fOldNetFlags = pNetwork->fFlags;
6122 if (fOldNetFlags != fNetFlags)
6123 {
6124 LogRel(("INTNET: %s - flags changed %#x -> %#x\n", pNetwork->szName, fOldNetFlags, fNetFlags));
6125
6126 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
6127
6128 pNetwork->fFlags = fNetFlags;
6129
6130 /* Recalculate some derived switcher variables. */
6131 bool fActiveTrunk = pNetwork->MacTab.pTrunk
6132 && pNetwork->cActiveIFs > 0;
6133 pNetwork->MacTab.fHostActive = fActiveTrunk
6134 && (fNetFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED);
6135 pNetwork->MacTab.fHostPromiscuousEff = ( pNetwork->MacTab.fHostPromiscuousReal
6136 || (fNetFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE))
6137 && (fNetFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST);
6138
6139 pNetwork->MacTab.fWireActive = fActiveTrunk
6140 && (fNetFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED);
6141 pNetwork->MacTab.fWirePromiscuousReal= RT_BOOL(fNetFlags & INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE);
6142 pNetwork->MacTab.fWirePromiscuousEff = pNetwork->MacTab.fWirePromiscuousReal
6143 && (fNetFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE);
6144
6145 if ((fOldNetFlags ^ fNetFlags) & INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS)
6146 {
6147 pNetwork->MacTab.cPromiscuousEntries = 0;
6148 pNetwork->MacTab.cPromiscuousNoTrunkEntries = 0;
6149
6150 uint32_t iIf = pNetwork->MacTab.cEntries;
6151 while (iIf-- > 0)
6152 {
6153 PINTNETMACTABENTRY pEntry = &pNetwork->MacTab.paEntries[iIf];
6154 PINTNETIF pIf2 = pEntry->pIf;
6155 if ( pIf2 /* paranoia */
6156 && pIf2->fPromiscuousReal)
6157 {
6158 bool fPromiscuousEff = (fNetFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS)
6159 && (pIf2->fOpenFlags & INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW);
6160 pEntry->fPromiscuousEff = fPromiscuousEff;
6161 pEntry->fPromiscuousSeeTrunk = fPromiscuousEff
6162 && (pIf2->fOpenFlags & INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK);
6163
6164 if (pEntry->fPromiscuousEff)
6165 {
6166 pNetwork->MacTab.cPromiscuousEntries++;
6167 if (!pEntry->fPromiscuousSeeTrunk)
6168 pNetwork->MacTab.cPromiscuousNoTrunkEntries++;
6169 }
6170 }
6171 }
6172 }
6173
6174 RTSpinlockRelease(pNetwork->hAddrSpinlock);
6175 }
6176
6177 return VINF_SUCCESS;
6178}
6179
6180
6181/**
6182 * Opens an existing network.
6183 *
6184 * The call must own the INTNET::hMtxCreateOpenDestroy.
6185 *
6186 * @returns VBox status code.
6187 * @param pIntNet The instance data.
6188 * @param pSession The current session.
6189 * @param pszNetwork The network name. This has a valid length.
6190 * @param enmTrunkType The trunk type.
6191 * @param pszTrunk The trunk name. Its meaning is specific to the type.
6192 * @param fFlags Flags, see INTNET_OPEN_FLAGS_*.
6193 * @param ppNetwork Where to store the pointer to the network on success.
6194 */
6195static int intnetR0OpenNetwork(PINTNET pIntNet, PSUPDRVSESSION pSession, const char *pszNetwork, INTNETTRUNKTYPE enmTrunkType,
6196 const char *pszTrunk, uint32_t fFlags, PINTNETNETWORK *ppNetwork)
6197{
6198 LogFlow(("intnetR0OpenNetwork: pIntNet=%p pSession=%p pszNetwork=%p:{%s} enmTrunkType=%d pszTrunk=%p:{%s} fFlags=%#x ppNetwork=%p\n",
6199 pIntNet, pSession, pszNetwork, pszNetwork, enmTrunkType, pszTrunk, pszTrunk, fFlags, ppNetwork));
6200
6201 /* just pro forma validation, the caller is internal. */
6202 AssertPtr(pIntNet);
6203 AssertPtr(pSession);
6204 AssertPtr(pszNetwork);
6205 Assert(enmTrunkType > kIntNetTrunkType_Invalid && enmTrunkType < kIntNetTrunkType_End);
6206 AssertPtr(pszTrunk);
6207 Assert(!(fFlags & ~INTNET_OPEN_FLAGS_MASK));
6208 AssertPtr(ppNetwork);
6209 *ppNetwork = NULL;
6210
6211 /*
6212 * Search networks by name.
6213 */
6214 PINTNETNETWORK pCur;
6215 uint8_t cchName = (uint8_t)strlen(pszNetwork);
6216 Assert(cchName && cchName < sizeof(pCur->szName)); /* caller ensures this */
6217
6218 pCur = pIntNet->pNetworks;
6219 while (pCur)
6220 {
6221 if ( pCur->cchName == cchName
6222 && !memcmp(pCur->szName, pszNetwork, cchName))
6223 {
6224 /*
6225 * Found the network, now check that we have the same ideas
6226 * about the trunk setup and security.
6227 */
6228 int rc;
6229 if ( enmTrunkType == kIntNetTrunkType_WhateverNone
6230#ifdef VBOX_WITH_NAT_SERVICE
6231 || enmTrunkType == kIntNetTrunkType_SrvNat /** @todo what does it mean */
6232#endif
6233 || ( pCur->enmTrunkType == enmTrunkType
6234 && !strcmp(pCur->szTrunk, pszTrunk)))
6235 {
6236 rc = intnetR0CheckOpenNetworkFlags(pCur, fFlags);
6237 if (RT_SUCCESS(rc))
6238 {
6239 /*
6240 * Increment the reference and check that the session
6241 * can access this network.
6242 */
6243 rc = SUPR0ObjAddRef(pCur->pvObj, pSession);
6244 if (RT_SUCCESS(rc))
6245 {
6246 if (pCur->fFlags & INTNET_OPEN_FLAGS_ACCESS_RESTRICTED)
6247 rc = SUPR0ObjVerifyAccess(pCur->pvObj, pSession, pCur->szName);
6248 if (RT_SUCCESS(rc))
6249 *ppNetwork = pCur;
6250 else
6251 SUPR0ObjRelease(pCur->pvObj, pSession);
6252 }
6253 else if (rc == VERR_WRONG_ORDER)
6254 rc = VERR_NOT_FOUND; /* destruction race, pretend the other isn't there. */
6255 }
6256 }
6257 else
6258 {
6259 rc = VERR_INTNET_INCOMPATIBLE_TRUNK;
6260 LogRel(("intnetR0OpenNetwork failed. rc=%Rrc pCur->szTrunk=%s pszTrunk=%s pCur->enmTrunkType=%d enmTrunkType=%d\n",
6261 rc, pCur->szTrunk, pszTrunk, pCur->enmTrunkType, enmTrunkType));
6262 }
6263
6264 LogFlow(("intnetR0OpenNetwork: returns %Rrc *ppNetwork=%p\n", rc, *ppNetwork));
6265 return rc;
6266 }
6267
6268 pCur = pCur->pNext;
6269 }
6270
6271 LogFlow(("intnetR0OpenNetwork: returns VERR_NOT_FOUND\n"));
6272 return VERR_NOT_FOUND;
6273}
6274
6275
6276/**
6277 * Creates a new network.
6278 *
6279 * The call must own the INTNET::hMtxCreateOpenDestroy and has already attempted
6280 * opening the network and found it to be non-existing.
6281 *
6282 * @returns VBox status code.
6283 * @param pIntNet The instance data.
6284 * @param pSession The session handle.
6285 * @param pszNetwork The name of the network. This must be at least one character long and no longer
6286 * than the INTNETNETWORK::szName.
6287 * @param enmTrunkType The trunk type.
6288 * @param pszTrunk The trunk name. Its meaning is specific to the type.
6289 * @param fFlags Flags, see INTNET_OPEN_FLAGS_*.
6290 * @param ppNetwork Where to store the network. In the case of failure
6291 * whatever is returned here should be dereferenced
6292 * outside the INTNET::hMtxCreateOpenDestroy.
6293 */
6294static int intnetR0CreateNetwork(PINTNET pIntNet, PSUPDRVSESSION pSession, const char *pszNetwork, INTNETTRUNKTYPE enmTrunkType,
6295 const char *pszTrunk, uint32_t fFlags, PINTNETNETWORK *ppNetwork)
6296{
6297 LogFlow(("intnetR0CreateNetwork: pIntNet=%p pSession=%p pszNetwork=%p:{%s} enmTrunkType=%d pszTrunk=%p:{%s} fFlags=%#x ppNetwork=%p\n",
6298 pIntNet, pSession, pszNetwork, pszNetwork, enmTrunkType, pszTrunk, pszTrunk, fFlags, ppNetwork));
6299
6300 /* just pro forma validation, the caller is internal. */
6301 AssertPtr(pIntNet);
6302 AssertPtr(pSession);
6303 AssertPtr(pszNetwork);
6304 Assert(enmTrunkType > kIntNetTrunkType_Invalid && enmTrunkType < kIntNetTrunkType_End);
6305 AssertPtr(pszTrunk);
6306 Assert(!(fFlags & ~INTNET_OPEN_FLAGS_MASK));
6307 AssertPtr(ppNetwork);
6308
6309 *ppNetwork = NULL;
6310
6311 /*
6312 * Adjust the flags with defaults for the network policies.
6313 * Note: Main restricts promiscuous mode on the per interface level.
6314 */
6315 fFlags &= ~( INTNET_OPEN_FLAGS_IF_FIXED
6316 | INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW
6317 | INTNET_OPEN_FLAGS_IF_PROMISC_DENY
6318 | INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK
6319 | INTNET_OPEN_FLAGS_IF_PROMISC_NO_TRUNK
6320 | INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES
6321 | INTNET_OPEN_FLAGS_REQUIRE_EXACT);
6322 uint32_t fDefFlags = INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS
6323 | INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST
6324 | INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE
6325 | INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED
6326 | INTNET_OPEN_FLAGS_TRUNK_HOST_CHASTE_MODE
6327 | INTNET_OPEN_FLAGS_TRUNK_WIRE_ENABLED
6328 | INTNET_OPEN_FLAGS_TRUNK_WIRE_CHASTE_MODE;
6329 if ( enmTrunkType == kIntNetTrunkType_WhateverNone
6330#ifdef VBOX_WITH_NAT_SERVICE
6331 || enmTrunkType == kIntNetTrunkType_SrvNat /* simialar security */
6332#endif
6333 || enmTrunkType == kIntNetTrunkType_None)
6334 fDefFlags |= INTNET_OPEN_FLAGS_ACCESS_RESTRICTED;
6335 else
6336 fDefFlags |= INTNET_OPEN_FLAGS_ACCESS_PUBLIC;
6337 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
6338 if (!(fFlags & g_afIntNetOpenNetworkNetFlags[i].fPair))
6339 fFlags |= g_afIntNetOpenNetworkNetFlags[i].fPair & fDefFlags;
6340
6341 /*
6342 * Allocate and initialize.
6343 */
6344 size_t cb = sizeof(INTNETNETWORK);
6345 if (fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
6346 cb += INTNETNETWORK_TMP_SIZE + 64;
6347 PINTNETNETWORK pNetwork = (PINTNETNETWORK)RTMemAllocZ(cb);
6348 if (!pNetwork)
6349 return VERR_NO_MEMORY;
6350 //pNetwork->pNext = NULL;
6351 //pNetwork->pIfs = NULL;
6352 pNetwork->hAddrSpinlock = NIL_RTSPINLOCK;
6353 pNetwork->MacTab.cEntries = 0;
6354 pNetwork->MacTab.cEntriesAllocated = INTNET_GROW_DSTTAB_SIZE;
6355 //pNetwork->MacTab.cPromiscuousEntries = 0;
6356 //pNetwork->MacTab.cPromiscuousNoTrunkEntries = 0;
6357 pNetwork->MacTab.paEntries = NULL;
6358 pNetwork->MacTab.fHostPromiscuousReal = false;
6359 pNetwork->MacTab.fHostPromiscuousEff = false;
6360 pNetwork->MacTab.fHostActive = false;
6361 pNetwork->MacTab.fWirePromiscuousReal = false;
6362 pNetwork->MacTab.fWirePromiscuousEff = false;
6363 pNetwork->MacTab.fWireActive = false;
6364 pNetwork->MacTab.pTrunk = NULL;
6365 pNetwork->hEvtBusyIf = NIL_RTSEMEVENT;
6366 pNetwork->pIntNet = pIntNet;
6367 //pNetwork->pvObj = NULL;
6368 if (fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
6369 pNetwork->pbTmp = RT_ALIGN_PT(pNetwork + 1, 64, uint8_t *);
6370 //else
6371 // pNetwork->pbTmp = NULL;
6372 pNetwork->fFlags = fFlags;
6373 //pNetwork->fMinFlags = 0;
6374 //pNetwork->cActiveIFs = 0;
6375 size_t cchName = strlen(pszNetwork);
6376 pNetwork->cchName = (uint8_t)cchName;
6377 Assert(cchName && cchName < sizeof(pNetwork->szName)); /* caller's responsibility. */
6378 memcpy(pNetwork->szName, pszNetwork, cchName); /* '\0' at courtesy of alloc. */
6379 pNetwork->enmTrunkType = enmTrunkType;
6380 Assert(strlen(pszTrunk) < sizeof(pNetwork->szTrunk)); /* caller's responsibility. */
6381 strcpy(pNetwork->szTrunk, pszTrunk);
6382
6383 /*
6384 * Create the semaphore, spinlock and allocate the interface table.
6385 */
6386 int rc = RTSemEventCreate(&pNetwork->hEvtBusyIf);
6387 if (RT_SUCCESS(rc))
6388 rc = RTSpinlockCreate(&pNetwork->hAddrSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "hAddrSpinlock");
6389 if (RT_SUCCESS(rc))
6390 {
6391 pNetwork->MacTab.paEntries = (PINTNETMACTABENTRY)RTMemAlloc(sizeof(INTNETMACTABENTRY) * pNetwork->MacTab.cEntriesAllocated);
6392 if (!pNetwork->MacTab.paEntries)
6393 rc = VERR_NO_MEMORY;
6394 }
6395 if (RT_SUCCESS(rc))
6396 {
6397 for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End && RT_SUCCESS(rc); i++)
6398 rc = intnetR0IfAddrCacheInit(&pNetwork->aAddrBlacklist[i], (INTNETADDRTYPE)i,
6399 !!(pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE));
6400 }
6401 if (RT_SUCCESS(rc))
6402 {
6403 /*
6404 * Register the object in the current session and link it into the network list.
6405 */
6406 pNetwork->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_INTERNAL_NETWORK, intnetR0NetworkDestruct, pNetwork, pIntNet);
6407 if (pNetwork->pvObj)
6408 {
6409 pNetwork->pNext = pIntNet->pNetworks;
6410 pIntNet->pNetworks = pNetwork;
6411
6412 /*
6413 * Check if the current session is actually allowed to create and
6414 * open the network. It is possible to implement network name
6415 * based policies and these must be checked now. SUPR0ObjRegister
6416 * does no such checks.
6417 */
6418 rc = SUPR0ObjVerifyAccess(pNetwork->pvObj, pSession, pNetwork->szName);
6419 if (RT_SUCCESS(rc))
6420 {
6421 /*
6422 * Connect the trunk.
6423 */
6424 rc = intnetR0NetworkCreateTrunkIf(pNetwork, pSession);
6425 if (RT_SUCCESS(rc))
6426 {
6427 *ppNetwork = pNetwork;
6428 LogFlow(("intnetR0CreateNetwork: returns VINF_SUCCESS *ppNetwork=%p\n", pNetwork));
6429 return VINF_SUCCESS;
6430 }
6431 }
6432
6433 SUPR0ObjRelease(pNetwork->pvObj, pSession);
6434 LogFlow(("intnetR0CreateNetwork: returns %Rrc\n", rc));
6435 return rc;
6436 }
6437
6438 /* cleanup */
6439 rc = VERR_NO_MEMORY;
6440 }
6441
6442 RTSemEventDestroy(pNetwork->hEvtBusyIf);
6443 pNetwork->hEvtBusyIf = NIL_RTSEMEVENT;
6444 RTSpinlockDestroy(pNetwork->hAddrSpinlock);
6445 pNetwork->hAddrSpinlock = NIL_RTSPINLOCK;
6446 RTMemFree(pNetwork->MacTab.paEntries);
6447 pNetwork->MacTab.paEntries = NULL;
6448 RTMemFree(pNetwork);
6449
6450 LogFlow(("intnetR0CreateNetwork: returns %Rrc\n", rc));
6451 return rc;
6452}
6453
6454
6455/**
6456 * Opens a network interface and connects it to the specified network.
6457 *
6458 * @returns VBox status code.
6459 * @param pSession The session handle.
6460 * @param pszNetwork The network name.
6461 * @param enmTrunkType The trunk type.
6462 * @param pszTrunk The trunk name. Its meaning is specific to the type.
6463 * @param fFlags Flags, see INTNET_OPEN_FLAGS_*.
6464 * @param fRestrictAccess Whether new participants should be subjected to access check or not.
6465 * @param cbSend The send buffer size.
6466 * @param cbRecv The receive buffer size.
6467 * @param phIf Where to store the handle to the network interface.
6468 */
6469INTNETR0DECL(int) IntNetR0Open(PSUPDRVSESSION pSession, const char *pszNetwork,
6470 INTNETTRUNKTYPE enmTrunkType, const char *pszTrunk, uint32_t fFlags,
6471 uint32_t cbSend, uint32_t cbRecv, PINTNETIFHANDLE phIf)
6472{
6473 LogFlow(("IntNetR0Open: pSession=%p pszNetwork=%p:{%s} enmTrunkType=%d pszTrunk=%p:{%s} fFlags=%#x cbSend=%u cbRecv=%u phIf=%p\n",
6474 pSession, pszNetwork, pszNetwork, enmTrunkType, pszTrunk, pszTrunk, fFlags, cbSend, cbRecv, phIf));
6475
6476 /*
6477 * Validate input.
6478 */
6479 PINTNET pIntNet = g_pIntNet;
6480 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
6481 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
6482
6483 AssertPtrReturn(pszNetwork, VERR_INVALID_PARAMETER);
6484 const char *pszNetworkEnd = RTStrEnd(pszNetwork, INTNET_MAX_NETWORK_NAME);
6485 AssertReturn(pszNetworkEnd, VERR_INVALID_PARAMETER);
6486 size_t cchNetwork = pszNetworkEnd - pszNetwork;
6487 AssertReturn(cchNetwork, VERR_INVALID_PARAMETER);
6488
6489 if (pszTrunk)
6490 {
6491 AssertPtrReturn(pszTrunk, VERR_INVALID_PARAMETER);
6492 const char *pszTrunkEnd = RTStrEnd(pszTrunk, INTNET_MAX_TRUNK_NAME);
6493 AssertReturn(pszTrunkEnd, VERR_INVALID_PARAMETER);
6494 }
6495 else
6496 pszTrunk = "";
6497
6498 AssertMsgReturn(enmTrunkType > kIntNetTrunkType_Invalid && enmTrunkType < kIntNetTrunkType_End,
6499 ("%d\n", enmTrunkType), VERR_INVALID_PARAMETER);
6500 switch (enmTrunkType)
6501 {
6502 case kIntNetTrunkType_None:
6503 case kIntNetTrunkType_WhateverNone:
6504#ifdef VBOX_WITH_NAT_SERVICE
6505 case kIntNetTrunkType_SrvNat:
6506#endif
6507 if (*pszTrunk)
6508 return VERR_INVALID_PARAMETER;
6509 break;
6510
6511 case kIntNetTrunkType_NetFlt:
6512 case kIntNetTrunkType_NetAdp:
6513 if (!*pszTrunk)
6514 return VERR_INVALID_PARAMETER;
6515 break;
6516
6517 default:
6518 return VERR_NOT_IMPLEMENTED;
6519 }
6520
6521 AssertMsgReturn(!(fFlags & ~INTNET_OPEN_FLAGS_MASK), ("%#x\n", fFlags), VERR_INVALID_PARAMETER);
6522 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
6523 AssertMsgReturn((fFlags & g_afIntNetOpenNetworkNetFlags[i].fPair) != g_afIntNetOpenNetworkNetFlags[i].fPair,
6524 ("%#x (%#x)\n", fFlags, g_afIntNetOpenNetworkNetFlags[i].fPair), VERR_INVALID_PARAMETER);
6525 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkIfFlags); i++)
6526 AssertMsgReturn((fFlags & g_afIntNetOpenNetworkIfFlags[i].fPair) != g_afIntNetOpenNetworkIfFlags[i].fPair,
6527 ("%#x (%#x)\n", fFlags, g_afIntNetOpenNetworkIfFlags[i].fPair), VERR_INVALID_PARAMETER);
6528 AssertPtrReturn(phIf, VERR_INVALID_PARAMETER);
6529
6530 /*
6531 * Acquire the mutex to serialize open/create/close.
6532 */
6533 int rc = RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
6534 if (RT_FAILURE(rc))
6535 return rc;
6536
6537 /*
6538 * Try open / create the network and create an interface on it for the
6539 * caller to use.
6540 */
6541 PINTNETNETWORK pNetwork = NULL;
6542 rc = intnetR0OpenNetwork(pIntNet, pSession, pszNetwork, enmTrunkType, pszTrunk, fFlags, &pNetwork);
6543 if (RT_SUCCESS(rc))
6544 {
6545 rc = intnetR0NetworkCreateIf(pNetwork, pSession, cbSend, cbRecv, fFlags, phIf);
6546 if (RT_SUCCESS(rc))
6547 {
6548 intnetR0AdaptOpenNetworkFlags(pNetwork, fFlags);
6549 rc = VINF_ALREADY_INITIALIZED;
6550 }
6551 else
6552 SUPR0ObjRelease(pNetwork->pvObj, pSession);
6553 }
6554 else if (rc == VERR_NOT_FOUND)
6555 {
6556 rc = intnetR0CreateNetwork(pIntNet, pSession, pszNetwork, enmTrunkType, pszTrunk, fFlags, &pNetwork);
6557 if (RT_SUCCESS(rc))
6558 {
6559 rc = intnetR0NetworkCreateIf(pNetwork, pSession, cbSend, cbRecv, fFlags, phIf);
6560 if (RT_FAILURE(rc))
6561 SUPR0ObjRelease(pNetwork->pvObj, pSession);
6562 }
6563 }
6564
6565 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
6566 LogFlow(("IntNetR0Open: return %Rrc *phIf=%RX32\n", rc, *phIf));
6567 return rc;
6568}
6569
6570
6571/**
6572 * VMMR0 request wrapper for IntNetR0Open.
6573 *
6574 * @returns see GMMR0MapUnmapChunk.
6575 * @param pSession The caller's session.
6576 * @param pReq The request packet.
6577 */
6578INTNETR0DECL(int) IntNetR0OpenReq(PSUPDRVSESSION pSession, PINTNETOPENREQ pReq)
6579{
6580 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
6581 return VERR_INVALID_PARAMETER;
6582 return IntNetR0Open(pSession, &pReq->szNetwork[0], pReq->enmTrunkType, pReq->szTrunk,
6583 pReq->fFlags, pReq->cbSend, pReq->cbRecv, &pReq->hIf);
6584}
6585
6586
6587/**
6588 * Count the internal networks.
6589 *
6590 * This is mainly for providing the testcase with some introspection to validate
6591 * behavior when closing interfaces.
6592 *
6593 * @returns The number of networks.
6594 */
6595INTNETR0DECL(uint32_t) IntNetR0GetNetworkCount(void)
6596{
6597 /*
6598 * Grab the instance.
6599 */
6600 PINTNET pIntNet = g_pIntNet;
6601 if (!pIntNet)
6602 return 0;
6603 AssertPtrReturn(pIntNet, 0);
6604 AssertReturn(pIntNet->u32Magic == INTNET_MAGIC, 0);
6605
6606 /*
6607 * Grab the mutex and count the networks.
6608 */
6609 int rc = RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
6610 if (RT_FAILURE(rc))
6611 return 0;
6612
6613 uint32_t cNetworks = 0;
6614 for (PINTNETNETWORK pCur = pIntNet->pNetworks; pCur; pCur = pCur->pNext)
6615 cNetworks++;
6616
6617 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
6618
6619 return cNetworks;
6620}
6621
6622
6623
6624/**
6625 * Destroys an instance of the Ring-0 internal networking service.
6626 */
6627INTNETR0DECL(void) IntNetR0Term(void)
6628{
6629 LogFlow(("IntNetR0Term:\n"));
6630
6631 /*
6632 * Zap the global pointer and validate it.
6633 */
6634 PINTNET pIntNet = g_pIntNet;
6635 g_pIntNet = NULL;
6636 if (!pIntNet)
6637 return;
6638 AssertPtrReturnVoid(pIntNet);
6639 AssertReturnVoid(pIntNet->u32Magic == INTNET_MAGIC);
6640
6641 /*
6642 * There is not supposed to be any networks hanging around at this time.
6643 */
6644 AssertReturnVoid(ASMAtomicCmpXchgU32(&pIntNet->u32Magic, ~INTNET_MAGIC, INTNET_MAGIC));
6645 Assert(pIntNet->pNetworks == NULL);
6646 if (pIntNet->hMtxCreateOpenDestroy != NIL_RTSEMMUTEX)
6647 {
6648 RTSemMutexDestroy(pIntNet->hMtxCreateOpenDestroy);
6649 pIntNet->hMtxCreateOpenDestroy = NIL_RTSEMMUTEX;
6650 }
6651 if (pIntNet->hHtIfs != NIL_RTHANDLETABLE)
6652 {
6653 /** @todo does it make sense to have a deleter here? */
6654 RTHandleTableDestroy(pIntNet->hHtIfs, NULL, NULL);
6655 pIntNet->hHtIfs = NIL_RTHANDLETABLE;
6656 }
6657
6658 RTMemFree(pIntNet);
6659}
6660
6661
6662/**
6663 * Initializes the internal network ring-0 service.
6664 *
6665 * @returns VBox status code.
6666 */
6667INTNETR0DECL(int) IntNetR0Init(void)
6668{
6669 LogFlow(("IntNetR0Init:\n"));
6670 int rc = VERR_NO_MEMORY;
6671 PINTNET pIntNet = (PINTNET)RTMemAllocZ(sizeof(*pIntNet));
6672 if (pIntNet)
6673 {
6674 //pIntNet->pNetworks = NULL;
6675
6676 rc = RTSemMutexCreate(&pIntNet->hMtxCreateOpenDestroy);
6677 if (RT_SUCCESS(rc))
6678 {
6679 rc = RTHandleTableCreateEx(&pIntNet->hHtIfs, RTHANDLETABLE_FLAGS_LOCKED | RTHANDLETABLE_FLAGS_CONTEXT,
6680 UINT32_C(0x8ffe0000), 4096, intnetR0IfRetainHandle, NULL);
6681 if (RT_SUCCESS(rc))
6682 {
6683 pIntNet->u32Magic = INTNET_MAGIC;
6684 g_pIntNet = pIntNet;
6685 LogFlow(("IntNetR0Init: returns VINF_SUCCESS pIntNet=%p\n", pIntNet));
6686 return VINF_SUCCESS;
6687 }
6688
6689 RTSemMutexDestroy(pIntNet->hMtxCreateOpenDestroy);
6690 }
6691 RTMemFree(pIntNet);
6692 }
6693 LogFlow(("IntNetR0Init: returns %Rrc\n", rc));
6694 return rc;
6695}
6696
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette