VirtualBox

source: vbox/trunk/src/VBox/Devices/Network/SrvIntNetR0.cpp@ 52394

Last change on this file since 52394 was 52394, checked in by vboxsync, 11 years ago

intnetR0TrunkIfPortDisconnect: Must always release the busy reference to the interface side (netflt) and must not trust pThis->pIfPort to be working as it's cleared before doing the busy waiting in intnetR0TrunkIfDestroy. So, pfnDisconnect has been extended with a pIfPort pointer as well as a function pointer to a busy-release function for pIfPort.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 230.8 KB
Line 
1/* $Id: SrvIntNetR0.cpp 52394 2014-08-15 22:25:31Z vboxsync $ */
2/** @file
3 * Internal networking - The ring 0 service.
4 */
5
6/*
7 * Copyright (C) 2006-2013 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_SRV_INTNET
23#include <VBox/intnet.h>
24#include <VBox/intnetinline.h>
25#include <VBox/vmm/pdmnetinline.h>
26#include <VBox/sup.h>
27#include <VBox/vmm/pdm.h>
28#include <VBox/log.h>
29
30#include <iprt/asm.h>
31#include <iprt/assert.h>
32#include <iprt/handletable.h>
33#include <iprt/mp.h>
34#include <iprt/mem.h>
35#include <iprt/net.h>
36#include <iprt/semaphore.h>
37#include <iprt/spinlock.h>
38#include <iprt/string.h>
39#include <iprt/thread.h>
40#include <iprt/time.h>
41
42
43/*******************************************************************************
44* Defined Constants And Macros *
45*******************************************************************************/
/** @def INTNET_WITH_DHCP_SNOOPING
 * Enables DHCP snooping when in shared-mac-on-the-wire mode. */
#define INTNET_WITH_DHCP_SNOOPING

/** The maximum number of interfaces in a network. */
#define INTNET_MAX_IFS (1023 + 1 + 16)

/** The number of entries to grow the destination tables with.
 * (The 16 entry variant is currently compiled out.) */
#if 0
# define INTNET_GROW_DSTTAB_SIZE 16
#else
# define INTNET_GROW_DSTTAB_SIZE 1
#endif

/** The wakeup bit in the INTNETIF::cBusy and INTNETTRUNKIF::cBusy counters. */
#define INTNET_BUSY_WAKEUP_MASK RT_BIT_32(30)
62
63
64/*******************************************************************************
65* Structures and Typedefs *
66*******************************************************************************/
/**
 * MAC address lookup table entry.
 */
typedef struct INTNETMACTABENTRY
{
    /** The MAC address of this entry. */
    RTMAC MacAddr;
    /** Whether the interface is effectively in promiscuous mode. */
    bool fPromiscuousEff;
    /** Whether it is promiscuous and should see unrelated trunk traffic. */
    bool fPromiscuousSeeTrunk;
    /** Whether it is active.
     * We ignore the entry if this is clear and may end up sending packets addressed
     * to this interface onto the trunk. The reasoning for this is that this could
     * be the interface of a VM that has just been teleported to a different host. */
    bool fActive;
    /** Pointer to the network interface. */
    struct INTNETIF *pIf;
} INTNETMACTABENTRY;
/** Pointer to a MAC address lookup table entry. */
typedef INTNETMACTABENTRY *PINTNETMACTABENTRY;
88
/**
 * MAC address lookup table.
 *
 * @todo Having this in a separate structure didn't work out as well as it
 *       should. Consider merging it into INTNETNETWORK.
 */
typedef struct INTNETMACTAB
{
    /** The current number of entries. */
    uint32_t cEntries;
    /** The number of entries we've allocated space for. */
    uint32_t cEntriesAllocated;
    /** Table entries. */
    PINTNETMACTABENTRY paEntries;

    /** The number of interface entries currently in promiscuous mode. */
    uint32_t cPromiscuousEntries;
    /** The number of interface entries currently in promiscuous mode that
     * shall not see unrelated trunk traffic. */
    uint32_t cPromiscuousNoTrunkEntries;

    /** The host MAC address (reported). */
    RTMAC HostMac;
    /** The effective host promiscuous setting (reported). */
    bool fHostPromiscuousEff;
    /** The real host promiscuous setting (reported). */
    bool fHostPromiscuousReal;
    /** Whether the host is active. */
    bool fHostActive;

    /** Whether the wire is promiscuous (config).
     * NOTE(review): by analogy with fHostPromiscuousEff this is presumably the
     * effective setting - confirm. */
    bool fWirePromiscuousEff;
    /** Whether the wire is promiscuous (config).
     * (Shadows INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE in
     * INTNETNETWORK::fFlags.) */
    bool fWirePromiscuousReal;
    /** Whether the wire is active. */
    bool fWireActive;

    /** Pointer to the trunk interface. */
    struct INTNETTRUNKIF *pTrunk;
} INTNETMACTAB;
/** Pointer to a MAC address lookup table. */
typedef INTNETMACTAB *PINTNETMACTAB;
133
/**
 * Destination table.
 *
 * Holds the trunk destination mask plus a variable number of destination
 * interfaces; aIfs is declared with one element but is used as a variable
 * sized array.
 */
typedef struct INTNETDSTTAB
{
    /** The trunk destinations. */
    uint32_t fTrunkDst;
    /** Pointer to the trunk interface (referenced) if fTrunkDst is non-zero. */
    struct INTNETTRUNKIF *pTrunk;
    /** The number of destination interfaces. */
    uint32_t cIfs;
    /** The interfaces (referenced). Variable sized array. */
    struct
    {
        /** The destination interface. */
        struct INTNETIF *pIf;
        /** Whether to replace the destination MAC address.
         * This is used when sharing the MAC address with the host on the wire(less). */
        bool fReplaceDstMac;
    } aIfs[1];
} INTNETDSTTAB;
/** Pointer to a destination table. */
typedef INTNETDSTTAB *PINTNETDSTTAB;
/** Pointer to a const destination table. */
typedef INTNETDSTTAB const *PCINTNETDSTTAB;
159
160
/** Network layer address type.
 * The values double as indexes into INTNETIF::aAddrCache, which is sized by
 * kIntNetAddrType_End. */
typedef enum INTNETADDRTYPE
{
    /** The invalid 0 entry. */
    kIntNetAddrType_Invalid = 0,
    /** IP version 4. */
    kIntNetAddrType_IPv4,
    /** IP version 6. */
    kIntNetAddrType_IPv6,
    /** IPX. */
    kIntNetAddrType_IPX,
    /** The end of the valid values. */
    kIntNetAddrType_End,
    /** The usual 32-bit hack. */
    kIntNetAddrType_32BitHack = 0x7fffffff
} INTNETADDRTYPE;
/** Pointer to a network layer address type. */
typedef INTNETADDRTYPE *PINTNETADDRTYPE;
179
180
/**
 * A network layer address together with its type.
 */
typedef struct INTNETADDR
{
    /** The address type. */
    INTNETADDRTYPE enmType;
    /** The address. */
    RTNETADDRU Addr;
} INTNETADDR;
/** Pointer to an address. */
typedef INTNETADDR *PINTNETADDR;
/** Pointer to a const address. */
typedef INTNETADDR const *PCINTNETADDR;
195
196
/**
 * Address cache for a specific network layer.
 *
 * Note that all counts and sizes are 8-bit wide.
 */
typedef struct INTNETADDRCACHE
{
    /** Pointer to the table of addresses. */
    uint8_t *pbEntries;
    /** The number of valid address entries. */
    uint8_t cEntries;
    /** The number of allocated address entries. */
    uint8_t cEntriesAlloc;
    /** The address size. */
    uint8_t cbAddress;
    /** The size of an entry. */
    uint8_t cbEntry;
} INTNETADDRCACHE;
/** Pointer to an address cache. */
typedef INTNETADDRCACHE *PINTNETADDRCACHE;
/** Pointer to a const address cache. */
typedef INTNETADDRCACHE const *PCINTNETADDRCACHE;
217
218
/**
 * A network interface.
 *
 * Unless explicitly stated, all members are protected by the network semaphore.
 */
typedef struct INTNETIF
{
    /** The MAC address.
     * This is shadowed by INTNETMACTABENTRY::MacAddr. */
    RTMAC MacAddr;
    /** Set if the INTNETIF::MacAddr member has been explicitly set. */
    bool fMacSet;
    /** Tracks the desired promiscuous setting of the interface. */
    bool fPromiscuousReal;
    /** Whether the interface is active or not.
     * This is shadowed by INTNETMACTABENTRY::fActive. */
    bool fActive;
    /** Whether someone is currently in the destructor or has indicated that
     * the end is nigh by means of IntNetR0IfAbortWait. */
    bool volatile fDestroying;
    /** The flags specified when opening this interface. */
    uint32_t fOpenFlags;
    /** Number of yields done to try to make the interface read pending data.
     * We will stop yielding when this reaches a threshold, assuming that the VM is
     * paused or that it simply isn't worth all the delay. It is cleared when a
     * successful send has been done. */
    uint32_t cYields;
    /** Pointer to the current exchange buffer (ring-0). */
    PINTNETBUF pIntBuf;
    /** Pointer to ring-3 mapping of the current exchange buffer. */
    R3PTRTYPE(PINTNETBUF) pIntBufR3;
    /** Pointer to the default exchange buffer for the interface. */
    PINTNETBUF pIntBufDefault;
    /** Pointer to ring-3 mapping of the default exchange buffer. */
    R3PTRTYPE(PINTNETBUF) pIntBufDefaultR3;
    /** Event semaphore which a receiver/consumer thread will sleep on while
     * waiting for data to arrive. */
    RTSEMEVENT volatile hRecvEvent;
    /** Number of threads sleeping on the event semaphore. */
    uint32_t cSleepers;
    /** The interface handle.
     * When this is INTNET_HANDLE_INVALID a sleeper which is waking up
     * should return with the appropriate error condition. */
    INTNETIFHANDLE volatile hIf;
    /** Pointer to the network this interface is connected to.
     * This is protected by the INTNET::hMtxCreateOpenDestroy. */
    struct INTNETNETWORK *pNetwork;
    /** The session this interface is associated with. */
    PSUPDRVSESSION pSession;
    /** The SUPR0 object id. */
    void *pvObj;
    /** The network layer address cache. (Indexed by type, 0 entry isn't used.)
     * This is protected by the address spinlock of the network. */
    INTNETADDRCACHE aAddrCache[kIntNetAddrType_End];
    /** Spinlock protecting the input (producer) side of the receive ring. */
    RTSPINLOCK hRecvInSpinlock;
    /** Busy count for tracking destination table references and active sends.
     * Usually incremented while owning the switch table spinlock. Bit 30
     * (INTNET_BUSY_WAKEUP_MASK) is used to indicate wakeup. */
    uint32_t volatile cBusy;
    /** The preallocated destination table.
     * This is NULL when it's in use as a precaution against unserialized
     * transmitting. This is grown when new interfaces are added to the network. */
    PINTNETDSTTAB volatile pDstTab;
    /** Pointer to the trunk's per interface data. Can be NULL. */
    void *pvIfData;
    /** Header buffer for when we're carving GSO frames. */
    uint8_t abGsoHdrs[256];
} INTNETIF;
/** Pointer to an internal network interface. */
typedef INTNETIF *PINTNETIF;
290
291
/**
 * A trunk interface.
 */
typedef struct INTNETTRUNKIF
{
    /** The port interface we present to the component.
     * Must stay the first member, INTNET_SWITCHPORT_2_TRUNKIF casts a pointer
     * to it straight to the structure. */
    INTNETTRUNKSWPORT SwitchPort;
    /** The port interface we get from the component. */
    PINTNETTRUNKIFPORT pIfPort;
    /** Pointer to the network we're connected to.
     * This may be NULL if we're orphaned? */
    struct INTNETNETWORK *pNetwork;
    /** The current MAC address for the interface. (reported)
     * Updated while owning the switch table spinlock. */
    RTMAC MacAddr;
    /** Whether to supply physical addresses with the outbound SGs. (reported) */
    bool fPhysSG;
    /** Explicit alignment. */
    bool fUnused;
    /** Busy count for tracking destination table references and active sends.
     * Usually incremented while owning the switch table spinlock. Bit 30
     * (INTNET_BUSY_WAKEUP_MASK) is used to indicate wakeup. */
    uint32_t volatile cBusy;
    /** Mask of destinations that pfnXmit can cope with disabled preemption for. */
    uint32_t fNoPreemptDsts;
    /** The GSO capabilities of the wire destination. (reported) */
    uint32_t fWireGsoCapabilites;
    /** The GSO capabilities of the host destination. (reported)
     * This is a bit map where each bit represents the GSO type with the same
     * number. */
    uint32_t fHostGsoCapabilites;
    /** The destination table spinlock, interrupt safe.
     * Protects apTaskDstTabs and apIntDstTabs. */
    RTSPINLOCK hDstTabSpinlock;
    /** The number of entries in apIntDstTabs. */
    uint32_t cIntDstTabs;
    /** The task time destination tables.
     * @remarks intnetR0NetworkEnsureTabSpace and others ASSUME this immediately
     *          precedes apIntDstTabs so that these two tables can be used as one
     *          contiguous one. */
    PINTNETDSTTAB apTaskDstTabs[2];
    /** The interrupt / disabled-preemption time destination tables.
     * This is a variable sized array. */
    PINTNETDSTTAB apIntDstTabs[1];
} INTNETTRUNKIF;
/** Pointer to a trunk interface. */
typedef INTNETTRUNKIF *PINTNETTRUNKIF;

/** Converts a pointer to INTNETTRUNKIF::SwitchPort to a PINTNETTRUNKIF.
 * This is a plain cast and thus relies on SwitchPort being the first member. */
#define INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort) ((PINTNETTRUNKIF)(pSwitchPort))
342
343
/**
 * Internal representation of a network.
 */
typedef struct INTNETNETWORK
{
    /** The next network in the chain.
     * This is protected by the INTNET::hMtxCreateOpenDestroy. */
    struct INTNETNETWORK *pNext;

    /** The spinlock protecting MacTab and INTNETIF::aAddrCache.
     * Interrupt safe. */
    RTSPINLOCK hAddrSpinlock;
    /** MAC address table.
     * This doubles as interface collection. */
    INTNETMACTAB MacTab;

    /** Wait for an interface to stop being busy so it can be removed or have its
     * destination table replaced. We have to wait upon this while owning the
     * network mutex. Will only ever have one waiter because of the big mutex. */
    RTSEMEVENT hEvtBusyIf;
    /** Pointer to the instance data. */
    struct INTNET *pIntNet;
    /** The SUPR0 object id. */
    void *pvObj;
    /** Pointer to the temporary buffer that is used when snooping fragmented packets.
     * This is allocated after this structure if we're sharing the MAC address with
     * the host. The buffer is INTNETNETWORK_TMP_SIZE big and aligned on a 64-byte boundary. */
    uint8_t *pbTmp;
    /** Network creation flags (INTNET_OPEN_FLAGS_*). */
    uint32_t fFlags;
    /** Any restrictive policies required as a minimum by some interface.
     * (INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES) */
    uint32_t fMinFlags;
    /** The number of active interfaces (excluding the trunk). */
    uint32_t cActiveIFs;
    /** The length of the network name. */
    uint8_t cchName;
    /** The network name. */
    char szName[INTNET_MAX_NETWORK_NAME];
    /** The trunk type. */
    INTNETTRUNKTYPE enmTrunkType;
    /** The trunk name. */
    char szTrunk[INTNET_MAX_TRUNK_NAME];
} INTNETNETWORK;
/** Pointer to an internal network. */
typedef INTNETNETWORK *PINTNETNETWORK;
/** Pointer to a const internal network. */
typedef const INTNETNETWORK *PCINTNETNETWORK;

/** The size (in bytes) of the buffer INTNETNETWORK::pbTmp points at. */
#define INTNETNETWORK_TMP_SIZE 2048
395
396
/**
 * Internal networking instance.
 */
typedef struct INTNET
{
    /** Magic number (INTNET_MAGIC). */
    uint32_t volatile u32Magic;
    /** Mutex protecting the creation, opening and destruction of both networks and
     * interfaces. (This means all operations affecting the pNetworks list.) */
    RTSEMMUTEX hMtxCreateOpenDestroy;
    /** List of networks (chained via INTNETNETWORK::pNext).
     * Protected by INTNET::hMtxCreateOpenDestroy. */
    PINTNETNETWORK volatile pNetworks;
    /** Handle table for the interfaces. */
    RTHANDLETABLE hHtIfs;
} INTNET;
/** Pointer to an internal network ring-0 instance. */
typedef struct INTNET *PINTNET;

/** Magic number for the internal network instance data (Hayao Miyazaki). */
#define INTNET_MAGIC UINT32_C(0x19410105)
417
418
419/*******************************************************************************
420* Global Variables *
421*******************************************************************************/
/** Pointer to the internal network instance data. */
static PINTNET volatile g_pIntNet = NULL;

/**
 * Describes one open-network policy: its restrictive and relaxed flag, the
 * flag fixing the configuration, and the OR of the restrictive/relaxed pair.
 */
static const struct INTNETOPENNETWORKFLAGS
{
    uint32_t fRestrictive; /**< The restrictive flag (deny/disabled). */
    uint32_t fRelaxed; /**< The relaxed flag (allow/enabled). */
    uint32_t fFixed; /**< The config-fixed flag. */
    uint32_t fPair; /**< The pair of restrictive and relaxed flags. */
}
/** Open network policy flags relating to the network. */
g_afIntNetOpenNetworkNetFlags[] =
{
    { INTNET_OPEN_FLAGS_ACCESS_RESTRICTED, INTNET_OPEN_FLAGS_ACCESS_PUBLIC, INTNET_OPEN_FLAGS_ACCESS_FIXED, INTNET_OPEN_FLAGS_ACCESS_RESTRICTED | INTNET_OPEN_FLAGS_ACCESS_PUBLIC },
    { INTNET_OPEN_FLAGS_PROMISC_DENY_CLIENTS, INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS, INTNET_OPEN_FLAGS_PROMISC_FIXED, INTNET_OPEN_FLAGS_PROMISC_DENY_CLIENTS | INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS },
    { INTNET_OPEN_FLAGS_PROMISC_DENY_TRUNK_HOST, INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST, INTNET_OPEN_FLAGS_PROMISC_FIXED, INTNET_OPEN_FLAGS_PROMISC_DENY_TRUNK_HOST | INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST },
    { INTNET_OPEN_FLAGS_PROMISC_DENY_TRUNK_WIRE, INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE, INTNET_OPEN_FLAGS_PROMISC_FIXED, INTNET_OPEN_FLAGS_PROMISC_DENY_TRUNK_WIRE | INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE },
    { INTNET_OPEN_FLAGS_TRUNK_HOST_DISABLED, INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED, INTNET_OPEN_FLAGS_TRUNK_FIXED, INTNET_OPEN_FLAGS_TRUNK_HOST_DISABLED | INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED },
    { INTNET_OPEN_FLAGS_TRUNK_HOST_CHASTE_MODE, INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE, INTNET_OPEN_FLAGS_TRUNK_FIXED, INTNET_OPEN_FLAGS_TRUNK_HOST_CHASTE_MODE | INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE },
    { INTNET_OPEN_FLAGS_TRUNK_WIRE_DISABLED, INTNET_OPEN_FLAGS_TRUNK_WIRE_ENABLED, INTNET_OPEN_FLAGS_TRUNK_FIXED, INTNET_OPEN_FLAGS_TRUNK_WIRE_DISABLED | INTNET_OPEN_FLAGS_TRUNK_WIRE_ENABLED },
    { INTNET_OPEN_FLAGS_TRUNK_WIRE_CHASTE_MODE, INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE, INTNET_OPEN_FLAGS_TRUNK_FIXED, INTNET_OPEN_FLAGS_TRUNK_WIRE_CHASTE_MODE | INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE },
},
/** Open network policy flags relating to the new interface. */
g_afIntNetOpenNetworkIfFlags[] =
{
    { INTNET_OPEN_FLAGS_IF_PROMISC_DENY, INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW, INTNET_OPEN_FLAGS_IF_FIXED, INTNET_OPEN_FLAGS_IF_PROMISC_DENY | INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW },
    { INTNET_OPEN_FLAGS_IF_PROMISC_NO_TRUNK, INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK, INTNET_OPEN_FLAGS_IF_FIXED, INTNET_OPEN_FLAGS_IF_PROMISC_NO_TRUNK | INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK },
};
450
451
452/*******************************************************************************
453* Forward Declarations *
454*******************************************************************************/
455static void intnetR0TrunkIfDestroy(PINTNETTRUNKIF pThis, PINTNETNETWORK pNetwork);
456
457
458/**
459 * Checks if a pointer belongs to the list of known networks without
460 * accessing memory it points to.
461 *
462 * @returns true, if such network is in the list.
463 * @param pIntNet The pointer to the internal network instance (global).
464 * @param pNetwork The pointer that must be validated.
465 */
466DECLINLINE(bool) intnetR0NetworkIsValid(PINTNET pIntNet, PINTNETNETWORK pNetwork)
467{
468 for (PINTNETNETWORK pCurr = pIntNet->pNetworks; pCurr; pCurr = pCurr->pNext)
469 if (pCurr == pNetwork)
470 return true;
471 return false;
472}
473
474
475/**
476 * Worker for intnetR0SgWritePart that deals with the case where the
477 * request doesn't fit into the first segment.
478 *
479 * @returns true, unless the request or SG invalid.
480 * @param pSG The SG list to write to.
481 * @param off Where to start writing (offset into the SG).
482 * @param cb How much to write.
483 * @param pvBuf The buffer to containing the bits to write.
484 */
485static bool intnetR0SgWritePartSlow(PCINTNETSG pSG, uint32_t off, uint32_t cb, void const *pvBuf)
486{
487 if (RT_UNLIKELY(off + cb > pSG->cbTotal))
488 return false;
489
490 /*
491 * Skip ahead to the segment where off starts.
492 */
493 unsigned const cSegs = pSG->cSegsUsed; Assert(cSegs == pSG->cSegsUsed);
494 unsigned iSeg = 0;
495 while (off > pSG->aSegs[iSeg].cb)
496 {
497 off -= pSG->aSegs[iSeg++].cb;
498 AssertReturn(iSeg < cSegs, false);
499 }
500
501 /*
502 * Copy the data, hoping that it's all from one segment...
503 */
504 uint32_t cbCanCopy = pSG->aSegs[iSeg].cb - off;
505 if (cbCanCopy >= cb)
506 memcpy((uint8_t *)pSG->aSegs[iSeg].pv + off, pvBuf, cb);
507 else
508 {
509 /* copy the portion in the current segment. */
510 memcpy((uint8_t *)pSG->aSegs[iSeg].pv + off, pvBuf, cbCanCopy);
511 cb -= cbCanCopy;
512
513 /* copy the portions in the other segments. */
514 do
515 {
516 pvBuf = (uint8_t const *)pvBuf + cbCanCopy;
517 iSeg++;
518 AssertReturn(iSeg < cSegs, false);
519
520 cbCanCopy = RT_MIN(cb, pSG->aSegs[iSeg].cb);
521 memcpy(pSG->aSegs[iSeg].pv, pvBuf, cbCanCopy);
522
523 cb -= cbCanCopy;
524 } while (cb > 0);
525 }
526
527 return true;
528}
529
530
531/**
532 * Writes to a part of an SG.
533 *
534 * @returns true on success, false on failure (out of bounds).
535 * @param pSG The SG list to write to.
536 * @param off Where to start writing (offset into the SG).
537 * @param cb How much to write.
538 * @param pvBuf The buffer to containing the bits to write.
539 */
540DECLINLINE(bool) intnetR0SgWritePart(PCINTNETSG pSG, uint32_t off, uint32_t cb, void const *pvBuf)
541{
542 Assert(off + cb > off);
543
544 /* The optimized case. */
545 if (RT_LIKELY( pSG->cSegsUsed == 1
546 || pSG->aSegs[0].cb >= off + cb))
547 {
548 Assert(pSG->cbTotal == pSG->aSegs[0].cb);
549 memcpy((uint8_t *)pSG->aSegs[0].pv + off, pvBuf, cb);
550 return true;
551 }
552 return intnetR0SgWritePartSlow(pSG, off, cb, pvBuf);
553}
554
555
556/**
557 * Reads a byte from a SG list.
558 *
559 * @returns The byte on success. 0xff on failure.
560 * @param pSG The SG list to read.
561 * @param off The offset (into the SG) off the byte.
562 */
563DECLINLINE(uint8_t) intnetR0SgReadByte(PCINTNETSG pSG, uint32_t off)
564{
565 if (RT_LIKELY(pSG->aSegs[0].cb > off))
566 return ((uint8_t const *)pSG->aSegs[0].pv)[off];
567
568 off -= pSG->aSegs[0].cb;
569 unsigned const cSegs = pSG->cSegsUsed; Assert(cSegs == pSG->cSegsUsed);
570 for (unsigned iSeg = 1; iSeg < cSegs; iSeg++)
571 {
572 if (pSG->aSegs[iSeg].cb > off)
573 return ((uint8_t const *)pSG->aSegs[iSeg].pv)[off];
574 off -= pSG->aSegs[iSeg].cb;
575 }
576 return false;
577}
578
579
580/**
581 * Worker for intnetR0SgReadPart that deals with the case where the
582 * requested data isn't in the first segment.
583 *
584 * @returns true, unless the SG is invalid.
585 * @param pSG The SG list to read.
586 * @param off Where to start reading (offset into the SG).
587 * @param cb How much to read.
588 * @param pvBuf The buffer to read into.
589 */
590static bool intnetR0SgReadPartSlow(PCINTNETSG pSG, uint32_t off, uint32_t cb, void *pvBuf)
591{
592 if (RT_UNLIKELY(off + cb > pSG->cbTotal))
593 return false;
594
595 /*
596 * Skip ahead to the segment where off starts.
597 */
598 unsigned const cSegs = pSG->cSegsUsed; Assert(cSegs == pSG->cSegsUsed);
599 unsigned iSeg = 0;
600 while (off > pSG->aSegs[iSeg].cb)
601 {
602 off -= pSG->aSegs[iSeg++].cb;
603 AssertReturn(iSeg < cSegs, false);
604 }
605
606 /*
607 * Copy the data, hoping that it's all from one segment...
608 */
609 uint32_t cbCanCopy = pSG->aSegs[iSeg].cb - off;
610 if (cbCanCopy >= cb)
611 memcpy(pvBuf, (uint8_t const *)pSG->aSegs[iSeg].pv + off, cb);
612 else
613 {
614 /* copy the portion in the current segment. */
615 memcpy(pvBuf, (uint8_t const *)pSG->aSegs[iSeg].pv + off, cbCanCopy);
616 cb -= cbCanCopy;
617
618 /* copy the portions in the other segments. */
619 do
620 {
621 pvBuf = (uint8_t *)pvBuf + cbCanCopy;
622 iSeg++;
623 AssertReturn(iSeg < cSegs, false);
624
625 cbCanCopy = RT_MIN(cb, pSG->aSegs[iSeg].cb);
626 memcpy(pvBuf, (uint8_t const *)pSG->aSegs[iSeg].pv, cbCanCopy);
627
628 cb -= cbCanCopy;
629 } while (cb > 0);
630 }
631
632 return true;
633}
634
635
636/**
637 * Reads a part of an SG into a buffer.
638 *
639 * @returns true on success, false on failure (out of bounds).
640 * @param pSG The SG list to read.
641 * @param off Where to start reading (offset into the SG).
642 * @param cb How much to read.
643 * @param pvBuf The buffer to read into.
644 */
645DECLINLINE(bool) intnetR0SgReadPart(PCINTNETSG pSG, uint32_t off, uint32_t cb, void *pvBuf)
646{
647 Assert(off + cb > off);
648
649 /* The optimized case. */
650 if (RT_LIKELY( pSG->cSegsUsed == 1
651 || pSG->aSegs[0].cb >= off + cb))
652 {
653 Assert(pSG->cbTotal == pSG->aSegs[0].cb);
654 memcpy(pvBuf, (uint8_t const *)pSG->aSegs[0].pv + off, cb);
655 return true;
656 }
657 return intnetR0SgReadPartSlow(pSG, off, cb, pvBuf);
658}
659
660
/**
 * Wait for a busy counter to reach zero.
 *
 * Sets INTNET_BUSY_WAKEUP_MASK in the counter so that intnetR0BusyDec signals
 * INTNETNETWORK::hEvtBusyIf when the count part drops to zero, then blocks on
 * that event until only the wakeup bit is left and clears it again.
 *
 * @param   pNetwork    The network (supplies the hEvtBusyIf event semaphore).
 * @param   pcBusy      The busy counter.
 */
static void intnetR0BusyWait(PINTNETNETWORK pNetwork, uint32_t volatile *pcBusy)
{
    /* Not busy, nothing to wait for. */
    if (ASMAtomicReadU32(pcBusy) == 0)
        return;

    /*
     * We have to be a bit cautious here so we don't destroy the network or the
     * semaphore before intnetR0BusyDec has signalled us.
     */

    /* Reset the semaphore and flip the wakeup bit. */
    RTSemEventWait(pNetwork->hEvtBusyIf, 0); /* clear it */
    uint32_t cCurBusy = ASMAtomicReadU32(pcBusy);
    do
    {
        /* The count may have dropped to zero in the meantime. */
        if (cCurBusy == 0)
            return;
        AssertMsg(!(cCurBusy & INTNET_BUSY_WAKEUP_MASK), ("%#x\n", cCurBusy));
        AssertMsg((cCurBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cCurBusy));
    } while (!ASMAtomicCmpXchgExU32(pcBusy, cCurBusy | INTNET_BUSY_WAKEUP_MASK, cCurBusy, &cCurBusy));

    /* Wait for the count to reach zero.  The wait status is ignored, so a
       timeout (30000, presumably milliseconds - confirm) merely leads to the
       counter being checked again.  We're done once the counter holds nothing
       but the wakeup bit and we manage to swap that for zero. */
    do
    {
        int rc2 = RTSemEventWait(pNetwork->hEvtBusyIf, 30000); NOREF(rc2);
        //AssertMsg(RT_SUCCESS(rc2), ("rc=%Rrc *pcBusy=%#x (%#x)\n", rc2, ASMAtomicReadU32(pcBusy), cCurBusy ));
        cCurBusy = ASMAtomicReadU32(pcBusy);
        AssertMsg((cCurBusy & INTNET_BUSY_WAKEUP_MASK), ("%#x\n", cCurBusy));
        AssertMsg((cCurBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cCurBusy));
    } while (   cCurBusy != INTNET_BUSY_WAKEUP_MASK
             || !ASMAtomicCmpXchgU32(pcBusy, 0, INTNET_BUSY_WAKEUP_MASK));
}
699
700
701/**
702 * Decrements the busy counter and maybe wakes up any threads waiting for it to
703 * reach zero.
704 *
705 * @param pNetwork The network.
706 * @param pcBusy The busy counter.
707 */
708DECLINLINE(void) intnetR0BusyDec(PINTNETNETWORK pNetwork, uint32_t volatile *pcBusy)
709{
710 uint32_t cNewBusy = ASMAtomicDecU32(pcBusy);
711 if (RT_UNLIKELY( cNewBusy == INTNET_BUSY_WAKEUP_MASK
712 && pNetwork))
713 RTSemEventSignal(pNetwork->hEvtBusyIf);
714 AssertMsg((cNewBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cNewBusy));
715}
716
717
718/**
719 * Increments the busy count of the specified interface.
720 *
721 * The caller must own the MAC address table spinlock.
722 *
723 * @param pIf The interface.
724 */
725DECLINLINE(void) intnetR0BusyDecIf(PINTNETIF pIf)
726{
727 intnetR0BusyDec(pIf->pNetwork, &pIf->cBusy);
728}
729
730
731/**
732 * Increments the busy count of the specified interface.
733 *
734 * The caller must own the MAC address table spinlock or an explicity reference.
735 *
736 * @param pTrunk The trunk.
737 */
738DECLINLINE(void) intnetR0BusyDecTrunk(PINTNETTRUNKIF pTrunk)
739{
740 if (pTrunk)
741 intnetR0BusyDec(pTrunk->pNetwork, &pTrunk->cBusy);
742}
743
744
745/**
746 * Increments the busy count of the specified interface.
747 *
748 * The caller must own the MAC address table spinlock or an explicity reference.
749 *
750 * @param pIf The interface.
751 */
752DECLINLINE(void) intnetR0BusyIncIf(PINTNETIF pIf)
753{
754 uint32_t cNewBusy = ASMAtomicIncU32(&pIf->cBusy);
755 AssertMsg((cNewBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cNewBusy));
756 NOREF(cNewBusy);
757}
758
759
760/**
761 * Increments the busy count of the specified interface.
762 *
763 * The caller must own the MAC address table spinlock or an explicity reference.
764 *
765 * @param pTrunk The trunk.
766 */
767DECLINLINE(void) intnetR0BusyIncTrunk(PINTNETTRUNKIF pTrunk)
768{
769 if (!pTrunk) return;
770 uint32_t cNewBusy = ASMAtomicIncU32(&pTrunk->cBusy);
771 AssertMsg((cNewBusy & ~INTNET_BUSY_WAKEUP_MASK) < INTNET_MAX_IFS * 3, ("%#x\n", cNewBusy));
772 NOREF(cNewBusy);
773}
774
775
776/**
777 * Retain an interface.
778 *
779 * @returns VBox status code, can assume success in most situations.
780 * @param pIf The interface instance.
781 * @param pSession The current session.
782 */
783DECLINLINE(int) intnetR0IfRetain(PINTNETIF pIf, PSUPDRVSESSION pSession)
784{
785 int rc = SUPR0ObjAddRefEx(pIf->pvObj, pSession, true /* fNoBlocking */);
786 AssertRCReturn(rc, rc);
787 return VINF_SUCCESS;
788}
789
790
791/**
792 * Release an interface previously retained by intnetR0IfRetain or
793 * by handle lookup/freeing.
794 *
795 * @returns true if destroyed, false if not.
796 * @param pIf The interface instance.
797 * @param pSession The current session.
798 */
799DECLINLINE(bool) intnetR0IfRelease(PINTNETIF pIf, PSUPDRVSESSION pSession)
800{
801 int rc = SUPR0ObjRelease(pIf->pvObj, pSession);
802 AssertRC(rc);
803 return rc == VINF_OBJECT_DESTROYED;
804}
805
806
807/**
808 * RTHandleCreateEx callback that retains an object in the
809 * handle table before returning it.
810 *
811 * (Avoids racing the freeing of the handle.)
812 *
813 * @returns VBox status code.
814 * @param hHandleTable The handle table (ignored).
815 * @param pvObj The object (INTNETIF).
816 * @param pvCtx The context (SUPDRVSESSION).
817 * @param pvUser The user context (ignored).
818 */
819static DECLCALLBACK(int) intnetR0IfRetainHandle(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser)
820{
821 NOREF(pvUser);
822 NOREF(hHandleTable);
823 PINTNETIF pIf = (PINTNETIF)pvObj;
824 if (pIf->hIf != INTNET_HANDLE_INVALID) /* Don't try retain it if called from intnetR0IfDestruct. */
825 return intnetR0IfRetain(pIf, (PSUPDRVSESSION)pvCtx);
826 return VINF_SUCCESS;
827}
828
829
830
831/**
832 * Checks if the interface has a usable MAC address or not.
833 *
834 * @returns true if MacAddr is usable, false if not.
835 * @param pIf The interface.
836 */
837DECL_FORCE_INLINE(bool) intnetR0IfHasMacAddr(PINTNETIF pIf)
838{
839 return pIf->fMacSet || !(pIf->MacAddr.au8[0] & 1);
840}
841
842
843/**
844 * Locates the MAC address table entry for the given interface.
845 *
846 * The caller holds the MAC address table spinlock, obviously.
847 *
848 * @returns Pointer to the entry on if found, NULL if not.
849 * @param pNetwork The network.
850 * @param pIf The interface.
851 */
852DECLINLINE(PINTNETMACTABENTRY) intnetR0NetworkFindMacAddrEntry(PINTNETNETWORK pNetwork, PINTNETIF pIf)
853{
854 uint32_t iIf = pNetwork->MacTab.cEntries;
855 while (iIf-- > 0)
856 {
857 if (pNetwork->MacTab.paEntries[iIf].pIf == pIf)
858 return &pNetwork->MacTab.paEntries[iIf];
859 }
860 return NULL;
861}
862
863
864/**
865 * Checks if the IPv6 address is a good interface address.
866 * @returns true/false.
867 * @param addr The address, network endian.
868 */
869DECLINLINE(bool) intnetR0IPv6AddrIsGood(RTNETADDRIPV6 addr)
870{
871 return !( ( addr.QWords.qw0 == 0 && addr.QWords.qw1 == 0) /* :: */
872 || ( (addr.Words.w0 & RT_H2BE_U16(0xff00)) == RT_H2BE_U16(0xff00)) /* multicast */
873 || ( addr.Words.w0 == 0 && addr.Words.w1 == 0
874 && addr.Words.w2 == 0 && addr.Words.w3 == 0
875 && addr.Words.w4 == 0 && addr.Words.w5 == 0
876 && addr.Words.w6 == 0 && addr.Words.w7 == RT_H2BE_U16(0x0001))); /* ::1 */
877}
878
879
880/**
881 * Checks if the IPv4 address is a broadcast address.
882 * @returns true/false.
883 * @param Addr The address, network endian.
884 */
885DECLINLINE(bool) intnetR0IPv4AddrIsBroadcast(RTNETADDRIPV4 Addr)
886{
887 /* Just check for 255.255.255.255 atm. */
888 return Addr.u == UINT32_MAX;
889}
890
891
892/**
893 * Checks if the IPv4 address is a good interface address.
894 * @returns true/false.
895 * @param Addr The address, network endian.
896 */
897DECLINLINE(bool) intnetR0IPv4AddrIsGood(RTNETADDRIPV4 Addr)
898{
899 /* Usual suspects. */
900 if ( Addr.u == UINT32_MAX /* 255.255.255.255 - broadcast. */
901 || Addr.au8[0] == 0) /* Current network, can be used as source address. */
902 return false;
903
904 /* Unusual suspects. */
905 if (RT_UNLIKELY( Addr.au8[0] == 127 /* Loopback */
906 || (Addr.au8[0] & 0xf0) == 224 /* Multicast */
907 ))
908 return false;
909 return true;
910}
911
912
913/**
914 * Gets the address size of a network layer type.
915 *
916 * @returns size in bytes.
917 * @param enmType The type.
918 */
919DECLINLINE(uint8_t) intnetR0AddrSize(INTNETADDRTYPE enmType)
920{
921 switch (enmType)
922 {
923 case kIntNetAddrType_IPv4: return 4;
924 case kIntNetAddrType_IPv6: return 16;
925 case kIntNetAddrType_IPX: return 4 + 6;
926 default: AssertFailedReturn(0);
927 }
928}
929
930
931/**
932 * Compares two address to see if they are equal, assuming naturally align structures.
933 *
934 * @returns true if equal, false if not.
935 * @param pAddr1 The first address.
936 * @param pAddr2 The second address.
937 * @param cbAddr The address size.
938 */
939DECLINLINE(bool) intnetR0AddrUIsEqualEx(PCRTNETADDRU pAddr1, PCRTNETADDRU pAddr2, uint8_t const cbAddr)
940{
941 switch (cbAddr)
942 {
943 case 4: /* IPv4 */
944 return pAddr1->au32[0] == pAddr2->au32[0];
945 case 16: /* IPv6 */
946 return pAddr1->au64[0] == pAddr2->au64[0]
947 && pAddr1->au64[1] == pAddr2->au64[1];
948 case 10: /* IPX */
949 return pAddr1->au64[0] == pAddr2->au64[0]
950 && pAddr1->au16[4] == pAddr2->au16[4];
951 default:
952 AssertFailedReturn(false);
953 }
954}
955
956
957/**
958 * Worker for intnetR0IfAddrCacheLookup that performs the lookup
959 * in the remaining cache entries after the caller has check the
960 * most likely ones.
961 *
962 * @returns -1 if not found, the index of the cache entry if found.
963 * @param pCache The cache.
964 * @param pAddr The address.
965 * @param cbAddr The address size (optimization).
966 */
967static int intnetR0IfAddrCacheLookupSlow(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
968{
969 unsigned i = pCache->cEntries - 2;
970 uint8_t const *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
971 while (i >= 1)
972 {
973 if (intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr))
974 return i;
975 pbEntry -= pCache->cbEntry;
976 i--;
977 }
978
979 return -1;
980}
981
982/**
983 * Lookup an address in a cache without any expectations.
984 *
985 * @returns -1 if not found, the index of the cache entry if found.
986 * @param pCache The cache.
987 * @param pAddr The address.
988 * @param cbAddr The address size (optimization).
989 */
990DECLINLINE(int) intnetR0IfAddrCacheLookup(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
991{
992 Assert(pCache->cbAddress == cbAddr);
993
994 /*
995 * The optimized case is when there is one cache entry and
996 * it doesn't match.
997 */
998 unsigned i = pCache->cEntries;
999 if ( i > 0
1000 && intnetR0AddrUIsEqualEx((PCRTNETADDRU)pCache->pbEntries, pAddr, cbAddr))
1001 return 0;
1002 if (i <= 1)
1003 return -1;
1004
1005 /*
1006 * Check the last entry.
1007 */
1008 i--;
1009 if (intnetR0AddrUIsEqualEx((PCRTNETADDRU)(pCache->pbEntries + pCache->cbEntry * i), pAddr, cbAddr))
1010 return i;
1011 if (i <= 1)
1012 return -1;
1013
1014 return intnetR0IfAddrCacheLookupSlow(pCache, pAddr, cbAddr);
1015}
1016
1017
1018/** Same as intnetR0IfAddrCacheLookup except we expect the address to be present already. */
1019DECLINLINE(int) intnetR0IfAddrCacheLookupLikely(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
1020{
1021 /** @todo implement this. */
1022 return intnetR0IfAddrCacheLookup(pCache, pAddr, cbAddr);
1023}
1024
1025
1026/**
1027 * Worker for intnetR0IfAddrCacheLookupUnlikely that performs
1028 * the lookup in the remaining cache entries after the caller
1029 * has check the most likely ones.
1030 *
1031 * The routine is expecting not to find the address.
1032 *
1033 * @returns -1 if not found, the index of the cache entry if found.
1034 * @param pCache The cache.
1035 * @param pAddr The address.
1036 * @param cbAddr The address size (optimization).
1037 */
1038static int intnetR0IfAddrCacheInCacheUnlikelySlow(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
1039{
1040 /*
1041 * Perform a full table lookup.
1042 */
1043 unsigned i = pCache->cEntries - 2;
1044 uint8_t const *pbEntry = pCache->pbEntries + pCache->cbEntry * i;
1045 while (i >= 1)
1046 {
1047 if (RT_UNLIKELY(intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr)))
1048 return i;
1049 pbEntry -= pCache->cbEntry;
1050 i--;
1051 }
1052
1053 return -1;
1054}
1055
1056
1057/**
1058 * Lookup an address in a cache expecting not to find it.
1059 *
1060 * @returns -1 if not found, the index of the cache entry if found.
1061 * @param pCache The cache.
1062 * @param pAddr The address.
1063 * @param cbAddr The address size (optimization).
1064 */
1065DECLINLINE(int) intnetR0IfAddrCacheLookupUnlikely(PCINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr)
1066{
1067 Assert(pCache->cbAddress == cbAddr);
1068
1069 /*
1070 * The optimized case is when there is one cache entry and
1071 * it doesn't match.
1072 */
1073 unsigned i = pCache->cEntries;
1074 if (RT_UNLIKELY( i > 0
1075 && intnetR0AddrUIsEqualEx((PCRTNETADDRU)pCache->pbEntries, pAddr, cbAddr)))
1076 return 0;
1077 if (RT_LIKELY(i <= 1))
1078 return -1;
1079
1080 /*
1081 * Then check the last entry and return if there are just two cache entries.
1082 */
1083 i--;
1084 if (RT_UNLIKELY(intnetR0AddrUIsEqualEx((PCRTNETADDRU)(pCache->pbEntries + pCache->cbEntry * i), pAddr, cbAddr)))
1085 return i;
1086 if (i <= 1)
1087 return -1;
1088
1089 return intnetR0IfAddrCacheInCacheUnlikelySlow(pCache, pAddr, cbAddr);
1090}
1091
1092
1093/**
1094 * Deletes a specific cache entry.
1095 *
1096 * Worker for intnetR0NetworkAddrCacheDelete and intnetR0NetworkAddrCacheDeleteMinusIf.
1097 *
1098 * @param pIf The interface (for logging).
1099 * @param pCache The cache.
1100 * @param iEntry The entry to delete.
1101 * @param pszMsg Log message.
1102 */
1103static void intnetR0IfAddrCacheDeleteIt(PINTNETIF pIf, PINTNETADDRCACHE pCache, int iEntry, const char *pszMsg)
1104{
1105 AssertReturnVoid(iEntry < pCache->cEntries);
1106 AssertReturnVoid(iEntry >= 0);
1107#ifdef LOG_ENABLED
1108 INTNETADDRTYPE enmAddrType = (INTNETADDRTYPE)(uintptr_t)(pCache - &pIf->aAddrCache[0]);
1109 PCRTNETADDRU pAddr = (PCRTNETADDRU)(pCache->pbEntries + iEntry * pCache->cbEntry);
1110 switch (enmAddrType)
1111 {
1112 case kIntNetAddrType_IPv4:
1113 Log(("intnetR0IfAddrCacheDeleteIt: hIf=%#x MAC=%.6Rhxs IPv4 deleted #%d %RTnaipv4 %s\n",
1114 pIf->hIf, &pIf->MacAddr, iEntry, pAddr->IPv4, pszMsg));
1115 break;
1116 case kIntNetAddrType_IPv6:
1117 Log(("intnetR0IfAddrCacheDeleteIt: hIf=%#x MAC=%.6Rhxs IPv6 deleted #%d %RTnaipv6 %s\n",
1118 pIf->hIf, &pIf->MacAddr, iEntry, pAddr->IPv6, pszMsg));
1119 break;
1120 default:
1121 Log(("intnetR0IfAddrCacheDeleteIt: hIf=%RX32 MAC=%.6Rhxs type=%d #%d %.*Rhxs %s\n",
1122 pIf->hIf, &pIf->MacAddr, enmAddrType, iEntry, pCache->cbAddress, pAddr, pszMsg));
1123 break;
1124 }
1125#endif
1126
1127 pCache->cEntries--;
1128 if (iEntry < pCache->cEntries)
1129 memmove(pCache->pbEntries + iEntry * pCache->cbEntry,
1130 pCache->pbEntries + (iEntry + 1) * pCache->cbEntry,
1131 (pCache->cEntries - iEntry) * pCache->cbEntry);
1132}
1133
1134
1135/**
1136 * Deletes an address from the cache, assuming it isn't actually in the cache.
1137 *
1138 * May or may not own the spinlock when calling this.
1139 *
1140 * @param pIf The interface (for logging).
1141 * @param pCache The cache.
1142 * @param pAddr The address.
1143 * @param cbAddr The address size (optimization).
1144 */
1145DECLINLINE(void) intnetR0IfAddrCacheDelete(PINTNETIF pIf, PINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr, const char *pszMsg)
1146{
1147 int i = intnetR0IfAddrCacheLookup(pCache, pAddr, cbAddr);
1148 if (RT_UNLIKELY(i >= 0))
1149 intnetR0IfAddrCacheDeleteIt(pIf, pCache, i, pszMsg);
1150}
1151
1152
1153/**
1154 * Deletes the address from all the interface caches.
1155 *
1156 * This is used to remove stale entries that has been reassigned to
1157 * other machines on the network.
1158 *
1159 * @param pNetwork The network.
1160 * @param pAddr The address.
1161 * @param enmType The address type.
1162 * @param cbAddr The address size (optimization).
1163 * @param pszMsg Log message.
1164 */
1165DECLINLINE(void) intnetR0NetworkAddrCacheDelete(PINTNETNETWORK pNetwork, PCRTNETADDRU pAddr, INTNETADDRTYPE const enmType,
1166 uint8_t const cbAddr, const char *pszMsg)
1167{
1168 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1169
1170 uint32_t iIf = pNetwork->MacTab.cEntries;
1171 while (iIf--)
1172 {
1173 PINTNETIF pIf = pNetwork->MacTab.paEntries[iIf].pIf;
1174 int i = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmType], pAddr, cbAddr);
1175 if (RT_UNLIKELY(i >= 0))
1176 intnetR0IfAddrCacheDeleteIt(pIf, &pIf->aAddrCache[enmType], i, pszMsg);
1177 }
1178
1179 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock);
1180}
1181
1182
1183/**
1184 * Deletes the address from all the interface caches except the specified one.
1185 *
1186 * This is used to remove stale entries that has been reassigned to
1187 * other machines on the network.
1188 *
1189 * @param pNetwork The network.
1190 * @param pAddr The address.
1191 * @param enmType The address type.
1192 * @param cbAddr The address size (optimization).
1193 * @param pszMsg Log message.
1194 */
1195DECLINLINE(void) intnetR0NetworkAddrCacheDeleteMinusIf(PINTNETNETWORK pNetwork, PINTNETIF pIfSender, PCRTNETADDRU pAddr,
1196 INTNETADDRTYPE const enmType, uint8_t const cbAddr, const char *pszMsg)
1197{
1198 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1199
1200 uint32_t iIf = pNetwork->MacTab.cEntries;
1201 while (iIf--)
1202 {
1203 PINTNETIF pIf = pNetwork->MacTab.paEntries[iIf].pIf;
1204 if (pIf != pIfSender)
1205 {
1206 int i = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmType], pAddr, cbAddr);
1207 if (RT_UNLIKELY(i >= 0))
1208 intnetR0IfAddrCacheDeleteIt(pIf, &pIf->aAddrCache[enmType], i, pszMsg);
1209 }
1210 }
1211
1212 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock);
1213}
1214
1215
1216/**
1217 * Lookup an address on the network, returning the (first) interface having it
1218 * in its address cache.
1219 *
1220 * @returns Pointer to the interface on success, NULL if not found. The caller
1221 * must release the interface by calling intnetR0BusyDecIf.
1222 * @param pNetwork The network.
1223 * @param pAddr The address to lookup.
1224 * @param enmType The address type.
1225 * @param cbAddr The size of the address.
1226 */
1227DECLINLINE(PINTNETIF) intnetR0NetworkAddrCacheLookupIf(PINTNETNETWORK pNetwork, PCRTNETADDRU pAddr, INTNETADDRTYPE const enmType, uint8_t const cbAddr)
1228{
1229 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1230
1231 uint32_t iIf = pNetwork->MacTab.cEntries;
1232 while (iIf--)
1233 {
1234 PINTNETIF pIf = pNetwork->MacTab.paEntries[iIf].pIf;
1235 int i = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmType], pAddr, cbAddr);
1236 if (i >= 0)
1237 {
1238 intnetR0BusyIncIf(pIf);
1239 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock);
1240 return pIf;
1241 }
1242 }
1243
1244 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock);
1245 return NULL;
1246}
1247
1248
1249/**
1250 * Adds an address to the cache, the caller is responsible for making sure it's
1251 * not already in the cache.
1252 *
1253 * The caller must not
1254 *
1255 * @param pIf The interface (for logging).
1256 * @param pCache The address cache.
1257 * @param pAddr The address.
1258 * @param pszMsg log message.
1259 */
1260static void intnetR0IfAddrCacheAddIt(PINTNETIF pIf, PINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, const char *pszMsg)
1261{
1262 PINTNETNETWORK pNetwork = pIf->pNetwork;
1263 AssertReturnVoid(pNetwork);
1264 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1265
1266 if (RT_UNLIKELY(!pCache->cEntriesAlloc))
1267 {
1268 /* This shouldn't happen*/
1269 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock);
1270 return;
1271 }
1272
1273 /* When the table is full, drop the older entry (FIFO). Do proper ageing? */
1274 if (pCache->cEntries >= pCache->cEntriesAlloc)
1275 {
1276 Log(("intnetR0IfAddrCacheAddIt: type=%d replacing %.*Rhxs\n",
1277 (int)(uintptr_t)(pCache - &pIf->aAddrCache[0]), pCache->cbAddress, pCache->pbEntries));
1278 memmove(pCache->pbEntries, pCache->pbEntries + pCache->cbEntry, pCache->cbEntry * (pCache->cEntries - 1));
1279 pCache->cEntries--;
1280 Assert(pCache->cEntries < pCache->cEntriesAlloc);
1281 }
1282
1283 /*
1284 * Add the new entry to the end of the array.
1285 */
1286 uint8_t *pbEntry = pCache->pbEntries + pCache->cEntries * pCache->cbEntry;
1287 memcpy(pbEntry, pAddr, pCache->cbAddress);
1288 memset(pbEntry + pCache->cbAddress, '\0', pCache->cbEntry - pCache->cbAddress);
1289#ifdef LOG_ENABLED
1290 INTNETADDRTYPE enmAddrType = (INTNETADDRTYPE)(uintptr_t)(pCache - &pIf->aAddrCache[0]);
1291 switch (enmAddrType)
1292 {
1293 case kIntNetAddrType_IPv4:
1294 Log(("intnetR0IfAddrCacheAddIt: hIf=%#x MAC=%.6Rhxs IPv4 added #%d %RTnaipv4 %s\n",
1295 pIf->hIf, &pIf->MacAddr, pCache->cEntries, pAddr->IPv4, pszMsg));
1296 break;
1297 case kIntNetAddrType_IPv6:
1298 Log(("intnetR0IfAddrCacheAddIt: hIf=%#x MAC=%.6Rhxs IPv6 added #%d %RTnaipv6 %s\n",
1299 pIf->hIf, &pIf->MacAddr, pCache->cEntries, pAddr->IPv6, pszMsg));
1300 break;
1301 default:
1302 Log(("intnetR0IfAddrCacheAddIt: hIf=%#x MAC=%.6Rhxs type=%d added #%d %.*Rhxs %s\n",
1303 pIf->hIf, &pIf->MacAddr, enmAddrType, pCache->cEntries, pCache->cbAddress, pAddr, pszMsg));
1304 break;
1305 }
1306#endif
1307 pCache->cEntries++;
1308 Assert(pCache->cEntries <= pCache->cEntriesAlloc);
1309
1310 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock);
1311}
1312
1313
1314/**
1315 * A intnetR0IfAddrCacheAdd worker that performs the rest of the lookup.
1316 *
1317 * @param pIf The interface (for logging).
1318 * @param pCache The address cache.
1319 * @param pAddr The address.
1320 * @param cbAddr The size of the address (optimization).
1321 * @param pszMsg Log message.
1322 */
1323static void intnetR0IfAddrCacheAddSlow(PINTNETIF pIf, PINTNETADDRCACHE pCache, PCRTNETADDRU pAddr, uint8_t const cbAddr, const char *pszMsg)
1324{
1325 /*
1326 * Check all but the first and last entries, the caller
1327 * has already checked those.
1328 */
1329 int i = pCache->cEntries - 2;
1330 uint8_t const *pbEntry = pCache->pbEntries + pCache->cbEntry;
1331 while (i >= 1)
1332 {
1333 if (RT_LIKELY(intnetR0AddrUIsEqualEx((PCRTNETADDRU)pbEntry, pAddr, cbAddr)))
1334 return;
1335 pbEntry += pCache->cbEntry;
1336 i--;
1337 }
1338
1339 /*
1340 * Not found, add it.
1341 */
1342 intnetR0IfAddrCacheAddIt(pIf, pCache, pAddr, pszMsg);
1343}
1344
1345
1346/**
1347 * Adds an address to the cache if it's not already there.
1348 *
1349 * Must not own any spinlocks when calling this function.
1350 *
1351 * @param pIf The interface (for logging).
1352 * @param pCache The address cache.
1353 * @param pAddr The address.
1354 * @param cbAddr The size of the address (optimization).
1355 * @param pszMsg Log message.
1356 */
1357DECLINLINE(void) intnetR0IfAddrCacheAdd(PINTNETIF pIf, PINTNETADDRCACHE pCache, PCRTNETADDRU pAddr,
1358 uint8_t const cbAddr, const char *pszMsg)
1359{
1360 Assert(pCache->cbAddress == cbAddr);
1361
1362 /*
1363 * The optimized case is when the address the first or last cache entry.
1364 */
1365 unsigned i = pCache->cEntries;
1366 if (RT_LIKELY( i > 0
1367 && ( intnetR0AddrUIsEqualEx((PCRTNETADDRU)pCache->pbEntries, pAddr, cbAddr)
1368 || (i > 1
1369 && intnetR0AddrUIsEqualEx((PCRTNETADDRU)(pCache->pbEntries + pCache->cbEntry * i), pAddr, cbAddr))) ))
1370 return;
1371 intnetR0IfAddrCacheAddSlow(pIf, pCache, pAddr, cbAddr, pszMsg);
1372}
1373
1374
1375/**
1376 * Destroys the specified address cache.
1377 * @param pCache The address cache.
1378 */
1379static void intnetR0IfAddrCacheDestroy(PINTNETADDRCACHE pCache)
1380{
1381 void *pvFree = pCache->pbEntries;
1382 pCache->pbEntries = NULL;
1383 pCache->cEntries = 0;
1384 pCache->cEntriesAlloc = 0;
1385 RTMemFree(pvFree);
1386}
1387
1388
1389/**
1390 * Initialize the address cache for the specified address type.
1391 *
1392 * The cache storage is preallocated and fixed size so that we can handle
1393 * inserts from problematic contexts.
1394 *
1395 * @returns VINF_SUCCESS or VERR_NO_MEMORY.
1396 * @param pCache The cache to initialize.
1397 * @param enmAddrType The address type.
1398 * @param fEnabled Whether the address cache is enabled or not.
1399 */
1400static int intnetR0IfAddrCacheInit(PINTNETADDRCACHE pCache, INTNETADDRTYPE enmAddrType, bool fEnabled)
1401{
1402 pCache->cEntries = 0;
1403 pCache->cbAddress = intnetR0AddrSize(enmAddrType);
1404 pCache->cbEntry = RT_ALIGN(pCache->cbAddress, 4);
1405 if (fEnabled)
1406 {
1407 pCache->cEntriesAlloc = 32;
1408 pCache->pbEntries = (uint8_t *)RTMemAllocZ(pCache->cEntriesAlloc * pCache->cbEntry);
1409 if (!pCache->pbEntries)
1410 return VERR_NO_MEMORY;
1411 }
1412 else
1413 {
1414 pCache->cEntriesAlloc = 0;
1415 pCache->pbEntries = NULL;
1416 }
1417 return VINF_SUCCESS;
1418}
1419
1420
1421/**
1422 * Is it a multicast or broadcast MAC address?
1423 *
1424 * @returns true if multicast, false if not.
1425 * @param pMacAddr The address to inspect.
1426 */
1427DECL_FORCE_INLINE(bool) intnetR0IsMacAddrMulticast(PCRTMAC pMacAddr)
1428{
1429 return !!(pMacAddr->au8[0] & 0x01);
1430}
1431
1432
1433/**
1434 * Is it a dummy MAC address?
1435 *
1436 * We use dummy MAC addresses for interfaces which we don't know the MAC
1437 * address of because they haven't sent anything (learning) or explicitly set
1438 * it.
1439 *
1440 * @returns true if dummy, false if not.
1441 * @param pMacAddr The address to inspect.
1442 */
1443DECL_FORCE_INLINE(bool) intnetR0IsMacAddrDummy(PCRTMAC pMacAddr)
1444{
1445 /* The dummy address are broadcast addresses, don't bother check it all. */
1446 return pMacAddr->au16[0] == 0xffff;
1447}
1448
1449
1450/**
1451 * Compares two MAC addresses.
1452 *
1453 * @returns true if equal, false if not.
1454 * @param pDstAddr1 Address 1.
1455 * @param pDstAddr2 Address 2.
1456 */
1457DECL_FORCE_INLINE(bool) intnetR0AreMacAddrsEqual(PCRTMAC pDstAddr1, PCRTMAC pDstAddr2)
1458{
1459 return pDstAddr1->au16[2] == pDstAddr2->au16[2]
1460 && pDstAddr1->au16[1] == pDstAddr2->au16[1]
1461 && pDstAddr1->au16[0] == pDstAddr2->au16[0];
1462}
1463
1464
1465/**
1466 * Switch a unicast frame based on the network layer address (OSI level 3) and
1467 * return a destination table.
1468 *
1469 * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
1470 * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
1471 * @param pNetwork The network to switch on.
1472 * @param pDstMacAddr The destination MAC address.
1473 * @param enmL3AddrType The level-3 destination address type.
1474 * @param pL3Addr The level-3 destination address.
1475 * @param cbL3Addr The size of the level-3 destination address.
1476 * @param fSrc The frame source (INTNETTRUNKDIR_WIRE).
1477 * @param pDstTab The destination output table.
1478 */
1479static INTNETSWDECISION intnetR0NetworkSwitchLevel3(PINTNETNETWORK pNetwork, PCRTMAC pDstMacAddr,
1480 INTNETADDRTYPE enmL3AddrType, PCRTNETADDRU pL3Addr, uint8_t cbL3Addr,
1481 uint32_t fSrc, PINTNETDSTTAB pDstTab)
1482{
1483 Assert(fSrc == INTNETTRUNKDIR_WIRE);
1484
1485 /*
1486 * Grab the spinlock first and do the switching.
1487 */
1488 PINTNETMACTAB pTab = &pNetwork->MacTab;
1489 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1490
1491 pDstTab->fTrunkDst = 0;
1492 pDstTab->pTrunk = 0;
1493 pDstTab->cIfs = 0;
1494
1495 /* Find exactly matching or promiscuous interfaces. */
1496 uint32_t cExactHits = 0;
1497 uint32_t iIfMac = pTab->cEntries;
1498 while (iIfMac-- > 0)
1499 {
1500 if (pTab->paEntries[iIfMac].fActive)
1501 {
1502 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1503 bool fExact = intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmL3AddrType], pL3Addr, cbL3Addr) >= 0;
1504 if (fExact || pTab->paEntries[iIfMac].fPromiscuousSeeTrunk)
1505 {
1506 cExactHits += fExact;
1507
1508 uint32_t iIfDst = pDstTab->cIfs++;
1509 pDstTab->aIfs[iIfDst].pIf = pIf;
1510 pDstTab->aIfs[iIfDst].fReplaceDstMac = fExact;
1511 intnetR0BusyIncIf(pIf);
1512
1513 if (fExact)
1514 pDstMacAddr = &pIf->MacAddr; /* Avoids duplicates being sent to the host. */
1515 }
1516 }
1517 }
1518
1519 /* Network only promicuous mode ifs should see related trunk traffic. */
1520 if ( cExactHits
1521 && fSrc
1522 && pNetwork->MacTab.cPromiscuousNoTrunkEntries)
1523 {
1524 iIfMac = pTab->cEntries;
1525 while (iIfMac-- > 0)
1526 {
1527 if ( pTab->paEntries[iIfMac].fActive
1528 && pTab->paEntries[iIfMac].fPromiscuousEff
1529 && !pTab->paEntries[iIfMac].fPromiscuousSeeTrunk)
1530 {
1531 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1532 if (intnetR0IfAddrCacheLookup(&pIf->aAddrCache[enmL3AddrType], pL3Addr, cbL3Addr) < 0)
1533 {
1534 uint32_t iIfDst = pDstTab->cIfs++;
1535 pDstTab->aIfs[iIfDst].pIf = pIf;
1536 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
1537 intnetR0BusyIncIf(pIf);
1538 }
1539 }
1540 }
1541 }
1542
1543 /* Does it match the host, or is the host promiscuous? */
1544 if (pTab->fHostActive)
1545 {
1546 bool fExact = intnetR0AreMacAddrsEqual(&pTab->HostMac, pDstMacAddr);
1547 if ( fExact
1548 || intnetR0IsMacAddrDummy(&pTab->HostMac)
1549 || pTab->fHostPromiscuousEff)
1550 {
1551 cExactHits += fExact;
1552 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
1553 }
1554 }
1555
1556 /* Hit the wire if there are no exact matches or if it's in promiscuous mode. */
1557 if (pTab->fWireActive && (!cExactHits || pTab->fWirePromiscuousEff))
1558 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
1559 pDstTab->fTrunkDst &= ~fSrc;
1560 if (pDstTab->fTrunkDst)
1561 {
1562 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
1563 pDstTab->pTrunk = pTrunk;
1564 intnetR0BusyIncTrunk(pTrunk);
1565 }
1566
1567 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock);
1568 return pDstTab->cIfs
1569 ? (!pDstTab->fTrunkDst ? INTNETSWDECISION_INTNET : INTNETSWDECISION_BROADCAST)
1570 : (!pDstTab->fTrunkDst ? INTNETSWDECISION_DROP : INTNETSWDECISION_TRUNK);
1571}
1572
1573
1574/**
1575 * Pre-switch a unicast MAC address.
1576 *
1577 * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
1578 * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
1579 * @param pNetwork The network to switch on.
1580 * @param fSrc The frame source.
1581 * @param pSrcAddr The source address of the frame.
1582 * @param pDstAddr The destination address of the frame.
1583 */
1584static INTNETSWDECISION intnetR0NetworkPreSwitchUnicast(PINTNETNETWORK pNetwork, uint32_t fSrc, PCRTMAC pSrcAddr,
1585 PCRTMAC pDstAddr)
1586{
1587 Assert(!intnetR0IsMacAddrMulticast(pDstAddr));
1588 Assert(fSrc);
1589
1590 /*
1591 * Grab the spinlock first and do the switching.
1592 */
1593 INTNETSWDECISION enmSwDecision = INTNETSWDECISION_BROADCAST;
1594 PINTNETMACTAB pTab = &pNetwork->MacTab;
1595 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1596
1597 /* Iterate the internal network interfaces and look for matching source and
1598 destination addresses. */
1599 uint32_t iIfMac = pTab->cEntries;
1600 while (iIfMac-- > 0)
1601 {
1602 if (pTab->paEntries[iIfMac].fActive)
1603 {
1604 /* Unknown interface address? */
1605 if (intnetR0IsMacAddrDummy(&pTab->paEntries[iIfMac].MacAddr))
1606 break;
1607
1608 /* Promiscuous mode? */
1609 if (pTab->paEntries[iIfMac].fPromiscuousSeeTrunk)
1610 break;
1611
1612 /* Paranoia - this shouldn't happen, right? */
1613 if ( pSrcAddr
1614 && intnetR0AreMacAddrsEqual(&pTab->paEntries[iIfMac].MacAddr, pSrcAddr))
1615 break;
1616
1617 /* Exact match? */
1618 if (intnetR0AreMacAddrsEqual(&pTab->paEntries[iIfMac].MacAddr, pDstAddr))
1619 {
1620 enmSwDecision = pTab->fHostPromiscuousEff && fSrc == INTNETTRUNKDIR_WIRE
1621 ? INTNETSWDECISION_BROADCAST
1622 : INTNETSWDECISION_INTNET;
1623 break;
1624 }
1625 }
1626 }
1627
1628 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock);
1629 return enmSwDecision;
1630}
1631
1632
1633/**
1634 * Switch a unicast MAC address and return a destination table.
1635 *
1636 * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
1637 * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
1638 * @param pNetwork The network to switch on.
1639 * @param fSrc The frame source.
1640 * @param pIfSender The sender interface, NULL if trunk. Used to
1641 * prevent sending an echo to the sender.
1642 * @param pDstAddr The destination address of the frame.
1643 * @param pDstTab The destination output table.
1644 */
1645static INTNETSWDECISION intnetR0NetworkSwitchUnicast(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETIF pIfSender,
1646 PCRTMAC pDstAddr, PINTNETDSTTAB pDstTab)
1647{
1648 AssertPtr(pDstTab);
1649 Assert(!intnetR0IsMacAddrMulticast(pDstAddr));
1650
1651 /*
1652 * Grab the spinlock first and do the switching.
1653 */
1654 PINTNETMACTAB pTab = &pNetwork->MacTab;
1655 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1656
1657 pDstTab->fTrunkDst = 0;
1658 pDstTab->pTrunk = 0;
1659 pDstTab->cIfs = 0;
1660
1661 /* Find exactly matching or promiscuous interfaces. */
1662 uint32_t cExactHits = 0;
1663 uint32_t iIfMac = pTab->cEntries;
1664 while (iIfMac-- > 0)
1665 {
1666 if (pTab->paEntries[iIfMac].fActive)
1667 {
1668 bool fExact = intnetR0AreMacAddrsEqual(&pTab->paEntries[iIfMac].MacAddr, pDstAddr);
1669 if ( fExact
1670 || intnetR0IsMacAddrDummy(&pTab->paEntries[iIfMac].MacAddr)
1671 || ( pTab->paEntries[iIfMac].fPromiscuousSeeTrunk
1672 || (!fSrc && pTab->paEntries[iIfMac].fPromiscuousEff) )
1673 )
1674 {
1675 cExactHits += fExact;
1676
1677 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1678 if (RT_LIKELY(pIf != pIfSender)) /* paranoia */
1679 {
1680 uint32_t iIfDst = pDstTab->cIfs++;
1681 pDstTab->aIfs[iIfDst].pIf = pIf;
1682 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
1683 intnetR0BusyIncIf(pIf);
1684 }
1685 }
1686 }
1687 }
1688
1689 /* Network only promicuous mode ifs should see related trunk traffic. */
1690 if ( cExactHits
1691 && fSrc
1692 && pNetwork->MacTab.cPromiscuousNoTrunkEntries)
1693 {
1694 iIfMac = pTab->cEntries;
1695 while (iIfMac-- > 0)
1696 {
1697 if ( pTab->paEntries[iIfMac].fPromiscuousEff
1698 && !pTab->paEntries[iIfMac].fPromiscuousSeeTrunk
1699 && pTab->paEntries[iIfMac].fActive
1700 && !intnetR0AreMacAddrsEqual(&pTab->paEntries[iIfMac].MacAddr, pDstAddr)
1701 && !intnetR0IsMacAddrDummy(&pTab->paEntries[iIfMac].MacAddr) )
1702 {
1703 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1704 uint32_t iIfDst = pDstTab->cIfs++;
1705 pDstTab->aIfs[iIfDst].pIf = pIf;
1706 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
1707 intnetR0BusyIncIf(pIf);
1708 }
1709 }
1710 }
1711
1712 /* Does it match the host, or is the host promiscuous? */
1713 if ( fSrc != INTNETTRUNKDIR_HOST
1714 && pTab->fHostActive)
1715 {
1716 bool fExact = intnetR0AreMacAddrsEqual(&pTab->HostMac, pDstAddr);
1717 if ( fExact
1718 || intnetR0IsMacAddrDummy(&pTab->HostMac)
1719 || pTab->fHostPromiscuousEff)
1720 {
1721 cExactHits += fExact;
1722 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
1723 }
1724 }
1725
1726 /* Hit the wire if there are no exact matches or if it's in promiscuous mode. */
1727 if ( fSrc != INTNETTRUNKDIR_WIRE
1728 && pTab->fWireActive
1729 && (!cExactHits || pTab->fWirePromiscuousEff)
1730 )
1731 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
1732
1733 /* Grab the trunk if we're sending to it. */
1734 if (pDstTab->fTrunkDst)
1735 {
1736 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
1737 pDstTab->pTrunk = pTrunk;
1738 intnetR0BusyIncTrunk(pTrunk);
1739 }
1740
1741 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock);
1742 return pDstTab->cIfs
1743 ? (!pDstTab->fTrunkDst ? INTNETSWDECISION_INTNET : INTNETSWDECISION_BROADCAST)
1744 : (!pDstTab->fTrunkDst ? INTNETSWDECISION_DROP : INTNETSWDECISION_TRUNK);
1745}
1746
1747
1748/**
1749 * Create a destination table for a broadcast frame.
1750 *
1751 * @returns INTNETSWDECISION_BROADCAST.
1752 * @param pNetwork The network to switch on.
1753 * @param fSrc The frame source.
1754 * @param pIfSender The sender interface, NULL if trunk. Used to
1755 * prevent sending an echo to the sender.
1756 * @param pDstTab The destination output table.
1757 */
1758static INTNETSWDECISION intnetR0NetworkSwitchBroadcast(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETIF pIfSender,
1759 PINTNETDSTTAB pDstTab)
1760{
1761 AssertPtr(pDstTab);
1762
1763 /*
1764 * Grab the spinlock first and record all active interfaces.
1765 */
1766 PINTNETMACTAB pTab = &pNetwork->MacTab;
1767 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1768
1769 pDstTab->fTrunkDst = 0;
1770 pDstTab->pTrunk = 0;
1771 pDstTab->cIfs = 0;
1772
1773 /* Regular interfaces. */
1774 uint32_t iIfMac = pTab->cEntries;
1775 while (iIfMac-- > 0)
1776 {
1777 if (pTab->paEntries[iIfMac].fActive)
1778 {
1779 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1780 if (pIf != pIfSender)
1781 {
1782 uint32_t iIfDst = pDstTab->cIfs++;
1783 pDstTab->aIfs[iIfDst].pIf = pIf;
1784 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
1785 intnetR0BusyIncIf(pIf);
1786 }
1787 }
1788 }
1789
1790 /* The trunk interface. */
1791 if (pTab->fHostActive)
1792 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
1793 if (pTab->fWireActive)
1794 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
1795 pDstTab->fTrunkDst &= ~fSrc;
1796 if (pDstTab->fTrunkDst)
1797 {
1798 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
1799 pDstTab->pTrunk = pTrunk;
1800 intnetR0BusyIncTrunk(pTrunk);
1801 }
1802
1803 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock);
1804 return INTNETSWDECISION_BROADCAST;
1805}
1806
1807
1808/**
1809 * Create a destination table with the trunk and any promiscuous interfaces.
1810 *
1811 * This is only used in a fallback case of the level-3 switching, so we can
1812 * assume the wire as source and skip the sender interface filtering.
1813 *
1814 * @returns INTNETSWDECISION_DROP, INTNETSWDECISION_TRUNK,
1815 * INTNETSWDECISION_INTNET or INTNETSWDECISION_BROADCAST (misnomer).
1816 * @param pNetwork The network to switch on.
1817 * @param fSrc The frame source.
1818 * @param pDstTab The destination output table.
1819 */
1820static INTNETSWDECISION intnetR0NetworkSwitchTrunkAndPromisc(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETDSTTAB pDstTab)
1821{
1822 Assert(fSrc == INTNETTRUNKDIR_WIRE);
1823
1824 /*
1825 * Grab the spinlock first and do the switching.
1826 */
1827 PINTNETMACTAB pTab = &pNetwork->MacTab;
1828 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1829
1830 pDstTab->fTrunkDst = 0;
1831 pDstTab->pTrunk = 0;
1832 pDstTab->cIfs = 0;
1833
1834 /* Find promiscuous interfaces. */
1835 uint32_t iIfMac = pTab->cEntries;
1836 while (iIfMac-- > 0)
1837 {
1838 if ( pTab->paEntries[iIfMac].fActive
1839 && ( pTab->paEntries[iIfMac].fPromiscuousSeeTrunk
1840 || (!fSrc && pTab->paEntries[iIfMac].fPromiscuousEff) )
1841 )
1842 {
1843 PINTNETIF pIf = pTab->paEntries[iIfMac].pIf; AssertPtr(pIf); Assert(pIf->pNetwork == pNetwork);
1844 uint32_t iIfDst = pDstTab->cIfs++;
1845 pDstTab->aIfs[iIfDst].pIf = pIf;
1846 pDstTab->aIfs[iIfDst].fReplaceDstMac = false;
1847 intnetR0BusyIncIf(pIf);
1848 }
1849 }
1850
1851 /* The trunk interface. */
1852 if (pTab->fHostActive)
1853 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
1854 if (pTab->fWireActive)
1855 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
1856 pDstTab->fTrunkDst &= ~fSrc;
1857 if (pDstTab->fTrunkDst)
1858 {
1859 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
1860 pDstTab->pTrunk = pTrunk;
1861 intnetR0BusyIncTrunk(pTrunk);
1862 }
1863
1864 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock);
1865 return !pDstTab->cIfs
1866 ? (!pDstTab->fTrunkDst ? INTNETSWDECISION_DROP : INTNETSWDECISION_TRUNK)
1867 : (!pDstTab->fTrunkDst ? INTNETSWDECISION_INTNET : INTNETSWDECISION_BROADCAST);
1868}
1869
1870
1871/**
1872 * Create a destination table for a trunk frame.
1873 *
1874 * @returns INTNETSWDECISION_BROADCAST.
1875 * @param pNetwork The network to switch on.
1876 * @param fSrc The frame source.
1877 * @param pDstTab The destination output table.
1878 */
1879static INTNETSWDECISION intnetR0NetworkSwitchTrunk(PINTNETNETWORK pNetwork, uint32_t fSrc, PINTNETDSTTAB pDstTab)
1880{
1881 AssertPtr(pDstTab);
1882
1883 /*
1884 * Grab the spinlock first and record all active interfaces.
1885 */
1886 PINTNETMACTAB pTab= &pNetwork->MacTab;
1887 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
1888
1889 pDstTab->fTrunkDst = 0;
1890 pDstTab->pTrunk = 0;
1891 pDstTab->cIfs = 0;
1892
1893 /* The trunk interface. */
1894 if (pTab->fHostActive)
1895 pDstTab->fTrunkDst |= INTNETTRUNKDIR_HOST;
1896 if (pTab->fWireActive)
1897 pDstTab->fTrunkDst |= INTNETTRUNKDIR_WIRE;
1898 pDstTab->fTrunkDst &= ~fSrc;
1899 if (pDstTab->fTrunkDst)
1900 {
1901 PINTNETTRUNKIF pTrunk = pTab->pTrunk;
1902 pDstTab->pTrunk = pTrunk;
1903 intnetR0BusyIncTrunk(pTrunk);
1904 }
1905
1906 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock);
1907 return pDstTab->fTrunkDst ? INTNETSWDECISION_TRUNK : INTNETSWDECISION_DROP;
1908}
1909
1910
1911/**
1912 * Wrapper around RTMemAlloc for allocating a destination table.
1913 *
1914 * @returns VINF_SUCCESS or VERR_NO_MEMORY.
1915 * @param cEntries The size given as an entry count.
1916 * @param ppDstTab Where to store the pointer (always).
1917 */
1918DECLINLINE(int) intnetR0AllocDstTab(uint32_t cEntries, PINTNETDSTTAB *ppDstTab)
1919{
1920 PINTNETDSTTAB pDstTab;
1921 *ppDstTab = pDstTab = (PINTNETDSTTAB)RTMemAlloc(RT_OFFSETOF(INTNETDSTTAB, aIfs[cEntries]));
1922 if (RT_UNLIKELY(!pDstTab))
1923 return VERR_NO_MEMORY;
1924 return VINF_SUCCESS;
1925}
1926
1927
1928/**
1929 * Ensures that there is space for another interface in the MAC address lookup
1930 * table as well as all the destination tables.
1931 *
1932 * The caller must own the create/open/destroy mutex.
1933 *
1934 * @returns VINF_SUCCESS, VERR_NO_MEMORY or VERR_OUT_OF_RANGE.
1935 * @param pNetwork The network to operate on.
1936 */
1937static int intnetR0NetworkEnsureTabSpace(PINTNETNETWORK pNetwork)
1938{
1939 /*
1940 * The cEntries and cEntriesAllocated members are only updated while
1941 * owning the big mutex, so we only need the spinlock when doing the
1942 * actual table replacing.
1943 */
1944 PINTNETMACTAB pTab = &pNetwork->MacTab;
1945 int rc = VINF_SUCCESS;
1946 AssertReturn(pTab->cEntries <= pTab->cEntriesAllocated, VERR_INTERNAL_ERROR_2);
1947 if (pTab->cEntries + 1 > pTab->cEntriesAllocated)
1948 {
1949 uint32_t const cAllocated = pTab->cEntriesAllocated + INTNET_GROW_DSTTAB_SIZE;
1950 if (cAllocated <= INTNET_MAX_IFS)
1951 {
1952 /*
1953 * Resize the destination tables first, this can be kind of tedious.
1954 */
1955 for (uint32_t i = 0; i < pTab->cEntries; i++)
1956 {
1957 PINTNETIF pIf = pTab->paEntries[i].pIf; AssertPtr(pIf);
1958 PINTNETDSTTAB pNew;
1959 rc = intnetR0AllocDstTab(cAllocated, &pNew);
1960 if (RT_FAILURE(rc))
1961 break;
1962
1963 for (;;)
1964 {
1965 PINTNETDSTTAB pOld = pIf->pDstTab;
1966 if ( pOld
1967 && ASMAtomicCmpXchgPtr(&pIf->pDstTab, pNew, pOld))
1968 {
1969 RTMemFree(pOld);
1970 break;
1971 }
1972 intnetR0BusyWait(pNetwork, &pIf->cBusy);
1973 }
1974 }
1975
1976 /*
1977 * The trunk.
1978 */
1979 if ( RT_SUCCESS(rc)
1980 && pNetwork->MacTab.pTrunk)
1981 {
1982 AssertCompileAdjacentMembers(INTNETTRUNKIF, apTaskDstTabs, apIntDstTabs);
1983 PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
1984 PINTNETDSTTAB * const ppEndDstTab = &pTrunk->apIntDstTabs[pTrunk->cIntDstTabs];
1985 for (PINTNETDSTTAB *ppDstTab = &pTrunk->apTaskDstTabs[0];
1986 ppDstTab != ppEndDstTab && RT_SUCCESS(rc);
1987 ppDstTab++)
1988 {
1989 PINTNETDSTTAB pNew;
1990 rc = intnetR0AllocDstTab(cAllocated, &pNew);
1991 if (RT_FAILURE(rc))
1992 break;
1993
1994 for (;;)
1995 {
1996 RTSpinlockAcquire(pTrunk->hDstTabSpinlock);
1997 void *pvOld = *ppDstTab;
1998 if (pvOld)
1999 *ppDstTab = pNew;
2000 RTSpinlockReleaseNoInts(pTrunk->hDstTabSpinlock);
2001 if (pvOld)
2002 {
2003 RTMemFree(pvOld);
2004 break;
2005 }
2006 intnetR0BusyWait(pNetwork, &pTrunk->cBusy);
2007 }
2008 }
2009 }
2010
2011 /*
2012 * The MAC Address table itself.
2013 */
2014 if (RT_SUCCESS(rc))
2015 {
2016 PINTNETMACTABENTRY paNew = (PINTNETMACTABENTRY)RTMemAlloc(sizeof(INTNETMACTABENTRY) * cAllocated);
2017 if (paNew)
2018 {
2019 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
2020
2021 PINTNETMACTABENTRY paOld = pTab->paEntries;
2022 uint32_t i = pTab->cEntries;
2023 while (i-- > 0)
2024 {
2025 paNew[i] = paOld[i];
2026
2027 paOld[i].fActive = false;
2028 paOld[i].pIf = NULL;
2029 }
2030
2031 pTab->paEntries = paNew;
2032 pTab->cEntriesAllocated = cAllocated;
2033
2034 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock);
2035
2036 RTMemFree(paOld);
2037 }
2038 else
2039 rc = VERR_NO_MEMORY;
2040 }
2041 }
2042 else
2043 rc = VERR_OUT_OF_RANGE;
2044 }
2045 return rc;
2046}
2047
2048
2049
2050
2051#ifdef INTNET_WITH_DHCP_SNOOPING
2052
2053/**
2054 * Snoops IP assignments and releases from the DHCPv4 traffic.
2055 *
2056 * The caller is responsible for making sure this traffic between the
2057 * BOOTPS and BOOTPC ports and validate the IP header. The UDP packet
2058 * need not be validated beyond the ports.
2059 *
2060 * @param pNetwork The network this frame was seen on.
2061 * @param pIpHdr Pointer to a valid IP header. This is for pseudo
2062 * header validation, so only the minimum header size
2063 * needs to be available and valid here.
2064 * @param pUdpHdr Pointer to the UDP header in the frame.
2065 * @param cbUdpPkt What's left of the frame when starting at the UDP header.
2066 * @param fGso Set if this is a GSO frame, clear if regular.
2067 */
2068static void intnetR0NetworkSnoopDhcp(PINTNETNETWORK pNetwork, PCRTNETIPV4 pIpHdr, PCRTNETUDP pUdpHdr, uint32_t cbUdpPkt)
2069{
2070 /*
2071 * Check if the DHCP message is valid and get the type.
2072 */
2073 if (!RTNetIPv4IsUDPValid(pIpHdr, pUdpHdr, pUdpHdr + 1, cbUdpPkt, true /*fCheckSum*/))
2074 {
2075 Log6(("Bad UDP packet\n"));
2076 return;
2077 }
2078 PCRTNETBOOTP pDhcp = (PCRTNETBOOTP)(pUdpHdr + 1);
2079 uint8_t MsgType;
2080 if (!RTNetIPv4IsDHCPValid(pUdpHdr, pDhcp, cbUdpPkt - sizeof(*pUdpHdr), &MsgType))
2081 {
2082 Log6(("Bad DHCP packet\n"));
2083 return;
2084 }
2085
2086#ifdef LOG_ENABLED
2087 /*
2088 * Log it.
2089 */
2090 const char *pszType = "unknown";
2091 switch (MsgType)
2092 {
2093 case RTNET_DHCP_MT_DISCOVER: pszType = "discover"; break;
2094 case RTNET_DHCP_MT_OFFER: pszType = "offer"; break;
2095 case RTNET_DHCP_MT_REQUEST: pszType = "request"; break;
2096 case RTNET_DHCP_MT_DECLINE: pszType = "decline"; break;
2097 case RTNET_DHCP_MT_ACK: pszType = "ack"; break;
2098 case RTNET_DHCP_MT_NAC: pszType = "nac"; break;
2099 case RTNET_DHCP_MT_RELEASE: pszType = "release"; break;
2100 case RTNET_DHCP_MT_INFORM: pszType = "inform"; break;
2101 }
2102 Log6(("DHCP msg: %d (%s) client %.6Rhxs ciaddr=%d.%d.%d.%d yiaddr=%d.%d.%d.%d\n", MsgType, pszType, &pDhcp->bp_chaddr,
2103 pDhcp->bp_ciaddr.au8[0], pDhcp->bp_ciaddr.au8[1], pDhcp->bp_ciaddr.au8[2], pDhcp->bp_ciaddr.au8[3],
2104 pDhcp->bp_yiaddr.au8[0], pDhcp->bp_yiaddr.au8[1], pDhcp->bp_yiaddr.au8[2], pDhcp->bp_yiaddr.au8[3]));
2105#endif /* LOG_EANBLED */
2106
2107 /*
2108 * Act upon the message.
2109 */
2110 switch (MsgType)
2111 {
2112#if 0
2113 case RTNET_DHCP_MT_REQUEST:
2114 /** @todo Check for valid non-broadcast requests w/ IP for any of the MACs we
2115 * know, and add the IP to the cache. */
2116 break;
2117#endif
2118
2119
2120 /*
2121 * Lookup the interface by its MAC address and insert the IPv4 address into the cache.
2122 * Delete the old client address first, just in case it changed in a renewal.
2123 */
2124 case RTNET_DHCP_MT_ACK:
2125 if (intnetR0IPv4AddrIsGood(pDhcp->bp_yiaddr))
2126 {
2127 PINTNETIF pMatchingIf = NULL;
2128 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
2129
2130 uint32_t iIf = pNetwork->MacTab.cEntries;
2131 while (iIf-- > 0)
2132 {
2133 PINTNETIF pCur = pNetwork->MacTab.paEntries[iIf].pIf;
2134 if ( intnetR0IfHasMacAddr(pCur)
2135 && !memcmp(&pCur->MacAddr, &pDhcp->bp_chaddr, sizeof(RTMAC)))
2136 {
2137 intnetR0IfAddrCacheDelete(pCur, &pCur->aAddrCache[kIntNetAddrType_IPv4],
2138 (PCRTNETADDRU)&pDhcp->bp_ciaddr, sizeof(RTNETADDRIPV4), "DHCP_MT_ACK");
2139 if (!pMatchingIf)
2140 {
2141 pMatchingIf = pCur;
2142 intnetR0BusyIncIf(pMatchingIf);
2143 }
2144 }
2145 }
2146
2147 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock);
2148
2149 if (pMatchingIf)
2150 {
2151 intnetR0IfAddrCacheAdd(pMatchingIf, &pMatchingIf->aAddrCache[kIntNetAddrType_IPv4],
2152 (PCRTNETADDRU)&pDhcp->bp_yiaddr, sizeof(RTNETADDRIPV4), "DHCP_MT_ACK");
2153 intnetR0BusyDecIf(pMatchingIf);
2154 }
2155 }
2156 return;
2157
2158
2159 /*
2160 * Lookup the interface by its MAC address and remove the IPv4 address(es) from the cache.
2161 */
2162 case RTNET_DHCP_MT_RELEASE:
2163 {
2164 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
2165
2166 uint32_t iIf = pNetwork->MacTab.cEntries;
2167 while (iIf-- > 0)
2168 {
2169 PINTNETIF pCur = pNetwork->MacTab.paEntries[iIf].pIf;
2170 if ( intnetR0IfHasMacAddr(pCur)
2171 && !memcmp(&pCur->MacAddr, &pDhcp->bp_chaddr, sizeof(RTMAC)))
2172 {
2173 intnetR0IfAddrCacheDelete(pCur, &pCur->aAddrCache[kIntNetAddrType_IPv4],
2174 (PCRTNETADDRU)&pDhcp->bp_ciaddr, sizeof(RTNETADDRIPV4), "DHCP_MT_RELEASE");
2175 intnetR0IfAddrCacheDelete(pCur, &pCur->aAddrCache[kIntNetAddrType_IPv4],
2176 (PCRTNETADDRU)&pDhcp->bp_yiaddr, sizeof(RTNETADDRIPV4), "DHCP_MT_RELEASE");
2177 }
2178 }
2179
2180 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock);
2181 break;
2182 }
2183 }
2184
2185}
2186
2187
2188/**
2189 * Worker for intnetR0TrunkIfSnoopAddr that takes care of what
2190 * is likely to be a DHCP message.
2191 *
2192 * The caller has already check that the UDP source and destination ports
2193 * are BOOTPS or BOOTPC.
2194 *
2195 * @param pNetwork The network this frame was seen on.
2196 * @param pSG The gather list for the frame.
2197 */
2198static void intnetR0TrunkIfSnoopDhcp(PINTNETNETWORK pNetwork, PCINTNETSG pSG)
2199{
2200 /*
2201 * Get a pointer to a linear copy of the full packet, using the
2202 * temporary buffer if necessary.
2203 */
2204 PCRTNETIPV4 pIpHdr = (PCRTNETIPV4)((PCRTNETETHERHDR)pSG->aSegs[0].pv + 1);
2205 uint32_t cbPacket = pSG->cbTotal - sizeof(RTNETETHERHDR);
2206 if (pSG->cSegsUsed > 1)
2207 {
2208 cbPacket = RT_MIN(cbPacket, INTNETNETWORK_TMP_SIZE);
2209 Log6(("intnetR0TrunkIfSnoopDhcp: Copying IPv4/UDP/DHCP pkt %u\n", cbPacket));
2210 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), cbPacket, pNetwork->pbTmp))
2211 return;
2212 //pSG->fFlags |= INTNETSG_FLAGS_PKT_CP_IN_TMP;
2213 pIpHdr = (PCRTNETIPV4)pNetwork->pbTmp;
2214 }
2215
2216 /*
2217 * Validate the IP header and find the UDP packet.
2218 */
2219 if (!RTNetIPv4IsHdrValid(pIpHdr, cbPacket, pSG->cbTotal - sizeof(RTNETETHERHDR), true /*fChecksum*/))
2220 {
2221 Log(("intnetR0TrunkIfSnoopDhcp: bad ip header\n"));
2222 return;
2223 }
2224 uint32_t cbIpHdr = pIpHdr->ip_hl * 4;
2225
2226 /*
2227 * Hand it over to the common DHCP snooper.
2228 */
2229 intnetR0NetworkSnoopDhcp(pNetwork, pIpHdr, (PCRTNETUDP)((uintptr_t)pIpHdr + cbIpHdr), cbPacket - cbIpHdr);
2230}
2231
2232#endif /* INTNET_WITH_DHCP_SNOOPING */
2233
2234
2235/**
2236 * Snoops up source addresses from ARP requests and purge these from the address
2237 * caches.
2238 *
2239 * The purpose of this purging is to get rid of stale addresses.
2240 *
2241 * @param pNetwork The network this frame was seen on.
2242 * @param pSG The gather list for the frame.
2243 */
2244static void intnetR0TrunkIfSnoopArp(PINTNETNETWORK pNetwork, PCINTNETSG pSG)
2245{
2246 /*
2247 * Check the minimum size first.
2248 */
2249 if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + sizeof(RTNETARPIPV4)))
2250 return;
2251
2252 /*
2253 * Copy to temporary buffer if necessary.
2254 */
2255 uint32_t cbPacket = RT_MIN(pSG->cbTotal, sizeof(RTNETARPIPV4));
2256 PCRTNETARPIPV4 pArpIPv4 = (PCRTNETARPIPV4)((uintptr_t)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
2257 if ( pSG->cSegsUsed != 1
2258 && pSG->aSegs[0].cb < cbPacket)
2259 {
2260 if ( (pSG->fFlags & (INTNETSG_FLAGS_ARP_IPV4 | INTNETSG_FLAGS_PKT_CP_IN_TMP))
2261 != (INTNETSG_FLAGS_ARP_IPV4 | INTNETSG_FLAGS_PKT_CP_IN_TMP)
2262 && !intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), cbPacket, pNetwork->pbTmp))
2263 return;
2264 pArpIPv4 = (PCRTNETARPIPV4)pNetwork->pbTmp;
2265 }
2266
2267 /*
2268 * Ignore packets which doesn't interest us or we perceive as malformed.
2269 */
2270 if (RT_UNLIKELY( pArpIPv4->Hdr.ar_hlen != sizeof(RTMAC)
2271 || pArpIPv4->Hdr.ar_plen != sizeof(RTNETADDRIPV4)
2272 || pArpIPv4->Hdr.ar_htype != RT_H2BE_U16(RTNET_ARP_ETHER)
2273 || pArpIPv4->Hdr.ar_ptype != RT_H2BE_U16(RTNET_ETHERTYPE_IPV4)))
2274 return;
2275 uint16_t ar_oper = RT_H2BE_U16(pArpIPv4->Hdr.ar_oper);
2276 if (RT_UNLIKELY( ar_oper != RTNET_ARPOP_REQUEST
2277 && ar_oper != RTNET_ARPOP_REPLY))
2278 {
2279 Log6(("ts-ar: op=%#x\n", ar_oper));
2280 return;
2281 }
2282
2283 /*
2284 * Delete the source address if it's OK.
2285 */
2286 if ( !intnetR0IsMacAddrMulticast(&pArpIPv4->ar_sha)
2287 && ( pArpIPv4->ar_sha.au16[0]
2288 || pArpIPv4->ar_sha.au16[1]
2289 || pArpIPv4->ar_sha.au16[2])
2290 && intnetR0IPv4AddrIsGood(pArpIPv4->ar_spa))
2291 {
2292 Log6(("ts-ar: %d.%d.%d.%d / %.6Rhxs\n", pArpIPv4->ar_spa.au8[0], pArpIPv4->ar_spa.au8[1],
2293 pArpIPv4->ar_spa.au8[2], pArpIPv4->ar_spa.au8[3], &pArpIPv4->ar_sha));
2294 intnetR0NetworkAddrCacheDelete(pNetwork, (PCRTNETADDRU)&pArpIPv4->ar_spa,
2295 kIntNetAddrType_IPv4, sizeof(pArpIPv4->ar_spa), "tif/arp");
2296 }
2297}
2298
2299
2300#ifdef INTNET_WITH_DHCP_SNOOPING
2301/**
2302 * Snoop up addresses from ARP and DHCP traffic from frames coming
2303 * over the trunk connection.
2304 *
2305 * The caller is responsible for do some basic filtering before calling
2306 * this function.
2307 * For IPv4 this means checking against the minimum DHCPv4 frame size.
2308 *
2309 * @param pNetwork The network.
2310 * @param pSG The SG list for the frame.
2311 * @param EtherType The Ethertype of the frame.
2312 */
2313static void intnetR0TrunkIfSnoopAddr(PINTNETNETWORK pNetwork, PCINTNETSG pSG, uint16_t EtherType)
2314{
2315 switch (EtherType)
2316 {
2317 case RTNET_ETHERTYPE_IPV4:
2318 {
2319 uint32_t cbIpHdr;
2320 uint8_t b;
2321
2322 Assert(pSG->cbTotal >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN);
2323 if (pSG->aSegs[0].cb >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN)
2324 {
2325 /* check if the protocol is UDP */
2326 PCRTNETIPV4 pIpHdr = (PCRTNETIPV4)((uint8_t const *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
2327 if (pIpHdr->ip_p != RTNETIPV4_PROT_UDP)
2328 return;
2329
2330 /* get the TCP header length */
2331 cbIpHdr = pIpHdr->ip_hl * 4;
2332 }
2333 else
2334 {
2335 /* check if the protocol is UDP */
2336 if ( intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPV4, ip_p))
2337 != RTNETIPV4_PROT_UDP)
2338 return;
2339
2340 /* get the TCP header length */
2341 b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + 0); /* (IPv4 first byte, a bitfield) */
2342 cbIpHdr = (b & 0x0f) * 4;
2343 }
2344 if (cbIpHdr < RTNETIPV4_MIN_LEN)
2345 return;
2346
2347 /* compare the ports. */
2348 if (pSG->aSegs[0].cb >= sizeof(RTNETETHERHDR) + cbIpHdr + RTNETUDP_MIN_LEN)
2349 {
2350 PCRTNETUDP pUdpHdr = (PCRTNETUDP)((uint8_t const *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR) + cbIpHdr);
2351 if ( ( RT_BE2H_U16(pUdpHdr->uh_sport) != RTNETIPV4_PORT_BOOTPS
2352 && RT_BE2H_U16(pUdpHdr->uh_dport) != RTNETIPV4_PORT_BOOTPS)
2353 || ( RT_BE2H_U16(pUdpHdr->uh_dport) != RTNETIPV4_PORT_BOOTPC
2354 && RT_BE2H_U16(pUdpHdr->uh_sport) != RTNETIPV4_PORT_BOOTPC))
2355 return;
2356 }
2357 else
2358 {
2359 /* get the lower byte of the UDP source port number. */
2360 b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_OFFSETOF(RTNETUDP, uh_sport) + 1);
2361 if ( b != RTNETIPV4_PORT_BOOTPS
2362 && b != RTNETIPV4_PORT_BOOTPC)
2363 return;
2364 uint8_t SrcPort = b;
2365 b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_OFFSETOF(RTNETUDP, uh_sport));
2366 if (b)
2367 return;
2368
2369 /* get the lower byte of the UDP destination port number. */
2370 b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_OFFSETOF(RTNETUDP, uh_dport) + 1);
2371 if ( b != RTNETIPV4_PORT_BOOTPS
2372 && b != RTNETIPV4_PORT_BOOTPC)
2373 return;
2374 if (b == SrcPort)
2375 return;
2376 b = intnetR0SgReadByte(pSG, sizeof(RTNETETHERHDR) + cbIpHdr + RT_OFFSETOF(RTNETUDP, uh_dport));
2377 if (b)
2378 return;
2379 }
2380 intnetR0TrunkIfSnoopDhcp(pNetwork, pSG);
2381 break;
2382 }
2383
2384 case RTNET_ETHERTYPE_ARP:
2385 intnetR0TrunkIfSnoopArp(pNetwork, pSG);
2386 break;
2387 }
2388}
2389#endif /* INTNET_WITH_DHCP_SNOOPING */
2390
2391/**
2392 * Deals with an IPv6 packet.
2393 *
2394 * This will fish out the source IP address and add it to the cache.
2395 * Then it will look for DHCPRELEASE requests (?) and anything else
2396 * that we might find useful later.
2397 *
2398 * @param pIf The interface that's sending the frame.
2399 * @param pIpHdr Pointer to the IPv4 header in the frame.
2400 * @param cbPacket The size of the packet, or more correctly the
2401 * size of the frame without the ethernet header.
2402 * @param fGso Set if this is a GSO frame, clear if regular.
2403 */
2404static void intnetR0IfSnoopIPv6SourceAddr(PINTNETIF pIf, PCRTNETIPV6 pIpHdr, uint32_t cbPacket, bool fGso)
2405{
2406 NOREF(fGso);
2407
2408 /*
2409 * Check the header size first to prevent access invalid data.
2410 */
2411 if (cbPacket < RTNETIPV6_MIN_LEN)
2412 return;
2413
2414 /*
2415 * If the source address is good (not multicast) and
2416 * not already in the address cache of the sender, add it.
2417 */
2418 RTNETADDRU Addr;
2419 Addr.IPv6 = pIpHdr->ip6_src;
2420
2421 if ( intnetR0IPv6AddrIsGood(Addr.IPv6) && (pIpHdr->ip6_hlim == 0xff)
2422 && intnetR0IfAddrCacheLookupLikely(&pIf->aAddrCache[kIntNetAddrType_IPv6], &Addr, sizeof(Addr.IPv6)) < 0)
2423 {
2424 intnetR0IfAddrCacheAddIt(pIf, &pIf->aAddrCache[kIntNetAddrType_IPv6], &Addr, "if/ipv6");
2425 }
2426}
2427
2428
2429/**
2430 * Deals with an IPv4 packet.
2431 *
2432 * This will fish out the source IP address and add it to the cache.
2433 * Then it will look for DHCPRELEASE requests (?) and anything else
2434 * that we might find useful later.
2435 *
2436 * @param pIf The interface that's sending the frame.
2437 * @param pIpHdr Pointer to the IPv4 header in the frame.
2438 * @param cbPacket The size of the packet, or more correctly the
2439 * size of the frame without the ethernet header.
2440 * @param fGso Set if this is a GSO frame, clear if regular.
2441 */
2442static void intnetR0IfSnoopIPv4SourceAddr(PINTNETIF pIf, PCRTNETIPV4 pIpHdr, uint32_t cbPacket, bool fGso)
2443{
2444 /*
2445 * Check the header size first to prevent access invalid data.
2446 */
2447 if (cbPacket < RTNETIPV4_MIN_LEN)
2448 return;
2449 uint32_t cbHdr = (uint32_t)pIpHdr->ip_hl * 4;
2450 if ( cbHdr < RTNETIPV4_MIN_LEN
2451 || cbPacket < cbHdr)
2452 return;
2453
2454 /*
2455 * If the source address is good (not broadcast or my network) and
2456 * not already in the address cache of the sender, add it. Validate
2457 * the IP header before adding it.
2458 */
2459 bool fValidatedIpHdr = false;
2460 RTNETADDRU Addr;
2461 Addr.IPv4 = pIpHdr->ip_src;
2462 if ( intnetR0IPv4AddrIsGood(Addr.IPv4)
2463 && intnetR0IfAddrCacheLookupLikely(&pIf->aAddrCache[kIntNetAddrType_IPv4], &Addr, sizeof(Addr.IPv4)) < 0)
2464 {
2465 if (!RTNetIPv4IsHdrValid(pIpHdr, cbPacket, cbPacket, !fGso /*fChecksum*/))
2466 {
2467 Log(("intnetR0IfSnoopIPv4SourceAddr: bad ip header\n"));
2468 return;
2469 }
2470 intnetR0IfAddrCacheAddIt(pIf, &pIf->aAddrCache[kIntNetAddrType_IPv4], &Addr, "if/ipv4");
2471 fValidatedIpHdr = true;
2472 }
2473
2474#ifdef INTNET_WITH_DHCP_SNOOPING
2475 /*
2476 * Check for potential DHCP packets.
2477 */
2478 if ( pIpHdr->ip_p == RTNETIPV4_PROT_UDP /* DHCP is UDP. */
2479 && cbPacket >= cbHdr + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN /* Min DHCP packet len. */
2480 && !fGso) /* GSO is not applicable to DHCP traffic. */
2481 {
2482 PCRTNETUDP pUdpHdr = (PCRTNETUDP)((uint8_t const *)pIpHdr + cbHdr);
2483 if ( ( RT_BE2H_U16(pUdpHdr->uh_dport) == RTNETIPV4_PORT_BOOTPS
2484 || RT_BE2H_U16(pUdpHdr->uh_sport) == RTNETIPV4_PORT_BOOTPS)
2485 && ( RT_BE2H_U16(pUdpHdr->uh_sport) == RTNETIPV4_PORT_BOOTPC
2486 || RT_BE2H_U16(pUdpHdr->uh_dport) == RTNETIPV4_PORT_BOOTPC))
2487 {
2488 if ( fValidatedIpHdr
2489 || RTNetIPv4IsHdrValid(pIpHdr, cbPacket, cbPacket, !fGso /*fChecksum*/))
2490 intnetR0NetworkSnoopDhcp(pIf->pNetwork, pIpHdr, pUdpHdr, cbPacket - cbHdr);
2491 else
2492 Log(("intnetR0IfSnoopIPv4SourceAddr: bad ip header (dhcp)\n"));
2493 }
2494 }
2495#endif /* INTNET_WITH_DHCP_SNOOPING */
2496}
2497
2498
2499/**
2500 * Snoop up source addresses from an ARP request or reply.
2501 *
2502 * @param pIf The interface that's sending the frame.
2503 * @param pHdr The ARP header.
2504 * @param cbPacket The size of the packet (might be larger than the ARP
2505 * request 'cause of min ethernet frame size).
2506 * @param pfSgFlags Pointer to the SG flags. This is used to tag the packet so we
2507 * don't have to repeat the frame parsing in intnetR0TrunkIfSend.
2508 */
2509static void intnetR0IfSnoopArpAddr(PINTNETIF pIf, PCRTNETARPIPV4 pArpIPv4, uint32_t cbPacket, uint16_t *pfSgFlags)
2510{
2511 /*
2512 * Ignore packets which doesn't interest us or we perceive as malformed.
2513 */
2514 if (RT_UNLIKELY(cbPacket < sizeof(RTNETARPIPV4)))
2515 return;
2516 if (RT_UNLIKELY( pArpIPv4->Hdr.ar_hlen != sizeof(RTMAC)
2517 || pArpIPv4->Hdr.ar_plen != sizeof(RTNETADDRIPV4)
2518 || pArpIPv4->Hdr.ar_htype != RT_H2BE_U16(RTNET_ARP_ETHER)
2519 || pArpIPv4->Hdr.ar_ptype != RT_H2BE_U16(RTNET_ETHERTYPE_IPV4)))
2520 return;
2521 uint16_t ar_oper = RT_H2BE_U16(pArpIPv4->Hdr.ar_oper);
2522 if (RT_UNLIKELY( ar_oper != RTNET_ARPOP_REQUEST
2523 && ar_oper != RTNET_ARPOP_REPLY))
2524 {
2525 Log6(("ar_oper=%#x\n", ar_oper));
2526 return;
2527 }
2528
2529 /*
2530 * Tag the SG as ARP IPv4 for later editing, then check for addresses
2531 * which can be removed or added to the address cache of the sender.
2532 */
2533 *pfSgFlags |= INTNETSG_FLAGS_ARP_IPV4;
2534
2535 if ( ar_oper == RTNET_ARPOP_REPLY
2536 && !intnetR0IsMacAddrMulticast(&pArpIPv4->ar_tha)
2537 && ( pArpIPv4->ar_tha.au16[0]
2538 || pArpIPv4->ar_tha.au16[1]
2539 || pArpIPv4->ar_tha.au16[2])
2540 && intnetR0IPv4AddrIsGood(pArpIPv4->ar_tpa))
2541 intnetR0IfAddrCacheDelete(pIf, &pIf->aAddrCache[kIntNetAddrType_IPv4],
2542 (PCRTNETADDRU)&pArpIPv4->ar_tpa, sizeof(RTNETADDRIPV4), "if/arp");
2543
2544 if ( !memcmp(&pArpIPv4->ar_sha, &pIf->MacAddr, sizeof(RTMAC))
2545 && intnetR0IPv4AddrIsGood(pArpIPv4->ar_spa))
2546 intnetR0IfAddrCacheAdd(pIf, &pIf->aAddrCache[kIntNetAddrType_IPv4],
2547 (PCRTNETADDRU)&pArpIPv4->ar_spa, sizeof(RTNETADDRIPV4), "if/arp");
2548}
2549
2550
2551
2552/**
2553 * Checks packets send by a normal interface for new network
2554 * layer addresses.
2555 *
2556 * @param pIf The interface that's sending the frame.
2557 * @param pbFrame The frame.
2558 * @param cbFrame The size of the frame.
2559 * @param fGso Set if this is a GSO frame, clear if regular.
2560 * @param pfSgFlags Pointer to the SG flags. This is used to tag the packet so we
2561 * don't have to repeat the frame parsing in intnetR0TrunkIfSend.
2562 */
2563static void intnetR0IfSnoopAddr(PINTNETIF pIf, uint8_t const *pbFrame, uint32_t cbFrame, bool fGso, uint16_t *pfSgFlags)
2564{
2565 /*
2566 * Fish out the ethertype and look for stuff we can handle.
2567 */
2568 if (cbFrame <= sizeof(RTNETETHERHDR))
2569 return;
2570 cbFrame -= sizeof(RTNETETHERHDR);
2571
2572 uint16_t EtherType = RT_H2BE_U16(((PCRTNETETHERHDR)pbFrame)->EtherType);
2573 switch (EtherType)
2574 {
2575 case RTNET_ETHERTYPE_IPV4:
2576 intnetR0IfSnoopIPv4SourceAddr(pIf, (PCRTNETIPV4)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, fGso);
2577 break;
2578
2579 case RTNET_ETHERTYPE_IPV6:
2580 intnetR0IfSnoopIPv6SourceAddr(pIf, (PCRTNETIPV6)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, fGso);
2581 break;
2582
2583#if 0 /** @todo IntNet: implement IPX for wireless MAC sharing? */
2584 case RTNET_ETHERTYPE_IPX_1:
2585 case RTNET_ETHERTYPE_IPX_2:
2586 case RTNET_ETHERTYPE_IPX_3:
2587 intnetR0IfSnoopIpxSourceAddr(pIf, (PCINTNETIPX)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, pfSgFlags);
2588 break;
2589#endif
2590 case RTNET_ETHERTYPE_ARP:
2591 intnetR0IfSnoopArpAddr(pIf, (PCRTNETARPIPV4)((PCRTNETETHERHDR)pbFrame + 1), cbFrame, pfSgFlags);
2592 break;
2593 }
2594}
2595
2596
2597/**
2598 * Writes a frame packet to the ring buffer.
2599 *
2600 * @returns VBox status code.
2601 * @param pBuf The buffer.
2602 * @param pRingBuf The ring buffer to read from.
2603 * @param pSG The gather list.
2604 * @param pNewDstMac Set the destination MAC address to the address if specified.
2605 */
2606static int intnetR0RingWriteFrame(PINTNETRINGBUF pRingBuf, PCINTNETSG pSG, PCRTMAC pNewDstMac)
2607{
2608 PINTNETHDR pHdr = NULL; /* shut up gcc*/
2609 void *pvDst = NULL; /* ditto */
2610 int rc;
2611 if (pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
2612 rc = IntNetRingAllocateFrame(pRingBuf, pSG->cbTotal, &pHdr, &pvDst);
2613 else
2614 rc = IntNetRingAllocateGsoFrame(pRingBuf, pSG->cbTotal, &pSG->GsoCtx, &pHdr, &pvDst);
2615 if (RT_SUCCESS(rc))
2616 {
2617 IntNetSgRead(pSG, pvDst);
2618 if (pNewDstMac)
2619 ((PRTNETETHERHDR)pvDst)->DstMac = *pNewDstMac;
2620
2621 IntNetRingCommitFrame(pRingBuf, pHdr);
2622 return VINF_SUCCESS;
2623 }
2624 return rc;
2625}
2626
2627
2628/**
2629 * Sends a frame to a specific interface.
2630 *
2631 * @param pIf The interface.
2632 * @param pIfSender The interface sending the frame. This is NULL if it's the trunk.
2633 * @param pSG The gather buffer which data is being sent to the interface.
2634 * @param pNewDstMac Set the destination MAC address to the address if specified.
2635 */
2636static void intnetR0IfSend(PINTNETIF pIf, PINTNETIF pIfSender, PINTNETSG pSG, PCRTMAC pNewDstMac)
2637{
2638 /*
2639 * Grab the receive/producer lock and copy over the frame.
2640 */
2641 RTSpinlockAcquire(pIf->hRecvInSpinlock);
2642 int rc = intnetR0RingWriteFrame(&pIf->pIntBuf->Recv, pSG, pNewDstMac);
2643 RTSpinlockReleaseNoInts(pIf->hRecvInSpinlock);
2644 if (RT_SUCCESS(rc))
2645 {
2646 pIf->cYields = 0;
2647 RTSemEventSignal(pIf->hRecvEvent);
2648 return;
2649 }
2650
2651 Log(("intnetR0IfSend: overflow cb=%d hIf=%RX32\n", pSG->cbTotal, pIf->hIf));
2652
2653 /*
2654 * Scheduling hack, for unicore machines primarily.
2655 */
2656 if ( pIf->fActive
2657 && pIf->cYields < 4 /* just twice */
2658 && pIfSender /* but not if it's from the trunk */
2659 && RTThreadPreemptIsEnabled(NIL_RTTHREAD)
2660 )
2661 {
2662 unsigned cYields = 2;
2663 while (--cYields > 0)
2664 {
2665 RTSemEventSignal(pIf->hRecvEvent);
2666 RTThreadYield();
2667
2668 RTSpinlockAcquire(pIf->hRecvInSpinlock);
2669 rc = intnetR0RingWriteFrame(&pIf->pIntBuf->Recv, pSG, pNewDstMac);
2670 RTSpinlockReleaseNoInts(pIf->hRecvInSpinlock);
2671 if (RT_SUCCESS(rc))
2672 {
2673 STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatYieldsOk);
2674 RTSemEventSignal(pIf->hRecvEvent);
2675 return;
2676 }
2677 pIf->cYields++;
2678 }
2679 STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatYieldsNok);
2680 }
2681
2682 /* ok, the frame is lost. */
2683 STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatLost);
2684 RTSemEventSignal(pIf->hRecvEvent);
2685}
2686
2687
2688/**
2689 * Fallback path that does the GSO segmenting before passing the frame on to the
2690 * trunk interface.
2691 *
2692 * The caller holds the trunk lock.
2693 *
2694 * @param pThis The trunk.
2695 * @param pIfSender The IF sending the frame.
2696 * @param pSG Pointer to the gather list.
2697 * @param fDst The destination flags.
2698 */
2699static int intnetR0TrunkIfSendGsoFallback(PINTNETTRUNKIF pThis, PINTNETIF pIfSender, PINTNETSG pSG, uint32_t fDst)
2700{
2701 /*
2702 * Since we're only using this for GSO frame coming from the internal
2703 * network interfaces and never the trunk, we can assume there is only
2704 * one segment. This simplifies the code quite a bit.
2705 */
2706 Assert(PDMNetGsoIsValid(&pSG->GsoCtx, sizeof(pSG->GsoCtx), pSG->cbTotal));
2707 AssertReturn(pSG->cSegsUsed == 1, VERR_INTERNAL_ERROR_4);
2708
2709 union
2710 {
2711 uint8_t abBuf[sizeof(INTNETSG) + sizeof(INTNETSEG)];
2712 INTNETSG SG;
2713 } u;
2714
2715 /** @todo We have to adjust MSS so it does not exceed the value configured for
2716 * the host's interface.
2717 */
2718
2719 /*
2720 * Carve out the frame segments with the header and frame in different
2721 * scatter / gather segments.
2722 */
2723 uint32_t const cSegs = PDMNetGsoCalcSegmentCount(&pSG->GsoCtx, pSG->cbTotal);
2724 for (uint32_t iSeg = 0; iSeg < cSegs; iSeg++)
2725 {
2726 uint32_t cbSegPayload, cbSegHdrs;
2727 uint32_t offSegPayload = PDMNetGsoCarveSegment(&pSG->GsoCtx, (uint8_t *)pSG->aSegs[0].pv, pSG->cbTotal, iSeg, cSegs,
2728 pIfSender->abGsoHdrs, &cbSegHdrs, &cbSegPayload);
2729
2730 IntNetSgInitTempSegs(&u.SG, cbSegHdrs + cbSegPayload, 2, 2);
2731 u.SG.aSegs[0].Phys = NIL_RTHCPHYS;
2732 u.SG.aSegs[0].pv = pIfSender->abGsoHdrs;
2733 u.SG.aSegs[0].cb = cbSegHdrs;
2734 u.SG.aSegs[1].Phys = NIL_RTHCPHYS;
2735 u.SG.aSegs[1].pv = (uint8_t *)pSG->aSegs[0].pv + offSegPayload;
2736 u.SG.aSegs[1].cb = (uint32_t)cbSegPayload;
2737
2738 int rc = pThis->pIfPort->pfnXmit(pThis->pIfPort, pIfSender->pvIfData, &u.SG, fDst);
2739 if (RT_FAILURE(rc))
2740 return rc;
2741 }
2742 return VINF_SUCCESS;
2743}
2744
2745
2746/**
2747 * Checks if any of the given trunk destinations can handle this kind of GSO SG.
2748 *
2749 * @returns true if it can, false if it cannot.
2750 * @param pThis The trunk.
2751 * @param pSG The scatter / gather buffer.
2752 * @param fDst The destination mask.
2753 */
2754DECLINLINE(bool) intnetR0TrunkIfCanHandleGsoFrame(PINTNETTRUNKIF pThis, PINTNETSG pSG, uint32_t fDst)
2755{
2756 uint8_t u8Type = pSG->GsoCtx.u8Type;
2757 AssertReturn(u8Type < 32, false); /* paranoia */
2758 uint32_t fMask = RT_BIT_32(u8Type);
2759
2760 if (fDst == INTNETTRUNKDIR_HOST)
2761 return !!(pThis->fHostGsoCapabilites & fMask);
2762 if (fDst == INTNETTRUNKDIR_WIRE)
2763 return !!(pThis->fWireGsoCapabilites & fMask);
2764 Assert(fDst == (INTNETTRUNKDIR_WIRE | INTNETTRUNKDIR_HOST));
2765 return !!(pThis->fHostGsoCapabilites & pThis->fWireGsoCapabilites & fMask);
2766}
2767
2768
2769/**
2770 * Calculates the checksum of a full ipv6 frame.
2771 *
2772 * @returns 16-bit hecksum value.
2773 * @param pIpHdr The IPv6 header (network endian (big)).
2774 * @param bProtocol The protocol number. This can be the same as the
2775 * ip6_nxt field, but doesn't need to be.
2776 * @param cbPkt The packet size (host endian of course). This can
2777 * be the same as the ip6_plen field, but as with @a
2778 * bProtocol it won't be when extension headers are
2779 * present. For UDP this will be uh_ulen converted to
2780 * host endian.
2781 */
2782static uint16_t computeIPv6FullChecksum(PCRTNETIPV6 pIpHdr)
2783{
2784 uint16_t const *data;
2785 int len = RT_BE2H_U16(pIpHdr->ip6_plen);
2786 uint32_t sum = RTNetIPv6PseudoChecksum(pIpHdr);
2787
2788 /* add the payload */
2789 data = (uint16_t *) (pIpHdr + 1);
2790 while(len > 1)
2791 {
2792 sum += *(data);
2793 data++;
2794 len -= 2;
2795 }
2796
2797 if(len > 0)
2798 sum += *((uint8_t *) data);
2799
2800 while(sum >> 16)
2801 sum = (sum & 0xffff) + (sum >> 16);
2802
2803 return (uint16_t) ~sum;
2804}
2805
2806/**
2807 * Sends a frame down the trunk.
2808 *
2809 * @param pThis The trunk.
2810 * @param pNetwork The network the frame is being sent to.
2811 * @param pIfSender The IF sending the frame. Used for MAC address
2812 * checks in shared MAC mode.
2813 * @param fDst The destination flags.
2814 * @param pSG Pointer to the gather list.
2815 */
2816static void intnetR0TrunkIfSend(PINTNETTRUNKIF pThis, PINTNETNETWORK pNetwork, PINTNETIF pIfSender,
2817 uint32_t fDst, PINTNETSG pSG)
2818{
2819 /*
2820 * Quick sanity check.
2821 */
2822 AssertPtr(pThis);
2823 AssertPtr(pNetwork);
2824 AssertPtr(pIfSender);
2825 AssertPtr(pSG);
2826 Assert(fDst);
2827 AssertReturnVoid(pThis->pIfPort);
2828
2829 /*
2830 * Edit the frame if we're sharing the MAC address with the host on the wire.
2831 *
2832 * If the frame is headed for both the host and the wire, we'll have to send
2833 * it to the host before making any modifications, and force the OS specific
2834 * backend to copy it. We do this by marking it as TEMP (which is always the
2835 * case right now).
2836 */
2837 if ( (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
2838 && (fDst & INTNETTRUNKDIR_WIRE))
2839 {
2840 /*
2841 * Dispatch it to the host before making changes.
2842 */
2843 if (fDst & INTNETTRUNKDIR_HOST)
2844 {
2845 Assert(pSG->fFlags & INTNETSG_FLAGS_TEMP); /* make sure copy is forced */
2846 intnetR0TrunkIfSend(pThis, pNetwork, pIfSender, INTNETTRUNKDIR_HOST, pSG);
2847 fDst &= ~INTNETTRUNKDIR_HOST;
2848 }
2849
2850 /*
2851 * Edit the source address so that it it's the same as the host.
2852 */
2853 /* ASSUME frame from IntNetR0IfSend! */
2854 AssertReturnVoid(pSG->cSegsUsed == 1);
2855 AssertReturnVoid(pSG->cbTotal >= sizeof(RTNETETHERHDR));
2856 AssertReturnVoid(pIfSender);
2857 PRTNETETHERHDR pEthHdr = (PRTNETETHERHDR)pSG->aSegs[0].pv;
2858
2859 pEthHdr->SrcMac = pThis->MacAddr;
2860
2861 /*
2862 * Deal with tags from the snooping phase.
2863 */
2864 if (pSG->fFlags & INTNETSG_FLAGS_ARP_IPV4)
2865 {
2866 /*
2867 * APR IPv4: replace hardware (MAC) addresses because these end up
2868 * in ARP caches. So, if we don't the other machines will
2869 * send the packets to the MAC address of the guest
2870 * instead of the one of the host, which won't work on
2871 * wireless of course...
2872 */
2873 PRTNETARPIPV4 pArp = (PRTNETARPIPV4)(pEthHdr + 1);
2874 if (!memcmp(&pArp->ar_sha, &pIfSender->MacAddr, sizeof(RTMAC)))
2875 {
2876 Log6(("tw: ar_sha %.6Rhxs -> %.6Rhxs\n", &pArp->ar_sha, &pThis->MacAddr));
2877 pArp->ar_sha = pThis->MacAddr;
2878 }
2879 if (!memcmp(&pArp->ar_tha, &pIfSender->MacAddr, sizeof(RTMAC))) /* just in case... */
2880 {
2881 Log6(("tw: ar_tha %.6Rhxs -> %.6Rhxs\n", &pArp->ar_tha, &pThis->MacAddr));
2882 pArp->ar_tha = pThis->MacAddr;
2883 }
2884 }
2885 else if (pEthHdr->EtherType == RT_H2BE_U16(RTNET_ETHERTYPE_IPV6))
2886 {
2887 /*
2888 * IPV6 ICMP Neighbor Discovery : replace
2889 * 1) the advertised source mac address in outgoing neighbor sollicitations
2890 * with the HW MAC address of the trunk interface,
2891 * 2) the advertised target mac address in outgoing neighbor advertisements
2892 * with the HW mac address of the trunk interface.
2893 *
2894 * Note that this only applies to traffic going out on the trunk. Incoming
2895 * NS/NA will never advertise any VM mac address, so we do not need to touch
2896 * them. Other VMs on this bridge as well as the host will see and use the VM's
2897 * actual mac addresses.
2898 *
2899 */
2900
2901 PRTNETIPV6 pIPv6 = (PRTNETIPV6)(pEthHdr + 1);
2902 PRTNETNDP pNd = (PRTNETNDP)(pIPv6 + 1);
2903 PRTNETNDP_SLLA_OPT pLLAOpt = (PRTNETNDP_SLLA_OPT)(pNd + 1);
2904
2905 /* make sure we have enough bytes to work with */
2906 if(pSG->cbTotal >= (RTNETIPV6_MIN_LEN + RTNETIPV6_ICMPV6_ND_WITH_LLA_OPT_MIN_LEN) &&
2907 /* ensure the packet came from our LAN (not gone through any router) */
2908 pIPv6->ip6_hlim == 0xff &&
2909 /* protocol has to be icmpv6 */
2910 pIPv6->ip6_nxt == RTNETIPV6_PROT_ICMPV6 &&
2911 /* we either have a sollicitation with source link layer addr. opt, or */
2912 ((pNd->icmp6_type == RTNETIPV6_ICMP_NS_TYPE &&
2913 pNd->icmp6_code == RTNETIPV6_ICMPV6_CODE_0 &&
2914 pLLAOpt->type == RTNETIPV6_ICMP_ND_SLLA_OPT) ||
2915 /* an advertisement with target link layer addr. option */
2916 ((pNd->icmp6_type == RTNETIPV6_ICMP_NA_TYPE &&
2917 pNd->icmp6_code == RTNETIPV6_ICMPV6_CODE_0 &&
2918 pLLAOpt->type == RTNETIPV6_ICMP_ND_TLLA_OPT)) ) &&
2919 pLLAOpt->len == RTNETIPV6_ICMP_ND_LLA_LEN)
2920 {
2921 /* swap the advertised VM MAC address with the trunk's */
2922 pLLAOpt->slla = pThis->MacAddr;
2923
2924 /* recompute the checksum since we changed the packet */
2925 pNd->icmp6_cksum = 0;
2926 pNd->icmp6_cksum = computeIPv6FullChecksum(pIPv6);
2927 }
2928
2929 }
2930 }
2931
2932 /*
2933 * Send the frame, handling the GSO fallback .
2934 * .
2935 * Note! The trunk implementation will re-check that the trunk is active .
2936 * before sending, so we don't have to duplicate that effort here.
2937 */
2938 STAM_REL_PROFILE_START(&pIfSender->pIntBuf->StatSend2, a);
2939 int rc;
2940 if ( pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID
2941 || intnetR0TrunkIfCanHandleGsoFrame(pThis, pSG, fDst) )
2942 rc = pThis->pIfPort->pfnXmit(pThis->pIfPort, pIfSender->pvIfData, pSG, fDst);
2943 else
2944 rc = intnetR0TrunkIfSendGsoFallback(pThis, pIfSender, pSG, fDst);
2945 STAM_REL_PROFILE_STOP(&pIfSender->pIntBuf->StatSend2, a);
2946
2947 /** @todo failure statistics? */
2948 Log2(("intnetR0TrunkIfSend: %Rrc fDst=%d\n", rc, fDst)); NOREF(rc);
2949}
2950
2951
2952/**
2953 * Work around the issue with WiFi routers that replace IPv6 multicast
2954 * Ethernet addresses with unicast ones. We check IPv6 destination address
2955 * to determine if the packet originally had a multicast address, and if so
2956 * we restore the original address and treat the modified packet as being a
2957 * broadcast.
2958 *
2959 * @param pNetwork The network the frame is being sent to.
2960 * @param pSG Pointer to the gather list for the frame.
2961 * @param pEthHdr Pointer to the ethernet header.
2962 */
2963static bool intnetR0NetworkDetectAndFixNdBroadcast(PINTNETNETWORK pNetwork, PINTNETSG pSG, PRTNETETHERHDR pEthHdr)
2964{
2965 NOREF(pNetwork);
2966
2967 if (RT_BE2H_U16(pEthHdr->EtherType) != RTNET_ETHERTYPE_IPV6)
2968 return false;
2969 /*
2970 * Check the minimum size and get a linear copy of the thing to work on,
2971 * using the temporary buffer if necessary.
2972 */
2973 if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + sizeof(RTNETIPV6) +
2974 sizeof(RTNETNDP)))
2975 return false;
2976 uint8_t bTmp[sizeof(RTNETIPV6) + sizeof(RTNETNDP)];
2977 PRTNETIPV6 pIPv6 = (PRTNETIPV6)((uint8_t *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
2978 if ( pSG->cSegsUsed != 1
2979 && pSG->aSegs[0].cb < sizeof(RTNETETHERHDR) + sizeof(RTNETIPV6) +
2980 sizeof(RTNETNDP))
2981 {
2982 Log6(("fw: Copying IPv6 pkt %u\n", sizeof(RTNETIPV6)));
2983 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), sizeof(RTNETIPV6)
2984 + sizeof(RTNETNDP), bTmp))
2985 return false;
2986 pIPv6 = (PRTNETIPV6)bTmp;
2987 }
2988
2989 /* Check IPv6 destination address if it is a multicast address. */
2990 static uint8_t auSolicitedNodeMulticastPrefix[] =
2991 {
2992 0xFF, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
2993 0x00, 0x00, 0x00, 0x01, 0xff
2994 };
2995 if (memcmp(pIPv6->ip6_dst.au8, auSolicitedNodeMulticastPrefix,
2996 sizeof(auSolicitedNodeMulticastPrefix)) == 0)
2997 {
2998 /*
2999 * The original must have been composed of 0x3333 followed by the last
3000 * four bytes of the solicited-node multicast address.
3001 */
3002 if (pSG->aSegs[0].cb < sizeof(RTNETETHERHDR))
3003 {
3004 RTMAC DstMac;
3005 DstMac.au16[0] = 0x3333;
3006 DstMac.au16[1] = pIPv6->ip6_dst.au16[6];
3007 DstMac.au16[2] = pIPv6->ip6_dst.au16[7];
3008 return intnetR0SgWritePart(pSG, RT_OFFSETOF(RTNETETHERHDR, DstMac), sizeof(RTMAC), &DstMac);
3009 }
3010 pEthHdr = (PRTNETETHERHDR)pSG->aSegs[0].pv;
3011 pEthHdr->DstMac.au16[0] = 0x3333;
3012 pEthHdr->DstMac.au16[1] = pIPv6->ip6_dst.au16[6];
3013 pEthHdr->DstMac.au16[2] = pIPv6->ip6_dst.au16[7];
3014 return true;
3015 }
3016
3017 return false;
3018}
3019
3020
3021/**
3022 * Snoops a multicast ICMPv6 ND DAD from the wire via the trunk connection.
3023 *
3024 * @param pNetwork The network the frame is being sent to.
3025 * @param pSG Pointer to the gather list for the frame.
3026 * @param pEthHdr Pointer to the ethernet header.
3027 */
3028static void intnetR0NetworkSnoopNAFromWire(PINTNETNETWORK pNetwork, PINTNETSG pSG, PRTNETETHERHDR pEthHdr)
3029{
3030 NOREF(pEthHdr);
3031
3032 /*
3033 * Check the minimum size and get a linear copy of the thing to work on,
3034 * using the temporary buffer if necessary.
3035 */
3036 if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + sizeof(RTNETIPV6) +
3037 sizeof(RTNETNDP)))
3038 return;
3039 PRTNETIPV6 pIPv6 = (PRTNETIPV6)((uint8_t *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
3040 if ( pSG->cSegsUsed != 1
3041 && pSG->aSegs[0].cb < sizeof(RTNETETHERHDR) + sizeof(RTNETIPV6) +
3042 sizeof(RTNETNDP))
3043 {
3044 Log6(("fw: Copying IPv6 pkt %u\n", sizeof(RTNETIPV6)));
3045 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), sizeof(RTNETIPV6)
3046 + sizeof(RTNETNDP), pNetwork->pbTmp))
3047 return;
3048 pSG->fFlags |= INTNETSG_FLAGS_PKT_CP_IN_TMP;
3049 pIPv6 = (PRTNETIPV6)pNetwork->pbTmp;
3050 }
3051
3052 PCRTNETNDP pNd = (PCRTNETNDP) (pIPv6 + 1);
3053
3054 /*
3055 * a multicast NS with :: as source address means a DAD packet.
3056 * if it comes from the wire and we have the DAD'd address in our cache,
3057 * flush the entry as the address is being acquired by someone else on
3058 * the network.
3059 */
3060 if ( pIPv6->ip6_hlim == 0xff
3061 && pIPv6->ip6_nxt == RTNETIPV6_PROT_ICMPV6
3062 && pNd->icmp6_type == RTNETIPV6_ICMP_NS_TYPE
3063 && pNd->icmp6_code == RTNETIPV6_ICMPV6_CODE_0
3064 && pIPv6->ip6_src.QWords.qw0 == 0
3065 && pIPv6->ip6_src.QWords.qw1 == 0)
3066 {
3067
3068 intnetR0NetworkAddrCacheDelete(pNetwork, (PCRTNETADDRU) &pNd->target_address,
3069 kIntNetAddrType_IPv6, sizeof(RTNETADDRIPV6), "tif/ip6");
3070 }
3071}
3072/**
3073 * Edits an ARP packet arriving from the wire via the trunk connection.
3074 *
3075 * @param pNetwork The network the frame is being sent to.
3076 * @param pSG Pointer to the gather list for the frame.
3077 * The flags and data content may be updated.
3078 * @param pEthHdr Pointer to the ethernet header. This may also be
3079 * updated if it's a unicast...
3080 */
3081static void intnetR0NetworkEditArpFromWire(PINTNETNETWORK pNetwork, PINTNETSG pSG, PRTNETETHERHDR pEthHdr)
3082{
3083 /*
3084 * Check the minimum size and get a linear copy of the thing to work on,
3085 * using the temporary buffer if necessary.
3086 */
3087 if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + sizeof(RTNETARPIPV4)))
3088 return;
3089 PRTNETARPIPV4 pArpIPv4 = (PRTNETARPIPV4)((uint8_t *)pSG->aSegs[0].pv + sizeof(RTNETETHERHDR));
3090 if ( pSG->cSegsUsed != 1
3091 && pSG->aSegs[0].cb < sizeof(RTNETETHERHDR) + sizeof(RTNETARPIPV4))
3092 {
3093 Log6(("fw: Copying ARP pkt %u\n", sizeof(RTNETARPIPV4)));
3094 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), sizeof(RTNETARPIPV4), pNetwork->pbTmp))
3095 return;
3096 pSG->fFlags |= INTNETSG_FLAGS_PKT_CP_IN_TMP;
3097 pArpIPv4 = (PRTNETARPIPV4)pNetwork->pbTmp;
3098 }
3099
3100 /*
3101 * Ignore packets which doesn't interest us or we perceive as malformed.
3102 */
3103 if (RT_UNLIKELY( pArpIPv4->Hdr.ar_hlen != sizeof(RTMAC)
3104 || pArpIPv4->Hdr.ar_plen != sizeof(RTNETADDRIPV4)
3105 || pArpIPv4->Hdr.ar_htype != RT_H2BE_U16(RTNET_ARP_ETHER)
3106 || pArpIPv4->Hdr.ar_ptype != RT_H2BE_U16(RTNET_ETHERTYPE_IPV4)))
3107 return;
3108 uint16_t ar_oper = RT_H2BE_U16(pArpIPv4->Hdr.ar_oper);
3109 if (RT_UNLIKELY( ar_oper != RTNET_ARPOP_REQUEST
3110 && ar_oper != RTNET_ARPOP_REPLY))
3111 {
3112 Log6(("ar_oper=%#x\n", ar_oper));
3113 return;
3114 }
3115
3116 /* Tag it as ARP IPv4. */
3117 pSG->fFlags |= INTNETSG_FLAGS_ARP_IPV4;
3118
3119 /*
3120 * The thing we're interested in here is a reply to a query made by a guest
3121 * since we modified the MAC in the initial request the guest made.
3122 */
3123 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
3124 RTMAC MacAddrTrunk;
3125 if (pNetwork->MacTab.pTrunk)
3126 MacAddrTrunk = pNetwork->MacTab.pTrunk->MacAddr;
3127 else
3128 memset(&MacAddrTrunk, 0, sizeof(MacAddrTrunk));
3129 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock);
3130 if ( ar_oper == RTNET_ARPOP_REPLY
3131 && !memcmp(&pArpIPv4->ar_tha, &MacAddrTrunk, sizeof(RTMAC)))
3132 {
3133 PINTNETIF pIf = intnetR0NetworkAddrCacheLookupIf(pNetwork, (PCRTNETADDRU)&pArpIPv4->ar_tpa,
3134 kIntNetAddrType_IPv4, sizeof(pArpIPv4->ar_tpa));
3135 if (pIf)
3136 {
3137 Log6(("fw: ar_tha %.6Rhxs -> %.6Rhxs\n", &pArpIPv4->ar_tha, &pIf->MacAddr));
3138 pArpIPv4->ar_tha = pIf->MacAddr;
3139 if (!memcmp(&pEthHdr->DstMac, &MacAddrTrunk, sizeof(RTMAC)))
3140 {
3141 Log6(("fw: DstMac %.6Rhxs -> %.6Rhxs\n", &pEthHdr->DstMac, &pIf->MacAddr));
3142 pEthHdr->DstMac = pIf->MacAddr;
3143 if ((void *)pEthHdr != pSG->aSegs[0].pv)
3144 intnetR0SgWritePart(pSG, RT_OFFSETOF(RTNETETHERHDR, DstMac), sizeof(RTMAC), &pIf->MacAddr);
3145 }
3146 intnetR0BusyDecIf(pIf);
3147
3148 /* Write back the packet if we've been making changes to a buffered copy. */
3149 if (pSG->fFlags & INTNETSG_FLAGS_PKT_CP_IN_TMP)
3150 intnetR0SgWritePart(pSG, sizeof(RTNETETHERHDR), sizeof(PRTNETARPIPV4), pArpIPv4);
3151 }
3152 }
3153}
3154
3155
3156/**
3157 * Detects and edits an DHCP packet arriving from the internal net.
3158 *
3159 * @param pNetwork The network the frame is being sent to.
3160 * @param pSG Pointer to the gather list for the frame.
3161 * The flags and data content may be updated.
3162 * @param pEthHdr Pointer to the ethernet header. This may also be
3163 * updated if it's a unicast...
3164 */
3165static void intnetR0NetworkEditDhcpFromIntNet(PINTNETNETWORK pNetwork, PINTNETSG pSG, PRTNETETHERHDR pEthHdr)
3166{
3167 NOREF(pEthHdr);
3168
3169 /*
3170 * Check the minimum size and get a linear copy of the thing to work on,
3171 * using the temporary buffer if necessary.
3172 */
3173 if (RT_UNLIKELY(pSG->cbTotal < sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN))
3174 return;
3175 /*
3176 * Get a pointer to a linear copy of the full packet, using the
3177 * temporary buffer if necessary.
3178 */
3179 PCRTNETIPV4 pIpHdr = (PCRTNETIPV4)((PCRTNETETHERHDR)pSG->aSegs[0].pv + 1);
3180 uint32_t cbPacket = pSG->cbTotal - sizeof(RTNETETHERHDR);
3181 if (pSG->cSegsUsed > 1)
3182 {
3183 cbPacket = RT_MIN(cbPacket, INTNETNETWORK_TMP_SIZE);
3184 Log6(("intnetR0NetworkEditDhcpFromIntNet: Copying IPv4/UDP/DHCP pkt %u\n", cbPacket));
3185 if (!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR), cbPacket, pNetwork->pbTmp))
3186 return;
3187 //pSG->fFlags |= INTNETSG_FLAGS_PKT_CP_IN_TMP;
3188 pIpHdr = (PCRTNETIPV4)pNetwork->pbTmp;
3189 }
3190
3191 /*
3192 * Validate the IP header and find the UDP packet.
3193 */
3194 if (!RTNetIPv4IsHdrValid(pIpHdr, cbPacket, pSG->cbTotal - sizeof(RTNETETHERHDR), true /*fCheckSum*/))
3195 {
3196 Log6(("intnetR0NetworkEditDhcpFromIntNet: bad ip header\n"));
3197 return;
3198 }
3199 size_t cbIpHdr = pIpHdr->ip_hl * 4;
3200 if ( pIpHdr->ip_p != RTNETIPV4_PROT_UDP /* DHCP is UDP. */
3201 || cbPacket < cbIpHdr + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN) /* Min DHCP packet len */
3202 return;
3203
3204 size_t cbUdpPkt = cbPacket - cbIpHdr;
3205 PCRTNETUDP pUdpHdr = (PCRTNETUDP)((uintptr_t)pIpHdr + cbIpHdr);
3206 /* We are only interested in DHCP packets coming from client to server. */
3207 if ( RT_BE2H_U16(pUdpHdr->uh_dport) != RTNETIPV4_PORT_BOOTPS
3208 || RT_BE2H_U16(pUdpHdr->uh_sport) != RTNETIPV4_PORT_BOOTPC)
3209 return;
3210
3211 /*
3212 * Check if the DHCP message is valid and get the type.
3213 */
3214 if (!RTNetIPv4IsUDPValid(pIpHdr, pUdpHdr, pUdpHdr + 1, cbUdpPkt, true /*fCheckSum*/))
3215 {
3216 Log6(("intnetR0NetworkEditDhcpFromIntNet: Bad UDP packet\n"));
3217 return;
3218 }
3219 PCRTNETBOOTP pDhcp = (PCRTNETBOOTP)(pUdpHdr + 1);
3220 uint8_t bMsgType;
3221 if (!RTNetIPv4IsDHCPValid(pUdpHdr, pDhcp, cbUdpPkt - sizeof(*pUdpHdr), &bMsgType))
3222 {
3223 Log6(("intnetR0NetworkEditDhcpFromIntNet: Bad DHCP packet\n"));
3224 return;
3225 }
3226
3227 switch (bMsgType)
3228 {
3229 case RTNET_DHCP_MT_DISCOVER:
3230 case RTNET_DHCP_MT_REQUEST:
3231 /*
3232 * Must set the broadcast flag or we won't catch the respons.
3233 */
3234 if (!(pDhcp->bp_flags & RT_H2BE_U16_C(RTNET_DHCP_FLAG_BROADCAST)))
3235 {
3236 Log6(("intnetR0NetworkEditDhcpFromIntNet: Setting broadcast flag in DHCP %#x, previously %x\n",
3237 bMsgType, pDhcp->bp_flags));
3238
3239 /* Patch flags */
3240 uint16_t uFlags = pDhcp->bp_flags | RT_H2BE_U16_C(RTNET_DHCP_FLAG_BROADCAST);
3241 intnetR0SgWritePart(pSG, (uintptr_t)&pDhcp->bp_flags - (uintptr_t)pIpHdr + sizeof(RTNETETHERHDR), sizeof(uFlags), &uFlags);
3242
3243 /* Patch UDP checksum */
3244 uint32_t uChecksum = (uint32_t)~pUdpHdr->uh_sum + RT_H2BE_U16_C(RTNET_DHCP_FLAG_BROADCAST);
3245 while (uChecksum >> 16)
3246 uChecksum = (uChecksum >> 16) + (uChecksum & 0xFFFF);
3247 uChecksum = ~uChecksum;
3248 intnetR0SgWritePart(pSG, (uintptr_t)&pUdpHdr->uh_sum - (uintptr_t)pIpHdr + sizeof(RTNETETHERHDR), sizeof(pUdpHdr->uh_sum), &uChecksum);
3249 }
3250
3251#ifdef RT_OS_DARWIN
3252 /*
3253 * Work around little endian checksum issue in mac os x 10.7.0 GM.
3254 */
3255 if ( pIpHdr->ip_tos
3256 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_WORKAROUND_1))
3257 {
3258 /* Patch it. */
3259 uint8_t uTos = pIpHdr->ip_tos;
3260 uint8_t uZero = 0;
3261 intnetR0SgWritePart(pSG, sizeof(RTNETETHERHDR) + 1, sizeof(uZero), &uZero);
3262
3263 /* Patch the IP header checksum. */
3264 uint32_t uChecksum = (uint32_t)~pIpHdr->ip_sum - (uTos << 8);
3265 while (uChecksum >> 16)
3266 uChecksum = (uChecksum >> 16) + (uChecksum & 0xFFFF);
3267 uChecksum = ~uChecksum;
3268
3269 Log(("intnetR0NetworkEditDhcpFromIntNet: cleared ip_tos (was %#04x); ip_sum=%#06x -> %#06x\n",
3270 uTos, RT_BE2H_U16(pIpHdr->ip_sum), RT_BE2H_U16(uChecksum) ));
3271 intnetR0SgWritePart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPV4, ip_sum),
3272 sizeof(pIpHdr->ip_sum), &uChecksum);
3273 }
3274#endif
3275 break;
3276 }
3277}
3278
3279
3280/**
3281 * Checks if the callers context is okay for sending to the specified
3282 * destinations.
3283 *
3284 * @returns true if it's okay, false if it isn't.
3285 * @param pNetwork The network.
3286 * @param pIfSender The interface sending or NULL if it's the trunk.
3287 * @param pDstTab The destination table.
3288 */
3289DECLINLINE(bool) intnetR0NetworkIsContextOk(PINTNETNETWORK pNetwork, PINTNETIF pIfSender, PCINTNETDSTTAB pDstTab)
3290{
3291 NOREF(pNetwork);
3292
3293 /* Sending to the trunk is the problematic path. If the trunk is the
3294 sender we won't be sending to it, so no problem..
3295 Note! fTrunkDst may be set event if if the trunk is the sender. */
3296 if (!pIfSender)
3297 return true;
3298
3299 uint32_t const fTrunkDst = pDstTab->fTrunkDst;
3300 if (!fTrunkDst)
3301 return true;
3302
3303 /* ASSUMES: that the trunk won't change its report while we're checking. */
3304 PINTNETTRUNKIF pTrunk = pDstTab->pTrunk;
3305 if (pTrunk && (fTrunkDst & pTrunk->fNoPreemptDsts) == fTrunkDst)
3306 return true;
3307
3308 /* ASSUMES: That a preemption test detects HM contexts. (Will work on
3309 non-preemptive systems as well.) */
3310 if (RTThreadPreemptIsEnabled(NIL_RTTHREAD))
3311 return true;
3312 return false;
3313}
3314
3315
3316/**
3317 * Checks if the callers context is okay for doing a broadcast given the
3318 * specified source.
3319 *
3320 * @returns true if it's okay, false if it isn't.
3321 * @param pNetwork The network.
3322 * @param fSrc The source of the packet. (0 (intnet),
3323 * INTNETTRUNKDIR_HOST or INTNETTRUNKDIR_WIRE).
3324 */
3325DECLINLINE(bool) intnetR0NetworkIsContextOkForBroadcast(PINTNETNETWORK pNetwork, uint32_t fSrc)
3326{
3327 /* Sending to the trunk is the problematic path. If the trunk is the
3328 sender we won't be sending to it, so no problem. */
3329 if (fSrc)
3330 return true;
3331
3332 /* ASSUMES: That a preemption test detects HM contexts. (Will work on
3333 non-preemptive systems as well.) */
3334 if (RTThreadPreemptIsEnabled(NIL_RTTHREAD))
3335 return true;
3336
3337 /* PARANOIA: Grab the spinlock to make sure the trunk structure cannot be
3338 freed while we're touching it. */
3339 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
3340 PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
3341
3342 bool fRc = !pTrunk
3343 || pTrunk->fNoPreemptDsts == (INTNETTRUNKDIR_HOST | INTNETTRUNKDIR_WIRE)
3344 || ( (!pNetwork->MacTab.fHostActive || (pTrunk->fNoPreemptDsts & INTNETTRUNKDIR_HOST) )
3345 && (!pNetwork->MacTab.fWireActive || (pTrunk->fNoPreemptDsts & INTNETTRUNKDIR_WIRE) ) );
3346
3347 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock);
3348
3349 return fRc;
3350}
3351
3352
3353/**
3354 * Check context, edit, snoop and switch a broadcast frame when sharing MAC
3355 * address on the wire.
3356 *
3357 * The caller must hold at least one interface on the network busy to prevent it
3358 * from destructing beath us.
3359 *
3360 * @param pNetwork The network the frame is being sent to.
3361 * @param fSrc The source of the packet. (0 (intnet),
3362 * INTNETTRUNKDIR_HOST or INTNETTRUNKDIR_WIRE).
3363 * @param pIfSender The sender interface, NULL if trunk. Used to
3364 * prevent sending an echo to the sender.
3365 * @param pSG Pointer to the gather list.
3366 * @param pEthHdr Pointer to the ethernet header.
3367 * @param pDstTab The destination output table.
3368 */
3369static INTNETSWDECISION intnetR0NetworkSharedMacFixAndSwitchBroadcast(PINTNETNETWORK pNetwork,
3370 uint32_t fSrc, PINTNETIF pIfSender,
3371 PINTNETSG pSG, PRTNETETHERHDR pEthHdr,
3372 PINTNETDSTTAB pDstTab)
3373{
3374 /*
3375 * Before doing any work here, we need to figure out if we can handle it
3376 * in the current context. The restrictions are solely on the trunk.
3377 *
3378 * Note! Since at least one interface is busy, there won't be any changes
3379 * to the parameters here (unless the trunk changes its capability
3380 * report, which it shouldn't).
3381 */
3382 if (!intnetR0NetworkIsContextOkForBroadcast(pNetwork, fSrc))
3383 return INTNETSWDECISION_BAD_CONTEXT;
3384
3385 /*
3386 * Check for ICMPv6 Neighbor Advertisements coming from the trunk.
3387 * If we see an advertisement for an IP in our cache, we can safely remove
3388 * it as the IP has probably moved.
3389 */
3390 if ( (fSrc & INTNETTRUNKDIR_WIRE)
3391 && RT_BE2H_U16(pEthHdr->EtherType) == RTNET_ETHERTYPE_IPV6
3392 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
3393 intnetR0NetworkSnoopNAFromWire(pNetwork, pSG, pEthHdr);
3394
3395
3396 /*
3397 * Check for ARP packets from the wire since we'll have to make
3398 * modification to them if we're sharing the MAC address with the host.
3399 */
3400 if ( (fSrc & INTNETTRUNKDIR_WIRE)
3401 && RT_BE2H_U16(pEthHdr->EtherType) == RTNET_ETHERTYPE_ARP
3402 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
3403 intnetR0NetworkEditArpFromWire(pNetwork, pSG, pEthHdr);
3404
3405 /*
3406 * Check for DHCP packets from the internal net since we'll have to set
3407 * broadcast flag in DHCP requests if we're sharing the MAC address with
3408 * the host. GSO is not applicable to DHCP traffic.
3409 */
3410 if ( !fSrc
3411 && RT_BE2H_U16(pEthHdr->EtherType) == RTNET_ETHERTYPE_IPV4
3412 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
3413 intnetR0NetworkEditDhcpFromIntNet(pNetwork, pSG, pEthHdr);
3414
3415 /*
3416 * Snoop address info from packet originating from the trunk connection.
3417 */
3418 if (fSrc)
3419 {
3420#ifdef INTNET_WITH_DHCP_SNOOPING
3421 uint16_t EtherType = RT_BE2H_U16(pEthHdr->EtherType);
3422 if ( ( EtherType == RTNET_ETHERTYPE_IPV4 /* for DHCP */
3423 && pSG->cbTotal >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN
3424 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID )
3425 || (pSG->fFlags & INTNETSG_FLAGS_ARP_IPV4) )
3426 intnetR0TrunkIfSnoopAddr(pNetwork, pSG, EtherType);
3427#else
3428 if (pSG->fFlags & INTNETSG_FLAGS_ARP_IPV4)
3429 intnetR0TrunkIfSnoopArp(pNetwork, pSG);
3430#endif
3431 }
3432
3433 /*
3434 * Create the broadcast destination table.
3435 */
3436 return intnetR0NetworkSwitchBroadcast(pNetwork, fSrc, pIfSender, pDstTab);
3437}
3438
3439
3440/**
3441 * Check context, snoop and switch a unicast frame using the network layer
3442 * address of the link layer one (when sharing MAC address on the wire).
3443 *
3444 * This function is only used for frames coming from the wire (trunk).
3445 *
3446 * @returns true if it's addressed to someone on the network, otherwise false.
3447 * @param pNetwork The network the frame is being sent to.
3448 * @param pSG Pointer to the gather list.
3449 * @param pEthHdr Pointer to the ethernet header.
3450 * @param pDstTab The destination output table.
3451 */
3452static INTNETSWDECISION intnetR0NetworkSharedMacFixAndSwitchUnicast(PINTNETNETWORK pNetwork, PINTNETSG pSG,
3453 PRTNETETHERHDR pEthHdr, PINTNETDSTTAB pDstTab)
3454{
3455 /*
3456 * Extract the network address from the packet.
3457 */
3458 RTNETADDRU Addr;
3459 INTNETADDRTYPE enmAddrType;
3460 uint8_t cbAddr;
3461 switch (RT_BE2H_U16(pEthHdr->EtherType))
3462 {
3463 case RTNET_ETHERTYPE_IPV4:
3464 if (RT_UNLIKELY(!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPV4, ip_dst), sizeof(Addr.IPv4), &Addr)))
3465 {
3466 Log(("intnetshareduni: failed to read ip_dst! cbTotal=%#x\n", pSG->cbTotal));
3467 return intnetR0NetworkSwitchTrunk(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
3468 }
3469 enmAddrType = kIntNetAddrType_IPv4;
3470 cbAddr = sizeof(Addr.IPv4);
3471 Log6(("intnetshareduni: IPv4 %d.%d.%d.%d\n", Addr.au8[0], Addr.au8[1], Addr.au8[2], Addr.au8[3]));
3472 break;
3473
3474 case RTNET_ETHERTYPE_IPV6:
3475 if (RT_UNLIKELY(!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPV6, ip6_dst), sizeof(Addr.IPv6), &Addr)))
3476 {
3477 Log(("intnetshareduni: failed to read ip6_dst! cbTotal=%#x\n", pSG->cbTotal));
3478 return intnetR0NetworkSwitchTrunk(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
3479 }
3480 enmAddrType = kIntNetAddrType_IPv6;
3481 cbAddr = sizeof(Addr.IPv6);
3482 break;
3483#if 0 /** @todo IntNet: implement IPX for wireless MAC sharing? */
3484 case RTNET_ETHERTYPE_IPX_1:
3485 case RTNET_ETHERTYPE_IPX_2:
3486 case RTNET_ETHERTYPE_IPX_3:
3487 if (RT_UNLIKELY(!intnetR0SgReadPart(pSG, sizeof(RTNETETHERHDR) + RT_OFFSETOF(RTNETIPX, ipx_dstnet), sizeof(Addr.IPX), &Addr)))
3488 {
3489 Log(("intnetshareduni: failed to read ipx_dstnet! cbTotal=%#x\n", pSG->cbTotal));
3490 return intnetR0NetworkSwitchTrunk(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
3491 }
3492 enmAddrType = kIntNetAddrType_IPX;
3493 cbAddr = sizeof(Addr.IPX);
3494 break;
3495#endif
3496
3497 /*
3498 * Treat ARP as broadcast (it shouldn't end up here normally,
3499 * so it goes last in the switch).
3500 */
3501 case RTNET_ETHERTYPE_ARP:
3502 Log6(("intnetshareduni: ARP\n"));
3503 /** @todo revisit this broadcasting of unicast ARP frames! */
3504 return intnetR0NetworkSharedMacFixAndSwitchBroadcast(pNetwork, INTNETTRUNKDIR_WIRE, NULL, pSG, pEthHdr, pDstTab);
3505
3506 /*
3507 * Unknown packets are sent to the trunk and any promiscuous interfaces.
3508 */
3509 default:
3510 {
3511 Log6(("intnetshareduni: unknown ethertype=%#x\n", RT_BE2H_U16(pEthHdr->EtherType)));
3512 return intnetR0NetworkSwitchTrunkAndPromisc(pNetwork, INTNETTRUNKDIR_WIRE, pDstTab);
3513 }
3514 }
3515
3516 /*
3517 * Do level-3 switching.
3518 */
3519 INTNETSWDECISION enmSwDecision = intnetR0NetworkSwitchLevel3(pNetwork, &pEthHdr->DstMac,
3520 enmAddrType, &Addr, cbAddr,
3521 INTNETTRUNKDIR_WIRE, pDstTab);
3522
3523#ifdef INTNET_WITH_DHCP_SNOOPING
3524 /*
3525 * Perform DHCP snooping. GSO is not applicable to DHCP traffic
3526 */
3527 if ( enmAddrType == kIntNetAddrType_IPv4
3528 && pSG->cbTotal >= sizeof(RTNETETHERHDR) + RTNETIPV4_MIN_LEN + RTNETUDP_MIN_LEN + RTNETBOOTP_DHCP_MIN_LEN
3529 && pSG->GsoCtx.u8Type == PDMNETWORKGSOTYPE_INVALID)
3530 intnetR0TrunkIfSnoopAddr(pNetwork, pSG, RT_BE2H_U16(pEthHdr->EtherType));
3531#endif /* INTNET_WITH_DHCP_SNOOPING */
3532
3533 return enmSwDecision;
3534}
3535
3536
3537/**
3538 * Release all the interfaces in the destination table when we realize that
3539 * we're in a context where we cannot get the job done.
3540 *
3541 * @param pNetwork The network.
3542 * @param pDstTab The destination table.
3543 */
3544static void intnetR0NetworkReleaseDstTab(PINTNETNETWORK pNetwork, PINTNETDSTTAB pDstTab)
3545{
3546 /* The trunk interface. */
3547 if (pDstTab->fTrunkDst)
3548 {
3549 PINTNETTRUNKIF pTrunk = pDstTab->pTrunk;
3550 if (pTrunk)
3551 intnetR0BusyDec(pNetwork, &pTrunk->cBusy);
3552 pDstTab->pTrunk = NULL;
3553 pDstTab->fTrunkDst = 0;
3554 }
3555
3556 /* Regular interfaces. */
3557 uint32_t iIf = pDstTab->cIfs;
3558 while (iIf-- > 0)
3559 {
3560 PINTNETIF pIf = pDstTab->aIfs[iIf].pIf;
3561 intnetR0BusyDecIf(pIf);
3562 pDstTab->aIfs[iIf].pIf = NULL;
3563 }
3564 pDstTab->cIfs = 0;
3565}
3566
3567
3568/**
3569 * Deliver the frame to the interfaces specified in the destination table.
3570 *
3571 * @param pNetwork The network.
3572 * @param pDstTab The destination table.
3573 * @param pSG The frame to send.
3574 * @param pIfSender The sender interface. NULL if it originated via
3575 * the trunk.
3576 */
3577static void intnetR0NetworkDeliver(PINTNETNETWORK pNetwork, PINTNETDSTTAB pDstTab, PINTNETSG pSG, PINTNETIF pIfSender)
3578{
3579 /*
3580 * Do the interfaces first before sending it to the wire and risk having to
3581 * modify it.
3582 */
3583 uint32_t iIf = pDstTab->cIfs;
3584 while (iIf-- > 0)
3585 {
3586 PINTNETIF pIf = pDstTab->aIfs[iIf].pIf;
3587 intnetR0IfSend(pIf, pIfSender, pSG,
3588 pDstTab->aIfs[iIf].fReplaceDstMac ? &pIf->MacAddr: NULL);
3589 intnetR0BusyDecIf(pIf);
3590 pDstTab->aIfs[iIf].pIf = NULL;
3591 }
3592 pDstTab->cIfs = 0;
3593
3594 /*
3595 * Send to the trunk.
3596 *
3597 * Note! The switching functions will include the trunk even when the frame
3598 * source is the trunk. This is because we need it to figure out
3599 * whether the other half of the trunk should see the frame or not
3600 * and let the caller know.
3601 *
3602 * So, we'll ignore trunk sends here if the frame origin is
3603 * INTNETTRUNKSWPORT::pfnRecv.
3604 */
3605 if (pDstTab->fTrunkDst)
3606 {
3607 PINTNETTRUNKIF pTrunk = pDstTab->pTrunk;
3608 if (pTrunk)
3609 {
3610 if (pIfSender)
3611 intnetR0TrunkIfSend(pTrunk, pNetwork, pIfSender, pDstTab->fTrunkDst, pSG);
3612 intnetR0BusyDec(pNetwork, &pTrunk->cBusy);
3613 }
3614 pDstTab->pTrunk = NULL;
3615 pDstTab->fTrunkDst = 0;
3616 }
3617}
3618
3619
3620/**
3621 * Sends a frame.
3622 *
3623 * This function will distribute the frame to the interfaces it is addressed to.
3624 * It will also update the MAC address of the sender.
3625 *
3626 * The caller must own the network mutex.
3627 *
3628 * @returns The switching decision.
3629 * @param pNetwork The network the frame is being sent to.
3630 * @param pIfSender The interface sending the frame. This is NULL if it's the trunk.
3631 * @param fSrc The source flags. This 0 if it's not from the trunk.
3632 * @param pSG Pointer to the gather list.
3633 * @param pDstTab The destination table to use.
3634 */
3635static INTNETSWDECISION intnetR0NetworkSend(PINTNETNETWORK pNetwork, PINTNETIF pIfSender, uint32_t fSrc,
3636 PINTNETSG pSG, PINTNETDSTTAB pDstTab)
3637{
3638 /*
3639 * Assert reality.
3640 */
3641 AssertPtr(pNetwork);
3642 AssertPtrNull(pIfSender);
3643 Assert(pIfSender ? fSrc == 0 : fSrc != 0);
3644 Assert(!pIfSender || pNetwork == pIfSender->pNetwork);
3645 AssertPtr(pSG);
3646 Assert(pSG->cSegsUsed >= 1);
3647 Assert(pSG->cSegsUsed <= pSG->cSegsAlloc);
3648 if (pSG->cbTotal < sizeof(RTNETETHERHDR))
3649 return INTNETSWDECISION_INVALID;
3650
3651 /*
3652 * Get the ethernet header (might theoretically involve multiple segments).
3653 */
3654 RTNETETHERHDR EthHdr;
3655 if (pSG->aSegs[0].cb >= sizeof(EthHdr))
3656 EthHdr = *(PCRTNETETHERHDR)pSG->aSegs[0].pv;
3657 else if (!intnetR0SgReadPart(pSG, 0, sizeof(EthHdr), &EthHdr))
3658 return INTNETSWDECISION_INVALID;
3659 if ( (EthHdr.DstMac.au8[0] == 0x08 && EthHdr.DstMac.au8[1] == 0x00 && EthHdr.DstMac.au8[2] == 0x27)
3660 || (EthHdr.SrcMac.au8[0] == 0x08 && EthHdr.SrcMac.au8[1] == 0x00 && EthHdr.SrcMac.au8[2] == 0x27)
3661 || (EthHdr.DstMac.au8[0] == 0x00 && EthHdr.DstMac.au8[1] == 0x16 && EthHdr.DstMac.au8[2] == 0xcb)
3662 || (EthHdr.SrcMac.au8[0] == 0x00 && EthHdr.SrcMac.au8[1] == 0x16 && EthHdr.SrcMac.au8[2] == 0xcb)
3663 || EthHdr.DstMac.au8[0] == 0xff
3664 || EthHdr.SrcMac.au8[0] == 0xff)
3665 Log2(("D=%.6Rhxs S=%.6Rhxs T=%04x f=%x z=%x\n",
3666 &EthHdr.DstMac, &EthHdr.SrcMac, RT_BE2H_U16(EthHdr.EtherType), fSrc, pSG->cbTotal));
3667
3668 /*
3669 * Learn the MAC address of the sender. No re-learning as the interface
3670 * user will normally tell us the right MAC address.
3671 *
3672 * Note! We don't notify the trunk about these mainly because of the
3673 * problematic contexts we might be called in.
3674 */
3675 if (RT_UNLIKELY( pIfSender
3676 && !pIfSender->fMacSet
3677 && memcmp(&EthHdr.SrcMac, &pIfSender->MacAddr, sizeof(pIfSender->MacAddr))
3678 && !intnetR0IsMacAddrMulticast(&EthHdr.SrcMac)
3679 ))
3680 {
3681 Log2(("IF MAC: %.6Rhxs -> %.6Rhxs\n", &pIfSender->MacAddr, &EthHdr.SrcMac));
3682 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
3683
3684 PINTNETMACTABENTRY pIfEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIfSender);
3685 if (pIfEntry)
3686 pIfEntry->MacAddr = EthHdr.SrcMac;
3687 pIfSender->MacAddr = EthHdr.SrcMac;
3688
3689 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock);
3690 }
3691
3692 /*
3693 * Deal with MAC address sharing as that may required editing of the
3694 * packets before we dispatch them anywhere.
3695 */
3696 INTNETSWDECISION enmSwDecision;
3697 if (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
3698 {
3699 if (intnetR0IsMacAddrMulticast(&EthHdr.DstMac))
3700 enmSwDecision = intnetR0NetworkSharedMacFixAndSwitchBroadcast(pNetwork, fSrc, pIfSender, pSG, &EthHdr, pDstTab);
3701 else if (fSrc & INTNETTRUNKDIR_WIRE)
3702 {
3703 if (intnetR0NetworkDetectAndFixNdBroadcast(pNetwork, pSG, &EthHdr))
3704 enmSwDecision = intnetR0NetworkSharedMacFixAndSwitchBroadcast(pNetwork, fSrc, pIfSender, pSG, &EthHdr, pDstTab);
3705 else
3706 enmSwDecision = intnetR0NetworkSharedMacFixAndSwitchUnicast(pNetwork, pSG, &EthHdr, pDstTab);
3707 }
3708 else
3709 enmSwDecision = intnetR0NetworkSwitchUnicast(pNetwork, fSrc, pIfSender, &EthHdr.DstMac, pDstTab);
3710 }
3711 else if (intnetR0IsMacAddrMulticast(&EthHdr.DstMac))
3712 enmSwDecision = intnetR0NetworkSwitchBroadcast(pNetwork, fSrc, pIfSender, pDstTab);
3713 else
3714 enmSwDecision = intnetR0NetworkSwitchUnicast(pNetwork, fSrc, pIfSender, &EthHdr.DstMac, pDstTab);
3715
3716 /*
3717 * Deliver to the destinations if we can.
3718 */
3719 if (enmSwDecision != INTNETSWDECISION_BAD_CONTEXT)
3720 {
3721 if (intnetR0NetworkIsContextOk(pNetwork, pIfSender, pDstTab))
3722 intnetR0NetworkDeliver(pNetwork, pDstTab, pSG, pIfSender);
3723 else
3724 {
3725 intnetR0NetworkReleaseDstTab(pNetwork, pDstTab);
3726 enmSwDecision = INTNETSWDECISION_BAD_CONTEXT;
3727 }
3728 }
3729
3730 return enmSwDecision;
3731}
3732
3733
3734/**
3735 * Sends one or more frames.
3736 *
3737 * The function will first the frame which is passed as the optional arguments
3738 * pvFrame and cbFrame. These are optional since it also possible to chain
3739 * together one or more frames in the send buffer which the function will
3740 * process after considering it's arguments.
3741 *
3742 * The caller is responsible for making sure that there are no concurrent calls
3743 * to this method (with the same handle).
3744 *
3745 * @returns VBox status code.
3746 * @param hIf The interface handle.
3747 * @param pSession The caller's session.
3748 */
3749INTNETR0DECL(int) IntNetR0IfSend(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession)
3750{
3751 Log5(("IntNetR0IfSend: hIf=%RX32\n", hIf));
3752
3753 /*
3754 * Validate input and translate the handle.
3755 */
3756 PINTNET pIntNet = g_pIntNet;
3757 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
3758 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
3759
3760 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
3761 if (!pIf)
3762 return VERR_INVALID_HANDLE;
3763 STAM_REL_PROFILE_START(&pIf->pIntBuf->StatSend1, a);
3764
3765 /*
3766 * Make sure we've got a network.
3767 */
3768 int rc = VINF_SUCCESS;
3769 intnetR0BusyIncIf(pIf);
3770 PINTNETNETWORK pNetwork = pIf->pNetwork;
3771 if (RT_LIKELY(pNetwork))
3772 {
3773 /*
3774 * Grab the destination table.
3775 */
3776 PINTNETDSTTAB pDstTab = ASMAtomicXchgPtrT(&pIf->pDstTab, NULL, PINTNETDSTTAB);
3777 if (RT_LIKELY(pDstTab))
3778 {
3779 /*
3780 * Process the send buffer.
3781 */
3782 INTNETSWDECISION enmSwDecision = INTNETSWDECISION_BROADCAST;
3783 INTNETSG Sg; /** @todo this will have to be changed if we're going to use async sending
3784 * with buffer sharing for some OS or service. Darwin copies everything so
3785 * I won't bother allocating and managing SGs right now. Sorry. */
3786 PINTNETHDR pHdr;
3787 while ((pHdr = IntNetRingGetNextFrameToRead(&pIf->pIntBuf->Send)) != NULL)
3788 {
3789 uint8_t const u8Type = pHdr->u8Type;
3790 if (u8Type == INTNETHDR_TYPE_FRAME)
3791 {
3792 /* Send regular frame. */
3793 void *pvCurFrame = IntNetHdrGetFramePtr(pHdr, pIf->pIntBuf);
3794 IntNetSgInitTemp(&Sg, pvCurFrame, pHdr->cbFrame);
3795 if (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
3796 intnetR0IfSnoopAddr(pIf, (uint8_t *)pvCurFrame, pHdr->cbFrame, false /*fGso*/, (uint16_t *)&Sg.fFlags);
3797 enmSwDecision = intnetR0NetworkSend(pNetwork, pIf, 0 /*fSrc*/, &Sg, pDstTab);
3798 }
3799 else if (u8Type == INTNETHDR_TYPE_GSO)
3800 {
3801 /* Send GSO frame if sane. */
3802 PPDMNETWORKGSO pGso = IntNetHdrGetGsoContext(pHdr, pIf->pIntBuf);
3803 uint32_t cbFrame = pHdr->cbFrame - sizeof(*pGso);
3804 if (RT_LIKELY(PDMNetGsoIsValid(pGso, pHdr->cbFrame, cbFrame)))
3805 {
3806 void *pvCurFrame = pGso + 1;
3807 IntNetSgInitTempGso(&Sg, pvCurFrame, cbFrame, pGso);
3808 if (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
3809 intnetR0IfSnoopAddr(pIf, (uint8_t *)pvCurFrame, cbFrame, true /*fGso*/, (uint16_t *)&Sg.fFlags);
3810 enmSwDecision = intnetR0NetworkSend(pNetwork, pIf, 0 /*fSrc*/, &Sg, pDstTab);
3811 }
3812 else
3813 {
3814 STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatBadFrames); /* ignore */
3815 enmSwDecision = INTNETSWDECISION_DROP;
3816 }
3817 }
3818 /* Unless it's a padding frame, we're getting babble from the producer. */
3819 else
3820 {
3821 if (u8Type != INTNETHDR_TYPE_PADDING)
3822 STAM_REL_COUNTER_INC(&pIf->pIntBuf->cStatBadFrames); /* ignore */
3823 enmSwDecision = INTNETSWDECISION_DROP;
3824 }
3825 if (enmSwDecision == INTNETSWDECISION_BAD_CONTEXT)
3826 {
3827 rc = VERR_TRY_AGAIN;
3828 break;
3829 }
3830
3831 /* Skip to the next frame. */
3832 IntNetRingSkipFrame(&pIf->pIntBuf->Send);
3833 }
3834
3835 /*
3836 * Put back the destination table.
3837 */
3838 Assert(!pIf->pDstTab);
3839 ASMAtomicWritePtr(&pIf->pDstTab, pDstTab);
3840 }
3841 else
3842 rc = VERR_INTERNAL_ERROR_4;
3843 }
3844 else
3845 rc = VERR_INTERNAL_ERROR_3;
3846
3847 /*
3848 * Release the interface.
3849 */
3850 intnetR0BusyDecIf(pIf);
3851 STAM_REL_PROFILE_STOP(&pIf->pIntBuf->StatSend1, a);
3852 intnetR0IfRelease(pIf, pSession);
3853 return rc;
3854}
3855
3856
3857/**
3858 * VMMR0 request wrapper for IntNetR0IfSend.
3859 *
3860 * @returns see IntNetR0IfSend.
3861 * @param pSession The caller's session.
3862 * @param pReq The request packet.
3863 */
3864INTNETR0DECL(int) IntNetR0IfSendReq(PSUPDRVSESSION pSession, PINTNETIFSENDREQ pReq)
3865{
3866 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
3867 return VERR_INVALID_PARAMETER;
3868 return IntNetR0IfSend(pReq->hIf, pSession);
3869}
3870
3871
3872/**
3873 * Maps the default buffer into ring 3.
3874 *
3875 * @returns VBox status code.
3876 * @param hIf The interface handle.
3877 * @param pSession The caller's session.
3878 * @param ppRing3Buf Where to store the address of the ring-3 mapping
3879 * (optional).
3880 * @param ppRing0Buf Where to store the address of the ring-0 mapping
3881 * (optional).
3882 */
3883INTNETR0DECL(int) IntNetR0IfGetBufferPtrs(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession,
3884 R3PTRTYPE(PINTNETBUF) *ppRing3Buf, R0PTRTYPE(PINTNETBUF) *ppRing0Buf)
3885{
3886 LogFlow(("IntNetR0IfGetBufferPtrs: hIf=%RX32 ppRing3Buf=%p ppRing0Buf=%p\n", hIf, ppRing3Buf, ppRing0Buf));
3887
3888 /*
3889 * Validate input.
3890 */
3891 PINTNET pIntNet = g_pIntNet;
3892 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
3893 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
3894
3895 AssertPtrNullReturn(ppRing3Buf, VERR_INVALID_PARAMETER);
3896 AssertPtrNullReturn(ppRing0Buf, VERR_INVALID_PARAMETER);
3897 if (ppRing3Buf)
3898 *ppRing3Buf = 0;
3899 if (ppRing0Buf)
3900 *ppRing0Buf = 0;
3901
3902 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
3903 if (!pIf)
3904 return VERR_INVALID_HANDLE;
3905
3906 /*
3907 * ASSUMES that only the process that created an interface can use it.
3908 * ASSUMES that we created the ring-3 mapping when selecting or
3909 * allocating the buffer.
3910 */
3911 int rc = RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
3912 if (RT_SUCCESS(rc))
3913 {
3914 if (ppRing3Buf)
3915 *ppRing3Buf = pIf->pIntBufR3;
3916 if (ppRing0Buf)
3917 *ppRing0Buf = (R0PTRTYPE(PINTNETBUF))pIf->pIntBuf; /* tstIntNetR0 mess */
3918
3919 rc = RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
3920 }
3921
3922 intnetR0IfRelease(pIf, pSession);
3923 LogFlow(("IntNetR0IfGetBufferPtrs: returns %Rrc *ppRing3Buf=%p *ppRing0Buf=%p\n",
3924 rc, ppRing3Buf ? *ppRing3Buf : NIL_RTR3PTR, ppRing0Buf ? *ppRing0Buf : NIL_RTR0PTR));
3925 return rc;
3926}
3927
3928
3929/**
3930 * VMMR0 request wrapper for IntNetR0IfGetBufferPtrs.
3931 *
3932 * @returns see IntNetR0IfGetRing3Buffer.
3933 * @param pSession The caller's session.
3934 * @param pReq The request packet.
3935 */
3936INTNETR0DECL(int) IntNetR0IfGetBufferPtrsReq(PSUPDRVSESSION pSession, PINTNETIFGETBUFFERPTRSREQ pReq)
3937{
3938 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
3939 return VERR_INVALID_PARAMETER;
3940 return IntNetR0IfGetBufferPtrs(pReq->hIf, pSession, &pReq->pRing3Buf, &pReq->pRing0Buf);
3941}
3942
3943
3944#if 0
3945/**
3946 * Gets the physical addresses of the default interface buffer.
 *
 * This function is compiled out by the surrounding \#if 0 and is an
 * unfinished stub: once the handle has been looked up it takes and releases
 * the network mutex without collecting any data and then returns
 * VERR_NOT_IMPLEMENTED.
 *
 * NOTE(review): the body uses pSession, which is not among the parameters,
 *               so the code would not compile if it were simply enabled.
3947 *
3948 * @returns VBox status code.
3949 * @param hIf The interface handle.
3950 * @param paPages Where to store the addresses. (The reserved fields will be set to zero.)
3951 * @param cPages Presumably the number of entries in paPages - TODO confirm.
3952 */
3953INTNETR0DECL(int) IntNetR0IfGetPhysBuffer(INTNETIFHANDLE hIf, PSUPPAGE paPages, unsigned cPages)
3954{
3955 /*
3956 * Validate input.
3957 */
3958 PINTNET pIntNet = g_pIntNet;
3959 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
3960 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
3961
3962 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
 /* Also check the address of the last byte of the array. */
3963 AssertPtrReturn((uint8_t *)&paPages[cPages] - 1, VERR_INVALID_PARAMETER);
 /* NOTE(review): pSession is not declared anywhere in this function. */
3964 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
3965 if (!pIf)
3966 return VERR_INVALID_HANDLE;
3967
3968 /*
3969 * Grab the lock and get the data.
3970 * ASSUMES that the handle isn't closed while we're here.
3971 */
3972 int rc = RTSemFastMutexRequest(pIf->pNetwork->FastMutex);
3973 if (RT_SUCCESS(rc))
3974 {
3975 /** @todo make a SUPR0 api for obtaining the array. SUPR0/IPRT is keeping track of everything, there
3976 * is no need for any extra bookkeeping here.. */
3977
3978 rc = RTSemFastMutexRelease(pIf->pNetwork->FastMutex);
3979 }
3980 intnetR0IfRelease(pIf, pSession);
 /* rc is ignored; the stub reports "not implemented" regardless. */
3981 return VERR_NOT_IMPLEMENTED;
3982}
3983#endif
3984
3985
3986/**
3987 * Sets the promiscuous mode property of an interface.
3988 *
3989 * @returns VBox status code.
3990 * @param hIf The interface handle.
3991 * @param pSession The caller's session.
3992 * @param fPromiscuous Set if the interface should be in promiscuous mode, clear if not.
3993 */
3994INTNETR0DECL(int) IntNetR0IfSetPromiscuousMode(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, bool fPromiscuous)
3995{
3996 LogFlow(("IntNetR0IfSetPromiscuousMode: hIf=%RX32 fPromiscuous=%d\n", hIf, fPromiscuous));
3997
3998 /*
3999 * Validate & translate input.
4000 */
4001 PINTNET pIntNet = g_pIntNet;
4002 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4003 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4004
4005 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4006 if (!pIf)
4007 {
4008 Log(("IntNetR0IfSetPromiscuousMode: returns VERR_INVALID_HANDLE\n"));
4009 return VERR_INVALID_HANDLE;
4010 }
4011
4012 /*
4013 * Get the network, take the address spinlock, and make the change.
4014 * Paranoia^2: Mark ourselves busy to prevent anything from being destroyed.
4015 */
4016 int rc = VINF_SUCCESS;
4017 intnetR0BusyIncIf(pIf);
4018 PINTNETNETWORK pNetwork = pIf->pNetwork;
4019 if (pNetwork)
4020 {
4021 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
4022
4023 if (pIf->fPromiscuousReal != fPromiscuous)
4024 {
4025 const bool fPromiscuousEff = fPromiscuous
4026 && (pIf->fOpenFlags & INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW)
4027 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS);
4028 Log(("IntNetR0IfSetPromiscuousMode: hIf=%RX32: Changed from %d -> %d (%d)\n",
4029 hIf, !fPromiscuous, !!fPromiscuous, fPromiscuousEff));
4030
4031 pIf->fPromiscuousReal = fPromiscuous;
4032
4033 PINTNETMACTABENTRY pEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIf); Assert(pEntry);
4034 if (RT_LIKELY(pEntry))
4035 {
4036 if (pEntry->fPromiscuousEff)
4037 {
4038 pNetwork->MacTab.cPromiscuousEntries--;
4039 if (!pEntry->fPromiscuousSeeTrunk)
4040 pNetwork->MacTab.cPromiscuousNoTrunkEntries--;
4041 Assert(pNetwork->MacTab.cPromiscuousEntries < pNetwork->MacTab.cEntries);
4042 Assert(pNetwork->MacTab.cPromiscuousNoTrunkEntries < pNetwork->MacTab.cEntries);
4043 }
4044
4045 pEntry->fPromiscuousEff = fPromiscuousEff;
4046 pEntry->fPromiscuousSeeTrunk = fPromiscuousEff
4047 && (pIf->fOpenFlags & INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK);
4048
4049 if (pEntry->fPromiscuousEff)
4050 {
4051 pNetwork->MacTab.cPromiscuousEntries++;
4052 if (!pEntry->fPromiscuousSeeTrunk)
4053 pNetwork->MacTab.cPromiscuousNoTrunkEntries++;
4054 }
4055 Assert(pNetwork->MacTab.cPromiscuousEntries <= pNetwork->MacTab.cEntries);
4056 Assert(pNetwork->MacTab.cPromiscuousNoTrunkEntries <= pNetwork->MacTab.cEntries);
4057 }
4058 }
4059
4060 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock);
4061 }
4062 else
4063 rc = VERR_WRONG_ORDER;
4064
4065 intnetR0BusyDecIf(pIf);
4066 intnetR0IfRelease(pIf, pSession);
4067 return rc;
4068}
4069
4070
4071/**
4072 * VMMR0 request wrapper for IntNetR0IfSetPromiscuousMode.
4073 *
4074 * @returns see IntNetR0IfSetPromiscuousMode.
4075 * @param pSession The caller's session.
4076 * @param pReq The request packet.
4077 */
4078INTNETR0DECL(int) IntNetR0IfSetPromiscuousModeReq(PSUPDRVSESSION pSession, PINTNETIFSETPROMISCUOUSMODEREQ pReq)
4079{
4080 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4081 return VERR_INVALID_PARAMETER;
4082 return IntNetR0IfSetPromiscuousMode(pReq->hIf, pSession, pReq->fPromiscuous);
4083}
4084
4085
4086/**
4087 * Sets the MAC address of an interface.
4088 *
4089 * @returns VBox status code.
4090 * @param hIf The interface handle.
4091 * @param pSession The caller's session.
4092 * @param pMAC The new MAC address.
4093 */
4094INTNETR0DECL(int) IntNetR0IfSetMacAddress(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, PCRTMAC pMac)
4095{
4096 LogFlow(("IntNetR0IfSetMacAddress: hIf=%RX32 pMac=%p:{%.6Rhxs}\n", hIf, pMac, pMac));
4097
4098 /*
4099 * Validate & translate input.
4100 */
4101 PINTNET pIntNet = g_pIntNet;
4102 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4103 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4104
4105 AssertPtrReturn(pMac, VERR_INVALID_PARAMETER);
4106 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4107 if (!pIf)
4108 {
4109 Log(("IntNetR0IfSetMacAddress: returns VERR_INVALID_HANDLE\n"));
4110 return VERR_INVALID_HANDLE;
4111 }
4112
4113 /*
4114 * Get the network, take the address spinlock, and make the change.
4115 * Paranoia^2: Mark ourselves busy to prevent anything from being destroyed.
4116 */
4117 int rc = VINF_SUCCESS;
4118 intnetR0BusyIncIf(pIf);
4119 PINTNETNETWORK pNetwork = pIf->pNetwork;
4120 if (pNetwork)
4121 {
4122 PINTNETTRUNKIF pTrunk = NULL;
4123
4124 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
4125
4126 if (memcmp(&pIf->MacAddr, pMac, sizeof(pIf->MacAddr)))
4127 {
4128 Log(("IntNetR0IfSetMacAddress: hIf=%RX32: Changed from %.6Rhxs -> %.6Rhxs\n",
4129 hIf, &pIf->MacAddr, pMac));
4130
4131 /* Update the two copies. */
4132 PINTNETMACTABENTRY pEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIf); Assert(pEntry);
4133 if (RT_LIKELY(pEntry))
4134 pEntry->MacAddr = *pMac;
4135 pIf->MacAddr = *pMac;
4136 pIf->fMacSet = true;
4137
4138 /* Grab a busy reference to the trunk so we release the lock before notifying it. */
4139 pTrunk = pNetwork->MacTab.pTrunk;
4140 if (pTrunk)
4141 intnetR0BusyIncTrunk(pTrunk);
4142 }
4143
4144 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock);
4145
4146 if (pTrunk)
4147 {
4148 Log(("IntNetR0IfSetMacAddress: pfnNotifyMacAddress hIf=%RX32\n", hIf));
4149 PINTNETTRUNKIFPORT pIfPort = pTrunk->pIfPort;
4150 if (pIfPort)
4151 pIfPort->pfnNotifyMacAddress(pIfPort, pIf->pvIfData, pMac);
4152 intnetR0BusyDecTrunk(pTrunk);
4153 }
4154 }
4155 else
4156 rc = VERR_WRONG_ORDER;
4157
4158 intnetR0BusyDecIf(pIf);
4159 intnetR0IfRelease(pIf, pSession);
4160 return rc;
4161}
4162
4163
4164/**
4165 * VMMR0 request wrapper for IntNetR0IfSetMacAddress.
4166 *
4167 * @returns see IntNetR0IfSetMacAddress.
4168 * @param pSession The caller's session.
4169 * @param pReq The request packet.
4170 */
4171INTNETR0DECL(int) IntNetR0IfSetMacAddressReq(PSUPDRVSESSION pSession, PINTNETIFSETMACADDRESSREQ pReq)
4172{
4173 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4174 return VERR_INVALID_PARAMETER;
4175 return IntNetR0IfSetMacAddress(pReq->hIf, pSession, &pReq->Mac);
4176}
4177
4178
4179/**
4180 * Worker for intnetR0IfSetActive and intnetR0IfDestruct.
4181 *
4182 * This function will update the active interface count on the network and
4183 * activate or deactivate the trunk connection if necessary.
4184 *
4185 * The call must own the giant lock (we cannot take it here).
4186 *
4187 * @returns VBox status code.
4188 * @param pNetwork The network.
4189 * @param fIf The interface.
4190 * @param fActive What to do.
4191 */
4192static int intnetR0NetworkSetIfActive(PINTNETNETWORK pNetwork, PINTNETIF pIf, bool fActive)
4193{
4194 /* quick sanity check */
4195 AssertPtr(pNetwork);
4196 AssertPtr(pIf);
4197
4198 /*
4199 * The address spinlock of the network protects the variables, while the
4200 * big lock protects the calling of pfnSetState. Grab both lock at once
4201 * to save us the extra hassle.
4202 */
4203 PINTNETTRUNKIF pTrunk = NULL;
4204 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
4205
4206 /*
4207 * Do the update.
4208 */
4209 if (pIf->fActive != fActive)
4210 {
4211 PINTNETMACTABENTRY pEntry = intnetR0NetworkFindMacAddrEntry(pNetwork, pIf); Assert(pEntry);
4212 if (RT_LIKELY(pEntry))
4213 {
4214 pEntry->fActive = fActive;
4215 pIf->fActive = fActive;
4216
4217 if (fActive)
4218 {
4219 pNetwork->cActiveIFs++;
4220 if (pNetwork->cActiveIFs == 1)
4221 {
4222 pTrunk = pNetwork->MacTab.pTrunk;
4223 if (pTrunk)
4224 {
4225 pNetwork->MacTab.fHostActive = RT_BOOL(pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED);
4226 pNetwork->MacTab.fWireActive = RT_BOOL(pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_WIRE_ENABLED);
4227 }
4228 }
4229 }
4230 else
4231 {
4232 pNetwork->cActiveIFs--;
4233 if (pNetwork->cActiveIFs == 0)
4234 {
4235 pTrunk = pNetwork->MacTab.pTrunk;
4236 pNetwork->MacTab.fHostActive = false;
4237 pNetwork->MacTab.fWireActive = false;
4238 }
4239 }
4240 }
4241 }
4242
4243 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock);
4244
4245 /*
4246 * Tell the trunk if necessary.
4247 * The wait for !busy is for the Solaris streams trunk driver (mostly).
4248 */
4249 if (pTrunk && pTrunk->pIfPort)
4250 {
4251 if (!fActive)
4252 intnetR0BusyWait(pNetwork, &pTrunk->cBusy);
4253
4254 pTrunk->pIfPort->pfnSetState(pTrunk->pIfPort, fActive ? INTNETTRUNKIFSTATE_ACTIVE : INTNETTRUNKIFSTATE_INACTIVE);
4255 }
4256
4257 return VINF_SUCCESS;
4258}
4259
4260
4261/**
4262 * Sets the active property of an interface.
4263 *
4264 * @returns VBox status code.
4265 * @param hIf The interface handle.
4266 * @param pSession The caller's session.
4267 * @param fActive The new state.
4268 */
4269INTNETR0DECL(int) IntNetR0IfSetActive(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, bool fActive)
4270{
4271 LogFlow(("IntNetR0IfSetActive: hIf=%RX32 fActive=%RTbool\n", hIf, fActive));
4272
4273 /*
4274 * Validate & translate input.
4275 */
4276 PINTNET pIntNet = g_pIntNet;
4277 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4278 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4279
4280 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4281 if (!pIf)
4282 {
4283 Log(("IntNetR0IfSetActive: returns VERR_INVALID_HANDLE\n"));
4284 return VERR_INVALID_HANDLE;
4285 }
4286
4287 /*
4288 * Hand it to the network since it might involve the trunk and things are
4289 * tricky there wrt to locking order.
4290 *
4291 * 1. We take the giant lock here. This makes sure nobody is re-enabling
4292 * the network while we're pausing it and vice versa. This also enables
4293 * us to wait for the network to become idle before telling the trunk.
4294 * (Important on Solaris.)
4295 *
4296 * 2. For paranoid reasons, we grab a busy reference to the calling
4297 * interface. This is totally unnecessary but should hurt (when done
4298 * after grabbing the giant lock).
4299 */
4300 int rc = RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
4301 if (RT_SUCCESS(rc))
4302 {
4303 intnetR0BusyIncIf(pIf);
4304
4305 PINTNETNETWORK pNetwork = pIf->pNetwork;
4306 if (pNetwork)
4307 rc = intnetR0NetworkSetIfActive(pNetwork, pIf, fActive);
4308 else
4309 rc = VERR_WRONG_ORDER;
4310
4311 intnetR0BusyDecIf(pIf);
4312 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
4313 }
4314
4315 intnetR0IfRelease(pIf, pSession);
4316 LogFlow(("IntNetR0IfSetActive: returns %Rrc\n", rc));
4317 return rc;
4318}
4319
4320
4321/**
4322 * VMMR0 request wrapper for IntNetR0IfSetActive.
4323 *
4324 * @returns see IntNetR0IfSetActive.
4325 * @param pIntNet The internal networking instance.
4326 * @param pSession The caller's session.
4327 * @param pReq The request packet.
4328 */
4329INTNETR0DECL(int) IntNetR0IfSetActiveReq(PSUPDRVSESSION pSession, PINTNETIFSETACTIVEREQ pReq)
4330{
4331 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4332 return VERR_INVALID_PARAMETER;
4333 return IntNetR0IfSetActive(pReq->hIf, pSession, pReq->fActive);
4334}
4335
4336
4337/**
4338 * Wait for the interface to get signaled.
4339 * The interface is signaled when data is put into the receive buffer.
 *
 * The function looks up the interface, snapshots its handle, receive event
 * and destroying flag, and then blocks on the receive event.  After waking
 * up it re-checks the event handle and the destroying flag before releasing
 * the interface, and reports VERR_SEM_DESTROYED if the interface is going
 * away.
4340 *
4341 * @returns VBox status code.
 * @retval VERR_INVALID_HANDLE if the handle lookup fails.
 * @retval VERR_SEM_DESTROYED if the interface is being destroyed or no
 * further waits are permitted on it.
4342 * @param hIf The interface handle.
4343 * @param pSession The caller's session.
4344 * @param cMillies Number of milliseconds to wait. RT_INDEFINITE_WAIT should be
4345 * used if indefinite wait is desired.
4346 */
4347INTNETR0DECL(int) IntNetR0IfWait(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, uint32_t cMillies)
4348{
4349 Log4(("IntNetR0IfWait: hIf=%RX32 cMillies=%u\n", hIf, cMillies));
4350
4351 /*
4352 * Get and validate essential handles.
4353 */
4354 PINTNET pIntNet = g_pIntNet;
4355 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4356 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4357
4358 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4359 if (!pIf)
4360 {
4361 Log(("IntNetR0IfWait: returns VERR_INVALID_HANDLE\n"));
4362 return VERR_INVALID_HANDLE;
4363 }
4364
 /* Snapshot the state the wait depends on; hRecvEvent is compared against
 the live value again after the wait. */
4365 const INTNETIFHANDLE hIfSelf = pIf->hIf;
4366 const RTSEMEVENT hRecvEvent = pIf->hRecvEvent;
4367 const bool fDestroying = ASMAtomicReadBool(&pIf->fDestroying);
4368 if ( hIfSelf != hIf /* paranoia */
4369 || hRecvEvent == NIL_RTSEMEVENT
4370 || fDestroying
4371 )
4372 {
 /* NOTE(review): this path returns without calling intnetR0IfRelease,
 unlike the normal path below.  If the lookup above retained the
 interface, that reference stays held here - confirm against the
 retain callback and intnetR0IfDestruct before changing it. */
4373 Log(("IntNetR0IfWait: returns VERR_SEM_DESTROYED\n"));
4374 return VERR_SEM_DESTROYED;
4375 }
4376
4377 /*
4378 * It is tempting to check if there is data to be read here,
4379 * but the problem with such an approach is that it will cause
4380 * one unnecessary supervisor->user->supervisor trip. There is
4381 * already a slight risk for such, so no need to increase it.
4382 */
4383
4384 /*
4385 * Increment the number of waiters before starting the wait.
4386 * Upon wakeup we must assert reality, checking that we're not
4387 * already destroyed or in the process of being destroyed. This
4388 * code must be aligned with the waiting code in intnetR0IfDestruct.
4389 */
4390 ASMAtomicIncU32(&pIf->cSleepers);
4391 int rc = RTSemEventWaitNoResume(hRecvEvent, cMillies);
 /* The event handle is unchanged: undo the sleeper count and, unless the
 interface is marked as destroying, drop our reference to it. */
4392 if (pIf->hRecvEvent == hRecvEvent)
4393 {
4394 ASMAtomicDecU32(&pIf->cSleepers);
4395 if (!pIf->fDestroying)
4396 {
 /* A true return from intnetR0IfRelease is reported as
 VERR_SEM_DESTROYED - presumably it means the interface was
 destroyed by the release; TODO confirm. */
4397 if (intnetR0IfRelease(pIf, pSession))
4398 rc = VERR_SEM_DESTROYED;
4399 }
4400 else
 /* NOTE(review): no intnetR0IfRelease on this branch. */
4401 rc = VERR_SEM_DESTROYED;
4402 }
4403 else
 /* The event handle changed while we slept: neither cSleepers nor the
 reference is touched on this branch. */
4404 rc = VERR_SEM_DESTROYED;
4405 Log4(("IntNetR0IfWait: returns %Rrc\n", rc));
4406 return rc;
4407}
4408
4409
4410/**
4411 * VMMR0 request wrapper for IntNetR0IfWait.
4412 *
4413 * @returns see IntNetR0IfWait.
4414 * @param pSession The caller's session.
4415 * @param pReq The request packet.
4416 */
4417INTNETR0DECL(int) IntNetR0IfWaitReq(PSUPDRVSESSION pSession, PINTNETIFWAITREQ pReq)
4418{
4419 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4420 return VERR_INVALID_PARAMETER;
4421 return IntNetR0IfWait(pReq->hIf, pSession, pReq->cMillies);
4422}
4423
4424
4425/**
4426 * Wake up any threads waiting on the interface.
4427 *
4428 * @returns VBox status code.
4429 * @param hIf The interface handle.
4430 * @param pSession The caller's session.
4431 * @param fNoMoreWaits When set, no more waits are permitted.
4432 */
4433INTNETR0DECL(int) IntNetR0IfAbortWait(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession, bool fNoMoreWaits)
4434{
4435 Log4(("IntNetR0IfAbortWait: hIf=%RX32 fNoMoreWaits=%RTbool\n", hIf, fNoMoreWaits));
4436
4437 /*
4438 * Get and validate essential handles.
4439 */
4440 PINTNET pIntNet = g_pIntNet;
4441 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4442 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4443
4444 PINTNETIF pIf = (PINTNETIF)RTHandleTableLookupWithCtx(pIntNet->hHtIfs, hIf, pSession);
4445 if (!pIf)
4446 {
4447 Log(("IntNetR0IfAbortWait: returns VERR_INVALID_HANDLE\n"));
4448 return VERR_INVALID_HANDLE;
4449 }
4450
4451 const INTNETIFHANDLE hIfSelf = pIf->hIf;
4452 const RTSEMEVENT hRecvEvent = pIf->hRecvEvent;
4453 const bool fDestroying = ASMAtomicReadBool(&pIf->fDestroying);
4454 if ( hIfSelf != hIf /* paranoia */
4455 || hRecvEvent == NIL_RTSEMEVENT
4456 || fDestroying
4457 )
4458 {
4459 Log(("IntNetR0IfAbortWait: returns VERR_SEM_DESTROYED\n"));
4460 return VERR_SEM_DESTROYED;
4461 }
4462
4463 /*
4464 * Set fDestroying if requested to do so and then wake up all the sleeping
4465 * threads (usually just one). We leave the semaphore in the signalled
4466 * state so the next caller will return immediately.
4467 */
4468 if (fNoMoreWaits)
4469 ASMAtomicWriteBool(&pIf->fDestroying, true);
4470
4471 uint32_t cSleepers = ASMAtomicReadU32(&pIf->cSleepers) + 1;
4472 while (cSleepers-- > 0)
4473 {
4474 int rc = RTSemEventSignal(pIf->hRecvEvent);
4475 AssertRC(rc);
4476 }
4477
4478 Log4(("IntNetR0IfWait: returns %Rrc\n", VINF_SUCCESS));
4479 return VINF_SUCCESS;
4480}
4481
4482
4483/**
4484 * VMMR0 request wrapper for IntNetR0IfAbortWait.
4485 *
4486 * @returns see IntNetR0IfWait.
4487 * @param pSession The caller's session.
4488 * @param pReq The request packet.
4489 */
4490INTNETR0DECL(int) IntNetR0IfAbortWaitReq(PSUPDRVSESSION pSession, PINTNETIFABORTWAITREQ pReq)
4491{
4492 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4493 return VERR_INVALID_PARAMETER;
4494 return IntNetR0IfAbortWait(pReq->hIf, pSession, pReq->fNoMoreWaits);
4495}
4496
4497
4498/**
4499 * Close an interface.
4500 *
4501 * @returns VBox status code.
4502 * @param pIntNet The instance handle.
4503 * @param hIf The interface handle.
4504 * @param pSession The caller's session.
4505 */
4506INTNETR0DECL(int) IntNetR0IfClose(INTNETIFHANDLE hIf, PSUPDRVSESSION pSession)
4507{
4508 LogFlow(("IntNetR0IfClose: hIf=%RX32\n", hIf));
4509
4510 /*
4511 * Validate and free the handle.
4512 */
4513 PINTNET pIntNet = g_pIntNet;
4514 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
4515 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
4516
4517 PINTNETIF pIf = (PINTNETIF)RTHandleTableFreeWithCtx(pIntNet->hHtIfs, hIf, pSession);
4518 if (!pIf)
4519 return VERR_INVALID_HANDLE;
4520
4521 /* Mark the handle as freed so intnetR0IfDestruct won't free it again. */
4522 ASMAtomicWriteU32(&pIf->hIf, INTNET_HANDLE_INVALID);
4523
4524 /*
4525 * Signal the event semaphore to wake up any threads in IntNetR0IfWait
4526 * and give them a moment to get out and release the interface.
4527 */
4528 uint32_t i = pIf->cSleepers;
4529 while (i-- > 0)
4530 {
4531 RTSemEventSignal(pIf->hRecvEvent);
4532 RTThreadYield();
4533 }
4534 RTSemEventSignal(pIf->hRecvEvent);
4535
4536 /*
4537 * Release the references to the interface object (handle + free lookup).
4538 */
4539 void *pvObj = pIf->pvObj;
4540 intnetR0IfRelease(pIf, pSession); /* (RTHandleTableFreeWithCtx) */
4541
4542 int rc = SUPR0ObjRelease(pvObj, pSession);
4543 LogFlow(("IntNetR0IfClose: returns %Rrc\n", rc));
4544 return rc;
4545}
4546
4547
4548/**
4549 * VMMR0 request wrapper for IntNetR0IfCloseReq.
4550 *
4551 * @returns see IntNetR0IfClose.
4552 * @param pSession The caller's session.
4553 * @param pReq The request packet.
4554 */
4555INTNETR0DECL(int) IntNetR0IfCloseReq(PSUPDRVSESSION pSession, PINTNETIFCLOSEREQ pReq)
4556{
4557 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
4558 return VERR_INVALID_PARAMETER;
4559 return IntNetR0IfClose(pReq->hIf, pSession);
4560}
4561
4562
/**
 * Interface destructor callback.
 * This is called for reference counted objects when the count reaches 0.
 *
 * Invalidates the interface handle, detaches the interface from its network
 * (when it still has one), wakes up threads sleeping on the receive event
 * and finally frees the buffers, the locks and the interface structure.
 *
 * @param   pvObj       The object pointer.
 * @param   pvUser1     Pointer to the interface.
 * @param   pvUser2     Pointer to the INTNET instance data.
 */
static DECLCALLBACK(void) intnetR0IfDestruct(void *pvObj, void *pvUser1, void *pvUser2)
{
    PINTNETIF pIf     = (PINTNETIF)pvUser1;
    PINTNET   pIntNet = (PINTNET)pvUser2;
    Log(("intnetR0IfDestruct: pvObj=%p pIf=%p pIntNet=%p hIf=%RX32\n", pvObj, pIf, pIntNet, pIf->hIf));

    /*
     * We grab the INTNET create/open/destroy semaphore to make sure nobody is
     * adding or removing interface while we're in here.  For paranoid reasons
     * we also mark the interface as destroyed here so any waiting threads can
     * take evasive action (theoretical case).
     */
    RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
    ASMAtomicWriteBool(&pIf->fDestroying, true);

    /*
     * Delete the interface handle so the object no longer can be used.
     * (Can happen if the client didn't close its session.)
     */
    INTNETIFHANDLE hIf = ASMAtomicXchgU32(&pIf->hIf, INTNET_HANDLE_INVALID);
    if (hIf != INTNET_HANDLE_INVALID)
    {
        void *pvObj2 = RTHandleTableFreeWithCtx(pIntNet->hHtIfs, hIf, pIf->pSession); NOREF(pvObj2);
        AssertMsg(pvObj2 == pIf, ("%p, %p, hIf=%RX32 pSession=%p\n", pvObj2, pIf, hIf, pIf->pSession));
    }

    /*
     * If we've got a network deactivate and detach ourselves from it.  Because
     * of cleanup order we might have been orphaned by the network destructor.
     */
    PINTNETNETWORK pNetwork = pIf->pNetwork;
    if (pNetwork)
    {
        /* set inactive. */
        intnetR0NetworkSetIfActive(pNetwork, pIf, false /*fActive*/);

        /* remove ourselves from the switch table. */
        RTSpinlockAcquire(pNetwork->hAddrSpinlock);

        /* Search backwards for our entry; nothing is removed if it isn't found. */
        uint32_t iIf = pNetwork->MacTab.cEntries;
        while (iIf-- > 0)
            if (pNetwork->MacTab.paEntries[iIf].pIf == pIf)
            {
                /* Keep the promiscuous counters in sync with the entry being removed. */
                if (pNetwork->MacTab.paEntries[iIf].fPromiscuousEff)
                {
                    pNetwork->MacTab.cPromiscuousEntries--;
                    if (!pNetwork->MacTab.paEntries[iIf].fPromiscuousSeeTrunk)
                        pNetwork->MacTab.cPromiscuousNoTrunkEntries--;
                }
                Assert(pNetwork->MacTab.cPromiscuousEntries < pNetwork->MacTab.cEntries);
                Assert(pNetwork->MacTab.cPromiscuousNoTrunkEntries < pNetwork->MacTab.cEntries);

                /* Close the gap by shifting the following entries down one slot. */
                if (iIf + 1 < pNetwork->MacTab.cEntries)
                    memmove(&pNetwork->MacTab.paEntries[iIf],
                            &pNetwork->MacTab.paEntries[iIf + 1],
                            (pNetwork->MacTab.cEntries - iIf - 1) * sizeof(pNetwork->MacTab.paEntries[0]));
                pNetwork->MacTab.cEntries--;
                break;
            }

        /* recalc the min flags. */
        if (pIf->fOpenFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES)
        {
            /* Rebuild the set from the remaining interfaces that also require restrictive policies. */
            uint32_t fMinFlags = 0;
            iIf = pNetwork->MacTab.cEntries;
            while (iIf-- > 0)
            {
                PINTNETIF pIf2 = pNetwork->MacTab.paEntries[iIf].pIf;
                if (   pIf2 /* paranoia */
                    && (pIf2->fOpenFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES))
                    fMinFlags |= pIf2->fOpenFlags & INTNET_OPEN_FLAGS_STRICT_MASK;
            }
            pNetwork->fMinFlags = fMinFlags;
        }

        /* Sample the trunk pointer while still holding the address spinlock. */
        PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;

        RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock);

        /* Notify the trunk about the interface being destroyed. */
        if (pTrunk && pTrunk->pIfPort)
            pTrunk->pIfPort->pfnDisconnectInterface(pTrunk->pIfPort, pIf->pvIfData);

        /* Wait for the interface to quiesce while we still can. */
        intnetR0BusyWait(pNetwork, &pIf->cBusy);

        /* Release our reference to the network. */
        RTSpinlockAcquire(pNetwork->hAddrSpinlock);
        pIf->pNetwork = NULL;
        RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock);

        SUPR0ObjRelease(pNetwork->pvObj, pIf->pSession);
    }

    RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);

    /*
     * Wakeup anyone waiting on this interface.
     *
     * We *must* make sure they have woken up properly and realized
     * that the interface is no longer valid.
     */
    if (pIf->hRecvEvent != NIL_RTSEMEVENT)
    {
        /* First pass: signal and yield up to 0x1000 times while sleepers remain. */
        RTSEMEVENT  hRecvEvent = pIf->hRecvEvent;
        unsigned    cMaxWait   = 0x1000;
        while (pIf->cSleepers && cMaxWait-- > 0)
        {
            RTSemEventSignal(hRecvEvent);
            RTThreadYield();
        }
        if (pIf->cSleepers)
        {
            RTThreadSleep(1);

            /* Second pass: one signal + 10 ms sleep per sleeper counted at this point. */
            cMaxWait = pIf->cSleepers;
            while (pIf->cSleepers && cMaxWait-- > 0)
            {
                RTSemEventSignal(hRecvEvent);
                RTThreadSleep(10);
            }
        }

        RTSemEventDestroy(hRecvEvent);
        pIf->hRecvEvent = NIL_RTSEMEVENT;
    }

    /*
     * Unmap user buffer.
     */
    if (pIf->pIntBuf != pIf->pIntBufDefault)
    {
        /** @todo user buffer */
    }

    /*
     * Unmap and Free the default buffer.
     */
    if (pIf->pIntBufDefault)
    {
        SUPR0MemFree(pIf->pSession, (RTHCUINTPTR)pIf->pIntBufDefault);
        pIf->pIntBufDefault     = NULL;
        pIf->pIntBufDefaultR3   = 0;
        pIf->pIntBuf            = NULL;
        pIf->pIntBufR3          = 0;
    }

    /*
     * Free remaining resources
     */
    RTSpinlockDestroy(pIf->hRecvInSpinlock);
    pIf->hRecvInSpinlock = NIL_RTSPINLOCK;

    RTMemFree(pIf->pDstTab);
    pIf->pDstTab = NULL;

    for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End; i++)
        intnetR0IfAddrCacheDestroy(&pIf->aAddrCache[i]);

    pIf->pvObj = NULL;
    RTMemFree(pIf);
}
4733
4734
4735/**
4736 * Creates a new network interface.
4737 *
4738 * The call must have opened the network for the new interface and is
4739 * responsible for closing it on failure. On success it must leave the network
4740 * opened so the interface destructor can close it.
4741 *
4742 * @returns VBox status code.
4743 * @param pNetwork The network, referenced. The reference is consumed on
4744 * success.
4745 * @param pSession The session handle.
4746 * @param cbSend The size of the send buffer.
4747 * @param cbRecv The size of the receive buffer.
4748 * @param fFlags The open network flags.
4749 * @param phIf Where to store the interface handle.
4750 */
4751static int intnetR0NetworkCreateIf(PINTNETNETWORK pNetwork, PSUPDRVSESSION pSession,
4752 unsigned cbSend, unsigned cbRecv, uint32_t fFlags,
4753 PINTNETIFHANDLE phIf)
4754{
4755 LogFlow(("intnetR0NetworkCreateIf: pNetwork=%p pSession=%p cbSend=%u cbRecv=%u fFlags=%#x phIf=%p\n",
4756 pNetwork, pSession, cbSend, cbRecv, fFlags, phIf));
4757
4758 /*
4759 * Assert input.
4760 */
4761 AssertPtr(pNetwork);
4762 AssertPtr(phIf);
4763
4764 /*
4765 * Adjust the flags with defaults for the interface policies.
4766 * Note: Main restricts promiscuous mode per interface.
4767 */
4768 uint32_t const fDefFlags = INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW
4769 | INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK;
4770 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkIfFlags); i++)
4771 if (!(fFlags & g_afIntNetOpenNetworkIfFlags[i].fPair))
4772 fFlags |= g_afIntNetOpenNetworkIfFlags[i].fPair & fDefFlags;
4773
4774 /*
4775 * Make sure that all destination tables as well as the have space of
4776 */
4777 int rc = intnetR0NetworkEnsureTabSpace(pNetwork);
4778 if (RT_FAILURE(rc))
4779 return rc;
4780
4781 /*
4782 * Allocate the interface and initialize it.
4783 */
4784 PINTNETIF pIf = (PINTNETIF)RTMemAllocZ(sizeof(*pIf));
4785 if (!pIf)
4786 return VERR_NO_MEMORY;
4787
4788 memset(&pIf->MacAddr, 0xff, sizeof(pIf->MacAddr)); /* broadcast */
4789 //pIf->fMacSet = false;
4790 //pIf->fPromiscuousReal = false;
4791 //pIf->fActive = false;
4792 //pIf->fDestroying = false;
4793 pIf->fOpenFlags = fFlags;
4794 //pIf->cYields = 0;
4795 //pIf->pIntBuf = 0;
4796 //pIf->pIntBufR3 = NIL_RTR3PTR;
4797 //pIf->pIntBufDefault = 0;
4798 //pIf->pIntBufDefaultR3 = NIL_RTR3PTR;
4799 pIf->hRecvEvent = NIL_RTSEMEVENT;
4800 //pIf->cSleepers = 0;
4801 pIf->hIf = INTNET_HANDLE_INVALID;
4802 pIf->pNetwork = pNetwork;
4803 pIf->pSession = pSession;
4804 //pIf->pvObj = NULL;
4805 //pIf->aAddrCache = {0};
4806 pIf->hRecvInSpinlock = NIL_RTSPINLOCK;
4807 pIf->cBusy = 0;
4808 //pIf->pDstTab = NULL;
4809 //pIf->pvIfData = NULL;
4810
4811 for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End && RT_SUCCESS(rc); i++)
4812 rc = intnetR0IfAddrCacheInit(&pIf->aAddrCache[i], (INTNETADDRTYPE)i,
4813 !!(pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE));
4814 if (RT_SUCCESS(rc))
4815 rc = intnetR0AllocDstTab(pNetwork->MacTab.cEntriesAllocated, (PINTNETDSTTAB *)&pIf->pDstTab);
4816 if (RT_SUCCESS(rc))
4817 rc = RTSemEventCreate((PRTSEMEVENT)&pIf->hRecvEvent);
4818 if (RT_SUCCESS(rc))
4819 rc = RTSpinlockCreate(&pIf->hRecvInSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "hRecvInSpinlock");
4820 if (RT_SUCCESS(rc))
4821 {
4822 /*
4823 * Create the default buffer.
4824 */
4825 /** @todo adjust with minimums and apply defaults here. */
4826 cbRecv = RT_ALIGN(RT_MAX(cbRecv, sizeof(INTNETHDR) * 4), INTNETRINGBUF_ALIGNMENT);
4827 cbSend = RT_ALIGN(RT_MAX(cbSend, sizeof(INTNETHDR) * 4), INTNETRINGBUF_ALIGNMENT);
4828 const unsigned cbBuf = RT_ALIGN(sizeof(*pIf->pIntBuf), INTNETRINGBUF_ALIGNMENT) + cbRecv + cbSend;
4829 rc = SUPR0MemAlloc(pIf->pSession, cbBuf, (PRTR0PTR)&pIf->pIntBufDefault, (PRTR3PTR)&pIf->pIntBufDefaultR3);
4830 if (RT_SUCCESS(rc))
4831 {
4832 ASMMemZero32(pIf->pIntBufDefault, cbBuf); /** @todo I thought I specified these buggers as clearing the memory... */
4833
4834 pIf->pIntBuf = pIf->pIntBufDefault;
4835 pIf->pIntBufR3 = pIf->pIntBufDefaultR3;
4836 IntNetBufInit(pIf->pIntBuf, cbBuf, cbRecv, cbSend);
4837
4838 /*
4839 * Register the interface with the session and create a handle for it.
4840 */
4841 pIf->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_INTERNAL_NETWORK_INTERFACE,
4842 intnetR0IfDestruct, pIf, pNetwork->pIntNet);
4843 if (pIf->pvObj)
4844 {
4845 rc = RTHandleTableAllocWithCtx(pNetwork->pIntNet->hHtIfs, pIf, pSession, (uint32_t *)&pIf->hIf);
4846 if (RT_SUCCESS(rc))
4847 {
4848 /*
4849 * Finally add the interface to the network, consuming the
4850 * network reference of the caller.
4851 */
4852 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
4853
4854 uint32_t iIf = pNetwork->MacTab.cEntries;
4855 Assert(iIf + 1 <= pNetwork->MacTab.cEntriesAllocated);
4856
4857 pNetwork->MacTab.paEntries[iIf].MacAddr = pIf->MacAddr;
4858 pNetwork->MacTab.paEntries[iIf].fActive = false;
4859 pNetwork->MacTab.paEntries[iIf].fPromiscuousEff = false;
4860 pNetwork->MacTab.paEntries[iIf].fPromiscuousSeeTrunk = false;
4861 pNetwork->MacTab.paEntries[iIf].pIf = pIf;
4862
4863 pNetwork->MacTab.cEntries = iIf + 1;
4864 pIf->pNetwork = pNetwork;
4865
4866 /*
4867 * Grab a busy reference (paranoia) to the trunk before releasing
4868 * the spinlock and then notify it about the new interface.
4869 */
4870 PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
4871 if (pTrunk)
4872 intnetR0BusyIncTrunk(pTrunk);
4873
4874 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock);
4875
4876 if (pTrunk)
4877 {
4878 Log(("intnetR0NetworkCreateIf: pfnConnectInterface hIf=%RX32\n", pIf->hIf));
4879 if (pTrunk->pIfPort)
4880 rc = pTrunk->pIfPort->pfnConnectInterface(pTrunk->pIfPort, pIf, &pIf->pvIfData);
4881 intnetR0BusyDecTrunk(pTrunk);
4882 }
4883 if (RT_SUCCESS(rc))
4884 {
4885 /*
4886 * We're good!
4887 */
4888 *phIf = pIf->hIf;
4889 Log(("intnetR0NetworkCreateIf: returns VINF_SUCCESS *phIf=%RX32 cbSend=%u cbRecv=%u cbBuf=%u\n",
4890 *phIf, pIf->pIntBufDefault->cbSend, pIf->pIntBufDefault->cbRecv, pIf->pIntBufDefault->cbBuf));
4891 return VINF_SUCCESS;
4892 }
4893 }
4894
4895 SUPR0ObjRelease(pIf->pvObj, pSession);
4896 LogFlow(("intnetR0NetworkCreateIf: returns %Rrc\n", rc));
4897 return rc;
4898 }
4899
4900 /* clean up */
4901 SUPR0MemFree(pIf->pSession, (RTHCUINTPTR)pIf->pIntBufDefault);
4902 pIf->pIntBufDefault = NULL;
4903 pIf->pIntBuf = NULL;
4904 }
4905 }
4906
4907 RTSpinlockDestroy(pIf->hRecvInSpinlock);
4908 pIf->hRecvInSpinlock = NIL_RTSPINLOCK;
4909 RTSemEventDestroy(pIf->hRecvEvent);
4910 pIf->hRecvEvent = NIL_RTSEMEVENT;
4911 RTMemFree(pIf->pDstTab);
4912 for (int i = kIntNetAddrType_Invalid + 1; i < kIntNetAddrType_End; i++)
4913 intnetR0IfAddrCacheDestroy(&pIf->aAddrCache[i]);
4914 RTMemFree(pIf);
4915 LogFlow(("intnetR0NetworkCreateIf: returns %Rrc\n", rc));
4916 return rc;
4917}
4918
4919
4920/** @copydoc INTNETTRUNKSWPORT::pfnSetSGPhys */
4921static DECLCALLBACK(bool) intnetR0TrunkIfPortSetSGPhys(PINTNETTRUNKSWPORT pSwitchPort, bool fEnable)
4922{
4923 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
4924 AssertMsgFailed(("Not implemented because it wasn't required on Darwin\n"));
4925 return ASMAtomicXchgBool(&pThis->fPhysSG, fEnable);
4926}
4927
4928
4929/** @copydoc INTNETTRUNKSWPORT::pfnReportMacAddress */
4930static DECLCALLBACK(void) intnetR0TrunkIfPortReportMacAddress(PINTNETTRUNKSWPORT pSwitchPort, PCRTMAC pMacAddr)
4931{
4932 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
4933
4934 /*
4935 * Get the network instance and grab the address spinlock before making
4936 * any changes.
4937 */
4938 intnetR0BusyIncTrunk(pThis);
4939 PINTNETNETWORK pNetwork = pThis->pNetwork;
4940 if (pNetwork)
4941 {
4942 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
4943
4944 pNetwork->MacTab.HostMac = *pMacAddr;
4945 pThis->MacAddr = *pMacAddr;
4946
4947 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock);
4948 }
4949 else
4950 pThis->MacAddr = *pMacAddr;
4951 intnetR0BusyDecTrunk(pThis);
4952}
4953
4954
4955/** @copydoc INTNETTRUNKSWPORT::pfnReportPromiscuousMode */
4956static DECLCALLBACK(void) intnetR0TrunkIfPortReportPromiscuousMode(PINTNETTRUNKSWPORT pSwitchPort, bool fPromiscuous)
4957{
4958 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
4959
4960 /*
4961 * Get the network instance and grab the address spinlock before making
4962 * any changes.
4963 */
4964 intnetR0BusyIncTrunk(pThis);
4965 PINTNETNETWORK pNetwork = pThis->pNetwork;
4966 if (pNetwork)
4967 {
4968 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
4969
4970 pNetwork->MacTab.fHostPromiscuousReal = fPromiscuous
4971 || (pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE);
4972 pNetwork->MacTab.fHostPromiscuousEff = pNetwork->MacTab.fHostPromiscuousReal
4973 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST);
4974
4975 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock);
4976 }
4977 intnetR0BusyDecTrunk(pThis);
4978}
4979
4980
4981/** @copydoc INTNETTRUNKSWPORT::pfnReportGsoCapabilities */
4982static DECLCALLBACK(void) intnetR0TrunkIfPortReportGsoCapabilities(PINTNETTRUNKSWPORT pSwitchPort,
4983 uint32_t fGsoCapabilities, uint32_t fDst)
4984{
4985 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
4986
4987 for (unsigned iBit = PDMNETWORKGSOTYPE_END; iBit < 32; iBit++)
4988 Assert(!(fGsoCapabilities & RT_BIT_32(iBit)));
4989 Assert(!(fDst & ~INTNETTRUNKDIR_VALID_MASK));
4990 Assert(fDst);
4991
4992 if (fDst & INTNETTRUNKDIR_HOST)
4993 pThis->fHostGsoCapabilites = fGsoCapabilities;
4994
4995 if (fDst & INTNETTRUNKDIR_WIRE)
4996 pThis->fWireGsoCapabilites = fGsoCapabilities;
4997}
4998
4999
5000/** @copydoc INTNETTRUNKSWPORT::pfnReportNoPreemptDsts */
5001static DECLCALLBACK(void) intnetR0TrunkIfPortReportNoPreemptDsts(PINTNETTRUNKSWPORT pSwitchPort, uint32_t fNoPreemptDsts)
5002{
5003 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5004 Assert(!(fNoPreemptDsts & ~INTNETTRUNKDIR_VALID_MASK));
5005
5006 pThis->fNoPreemptDsts = fNoPreemptDsts;
5007}
5008
5009
#ifdef VBOX_WITH_INTNET_DISCONNECT
/**
 * @copydoc INTNETTRUNKSWPORT::pfnDisconnect
 *
 * Releases the caller's busy reference via pfnReleaseBusy on every path and,
 * if the trunk is still attached to a valid network, disconnects it from the
 * network's MAC table and destroys it.
 */
static DECLCALLBACK(void) intnetR0TrunkIfPortDisconnect(PINTNETTRUNKSWPORT pSwitchPort, PINTNETTRUNKIFPORT pIfPort,
                                                        PFNINTNETTRUNKIFPORTRELEASEBUSY pfnReleaseBusy)
{
    PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);

    /*
     * The caller has marked the trunk instance busy on his side before making
     * the call (see method docs) to let us safely grab the network and internal
     * network instance pointers without racing the network destruction code
     * (intnetR0TrunkIfDestroy waits for the interface to stop being busy
     * before setting pNetwork to NULL and freeing up the resources).
     */
    PINTNETNETWORK pNetwork = pThis->pNetwork;
    if (pNetwork)
    {
        PINTNET pIntNet = pNetwork->pIntNet;
        Assert(pNetwork->pIntNet);

        /*
         * We must decrease the callers busy count here to prevent deadlocking
         * when requesting the big mutex ownership.  This will of course
         * unblock anyone stuck in intnetR0TrunkIfDestroy doing pfnWaitForIdle
         * (the other deadlock party), so we have to revalidate the network
         * pointer after taking ownership of the big mutex.
         */
        pfnReleaseBusy(pIfPort);

        RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);

        if (intnetR0NetworkIsValid(pIntNet, pNetwork))
        {
            Assert(pNetwork->MacTab.pTrunk == pThis); /* Must be valid as long as there are no concurrent calls to this method. */
            Assert(pThis->pIfPort == pIfPort);        /* Ditto */

            /*
             * Disconnect the trunk and destroy it, similar to what is done in
             * intnetR0NetworkDestruct.
             */
            pIfPort->pfnSetState(pIfPort, INTNETTRUNKIFSTATE_DISCONNECTING);

            /* Unlink the trunk from the switch table under the address spinlock. */
            RTSpinlockAcquire(pNetwork->hAddrSpinlock);
            pNetwork->MacTab.pTrunk = NULL;
            RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock);

            intnetR0TrunkIfDestroy(pThis, pNetwork);
        }

        RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
    }
    /*
     * We must always release the busy reference.
     */
    else
        pfnReleaseBusy(pIfPort);
}
#endif /* VBOX_WITH_INTNET_DISCONNECT */
5069
5070
5071/** @copydoc INTNETTRUNKSWPORT::pfnPreRecv */
5072static DECLCALLBACK(INTNETSWDECISION) intnetR0TrunkIfPortPreRecv(PINTNETTRUNKSWPORT pSwitchPort,
5073 void const *pvSrc, size_t cbSrc, uint32_t fSrc)
5074{
5075 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5076
5077 /* assert some sanity */
5078 AssertPtr(pvSrc);
5079 AssertReturn(cbSrc >= 6, INTNETSWDECISION_BROADCAST);
5080 Assert(fSrc);
5081
5082 /*
5083 * Mark the trunk as busy, make sure we've got a network and that there are
5084 * some active interfaces around.
5085 */
5086 INTNETSWDECISION enmSwDecision = INTNETSWDECISION_TRUNK;
5087 intnetR0BusyIncTrunk(pThis);
5088 PINTNETNETWORK pNetwork = pThis->pNetwork;
5089 if (RT_LIKELY( pNetwork
5090 && pNetwork->cActiveIFs > 0 ))
5091 {
5092 /*
5093 * Lazy bird! No pre-switching of multicast and shared-MAC-on-wire.
5094 */
5095 PCRTNETETHERHDR pEthHdr = (PCRTNETETHERHDR)pvSrc;
5096 if (intnetR0IsMacAddrMulticast(&pEthHdr->DstMac))
5097 enmSwDecision = INTNETSWDECISION_BROADCAST;
5098 else if (pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
5099 enmSwDecision = INTNETSWDECISION_BROADCAST;
5100 else
5101 enmSwDecision = intnetR0NetworkPreSwitchUnicast(pNetwork,
5102 fSrc,
5103 cbSrc >= 12 ? &pEthHdr->SrcMac : NULL,
5104 &pEthHdr->DstMac);
5105 }
5106
5107 intnetR0BusyDecTrunk(pThis);
5108 return enmSwDecision;
5109}
5110
5111
5112/** @copydoc INTNETTRUNKSWPORT::pfnRecv */
5113static DECLCALLBACK(bool) intnetR0TrunkIfPortRecv(PINTNETTRUNKSWPORT pSwitchPort, void *pvIf, PINTNETSG pSG, uint32_t fSrc)
5114{
5115 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5116
5117 /* assert some sanity */
5118 AssertPtr(pSG);
5119 Assert(fSrc);
5120 NOREF(pvIf); /* later */
5121
5122 /*
5123 * Mark the trunk as busy, make sure we've got a network and that there are
5124 * some active interfaces around.
5125 */
5126 bool fRc = false /* don't drop it */;
5127 intnetR0BusyIncTrunk(pThis);
5128 PINTNETNETWORK pNetwork = pThis->pNetwork;
5129 if (RT_LIKELY( pNetwork
5130 && pNetwork->cActiveIFs > 0 ))
5131 {
5132 /*
5133 * Grab or allocate a destination table.
5134 */
5135 bool const fIntCtx = RTThreadPreemptIsEnabled(NIL_RTTHREAD) || RTThreadIsInInterrupt(NIL_RTTHREAD);
5136 unsigned iDstTab = 0;
5137 PINTNETDSTTAB pDstTab = NULL;
5138 RTSpinlockAcquire(pThis->hDstTabSpinlock);
5139 if (fIntCtx)
5140 {
5141 /* Interrupt or restricted context. */
5142 iDstTab = RTMpCpuIdToSetIndex(RTMpCpuId());
5143 iDstTab %= pThis->cIntDstTabs;
5144 pDstTab = pThis->apIntDstTabs[iDstTab];
5145 if (RT_LIKELY(pDstTab))
5146 pThis->apIntDstTabs[iDstTab] = NULL;
5147 else
5148 {
5149 iDstTab = pThis->cIntDstTabs;
5150 while (iDstTab-- > 0)
5151 {
5152 pDstTab = pThis->apIntDstTabs[iDstTab];
5153 if (pDstTab)
5154 {
5155 pThis->apIntDstTabs[iDstTab] = NULL;
5156 break;
5157 }
5158 }
5159 }
5160 RTSpinlockReleaseNoInts(pThis->hDstTabSpinlock);
5161 Assert(!pDstTab || iDstTab < pThis->cIntDstTabs);
5162 }
5163 else
5164 {
5165 /* Task context, fallback is to allocate a table. */
5166 AssertCompile(RT_ELEMENTS(pThis->apTaskDstTabs) == 2); /* for loop rollout */
5167 pDstTab = pThis->apIntDstTabs[iDstTab = 0];
5168 if (!pDstTab)
5169 pDstTab = pThis->apIntDstTabs[iDstTab = 1];
5170 if (pDstTab)
5171 {
5172 pThis->apIntDstTabs[iDstTab] = NULL;
5173 RTSpinlockReleaseNoInts(pThis->hDstTabSpinlock);
5174 Assert(iDstTab < RT_ELEMENTS(pThis->apTaskDstTabs));
5175 }
5176 else
5177 {
5178 RTSpinlockReleaseNoInts(pThis->hDstTabSpinlock);
5179 intnetR0AllocDstTab(pNetwork->MacTab.cEntriesAllocated, &pDstTab);
5180 iDstTab = 65535;
5181 }
5182 }
5183 if (RT_LIKELY(pDstTab))
5184 {
5185 /*
5186 * Finally, get down to business of sending the frame.
5187 */
5188 INTNETSWDECISION enmSwDecision = intnetR0NetworkSend(pNetwork, NULL, fSrc, pSG, pDstTab);
5189 AssertMsg(enmSwDecision != INTNETSWDECISION_BAD_CONTEXT, ("fSrc=%#x fTrunkDst=%#x hdr=%.14Rhxs\n", fSrc, pDstTab->fTrunkDst, pSG->aSegs[0].pv));
5190 if (enmSwDecision == INTNETSWDECISION_INTNET)
5191 fRc = true; /* drop it */
5192
5193 /*
5194 * Free the destination table.
5195 */
5196 if (iDstTab == 65535)
5197 RTMemFree(pDstTab);
5198 else
5199 {
5200 RTSpinlockAcquire(pThis->hDstTabSpinlock);
5201 if (fIntCtx && !pThis->apIntDstTabs[iDstTab])
5202 pThis->apIntDstTabs[iDstTab] = pDstTab;
5203 else if (!fIntCtx && !pThis->apTaskDstTabs[iDstTab])
5204 pThis->apTaskDstTabs[iDstTab] = pDstTab;
5205 else
5206 {
5207 /* this shouldn't happen! */
5208 PINTNETDSTTAB *papDstTabs = fIntCtx ? &pThis->apIntDstTabs[0] : &pThis->apTaskDstTabs[0];
5209 iDstTab = fIntCtx ? pThis->cIntDstTabs : RT_ELEMENTS(pThis->apTaskDstTabs);
5210 while (iDstTab-- > 0)
5211 if (!papDstTabs[iDstTab])
5212 {
5213 papDstTabs[iDstTab] = pDstTab;
5214 break;
5215 }
5216 }
5217 RTSpinlockReleaseNoInts(pThis->hDstTabSpinlock);
5218 Assert(iDstTab < RT_MAX(RT_ELEMENTS(pThis->apTaskDstTabs), pThis->cIntDstTabs));
5219 }
5220 }
5221 }
5222
5223 intnetR0BusyDecTrunk(pThis);
5224 return fRc;
5225}
5226
5227
5228/** @copydoc INTNETTRUNKSWPORT::pfnSGRetain */
5229static DECLCALLBACK(void) intnetR0TrunkIfPortSGRetain(PINTNETTRUNKSWPORT pSwitchPort, PINTNETSG pSG)
5230{
5231 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5232 PINTNETNETWORK pNetwork = pThis->pNetwork;
5233
5234 /* assert some sanity */
5235 AssertPtrReturnVoid(pNetwork);
5236 AssertReturnVoid(pNetwork->hEvtBusyIf != NIL_RTSEMEVENT);
5237 AssertPtr(pSG);
5238 Assert(pSG->cUsers > 0 && pSG->cUsers < 256);
5239
5240 /* do it. */
5241 ++pSG->cUsers;
5242}
5243
5244
5245/** @copydoc INTNETTRUNKSWPORT::pfnSGRelease */
5246static DECLCALLBACK(void) intnetR0TrunkIfPortSGRelease(PINTNETTRUNKSWPORT pSwitchPort, PINTNETSG pSG)
5247{
5248 PINTNETTRUNKIF pThis = INTNET_SWITCHPORT_2_TRUNKIF(pSwitchPort);
5249 PINTNETNETWORK pNetwork = pThis->pNetwork;
5250
5251 /* assert some sanity */
5252 AssertPtrReturnVoid(pNetwork);
5253 AssertReturnVoid(pNetwork->hEvtBusyIf != NIL_RTSEMEVENT);
5254 AssertPtr(pSG);
5255 Assert(pSG->cUsers > 0);
5256
5257 /*
5258 * Free it?
5259 */
5260 if (!--pSG->cUsers)
5261 {
5262 /** @todo later */
5263 }
5264}
5265
5266
/**
 * Shutdown the trunk interface.
 *
 * Waits for the trunk interface port (if any) to become idle, disconnects and
 * releases it, and then frees the destination tables, the spinlock and the
 * trunk structure itself.  A NULL @a pThis is quietly ignored.
 *
 * @param   pThis       The trunk.
 * @param   pNetwork    The network.
 *
 * @remarks The caller must hold the global lock.
 */
static void intnetR0TrunkIfDestroy(PINTNETTRUNKIF pThis, PINTNETNETWORK pNetwork)
{
    /* assert sanity */
    if (!pThis)
        return;
    AssertPtr(pThis);
    Assert(pThis->pNetwork == pNetwork);
    AssertPtrNull(pThis->pIfPort);

    /*
     * The interface has already been deactivated, we just have to wait for
     * it to become idle before we can disconnect and release it.
     */
    PINTNETTRUNKIFPORT pIfPort = pThis->pIfPort;
    if (pIfPort)
    {
        /* unset it */
        pThis->pIfPort = NULL;

        /* wait in portions so we can complain every now and then. */
        uint64_t StartTS = RTTimeSystemNanoTS();
        int rc = pIfPort->pfnWaitForIdle(pIfPort, 10*1000);
        if (RT_FAILURE(rc))
        {
            LogRel(("intnet: '%s' didn't become idle in %RU64 ns (%Rrc).\n",
                    pNetwork->szName, RTTimeSystemNanoTS() - StartTS, rc));
            Assert(rc == VERR_TIMEOUT);
            /* Keep retrying until 30 seconds have passed since StartTS. */
            while (   RT_FAILURE(rc)
                   && RTTimeSystemNanoTS() - StartTS < UINT64_C(30000000000)) /* 30 sec */
                rc = pIfPort->pfnWaitForIdle(pIfPort, 10*1000);
            if (rc == VERR_TIMEOUT)
            {
                LogRel(("intnet: '%s' didn't become idle in %RU64 ns (%Rrc).\n",
                        pNetwork->szName, RTTimeSystemNanoTS() - StartTS, rc));
                /* Last attempt: longer waits until 360 seconds have passed since StartTS. */
                while (   rc == VERR_TIMEOUT
                       && RTTimeSystemNanoTS() - StartTS < UINT64_C(360000000000)) /* 360 sec */
                    rc = pIfPort->pfnWaitForIdle(pIfPort, 30*1000);
                if (RT_FAILURE(rc))
                {
                    LogRel(("intnet: '%s' didn't become idle in %RU64 ns (%Rrc), giving up.\n",
                            pNetwork->szName, RTTimeSystemNanoTS() - StartTS, rc));
                    AssertRC(rc);
                }
            }
        }

        /* disconnect & release it. */
        pIfPort->pfnDisconnectAndRelease(pIfPort);
    }

    /*
     * Free up the resources.
     */
    pThis->pNetwork = NULL; /* Must not be cleared while busy, see intnetR0TrunkIfPortDisconnect. */
    RTSpinlockDestroy(pThis->hDstTabSpinlock);
    for (unsigned i = 0; i < RT_ELEMENTS(pThis->apTaskDstTabs); i++)
    {
        Assert(pThis->apTaskDstTabs[i]);
        RTMemFree(pThis->apTaskDstTabs[i]);
        pThis->apTaskDstTabs[i] = NULL;
    }
    for (unsigned i = 0; i < pThis->cIntDstTabs; i++)
    {
        Assert(pThis->apIntDstTabs[i]);
        RTMemFree(pThis->apIntDstTabs[i]);
        pThis->apIntDstTabs[i] = NULL;
    }
    RTMemFree(pThis);
}
5344
5345
/**
 * Creates the trunk connection (if any).
 *
 * Returns immediately with success for trunk types that need no trunk
 * component.  Otherwise it allocates the trunk structure and its destination
 * tables, fills in the switch port callbacks, links the trunk into the
 * network's MAC table and, in ring-0, asks the component factory to create and
 * connect the trunk interface port.  Everything is undone on failure.
 *
 * @returns VBox status code.
 *
 * @param   pNetwork    The newly created network.
 * @param   pSession    The session handle.
 */
static int intnetR0NetworkCreateTrunkIf(PINTNETNETWORK pNetwork, PSUPDRVSESSION pSession)
{
    const char *pszName;
    switch (pNetwork->enmTrunkType)
    {
        /*
         * The 'None' case, simple.
         */
        case kIntNetTrunkType_None:
        case kIntNetTrunkType_WhateverNone:
#ifdef VBOX_WITH_NAT_SERVICE
            /*
             * Well, here we don't want load anything special,
             * just communicate between processes via internal network.
             */
        case kIntNetTrunkType_SrvNat:
#endif
            return VINF_SUCCESS;

        /* Can't happen, but makes GCC happy. */
        default:
            return VERR_NOT_IMPLEMENTED;

        /*
         * Translate enum to component factory name.
         */
        case kIntNetTrunkType_NetFlt:
            pszName = "VBoxNetFlt";
            break;
        case kIntNetTrunkType_NetAdp:
#if defined(RT_OS_DARWIN) && !defined(VBOXNETADP_DO_NOT_USE_NETFLT)
            pszName = "VBoxNetFlt";
#else /* VBOXNETADP_DO_NOT_USE_NETFLT */
            pszName = "VBoxNetAdp";
#endif /* VBOXNETADP_DO_NOT_USE_NETFLT */
            break;
#ifndef VBOX_WITH_NAT_SERVICE
        case kIntNetTrunkType_SrvNat:
            pszName = "VBoxSrvNat";
            break;
#endif
    }

    /*
     * Allocate the trunk interface and associated destination tables.
     *
     * We take a very optimistic view on the parallelism of the host
     * network stack and NIC driver.  So, we allocate one table for each
     * possible CPU to deal with interrupt time requests and one for task
     * time calls.
     */
    RTCPUID         cCpus = RTMpGetCount(); Assert(cCpus > 0);
    /* The structure is sized so that apIntDstTabs holds cCpus entries. */
    PINTNETTRUNKIF  pTrunk = (PINTNETTRUNKIF)RTMemAllocZ(RT_OFFSETOF(INTNETTRUNKIF, apIntDstTabs[cCpus]));
    if (!pTrunk)
        return VERR_NO_MEMORY;

    Assert(pNetwork->MacTab.cEntriesAllocated > 0);
    int rc = VINF_SUCCESS;
    pTrunk->cIntDstTabs = cCpus;
    /* Both loops stop at the first allocation failure; the zeroed remainder is
       handed to RTMemFree as NULL pointers by the cleanup code at the bottom. */
    for (unsigned i = 0; i < cCpus && RT_SUCCESS(rc); i++)
        rc = intnetR0AllocDstTab(pNetwork->MacTab.cEntriesAllocated, &pTrunk->apIntDstTabs[i]);
    for (unsigned i = 0; i < RT_ELEMENTS(pTrunk->apTaskDstTabs) && RT_SUCCESS(rc); i++)
        rc = intnetR0AllocDstTab(pNetwork->MacTab.cEntriesAllocated, &pTrunk->apTaskDstTabs[i]);

    if (RT_SUCCESS(rc))
    {
        pTrunk->SwitchPort.u32Version               = INTNETTRUNKSWPORT_VERSION;
        pTrunk->SwitchPort.pfnPreRecv               = intnetR0TrunkIfPortPreRecv;
        pTrunk->SwitchPort.pfnRecv                  = intnetR0TrunkIfPortRecv;
        pTrunk->SwitchPort.pfnSGRetain              = intnetR0TrunkIfPortSGRetain;
        pTrunk->SwitchPort.pfnSGRelease             = intnetR0TrunkIfPortSGRelease;
        pTrunk->SwitchPort.pfnSetSGPhys             = intnetR0TrunkIfPortSetSGPhys;
        pTrunk->SwitchPort.pfnReportMacAddress      = intnetR0TrunkIfPortReportMacAddress;
        pTrunk->SwitchPort.pfnReportPromiscuousMode = intnetR0TrunkIfPortReportPromiscuousMode;
        pTrunk->SwitchPort.pfnReportGsoCapabilities = intnetR0TrunkIfPortReportGsoCapabilities;
        pTrunk->SwitchPort.pfnReportNoPreemptDsts   = intnetR0TrunkIfPortReportNoPreemptDsts;
#ifdef VBOX_WITH_INTNET_DISCONNECT
        pTrunk->SwitchPort.pfnDisconnect            = intnetR0TrunkIfPortDisconnect;
#endif /* VBOX_WITH_INTNET_DISCONNECT */
        pTrunk->SwitchPort.u32VersionEnd            = INTNETTRUNKSWPORT_VERSION;
        //pTrunk->pIfPort                 = NULL;
        pTrunk->pNetwork                            = pNetwork;
        /* Start out with the broadcast address (all 0xff) as the trunk MAC. */
        pTrunk->MacAddr.au8[0]                      = 0xff;
        pTrunk->MacAddr.au8[1]                      = 0xff;
        pTrunk->MacAddr.au8[2]                      = 0xff;
        pTrunk->MacAddr.au8[3]                      = 0xff;
        pTrunk->MacAddr.au8[4]                      = 0xff;
        pTrunk->MacAddr.au8[5]                      = 0xff;
        //pTrunk->fPhysSG                 = false;
        //pTrunk->fUnused                 = false;
        //pTrunk->cBusy                   = 0;
        //pTrunk->fNoPreemptDsts          = 0;
        //pTrunk->fWireGsoCapabilites     = 0;
        //pTrunk->fHostGsoCapabilites     = 0;
        //pTrunk->abGsoHdrs               = {0};
        pTrunk->hDstTabSpinlock                     = NIL_RTSPINLOCK;
        //pTrunk->apTaskDstTabs           = above;
        //pTrunk->cIntDstTabs             = above;
        //pTrunk->apIntDstTabs            = above;

        /*
         * Create the lock (we've NIL'ed the members above to simplify cleanup).
         */
        rc = RTSpinlockCreate(&pTrunk->hDstTabSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "hDstTabSpinlock");
        if (RT_SUCCESS(rc))
        {
            /*
             * There are a couple of bits in MacTab as well pertaining to the
             * trunk.  We have to set this before it's reported.
             *
             * Note! We don't need to lock the MacTab here - creation time.
             */
            pNetwork->MacTab.pTrunk               = pTrunk;
            pNetwork->MacTab.HostMac              = pTrunk->MacAddr;
            pNetwork->MacTab.fHostPromiscuousReal = false;
            pNetwork->MacTab.fHostPromiscuousEff  = (pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE)
                                                 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST);
            pNetwork->MacTab.fHostActive          = false;
            pNetwork->MacTab.fWirePromiscuousReal = RT_BOOL(pNetwork->fFlags & INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE);
            pNetwork->MacTab.fWirePromiscuousEff  = pNetwork->MacTab.fWirePromiscuousReal
                                                 && (pNetwork->fFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE);
            pNetwork->MacTab.fWireActive          = false;

#ifdef IN_RING0 /* (testcase is ring-3) */
            /*
             * Query the factory we want, then use it create and connect the trunk.
             */
            PINTNETTRUNKFACTORY pTrunkFactory = NULL;
            rc = SUPR0ComponentQueryFactory(pSession, pszName, INTNETTRUNKFACTORY_UUID_STR, (void **)&pTrunkFactory);
            if (RT_SUCCESS(rc))
            {
                rc = pTrunkFactory->pfnCreateAndConnect(pTrunkFactory,
                                                        pNetwork->szTrunk,
                                                        &pTrunk->SwitchPort,
                                                        pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE
                                                        ? INTNETTRUNKFACTORY_FLAG_NO_PROMISC
                                                        : 0,
                                                        &pTrunk->pIfPort);
                /* The factory is released regardless of whether the connect worked. */
                pTrunkFactory->pfnRelease(pTrunkFactory);
                if (RT_SUCCESS(rc))
                {
                    Assert(pTrunk->pIfPort);

                    Log(("intnetR0NetworkCreateTrunkIf: VINF_SUCCESS - pszName=%s szTrunk=%s%s Network=%s\n",
                         pszName, pNetwork->szTrunk, pNetwork->fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE ? " shared-mac" : "", pNetwork->szName));
                    return VINF_SUCCESS;
                }
            }
#else  /* IN_RING3 */
            NOREF(pSession);
            rc = VERR_NOT_SUPPORTED;
#endif /* IN_RING3 */

            /* Failed: unlink the trunk from the MAC table again before freeing it. */
            pNetwork->MacTab.pTrunk = NULL;
        }

        /* bail out and clean up. */
        RTSpinlockDestroy(pTrunk->hDstTabSpinlock);
    }

    for (unsigned i = 0; i < RT_ELEMENTS(pTrunk->apTaskDstTabs); i++)
        RTMemFree(pTrunk->apTaskDstTabs[i]);
    for (unsigned i = 0; i < pTrunk->cIntDstTabs; i++)
        RTMemFree(pTrunk->apIntDstTabs[i]);
    RTMemFree(pTrunk);

    LogFlow(("intnetR0NetworkCreateTrunkIf: %Rrc - pszName=%s szTrunk=%s Network=%s\n",
             rc, pszName, pNetwork->szTrunk, pNetwork->szName));
    return rc;
}
5524
5525
5526
5527/**
5528 * Object destructor callback.
5529 * This is called for reference counted objectes when the count reaches 0.
5530 *
5531 * @param pvObj The object pointer.
5532 * @param pvUser1 Pointer to the network.
5533 * @param pvUser2 Pointer to the INTNET instance data.
5534 */
5535static DECLCALLBACK(void) intnetR0NetworkDestruct(void *pvObj, void *pvUser1, void *pvUser2)
5536{
5537 PINTNETNETWORK pNetwork = (PINTNETNETWORK)pvUser1;
5538 PINTNET pIntNet = (PINTNET)pvUser2;
5539 Log(("intnetR0NetworkDestruct: pvObj=%p pNetwork=%p pIntNet=%p %s\n", pvObj, pNetwork, pIntNet, pNetwork->szName));
5540 Assert(pNetwork->pIntNet == pIntNet);
5541
5542 /* Take the big create/open/destroy sem. */
5543 RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
5544
5545 /*
5546 * Tell the trunk, if present, that we're about to disconnect it and wish
5547 * no further calls from it.
5548 */
5549 PINTNETTRUNKIF pTrunk = pNetwork->MacTab.pTrunk;
5550 if (pTrunk)
5551 pTrunk->pIfPort->pfnSetState(pTrunk->pIfPort, INTNETTRUNKIFSTATE_DISCONNECTING);
5552
5553 /*
5554 * Deactivate and orphan any remaining interfaces and wait for them to idle.
5555 *
5556 * Note! Normally there are no more interfaces at this point, however, when
5557 * supdrvCloseSession / supdrvCleanupSession release the objects the
5558 * order is undefined. So, it's quite possible that the network will
5559 * be dereference and destroyed before the interfaces.
5560 */
5561 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5562
5563 uint32_t iIf = pNetwork->MacTab.cEntries;
5564 while (iIf-- > 0)
5565 {
5566 pNetwork->MacTab.paEntries[iIf].fActive = false;
5567 pNetwork->MacTab.paEntries[iIf].pIf->fActive = false;
5568 }
5569
5570 pNetwork->MacTab.fHostActive = false;
5571 pNetwork->MacTab.fWireActive = false;
5572
5573 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock);
5574
5575 /* Wait for all the interfaces to quiesce. (Interfaces cannot be
5576 removed / added since we're holding the big lock.) */
5577 if (pTrunk)
5578 intnetR0BusyWait(pNetwork, &pTrunk->cBusy);
5579
5580 iIf = pNetwork->MacTab.cEntries;
5581 while (iIf-- > 0)
5582 intnetR0BusyWait(pNetwork, &pNetwork->MacTab.paEntries[iIf].pIf->cBusy);
5583
5584 /* Orphan the interfaces (not trunk). Don't bother with calling
5585 pfnDisconnectInterface here since the networking is going away. */
5586 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5587 while ((iIf = pNetwork->MacTab.cEntries) > 0)
5588 {
5589 PINTNETIF pIf = pNetwork->MacTab.paEntries[iIf - 1].pIf;
5590 RTSpinlockRelease(pNetwork->hAddrSpinlock);
5591
5592 intnetR0BusyWait(pNetwork, &pIf->cBusy);
5593
5594 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5595 if ( iIf == pNetwork->MacTab.cEntries /* paranoia */
5596 && pIf->cBusy)
5597 {
5598 pIf->pNetwork = NULL;
5599 pNetwork->MacTab.cEntries--;
5600 }
5601 }
5602
5603 /*
5604 * Zap the trunk pointer while we still own the spinlock, destroy the
5605 * trunk after we've left it. Note that this might take a while...
5606 */
5607 pNetwork->MacTab.pTrunk = NULL;
5608
5609 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock);
5610
5611 if (pTrunk)
5612 intnetR0TrunkIfDestroy(pTrunk, pNetwork);
5613
5614 /*
5615 * Unlink the network.
5616 * Note that it needn't be in the list if we failed during creation.
5617 */
5618 PINTNETNETWORK pPrev = pIntNet->pNetworks;
5619 if (pPrev == pNetwork)
5620 pIntNet->pNetworks = pNetwork->pNext;
5621 else
5622 {
5623 for (; pPrev; pPrev = pPrev->pNext)
5624 if (pPrev->pNext == pNetwork)
5625 {
5626 pPrev->pNext = pNetwork->pNext;
5627 break;
5628 }
5629 }
5630 pNetwork->pNext = NULL;
5631 pNetwork->pvObj = NULL;
5632
5633 /*
5634 * Free resources.
5635 */
5636 RTSemEventDestroy(pNetwork->hEvtBusyIf);
5637 pNetwork->hEvtBusyIf = NIL_RTSEMEVENT;
5638 RTSpinlockDestroy(pNetwork->hAddrSpinlock);
5639 pNetwork->hAddrSpinlock = NIL_RTSPINLOCK;
5640 RTMemFree(pNetwork->MacTab.paEntries);
5641 pNetwork->MacTab.paEntries = NULL;
5642 RTMemFree(pNetwork);
5643
5644 /* Release the create/destroy sem. */
5645 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
5646}
5647
5648
5649/**
5650 * Checks if the open network flags are compatible.
5651 *
5652 * @returns VBox status code.
5653 * @param pNetwork The network.
5654 * @param fFlags The open network flags.
5655 */
5656static int intnetR0CheckOpenNetworkFlags(PINTNETNETWORK pNetwork, uint32_t fFlags)
5657{
5658 uint32_t const fNetFlags = pNetwork->fFlags;
5659
5660 if ( (fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
5661 ^ (fNetFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE))
5662 return VERR_INTNET_INCOMPATIBLE_FLAGS;
5663
5664 if (fFlags & INTNET_OPEN_FLAGS_REQUIRE_EXACT)
5665 {
5666 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
5667 if ( (fFlags & g_afIntNetOpenNetworkNetFlags[i].fPair)
5668 && (fFlags & g_afIntNetOpenNetworkNetFlags[i].fPair)
5669 != (fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fPair) )
5670 return VERR_INTNET_INCOMPATIBLE_FLAGS;
5671 }
5672
5673 if (fFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES)
5674 {
5675 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
5676 if ( (fFlags & g_afIntNetOpenNetworkNetFlags[i].fRestrictive)
5677 && !(fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fRestrictive)
5678 && (fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fFixed) )
5679 return VERR_INTNET_INCOMPATIBLE_FLAGS;
5680 }
5681
5682 return VINF_SUCCESS;
5683}
5684
5685
5686/**
5687 * Adapts flag changes on network opening.
5688 *
5689 * @returns VBox status code.
5690 * @param pNetwork The network.
5691 * @param fFlags The open network flags.
5692 */
5693static int intnetR0AdaptOpenNetworkFlags(PINTNETNETWORK pNetwork, uint32_t fFlags)
5694{
5695 /*
5696 * Upgrade the minimum policy flags.
5697 */
5698 uint32_t fNetMinFlags = pNetwork->fMinFlags;
5699 Assert(!(fNetMinFlags & INTNET_OPEN_FLAGS_RELAXED_MASK));
5700 if (fFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES)
5701 {
5702 fNetMinFlags |= fFlags & INTNET_OPEN_FLAGS_STRICT_MASK;
5703 if (fNetMinFlags != pNetwork->fMinFlags)
5704 {
5705 LogRel(("INTNET: %s - min flags changed %#x -> %#x\n", pNetwork->szName, pNetwork->fMinFlags, fNetMinFlags));
5706 pNetwork->fMinFlags = fNetMinFlags;
5707 }
5708 }
5709
5710 /*
5711 * Calculate the new network flags.
5712 * (Depends on fNetMinFlags being recalculated first.)
5713 */
5714 uint32_t fNetFlags = pNetwork->fFlags;
5715
5716 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
5717 {
5718 Assert(fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fPair);
5719 Assert(!(fNetMinFlags & g_afIntNetOpenNetworkNetFlags[i].fRelaxed));
5720
5721 if (!(fFlags & g_afIntNetOpenNetworkNetFlags[i].fPair))
5722 continue;
5723 if (fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fFixed)
5724 continue;
5725
5726 if ( (fNetMinFlags & g_afIntNetOpenNetworkNetFlags[i].fRestrictive)
5727 || (fFlags & g_afIntNetOpenNetworkNetFlags[i].fRestrictive) )
5728 {
5729 fNetFlags &= ~g_afIntNetOpenNetworkNetFlags[i].fPair;
5730 fNetFlags |= g_afIntNetOpenNetworkNetFlags[i].fRestrictive;
5731 }
5732 else if (!(fFlags & INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES))
5733 {
5734 fNetFlags &= ~g_afIntNetOpenNetworkNetFlags[i].fPair;
5735 fNetFlags |= g_afIntNetOpenNetworkNetFlags[i].fRelaxed;
5736 }
5737 }
5738
5739 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
5740 {
5741 Assert(fNetFlags & g_afIntNetOpenNetworkNetFlags[i].fPair);
5742 fNetFlags |= fFlags & g_afIntNetOpenNetworkNetFlags[i].fFixed;
5743 }
5744
5745 /*
5746 * Apply the flags if they changed.
5747 */
5748 uint32_t const fOldNetFlags = pNetwork->fFlags;
5749 if (fOldNetFlags != fNetFlags)
5750 {
5751 LogRel(("INTNET: %s - flags changed %#x -> %#x\n", pNetwork->szName, fOldNetFlags, fNetFlags));
5752
5753 RTSpinlockAcquire(pNetwork->hAddrSpinlock);
5754
5755 pNetwork->fFlags = fNetFlags;
5756
5757 /* Recalculate some derived switcher variables. */
5758 bool fActiveTrunk = pNetwork->MacTab.pTrunk
5759 && pNetwork->cActiveIFs > 0;
5760 pNetwork->MacTab.fHostActive = fActiveTrunk
5761 && (fNetFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED);
5762 pNetwork->MacTab.fHostPromiscuousEff = ( pNetwork->MacTab.fHostPromiscuousReal
5763 || (fNetFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_PROMISC_MODE))
5764 && (fNetFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST);
5765
5766 pNetwork->MacTab.fWireActive = fActiveTrunk
5767 && (fNetFlags & INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED);
5768 pNetwork->MacTab.fWirePromiscuousReal= RT_BOOL(fNetFlags & INTNET_OPEN_FLAGS_TRUNK_WIRE_PROMISC_MODE);
5769 pNetwork->MacTab.fWirePromiscuousEff = pNetwork->MacTab.fWirePromiscuousReal
5770 && (fNetFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE);
5771
5772 if ((fOldNetFlags ^ fNetFlags) & INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS)
5773 {
5774 pNetwork->MacTab.cPromiscuousEntries = 0;
5775 pNetwork->MacTab.cPromiscuousNoTrunkEntries = 0;
5776
5777 uint32_t iIf = pNetwork->MacTab.cEntries;
5778 while (iIf-- > 0)
5779 {
5780 PINTNETMACTABENTRY pEntry = &pNetwork->MacTab.paEntries[iIf];
5781 PINTNETIF pIf2 = pEntry->pIf;
5782 if ( pIf2 /* paranoia */
5783 && pIf2->fPromiscuousReal)
5784 {
5785 bool fPromiscuousEff = (fNetFlags & INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS)
5786 && (pIf2->fOpenFlags & INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW);
5787 pEntry->fPromiscuousEff = fPromiscuousEff;
5788 pEntry->fPromiscuousSeeTrunk = fPromiscuousEff
5789 && (pIf2->fOpenFlags & INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK);
5790
5791 if (pEntry->fPromiscuousEff)
5792 {
5793 pNetwork->MacTab.cPromiscuousEntries++;
5794 if (!pEntry->fPromiscuousSeeTrunk)
5795 pNetwork->MacTab.cPromiscuousNoTrunkEntries++;
5796 }
5797 }
5798 }
5799 }
5800
5801 RTSpinlockReleaseNoInts(pNetwork->hAddrSpinlock);
5802 }
5803
5804 return VINF_SUCCESS;
5805}
5806
5807
5808/**
5809 * Opens an existing network.
5810 *
5811 * The call must own the INTNET::hMtxCreateOpenDestroy.
5812 *
5813 * @returns VBox status code.
5814 * @param pIntNet The instance data.
5815 * @param pSession The current session.
5816 * @param pszNetwork The network name. This has a valid length.
5817 * @param enmTrunkType The trunk type.
5818 * @param pszTrunk The trunk name. Its meaning is specific to the type.
5819 * @param fFlags Flags, see INTNET_OPEN_FLAGS_*.
5820 * @param ppNetwork Where to store the pointer to the network on success.
5821 */
5822static int intnetR0OpenNetwork(PINTNET pIntNet, PSUPDRVSESSION pSession, const char *pszNetwork, INTNETTRUNKTYPE enmTrunkType,
5823 const char *pszTrunk, uint32_t fFlags, PINTNETNETWORK *ppNetwork)
5824{
5825 LogFlow(("intnetR0OpenNetwork: pIntNet=%p pSession=%p pszNetwork=%p:{%s} enmTrunkType=%d pszTrunk=%p:{%s} fFlags=%#x ppNetwork=%p\n",
5826 pIntNet, pSession, pszNetwork, pszNetwork, enmTrunkType, pszTrunk, pszTrunk, fFlags, ppNetwork));
5827
5828 /* just pro forma validation, the caller is internal. */
5829 AssertPtr(pIntNet);
5830 AssertPtr(pSession);
5831 AssertPtr(pszNetwork);
5832 Assert(enmTrunkType > kIntNetTrunkType_Invalid && enmTrunkType < kIntNetTrunkType_End);
5833 AssertPtr(pszTrunk);
5834 Assert(!(fFlags & ~INTNET_OPEN_FLAGS_MASK));
5835 AssertPtr(ppNetwork);
5836 *ppNetwork = NULL;
5837
5838 /*
5839 * Search networks by name.
5840 */
5841 PINTNETNETWORK pCur;
5842 uint8_t cchName = (uint8_t)strlen(pszNetwork);
5843 Assert(cchName && cchName < sizeof(pCur->szName)); /* caller ensures this */
5844
5845 pCur = pIntNet->pNetworks;
5846 while (pCur)
5847 {
5848 if ( pCur->cchName == cchName
5849 && !memcmp(pCur->szName, pszNetwork, cchName))
5850 {
5851 /*
5852 * Found the network, now check that we have the same ideas
5853 * about the trunk setup and security.
5854 */
5855 int rc;
5856 if ( enmTrunkType == kIntNetTrunkType_WhateverNone
5857#ifdef VBOX_WITH_NAT_SERVICE
5858 || enmTrunkType == kIntNetTrunkType_SrvNat /* @todo: what does it mean */
5859#endif
5860 || ( pCur->enmTrunkType == enmTrunkType
5861 && !strcmp(pCur->szTrunk, pszTrunk)))
5862 {
5863 rc = intnetR0CheckOpenNetworkFlags(pCur, fFlags);
5864 if (RT_SUCCESS(rc))
5865 {
5866 /*
5867 * Increment the reference and check that the session
5868 * can access this network.
5869 */
5870 rc = SUPR0ObjAddRef(pCur->pvObj, pSession);
5871 if (RT_SUCCESS(rc))
5872 {
5873 if (pCur->fFlags & INTNET_OPEN_FLAGS_ACCESS_RESTRICTED)
5874 rc = SUPR0ObjVerifyAccess(pCur->pvObj, pSession, pCur->szName);
5875 if (RT_SUCCESS(rc))
5876 *ppNetwork = pCur;
5877 else
5878 SUPR0ObjRelease(pCur->pvObj, pSession);
5879 }
5880 else if (rc == VERR_WRONG_ORDER)
5881 rc = VERR_NOT_FOUND; /* destruction race, pretend the other isn't there. */
5882 }
5883 }
5884 else
5885 {
5886 rc = VERR_INTNET_INCOMPATIBLE_TRUNK;
5887 LogRel(("intnetR0OpenNetwork failed. rc=%Rrc pCur->szTrunk=%s pszTrunk=%s pCur->enmTrunkType=%d enmTrunkType=%d\n",
5888 rc, pCur->szTrunk, pszTrunk, pCur->enmTrunkType, enmTrunkType));
5889 }
5890
5891 LogFlow(("intnetR0OpenNetwork: returns %Rrc *ppNetwork=%p\n", rc, *ppNetwork));
5892 return rc;
5893 }
5894
5895 pCur = pCur->pNext;
5896 }
5897
5898 LogFlow(("intnetR0OpenNetwork: returns VERR_NOT_FOUND\n"));
5899 return VERR_NOT_FOUND;
5900}
5901
5902
5903/**
5904 * Creates a new network.
5905 *
5906 * The call must own the INTNET::hMtxCreateOpenDestroy and has already attempted
5907 * opening the network and found it to be non-existing.
5908 *
5909 * @returns VBox status code.
5910 * @param pIntNet The instance data.
5911 * @param pSession The session handle.
5912 * @param pszNetwork The name of the network. This must be at least one character long and no longer
5913 * than the INTNETNETWORK::szName.
5914 * @param enmTrunkType The trunk type.
5915 * @param pszTrunk The trunk name. Its meaning is specific to the type.
5916 * @param fFlags Flags, see INTNET_OPEN_FLAGS_*.
5917 * @param ppNetwork Where to store the network. In the case of failure
5918 * whatever is returned here should be dereferenced
5919 * outside the INTNET::hMtxCreateOpenDestroy.
5920 */
5921static int intnetR0CreateNetwork(PINTNET pIntNet, PSUPDRVSESSION pSession, const char *pszNetwork, INTNETTRUNKTYPE enmTrunkType,
5922 const char *pszTrunk, uint32_t fFlags, PINTNETNETWORK *ppNetwork)
5923{
5924 LogFlow(("intnetR0CreateNetwork: pIntNet=%p pSession=%p pszNetwork=%p:{%s} enmTrunkType=%d pszTrunk=%p:{%s} fFlags=%#x ppNetwork=%p\n",
5925 pIntNet, pSession, pszNetwork, pszNetwork, enmTrunkType, pszTrunk, pszTrunk, fFlags, ppNetwork));
5926
5927 /* just pro forma validation, the caller is internal. */
5928 AssertPtr(pIntNet);
5929 AssertPtr(pSession);
5930 AssertPtr(pszNetwork);
5931 Assert(enmTrunkType > kIntNetTrunkType_Invalid && enmTrunkType < kIntNetTrunkType_End);
5932 AssertPtr(pszTrunk);
5933 Assert(!(fFlags & ~INTNET_OPEN_FLAGS_MASK));
5934 AssertPtr(ppNetwork);
5935
5936 *ppNetwork = NULL;
5937
5938 /*
5939 * Adjust the flags with defaults for the network policies.
5940 * Note: Main restricts promiscuous mode on the per interface level.
5941 */
5942 fFlags &= ~( INTNET_OPEN_FLAGS_IF_FIXED
5943 | INTNET_OPEN_FLAGS_IF_PROMISC_ALLOW
5944 | INTNET_OPEN_FLAGS_IF_PROMISC_DENY
5945 | INTNET_OPEN_FLAGS_IF_PROMISC_SEE_TRUNK
5946 | INTNET_OPEN_FLAGS_IF_PROMISC_NO_TRUNK
5947 | INTNET_OPEN_FLAGS_REQUIRE_AS_RESTRICTIVE_POLICIES
5948 | INTNET_OPEN_FLAGS_REQUIRE_EXACT);
5949 uint32_t fDefFlags = INTNET_OPEN_FLAGS_PROMISC_ALLOW_CLIENTS
5950 | INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_HOST
5951 | INTNET_OPEN_FLAGS_PROMISC_ALLOW_TRUNK_WIRE
5952 | INTNET_OPEN_FLAGS_TRUNK_HOST_ENABLED
5953 | INTNET_OPEN_FLAGS_TRUNK_HOST_CHASTE_MODE
5954 | INTNET_OPEN_FLAGS_TRUNK_WIRE_ENABLED
5955 | INTNET_OPEN_FLAGS_TRUNK_WIRE_CHASTE_MODE;
5956 if ( enmTrunkType == kIntNetTrunkType_WhateverNone
5957#ifdef VBOX_WITH_NAT_SERVICE
5958 || enmTrunkType == kIntNetTrunkType_SrvNat /* simialar security */
5959#endif
5960 || enmTrunkType == kIntNetTrunkType_None)
5961 fDefFlags |= INTNET_OPEN_FLAGS_ACCESS_RESTRICTED;
5962 else
5963 fDefFlags |= INTNET_OPEN_FLAGS_ACCESS_PUBLIC;
5964 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
5965 if (!(fFlags & g_afIntNetOpenNetworkNetFlags[i].fPair))
5966 fFlags |= g_afIntNetOpenNetworkNetFlags[i].fPair & fDefFlags;
5967
5968 /*
5969 * Allocate and initialize.
5970 */
5971 size_t cb = sizeof(INTNETNETWORK);
5972 if (fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
5973 cb += INTNETNETWORK_TMP_SIZE + 64;
5974 PINTNETNETWORK pNetwork = (PINTNETNETWORK)RTMemAllocZ(cb);
5975 if (!pNetwork)
5976 return VERR_NO_MEMORY;
5977 //pNetwork->pNext = NULL;
5978 //pNetwork->pIfs = NULL;
5979 pNetwork->hAddrSpinlock = NIL_RTSPINLOCK;
5980 pNetwork->MacTab.cEntries = 0;
5981 pNetwork->MacTab.cEntriesAllocated = INTNET_GROW_DSTTAB_SIZE;
5982 //pNetwork->MacTab.cPromiscuousEntries = 0;
5983 //pNetwork->MacTab.cPromiscuousNoTrunkEntries = 0;
5984 pNetwork->MacTab.paEntries = NULL;
5985 pNetwork->MacTab.fHostPromiscuousReal = false;
5986 pNetwork->MacTab.fHostPromiscuousEff = false;
5987 pNetwork->MacTab.fHostActive = false;
5988 pNetwork->MacTab.fWirePromiscuousReal = false;
5989 pNetwork->MacTab.fWirePromiscuousEff = false;
5990 pNetwork->MacTab.fWireActive = false;
5991 pNetwork->MacTab.pTrunk = NULL;
5992 pNetwork->hEvtBusyIf = NIL_RTSEMEVENT;
5993 pNetwork->pIntNet = pIntNet;
5994 //pNetwork->pvObj = NULL;
5995 if (fFlags & INTNET_OPEN_FLAGS_SHARED_MAC_ON_WIRE)
5996 pNetwork->pbTmp = RT_ALIGN_PT(pNetwork + 1, 64, uint8_t *);
5997 //else
5998 // pNetwork->pbTmp = NULL;
5999 pNetwork->fFlags = fFlags;
6000 //pNetwork->fMinFlags = 0;
6001 //pNetwork->cActiveIFs = 0;
6002 size_t cchName = strlen(pszNetwork);
6003 pNetwork->cchName = (uint8_t)cchName;
6004 Assert(cchName && cchName < sizeof(pNetwork->szName)); /* caller's responsibility. */
6005 memcpy(pNetwork->szName, pszNetwork, cchName); /* '\0' at courtesy of alloc. */
6006 pNetwork->enmTrunkType = enmTrunkType;
6007 Assert(strlen(pszTrunk) < sizeof(pNetwork->szTrunk)); /* caller's responsibility. */
6008 strcpy(pNetwork->szTrunk, pszTrunk);
6009
6010 /*
6011 * Create the semaphore, spinlock and allocate the interface table.
6012 */
6013 int rc = RTSemEventCreate(&pNetwork->hEvtBusyIf);
6014 if (RT_SUCCESS(rc))
6015 rc = RTSpinlockCreate(&pNetwork->hAddrSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "hAddrSpinlock");
6016 if (RT_SUCCESS(rc))
6017 {
6018 pNetwork->MacTab.paEntries = (PINTNETMACTABENTRY)RTMemAlloc(sizeof(INTNETMACTABENTRY) * pNetwork->MacTab.cEntriesAllocated);
6019 if (!pNetwork->MacTab.paEntries)
6020 rc = VERR_NO_MEMORY;
6021 }
6022 if (RT_SUCCESS(rc))
6023 {
6024 /*
6025 * Register the object in the current session and link it into the network list.
6026 */
6027 pNetwork->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_INTERNAL_NETWORK, intnetR0NetworkDestruct, pNetwork, pIntNet);
6028 if (pNetwork->pvObj)
6029 {
6030 pNetwork->pNext = pIntNet->pNetworks;
6031 pIntNet->pNetworks = pNetwork;
6032
6033 /*
6034 * Check if the current session is actually allowed to create and
6035 * open the network. It is possible to implement network name
6036 * based policies and these must be checked now. SUPR0ObjRegister
6037 * does no such checks.
6038 */
6039 rc = SUPR0ObjVerifyAccess(pNetwork->pvObj, pSession, pNetwork->szName);
6040 if (RT_SUCCESS(rc))
6041 {
6042 /*
6043 * Connect the trunk.
6044 */
6045 rc = intnetR0NetworkCreateTrunkIf(pNetwork, pSession);
6046 if (RT_SUCCESS(rc))
6047 {
6048 *ppNetwork = pNetwork;
6049 LogFlow(("intnetR0CreateNetwork: returns VINF_SUCCESS *ppNetwork=%p\n", pNetwork));
6050 return VINF_SUCCESS;
6051 }
6052 }
6053
6054 SUPR0ObjRelease(pNetwork->pvObj, pSession);
6055 LogFlow(("intnetR0CreateNetwork: returns %Rrc\n", rc));
6056 return rc;
6057 }
6058
6059 /* cleanup */
6060 rc = VERR_NO_MEMORY;
6061 }
6062
6063 RTSemEventDestroy(pNetwork->hEvtBusyIf);
6064 pNetwork->hEvtBusyIf = NIL_RTSEMEVENT;
6065 RTSpinlockDestroy(pNetwork->hAddrSpinlock);
6066 pNetwork->hAddrSpinlock = NIL_RTSPINLOCK;
6067 RTMemFree(pNetwork->MacTab.paEntries);
6068 pNetwork->MacTab.paEntries = NULL;
6069 RTMemFree(pNetwork);
6070
6071 LogFlow(("intnetR0CreateNetwork: returns %Rrc\n", rc));
6072 return rc;
6073}
6074
6075
6076/**
6077 * Opens a network interface and connects it to the specified network.
6078 *
6079 * @returns VBox status code.
6080 * @param pSession The session handle.
6081 * @param pszNetwork The network name.
6082 * @param enmTrunkType The trunk type.
6083 * @param pszTrunk The trunk name. Its meaning is specific to the type.
6084 * @param fFlags Flags, see INTNET_OPEN_FLAGS_*.
6085 * @param fRestrictAccess Whether new participants should be subjected to access check or not.
6086 * @param cbSend The send buffer size.
6087 * @param cbRecv The receive buffer size.
6088 * @param phIf Where to store the handle to the network interface.
6089 */
6090INTNETR0DECL(int) IntNetR0Open(PSUPDRVSESSION pSession, const char *pszNetwork,
6091 INTNETTRUNKTYPE enmTrunkType, const char *pszTrunk, uint32_t fFlags,
6092 uint32_t cbSend, uint32_t cbRecv, PINTNETIFHANDLE phIf)
6093{
6094 LogFlow(("IntNetR0Open: pSession=%p pszNetwork=%p:{%s} enmTrunkType=%d pszTrunk=%p:{%s} fFlags=%#x cbSend=%u cbRecv=%u phIf=%p\n",
6095 pSession, pszNetwork, pszNetwork, enmTrunkType, pszTrunk, pszTrunk, fFlags, cbSend, cbRecv, phIf));
6096
6097 /*
6098 * Validate input.
6099 */
6100 PINTNET pIntNet = g_pIntNet;
6101 AssertPtrReturn(pIntNet, VERR_INVALID_PARAMETER);
6102 AssertReturn(pIntNet->u32Magic, VERR_INVALID_MAGIC);
6103
6104 AssertPtrReturn(pszNetwork, VERR_INVALID_PARAMETER);
6105 const char *pszNetworkEnd = RTStrEnd(pszNetwork, INTNET_MAX_NETWORK_NAME);
6106 AssertReturn(pszNetworkEnd, VERR_INVALID_PARAMETER);
6107 size_t cchNetwork = pszNetworkEnd - pszNetwork;
6108 AssertReturn(cchNetwork, VERR_INVALID_PARAMETER);
6109
6110 if (pszTrunk)
6111 {
6112 AssertPtrReturn(pszTrunk, VERR_INVALID_PARAMETER);
6113 const char *pszTrunkEnd = RTStrEnd(pszTrunk, INTNET_MAX_TRUNK_NAME);
6114 AssertReturn(pszTrunkEnd, VERR_INVALID_PARAMETER);
6115 }
6116 else
6117 pszTrunk = "";
6118
6119 AssertMsgReturn(enmTrunkType > kIntNetTrunkType_Invalid && enmTrunkType < kIntNetTrunkType_End,
6120 ("%d\n", enmTrunkType), VERR_INVALID_PARAMETER);
6121 switch (enmTrunkType)
6122 {
6123 case kIntNetTrunkType_None:
6124 case kIntNetTrunkType_WhateverNone:
6125#ifdef VBOX_WITH_NAT_SERVICE
6126 case kIntNetTrunkType_SrvNat:
6127#endif
6128 if (*pszTrunk)
6129 return VERR_INVALID_PARAMETER;
6130 break;
6131
6132 case kIntNetTrunkType_NetFlt:
6133 case kIntNetTrunkType_NetAdp:
6134 if (!*pszTrunk)
6135 return VERR_INVALID_PARAMETER;
6136 break;
6137
6138 default:
6139 return VERR_NOT_IMPLEMENTED;
6140 }
6141
6142 AssertMsgReturn(!(fFlags & ~INTNET_OPEN_FLAGS_MASK), ("%#x\n", fFlags), VERR_INVALID_PARAMETER);
6143 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkNetFlags); i++)
6144 AssertMsgReturn((fFlags & g_afIntNetOpenNetworkNetFlags[i].fPair) != g_afIntNetOpenNetworkNetFlags[i].fPair,
6145 ("%#x (%#x)\n", fFlags, g_afIntNetOpenNetworkNetFlags[i].fPair), VERR_INVALID_PARAMETER);
6146 for (uint32_t i = 0; i < RT_ELEMENTS(g_afIntNetOpenNetworkIfFlags); i++)
6147 AssertMsgReturn((fFlags & g_afIntNetOpenNetworkIfFlags[i].fPair) != g_afIntNetOpenNetworkIfFlags[i].fPair,
6148 ("%#x (%#x)\n", fFlags, g_afIntNetOpenNetworkIfFlags[i].fPair), VERR_INVALID_PARAMETER);
6149 AssertPtrReturn(phIf, VERR_INVALID_PARAMETER);
6150
6151 /*
6152 * Acquire the mutex to serialize open/create/close.
6153 */
6154 int rc = RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
6155 if (RT_FAILURE(rc))
6156 return rc;
6157
6158 /*
6159 * Try open / create the network and create an interface on it for the
6160 * caller to use.
6161 */
6162 PINTNETNETWORK pNetwork = NULL;
6163 rc = intnetR0OpenNetwork(pIntNet, pSession, pszNetwork, enmTrunkType, pszTrunk, fFlags, &pNetwork);
6164 if (RT_SUCCESS(rc))
6165 {
6166 rc = intnetR0NetworkCreateIf(pNetwork, pSession, cbSend, cbRecv, fFlags, phIf);
6167 if (RT_SUCCESS(rc))
6168 {
6169 intnetR0AdaptOpenNetworkFlags(pNetwork, fFlags);
6170 rc = VINF_ALREADY_INITIALIZED;
6171 }
6172 else
6173 SUPR0ObjRelease(pNetwork->pvObj, pSession);
6174 }
6175 else if (rc == VERR_NOT_FOUND)
6176 {
6177 rc = intnetR0CreateNetwork(pIntNet, pSession, pszNetwork, enmTrunkType, pszTrunk, fFlags, &pNetwork);
6178 if (RT_SUCCESS(rc))
6179 {
6180 rc = intnetR0NetworkCreateIf(pNetwork, pSession, cbSend, cbRecv, fFlags, phIf);
6181 if (RT_FAILURE(rc))
6182 SUPR0ObjRelease(pNetwork->pvObj, pSession);
6183 }
6184 }
6185
6186 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
6187 LogFlow(("IntNetR0Open: return %Rrc *phIf=%RX32\n", rc, *phIf));
6188 return rc;
6189}
6190
6191
6192/**
6193 * VMMR0 request wrapper for IntNetR0Open.
6194 *
6195 * @returns see GMMR0MapUnmapChunk.
6196 * @param pSession The caller's session.
6197 * @param pReq The request packet.
6198 */
6199INTNETR0DECL(int) IntNetR0OpenReq(PSUPDRVSESSION pSession, PINTNETOPENREQ pReq)
6200{
6201 if (RT_UNLIKELY(pReq->Hdr.cbReq != sizeof(*pReq)))
6202 return VERR_INVALID_PARAMETER;
6203 return IntNetR0Open(pSession, &pReq->szNetwork[0], pReq->enmTrunkType, pReq->szTrunk,
6204 pReq->fFlags, pReq->cbSend, pReq->cbRecv, &pReq->hIf);
6205}
6206
6207
6208/**
6209 * Count the internal networks.
6210 *
6211 * This is mainly for providing the testcase with some introspection to validate
6212 * behavior when closing interfaces.
6213 *
6214 * @returns The number of networks.
6215 */
6216INTNETR0DECL(uint32_t) IntNetR0GetNetworkCount(void)
6217{
6218 /*
6219 * Grab the instance.
6220 */
6221 PINTNET pIntNet = g_pIntNet;
6222 if (!pIntNet)
6223 return 0;
6224 AssertPtrReturn(pIntNet, 0);
6225 AssertReturn(pIntNet->u32Magic == INTNET_MAGIC, 0);
6226
6227 /*
6228 * Grab the mutex and count the networks.
6229 */
6230 int rc = RTSemMutexRequest(pIntNet->hMtxCreateOpenDestroy, RT_INDEFINITE_WAIT);
6231 if (RT_FAILURE(rc))
6232 return 0;
6233
6234 uint32_t cNetworks = 0;
6235 for (PINTNETNETWORK pCur = pIntNet->pNetworks; pCur; pCur = pCur->pNext)
6236 cNetworks++;
6237
6238 RTSemMutexRelease(pIntNet->hMtxCreateOpenDestroy);
6239
6240 return cNetworks;
6241}
6242
6243
6244
6245/**
6246 * Destroys an instance of the Ring-0 internal networking service.
6247 */
6248INTNETR0DECL(void) IntNetR0Term(void)
6249{
6250 LogFlow(("IntNetR0Term:\n"));
6251
6252 /*
6253 * Zap the global pointer and validate it.
6254 */
6255 PINTNET pIntNet = g_pIntNet;
6256 g_pIntNet = NULL;
6257 if (!pIntNet)
6258 return;
6259 AssertPtrReturnVoid(pIntNet);
6260 AssertReturnVoid(pIntNet->u32Magic == INTNET_MAGIC);
6261
6262 /*
6263 * There is not supposed to be any networks hanging around at this time.
6264 */
6265 AssertReturnVoid(ASMAtomicCmpXchgU32(&pIntNet->u32Magic, ~INTNET_MAGIC, INTNET_MAGIC));
6266 Assert(pIntNet->pNetworks == NULL);
6267 if (pIntNet->hMtxCreateOpenDestroy != NIL_RTSEMMUTEX)
6268 {
6269 RTSemMutexDestroy(pIntNet->hMtxCreateOpenDestroy);
6270 pIntNet->hMtxCreateOpenDestroy = NIL_RTSEMMUTEX;
6271 }
6272 if (pIntNet->hHtIfs != NIL_RTHANDLETABLE)
6273 {
6274 /** @todo does it make sense to have a deleter here? */
6275 RTHandleTableDestroy(pIntNet->hHtIfs, NULL, NULL);
6276 pIntNet->hHtIfs = NIL_RTHANDLETABLE;
6277 }
6278
6279 RTMemFree(pIntNet);
6280}
6281
6282
6283/**
6284 * Initializes the internal network ring-0 service.
6285 *
6286 * @returns VBox status code.
6287 */
6288INTNETR0DECL(int) IntNetR0Init(void)
6289{
6290 LogFlow(("IntNetR0Init:\n"));
6291 int rc = VERR_NO_MEMORY;
6292 PINTNET pIntNet = (PINTNET)RTMemAllocZ(sizeof(*pIntNet));
6293 if (pIntNet)
6294 {
6295 //pIntNet->pNetworks = NULL;
6296
6297 rc = RTSemMutexCreate(&pIntNet->hMtxCreateOpenDestroy);
6298 if (RT_SUCCESS(rc))
6299 {
6300 rc = RTHandleTableCreateEx(&pIntNet->hHtIfs, RTHANDLETABLE_FLAGS_LOCKED | RTHANDLETABLE_FLAGS_CONTEXT,
6301 UINT32_C(0x8ffe0000), 4096, intnetR0IfRetainHandle, NULL);
6302 if (RT_SUCCESS(rc))
6303 {
6304 pIntNet->u32Magic = INTNET_MAGIC;
6305 g_pIntNet = pIntNet;
6306 LogFlow(("IntNetR0Init: returns VINF_SUCCESS pIntNet=%p\n", pIntNet));
6307 return VINF_SUCCESS;
6308 }
6309
6310 RTSemMutexDestroy(pIntNet->hMtxCreateOpenDestroy);
6311 }
6312 RTMemFree(pIntNet);
6313 }
6314 LogFlow(("IntNetR0Init: returns %Rrc\n", rc));
6315 return rc;
6316}
6317
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette