source: vbox/trunk/src/VBox/HostDrivers/VBoxNetFlt/linux/VBoxNetFlt-linux.c @ 28161

Last change on this file since 28161 was 28161, checked in by vboxsync, 15 years ago

2.6.29 that is

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 59.6 KB
/* $Id: VBoxNetFlt-linux.c 28161 2010-04-11 09:36:19Z vboxsync $ */
/** @file
 * VBoxNetFlt - Network Filter Driver (Host), Linux Specific Code.
 */

/*
 * Copyright (C) 2006-2008 Sun Microsystems, Inc.
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License (GPL) as published by the Free Software
 * Foundation, in version 2 as it comes in the "COPYING" file of the
 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
 * Clara, CA 95054 USA or visit http://www.sun.com if you need
 * additional information or have any questions.
 */
/*******************************************************************************
*   Header Files                                                               *
*******************************************************************************/
#include "the-linux-kernel.h"
#include "version-generated.h"
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/rtnetlink.h>
#include <linux/miscdevice.h>
#include <linux/ip.h>

#define LOG_GROUP LOG_GROUP_NET_FLT_DRV
#include <VBox/log.h>
#include <VBox/err.h>
#include <VBox/intnetinline.h>
#include <VBox/pdmnetinline.h>
#include <VBox/param.h>
#include <iprt/alloca.h>
#include <iprt/assert.h>
#include <iprt/spinlock.h>
#include <iprt/semaphore.h>
#include <iprt/initterm.h>
#include <iprt/process.h>
#include <iprt/mem.h>
#include <iprt/net.h>
#include <iprt/log.h>
#include <iprt/mp.h>
#include <iprt/time.h>

#define VBOXNETFLT_OS_SPECFIC 1
#include "../VBoxNetFltInternal.h"


/*******************************************************************************
*   Defined Constants And Macros                                               *
*******************************************************************************/
#define VBOX_FLT_NB_TO_INST(pNB)    ((PVBOXNETFLTINS)((uint8_t *)pNB - RT_OFFSETOF(VBOXNETFLTINS, u.s.Notifier)))
#define VBOX_FLT_PT_TO_INST(pPT)    ((PVBOXNETFLTINS)((uint8_t *)pPT - RT_OFFSETOF(VBOXNETFLTINS, u.s.PacketType)))
#define VBOX_FLT_XT_TO_INST(pXT)    ((PVBOXNETFLTINS)((uint8_t *)pXT - RT_OFFSETOF(VBOXNETFLTINS, u.s.XmitTask)))

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 22)
# define VBOX_SKB_RESET_NETWORK_HDR(skb) skb_reset_network_header(skb)
# define VBOX_SKB_RESET_MAC_HDR(skb)     skb_reset_mac_header(skb)
#else
# define VBOX_SKB_RESET_NETWORK_HDR(skb) skb->nh.raw = skb->data
# define VBOX_SKB_RESET_MAC_HDR(skb)     skb->mac.raw = skb->data
#endif
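
/* Note: the skb_reset_network_header()/skb_reset_mac_header() accessors used
 * above arrived with the 2.6.22 switch to offset-based sk_buff header fields;
 * on older kernels the nh.raw/mac.raw pointers are assigned directly instead. */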

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
# define VBOX_SKB_CHECKSUM_HELP(skb) skb_checksum_help(skb)
#else
# define CHECKSUM_PARTIAL CHECKSUM_HW
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 10)
#  define VBOX_SKB_CHECKSUM_HELP(skb) skb_checksum_help(skb, 0)
# else
#  if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 7)
#   define VBOX_SKB_CHECKSUM_HELP(skb) skb_checksum_help(&skb, 0)
#  else
#   define VBOX_SKB_CHECKSUM_HELP(skb) (!skb_checksum_help(skb))
#  endif
# endif
#endif
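
/* The version ladder above normalizes the differing kernel APIs so that a
 * non-zero result uniformly means failure. A minimal usage sketch (this is
 * how vboxNetFltLinuxForwardToIntNet below uses it):
 *
 *     if (VBOX_SKB_CHECKSUM_HELP(pBuf))
 *     {
 *         LogRel(("VBoxNetFlt: Failed to compute checksum, dropping the packet.\n"));
 *         dev_kfree_skb(pBuf);
 *         return;
 *     }
 */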

#ifndef NET_IP_ALIGN
# define NET_IP_ALIGN 2
#endif

#if 0
/** Create scatter / gather segments for fragments. When not used, we will
 * linearize the socket buffer before creating the internal networking SG. */
# define VBOXNETFLT_SG_SUPPORT 1
#endif

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 18)
/** Indicates that the linux kernel may send us GSO frames. */
# define VBOXNETFLT_WITH_GSO 1

/** This enables or disables the transmitting of GSO frames from the internal
 * network to the host. */
# define VBOXNETFLT_WITH_GSO_XMIT_HOST 1

# if 0 /** @todo This is currently disabled because it causes a performance loss of 5-10%. */
/** This enables or disables the transmitting of GSO frames from the internal
 * network to the wire. */
#  define VBOXNETFLT_WITH_GSO_XMIT_WIRE 1
# endif

/** This enables or disables the forwarding/flooding of GSO frames from the host
 * to the internal network. */
# define VBOXNETFLT_WITH_GSO_RECV 1

#endif


/*******************************************************************************
*   Internal Functions                                                         *
*******************************************************************************/
static int  VBoxNetFltLinuxInit(void);
static void VBoxNetFltLinuxUnload(void);


/*******************************************************************************
*   Global Variables                                                           *
*******************************************************************************/
/**
 * The (common) global data.
 */
static VBOXNETFLTGLOBALS g_VBoxNetFltGlobals;

module_init(VBoxNetFltLinuxInit);
module_exit(VBoxNetFltLinuxUnload);

MODULE_AUTHOR("Sun Microsystems, Inc.");
MODULE_DESCRIPTION("VirtualBox Network Filter Driver");
MODULE_LICENSE("GPL");
#ifdef MODULE_VERSION
MODULE_VERSION(VBOX_VERSION_STRING " (" RT_XSTR(INTNETTRUNKIFPORT_VERSION) ")");
#endif


#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 12) && defined(LOG_ENABLED)
unsigned dev_get_flags(const struct net_device *dev)
{
    unsigned flags;

    flags = (dev->flags & ~(IFF_PROMISC |
                            IFF_ALLMULTI |
                            IFF_RUNNING)) |
            (dev->gflags & (IFF_PROMISC |
                            IFF_ALLMULTI));

    if (netif_running(dev) && netif_carrier_ok(dev))
        flags |= IFF_RUNNING;

    return flags;
}
#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 12) */
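
/* (dev_get_flags() only appeared in the 2.6.12 kernel, as the guard above
 *  suggests; the definition here is a local stand-in so the logging code
 *  further down can use it on older kernels as well.) */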


/**
 * Initialize module.
 *
 * @returns appropriate status code.
 */
static int __init VBoxNetFltLinuxInit(void)
{
    int rc;
    /*
     * Initialize IPRT.
     */
    rc = RTR0Init(0);
    if (RT_SUCCESS(rc))
    {
        Log(("VBoxNetFltLinuxInit\n"));

        /*
         * Initialize the globals and connect to the support driver.
         *
         * This will call back vboxNetFltOsOpenSupDrv (and maybe vboxNetFltOsCloseSupDrv)
         * for establishing the connection to the support driver.
         */
        memset(&g_VBoxNetFltGlobals, 0, sizeof(g_VBoxNetFltGlobals));
        rc = vboxNetFltInitGlobalsAndIdc(&g_VBoxNetFltGlobals);
        if (RT_SUCCESS(rc))
        {
            LogRel(("VBoxNetFlt: Successfully started.\n"));
            return 0;
        }

        LogRel(("VBoxNetFlt: failed to initialize device extension (rc=%d)\n", rc));
        RTR0Term();
    }
    else
        LogRel(("VBoxNetFlt: failed to initialize IPRT (rc=%d)\n", rc));

    memset(&g_VBoxNetFltGlobals, 0, sizeof(g_VBoxNetFltGlobals));
    return -RTErrConvertToErrno(rc);
}


/**
 * Unload the module.
 *
 * @todo We have to prevent this if we're busy!
 */
static void __exit VBoxNetFltLinuxUnload(void)
{
    int rc;
    Log(("VBoxNetFltLinuxUnload\n"));
    Assert(vboxNetFltCanUnload(&g_VBoxNetFltGlobals));

    /*
     * Undo the work done during start (in reverse order).
     */
    rc = vboxNetFltTryDeleteIdcAndGlobals(&g_VBoxNetFltGlobals);
    AssertRC(rc); NOREF(rc);

    RTR0Term();

    memset(&g_VBoxNetFltGlobals, 0, sizeof(g_VBoxNetFltGlobals));

    Log(("VBoxNetFltLinuxUnload - done\n"));
}

/**
 * Experiment where we filter traffic from the host to the internal network
 * before it reaches the NIC driver.
 *
 * The current code uses a very ugly hack and only works on kernels using the
 * net_device_ops (>= 2.6.29). It has been shown to give us a
 * performance boost of 60-100% though. So, we have to find some less hacky way
 * of getting this job done eventually.
 *
 * #define VBOXNETFLT_WITH_FILTER_HOST2GUEST_SKBS_EXPERIMENT
 */
#ifdef VBOXNETFLT_WITH_FILTER_HOST2GUEST_SKBS_EXPERIMENT

/**
 * The overridden net_device_ops of the device we're attached to.
 *
 * Requires Linux 2.6.29 or later.
 *
 * This is a very dirty hack that was created to explore how much we can improve
 * the host to guest transfers by not CC'ing the NIC.
 */
typedef struct VBoxNetDeviceOpsOverride
{
    /** Our overridden ops. */
    struct net_device_ops          Ops;
    /** Pointer to the original ops. */
    struct net_device_ops const   *pOrgOps;
    /** Magic word. */
    uint32_t                       u32Magic;
    /** The number of filtered packets. */
    uint64_t                       cFiltered;
    /** The total number of packets. */
    uint64_t                       cTotal;
} VBOXNETDEVICEOPSOVERRIDE, *PVBOXNETDEVICEOPSOVERRIDE;
/** VBOXNETDEVICEOPSOVERRIDE::u32Magic value. */
#define VBOXNETDEVICEOPSOVERRIDE_MAGIC UINT32_C(0x00c0ffee)

/**
 * ndo_start_xmit wrapper that drops packets that shouldn't go to the wire
 * because they belong on the internal network.
 *
 * @returns NETDEV_TX_XXX.
 * @param   pSkb    The socket buffer to transmit.
 * @param   pDev    The net device.
 */
static int vboxNetFltLinuxStartXmitFilter(struct sk_buff *pSkb, struct net_device *pDev)
{
    PVBOXNETDEVICEOPSOVERRIDE   pOverride = (PVBOXNETDEVICEOPSOVERRIDE)pDev->netdev_ops;
    RTNETETHERHDR               EtherHdrBuf;
    PCRTNETETHERHDR             pEtherHdr;

    /*
     * Validate the override structure.
     *
     * Note! We're racing vboxNetFltLinuxUnhookDev here. If this was supposed
     *       to be production quality code, we would have to be much more
     *       careful here and avoid the race.
     */
    if (   !VALID_PTR(pOverride)
        || pOverride->u32Magic != VBOXNETDEVICEOPSOVERRIDE_MAGIC
        || !VALID_PTR(pOverride->pOrgOps))
    {
        printk("vboxNetFltLinuxStartXmitFilter: bad override %p\n", pOverride);
        dev_kfree_skb(pSkb);
        return NETDEV_TX_OK;
    }
    pOverride->cTotal++;

    /*
     * Do the filtering based on the default OUI of our virtual NICs.
     *
     * Note! In a real solution, we would ask the switch whether the
     *       destination MAC is 100% certain to be on the internal network
     *       and then drop it.
     */
    pEtherHdr = (PCRTNETETHERHDR)skb_header_pointer(pSkb, 0, sizeof(EtherHdrBuf), &EtherHdrBuf);
    if (   pEtherHdr
        && pEtherHdr->DstMac.au8[0] == 0x08
        && pEtherHdr->DstMac.au8[1] == 0x00
        && pEtherHdr->DstMac.au8[2] == 0x27
       )
    {
        dev_kfree_skb(pSkb);
        pOverride->cFiltered++;
        return NETDEV_TX_OK;
    }

    return pOverride->pOrgOps->ndo_start_xmit(pSkb, pDev);
}

/**
 * Hooks the device ndo_start_xmit operation of the device.
 *
 * @param   pThis   The net filter instance.
 * @param   pDev    The net device.
 */
static void vboxNetFltLinuxHookDev(PVBOXNETFLTINS pThis, struct net_device *pDev)
{
    PVBOXNETDEVICEOPSOVERRIDE   pOverride;
    RTSPINLOCKTMP               Tmp = RTSPINLOCKTMP_INITIALIZER;

    pOverride = RTMemAlloc(sizeof(*pOverride));
    if (!pOverride)
        return;
    pOverride->pOrgOps            = pDev->netdev_ops;
    pOverride->Ops                = *pDev->netdev_ops;
    pOverride->Ops.ndo_start_xmit = vboxNetFltLinuxStartXmitFilter;
    pOverride->u32Magic           = VBOXNETDEVICEOPSOVERRIDE_MAGIC;
    pOverride->cFiltered          = 0; /* RTMemAlloc doesn't zero the block. */
    pOverride->cTotal             = 0;

    RTSpinlockAcquire(pThis->hSpinlock, &Tmp); /* (this isn't necessary, but so what) */
    ASMAtomicXchgPtr((void * volatile *)&pDev->netdev_ops, pOverride);
    RTSpinlockRelease(pThis->hSpinlock, &Tmp);
}
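
/* Lifecycle sketch: vboxNetFltLinuxAttachToInterface() calls
 * vboxNetFltLinuxHookDev() right after installing the packet handler, and
 * vboxNetFltLinuxUnhookDev() is invoked from the attach-failure, device
 * unregistration and instance deletion paths, so the override only lives
 * while the filter is attached to the device. */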

/**
 * Undoes what vboxNetFltLinuxHookDev did.
 *
 * @param   pThis   The net filter instance.
 * @param   pDev    The net device. Can be NULL, in which case
 *                  we'll try to retrieve it from @a pThis.
 */
static void vboxNetFltLinuxUnhookDev(PVBOXNETFLTINS pThis, struct net_device *pDev)
{
    PVBOXNETDEVICEOPSOVERRIDE   pOverride;
    RTSPINLOCKTMP               Tmp = RTSPINLOCKTMP_INITIALIZER;

    RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
    if (!pDev)
        pDev = (struct net_device *)ASMAtomicUoReadPtr((void * volatile *)&pThis->u.s.pDev);
    if (VALID_PTR(pDev))
    {
        pOverride = (PVBOXNETDEVICEOPSOVERRIDE)pDev->netdev_ops;
        if (   VALID_PTR(pOverride)
            && pOverride->u32Magic == VBOXNETDEVICEOPSOVERRIDE_MAGIC
            && VALID_PTR(pOverride->pOrgOps)
           )
        {
            ASMAtomicXchgPtr((void * volatile *)&pDev->netdev_ops, pOverride->pOrgOps);
            ASMAtomicWriteU32(&pOverride->u32Magic, 0);
        }
        else
            pOverride = NULL;
    }
    else
        pOverride = NULL;
    RTSpinlockRelease(pThis->hSpinlock, &Tmp);

    if (pOverride)
    {
        printk("vboxnetflt: dropped %llu out of %llu packets\n", pOverride->cFiltered, pOverride->cTotal);
        RTMemFree(pOverride);
    }
}

#endif /* VBOXNETFLT_WITH_FILTER_HOST2GUEST_SKBS_EXPERIMENT */


/**
 * Reads and retains the host interface handle.
 *
 * @returns The handle, NULL if detached.
 * @param   pThis   The instance.
 */
DECLINLINE(struct net_device *) vboxNetFltLinuxRetainNetDev(PVBOXNETFLTINS pThis)
{
#if 0
    RTSPINLOCKTMP       Tmp = RTSPINLOCKTMP_INITIALIZER;
    struct net_device  *pDev = NULL;

    Log(("vboxNetFltLinuxRetainNetDev\n"));
    /*
     * Be careful here to avoid problems racing the detached callback.
     */
    RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
    if (!ASMAtomicUoReadBool(&pThis->fDisconnectedFromHost))
    {
        pDev = (struct net_device *)ASMAtomicUoReadPtr((void * volatile *)&pThis->u.s.pDev);
        if (pDev)
        {
            dev_hold(pDev);
            Log(("vboxNetFltLinuxRetainNetDev: Device %p(%s) retained. ref=%d\n", pDev, pDev->name, atomic_read(&pDev->refcnt)));
        }
    }
    RTSpinlockRelease(pThis->hSpinlock, &Tmp);

    Log(("vboxNetFltLinuxRetainNetDev - done\n"));
    return pDev;
#else
    return (struct net_device *)ASMAtomicUoReadPtr((void * volatile *)&pThis->u.s.pDev);
#endif
}


/**
 * Release the host interface handle previously retained
 * by vboxNetFltLinuxRetainNetDev.
 *
 * @param   pThis   The instance.
 * @param   pDev    The vboxNetFltLinuxRetainNetDev
 *                  return value, NULL is fine.
 */
DECLINLINE(void) vboxNetFltLinuxReleaseNetDev(PVBOXNETFLTINS pThis, struct net_device *pDev)
{
#if 0
    Log(("vboxNetFltLinuxReleaseNetDev\n"));
    NOREF(pThis);
    if (pDev)
    {
        dev_put(pDev);
        Log(("vboxNetFltLinuxReleaseNetDev: Device %p(%s) released. ref=%d\n", pDev, pDev->name, atomic_read(&pDev->refcnt)));
    }
    Log(("vboxNetFltLinuxReleaseNetDev - done\n"));
#endif
}

#define VBOXNETFLT_CB_TAG(skb)  (0xA1C90000 | (skb->dev->ifindex & 0xFFFF))
#define VBOXNETFLT_SKB_TAG(skb) (*(uint32_t *)&((skb)->cb[sizeof((skb)->cb) - sizeof(uint32_t)]))

/**
 * Checks whether this is an sk_buff created by vboxNetFltLinuxSkBufFromSG,
 * i.e. a buffer which we're pushing and should be ignored by the filter callbacks.
 *
 * @returns true / false accordingly.
 * @param   pBuf    The sk_buff.
 */
DECLINLINE(bool) vboxNetFltLinuxSkBufIsOur(struct sk_buff *pBuf)
{
    return VBOXNETFLT_SKB_TAG(pBuf) == VBOXNETFLT_CB_TAG(pBuf);
}
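
/* Tagging sketch: vboxNetFltLinuxSkBufFromSG() stamps every buffer it creates
 * with VBOXNETFLT_SKB_TAG(pPkt) = VBOXNETFLT_CB_TAG(pPkt), i.e. it writes
 * 0xA1C9 combined with the low 16 bits of the device's ifindex into the last
 * four bytes of skb->cb. For a device with ifindex 3 the tag would be
 * 0xA1C90003. vboxNetFltLinuxPacketHandler() then drops any buffer whose tag
 * matches, so packets we inject ourselves aren't looped back into the switch. */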


/**
 * Internal worker that creates a linux sk_buff for a
 * (scatter/)gather list.
 *
 * @returns Pointer to the sk_buff.
 * @param   pThis       The instance.
 * @param   pSG         The (scatter/)gather list.
 * @param   fDstWire    Set if the destination is the wire.
 */
static struct sk_buff *vboxNetFltLinuxSkBufFromSG(PVBOXNETFLTINS pThis, PINTNETSG pSG, bool fDstWire)
{
    struct sk_buff *pPkt;
    struct net_device *pDev;
    unsigned fGsoType = 0;

    if (pSG->cbTotal == 0)
    {
        LogRel(("VBoxNetFlt: Dropped empty packet coming from internal network.\n"));
        return NULL;
    }

    /** @todo We should use fragments mapping the SG buffers with large packets.
     *        256 bytes seems to be a commonly used threshold for this. It
     *        requires some nasty work on the intnet side though... */
    /*
     * Allocate a packet and copy over the data.
     */
    pDev = (struct net_device *)ASMAtomicUoReadPtr((void * volatile *)&pThis->u.s.pDev);
    pPkt = dev_alloc_skb(pSG->cbTotal + NET_IP_ALIGN);
    if (RT_UNLIKELY(!pPkt))
    {
        Log(("vboxNetFltLinuxSkBufFromSG: Failed to allocate sk_buff(%u).\n", pSG->cbTotal));
        pSG->pvUserData = NULL;
        return NULL;
    }
    pPkt->dev       = pDev;
    pPkt->ip_summed = CHECKSUM_NONE;

    /* Align IP header on 16-byte boundary: 2 + 14 (ethernet hdr size). */
    skb_reserve(pPkt, NET_IP_ALIGN);

    /* Copy the segments. */
    skb_put(pPkt, pSG->cbTotal);
    INTNETSgRead(pSG, pPkt->data);

#if defined(VBOXNETFLT_WITH_GSO_XMIT_WIRE) || defined(VBOXNETFLT_WITH_GSO_XMIT_HOST)
    /*
     * Setup GSO if used by this packet.
     */
    switch ((PDMNETWORKGSOTYPE)pSG->GsoCtx.u8Type)
    {
        default:
            AssertMsgFailed(("%u (%s)\n", pSG->GsoCtx.u8Type, PDMNetGsoTypeName((PDMNETWORKGSOTYPE)pSG->GsoCtx.u8Type) ));
            /* fall thru */
        case PDMNETWORKGSOTYPE_INVALID:
            fGsoType = 0;
            break;
        case PDMNETWORKGSOTYPE_IPV4_TCP:
            fGsoType = SKB_GSO_TCPV4;
            break;
        case PDMNETWORKGSOTYPE_IPV4_UDP:
            fGsoType = SKB_GSO_UDP;
            break;
        case PDMNETWORKGSOTYPE_IPV6_TCP:
            fGsoType = SKB_GSO_TCPV6;
            break;
    }
    if (fGsoType)
    {
        struct skb_shared_info *pShInfo = skb_shinfo(pPkt);

        if (!fDstWire)
            fGsoType |= SKB_GSO_DODGY;
        pShInfo->gso_type = fGsoType;
        pShInfo->gso_size = pSG->GsoCtx.cbMaxSeg;
        pShInfo->gso_segs = PDMNetGsoCalcSegmentCount(&pSG->GsoCtx, pSG->cbTotal);

        /** @todo figure out the checksum bit... We're checksumming way too much here
         *        I hope. */
        if (fDstWire)
        {
            /** @todo check skb_partial_csum_set status code. */
            if (fGsoType & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))
                skb_partial_csum_set(pPkt, pSG->GsoCtx.offHdr2, RT_OFFSETOF(RTNETTCP, th_sum));
            else
                skb_partial_csum_set(pPkt, pSG->GsoCtx.offHdr2, RT_OFFSETOF(RTNETUDP, uh_sum));
        }
        else
        {
            pPkt->ip_summed = CHECKSUM_UNNECESSARY;
            PDMNetGsoPrepForDirectUse(&pSG->GsoCtx, pPkt->data, pSG->cbTotal, false /*fPayloadChecksum*/);
        }
    }
#endif /* VBOXNETFLT_WITH_GSO_XMIT_WIRE || VBOXNETFLT_WITH_GSO_XMIT_HOST */

    /*
     * Finish up the socket buffer.
     */
    pPkt->protocol = eth_type_trans(pPkt, pDev);
    if (fDstWire)
    {
        VBOX_SKB_RESET_NETWORK_HDR(pPkt);

        /* Restore ethernet header back. */
        skb_push(pPkt, ETH_HLEN); /** @todo VLAN: +4 if VLAN? */
        VBOX_SKB_RESET_MAC_HDR(pPkt);
    }
    VBOXNETFLT_SKB_TAG(pPkt) = VBOXNETFLT_CB_TAG(pPkt);

    return pPkt;
}


/**
 * Initializes a SG list from an sk_buff.
 *
 * @param   pThis       The instance.
 * @param   pBuf        The sk_buff.
 * @param   pSG         The SG.
 * @param   cSegs       The number of segments allocated for the SG.
 *                      This should match the number in the sk_buff exactly!
 * @param   fSrc        The source of the frame.
 * @param   pGsoCtx     Pointer to the GSO context if it's a GSO
 *                      internal network frame. NULL if regular frame.
 */
DECLINLINE(void) vboxNetFltLinuxSkBufToSG(PVBOXNETFLTINS pThis, struct sk_buff *pBuf, PINTNETSG pSG,
                                          unsigned cSegs, uint32_t fSrc, PCPDMNETWORKGSO pGsoCtx)
{
    int i;
    NOREF(pThis);

    Assert(!skb_shinfo(pBuf)->frag_list);

    if (fSrc & INTNETTRUNKDIR_WIRE)
    {
        /*
         * The packet came from the wire and the ethernet header was removed
         * by the device driver. Restore it.
         */
        skb_push(pBuf, ETH_HLEN);
    }

    if (!pGsoCtx)
        INTNETSgInitTempSegs(pSG, pBuf->len, cSegs, 0 /*cSegsUsed*/);
    else
        INTNETSgInitTempSegsGso(pSG, pBuf->len, cSegs, 0 /*cSegsUsed*/, pGsoCtx);

#ifdef VBOXNETFLT_SG_SUPPORT
    pSG->aSegs[0].cb = skb_headlen(pBuf);
    pSG->aSegs[0].pv = pBuf->data;
    pSG->aSegs[0].Phys = NIL_RTHCPHYS;

    for (i = 0; i < skb_shinfo(pBuf)->nr_frags; i++)
    {
        skb_frag_t *pFrag = &skb_shinfo(pBuf)->frags[i];
        pSG->aSegs[i+1].cb = pFrag->size;
        pSG->aSegs[i+1].pv = kmap(pFrag->page);
        printk("%p = kmap()\n", pSG->aSegs[i+1].pv);
        pSG->aSegs[i+1].Phys = NIL_RTHCPHYS;
    }
    ++i;

#else
    pSG->aSegs[0].cb = pBuf->len;
    pSG->aSegs[0].pv = pBuf->data;
    pSG->aSegs[0].Phys = NIL_RTHCPHYS;
    i = 1;
#endif

    pSG->cSegsUsed = i;

#ifdef PADD_RUNT_FRAMES_FROM_HOST
    /*
     * Add a trailer if the frame is too small.
     *
     * Since we're getting to the packet before it is framed, it has not
     * yet been padded. The current solution is to add a segment pointing
     * to a buffer containing all zeros and pray that works for all frames...
     */
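    /* (60 = ETH_ZLEN, the minimum Ethernet frame length excluding the FCS.) */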
    if (pSG->cbTotal < 60 && (fSrc & INTNETTRUNKDIR_HOST))
    {
        static uint8_t const s_abZero[128] = {0};

        AssertReturnVoid(i < cSegs);

        pSG->aSegs[i].Phys = NIL_RTHCPHYS;
        pSG->aSegs[i].pv = (void *)&s_abZero[0];
        pSG->aSegs[i].cb = 60 - pSG->cbTotal;
        pSG->cbTotal = 60;
        pSG->cSegsUsed++;
        Assert(i + 1 <= pSG->cSegsAlloc);
    }
#endif

    Log4(("vboxNetFltLinuxSkBufToSG: allocated=%d, segments=%d frags=%d next=%p frag_list=%p pkt_type=%x fSrc=%x\n",
          pSG->cSegsAlloc, pSG->cSegsUsed, skb_shinfo(pBuf)->nr_frags, pBuf->next, skb_shinfo(pBuf)->frag_list, pBuf->pkt_type, fSrc));
    for (i = 0; i < pSG->cSegsUsed; i++)
        Log4(("vboxNetFltLinuxSkBufToSG: #%d: cb=%d pv=%p\n",
              i, pSG->aSegs[i].cb, pSG->aSegs[i].pv));
}

/**
 * Packet handler.
 *
 * @returns 0 (the return value is ignored by the kernel).
 * @param   pBuf        The socket buffer.
 * @param   pSkbDev     The net device the buffer arrived on.
 * @param   pPacketType Our packet type registration, used to locate the
 *                      filter instance.
 * @param   pOrigDev    The original net device (2.6.14 and later).
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 14)
static int vboxNetFltLinuxPacketHandler(struct sk_buff *pBuf,
                                        struct net_device *pSkbDev,
                                        struct packet_type *pPacketType,
                                        struct net_device *pOrigDev)
#else
static int vboxNetFltLinuxPacketHandler(struct sk_buff *pBuf,
                                        struct net_device *pSkbDev,
                                        struct packet_type *pPacketType)
#endif
{
    PVBOXNETFLTINS pThis;
    struct net_device *pDev;
    LogFlow(("vboxNetFltLinuxPacketHandler: pBuf=%p pSkbDev=%p pPacketType=%p\n",
             pBuf, pSkbDev, pPacketType));
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 18)
    Log3(("vboxNetFltLinuxPacketHandler: skb len=%u data_len=%u truesize=%u next=%p nr_frags=%u gso_size=%u gso_segs=%u gso_type=%x frag_list=%p pkt_type=%x\n",
          pBuf->len, pBuf->data_len, pBuf->truesize, pBuf->next, skb_shinfo(pBuf)->nr_frags, skb_shinfo(pBuf)->gso_size, skb_shinfo(pBuf)->gso_segs, skb_shinfo(pBuf)->gso_type, skb_shinfo(pBuf)->frag_list, pBuf->pkt_type));
#else
    Log3(("vboxNetFltLinuxPacketHandler: skb len=%u data_len=%u truesize=%u next=%p nr_frags=%u tso_size=%u tso_segs=%u frag_list=%p pkt_type=%x\n",
          pBuf->len, pBuf->data_len, pBuf->truesize, pBuf->next, skb_shinfo(pBuf)->nr_frags, skb_shinfo(pBuf)->tso_size, skb_shinfo(pBuf)->tso_segs, skb_shinfo(pBuf)->frag_list, pBuf->pkt_type));
#endif
    /*
     * Drop it immediately?
     */
    if (!pBuf)
        return 0;

    pThis = VBOX_FLT_PT_TO_INST(pPacketType);
    pDev  = (struct net_device *)ASMAtomicUoReadPtr((void * volatile *)&pThis->u.s.pDev);
    if (pThis->u.s.pDev != pSkbDev)
    {
        Log(("vboxNetFltLinuxPacketHandler: Devices do not match, pThis may be wrong! pThis=%p\n", pThis));
        return 0;
    }

    Log4(("vboxNetFltLinuxPacketHandler: pBuf->cb dump:\n%.*Rhxd\n", sizeof(pBuf->cb), pBuf->cb));
    if (vboxNetFltLinuxSkBufIsOur(pBuf))
    {
        Log2(("vboxNetFltLinuxPacketHandler: got our own sk_buff, drop it.\n"));
        dev_kfree_skb(pBuf);
        return 0;
    }

#ifndef VBOXNETFLT_SG_SUPPORT
    {
        /*
         * Get rid of fragmented packets, they cause too much trouble.
         */
        struct sk_buff *pCopy = skb_copy(pBuf, GFP_ATOMIC);
        kfree_skb(pBuf);
        if (!pCopy)
        {
            LogRel(("VBoxNetFlt: Failed to allocate packet buffer, dropping the packet.\n"));
            return 0;
        }
        pBuf = pCopy;
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 18)
        Log3(("vboxNetFltLinuxPacketHandler: skb copy len=%u data_len=%u truesize=%u next=%p nr_frags=%u gso_size=%u gso_segs=%u gso_type=%x frag_list=%p pkt_type=%x\n",
              pBuf->len, pBuf->data_len, pBuf->truesize, pBuf->next, skb_shinfo(pBuf)->nr_frags, skb_shinfo(pBuf)->gso_size, skb_shinfo(pBuf)->gso_segs, skb_shinfo(pBuf)->gso_type, skb_shinfo(pBuf)->frag_list, pBuf->pkt_type));
# else
        Log3(("vboxNetFltLinuxPacketHandler: skb copy len=%u data_len=%u truesize=%u next=%p nr_frags=%u tso_size=%u tso_segs=%u frag_list=%p pkt_type=%x\n",
              pBuf->len, pBuf->data_len, pBuf->truesize, pBuf->next, skb_shinfo(pBuf)->nr_frags, skb_shinfo(pBuf)->tso_size, skb_shinfo(pBuf)->tso_segs, skb_shinfo(pBuf)->frag_list, pBuf->pkt_type));
# endif
    }
#endif

    /* Add the packet to transmit queue and schedule the bottom half. */
    skb_queue_tail(&pThis->u.s.XmitQueue, pBuf);
    schedule_work(&pThis->u.s.XmitTask);
    Log4(("vboxNetFltLinuxPacketHandler: scheduled work %p for sk_buff %p\n",
          &pThis->u.s.XmitTask, pBuf));

    /* It does not really matter what we return, it is ignored by the kernel. */
    return 0;
}
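
/* Registration sketch: vboxNetFltLinuxAttachToInterface() wires this handler
 * up with a protocol wildcard, roughly:
 *
 *     pThis->u.s.PacketType.type = __constant_htons(ETH_P_ALL);
 *     pThis->u.s.PacketType.dev  = pDev;
 *     pThis->u.s.PacketType.func = vboxNetFltLinuxPacketHandler;
 *     dev_add_pack(&pThis->u.s.PacketType);
 *
 * so the kernel delivers every frame seen on the device to this function. */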

/**
 * Calculate the number of INTNETSEG segments the socket buffer will need.
 *
 * @returns Segment count.
 * @param   pBuf    The socket buffer.
 */
DECLINLINE(unsigned) vboxNetFltLinuxCalcSGSegments(struct sk_buff *pBuf)
{
#ifdef VBOXNETFLT_SG_SUPPORT
    unsigned cSegs = 1 + skb_shinfo(pBuf)->nr_frags;
#else
    unsigned cSegs = 1;
#endif
#ifdef PADD_RUNT_FRAMES_FROM_HOST
    /* vboxNetFltLinuxSkBufToSG adds a padding segment if it's a runt. */
    if (pBuf->len < 60)
        cSegs++;
#endif
    return cSegs;
}
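
/* Typical call pattern (see vboxNetFltLinuxForwardSegment below): size an
 * INTNETSG on the stack for exactly this many segments, then fill it in:
 *
 *     unsigned  cSegs = vboxNetFltLinuxCalcSGSegments(pBuf);
 *     PINTNETSG pSG   = (PINTNETSG)alloca(RT_OFFSETOF(INTNETSG, aSegs[cSegs]));
 *     vboxNetFltLinuxSkBufToSG(pThis, pBuf, pSG, cSegs, fSrc, NULL);
 */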

/**
 * Destroy the intnet scatter / gather buffer created by
 * vboxNetFltLinuxSkBufToSG.
 */
static void vboxNetFltLinuxDestroySG(PINTNETSG pSG)
{
#ifdef VBOXNETFLT_SG_SUPPORT
    int i;

    /* Segment 0 is the linear sk_buff data; segments 1 and up were kmapped
       fragment pages by vboxNetFltLinuxSkBufToSG and must be unmapped again.
       (Note: a runt-padding segment, if present, is static memory and would
       need to be excluded here.) */
    for (i = 1; i < pSG->cSegsUsed; i++)
    {
        printk("kunmap(%p)\n", pSG->aSegs[i].pv);
        kunmap(pSG->aSegs[i].pv);
    }
#endif
    NOREF(pSG);
}

#ifdef LOG_ENABLED
/**
 * Logging helper.
 */
static void vboxNetFltDumpPacket(PINTNETSG pSG, bool fEgress, const char *pszWhere, int iIncrement)
{
    uint8_t *pInt, *pExt;
    static int iPacketNo = 1;
    iPacketNo += iIncrement;
    if (fEgress)
    {
        pExt = pSG->aSegs[0].pv;
        pInt = pExt + 6;
    }
    else
    {
        pInt = pSG->aSegs[0].pv;
        pExt = pInt + 6;
    }
    Log(("VBoxNetFlt: (int)%02x:%02x:%02x:%02x:%02x:%02x"
         " %s (%s)%02x:%02x:%02x:%02x:%02x:%02x (%u bytes) packet #%u\n",
         pInt[0], pInt[1], pInt[2], pInt[3], pInt[4], pInt[5],
         fEgress ? "-->" : "<--", pszWhere,
         pExt[0], pExt[1], pExt[2], pExt[3], pExt[4], pExt[5],
         pSG->cbTotal, iPacketNo));
    Log3(("%.*Rhxd\n", pSG->aSegs[0].cb, pSG->aSegs[0].pv));
}
#else
# define vboxNetFltDumpPacket(a, b, c, d) do {} while (0)
#endif

#ifdef VBOXNETFLT_WITH_GSO_RECV

/**
 * Worker for vboxNetFltLinuxForwardToIntNet that checks if we can forward a
 * GSO socket buffer without having to segment it.
 *
 * @returns true on success, false if needs segmenting.
 * @param   pThis       The net filter instance.
 * @param   pSkb        The GSO socket buffer.
 * @param   fSrc        The source.
 * @param   pGsoCtx     Where to return the GSO context on success.
 */
static bool vboxNetFltLinuxCanForwardAsGso(PVBOXNETFLTINS pThis, struct sk_buff *pSkb, uint32_t fSrc,
                                           PPDMNETWORKGSO pGsoCtx)
{
    PDMNETWORKGSOTYPE   enmGsoType;
    uint16_t            uEtherType;
    unsigned int        cbTransport;
    unsigned int        offTransport;
    unsigned int        cbTransportHdr;
    unsigned            uProtocol;
    union
    {
        RTNETIPV4   IPv4;
        RTNETIPV6   IPv6;
        RTNETTCP    Tcp;
        uint8_t     ab[40];
        uint16_t    au16[40/2];
        uint32_t    au32[40/4];
    } Buf;

    /*
     * Check the GSO properties of the socket buffer and make sure it fits.
     */
    /** @todo Figure out how to handle SKB_GSO_TCP_ECN! */
    if (RT_UNLIKELY( skb_shinfo(pSkb)->gso_type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCPV6 | SKB_GSO_TCPV4) ))
    {
        Log5(("vboxNetFltLinuxCanForwardAsGso: gso_type=%#x\n", skb_shinfo(pSkb)->gso_type));
        return false;
    }
    if (RT_UNLIKELY(   skb_shinfo(pSkb)->gso_size < 1
                    || pSkb->len > VBOX_MAX_GSO_SIZE ))
    {
        Log5(("vboxNetFltLinuxCanForwardAsGso: gso_size=%#x skb_len=%#x (max=%#x)\n", skb_shinfo(pSkb)->gso_size, pSkb->len, VBOX_MAX_GSO_SIZE));
        return false;
    }
    if (RT_UNLIKELY(fSrc & INTNETTRUNKDIR_WIRE))
    {
        Log5(("vboxNetFltLinuxCanForwardAsGso: fSrc=wire\n"));
        return false;
    }

    /*
     * skb_gso_segment does the following. Do we need to do it as well?
     */
    skb_reset_mac_header(pSkb);
    pSkb->mac_len = pSkb->network_header - pSkb->mac_header;

    /*
     * Switch on the ethertype.
     */
    uEtherType = pSkb->protocol;
    if (   uEtherType == RT_H2N_U16_C(RTNET_ETHERTYPE_VLAN)
        && pSkb->mac_len == sizeof(RTNETETHERHDR) + sizeof(uint32_t))
    {
        uint16_t const *puEtherType = skb_header_pointer(pSkb, sizeof(RTNETETHERHDR) + sizeof(uint16_t), sizeof(uint16_t), &Buf);
        if (puEtherType)
            uEtherType = *puEtherType;
    }
    switch (uEtherType)
    {
        case RT_H2N_U16_C(RTNET_ETHERTYPE_IPV4):
        {
            unsigned int cbHdr;
            PCRTNETIPV4  pIPv4 = (PCRTNETIPV4)skb_header_pointer(pSkb, pSkb->mac_len, sizeof(Buf.IPv4), &Buf);
            if (RT_UNLIKELY(!pIPv4))
            {
                Log5(("vboxNetFltLinuxCanForwardAsGso: failed to access IPv4 hdr\n"));
                return false;
            }

            cbHdr       = pIPv4->ip_hl * 4;
            cbTransport = RT_N2H_U16(pIPv4->ip_len);
            if (RT_UNLIKELY(   cbHdr < RTNETIPV4_MIN_LEN
                            || cbHdr > cbTransport ))
            {
                Log5(("vboxNetFltLinuxCanForwardAsGso: invalid IPv4 lengths: ip_hl=%u ip_len=%u\n", pIPv4->ip_hl, RT_N2H_U16(pIPv4->ip_len)));
                return false;
            }
            cbTransport -= cbHdr;
            offTransport = pSkb->mac_len + cbHdr;
            uProtocol    = pIPv4->ip_p;
            if (uProtocol == RTNETIPV4_PROT_TCP)
                enmGsoType = PDMNETWORKGSOTYPE_IPV4_TCP;
            else if (uProtocol == RTNETIPV4_PROT_UDP)
                enmGsoType = PDMNETWORKGSOTYPE_IPV4_UDP;
            else /** @todo IPv6: 4to6 tunneling */
                enmGsoType = PDMNETWORKGSOTYPE_INVALID;
            break;
        }

        case RT_H2N_U16_C(RTNET_ETHERTYPE_IPV6):
        {
            PCRTNETIPV6 pIPv6 = (PCRTNETIPV6)skb_header_pointer(pSkb, pSkb->mac_len, sizeof(Buf.IPv6), &Buf);
            if (RT_UNLIKELY(!pIPv6))
            {
                Log5(("vboxNetFltLinuxCanForwardAsGso: failed to access IPv6 hdr\n"));
                return false;
            }

            cbTransport  = RT_N2H_U16(pIPv6->ip6_plen);
            offTransport = pSkb->mac_len + sizeof(RTNETIPV6);
            uProtocol    = pIPv6->ip6_nxt;
            /** @todo IPv6: Dig our way out of the other headers. */
            if (uProtocol == RTNETIPV4_PROT_TCP)
                enmGsoType = PDMNETWORKGSOTYPE_IPV6_TCP;
            else if (uProtocol == RTNETIPV4_PROT_UDP)
                enmGsoType = PDMNETWORKGSOTYPE_IPV6_UDP;
            else
                enmGsoType = PDMNETWORKGSOTYPE_INVALID;
            break;
        }

        default:
            Log5(("vboxNetFltLinuxCanForwardAsGso: uEtherType=%#x\n", RT_H2N_U16(uEtherType)));
            return false;
    }

    if (enmGsoType == PDMNETWORKGSOTYPE_INVALID)
    {
        Log5(("vboxNetFltLinuxCanForwardAsGso: Unsupported protocol %d\n", uProtocol));
        return false;
    }

    if (RT_UNLIKELY(   offTransport + cbTransport <= offTransport
                    || offTransport + cbTransport > pSkb->len
                    || cbTransport < (uProtocol == RTNETIPV4_PROT_TCP ? RTNETTCP_MIN_LEN : RTNETUDP_MIN_LEN)) )
    {
        Log5(("vboxNetFltLinuxCanForwardAsGso: Bad transport length; off=%#x + cb=%#x => %#x; skb_len=%#x (%s)\n",
              offTransport, cbTransport, offTransport + cbTransport, pSkb->len, PDMNetGsoTypeName(enmGsoType) ));
        return false;
    }

    /*
     * Check the TCP/UDP bits.
     */
    if (uProtocol == RTNETIPV4_PROT_TCP)
    {
        PCRTNETTCP pTcp = (PCRTNETTCP)skb_header_pointer(pSkb, offTransport, sizeof(Buf.Tcp), &Buf);
        if (RT_UNLIKELY(!pTcp))
        {
            Log5(("vboxNetFltLinuxCanForwardAsGso: failed to access TCP hdr\n"));
            return false;
        }

        cbTransportHdr = pTcp->th_off * 4;
        if (RT_UNLIKELY(   cbTransportHdr < RTNETTCP_MIN_LEN
                        || cbTransportHdr > cbTransport
                        || offTransport + cbTransportHdr >= UINT8_MAX
                        || offTransport + cbTransportHdr >= pSkb->len ))
        {
            Log5(("vboxNetFltLinuxCanForwardAsGso: No space for TCP header; off=%#x cb=%#x skb_len=%#x\n", offTransport, cbTransportHdr, pSkb->len));
            return false;
        }

    }
    else
    {
        Assert(uProtocol == RTNETIPV4_PROT_UDP);
        cbTransportHdr = sizeof(RTNETUDP);
        if (RT_UNLIKELY(   offTransport + cbTransportHdr >= UINT8_MAX
                        || offTransport + cbTransportHdr >= pSkb->len ))
        {
            Log5(("vboxNetFltLinuxCanForwardAsGso: No space for UDP header; off=%#x skb_len=%#x\n", offTransport, pSkb->len));
            return false;
        }
    }

    /*
     * We're good, init the GSO context.
     */
    pGsoCtx->u8Type       = enmGsoType;
    pGsoCtx->cbHdrs       = offTransport + cbTransportHdr;
    pGsoCtx->cbMaxSeg     = skb_shinfo(pSkb)->gso_size;
    pGsoCtx->offHdr1      = pSkb->mac_len;
    pGsoCtx->offHdr2      = offTransport;
    pGsoCtx->au8Unused[0] = 0;
    pGsoCtx->au8Unused[1] = 0;

    return true;
}
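
/* Worked example: for an untagged TCP/IPv4 GSO buffer with minimal headers,
 * the resulting context would be offHdr1 = 14 (Ethernet header), offHdr2 = 34
 * (14 + 20 byte IPv4 header), cbHdrs = 54 (34 + 20 byte TCP header), and
 * cbMaxSeg = gso_size, i.e. the number of payload bytes carried per segment. */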

/**
 * Forward the socket buffer as a GSO internal network frame.
 *
 * @returns IPRT status code.
 * @param   pThis       The net filter instance.
 * @param   pSkb        The GSO socket buffer.
 * @param   fSrc        The source.
 * @param   pGsoCtx     The GSO context created by
 *                      vboxNetFltLinuxCanForwardAsGso.
 */
static int vboxNetFltLinuxForwardAsGso(PVBOXNETFLTINS pThis, struct sk_buff *pSkb, uint32_t fSrc, PCPDMNETWORKGSO pGsoCtx)
{
    int rc;
    unsigned cSegs = vboxNetFltLinuxCalcSGSegments(pSkb);
    if (RT_LIKELY(cSegs <= MAX_SKB_FRAGS + 1))
    {
        PINTNETSG pSG = (PINTNETSG)alloca(RT_OFFSETOF(INTNETSG, aSegs[cSegs]));
        if (RT_LIKELY(pSG))
        {
            vboxNetFltLinuxSkBufToSG(pThis, pSkb, pSG, cSegs, fSrc, pGsoCtx);

            vboxNetFltDumpPacket(pSG, false, (fSrc & INTNETTRUNKDIR_HOST) ? "host" : "wire", 1);
            pThis->pSwitchPort->pfnRecv(pThis->pSwitchPort, pSG, fSrc);

            vboxNetFltLinuxDestroySG(pSG);
            rc = VINF_SUCCESS;
        }
        else
        {
            Log(("VBoxNetFlt: Dropping the sk_buff (failure case).\n"));
            rc = VERR_NO_MEMORY;
        }
    }
    else
    {
        Log(("VBoxNetFlt: Bad sk_buff? cSegs=%#x.\n", cSegs));
        rc = VERR_INTERNAL_ERROR_3;
    }

    Log4(("VBoxNetFlt: Dropping the sk_buff.\n"));
    dev_kfree_skb(pSkb);
    return rc;
}

#endif /* VBOXNETFLT_WITH_GSO_RECV */

/**
 * Worker for vboxNetFltLinuxForwardToIntNet.
 *
 * @returns VINF_SUCCESS or VERR_NO_MEMORY.
 * @param   pThis       The net filter instance.
 * @param   pBuf        The socket buffer.
 * @param   fSrc        The source.
 */
static int vboxNetFltLinuxForwardSegment(PVBOXNETFLTINS pThis, struct sk_buff *pBuf, uint32_t fSrc)
{
    int rc;
    unsigned cSegs = vboxNetFltLinuxCalcSGSegments(pBuf);
    if (cSegs <= MAX_SKB_FRAGS + 1)
    {
        PINTNETSG pSG = (PINTNETSG)alloca(RT_OFFSETOF(INTNETSG, aSegs[cSegs]));
        if (RT_LIKELY(pSG))
        {
            vboxNetFltLinuxSkBufToSG(pThis, pBuf, pSG, cSegs, fSrc, NULL /*pGsoCtx*/);

            vboxNetFltDumpPacket(pSG, false, (fSrc & INTNETTRUNKDIR_HOST) ? "host" : "wire", 1);
            pThis->pSwitchPort->pfnRecv(pThis->pSwitchPort, pSG, fSrc);

            vboxNetFltLinuxDestroySG(pSG);
            rc = VINF_SUCCESS;
        }
        else
        {
            Log(("VBoxNetFlt: Failed to allocate SG buffer.\n"));
            rc = VERR_NO_MEMORY;
        }
    }
    else
    {
        Log(("VBoxNetFlt: Bad sk_buff? cSegs=%#x.\n", cSegs));
        rc = VERR_INTERNAL_ERROR_3;
    }

    Log4(("VBoxNetFlt: Dropping the sk_buff.\n"));
    dev_kfree_skb(pBuf);
    return rc;
}

static void vboxNetFltLinuxForwardToIntNet(PVBOXNETFLTINS pThis, struct sk_buff *pBuf)
{
    uint32_t fSrc = pBuf->pkt_type == PACKET_OUTGOING ? INTNETTRUNKDIR_HOST : INTNETTRUNKDIR_WIRE;

#ifdef VBOXNETFLT_WITH_GSO
    if (skb_is_gso(pBuf))
    {
        PDMNETWORKGSO GsoCtx;
        Log3(("vboxNetFltLinuxForwardToIntNet: skb len=%u data_len=%u truesize=%u next=%p nr_frags=%u gso_size=%u gso_segs=%u gso_type=%x frag_list=%p pkt_type=%x ip_summed=%d\n",
              pBuf->len, pBuf->data_len, pBuf->truesize, pBuf->next, skb_shinfo(pBuf)->nr_frags, skb_shinfo(pBuf)->gso_size, skb_shinfo(pBuf)->gso_segs, skb_shinfo(pBuf)->gso_type, skb_shinfo(pBuf)->frag_list, pBuf->pkt_type, pBuf->ip_summed));
# ifdef VBOXNETFLT_WITH_GSO_RECV
        if (   (skb_shinfo(pBuf)->gso_type & (SKB_GSO_UDP | SKB_GSO_TCPV6 | SKB_GSO_TCPV4))
            && vboxNetFltLinuxCanForwardAsGso(pThis, pBuf, fSrc, &GsoCtx) )
            vboxNetFltLinuxForwardAsGso(pThis, pBuf, fSrc, &GsoCtx);
        else
# endif
        {
            /* Need to segment the packet. */
            struct sk_buff *pNext;
            struct sk_buff *pSegment = skb_gso_segment(pBuf, 0 /*supported features*/);
            if (IS_ERR(pSegment))
            {
                dev_kfree_skb(pBuf);
                LogRel(("VBoxNetFlt: Failed to segment a packet (%d).\n", PTR_ERR(pSegment)));
                return;
            }

            for (; pSegment; pSegment = pNext)
            {
                Log3(("vboxNetFltLinuxForwardToIntNet: segment len=%u data_len=%u truesize=%u next=%p nr_frags=%u gso_size=%u gso_segs=%u gso_type=%x frag_list=%p pkt_type=%x\n",
                      pSegment->len, pSegment->data_len, pSegment->truesize, pSegment->next, skb_shinfo(pSegment)->nr_frags, skb_shinfo(pSegment)->gso_size, skb_shinfo(pSegment)->gso_segs, skb_shinfo(pSegment)->gso_type, skb_shinfo(pSegment)->frag_list, pSegment->pkt_type));
                pNext = pSegment->next;
                pSegment->next = NULL;
                vboxNetFltLinuxForwardSegment(pThis, pSegment, fSrc);
            }
            dev_kfree_skb(pBuf);
        }
    }
    else
#endif /* VBOXNETFLT_WITH_GSO */
    {
        if (pBuf->ip_summed == CHECKSUM_PARTIAL && pBuf->pkt_type == PACKET_OUTGOING)
        {
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
            /*
             * Try to work around the problem with CentOS 4.7 and 5.2 (2.6.9
             * and 2.6.18 kernels): they pass a wrong 'h' pointer down. We take
             * the IP header length from the header itself and reconstruct the
             * 'h' pointer to the TCP (or whatever) header.
             */
            unsigned char *tmp = pBuf->h.raw;
            if (pBuf->h.raw == pBuf->nh.raw && pBuf->protocol == htons(ETH_P_IP))
                pBuf->h.raw = pBuf->nh.raw + pBuf->nh.iph->ihl * 4;
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18) */
            if (VBOX_SKB_CHECKSUM_HELP(pBuf))
            {
                LogRel(("VBoxNetFlt: Failed to compute checksum, dropping the packet.\n"));
                dev_kfree_skb(pBuf);
                return;
            }
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
            /* Restore the original (wrong) pointer. */
            pBuf->h.raw = tmp;
#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18) */
        }
        vboxNetFltLinuxForwardSegment(pThis, pBuf, fSrc);
    }
}

/**
 * Work queue handler that forwards the socket buffers queued by
 * vboxNetFltLinuxPacketHandler to the internal network.
 *
 * @param   pWork   The work item.
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20)
static void vboxNetFltLinuxXmitTask(struct work_struct *pWork)
#else
static void vboxNetFltLinuxXmitTask(void *pWork)
#endif
{
    PVBOXNETFLTINS  pThis = VBOX_FLT_XT_TO_INST(pWork);
    RTSPINLOCKTMP   Tmp   = RTSPINLOCKTMP_INITIALIZER;
    struct sk_buff *pBuf;

    Log4(("vboxNetFltLinuxXmitTask: Got work %p.\n", pWork));

    /*
     * Active? Retain the instance and increment the busy counter.
     */
    RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
    if (ASMAtomicUoReadBool(&pThis->fActive))
    {
        vboxNetFltRetain(pThis, true /* fBusy */);
        RTSpinlockRelease(pThis->hSpinlock, &Tmp);

        while ((pBuf = skb_dequeue(&pThis->u.s.XmitQueue)) != NULL)
            vboxNetFltLinuxForwardToIntNet(pThis, pBuf);

        vboxNetFltRelease(pThis, true /* fBusy */);
    }
    else
    {
        RTSpinlockRelease(pThis->hSpinlock, &Tmp);
        /** @todo Shouldn't we just drop the packets here? There is little point in
         *        making them accumulate when the VM is paused and it'll only waste
         *        kernel memory anyway... Hmm. maybe wait a short while (2-5 secs)
         *        before starting to drain the packets (goes for the intnet ring buf
         *        too)? */
    }
}

/**
 * Reports the GSO capabilities of the hardware NIC.
 *
 * @param   pThis       The net filter instance. The caller holds a
 *                      reference to this.
 */
static void vboxNetFltLinuxReportNicGsoCapabilities(PVBOXNETFLTINS pThis)
{
#ifdef VBOXNETFLT_WITH_GSO_XMIT_WIRE
    struct net_device  *pDev;
    PINTNETTRUNKSWPORT  pSwitchPort;
    unsigned int        fFeatures;
    RTSPINLOCKTMP       Tmp = RTSPINLOCKTMP_INITIALIZER;

    RTSpinlockAcquire(pThis->hSpinlock, &Tmp);

    pSwitchPort = pThis->pSwitchPort; /* this doesn't need to be here, but it doesn't harm. */
    pDev = (struct net_device *)ASMAtomicUoReadPtr((void * volatile *)&pThis->u.s.pDev);
    if (pDev)
        fFeatures = pDev->features;
    else
        fFeatures = 0;

    RTSpinlockRelease(pThis->hSpinlock, &Tmp);

    if (pThis->pSwitchPort)
    {
        /* Set/update the GSO capabilities of the NIC. */
        uint32_t fGsoCapabilities = 0;
        if (fFeatures & NETIF_F_TSO)
            fGsoCapabilities |= RT_BIT_32(PDMNETWORKGSOTYPE_IPV4_TCP);
        if (fFeatures & NETIF_F_TSO6)
            fGsoCapabilities |= RT_BIT_32(PDMNETWORKGSOTYPE_IPV6_TCP);
# if 0 /** @todo GSO: Test UDP offloading (UFO) on linux. */
        if (fFeatures & NETIF_F_UFO)
            fGsoCapabilities |= RT_BIT_32(PDMNETWORKGSOTYPE_IPV4_UDP);
        if (fFeatures & NETIF_F_UFO)
            fGsoCapabilities |= RT_BIT_32(PDMNETWORKGSOTYPE_IPV6_UDP);
# endif
        pThis->pSwitchPort->pfnReportGsoCapabilities(pThis->pSwitchPort, fGsoCapabilities, INTNETTRUNKDIR_WIRE);
    }
#endif /* VBOXNETFLT_WITH_GSO_XMIT_WIRE */
}

/**
 * Internal worker for vboxNetFltLinuxNotifierCallback.
 *
 * @returns VBox status code.
 * @param   pThis       The instance.
 * @param   pDev        The device to attach to.
 */
static int vboxNetFltLinuxAttachToInterface(PVBOXNETFLTINS pThis, struct net_device *pDev)
{
    RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
    LogFlow(("vboxNetFltLinuxAttachToInterface: pThis=%p (%s)\n", pThis, pThis->szName));

    /*
     * Retain and store the device.
     */
    dev_hold(pDev);

    RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
    ASMAtomicUoWritePtr((void * volatile *)&pThis->u.s.pDev, pDev);
    RTSpinlockRelease(pThis->hSpinlock, &Tmp);

    Log(("vboxNetFltLinuxAttachToInterface: Device %p(%s) retained. ref=%d\n", pDev, pDev->name, atomic_read(&pDev->refcnt)));
    Log(("vboxNetFltLinuxAttachToInterface: Got pDev=%p pThis=%p pThis->u.s.pDev=%p\n", pDev, pThis, ASMAtomicUoReadPtr((void * volatile *)&pThis->u.s.pDev)));

    /* Get the mac address while we still have a valid net_device reference. */
    memcpy(&pThis->u.s.Mac, pDev->dev_addr, sizeof(pThis->u.s.Mac));

    /*
     * Install a packet filter for this device with a protocol wildcard (ETH_P_ALL).
     */
    pThis->u.s.PacketType.type = __constant_htons(ETH_P_ALL);
    pThis->u.s.PacketType.dev  = pDev;
    pThis->u.s.PacketType.func = vboxNetFltLinuxPacketHandler;
    dev_add_pack(&pThis->u.s.PacketType);

#ifdef VBOXNETFLT_WITH_FILTER_HOST2GUEST_SKBS_EXPERIMENT
    vboxNetFltLinuxHookDev(pThis, pDev);
#endif

    /*
     * Set indicators that require the spinlock. Be a bit paranoid about racing
     * the device notification handler.
     */
    RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
    pDev = (struct net_device *)ASMAtomicUoReadPtr((void * volatile *)&pThis->u.s.pDev);
    if (pDev)
    {
        ASMAtomicUoWriteBool(&pThis->fDisconnectedFromHost, false);
        ASMAtomicUoWriteBool(&pThis->u.s.fRegistered, true);
        pDev = NULL; /* don't dereference it */
    }
    RTSpinlockRelease(pThis->hSpinlock, &Tmp);
    Log(("vboxNetFltLinuxAttachToInterface: this=%p: Packet handler installed.\n", pThis));

    /*
     * If the above succeeded, report GSO capabilities; if not, undo and
     * release the device.
     */
    if (!pDev)
        vboxNetFltLinuxReportNicGsoCapabilities(pThis);
    else
    {
#ifdef VBOXNETFLT_WITH_FILTER_HOST2GUEST_SKBS_EXPERIMENT
        vboxNetFltLinuxUnhookDev(pThis, pDev);
#endif
        RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
        ASMAtomicUoWritePtr((void * volatile *)&pThis->u.s.pDev, NULL);
        RTSpinlockRelease(pThis->hSpinlock, &Tmp);
        dev_put(pDev);
        Log(("vboxNetFltLinuxAttachToInterface: Device %p(%s) released. ref=%d\n", pDev, pDev->name, atomic_read(&pDev->refcnt)));
    }

    LogRel(("VBoxNetFlt: attached to '%s' / %.*Rhxs\n", pThis->szName, sizeof(pThis->u.s.Mac), &pThis->u.s.Mac));
    return VINF_SUCCESS;
}


static int vboxNetFltLinuxUnregisterDevice(PVBOXNETFLTINS pThis, struct net_device *pDev)
{
    RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;

    Assert(!pThis->fDisconnectedFromHost);

#ifdef VBOXNETFLT_WITH_FILTER_HOST2GUEST_SKBS_EXPERIMENT
    vboxNetFltLinuxUnhookDev(pThis, pDev);
#endif

    RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
    ASMAtomicWriteBool(&pThis->u.s.fRegistered, false);
    ASMAtomicWriteBool(&pThis->fDisconnectedFromHost, true);
    ASMAtomicUoWritePtr((void * volatile *)&pThis->u.s.pDev, NULL);
    RTSpinlockRelease(pThis->hSpinlock, &Tmp);

    dev_remove_pack(&pThis->u.s.PacketType);
    skb_queue_purge(&pThis->u.s.XmitQueue);
    Log(("vboxNetFltLinuxUnregisterDevice: this=%p: Packet handler removed, xmit queue purged.\n", pThis));
    Log(("vboxNetFltLinuxUnregisterDevice: Device %p(%s) released. ref=%d\n", pDev, pDev->name, atomic_read(&pDev->refcnt)));
    dev_put(pDev);

    return NOTIFY_OK;
}

static int vboxNetFltLinuxDeviceIsUp(PVBOXNETFLTINS pThis, struct net_device *pDev)
{
    /* Check if we are not suspended and promiscuous mode has not been set. */
    if (ASMAtomicUoReadBool(&pThis->fActive) && !ASMAtomicUoReadBool(&pThis->u.s.fPromiscuousSet))
    {
        /* Note that there is no need for locking as the kernel got hold of the lock already. */
        dev_set_promiscuity(pDev, 1);
        ASMAtomicWriteBool(&pThis->u.s.fPromiscuousSet, true);
        Log(("vboxNetFltLinuxDeviceIsUp: enabled promiscuous mode on %s (%d)\n", pThis->szName, pDev->promiscuity));
    }
    else
        Log(("vboxNetFltLinuxDeviceIsUp: no need to enable promiscuous mode on %s (%d)\n", pThis->szName, pDev->promiscuity));
    return NOTIFY_OK;
}

static int vboxNetFltLinuxDeviceGoingDown(PVBOXNETFLTINS pThis, struct net_device *pDev)
{
    /* Undo promiscuous mode if we have set it. */
    if (ASMAtomicUoReadBool(&pThis->u.s.fPromiscuousSet))
    {
        /* Note that there is no need for locking as the kernel got hold of the lock already. */
        dev_set_promiscuity(pDev, -1);
        ASMAtomicWriteBool(&pThis->u.s.fPromiscuousSet, false);
        Log(("vboxNetFltLinuxDeviceGoingDown: disabled promiscuous mode on %s (%d)\n", pThis->szName, pDev->promiscuity));
    }
    else
        Log(("vboxNetFltLinuxDeviceGoingDown: no need to disable promiscuous mode on %s (%d)\n", pThis->szName, pDev->promiscuity));
    return NOTIFY_OK;
}

#ifdef LOG_ENABLED
/** Stringify the NETDEV_XXX constants. */
static const char *vboxNetFltLinuxGetNetDevEventName(unsigned long ulEventType)
{
    const char *pszEvent = "NETDEV_<unknown>";
    switch (ulEventType)
    {
        case NETDEV_REGISTER:   pszEvent = "NETDEV_REGISTER"; break;
        case NETDEV_UNREGISTER: pszEvent = "NETDEV_UNREGISTER"; break;
        case NETDEV_UP:         pszEvent = "NETDEV_UP"; break;
        case NETDEV_DOWN:       pszEvent = "NETDEV_DOWN"; break;
        case NETDEV_REBOOT:     pszEvent = "NETDEV_REBOOT"; break;
        case NETDEV_CHANGENAME: pszEvent = "NETDEV_CHANGENAME"; break;
        case NETDEV_CHANGE:     pszEvent = "NETDEV_CHANGE"; break;
        case NETDEV_CHANGEMTU:  pszEvent = "NETDEV_CHANGEMTU"; break;
        case NETDEV_CHANGEADDR: pszEvent = "NETDEV_CHANGEADDR"; break;
        case NETDEV_GOING_DOWN: pszEvent = "NETDEV_GOING_DOWN"; break;
# ifdef NETDEV_FEAT_CHANGE
        case NETDEV_FEAT_CHANGE: pszEvent = "NETDEV_FEAT_CHANGE"; break;
# endif
    }
    return pszEvent;
}
#endif /* LOG_ENABLED */

/**
 * Callback for listening to netdevice events.
 *
 * This handles rediscovery, cleanup on unregistration, promiscuity on
 * up/down, and GSO feature changes from ethtool.
 *
 * @returns NOTIFY_OK
 * @param   self            Pointer to our notifier registration block.
 * @param   ulEventType     The event.
 * @param   ptr             Event specific, but it is usually the device it
 *                          relates to.
 */
static int vboxNetFltLinuxNotifierCallback(struct notifier_block *self, unsigned long ulEventType, void *ptr)
{
    PVBOXNETFLTINS      pThis = VBOX_FLT_NB_TO_INST(self);
    struct net_device  *pDev  = (struct net_device *)ptr;
    int                 rc    = NOTIFY_OK;

    Log(("VBoxNetFlt: got event %s(0x%lx) on %s, pDev=%p pThis=%p pThis->u.s.pDev=%p\n",
         vboxNetFltLinuxGetNetDevEventName(ulEventType), ulEventType, pDev->name, pDev, pThis, ASMAtomicUoReadPtr((void * volatile *)&pThis->u.s.pDev)));
    if (   ulEventType == NETDEV_REGISTER
        && !strcmp(pDev->name, pThis->szName))
    {
        vboxNetFltLinuxAttachToInterface(pThis, pDev);
    }
    else
    {
        pDev = (struct net_device *)ASMAtomicUoReadPtr((void * volatile *)&pThis->u.s.pDev);
        if (pDev == ptr)
        {
            switch (ulEventType)
            {
                case NETDEV_UNREGISTER:
                    rc = vboxNetFltLinuxUnregisterDevice(pThis, pDev);
                    break;
                case NETDEV_UP:
                    rc = vboxNetFltLinuxDeviceIsUp(pThis, pDev);
                    break;
                case NETDEV_GOING_DOWN:
                    rc = vboxNetFltLinuxDeviceGoingDown(pThis, pDev);
                    break;
                case NETDEV_CHANGENAME:
                    break;
#ifdef NETDEV_FEAT_CHANGE
                case NETDEV_FEAT_CHANGE:
                    vboxNetFltLinuxReportNicGsoCapabilities(pThis);
                    break;
#endif
            }
        }
    }

    return rc;
}

bool vboxNetFltOsMaybeRediscovered(PVBOXNETFLTINS pThis)
{
    return !ASMAtomicUoReadBool(&pThis->fDisconnectedFromHost);
}

int vboxNetFltPortOsXmit(PVBOXNETFLTINS pThis, PINTNETSG pSG, uint32_t fDst)
{
    struct net_device *pDev;
    int err;
    int rc = VINF_SUCCESS;

    LogFlow(("vboxNetFltPortOsXmit: pThis=%p (%s)\n", pThis, pThis->szName));

    pDev = vboxNetFltLinuxRetainNetDev(pThis);
    if (pDev)
    {
        /*
         * Create a sk_buff for the gather list and push it onto the wire.
         */
        if (fDst & INTNETTRUNKDIR_WIRE)
        {
            struct sk_buff *pBuf = vboxNetFltLinuxSkBufFromSG(pThis, pSG, true);
            if (pBuf)
            {
                vboxNetFltDumpPacket(pSG, true, "wire", 1);
                Log4(("vboxNetFltPortOsXmit: pBuf->cb dump:\n%.*Rhxd\n", sizeof(pBuf->cb), pBuf->cb));
                Log4(("vboxNetFltPortOsXmit: dev_queue_xmit(%p)\n", pBuf));
                err = dev_queue_xmit(pBuf);
                if (err)
                    rc = RTErrConvertFromErrno(err);
            }
            else
                rc = VERR_NO_MEMORY;
        }

        /*
         * Create a sk_buff for the gather list and push it onto the host stack.
         */
        if (fDst & INTNETTRUNKDIR_HOST)
        {
            struct sk_buff *pBuf = vboxNetFltLinuxSkBufFromSG(pThis, pSG, false);
            if (pBuf)
            {
                vboxNetFltDumpPacket(pSG, true, "host", (fDst & INTNETTRUNKDIR_WIRE) ? 0 : 1);
                Log4(("vboxNetFltPortOsXmit: pBuf->cb dump:\n%.*Rhxd\n", sizeof(pBuf->cb), pBuf->cb));
                Log4(("vboxNetFltPortOsXmit: netif_rx_ni(%p)\n", pBuf));
                err = netif_rx_ni(pBuf);
                if (err)
                    rc = RTErrConvertFromErrno(err);
            }
            else
                rc = VERR_NO_MEMORY;
        }

        vboxNetFltLinuxReleaseNetDev(pThis, pDev);
    }

    return rc;
}


bool vboxNetFltPortOsIsPromiscuous(PVBOXNETFLTINS pThis)
{
    bool fRc = false;
    struct net_device *pDev = vboxNetFltLinuxRetainNetDev(pThis);
    if (pDev)
    {
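        /* dev->promiscuity is a reference counter; subtracting our own
           contribution (1 if fPromiscuousSet) means this reports true only
           when somebody else has also enabled promiscuous mode. */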
        fRc = !!(pDev->promiscuity - (ASMAtomicUoReadBool(&pThis->u.s.fPromiscuousSet) & 1));
        LogFlow(("vboxNetFltPortOsIsPromiscuous: returns %d, pDev->promiscuity=%d, fPromiscuousSet=%d\n",
                 fRc, pDev->promiscuity, pThis->u.s.fPromiscuousSet));
        vboxNetFltLinuxReleaseNetDev(pThis, pDev);
    }
    return fRc;
}


void vboxNetFltPortOsGetMacAddress(PVBOXNETFLTINS pThis, PRTMAC pMac)
{
    *pMac = pThis->u.s.Mac;
}


bool vboxNetFltPortOsIsHostMac(PVBOXNETFLTINS pThis, PCRTMAC pMac)
{
    /* ASSUMES that the MAC address never changes. */
    return pThis->u.s.Mac.au16[0] == pMac->au16[0]
        && pThis->u.s.Mac.au16[1] == pMac->au16[1]
        && pThis->u.s.Mac.au16[2] == pMac->au16[2];
}


void vboxNetFltPortOsSetActive(PVBOXNETFLTINS pThis, bool fActive)
{
    struct net_device *pDev;

    LogFlow(("vboxNetFltPortOsSetActive: pThis=%p (%s), fActive=%s, fDisablePromiscuous=%s\n",
             pThis, pThis->szName, fActive?"true":"false",
             pThis->fDisablePromiscuous?"true":"false"));

    if (pThis->fDisablePromiscuous)
        return;

    pDev = vboxNetFltLinuxRetainNetDev(pThis);
    if (pDev)
    {
        /*
         * This api is a bit weird, the best reference is the code.
         *
         * Also, we have a bit of a race condition wrt. the maintenance of the
         * host interface promiscuity for vboxNetFltPortOsIsPromiscuous.
         */
#ifdef LOG_ENABLED
        u_int16_t fIf;
        unsigned const cPromiscBefore = pDev->promiscuity;
#endif
        if (fActive)
        {
            Assert(!pThis->u.s.fPromiscuousSet);

            rtnl_lock();
            dev_set_promiscuity(pDev, 1);
            rtnl_unlock();
            pThis->u.s.fPromiscuousSet = true;
            Log(("vboxNetFltPortOsSetActive: enabled promiscuous mode on %s (%d)\n", pThis->szName, pDev->promiscuity));
        }
        else
        {
            if (pThis->u.s.fPromiscuousSet)
            {
                rtnl_lock();
                dev_set_promiscuity(pDev, -1);
                rtnl_unlock();
                Log(("vboxNetFltPortOsSetActive: disabled promiscuous mode on %s (%d)\n", pThis->szName, pDev->promiscuity));
            }
            pThis->u.s.fPromiscuousSet = false;

#ifdef LOG_ENABLED
            fIf = dev_get_flags(pDev);
            Log(("VBoxNetFlt: fIf=%#x; %d->%d\n", fIf, cPromiscBefore, pDev->promiscuity));
#endif
        }

        vboxNetFltLinuxReleaseNetDev(pThis, pDev);
    }
}


int vboxNetFltOsDisconnectIt(PVBOXNETFLTINS pThis)
{
    /* Nothing to do here. */
    return VINF_SUCCESS;
}


int vboxNetFltOsConnectIt(PVBOXNETFLTINS pThis)
{
    /*
     * Report the GSO capabilities of the host and device (if connected).
     */
#if defined(VBOXNETFLT_WITH_GSO_XMIT_HOST)
    pThis->pSwitchPort->pfnReportGsoCapabilities(pThis->pSwitchPort,
                                                 0
                                                 | RT_BIT_32(PDMNETWORKGSOTYPE_IPV4_TCP)
                                                 | RT_BIT_32(PDMNETWORKGSOTYPE_IPV6_TCP)
# if 0 /** @todo GSO: Test UDP offloading (UFO) on linux. */
                                                 | RT_BIT_32(PDMNETWORKGSOTYPE_IPV4_UDP)
                                                 | RT_BIT_32(PDMNETWORKGSOTYPE_IPV6_UDP)
# endif
                                                 , INTNETTRUNKDIR_HOST);

#endif
    vboxNetFltLinuxReportNicGsoCapabilities(pThis);

    return VINF_SUCCESS;
}


void vboxNetFltOsDeleteInstance(PVBOXNETFLTINS pThis)
{
    struct net_device  *pDev;
    bool                fRegistered;
    RTSPINLOCKTMP       Tmp = RTSPINLOCKTMP_INITIALIZER;

#ifdef VBOXNETFLT_WITH_FILTER_HOST2GUEST_SKBS_EXPERIMENT
    vboxNetFltLinuxUnhookDev(pThis, NULL);
#endif

    /** @todo This code may race vboxNetFltLinuxUnregisterDevice (very, very
     *        unlikely, but nonetheless). Since it doesn't actually update the
     *        state (just reads it), it is likely to panic in some interesting
     *        ways. */

    RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
    pDev = (struct net_device *)ASMAtomicUoReadPtr((void * volatile *)&pThis->u.s.pDev);
    fRegistered = ASMAtomicUoReadBool(&pThis->u.s.fRegistered);
    RTSpinlockRelease(pThis->hSpinlock, &Tmp);

    if (fRegistered)
    {
        dev_remove_pack(&pThis->u.s.PacketType);
        skb_queue_purge(&pThis->u.s.XmitQueue);
        Log(("vboxNetFltOsDeleteInstance: this=%p: Packet handler removed, xmit queue purged.\n", pThis));
        Log(("vboxNetFltOsDeleteInstance: Device %p(%s) released. ref=%d\n", pDev, pDev->name, atomic_read(&pDev->refcnt)));
        dev_put(pDev);
    }
    Log(("vboxNetFltOsDeleteInstance: this=%p: Notifier removed.\n", pThis));
    unregister_netdevice_notifier(&pThis->u.s.Notifier);
    module_put(THIS_MODULE);
}


int vboxNetFltOsInitInstance(PVBOXNETFLTINS pThis, void *pvContext)
{
    int err;
    NOREF(pvContext);

    pThis->u.s.Notifier.notifier_call = vboxNetFltLinuxNotifierCallback;
    err = register_netdevice_notifier(&pThis->u.s.Notifier);
    if (err)
        return VERR_INTNET_FLT_IF_FAILED;
    if (!pThis->u.s.fRegistered)
    {
        unregister_netdevice_notifier(&pThis->u.s.Notifier);
        LogRel(("VBoxNetFlt: failed to find %s.\n", pThis->szName));
        return VERR_INTNET_FLT_IF_NOT_FOUND;
    }

    Log(("vboxNetFltOsInitInstance: this=%p: Notifier installed.\n", pThis));
    if (   pThis->fDisconnectedFromHost
        || !try_module_get(THIS_MODULE))
        return VERR_INTNET_FLT_IF_FAILED;

    return VINF_SUCCESS;
}

int vboxNetFltOsPreInitInstance(PVBOXNETFLTINS pThis)
{
    /*
     * Init the linux specific members.
     */
    pThis->u.s.pDev            = NULL;
    pThis->u.s.fRegistered     = false;
    pThis->u.s.fPromiscuousSet = false;
    memset(&pThis->u.s.PacketType, 0, sizeof(pThis->u.s.PacketType));
    skb_queue_head_init(&pThis->u.s.XmitQueue);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20)
    INIT_WORK(&pThis->u.s.XmitTask, vboxNetFltLinuxXmitTask);
#else
    INIT_WORK(&pThis->u.s.XmitTask, vboxNetFltLinuxXmitTask, &pThis->u.s.XmitTask);
#endif

    return VINF_SUCCESS;
}