VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/VBoxNetFlt/linux/VBoxNetFlt-linux.c@29491

Last change on this file since 29491 was 29491, checked in by vboxsync, 15 years ago

IntNet: added MAC address notification and connect/disconnect interface callbacks.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 62.0 KB
1/* $Id: VBoxNetFlt-linux.c 29491 2010-05-14 17:46:22Z vboxsync $ */
2/** @file
3 * VBoxNetFlt - Network Filter Driver (Host), Linux Specific Code.
4 */
5
6/*
7 * Copyright (C) 2006-2008 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/*******************************************************************************
19* Header Files *
20*******************************************************************************/
21#define LOG_GROUP LOG_GROUP_NET_FLT_DRV
22#define VBOXNETFLT_LINUX_NO_XMIT_QUEUE
23#include "the-linux-kernel.h"
24#include "version-generated.h"
25#include "product-generated.h"
26#include <linux/netdevice.h>
27#include <linux/etherdevice.h>
28#include <linux/rtnetlink.h>
29#include <linux/miscdevice.h>
30#include <linux/ip.h>
31
32#include <VBox/log.h>
33#include <VBox/err.h>
34#include <VBox/intnetinline.h>
35#include <VBox/pdmnetinline.h>
36#include <VBox/param.h>
37#include <iprt/alloca.h>
38#include <iprt/assert.h>
39#include <iprt/spinlock.h>
40#include <iprt/semaphore.h>
41#include <iprt/initterm.h>
42#include <iprt/process.h>
43#include <iprt/mem.h>
44#include <iprt/net.h>
45#include <iprt/log.h>
46#include <iprt/mp.h>
47#include <iprt/mem.h>
48#include <iprt/time.h>
49
50#define VBOXNETFLT_OS_SPECFIC 1
51#include "../VBoxNetFltInternal.h"
52
53
54/*******************************************************************************
55* Defined Constants And Macros *
56*******************************************************************************/
57#define VBOX_FLT_NB_TO_INST(pNB) RT_FROM_MEMBER(pNB, VBOXNETFLTINS, u.s.Notifier)
58#define VBOX_FLT_PT_TO_INST(pPT) RT_FROM_MEMBER(pPT, VBOXNETFLTINS, u.s.PacketType)
59#ifndef VBOXNETFLT_LINUX_NO_XMIT_QUEUE
60# define VBOX_FLT_XT_TO_INST(pXT) RT_FROM_MEMBER(pXT, VBOXNETFLTINS, u.s.XmitTask)
61#endif
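/*
 * Illustrative sketch (not part of the original file): RT_FROM_MEMBER is the
 * IPRT analogue of the kernel's container_of(). It recovers the owning
 * VBOXNETFLTINS from a pointer to one of its embedded members. The function
 * name below is hypothetical; roughly, VBOX_FLT_NB_TO_INST boils down to:
 */
#if 0
static PVBOXNETFLTINS vboxNetFltExampleNbToInst(struct notifier_block *pNB)
{
    /* Subtract the offset of the embedded member from its address. */
    return (PVBOXNETFLTINS)((uintptr_t)pNB - RT_OFFSETOF(VBOXNETFLTINS, u.s.Notifier));
}
#endif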
62
63#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 22)
64# define VBOX_SKB_RESET_NETWORK_HDR(skb) skb_reset_network_header(skb)
65# define VBOX_SKB_RESET_MAC_HDR(skb) skb_reset_mac_header(skb)
66#else
67# define VBOX_SKB_RESET_NETWORK_HDR(skb) skb->nh.raw = skb->data
68# define VBOX_SKB_RESET_MAC_HDR(skb) skb->mac.raw = skb->data
69#endif
70
71#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
72# define VBOX_SKB_CHECKSUM_HELP(skb) skb_checksum_help(skb)
73#else
74# define CHECKSUM_PARTIAL CHECKSUM_HW
75# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 10)
76# define VBOX_SKB_CHECKSUM_HELP(skb) skb_checksum_help(skb, 0)
77# else
78# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 7)
79# define VBOX_SKB_CHECKSUM_HELP(skb) skb_checksum_help(&skb, 0)
80# else
81# define VBOX_SKB_CHECKSUM_HELP(skb) (!skb_checksum_help(skb))
82# endif
83# endif
84#endif
85
86#ifndef NET_IP_ALIGN
87# define NET_IP_ALIGN 2
88#endif
89
90#if 0
91/** Create scatter / gather segments for fragments. When not used, we will
92 * linearize the socket buffer before creating the internal networking SG. */
93# define VBOXNETFLT_SG_SUPPORT 1
94#endif
95
96#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 18)
97/** Indicates that the linux kernel may send us GSO frames. */
98# define VBOXNETFLT_WITH_GSO 1
99
100/** This enables or disables the transmitting of GSO frames from the internal
101 * network to the host. */
102# define VBOXNETFLT_WITH_GSO_XMIT_HOST 1
103
104# if 0 /** @todo This is currently disabled because it causes a performance loss of 5-10%. */
105/** This enables or disables the transmitting of GSO frames from the internal
106 * network to the wire. */
107# define VBOXNETFLT_WITH_GSO_XMIT_WIRE 1
108# endif
109
110/** This enables or disables the forwarding/flooding of GSO frames from the host
111 * to the internal network. */
112# define VBOXNETFLT_WITH_GSO_RECV 1
113
114#endif
115
116
117/*******************************************************************************
118* Internal Functions *
119*******************************************************************************/
120static int VBoxNetFltLinuxInit(void);
121static void VBoxNetFltLinuxUnload(void);
122static void vboxNetFltLinuxForwardToIntNet(PVBOXNETFLTINS pThis, struct sk_buff *pBuf);
123
124
125/*******************************************************************************
126* Global Variables *
127*******************************************************************************/
128/**
129 * The (common) global data.
130 */
131static VBOXNETFLTGLOBALS g_VBoxNetFltGlobals;
132
133module_init(VBoxNetFltLinuxInit);
134module_exit(VBoxNetFltLinuxUnload);
135
136MODULE_AUTHOR(VBOX_VENDOR);
137MODULE_DESCRIPTION(VBOX_PRODUCT " Network Filter Driver");
138MODULE_LICENSE("GPL");
139#ifdef MODULE_VERSION
140MODULE_VERSION(VBOX_VERSION_STRING " (" RT_XSTR(INTNETTRUNKIFPORT_VERSION) ")");
141#endif
142
143
144#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 12) && defined(LOG_ENABLED)
145unsigned dev_get_flags(const struct net_device *dev)
146{
147 unsigned flags;
148
149 flags = (dev->flags & ~(IFF_PROMISC |
150 IFF_ALLMULTI |
151 IFF_RUNNING)) |
152 (dev->gflags & (IFF_PROMISC |
153 IFF_ALLMULTI));
154
155 if (netif_running(dev) && netif_carrier_ok(dev))
156 flags |= IFF_RUNNING;
157
158 return flags;
159}
160#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 12) */
161
162
163/**
164 * Initialize module.
165 *
166 * @returns appropriate status code.
167 */
168static int __init VBoxNetFltLinuxInit(void)
169{
170 int rc;
171 /*
172 * Initialize IPRT.
173 */
174 rc = RTR0Init(0);
175 if (RT_SUCCESS(rc))
176 {
177 Log(("VBoxNetFltLinuxInit\n"));
178
179 /*
180 * Initialize the globals and connect to the support driver.
181 *
182 * This will call back vboxNetFltOsOpenSupDrv (and maybe vboxNetFltOsCloseSupDrv)
183 * for establishing the connection to the support driver.
184 */
185 memset(&g_VBoxNetFltGlobals, 0, sizeof(g_VBoxNetFltGlobals));
186 rc = vboxNetFltInitGlobalsAndIdc(&g_VBoxNetFltGlobals);
187 if (RT_SUCCESS(rc))
188 {
189 LogRel(("VBoxNetFlt: Successfully started.\n"));
190 return 0;
191 }
192
193 LogRel(("VBoxNetFlt: failed to initialize device extension (rc=%d)\n", rc));
194 RTR0Term();
195 }
196 else
197 LogRel(("VBoxNetFlt: failed to initialize IPRT (rc=%d)\n", rc));
198
199 memset(&g_VBoxNetFltGlobals, 0, sizeof(g_VBoxNetFltGlobals));
200 return -RTErrConvertToErrno(rc);
201}
202
203
204/**
205 * Unload the module.
206 *
207 * @todo We have to prevent this if we're busy!
208 */
209static void __exit VBoxNetFltLinuxUnload(void)
210{
211 int rc;
212 Log(("VBoxNetFltLinuxUnload\n"));
213 Assert(vboxNetFltCanUnload(&g_VBoxNetFltGlobals));
214
215 /*
216 * Undo the work done during start (in reverse order).
217 */
218 rc = vboxNetFltTryDeleteIdcAndGlobals(&g_VBoxNetFltGlobals);
219 AssertRC(rc); NOREF(rc);
220
221 RTR0Term();
222
223 memset(&g_VBoxNetFltGlobals, 0, sizeof(g_VBoxNetFltGlobals));
224
225 Log(("VBoxNetFltLinuxUnload - done\n"));
226}
227
228/**
229 * Experiment where we filter traffic from the host to the internal network
230 * before it reaches the NIC driver.
231 *
232 * The current code uses a very ugly hack and only works on kernels using the
233 * net_device_ops (>= 2.6.29). It has been shown to give us a
234 * performance boost of 60-100% though. So, we have to find some less hacky way
235 * of getting this job done eventually.
236 *
237 * #define VBOXNETFLT_WITH_FILTER_HOST2GUEST_SKBS_EXPERIMENT
238 */
239#ifdef VBOXNETFLT_WITH_FILTER_HOST2GUEST_SKBS_EXPERIMENT
240
241/**
242 * The overridden net_device_ops of the device we're attached to.
243 *
244 * Requires Linux 2.6.29 or later.
245 *
246 * This is a very dirty hack that was created to explore how much we can improve
247 * the host to guest transfers by not CC'ing the NIC.
248 */
249typedef struct VBoxNetDeviceOpsOverride
250{
251 /** Our overridden ops. */
252 struct net_device_ops Ops;
253 /** Magic word. */
254 uint32_t u32Magic;
255 /** Pointer to the original ops. */
256 struct net_device_ops const *pOrgOps;
257 /** Pointer to the net filter instance. */
258 PVBOXNETFLTINS pVBoxNetFlt;
259 /** The number of filtered packets. */
260 uint64_t cFiltered;
261 /** The total number of packets. */
262 uint64_t cTotal;
263} VBOXNETDEVICEOPSOVERRIDE, *PVBOXNETDEVICEOPSOVERRIDE;
264/** VBOXNETDEVICEOPSOVERRIDE::u32Magic value. */
265#define VBOXNETDEVICEOPSOVERRIDE_MAGIC UINT32_C(0x00c0ffee)
266
267/**
268 * ndo_start_xmit wrapper that drops packets that shouldn't go to the wire
269 * because they belong on the internal network.
270 *
271 * @returns NETDEV_TX_XXX.
272 * @param pSkb The socket buffer to transmit.
273 * @param pDev The net device.
274 */
275static int vboxNetFltLinuxStartXmitFilter(struct sk_buff *pSkb, struct net_device *pDev)
276{
277 PVBOXNETDEVICEOPSOVERRIDE pOverride = (PVBOXNETDEVICEOPSOVERRIDE)pDev->netdev_ops;
278 uint8_t abHdrBuf[sizeof(RTNETETHERHDR) + sizeof(uint32_t) + RTNETIPV4_MIN_LEN];
279 PCRTNETETHERHDR pEtherHdr;
280 PINTNETTRUNKSWPORT pSwitchPort;
281
282
283 /*
284 * Validate the override structure.
285 *
286 * Note! We're racing vboxNetFltLinuxUnhookDev here. If this was supposed
287 * to be production quality code, we would have to be much more
288 * careful here and avoid the race.
289 */
290 if ( !VALID_PTR(pOverride)
291 || pOverride->u32Magic != VBOXNETDEVICEOPSOVERRIDE_MAGIC
292 || !VALID_PTR(pOverride->pOrgOps))
293 {
294 printk("vboxNetFltLinuxStartXmitFilter: bad override %p\n", pOverride);
295 dev_kfree_skb(pSkb);
296 return NETDEV_TX_OK;
297 }
298 pOverride->cTotal++;
299
300 /*
301 * Do the filtering based on the default OUI of our virtual NICs.
302 *
303 * Note! In a real solution, we would ask the switch whether the
304 * destination MAC is 100% certain to be on the internal network and then
305 * drop it.
306 */
307 pEtherHdr = (PCRTNETETHERHDR)skb_header_pointer(pSkb, 0, sizeof(abHdrBuf), &abHdrBuf[0]);
308 if ( pEtherHdr
309 && VALID_PTR(pOverride->pVBoxNetFlt)
310 && (pSwitchPort = pOverride->pVBoxNetFlt->pSwitchPort) != NULL
311 && VALID_PTR(pSwitchPort)
312 )
313 {
314 INTNETSWDECISION enmDecision;
315 uint32_t cbHdrs = skb_headlen(pSkb);
316 cbHdrs = RT_MAX(cbHdrs, sizeof(abHdrBuf));
317
318 /** @todo consider reference counting, etc. */
319 enmDecision = pSwitchPort->pfnPreRecv(pSwitchPort, pEtherHdr, cbHdrs, INTNETTRUNKDIR_HOST);
320 if (enmDecision == INTNETSWDECISION_INTNET)
321 {
322 dev_kfree_skb(pSkb);
323 pOverride->cFiltered++;
324 return NETDEV_TX_OK;
325 }
326 }
327
328 return pOverride->pOrgOps->ndo_start_xmit(pSkb, pDev);
329}
330
331/**
332 * Hooks the device ndo_start_xmit operation of the device.
333 *
334 * @param pThis The net filter instance.
335 * @param pDev The net device.
336 */
337static void vboxNetFltLinuxHookDev(PVBOXNETFLTINS pThis, struct net_device *pDev)
338{
339 PVBOXNETDEVICEOPSOVERRIDE pOverride;
340 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
341
342 pOverride = RTMemAlloc(sizeof(*pOverride));
343 if (!pOverride)
344 return;
345 pOverride->pOrgOps = pDev->netdev_ops;
346 pOverride->Ops = *pDev->netdev_ops;
347 pOverride->Ops.ndo_start_xmit = vboxNetFltLinuxStartXmitFilter;
348 pOverride->u32Magic = VBOXNETDEVICEOPSOVERRIDE_MAGIC;
349 pOverride->cTotal = 0;
350 pOverride->cFiltered = 0;
351 pOverride->pVBoxNetFlt = pThis;
352
353 RTSpinlockAcquireNoInts(pThis->hSpinlock, &Tmp); /* (this isn't necessary, but so what) */
354 ASMAtomicXchgPtr((void * volatile *)&pDev->netdev_ops, pOverride);
355 RTSpinlockReleaseNoInts(pThis->hSpinlock, &Tmp);
356}
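/*
 * Design note (not in the original): the hook relies on a single atomic swap
 * of pDev->netdev_ops. Since the kernel reads the ops pointer for each
 * transmit, callers see either the complete original table or the complete
 * override, never a mix; the magic word is what later allows
 * vboxNetFltLinuxUnhookDev to recognise and safely undo the override.
 */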
357
358/**
359 * Undoes what vboxNetFltLinuxHookDev did.
360 *
361 * @param pThis The net filter instance.
362 * @param pDev The net device. Can be NULL, in which case
363 * we'll try to retrieve it from @a pThis.
364 */
365static void vboxNetFltLinuxUnhookDev(PVBOXNETFLTINS pThis, struct net_device *pDev)
366{
367 PVBOXNETDEVICEOPSOVERRIDE pOverride;
368 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
369
370 RTSpinlockAcquireNoInts(pThis->hSpinlock, &Tmp);
371 if (!pDev)
372 pDev = (struct net_device *)ASMAtomicUoReadPtr((void * volatile *)&pThis->u.s.pDev);
373 if (VALID_PTR(pDev))
374 {
375 pOverride = (PVBOXNETDEVICEOPSOVERRIDE)pDev->netdev_ops;
376 if ( VALID_PTR(pOverride)
377 && pOverride->u32Magic == VBOXNETDEVICEOPSOVERRIDE_MAGIC
378 && VALID_PTR(pOverride->pOrgOps)
379 )
380 {
381 ASMAtomicXchgPtr((void * volatile *)&pDev->netdev_ops, pOverride->pOrgOps);
382 ASMAtomicWriteU32(&pOverride->u32Magic, 0);
383 }
384 else
385 pOverride = NULL;
386 }
387 else
388 pOverride = NULL;
389 RTSpinlockReleaseNoInts(pThis->hSpinlock, &Tmp);
390
391 if (pOverride)
392 {
393 printk("vboxnetflt: dropped %llu out of %llu packets\n", pOverride->cFiltered, pOverride->cTotal);
394 RTMemFree(pOverride);
395 }
396}
397
398#endif /* VBOXNETFLT_WITH_FILTER_HOST2GUEST_SKBS_EXPERIMENT */
399
400
401/**
402 * Reads and retains the host interface handle.
403 *
404 * @returns The handle, NULL if detached.
405 * @param pThis The instance.
406 */
407DECLINLINE(struct net_device *) vboxNetFltLinuxRetainNetDev(PVBOXNETFLTINS pThis)
408{
409#if 0
410 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
411 struct net_device *pDev = NULL;
412
413 Log(("vboxNetFltLinuxRetainNetDev\n"));
414 /*
415 * Be careful here to avoid problems racing the detached callback.
416 */
417 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
418 if (!ASMAtomicUoReadBool(&pThis->fDisconnectedFromHost))
419 {
420 pDev = (struct net_device *)ASMAtomicUoReadPtr((void * volatile *)&pThis->u.s.pDev);
421 if (pDev)
422 {
423 dev_hold(pDev);
424 Log(("vboxNetFltLinuxRetainNetDev: Device %p(%s) retained. ref=%d\n", pDev, pDev->name, atomic_read(&pDev->refcnt)));
425 }
426 }
427 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
428
429 Log(("vboxNetFltLinuxRetainNetDev - done\n"));
430 return pDev;
431#else
432 return (struct net_device *)ASMAtomicUoReadPtr((void * volatile *)&pThis->u.s.pDev);
433#endif
434}
435
436
437/**
438 * Release the host interface handle previously retained
439 * by vboxNetFltLinuxRetainNetDev.
440 *
441 * @param pThis The instance.
442 * @param pDev The device returned by vboxNetFltLinuxRetainNetDev;
443 * NULL is fine.
444 */
445DECLINLINE(void) vboxNetFltLinuxReleaseNetDev(PVBOXNETFLTINS pThis, struct net_device *pDev)
446{
447#if 0
448 Log(("vboxNetFltLinuxReleaseNetDev\n"));
449 NOREF(pThis);
450 if (pDev)
451 {
452 dev_put(pDev);
453 Log(("vboxNetFltLinuxReleaseNetDev: Device %p(%s) released. ref=%d\n", pDev, pDev->name, atomic_read(&pDev->refcnt)));
454 }
455 Log(("vboxNetFltLinuxReleaseNetDev - done\n"));
456#endif
457}
458
459#define VBOXNETFLT_CB_TAG(skb) (0xA1C90000 | (skb->dev->ifindex & 0xFFFF))
460#define VBOXNETFLT_SKB_TAG(skb) (*(uint32_t*)&((skb)->cb[sizeof((skb)->cb)-sizeof(uint32_t)]))
461
462/**
463 * Checks whether this is an sk_buff created by vboxNetFltLinuxSkBufFromSG,
464 * i.e. a buffer which we're pushing and should be ignored by the filter callbacks.
465 *
466 * @returns true / false accordingly.
467 * @param pBuf The sk_buff.
468 */
469DECLINLINE(bool) vboxNetFltLinuxSkBufIsOur(struct sk_buff *pBuf)
470{
471 return VBOXNETFLT_SKB_TAG(pBuf) == VBOXNETFLT_CB_TAG(pBuf);
472}
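/*
 * Illustrative sketch (not in the original; the function name is made up):
 * the tag lives in the last four bytes of skb->cb and mixes a magic value
 * with the device's ifindex, so a buffer we injected on one interface is
 * not mistaken for ours when seen on another.
 */
#if 0
static void vboxNetFltExampleTagUsage(struct sk_buff *pSkb)
{
    VBOXNETFLT_SKB_TAG(pSkb) = VBOXNETFLT_CB_TAG(pSkb); /* stamp it as ours... */
    Assert(vboxNetFltLinuxSkBufIsOur(pSkb));            /* ...recognised later. */
}
#endif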
473
474
475/**
476 * Internal worker that creates a Linux sk_buff for a
477 * (scatter/)gather list.
478 *
479 * @returns Pointer to the sk_buff.
480 * @param pThis The instance.
481 * @param pSG The (scatter/)gather list.
482 * @param fDstWire Set if the destination is the wire.
483 */
484static struct sk_buff *vboxNetFltLinuxSkBufFromSG(PVBOXNETFLTINS pThis, PINTNETSG pSG, bool fDstWire)
485{
486 struct sk_buff *pPkt;
487 struct net_device *pDev;
488 unsigned fGsoType = 0;
489
490 if (pSG->cbTotal == 0)
491 {
492 LogRel(("VBoxNetFlt: Dropped empty packet coming from internal network.\n"));
493 return NULL;
494 }
495
496 /** @todo We should use fragments mapping the SG buffers for large packets.
497 * 256 bytes seems to be a threshold used a lot for this. It
498 * requires some nasty work on the intnet side though... */
499 /*
500 * Allocate a packet and copy over the data.
501 */
502 pDev = (struct net_device *)ASMAtomicUoReadPtr((void * volatile *)&pThis->u.s.pDev);
503 pPkt = dev_alloc_skb(pSG->cbTotal + NET_IP_ALIGN);
504 if (RT_UNLIKELY(!pPkt))
505 {
506 Log(("vboxNetFltLinuxSkBufFromSG: Failed to allocate sk_buff(%u).\n", pSG->cbTotal));
507 pSG->pvUserData = NULL;
508 return NULL;
509 }
510 pPkt->dev = pDev;
511 pPkt->ip_summed = CHECKSUM_NONE;
512
513 /* Align IP header on 16-byte boundary: 2 + 14 (ethernet hdr size). */
514 skb_reserve(pPkt, NET_IP_ALIGN);
515
516 /* Copy the segments. */
517 skb_put(pPkt, pSG->cbTotal);
518 IntNetSgRead(pSG, pPkt->data);
519
520#if defined(VBOXNETFLT_WITH_GSO_XMIT_WIRE) || defined(VBOXNETFLT_WITH_GSO_XMIT_HOST)
521 /*
522 * Setup GSO if used by this packet.
523 */
524 switch ((PDMNETWORKGSOTYPE)pSG->GsoCtx.u8Type)
525 {
526 default:
527 AssertMsgFailed(("%u (%s)\n", pSG->GsoCtx.u8Type, PDMNetGsoTypeName((PDMNETWORKGSOTYPE)pSG->GsoCtx.u8Type) ));
528 /* fall thru */
529 case PDMNETWORKGSOTYPE_INVALID:
530 fGsoType = 0;
531 break;
532 case PDMNETWORKGSOTYPE_IPV4_TCP:
533 fGsoType = SKB_GSO_TCPV4;
534 break;
535 case PDMNETWORKGSOTYPE_IPV4_UDP:
536 fGsoType = SKB_GSO_UDP;
537 break;
538 case PDMNETWORKGSOTYPE_IPV6_TCP:
539 fGsoType = SKB_GSO_TCPV6;
540 break;
541 }
542 if (fGsoType)
543 {
544 struct skb_shared_info *pShInfo = skb_shinfo(pPkt);
545
546 pShInfo->gso_type = fGsoType | SKB_GSO_DODGY;
547 pShInfo->gso_size = pSG->GsoCtx.cbMaxSeg;
548 pShInfo->gso_segs = PDMNetGsoCalcSegmentCount(&pSG->GsoCtx, pSG->cbTotal);
549
550 if (fDstWire)
551 {
552 Assert(skb_headlen(pPkt) >= pSG->GsoCtx.cbHdrs);
553 pPkt->ip_summed = CHECKSUM_PARTIAL;
554# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 22)
555 pPkt->csum_start = skb_headroom(pPkt) + pSG->GsoCtx.offHdr2;
556 if (fGsoType & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))
557 pPkt->csum_offset = RT_OFFSETOF(RTNETTCP, th_sum);
558 else
559 pPkt->csum_offset = RT_OFFSETOF(RTNETUDP, uh_sum);
560# else
561 pPkt->h.raw = pPkt->data + pSG->GsoCtx.offHdr2;
562 if (fGsoType & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))
563 pPkt->csum = RT_OFFSETOF(RTNETTCP, th_sum);
564 else
565 pPkt->csum = RT_OFFSETOF(RTNETUDP, uh_sum);
566# endif
567 }
568 else
569 {
570 pPkt->ip_summed = CHECKSUM_UNNECESSARY;
571 pPkt->csum = 0;
572 PDMNetGsoPrepForDirectUse(&pSG->GsoCtx, pPkt->data, pSG->cbTotal, false /*fPayloadChecksum*/);
573 }
574 }
575#endif /* VBOXNETFLT_WITH_GSO_XMIT_WIRE || VBOXNETFLT_WITH_GSO_XMIT_HOST */
576
577 /*
578 * Finish up the socket buffer.
579 */
580 pPkt->protocol = eth_type_trans(pPkt, pDev);
581 if (fDstWire)
582 {
583 VBOX_SKB_RESET_NETWORK_HDR(pPkt);
584
585 /* Restore ethernet header back. */
586 skb_push(pPkt, ETH_HLEN); /** @todo VLAN: +4 if VLAN? */
587 VBOX_SKB_RESET_MAC_HDR(pPkt);
588 }
589 VBOXNETFLT_SKB_TAG(pPkt) = VBOXNETFLT_CB_TAG(pPkt);
590
591 return pPkt;
592}
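/*
 * Worked example (not in the original): for a plain TCP/IPv4 GSO frame with
 * a 14-byte ethernet header and a 20-byte IP header, GsoCtx.offHdr2 is 34,
 * so csum_start above points at the TCP header and csum_offset is
 * RT_OFFSETOF(RTNETTCP, th_sum) == 16; whoever segments the frame finishes
 * the checksum at csum_start + csum_offset.
 */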
593
594
595/**
596 * Initializes a SG list from an sk_buff.
597 *
598 * @param pThis The instance.
599 * @param pBuf The sk_buff.
600 * @param pSG The SG.
601 * @param cSegs The number of segments allocated for the SG.
602 * This should match the number of segments in
603 * the sk_buff exactly!
604 * @param fSrc The source of the frame.
605 * @param pGsoCtx Pointer to the GSO context if it's a GSO
606 * internal network frame. NULL if it is a
607 * regular frame.
608 */
609DECLINLINE(void) vboxNetFltLinuxSkBufToSG(PVBOXNETFLTINS pThis, struct sk_buff *pBuf, PINTNETSG pSG,
610 unsigned cSegs, uint32_t fSrc, PCPDMNETWORKGSO pGsoCtx)
611{
612 int i;
613 NOREF(pThis);
614
615 Assert(!skb_shinfo(pBuf)->frag_list);
616
617 if (fSrc & INTNETTRUNKDIR_WIRE)
618 {
619 /*
620 * The packet came from the wire; the ethernet header was removed by the device driver.
621 * Restore it.
622 */
623 skb_push(pBuf, ETH_HLEN);
624 }
625
626 if (!pGsoCtx)
627 IntNetSgInitTempSegs(pSG, pBuf->len, cSegs, 0 /*cSegsUsed*/);
628 else
629 IntNetSgInitTempSegsGso(pSG, pBuf->len, cSegs, 0 /*cSegsUsed*/, pGsoCtx);
630
631#ifdef VBOXNETFLT_SG_SUPPORT
632 pSG->aSegs[0].cb = skb_headlen(pBuf);
633 pSG->aSegs[0].pv = pBuf->data;
634 pSG->aSegs[0].Phys = NIL_RTHCPHYS;
635
636 for (i = 0; i < skb_shinfo(pBuf)->nr_frags; i++)
637 {
638 skb_frag_t *pFrag = &skb_shinfo(pBuf)->frags[i];
639 pSG->aSegs[i+1].cb = pFrag->size;
640 pSG->aSegs[i+1].pv = kmap(pFrag->page);
641 printk("%p = kmap()\n", pSG->aSegs[i+1].pv);
642 pSG->aSegs[i+1].Phys = NIL_RTHCPHYS;
643 }
644 ++i;
645
646#else
647 pSG->aSegs[0].cb = pBuf->len;
648 pSG->aSegs[0].pv = pBuf->data;
649 pSG->aSegs[0].Phys = NIL_RTHCPHYS;
650 i = 1;
651#endif
652
653 pSG->cSegsUsed = i;
654
655#ifdef PADD_RUNT_FRAMES_FROM_HOST
656 /*
657 * Add a trailer if the frame is too small.
658 *
659 * Since we're getting to the packet before it is framed, it has not
660 * yet been padded. The current solution is to add a segment pointing
661 * to a buffer containing all zeros and pray that works for all frames...
662 */
663 if (pSG->cbTotal < 60 && (fSrc & INTNETTRUNKDIR_HOST))
664 {
665 static uint8_t const s_abZero[128] = {0};
666
667 AssertReturnVoid(i < cSegs);
668
669 pSG->aSegs[i].Phys = NIL_RTHCPHYS;
670 pSG->aSegs[i].pv = (void *)&s_abZero[0];
671 pSG->aSegs[i].cb = 60 - pSG->cbTotal;
672 pSG->cbTotal = 60;
673 pSG->cSegsUsed++;
674 Assert(i + 1 <= pSG->cSegsAlloc);
675 }
676#endif
677
678 Log4(("vboxNetFltLinuxSkBufToSG: allocated=%d, segments=%d frags=%d next=%p frag_list=%p pkt_type=%x fSrc=%x\n",
679 pSG->cSegsAlloc, pSG->cSegsUsed, skb_shinfo(pBuf)->nr_frags, pBuf->next, skb_shinfo(pBuf)->frag_list, pBuf->pkt_type, fSrc));
680 for (i = 0; i < pSG->cSegsUsed; i++)
681 Log4(("vboxNetFltLinuxSkBufToSG: #%d: cb=%d pv=%p\n",
682 i, pSG->aSegs[i].cb, pSG->aSegs[i].pv));
683}
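/*
 * Worked example (not in the original) of the runt padding above: a 42-byte
 * ARP request from the host (14-byte ethernet + 28-byte ARP) gets an extra
 * 18-byte all-zero segment so that the frame meets the 60-byte ethernet
 * minimum (excluding the FCS) before it enters the internal network.
 */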
684
685/**
686 * Packet handler; all packets on the interface pass through here.
687 *
688 * @returns 0 (the return value is ignored by the kernel).
689 * @param pBuf The sk_buff.
690 * @param pSkbDev The device the packet arrived on.
691 * @param pPacketType Our packet type structure, used to recover the
692 * instance via VBOX_FLT_PT_TO_INST.
693 * @param pOrigDev The original device (on 2.6.14 and later).
694 */
695#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 14)
696static int vboxNetFltLinuxPacketHandler(struct sk_buff *pBuf,
697 struct net_device *pSkbDev,
698 struct packet_type *pPacketType,
699 struct net_device *pOrigDev)
700#else
701static int vboxNetFltLinuxPacketHandler(struct sk_buff *pBuf,
702 struct net_device *pSkbDev,
703 struct packet_type *pPacketType)
704#endif
705{
706 PVBOXNETFLTINS pThis;
707 struct net_device *pDev;
708 LogFlow(("vboxNetFltLinuxPacketHandler: pBuf=%p pSkbDev=%p pPacketType=%p\n",
709 pBuf, pSkbDev, pPacketType));
710#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 18)
711 Log3(("vboxNetFltLinuxPacketHandler: skb len=%u data_len=%u truesize=%u next=%p nr_frags=%u gso_size=%u gso_seqs=%u gso_type=%x frag_list=%p pkt_type=%x\n",
712 pBuf->len, pBuf->data_len, pBuf->truesize, pBuf->next, skb_shinfo(pBuf)->nr_frags, skb_shinfo(pBuf)->gso_size, skb_shinfo(pBuf)->gso_segs, skb_shinfo(pBuf)->gso_type, skb_shinfo(pBuf)->frag_list, pBuf->pkt_type));
713#else
714 Log3(("vboxNetFltLinuxPacketHandler: skb len=%u data_len=%u truesize=%u next=%p nr_frags=%u tso_size=%u tso_seqs=%u frag_list=%p pkt_type=%x\n",
715 pBuf->len, pBuf->data_len, pBuf->truesize, pBuf->next, skb_shinfo(pBuf)->nr_frags, skb_shinfo(pBuf)->tso_size, skb_shinfo(pBuf)->tso_segs, skb_shinfo(pBuf)->frag_list, pBuf->pkt_type));
716#endif
717 /*
718 * Drop it immediately?
719 */
720 if (!pBuf)
721 return 0;
722
723 pThis = VBOX_FLT_PT_TO_INST(pPacketType);
724 pDev = (struct net_device *)ASMAtomicUoReadPtr((void * volatile *)&pThis->u.s.pDev);
725 if (pThis->u.s.pDev != pSkbDev)
726 {
727 Log(("vboxNetFltLinuxPacketHandler: Devices do not match, pThis may be wrong! pThis=%p\n", pThis));
728 return 0;
729 }
730
731 Log4(("vboxNetFltLinuxPacketHandler: pBuf->cb dump:\n%.*Rhxd\n", sizeof(pBuf->cb), pBuf->cb));
732 if (vboxNetFltLinuxSkBufIsOur(pBuf))
733 {
734 Log2(("vboxNetFltLinuxPacketHandler: got our own sk_buff, drop it.\n"));
735 dev_kfree_skb(pBuf);
736 return 0;
737 }
738
739#ifndef VBOXNETFLT_SG_SUPPORT
740 {
741 /*
742 * Get rid of fragmented packets, they cause too much trouble.
743 */
744 struct sk_buff *pCopy = skb_copy(pBuf, GFP_ATOMIC);
745 kfree_skb(pBuf);
746 if (!pCopy)
747 {
748 LogRel(("VBoxNetFlt: Failed to allocate packet buffer, dropping the packet.\n"));
749 return 0;
750 }
751 pBuf = pCopy;
752# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 18)
753 Log3(("vboxNetFltLinuxPacketHandler: skb copy len=%u data_len=%u truesize=%u next=%p nr_frags=%u gso_size=%u gso_seqs=%u gso_type=%x frag_list=%p pkt_type=%x\n",
754 pBuf->len, pBuf->data_len, pBuf->truesize, pBuf->next, skb_shinfo(pBuf)->nr_frags, skb_shinfo(pBuf)->gso_size, skb_shinfo(pBuf)->gso_segs, skb_shinfo(pBuf)->gso_type, skb_shinfo(pBuf)->frag_list, pBuf->pkt_type));
755# else
756 Log3(("vboxNetFltLinuxPacketHandler: skb copy len=%u data_len=%u truesize=%u next=%p nr_frags=%u tso_size=%u tso_seqs=%u frag_list=%p pkt_type=%x\n",
757 pBuf->len, pBuf->data_len, pBuf->truesize, pBuf->next, skb_shinfo(pBuf)->nr_frags, skb_shinfo(pBuf)->tso_size, skb_shinfo(pBuf)->tso_segs, skb_shinfo(pBuf)->frag_list, pBuf->pkt_type));
758# endif
759 }
760#endif
761
762#ifdef VBOXNETFLT_LINUX_NO_XMIT_QUEUE
763 /* Forward it to the internal network. */
764 vboxNetFltLinuxForwardToIntNet(pThis, pBuf);
765#else
766 /* Add the packet to transmit queue and schedule the bottom half. */
767 skb_queue_tail(&pThis->u.s.XmitQueue, pBuf);
768 schedule_work(&pThis->u.s.XmitTask);
769 Log4(("vboxNetFltLinuxPacketHandler: scheduled work %p for sk_buff %p\n",
770 &pThis->u.s.XmitTask, pBuf));
771#endif
772
773 /* It does not really matter what we return, it is ignored by the kernel. */
774 return 0;
775}
776
777/**
778 * Calculate the number of INTNETSEG segments the socket buffer will need.
779 *
780 * @returns Segment count.
781 * @param pBuf The socket buffer.
782 */
783DECLINLINE(unsigned) vboxNetFltLinuxCalcSGSegments(struct sk_buff *pBuf)
784{
785#ifdef VBOXNETFLT_SG_SUPPORT
786 unsigned cSegs = 1 + skb_shinfo(pBuf)->nr_frags;
787#else
788 unsigned cSegs = 1;
789#endif
790#ifdef PADD_RUNT_FRAMES_FROM_HOST
791 /* vboxNetFltLinuxSkBufToSG adds a padding segment if it's a runt. */
792 if (pBuf->len < 60)
793 cSegs++;
794#endif
795 return cSegs;
796}
797
798/**
799 * Destroy the intnet scatter / gather buffer created by
800 * vboxNetFltLinuxSkBufToSG.
801 */
802static void vboxNetFltLinuxDestroySG(PINTNETSG pSG)
803{
804#ifdef VBOXNETFLT_SG_SUPPORT
805 int i;
806
807 /* Unmap the fragment pages kmapped by vboxNetFltLinuxSkBufToSG; segment 0
808 is the linear data. (The original loop referenced pBuf, which is not in
809 scope here; we use the SG segment count instead.) */
810 for (i = 1; i < pSG->cSegsUsed; i++)
811 kunmap(pSG->aSegs[i].pv);
812#endif
813 NOREF(pSG);
814}
815
816#ifdef LOG_ENABLED
817/**
818 * Logging helper.
819 */
820static void vboxNetFltDumpPacket(PINTNETSG pSG, bool fEgress, const char *pszWhere, int iIncrement)
821{
822 uint8_t *pInt, *pExt;
823 static int iPacketNo = 1;
824 iPacketNo += iIncrement;
825 if (fEgress)
826 {
827 pExt = pSG->aSegs[0].pv;
828 pInt = pExt + 6;
829 }
830 else
831 {
832 pInt = pSG->aSegs[0].pv;
833 pExt = pInt + 6;
834 }
835 Log(("VBoxNetFlt: (int)%02x:%02x:%02x:%02x:%02x:%02x"
836 " %s (%s)%02x:%02x:%02x:%02x:%02x:%02x (%u bytes) packet #%u\n",
837 pInt[0], pInt[1], pInt[2], pInt[3], pInt[4], pInt[5],
838 fEgress ? "-->" : "<--", pszWhere,
839 pExt[0], pExt[1], pExt[2], pExt[3], pExt[4], pExt[5],
840 pSG->cbTotal, iPacketNo));
841 Log3(("%.*Rhxd\n", pSG->aSegs[0].cb, pSG->aSegs[0].pv));
842}
843#else
844# define vboxNetFltDumpPacket(a, b, c, d) do {} while (0)
845#endif
846
847#ifdef VBOXNETFLT_WITH_GSO_RECV
848
849/**
850 * Worker for vboxNetFltLinuxForwardToIntNet that checks if we can forward a
851 * GSO socket buffer without having to segment it.
852 *
853 * @returns true on success, false if needs segmenting.
854 * @param pThis The net filter instance.
855 * @param pSkb The GSO socket buffer.
856 * @param fSrc The source.
857 * @param pGsoCtx Where to return the GSO context on success.
858 */
859static bool vboxNetFltLinuxCanForwardAsGso(PVBOXNETFLTINS pThis, struct sk_buff *pSkb, uint32_t fSrc,
860 PPDMNETWORKGSO pGsoCtx)
861{
862 PDMNETWORKGSOTYPE enmGsoType;
863 uint16_t uEtherType;
864 unsigned int cbTransport;
865 unsigned int offTransport;
866 unsigned int cbTransportHdr;
867 unsigned uProtocol;
868 union
869 {
870 RTNETIPV4 IPv4;
871 RTNETIPV6 IPv6;
872 RTNETTCP Tcp;
873 uint8_t ab[40];
874 uint16_t au16[40/2];
875 uint32_t au32[40/4];
876 } Buf;
877
878 /*
879 * Check the GSO properties of the socket buffer and make sure it fits.
880 */
881 /** @todo Figure out how to handle SKB_GSO_TCP_ECN! */
882 if (RT_UNLIKELY( skb_shinfo(pSkb)->gso_type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCPV6 | SKB_GSO_TCPV4) ))
883 {
884 Log5(("vboxNetFltLinuxCanForwardAsGso: gso_type=%#x\n", skb_shinfo(pSkb)->gso_type));
885 return false;
886 }
887 if (RT_UNLIKELY( skb_shinfo(pSkb)->gso_size < 1
888 || pSkb->len > VBOX_MAX_GSO_SIZE ))
889 {
890 Log5(("vboxNetFltLinuxCanForwardAsGso: gso_size=%#x skb_len=%#x (max=%#x)\n", skb_shinfo(pSkb)->gso_size, pSkb->len, VBOX_MAX_GSO_SIZE));
891 return false;
892 }
893 if (RT_UNLIKELY(fSrc & INTNETTRUNKDIR_WIRE))
894 {
895 Log5(("vboxNetFltLinuxCanForwardAsGso: fSrc=wire\n"));
896 return false;
897 }
898
899 /*
900 * skb_gso_segment does the following. Do we need to do it as well?
901 */
902#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 22)
903 skb_reset_mac_header(pSkb);
904 pSkb->mac_len = pSkb->network_header - pSkb->mac_header;
905#else
906 pSkb->mac.raw = pSkb->data;
907 pSkb->mac_len = pSkb->nh.raw - pSkb->data;
908#endif
909
910 /*
911 * Switch on the ethertype.
912 */
913 uEtherType = pSkb->protocol;
914 if ( uEtherType == RT_H2N_U16_C(RTNET_ETHERTYPE_VLAN)
915 && pSkb->mac_len == sizeof(RTNETETHERHDR) + sizeof(uint32_t))
916 {
917 uint16_t const *puEtherType = skb_header_pointer(pSkb, sizeof(RTNETETHERHDR) + sizeof(uint16_t), sizeof(uint16_t), &Buf);
918 if (puEtherType)
919 uEtherType = *puEtherType;
920 }
921 switch (uEtherType)
922 {
923 case RT_H2N_U16_C(RTNET_ETHERTYPE_IPV4):
924 {
925 unsigned int cbHdr;
926 PCRTNETIPV4 pIPv4 = (PCRTNETIPV4)skb_header_pointer(pSkb, pSkb->mac_len, sizeof(Buf.IPv4), &Buf);
927 if (RT_UNLIKELY(!pIPv4))
928 {
929 Log5(("vboxNetFltLinuxCanForwardAsGso: failed to access IPv4 hdr\n"));
930 return false;
931 }
932
933 cbHdr = pIPv4->ip_hl * 4;
934 cbTransport = RT_N2H_U16(pIPv4->ip_len);
935 if (RT_UNLIKELY( cbHdr < RTNETIPV4_MIN_LEN
936 || cbHdr > cbTransport ))
937 {
938 Log5(("vboxNetFltLinuxCanForwardAsGso: invalid IPv4 lengths: ip_hl=%u ip_len=%u\n", pIPv4->ip_hl, RT_N2H_U16(pIPv4->ip_len)));
939 return false;
940 }
941 cbTransport -= cbHdr;
942 offTransport = pSkb->mac_len + cbHdr;
943 uProtocol = pIPv4->ip_p;
944 if (uProtocol == RTNETIPV4_PROT_TCP)
945 enmGsoType = PDMNETWORKGSOTYPE_IPV4_TCP;
946 else if (uProtocol == RTNETIPV4_PROT_UDP)
947 enmGsoType = PDMNETWORKGSOTYPE_IPV4_UDP;
948 else /** @todo IPv6: 4to6 tunneling */
949 enmGsoType = PDMNETWORKGSOTYPE_INVALID;
950 break;
951 }
952
953 case RT_H2N_U16_C(RTNET_ETHERTYPE_IPV6):
954 {
955 PCRTNETIPV6 pIPv6 = (PCRTNETIPV6)skb_header_pointer(pSkb, pSkb->mac_len, sizeof(Buf.IPv6), &Buf);
956 if (RT_UNLIKELY(!pIPv6))
957 {
958 Log5(("vboxNetFltLinuxCanForwardAsGso: failed to access IPv6 hdr\n"));
959 return false;
960 }
961
962 cbTransport = RT_N2H_U16(pIPv6->ip6_plen);
963 offTransport = pSkb->mac_len + sizeof(RTNETIPV6);
964 uProtocol = pIPv6->ip6_nxt;
965 /** @todo IPv6: Dig our way out of the other headers. */
966 if (uProtocol == RTNETIPV4_PROT_TCP)
967 enmGsoType = PDMNETWORKGSOTYPE_IPV6_TCP;
968 else if (uProtocol == RTNETIPV4_PROT_UDP)
969 enmGsoType = PDMNETWORKGSOTYPE_IPV6_UDP; /* not IPV4_UDP */
970 else
971 enmGsoType = PDMNETWORKGSOTYPE_INVALID;
972 break;
973 }
974
975 default:
976 Log5(("vboxNetFltLinuxCanForwardAsGso: uEtherType=%#x\n", RT_H2N_U16(uEtherType)));
977 return false;
978 }
979
980 if (enmGsoType == PDMNETWORKGSOTYPE_INVALID)
981 {
982 Log5(("vboxNetFltLinuxCanForwardAsGso: Unsupported protocol %d\n", uProtocol));
983 return false;
984 }
985
986 if (RT_UNLIKELY( offTransport + cbTransport <= offTransport
987 || offTransport + cbTransport > pSkb->len
988 || cbTransport < (uProtocol == RTNETIPV4_PROT_TCP ? RTNETTCP_MIN_LEN : RTNETUDP_MIN_LEN)) )
989 {
990 Log5(("vboxNetFltLinuxCanForwardAsGso: Bad transport length; off=%#x + cb=%#x => %#x; skb_len=%#x (%s)\n",
991 offTransport, cbTransport, offTransport + cbTransport, pSkb->len, PDMNetGsoTypeName(enmGsoType) ));
992 return false;
993 }
994
995 /*
996 * Check the TCP/UDP bits.
997 */
998 if (uProtocol == RTNETIPV4_PROT_TCP)
999 {
1000 PCRTNETTCP pTcp = (PCRTNETTCP)skb_header_pointer(pSkb, offTransport, sizeof(Buf.Tcp), &Buf);
1001 if (RT_UNLIKELY(!pTcp))
1002 {
1003 Log5(("vboxNetFltLinuxCanForwardAsGso: failed to access TCP hdr\n"));
1004 return false;
1005 }
1006
1007 cbTransportHdr = pTcp->th_off * 4;
1008 if (RT_UNLIKELY( cbTransportHdr < RTNETTCP_MIN_LEN
1009 || cbTransportHdr > cbTransport
1010 || offTransport + cbTransportHdr >= UINT8_MAX
1011 || offTransport + cbTransportHdr >= pSkb->len ))
1012 {
1013 Log5(("vboxNetFltLinuxCanForwardAsGso: No space for TCP header; off=%#x cb=%#x skb_len=%#x\n", offTransport, cbTransportHdr, pSkb->len));
1014 return false;
1015 }
1016
1017 }
1018 else
1019 {
1020 Assert(uProtocol == RTNETIPV4_PROT_UDP);
1021 cbTransportHdr = sizeof(RTNETUDP);
1022 if (RT_UNLIKELY( offTransport + cbTransportHdr >= UINT8_MAX
1023 || offTransport + cbTransportHdr >= pSkb->len ))
1024 {
1025 Log5(("vboxNetFltLinuxCanForwardAsGso: No space for UDP header; off=%#x skb_len=%#x\n", offTransport, pSkb->len));
1026 return false;
1027 }
1028 }
1029
1030 /*
1031 * We're good, init the GSO context.
1032 */
1033 pGsoCtx->u8Type = enmGsoType;
1034 pGsoCtx->cbHdrs = offTransport + cbTransportHdr;
1035 pGsoCtx->cbMaxSeg = skb_shinfo(pSkb)->gso_size;
1036 pGsoCtx->offHdr1 = pSkb->mac_len;
1037 pGsoCtx->offHdr2 = offTransport;
1038 pGsoCtx->au8Unused[0] = 0;
1039 pGsoCtx->au8Unused[1] = 0;
1040
1041 return true;
1042}
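/*
 * Worked example (not in the original): a TCPv4 socket buffer with mac_len
 * 14, ip_hl 5 and th_off 5 yields offHdr1=14, offHdr2=34 and cbHdrs=54;
 * cbMaxSeg comes straight from gso_size, so a 2950-byte frame with
 * gso_size 1448 is handed to the switch as a two-segment GSO frame.
 */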
1043
1044/**
1045 * Forward the socket buffer as a GSO internal network frame.
1046 *
1047 * @returns IPRT status code.
1048 * @param pThis The net filter instance.
1049 * @param pSkb The GSO socket buffer.
1050 * @param fSrc The source.
1051 * @param pGsoCtx The GSO context produced by vboxNetFltLinuxCanForwardAsGso.
1052 */
1053static int vboxNetFltLinuxForwardAsGso(PVBOXNETFLTINS pThis, struct sk_buff *pSkb, uint32_t fSrc, PCPDMNETWORKGSO pGsoCtx)
1054{
1055 int rc;
1056 unsigned cSegs = vboxNetFltLinuxCalcSGSegments(pSkb);
1057 if (RT_LIKELY(cSegs <= MAX_SKB_FRAGS + 1))
1058 {
1059 PINTNETSG pSG = (PINTNETSG)alloca(RT_OFFSETOF(INTNETSG, aSegs[cSegs]));
1060 if (RT_LIKELY(pSG))
1061 {
1062 vboxNetFltLinuxSkBufToSG(pThis, pSkb, pSG, cSegs, fSrc, pGsoCtx);
1063
1064 vboxNetFltDumpPacket(pSG, false, (fSrc & INTNETTRUNKDIR_HOST) ? "host" : "wire", 1);
1065 pThis->pSwitchPort->pfnRecv(pThis->pSwitchPort, pSG, fSrc);
1066
1067 vboxNetFltLinuxDestroySG(pSG);
1068 rc = VINF_SUCCESS;
1069 }
1070 else
1071 {
1072 Log(("VBoxNetFlt: Dropping the sk_buff (failure case).\n"));
1073 rc = VERR_NO_MEMORY;
1074 }
1075 }
1076 else
1077 {
1078 Log(("VBoxNetFlt: Bad sk_buff? cSegs=%#x.\n", cSegs));
1079 rc = VERR_INTERNAL_ERROR_3;
1080 }
1081
1082 Log4(("VBoxNetFlt: Dropping the sk_buff.\n"));
1083 dev_kfree_skb(pSkb);
1084 return rc;
1085}
1086
1087#endif /* VBOXNETFLT_WITH_GSO_RECV */
1088
1089/**
1090 * Worker for vboxNetFltLinuxForwardToIntNet.
1091 *
1092 * @returns VINF_SUCCESS or VERR_NO_MEMORY.
1093 * @param pThis The net filter instance.
1094 * @param pBuf The socket buffer.
1095 * @param fSrc The source.
1096 */
1097static int vboxNetFltLinuxForwardSegment(PVBOXNETFLTINS pThis, struct sk_buff *pBuf, uint32_t fSrc)
1098{
1099 int rc;
1100 unsigned cSegs = vboxNetFltLinuxCalcSGSegments(pBuf);
1101 if (cSegs <= MAX_SKB_FRAGS + 1)
1102 {
1103 PINTNETSG pSG = (PINTNETSG)alloca(RT_OFFSETOF(INTNETSG, aSegs[cSegs]));
1104 if (RT_LIKELY(pSG))
1105 {
1106 vboxNetFltLinuxSkBufToSG(pThis, pBuf, pSG, cSegs, fSrc, NULL /*pGsoCtx*/);
1107
1108 vboxNetFltDumpPacket(pSG, false, (fSrc & INTNETTRUNKDIR_HOST) ? "host" : "wire", 1);
1109 pThis->pSwitchPort->pfnRecv(pThis->pSwitchPort, pSG, fSrc);
1110
1111 vboxNetFltLinuxDestroySG(pSG);
1112 rc = VINF_SUCCESS;
1113 }
1114 else
1115 {
1116 Log(("VBoxNetFlt: Failed to allocate SG buffer.\n"));
1117 rc = VERR_NO_MEMORY;
1118 }
1119 }
1120 else
1121 {
1122 Log(("VBoxNetFlt: Bad sk_buff? cSegs=%#x.\n", cSegs));
1123 rc = VERR_INTERNAL_ERROR_3;
1124 }
1125
1126 Log4(("VBoxNetFlt: Dropping the sk_buff.\n"));
1127 dev_kfree_skb(pBuf);
1128 return rc;
1129}
1130
1131/**
1132 * Forwards a socket buffer to the internal network.
1133 * @param pBuf The socket buffer. This is consumed by this function.
1134 */
1135static void vboxNetFltLinuxForwardToIntNet(PVBOXNETFLTINS pThis, struct sk_buff *pBuf)
1136{
1137 uint32_t fSrc = pBuf->pkt_type == PACKET_OUTGOING ? INTNETTRUNKDIR_HOST : INTNETTRUNKDIR_WIRE;
1138
1139#ifdef VBOXNETFLT_WITH_GSO
1140 if (skb_is_gso(pBuf))
1141 {
1142 PDMNETWORKGSO GsoCtx;
1143 Log3(("vboxNetFltLinuxForwardToIntNet: skb len=%u data_len=%u truesize=%u next=%p nr_frags=%u gso_size=%u gso_seqs=%u gso_type=%x frag_list=%p pkt_type=%x ip_summed=%d\n",
1144 pBuf->len, pBuf->data_len, pBuf->truesize, pBuf->next, skb_shinfo(pBuf)->nr_frags, skb_shinfo(pBuf)->gso_size, skb_shinfo(pBuf)->gso_segs, skb_shinfo(pBuf)->gso_type, skb_shinfo(pBuf)->frag_list, pBuf->pkt_type, pBuf->ip_summed));
1145# ifdef VBOXNETFLT_WITH_GSO_RECV
1146 if ( (skb_shinfo(pBuf)->gso_type & (SKB_GSO_UDP | SKB_GSO_TCPV6 | SKB_GSO_TCPV4))
1147 && vboxNetFltLinuxCanForwardAsGso(pThis, pBuf, fSrc, &GsoCtx) )
1148 vboxNetFltLinuxForwardAsGso(pThis, pBuf, fSrc, &GsoCtx);
1149 else
1150# endif
1151 {
1152 /* Need to segment the packet */
1153 struct sk_buff *pNext;
1154 struct sk_buff *pSegment = skb_gso_segment(pBuf, 0 /*supported features*/);
1155 if (IS_ERR(pSegment))
1156 {
1157 dev_kfree_skb(pBuf);
1158 LogRel(("VBoxNetFlt: Failed to segment a packet (%d).\n", PTR_ERR(pSegment)));
1159 return;
1160 }
1161
1162 for (; pSegment; pSegment = pNext)
1163 {
1164 Log3(("vboxNetFltLinuxForwardToIntNet: segment len=%u data_len=%u truesize=%u next=%p nr_frags=%u gso_size=%u gso_seqs=%u gso_type=%x frag_list=%p pkt_type=%x\n",
1165 pSegment->len, pSegment->data_len, pSegment->truesize, pSegment->next, skb_shinfo(pSegment)->nr_frags, skb_shinfo(pSegment)->gso_size, skb_shinfo(pSegment)->gso_segs, skb_shinfo(pSegment)->gso_type, skb_shinfo(pSegment)->frag_list, pSegment->pkt_type));
1166 pNext = pSegment->next;
1167 pSegment->next = 0;
1168 vboxNetFltLinuxForwardSegment(pThis, pSegment, fSrc);
1169 }
1170 dev_kfree_skb(pBuf);
1171 }
1172 }
1173 else
1174#endif /* VBOXNETFLT_WITH_GSO */
1175 {
1176 if (pBuf->ip_summed == CHECKSUM_PARTIAL && pBuf->pkt_type == PACKET_OUTGOING)
1177 {
1178#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
1179 /*
1180 * Try to work around the problem with CentOS 4.7 and 5.2 (2.6.9
1181 * and 2.6.18 kernels), they pass wrong 'h' pointer down. We take IP
1182 * header length from the header itself and reconstruct 'h' pointer
1183 * to TCP (or whatever) header.
1184 */
1185 unsigned char *tmp = pBuf->h.raw;
1186 if (pBuf->h.raw == pBuf->nh.raw && pBuf->protocol == htons(ETH_P_IP))
1187 pBuf->h.raw = pBuf->nh.raw + pBuf->nh.iph->ihl * 4;
1188#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18) */
1189 if (VBOX_SKB_CHECKSUM_HELP(pBuf))
1190 {
1191 LogRel(("VBoxNetFlt: Failed to compute checksum, dropping the packet.\n"));
1192 dev_kfree_skb(pBuf);
1193 return;
1194 }
1195#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
1196 /* Restore the original (wrong) pointer. */
1197 pBuf->h.raw = tmp;
1198#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18) */
1199 }
1200 vboxNetFltLinuxForwardSegment(pThis, pBuf, fSrc);
1201 }
1202}
1203
1204#ifndef VBOXNETFLT_LINUX_NO_XMIT_QUEUE
1205/**
1206 * Work queue handler that forwards the socket buffers queued by
1207 * vboxNetFltLinuxPacketHandler to the internal network.
1208 *
1209 * @param pWork The work queue.
1210 */
1211# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20)
1212static void vboxNetFltLinuxXmitTask(struct work_struct *pWork)
1213# else
1214static void vboxNetFltLinuxXmitTask(void *pWork)
1215# endif
1216{
1217 PVBOXNETFLTINS pThis = VBOX_FLT_XT_TO_INST(pWork);
1218 struct sk_buff *pBuf;
1219
1220 Log4(("vboxNetFltLinuxXmitTask: Got work %p.\n", pWork));
1221
1222 /*
1223 * Active? Retain the instance and increment the busy counter.
1224 */
1225 if (vboxNetFltTryRetainBusyActive(pThis))
1226 {
1227 while ((pBuf = skb_dequeue(&pThis->u.s.XmitQueue)) != NULL)
1228 vboxNetFltLinuxForwardToIntNet(pThis, pBuf);
1229
1230 vboxNetFltRelease(pThis, true /* fBusy */);
1231 }
1232 else
1233 {
1234 /** @todo Shouldn't we just drop the packets here? There is little point in
1235 * making them accumulate when the VM is paused and it'll only waste
1236 * kernel memory anyway... Hmm. maybe wait a short while (2-5 secs)
1237 * before start draining the packets (goes for the intnet ring buf
1238 * too)? */
1239 }
1240}
1241#endif /* !VBOXNETFLT_LINUX_NO_XMIT_QUEUE */
1242
1243/**
1244 * Reports the GSO capabilities of the hardware NIC.
1245 *
1246 * @param pThis The net filter instance. The caller holds a
1247 * reference to this.
1248 */
1249static void vboxNetFltLinuxReportNicGsoCapabilities(PVBOXNETFLTINS pThis)
1250{
1251#ifdef VBOXNETFLT_WITH_GSO_XMIT_WIRE
1252 if (vboxNetFltTryRetainBusyNotDisconnected(pThis))
1253 {
1254 struct net_device *pDev;
1255 PINTNETTRUNKSWPORT pSwitchPort;
1256 unsigned int fFeatures;
1257 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1258
1259 RTSpinlockAcquireNoInts(pThis->hSpinlock, &Tmp);
1260
1261 pSwitchPort = pThis->pSwitchPort; /* this doesn't need to be here, but it doesn't harm. */
1262 pDev = (struct net_device *)ASMAtomicUoReadPtr((void * volatile *)&pThis->u.s.pDev);
1263 if (pDev)
1264 fFeatures = pDev->features;
1265 else
1266 fFeatures = 0;
1267
1268 RTSpinlockReleaseNoInts(pThis->hSpinlock, &Tmp);
1269
1270 if (pThis->pSwitchPort)
1271 {
1272 /* Set/update the GSO capabilities of the NIC. */
1273 uint32_t fGsoCapabilites = 0;
1274 if (fFeatures & NETIF_F_TSO)
1275 fGsoCapabilites |= RT_BIT_32(PDMNETWORKGSOTYPE_IPV4_TCP);
1276 if (fFeatures & NETIF_F_TSO6)
1277 fGsoCapabilites |= RT_BIT_32(PDMNETWORKGSOTYPE_IPV6_TCP);
1278# if 0 /** @todo GSO: Test UDP offloading (UFO) on linux. */
1279 if (fFeatures & NETIF_F_UFO)
1280 fGsoCapabilites |= RT_BIT_32(PDMNETWORKGSOTYPE_IPV4_UDP);
1281 if (fFeatures & NETIF_F_UFO)
1282 fGsoCapabilites |= RT_BIT_32(PDMNETWORKGSOTYPE_IPV6_UDP);
1283# endif
1284 pThis->pSwitchPort->pfnReportGsoCapabilities(pThis->pSwitchPort, fGsoCapabilites, INTNETTRUNKDIR_WIRE);
1285 }
1286
1287 vboxNetFltRelease(pThis, true /*fBusy*/);
1288 }
1289#endif /* VBOXNETFLT_WITH_GSO_XMIT_WIRE */
1290}
1291
1292/**
1293 * Helper that determines whether the host (ignoring us) is operating the
1294 * interface in promiscuous mode or not.
1295 */
1296static bool vboxNetFltLinuxPromiscuous(PVBOXNETFLTINS pThis)
1297{
1298 bool fRc = false;
1299 struct net_device * pDev = vboxNetFltLinuxRetainNetDev(pThis);
1300 if (pDev)
1301 {
1302 fRc = !!(pDev->promiscuity - (ASMAtomicUoReadBool(&pThis->u.s.fPromiscuousSet) & 1));
1303 LogFlow(("vboxNetFltPortOsIsPromiscuous: returns %d, pDev->promiscuity=%d, fPromiscuousSet=%d\n",
1304 fRc, pDev->promiscuity, pThis->u.s.fPromiscuousSet));
1305 vboxNetFltLinuxReleaseNetDev(pThis, pDev);
1306 }
1307 return fRc;
1308}
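/*
 * Note (not in the original): net_device::promiscuity is a reference
 * counter, not a flag. The expression above subtracts our own +1 when
 * fPromiscuousSet is true, so promiscuity == 1 with our reference active
 * reports "host not promiscuous", while promiscuity == 2 means someone
 * else (tcpdump, say) has enabled it as well.
 */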
1309
1310/**
1311 * Internal worker for vboxNetFltLinuxNotifierCallback.
1312 *
1313 * @returns VBox status code.
1314 * @param pThis The instance.
1315 * @param pDev The net device to attach to; it is retained and
1316 * stored in the instance data.
1317 */
1318static int vboxNetFltLinuxAttachToInterface(PVBOXNETFLTINS pThis, struct net_device *pDev)
1319{
1320 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1321 LogFlow(("vboxNetFltLinuxAttachToInterface: pThis=%p (%s)\n", pThis, pThis->szName));
1322
1323 /*
1324 * Retain and store the device.
1325 */
1326 dev_hold(pDev);
1327
1328 RTSpinlockAcquireNoInts(pThis->hSpinlock, &Tmp);
1329 ASMAtomicUoWritePtr((void * volatile *)&pThis->u.s.pDev, pDev);
1330 RTSpinlockReleaseNoInts(pThis->hSpinlock, &Tmp);
1331
1332 Log(("vboxNetFltLinuxAttachToInterface: Device %p(%s) retained. ref=%d\n", pDev, pDev->name, atomic_read(&pDev->refcnt)));
1333 Log(("vboxNetFltLinuxAttachToInterface: Got pDev=%p pThis=%p pThis->u.s.pDev=%p\n", pDev, pThis, ASMAtomicUoReadPtr((void * volatile *)&pThis->u.s.pDev)));
1334
1335 /* Get the mac address while we still have a valid net_device reference. */
1336 memcpy(&pThis->u.s.MacAddr, pDev->dev_addr, sizeof(pThis->u.s.MacAddr));
1337
1338 /*
1339 * Install a packet filter for this device with a protocol wildcard (ETH_P_ALL).
1340 */
1341 pThis->u.s.PacketType.type = __constant_htons(ETH_P_ALL);
1342 pThis->u.s.PacketType.dev = pDev;
1343 pThis->u.s.PacketType.func = vboxNetFltLinuxPacketHandler;
1344 dev_add_pack(&pThis->u.s.PacketType);
1345
1346#ifdef VBOXNETFLT_WITH_FILTER_HOST2GUEST_SKBS_EXPERIMENT
1347 vboxNetFltLinuxHookDev(pThis, pDev);
1348#endif
1349
1350 /*
1351 * Set indicators that require the spinlock. Be a bit paranoid about racing
1352 * the device notification handler.
1353 */
1354 RTSpinlockAcquireNoInts(pThis->hSpinlock, &Tmp);
1355 pDev = (struct net_device *)ASMAtomicUoReadPtr((void * volatile *)&pThis->u.s.pDev);
1356 if (pDev)
1357 {
1358 ASMAtomicUoWriteBool(&pThis->fDisconnectedFromHost, false);
1359 ASMAtomicUoWriteBool(&pThis->u.s.fRegistered, true);
1360 pDev = NULL; /* don't dereference it */
1361 }
1362 RTSpinlockReleaseNoInts(pThis->hSpinlock, &Tmp);
1363 Log(("vboxNetFltLinuxAttachToInterface: this=%p: Packet handler installed.\n", pThis));
1364
1365 /*
1366 * If the above succeeded, report the GSO capabilities; if not, undo and
1367 * release the device.
1368 */
1369 if (!pDev)
1370 {
1371 Assert(pThis->pSwitchPort);
1372 if (vboxNetFltTryRetainBusyNotDisconnected(pThis))
1373 {
1374 vboxNetFltLinuxReportNicGsoCapabilities(pThis);
1375 pThis->pSwitchPort->pfnReportMacAddress(pThis->pSwitchPort, &pThis->u.s.MacAddr);
1376 pThis->pSwitchPort->pfnReportPromiscuousMode(pThis->pSwitchPort, vboxNetFltLinuxPromiscuous(pThis));
1377 pThis->pSwitchPort->pfnReportNoPreemptDsts(pThis->pSwitchPort, INTNETTRUNKDIR_WIRE | INTNETTRUNKDIR_HOST);
1378 vboxNetFltRelease(pThis, true /*fBusy*/);
1379 }
1380 }
1381 else
1382 {
1383#ifdef VBOXNETFLT_WITH_FILTER_HOST2GUEST_SKBS_EXPERIMENT
1384 vboxNetFltLinuxUnhookDev(pThis, pDev);
1385#endif
1386 RTSpinlockAcquireNoInts(pThis->hSpinlock, &Tmp);
1387 ASMAtomicUoWritePtr((void * volatile *)&pThis->u.s.pDev, NULL);
1388 RTSpinlockReleaseNoInts(pThis->hSpinlock, &Tmp);
1389 dev_put(pDev);
1390 Log(("vboxNetFltLinuxAttachToInterface: Device %p(%s) released. ref=%d\n", pDev, pDev->name, atomic_read(&pDev->refcnt)));
1391 }
1392
1393 LogRel(("VBoxNetFlt: attached to '%s' / %.*Rhxs\n", pThis->szName, sizeof(pThis->u.s.MacAddr), &pThis->u.s.MacAddr));
1394 return VINF_SUCCESS;
1395}
1396
1397
1398static int vboxNetFltLinuxUnregisterDevice(PVBOXNETFLTINS pThis, struct net_device *pDev)
1399{
1400 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1401
1402 Assert(!pThis->fDisconnectedFromHost);
1403
1404#ifdef VBOXNETFLT_WITH_FILTER_HOST2GUEST_SKBS_EXPERIMENT
1405 vboxNetFltLinuxUnhookDev(pThis, pDev);
1406#endif
1407
1408 RTSpinlockAcquireNoInts(pThis->hSpinlock, &Tmp);
1409 ASMAtomicWriteBool(&pThis->u.s.fRegistered, false);
1410 ASMAtomicWriteBool(&pThis->fDisconnectedFromHost, true);
1411 ASMAtomicUoWritePtr((void * volatile *)&pThis->u.s.pDev, NULL);
1412 RTSpinlockReleaseNoInts(pThis->hSpinlock, &Tmp);
1413
1414 dev_remove_pack(&pThis->u.s.PacketType);
1415#ifndef VBOXNETFLT_LINUX_NO_XMIT_QUEUE
1416 skb_queue_purge(&pThis->u.s.XmitQueue);
1417#endif
1418 Log(("vboxNetFltLinuxUnregisterDevice: this=%p: Packet handler removed, xmit queue purged.\n", pThis));
1419 Log(("vboxNetFltLinuxUnregisterDevice: Device %p(%s) released. ref=%d\n", pDev, pDev->name, atomic_read(&pDev->refcnt)));
1420 dev_put(pDev);
1421
1422 return NOTIFY_OK;
1423}
1424
1425static int vboxNetFltLinuxDeviceIsUp(PVBOXNETFLTINS pThis, struct net_device *pDev)
1426{
1427 /* Check if we are not suspended and promiscuous mode has not been set. */
1428 if ( pThis->enmTrunkState == INTNETTRUNKIFSTATE_ACTIVE
1429 && !ASMAtomicUoReadBool(&pThis->u.s.fPromiscuousSet))
1430 {
1431 /* Note that there is no need for locking as the kernel got hold of the lock already. */
1432 dev_set_promiscuity(pDev, 1);
1433 ASMAtomicWriteBool(&pThis->u.s.fPromiscuousSet, true);
1434 Log(("vboxNetFltLinuxDeviceIsUp: enabled promiscuous mode on %s (%d)\n", pThis->szName, pDev->promiscuity));
1435 }
1436 else
1437 Log(("vboxNetFltLinuxDeviceIsUp: no need to enable promiscuous mode on %s (%d)\n", pThis->szName, pDev->promiscuity));
1438 return NOTIFY_OK;
1439}
1440
1441static int vboxNetFltLinuxDeviceGoingDown(PVBOXNETFLTINS pThis, struct net_device *pDev)
1442{
1443 /* Undo promiscuous mode if we have set it. */
1444 if (ASMAtomicUoReadBool(&pThis->u.s.fPromiscuousSet))
1445 {
1446 /* Note that there is no need for locking as the kernel got hold of the lock already. */
1447 dev_set_promiscuity(pDev, -1);
1448 ASMAtomicWriteBool(&pThis->u.s.fPromiscuousSet, false);
1449 Log(("vboxNetFltLinuxDeviceGoingDown: disabled promiscuous mode on %s (%d)\n", pThis->szName, pDev->promiscuity));
1450 }
1451 else
1452 Log(("vboxNetFltLinuxDeviceGoingDown: no need to disable promiscuous mode on %s (%d)\n", pThis->szName, pDev->promiscuity));
1453 return NOTIFY_OK;
1454}
1455
1456#ifdef LOG_ENABLED
1457/** Stringify the NETDEV_XXX constants. */
1458static const char *vboxNetFltLinuxGetNetDevEventName(unsigned long ulEventType)
1459{
1460 const char *pszEvent = "NETDEV_<unknown>";
1461 switch (ulEventType)
1462 {
1463 case NETDEV_REGISTER: pszEvent = "NETDEV_REGISTER"; break;
1464 case NETDEV_UNREGISTER: pszEvent = "NETDEV_UNREGISTER"; break;
1465 case NETDEV_UP: pszEvent = "NETDEV_UP"; break;
1466 case NETDEV_DOWN: pszEvent = "NETDEV_DOWN"; break;
1467 case NETDEV_REBOOT: pszEvent = "NETDEV_REBOOT"; break;
1468 case NETDEV_CHANGENAME: pszEvent = "NETDEV_CHANGENAME"; break;
1469 case NETDEV_CHANGE: pszEvent = "NETDEV_CHANGE"; break;
1470 case NETDEV_CHANGEMTU: pszEvent = "NETDEV_CHANGEMTU"; break;
1471 case NETDEV_CHANGEADDR: pszEvent = "NETDEV_CHANGEADDR"; break;
1472 case NETDEV_GOING_DOWN: pszEvent = "NETDEV_GOING_DOWN"; break;
1473# ifdef NETDEV_FEAT_CHANGE
1474 case NETDEV_FEAT_CHANGE: pszEvent = "NETDEV_FEAT_CHANGE"; break;
1475# endif
1476 }
1477 return pszEvent;
1478}
1479#endif /* LOG_ENABLED */
1480
1481/**
1482 * Callback for listening to netdevice events.
1483 *
1484 * This handles rediscovery, cleanup on unregistration, promiscuity on
1485 * up/down, and GSO feature changes from ethtool.
1486 *
1487 * @returns NOTIFY_OK
1488 * @param self Pointer to our notifier registration block.
1489 * @param ulEventType The event.
1490 * @param ptr Event specific, but it is usually the device it
1491 * relates to.
1492 */
1493static int vboxNetFltLinuxNotifierCallback(struct notifier_block *self, unsigned long ulEventType, void *ptr)
1494
1495{
1496 PVBOXNETFLTINS pThis = VBOX_FLT_NB_TO_INST(self);
1497 struct net_device *pDev = (struct net_device *)ptr;
1498 int rc = NOTIFY_OK;
1499
1500 Log(("VBoxNetFlt: got event %s(0x%lx) on %s, pDev=%p pThis=%p pThis->u.s.pDev=%p\n",
1501 vboxNetFltLinuxGetNetDevEventName(ulEventType), ulEventType, pDev->name, pDev, pThis, ASMAtomicUoReadPtr((void * volatile *)&pThis->u.s.pDev)));
1502 if ( ulEventType == NETDEV_REGISTER
1503 && !strcmp(pDev->name, pThis->szName))
1504 {
1505 vboxNetFltLinuxAttachToInterface(pThis, pDev);
1506 }
1507 else
1508 {
1509 pDev = (struct net_device *)ASMAtomicUoReadPtr((void * volatile *)&pThis->u.s.pDev);
1510 if (pDev == ptr)
1511 {
1512 switch (ulEventType)
1513 {
1514 case NETDEV_UNREGISTER:
1515 rc = vboxNetFltLinuxUnregisterDevice(pThis, pDev);
1516 break;
1517 case NETDEV_UP:
1518 rc = vboxNetFltLinuxDeviceIsUp(pThis, pDev);
1519 break;
1520 case NETDEV_GOING_DOWN:
1521 rc = vboxNetFltLinuxDeviceGoingDown(pThis, pDev);
1522 break;
1523 case NETDEV_CHANGENAME:
1524 break;
1525#ifdef NETDEV_FEAT_CHANGE
1526 case NETDEV_FEAT_CHANGE:
1527 vboxNetFltLinuxReportNicGsoCapabilities(pThis);
1528 break;
1529#endif
1530 }
1531 }
1532 }
1533
1534 return rc;
1535}
1536
1537bool vboxNetFltOsMaybeRediscovered(PVBOXNETFLTINS pThis)
1538{
1539 return !ASMAtomicUoReadBool(&pThis->fDisconnectedFromHost);
1540}
1541
1542int vboxNetFltPortOsXmit(PVBOXNETFLTINS pThis, PINTNETSG pSG, uint32_t fDst)
1543{
1544 struct net_device * pDev;
1545 int err;
1546 int rc = VINF_SUCCESS;
1547
1548 LogFlow(("vboxNetFltPortOsXmit: pThis=%p (%s)\n", pThis, pThis->szName));
1549
1550 pDev = vboxNetFltLinuxRetainNetDev(pThis);
1551 if (pDev)
1552 {
1553 /*
1554 * Create a sk_buff for the gather list and push it onto the wire.
1555 */
1556 if (fDst & INTNETTRUNKDIR_WIRE)
1557 {
1558 struct sk_buff *pBuf = vboxNetFltLinuxSkBufFromSG(pThis, pSG, true);
1559 if (pBuf)
1560 {
1561 vboxNetFltDumpPacket(pSG, true, "wire", 1);
1562 Log4(("vboxNetFltPortOsXmit: pBuf->cb dump:\n%.*Rhxd\n", sizeof(pBuf->cb), pBuf->cb));
1563 Log4(("vboxNetFltPortOsXmit: dev_queue_xmit(%p)\n", pBuf));
1564 err = dev_queue_xmit(pBuf);
1565 if (err)
1566 rc = RTErrConvertFromErrno(err);
1567 }
1568 else
1569 rc = VERR_NO_MEMORY;
1570 }
1571
1572 /*
1573 * Create a sk_buff for the gather list and push it onto the host stack.
1574 */
1575 if (fDst & INTNETTRUNKDIR_HOST)
1576 {
1577 struct sk_buff *pBuf = vboxNetFltLinuxSkBufFromSG(pThis, pSG, false);
1578 if (pBuf)
1579 {
1580 vboxNetFltDumpPacket(pSG, true, "host", (fDst & INTNETTRUNKDIR_WIRE) ? 0 : 1);
1581 Log4(("vboxNetFltPortOsXmit: pBuf->cb dump:\n%.*Rhxd\n", sizeof(pBuf->cb), pBuf->cb));
1582 Log4(("vboxNetFltPortOsXmit: netif_rx_ni(%p)\n", pBuf));
1583 err = netif_rx_ni(pBuf);
1584 if (err)
1585 rc = RTErrConvertFromErrno(err);
1586 }
1587 else
1588 rc = VERR_NO_MEMORY;
1589 }
1590
1591 vboxNetFltLinuxReleaseNetDev(pThis, pDev);
1592 }
1593
1594 return rc;
1595}
1596
1597
1598void vboxNetFltPortOsSetActive(PVBOXNETFLTINS pThis, bool fActive)
1599{
1600 struct net_device * pDev;
1601
1602 LogFlow(("vboxNetFltPortOsSetActive: pThis=%p (%s), fActive=%s, fDisablePromiscuous=%s\n",
1603 pThis, pThis->szName, fActive?"true":"false",
1604 pThis->fDisablePromiscuous?"true":"false"));
1605
1606 if (pThis->fDisablePromiscuous)
1607 return;
1608
1609 pDev = vboxNetFltLinuxRetainNetDev(pThis);
1610 if (pDev)
1611 {
1612 /*
1613 * This API is a bit weird; the best reference is the code.
1614 *
1615 * Also, we have a bit of a race condition wrt the maintenance of
1616 * the host interface promiscuity for vboxNetFltPortOsIsPromiscuous.
1617 */
1618#ifdef LOG_ENABLED
1619 u_int16_t fIf;
1620 unsigned const cPromiscBefore = pDev->promiscuity;
1621#endif
1622 if (fActive)
1623 {
1624 Assert(!pThis->u.s.fPromiscuousSet);
1625
1626 rtnl_lock();
1627 dev_set_promiscuity(pDev, 1);
1628 rtnl_unlock();
1629 pThis->u.s.fPromiscuousSet = true;
1630 Log(("vboxNetFltPortOsSetActive: enabled promiscuous mode on %s (%d)\n", pThis->szName, pDev->promiscuity));
1631 }
1632 else
1633 {
1634 if (pThis->u.s.fPromiscuousSet)
1635 {
1636 rtnl_lock();
1637 dev_set_promiscuity(pDev, -1);
1638 rtnl_unlock();
1639 Log(("vboxNetFltPortOsSetActive: disabled promiscuous mode on %s (%d)\n", pThis->szName, pDev->promiscuity));
1640 }
1641 pThis->u.s.fPromiscuousSet = false;
1642
1643#ifdef LOG_ENABLED
1644 fIf = dev_get_flags(pDev);
1645 Log(("VBoxNetFlt: fIf=%#x; %d->%d\n", fIf, cPromiscBefore, pDev->promiscuity));
1646#endif
1647 }
1648
1649 vboxNetFltLinuxReleaseNetDev(pThis, pDev);
1650 }
1651}
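/*
 * Design note (not in the original): dev_set_promiscuity adjusts the
 * promiscuity counter and must run under the RTNL lock. This path takes
 * rtnl_lock() explicitly, whereas the notifier paths above
 * (vboxNetFltLinuxDeviceIsUp/GoingDown) rely on the kernel already holding
 * it while delivering NETDEV_* events.
 */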
1652
1653
1654int vboxNetFltOsDisconnectIt(PVBOXNETFLTINS pThis)
1655{
1656 /* Nothing to do here. */
1657 return VINF_SUCCESS;
1658}
1659
1660
1661int vboxNetFltOsConnectIt(PVBOXNETFLTINS pThis)
1662{
1663 /*
1664 * Report the GSO capabilities of the host and device (if connected).
1665 * Note! No need to mark ourselves busy here.
1666 */
1667 /** @todo duplicate work here now? Attach */
1668#if defined(VBOXNETFLT_WITH_GSO_XMIT_HOST)
1669 pThis->pSwitchPort->pfnReportGsoCapabilities(pThis->pSwitchPort,
1670 0
1671 | RT_BIT_32(PDMNETWORKGSOTYPE_IPV4_TCP)
1672 | RT_BIT_32(PDMNETWORKGSOTYPE_IPV6_TCP)
1673# if 0 /** @todo GSO: Test UDP offloading (UFO) on linux. */
1674 | RT_BIT_32(PDMNETWORKGSOTYPE_IPV4_UDP)
1675 | RT_BIT_32(PDMNETWORKGSOTYPE_IPV6_UDP)
1676# endif
1677 , INTNETTRUNKDIR_HOST);
1678
1679#endif
1680 vboxNetFltLinuxReportNicGsoCapabilities(pThis);
1681
1682 return VINF_SUCCESS;
1683}
1684
1685
1686void vboxNetFltOsDeleteInstance(PVBOXNETFLTINS pThis)
1687{
1688 struct net_device *pDev;
1689 bool fRegistered;
1690 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1691
1692#ifdef VBOXNETFLT_WITH_FILTER_HOST2GUEST_SKBS_EXPERIMENT
1693 vboxNetFltLinuxUnhookDev(pThis, NULL);
1694#endif
1695
1696 /** @todo This code may race vboxNetFltLinuxUnregisterDevice (very very
1697 * unlikely, but none the less). Since it doesn't actually update the
1698 * state (just reads it), it is likely to panic in some interesting
1699 * ways. */
1700
1701 RTSpinlockAcquireNoInts(pThis->hSpinlock, &Tmp);
1702 pDev = (struct net_device *)ASMAtomicUoReadPtr((void * volatile *)&pThis->u.s.pDev);
1703 fRegistered = ASMAtomicUoReadBool(&pThis->u.s.fRegistered);
1704 RTSpinlockReleaseNoInts(pThis->hSpinlock, &Tmp);
1705
1706 if (fRegistered)
1707 {
1708 dev_remove_pack(&pThis->u.s.PacketType);
1709#ifndef VBOXNETFLT_LINUX_NO_XMIT_QUEUE
1710 skb_queue_purge(&pThis->u.s.XmitQueue);
1711#endif
1712 Log(("vboxNetFltOsDeleteInstance: this=%p: Packet handler removed, xmit queue purged.\n", pThis));
1713 Log(("vboxNetFltOsDeleteInstance: Device %p(%s) released. ref=%d\n", pDev, pDev->name, atomic_read(&pDev->refcnt)));
1714 dev_put(pDev);
1715 }
1716 Log(("vboxNetFltOsDeleteInstance: this=%p: Notifier removed.\n", pThis));
1717 unregister_netdevice_notifier(&pThis->u.s.Notifier);
1718 module_put(THIS_MODULE);
1719}
1720
1721
1722int vboxNetFltOsInitInstance(PVBOXNETFLTINS pThis, void *pvContext)
1723{
1724 int err;
1725 NOREF(pvContext);
1726
1727 pThis->u.s.Notifier.notifier_call = vboxNetFltLinuxNotifierCallback;
1728 err = register_netdevice_notifier(&pThis->u.s.Notifier);
1729 if (err)
1730 return VERR_INTNET_FLT_IF_FAILED;
1731 if (!pThis->u.s.fRegistered)
1732 {
1733 unregister_netdevice_notifier(&pThis->u.s.Notifier);
1734 LogRel(("VBoxNetFlt: failed to find %s.\n", pThis->szName));
1735 return VERR_INTNET_FLT_IF_NOT_FOUND;
1736 }
1737
1738 Log(("vboxNetFltOsInitInstance: this=%p: Notifier installed.\n", pThis));
1739 if ( pThis->fDisconnectedFromHost
1740 || !try_module_get(THIS_MODULE))
1741 return VERR_INTNET_FLT_IF_FAILED;
1742
1743 return VINF_SUCCESS;
1744}
1745
1746int vboxNetFltOsPreInitInstance(PVBOXNETFLTINS pThis)
1747{
1748 /*
1749 * Init the linux specific members.
1750 */
1751 pThis->u.s.pDev = NULL;
1752 pThis->u.s.fRegistered = false;
1753 pThis->u.s.fPromiscuousSet = false;
1754 memset(&pThis->u.s.PacketType, 0, sizeof(pThis->u.s.PacketType));
1755#ifndef VBOXNETFLT_LINUX_NO_XMIT_QUEUE
1756 skb_queue_head_init(&pThis->u.s.XmitQueue);
1757# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20)
1758 INIT_WORK(&pThis->u.s.XmitTask, vboxNetFltLinuxXmitTask);
1759# else
1760 INIT_WORK(&pThis->u.s.XmitTask, vboxNetFltLinuxXmitTask, &pThis->u.s.XmitTask);
1761# endif
1762#endif
1763
1764 return VINF_SUCCESS;
1765}
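/*
 * Note (not in the original): the INIT_WORK split above tracks the 2.6.20
 * API change. Older kernels passed an opaque data pointer to the work
 * handler while newer ones pass the work_struct itself, which is why
 * vboxNetFltLinuxXmitTask recovers the instance with the container_of-style
 * VBOX_FLT_XT_TO_INST conversion.
 */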
1766
1767
1768void vboxNetFltPortOsNotifyMacAddress(PVBOXNETFLTINS pThis, INTNETIFHANDLE hIf, PCRTMAC pMac)
1769{
1770 NOREF(pThis); NOREF(hIf); NOREF(pMac);
1771}
1772
1773
1774int vboxNetFltPortOsConnectInterface(PVBOXNETFLTINS pThis, INTNETIFHANDLE hIf)
1775{
1776 /* Nothing to do */
1777 NOREF(pThis); NOREF(hIf);
1778 return VINF_SUCCESS;
1779}
1780
1781
1782int vboxNetFltPortOsDisconnectInterface(PVBOXNETFLTINS pThis, INTNETIFHANDLE hIf)
1783{
1784 /* Nothing to do */
1785 NOREF(pThis); NOREF(hIf);
1786 return VINF_SUCCESS;
1787}
1788