VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/VBoxNetFlt/linux/VBoxNetFlt-linux.c@32048

Last change on this file since 32048 was 31705, checked in by vboxsync, 14 years ago

vboxnetflt: indentation

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 82.7 KB
1/* $Id: VBoxNetFlt-linux.c 31705 2010-08-16 15:24:39Z vboxsync $ */
2/** @file
3 * VBoxNetFlt - Network Filter Driver (Host), Linux Specific Code.
4 */
5
6/*
7 * Copyright (C) 2006-2008 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/*******************************************************************************
19* Header Files *
20*******************************************************************************/
21#define LOG_GROUP LOG_GROUP_NET_FLT_DRV
22#define VBOXNETFLT_LINUX_NO_XMIT_QUEUE
23#include "the-linux-kernel.h"
24#include "version-generated.h"
25#include "product-generated.h"
26#include <linux/netdevice.h>
27#include <linux/etherdevice.h>
28#include <linux/rtnetlink.h>
29#include <linux/miscdevice.h>
30#include <linux/ip.h>
31
32#include <VBox/log.h>
33#include <VBox/err.h>
34#include <VBox/intnetinline.h>
35#include <VBox/pdmnetinline.h>
36#include <VBox/param.h>
37#include <iprt/alloca.h>
38#include <iprt/assert.h>
39#include <iprt/spinlock.h>
40#include <iprt/semaphore.h>
41#include <iprt/initterm.h>
42#include <iprt/process.h>
43#include <iprt/mem.h>
44#include <iprt/net.h>
45#include <iprt/log.h>
46#include <iprt/mp.h>
47#include <iprt/mem.h>
48#include <iprt/time.h>
49
50#define VBOXNETFLT_OS_SPECFIC 1
51#include "../VBoxNetFltInternal.h"
52
53#ifdef CONFIG_NET_SCHED
54# define VBOXNETFLT_WITH_QDISC /* Comment this out to disable qdisc support */
55# ifdef VBOXNETFLT_WITH_QDISC
56# include <net/pkt_sched.h>
57# endif /* VBOXNETFLT_WITH_QDISC */
58#endif
59
60
61/*******************************************************************************
62* Defined Constants And Macros *
63*******************************************************************************/
64#define VBOX_FLT_NB_TO_INST(pNB) RT_FROM_MEMBER(pNB, VBOXNETFLTINS, u.s.Notifier)
65#define VBOX_FLT_PT_TO_INST(pPT) RT_FROM_MEMBER(pPT, VBOXNETFLTINS, u.s.PacketType)
66#ifndef VBOXNETFLT_LINUX_NO_XMIT_QUEUE
67# define VBOX_FLT_XT_TO_INST(pXT) RT_FROM_MEMBER(pXT, VBOXNETFLTINS, u.s.XmitTask)
68#endif
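/*
 * A minimal sketch of the container-of pattern behind RT_FROM_MEMBER
 * (illustrative only; the real definition lives in iprt/cdefs.h).
 * EXAMPLE_FROM_MEMBER is a made-up name for this sketch:
 */
#if 0 /* example only */
# include <linux/stddef.h> /* offsetof */
# define EXAMPLE_FROM_MEMBER(pMember, Type, Member) \
    ((Type *)((char *)(pMember) - offsetof(Type, Member)))
/* e.g.: PVBOXNETFLTINS pThis = EXAMPLE_FROM_MEMBER(pPT, VBOXNETFLTINS, u.s.PacketType); */
#endif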
69
70#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 22)
71# define VBOX_SKB_RESET_NETWORK_HDR(skb) skb_reset_network_header(skb)
72# define VBOX_SKB_RESET_MAC_HDR(skb) skb_reset_mac_header(skb)
73#else
74# define VBOX_SKB_RESET_NETWORK_HDR(skb) skb->nh.raw = skb->data
75# define VBOX_SKB_RESET_MAC_HDR(skb) skb->mac.raw = skb->data
76#endif
77
78#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
79# define VBOX_SKB_CHECKSUM_HELP(skb) skb_checksum_help(skb)
80#else
81# define CHECKSUM_PARTIAL CHECKSUM_HW
82# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 10)
83# define VBOX_SKB_CHECKSUM_HELP(skb) skb_checksum_help(skb, 0)
84# else
85# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 7)
86# define VBOX_SKB_CHECKSUM_HELP(skb) skb_checksum_help(&skb, 0)
87# else
88# define VBOX_SKB_CHECKSUM_HELP(skb) (!skb_checksum_help(skb))
89# endif
90/* Versions prior to 2.6.10 use 'stats' for both 'bstats' and 'qstats' */
91# define bstats stats
92# define qstats stats
93# endif
94#endif
95
96#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 13)
97static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch)
98{
99 kfree_skb(skb);
100 sch->stats.drops++;
101
102 return NET_XMIT_DROP;
103}
104#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 13) */
105
106#ifndef NET_IP_ALIGN
107# define NET_IP_ALIGN 2
108#endif
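/*
 * Note: reserving NET_IP_ALIGN (2) bytes ahead of the 14-byte Ethernet
 * header lands the IP header on a 16-byte boundary; see the skb_reserve()
 * call in vboxNetFltLinuxSkBufFromSG below.
 */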
109
110#if 0
111/** Create scatter / gather segments for fragments. When not used, we will
112 * linearize the socket buffer before creating the internal networking SG. */
113# define VBOXNETFLT_SG_SUPPORT 1
114#endif
115
116#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 18)
117/** Indicates that the Linux kernel may send us GSO frames. */
118# define VBOXNETFLT_WITH_GSO 1
119
120/** This enables or disables the transmission of GSO frames from the internal
121 * network to the host. */
122# define VBOXNETFLT_WITH_GSO_XMIT_HOST 1
123
124# if 0 /** @todo This is currently disabled because it causes a performance loss of 5-10%. */
125/** This enables or disables the transmission of GSO frames from the internal
126 * network to the wire. */
127# define VBOXNETFLT_WITH_GSO_XMIT_WIRE 1
128# endif
129
130/** This enables or disables the forwarding/flooding of GSO frames from the host
131 * to the internal network. */
132# define VBOXNETFLT_WITH_GSO_RECV 1
133
134#endif
135
136#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
137/** This enables or disables handling of GSO frames coming from the wire (GRO). */
138# define VBOXNETFLT_WITH_GRO 1
139#endif
140/*
141 * GRO support was backported to RHEL 5.4
142 */
143#ifdef RHEL_RELEASE_CODE
144# if RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5, 4)
145# define VBOXNETFLT_WITH_GRO 1
146# endif
147#endif
148
149/*******************************************************************************
150* Internal Functions *
151*******************************************************************************/
152static int VBoxNetFltLinuxInit(void);
153static void VBoxNetFltLinuxUnload(void);
154static void vboxNetFltLinuxForwardToIntNet(PVBOXNETFLTINS pThis, struct sk_buff *pBuf);
155
156
157/*******************************************************************************
158* Global Variables *
159*******************************************************************************/
160/**
161 * The (common) global data.
162 */
163static VBOXNETFLTGLOBALS g_VBoxNetFltGlobals;
164
165module_init(VBoxNetFltLinuxInit);
166module_exit(VBoxNetFltLinuxUnload);
167
168MODULE_AUTHOR(VBOX_VENDOR);
169MODULE_DESCRIPTION(VBOX_PRODUCT " Network Filter Driver");
170MODULE_LICENSE("GPL");
171#ifdef MODULE_VERSION
172MODULE_VERSION(VBOX_VERSION_STRING " (" RT_XSTR(INTNETTRUNKIFPORT_VERSION) ")");
173#endif
174
175
176#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 12) && defined(LOG_ENABLED)
177unsigned dev_get_flags(const struct net_device *dev)
178{
179 unsigned flags;
180
181 flags = (dev->flags & ~(IFF_PROMISC |
182 IFF_ALLMULTI |
183 IFF_RUNNING)) |
184 (dev->gflags & (IFF_PROMISC |
185 IFF_ALLMULTI));
186
187 if (netif_running(dev) && netif_carrier_ok(dev))
188 flags |= IFF_RUNNING;
189
190 return flags;
191}
192#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 12) */
193
194
195#ifdef VBOXNETFLT_WITH_QDISC
196//#define QDISC_LOG(x) printk x
197# define QDISC_LOG(x) do { } while (0)
198
199# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
200# define QDISC_CREATE(dev, queue, ops, parent) qdisc_create_dflt(dev, ops)
201# elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 27)
202# define QDISC_CREATE(dev, queue, ops, parent) qdisc_create_dflt(dev, ops, parent)
203# else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27) */
204# define QDISC_CREATE(dev, queue, ops, parent) qdisc_create_dflt(dev, queue, ops, parent)
205# endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27) */
206
207# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 27)
208# define qdisc_dev(qdisc) (qdisc->dev)
209# define qdisc_pkt_len(skb) (skb->len)
210# define QDISC_GET(dev) (dev->qdisc_sleeping)
211# else
212# define QDISC_GET(dev) (netdev_get_tx_queue(dev, 0)->qdisc_sleeping)
213# endif
214
215# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 27)
216# define QDISC_SAVED_NUM(dev) 1
217# elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
218# define QDISC_SAVED_NUM(dev) dev->num_tx_queues
219# else
220# define QDISC_SAVED_NUM(dev) dev->num_tx_queues+1
221# endif
222
223# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 27)
224# define QDISC_IS_BUSY(dev, qdisc) test_bit(__LINK_STATE_SCHED, &dev->state)
225# elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
226# define QDISC_IS_BUSY(dev, qdisc) (test_bit(__QDISC_STATE_RUNNING, &qdisc->state) || \
227 test_bit(__QDISC_STATE_SCHED, &qdisc->state))
228# else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 36) */
229# define QDISC_IS_BUSY(dev, qdisc) (qdisc_is_running(qdisc) || \
230 test_bit(__QDISC_STATE_SCHED, &qdisc->state))
231# endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 36) */
232
233struct VBoxNetQDiscPriv
234{
235 /** Pointer to the single child qdisc. */
236 struct Qdisc *pChild;
237 /*
238 * Technically it is possible to have different qdiscs for different TX
239 * queues so we have to save them all.
240 */
241 /** Pointer to the array of saved qdiscs. */
242 struct Qdisc **ppSaved;
243 /** Pointer to the net filter instance. */
244 PVBOXNETFLTINS pVBoxNetFlt;
245};
246typedef struct VBoxNetQDiscPriv *PVBOXNETQDISCPRIV;
247
248//#define VBOXNETFLT_QDISC_ENQUEUE
249static int vboxNetFltQdiscEnqueue(struct sk_buff *skb, struct Qdisc *sch)
250{
251 PVBOXNETQDISCPRIV pPriv = qdisc_priv(sch);
252 int rc;
253
254# ifdef VBOXNETFLT_QDISC_ENQUEUE
255 if (VALID_PTR(pPriv->pVBoxNetFlt))
256 {
257 uint8_t abHdrBuf[sizeof(RTNETETHERHDR) + sizeof(uint32_t) + RTNETIPV4_MIN_LEN];
258 PCRTNETETHERHDR pEtherHdr;
259 PINTNETTRUNKSWPORT pSwitchPort;
260 uint32_t cbHdrs = skb_headlen(skb);
261
262 cbHdrs = RT_MIN(cbHdrs, sizeof(abHdrBuf));
263 pEtherHdr = (PCRTNETETHERHDR)skb_header_pointer(skb, 0, cbHdrs, &abHdrBuf[0]);
264 if ( pEtherHdr
265 && (pSwitchPort = pPriv->pVBoxNetFlt->pSwitchPort) != NULL
266 && VALID_PTR(pSwitchPort)
267 && cbHdrs >= 6)
268 {
269 /** @todo consider reference counting, etc. */
270 INTNETSWDECISION enmDecision = pSwitchPort->pfnPreRecv(pSwitchPort, pEtherHdr, cbHdrs, INTNETTRUNKDIR_HOST);
271 if (enmDecision == INTNETSWDECISION_INTNET)
272 {
273 struct sk_buff *pBuf = skb_copy(skb, GFP_ATOMIC);
274 pBuf->pkt_type = PACKET_OUTGOING;
275 vboxNetFltLinuxForwardToIntNet(pPriv->pVBoxNetFlt, pBuf);
276 qdisc_drop(skb, sch);
277 ++sch->bstats.packets;
278 sch->bstats.bytes += qdisc_pkt_len(skb);
279 return NET_XMIT_SUCCESS;
280 }
281 }
282 }
283# endif /* VBOXNETFLT_QDISC_ENQUEUE */
284 rc = pPriv->pChild->enqueue(skb, pPriv->pChild);
285 if (rc == NET_XMIT_SUCCESS)
286 {
287 ++sch->q.qlen;
288 ++sch->bstats.packets;
289 sch->bstats.bytes += qdisc_pkt_len(skb);
290 }
291 else
292 ++sch->qstats.drops;
293 return rc;
294}
295
296static struct sk_buff *vboxNetFltQdiscDequeue(struct Qdisc *sch)
297{
298 PVBOXNETQDISCPRIV pPriv = qdisc_priv(sch);
299# ifdef VBOXNETFLT_QDISC_ENQUEUE
300 --sch->q.qlen;
301 return pPriv->pChild->dequeue(pPriv->pChild);
302# else /* VBOXNETFLT_QDISC_ENQUEUE */
303 uint8_t abHdrBuf[sizeof(RTNETETHERHDR) + sizeof(uint32_t) + RTNETIPV4_MIN_LEN];
304 PCRTNETETHERHDR pEtherHdr;
305 PINTNETTRUNKSWPORT pSwitchPort;
306 struct sk_buff *pSkb;
307
308 QDISC_LOG(("vboxNetFltDequeue: Enter pThis=%p\n", pPriv->pVBoxNetFlt));
309
310 while ((pSkb = pPriv->pChild->dequeue(pPriv->pChild)) != NULL)
311 {
312 struct sk_buff *pBuf;
313 INTNETSWDECISION enmDecision;
314 uint32_t cbHdrs;
315
316 --sch->q.qlen;
317
318 if (!VALID_PTR(pPriv->pVBoxNetFlt))
319 break;
320
321 cbHdrs = skb_headlen(pSkb);
322 cbHdrs = RT_MIN(cbHdrs, sizeof(abHdrBuf));
323 pEtherHdr = (PCRTNETETHERHDR)skb_header_pointer(pSkb, 0, cbHdrs, &abHdrBuf[0]);
324 if ( !pEtherHdr
325 || (pSwitchPort = pPriv->pVBoxNetFlt->pSwitchPort) == NULL
326 || !VALID_PTR(pSwitchPort)
327 || cbHdrs < 6)
328 break;
329
330 /** @todo consider reference counting, etc. */
331 enmDecision = pSwitchPort->pfnPreRecv(pSwitchPort, pEtherHdr, cbHdrs, INTNETTRUNKDIR_HOST);
332 if (enmDecision != INTNETSWDECISION_INTNET)
333 break;
334
335 pBuf = skb_copy(pSkb, GFP_ATOMIC);
336 pBuf->pkt_type = PACKET_OUTGOING;
337 QDISC_LOG(("vboxNetFltDequeue: pThis=%p\n", pPriv->pVBoxNetFlt));
338 vboxNetFltLinuxForwardToIntNet(pPriv->pVBoxNetFlt, pBuf);
339 qdisc_drop(pSkb, sch);
340 QDISC_LOG(("VBoxNetFlt: Packet for %02x:%02x:%02x:%02x:%02x:%02x dropped\n",
341 pSkb->data[0], pSkb->data[1], pSkb->data[2],
342 pSkb->data[3], pSkb->data[4], pSkb->data[5]));
343 }
344
345 return pSkb;
346# endif /* VBOXNETFLT_QDISC_ENQUEUE */
347}
348
349# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29)
350static int vboxNetFltQdiscRequeue(struct sk_buff *skb, struct Qdisc *sch)
351{
352 int rc;
353 PVBOXNETQDISCPRIV pPriv = qdisc_priv(sch);
354
355 rc = pPriv->pChild->ops->requeue(skb, pPriv->pChild);
356 if (rc == 0)
357 {
358 sch->q.qlen++;
359# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 10)
360 sch->qstats.requeues++;
361# endif
362 }
363
364 return rc;
365}
366# endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29) */
367
368static unsigned int vboxNetFltQdiscDrop(struct Qdisc *sch)
369{
370 PVBOXNETQDISCPRIV pPriv = qdisc_priv(sch);
371 unsigned int cbLen;
372
373 if (pPriv->pChild->ops->drop)
374 {
375 cbLen = pPriv->pChild->ops->drop(pPriv->pChild);
376 if (cbLen != 0)
377 {
378 ++sch->qstats.drops;
379 --sch->q.qlen;
380 return cbLen;
381 }
382 }
383
384 return 0;
385}
386
387# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 25)
388static int vboxNetFltQdiscInit(struct Qdisc *sch, struct rtattr *opt)
389# else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 25) */
390static int vboxNetFltQdiscInit(struct Qdisc *sch, struct nlattr *opt)
391# endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 25) */
392{
393 PVBOXNETQDISCPRIV pPriv = qdisc_priv(sch);
394 struct net_device *pDev = qdisc_dev(sch);
395
396 pPriv->pVBoxNetFlt = NULL;
397
398 pPriv->ppSaved = kcalloc(QDISC_SAVED_NUM(pDev), sizeof(pPriv->ppSaved[0]),
399 GFP_KERNEL);
400 if (!pPriv->ppSaved)
401 return -ENOMEM;
402
403 pPriv->pChild = QDISC_CREATE(pDev, netdev_get_tx_queue(pDev, 0),
404 &pfifo_qdisc_ops,
405 TC_H_MAKE(TC_H_MAJ(sch->handle),
406 TC_H_MIN(1)));
407 if (!pPriv->pChild)
408 {
409 kfree(pPriv->ppSaved);
410 pPriv->ppSaved = NULL;
411 return -ENOMEM;
412 }
413
414 return 0;
415}
416
417static void vboxNetFltQdiscReset(struct Qdisc *sch)
418{
419 PVBOXNETQDISCPRIV pPriv = qdisc_priv(sch);
420
421 qdisc_reset(pPriv->pChild);
422 sch->q.qlen = 0;
423 sch->qstats.backlog = 0;
424}
425
426static void vboxNetFltQdiscDestroy(struct Qdisc* sch)
427{
428 PVBOXNETQDISCPRIV pPriv = qdisc_priv(sch);
429 struct net_device *pDev = qdisc_dev(sch);
430
431 qdisc_destroy(pPriv->pChild);
432 pPriv->pChild = NULL;
433
434 if (pPriv->ppSaved)
435 {
436 int i;
437 for (i = 0; i < QDISC_SAVED_NUM(pDev); i++)
438 if (pPriv->ppSaved[i])
439 qdisc_destroy(pPriv->ppSaved[i]);
440 kfree(pPriv->ppSaved);
441 pPriv->ppSaved = NULL;
442 }
443}
444
445static int vboxNetFltClassGraft(struct Qdisc *sch, unsigned long arg, struct Qdisc *pNew,
446 struct Qdisc **ppOld)
447{
448 PVBOXNETQDISCPRIV pPriv = qdisc_priv(sch);
449
450 if (pNew == NULL)
451 pNew = &noop_qdisc;
452
453 sch_tree_lock(sch);
454 *ppOld = pPriv->pChild;
455 pPriv->pChild = pNew;
456# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
457 sch->q.qlen = 0;
458# else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20) */
459 qdisc_tree_decrease_qlen(*ppOld, (*ppOld)->q.qlen);
460# endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20) */
461 qdisc_reset(*ppOld);
462 sch_tree_unlock(sch);
463
464 return 0;
465}
466
467static struct Qdisc *vboxNetFltClassLeaf(struct Qdisc *sch, unsigned long arg)
468{
469 PVBOXNETQDISCPRIV pPriv = qdisc_priv(sch);
470 return pPriv->pChild;
471}
472
473static unsigned long vboxNetFltClassGet(struct Qdisc *sch, u32 classid)
474{
475 return 1;
476}
477
478static void vboxNetFltClassPut(struct Qdisc *sch, unsigned long arg)
479{
480}
481
482# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 25)
483static int vboxNetFltClassChange(struct Qdisc *sch, u32 classid, u32 parentid,
484 struct rtattr **tca, unsigned long *arg)
485# else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 25) */
486static int vboxNetFltClassChange(struct Qdisc *sch, u32 classid, u32 parentid,
487 struct nlattr **tca, unsigned long *arg)
488# endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 25) */
489{
490 return -ENOSYS;
491}
492
493static int vboxNetFltClassDelete(struct Qdisc *sch, unsigned long arg)
494{
495 return -ENOSYS;
496}
497
498static void vboxNetFltClassWalk(struct Qdisc *sch, struct qdisc_walker *walker)
499{
500 if (!walker->stop) {
501 if (walker->count >= walker->skip)
502 if (walker->fn(sch, 1, walker) < 0) {
503 walker->stop = 1;
504 return;
505 }
506 walker->count++;
507 }
508}
509
510static struct tcf_proto **vboxNetFltClassFindTcf(struct Qdisc *sch, unsigned long cl)
511{
512 return NULL;
513}
514
515static int vboxNetFltClassDump(struct Qdisc *sch, unsigned long cl,
516 struct sk_buff *skb, struct tcmsg *tcm)
517{
518 PVBOXNETQDISCPRIV pPriv = qdisc_priv(sch);
519
520 if (cl != 1)
521 return -ENOENT;
522
523 tcm->tcm_handle |= TC_H_MIN(1);
524 tcm->tcm_info = pPriv->pChild->handle;
525
526 return 0;
527}
528
529
530static struct Qdisc_class_ops g_VBoxNetFltClassOps =
531{
532 .graft = vboxNetFltClassGraft,
533 .leaf = vboxNetFltClassLeaf,
534 .get = vboxNetFltClassGet,
535 .put = vboxNetFltClassPut,
536 .change = vboxNetFltClassChange,
537 .delete = vboxNetFltClassDelete,
538 .walk = vboxNetFltClassWalk,
539 .tcf_chain = vboxNetFltClassFindTcf,
540 .dump = vboxNetFltClassDump,
541};
542
543
544static struct Qdisc_ops g_VBoxNetFltQDiscOps = {
545 .cl_ops = &g_VBoxNetFltClassOps,
546 .id = "vboxnetflt",
547 .priv_size = sizeof(struct VBoxNetQDiscPriv),
548 .enqueue = vboxNetFltQdiscEnqueue,
549 .dequeue = vboxNetFltQdiscDequeue,
550# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29)
551 .requeue = vboxNetFltQdiscRequeue,
552# else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
553 .peek = qdisc_peek_dequeued,
554# endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */
555 .drop = vboxNetFltQdiscDrop,
556 .init = vboxNetFltQdiscInit,
557 .reset = vboxNetFltQdiscReset,
558 .destroy = vboxNetFltQdiscDestroy,
559 .owner = THIS_MODULE
560};
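/*
 * Usage note: once register_qdisc(&g_VBoxNetFltQDiscOps) has been done
 * (see VBoxNetFltLinuxInit below), this discipline can also be attached
 * manually, e.g. (assuming a host interface named eth0):
 *
 *   tc qdisc add dev eth0 root vboxnetflt
 *
 * That manual-installation case is exactly what the install/remove code
 * below has to detect via the "vboxnetflt" ops id.
 */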
561
562/*
563 * If our qdisc is already attached to the device (that means the user
564 * installed it from the command line with the 'tc' command) we simply update
565 * the pointer to the vboxnetflt instance in the qdisc's private structure.
566 * Otherwise we need to take some additional steps:
567 * - Create our qdisc;
568 * - Save all references to qdiscs;
569 * - Replace our child with the first qdisc reference;
570 * - Replace all references so they point to our qdisc.
571 */
572static void vboxNetFltLinuxQdiscInstall(PVBOXNETFLTINS pThis, struct net_device *pDev)
573{
574# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27)
575 int i;
576# endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27) */
577 PVBOXNETQDISCPRIV pPriv;
578
579 struct Qdisc *pExisting = QDISC_GET(pDev);
580 if (strcmp(pExisting->ops->id, "vboxnetflt"))
581 {
582 /* The existing qdisc is different from ours, let's create a new one. */
583 struct Qdisc *pNew = QDISC_CREATE(pDev, netdev_get_tx_queue(pDev, 0),
584 &g_VBoxNetFltQDiscOps, TC_H_ROOT);
585 if (!pNew)
586 return; // TODO: Error?
587
588 if (!try_module_get(THIS_MODULE))
589 {
590 /*
591 * This may cause a memory leak but calling qdisc_destroy()
592 * is not an option as it will call module_put().
593 */
594 return;
595 }
596 pPriv = qdisc_priv(pNew);
597
598 qdisc_destroy(pPriv->pChild);
599 pPriv->pChild = QDISC_GET(pDev);
600 atomic_inc(&pPriv->pChild->refcnt);
601 /*
602 * There is no need to deactivate the device or acquire any locks
603 * prior to changing qdiscs, since we do not destroy the old qdisc.
604 * Atomic replacement of the pointers is enough.
605 */
606 /*
607 * No need to change reference counters here as we merely move
608 * the pointer and the reference counter of the newly allocated
609 * qdisc is already 1.
610 */
611# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 27)
612 pPriv->ppSaved[0] = pDev->qdisc_sleeping;
613 ASMAtomicWritePtr(&pDev->qdisc_sleeping, pNew);
614 ASMAtomicWritePtr(&pDev->qdisc, pNew);
615# else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27) */
616 for (i = 0; i < pDev->num_tx_queues; i++)
617 {
618 struct netdev_queue *pQueue = netdev_get_tx_queue(pDev, i);
619
620 pPriv->ppSaved[i] = pQueue->qdisc_sleeping;
621 ASMAtomicWritePtr(&pQueue->qdisc_sleeping, pNew);
622 ASMAtomicWritePtr(&pQueue->qdisc, pNew);
623 if (i)
624 atomic_inc(&pNew->refcnt);
625 }
626 /* Newer kernels store the root qdisc in the netdev structure as well. */
627# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32)
628 pPriv->ppSaved[pDev->num_tx_queues] = pDev->qdisc;
629 ASMAtomicWritePtr(&pDev->qdisc, pNew);
630 atomic_inc(&pNew->refcnt);
631# endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32) */
632# endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27) */
633 /* Sync the queue length with our child. */
634 pNew->q.qlen = pPriv->pChild->q.qlen;
635 }
636 else
637 {
638 /* We already have a vboxnetflt qdisc, let's use it. */
639 pPriv = qdisc_priv(pExisting);
640 }
641 ASMAtomicWritePtr(&pPriv->pVBoxNetFlt, pThis);
642 QDISC_LOG(("vboxNetFltLinuxInstallQdisc: pThis=%p\n", pPriv->pVBoxNetFlt));
643}
644
645static void vboxNetFltLinuxQdiscRemove(PVBOXNETFLTINS pThis, struct net_device *pDev)
646{
647# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27)
648 int i;
649# endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27) */
650 PVBOXNETQDISCPRIV pPriv;
651 struct Qdisc *pQdisc, *pChild;
652 if (!pDev)
653 pDev = ASMAtomicUoReadPtrT(&pThis->u.s.pDev, struct net_device *);
654 if (!VALID_PTR(pDev))
655 {
656 printk("VBoxNetFlt: Failed to detach qdisc, invalid device pointer: %p\n",
657 pDev);
658 return; // TODO: Consider returning an error
659 }
660
661
662 pQdisc = QDISC_GET(pDev);
663 if (strcmp(pQdisc->ops->id, "vboxnetflt"))
664 {
665 /* Looks like the user has replaced our qdisc manually. */
666 printk("VBoxNetFlt: Failed to detach qdisc, wrong qdisc: %s\n",
667 pQdisc->ops->id);
668 return; // TODO: Consider returning an error
669 }
670
671 pPriv = qdisc_priv(pQdisc);
672 Assert(pPriv->pVBoxNetFlt == pThis);
673 ASMAtomicWriteNullPtr(&pPriv->pVBoxNetFlt);
674 pChild = ASMAtomicXchgPtrT(&pPriv->pChild, &noop_qdisc, struct Qdisc *);
675 qdisc_destroy(pChild); /* It won't be the last reference. */
676
677 QDISC_LOG(("vboxNetFltLinuxQdiscRemove: refcnt=%d num_tx_queues=%d\n",
678 atomic_read(&pQdisc->refcnt), pDev->num_tx_queues));
679# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 27)
680 /* Play it safe, make sure the qdisc is not being used. */
681 if (pPriv->ppSaved[0])
682 {
683 ASMAtomicWritePtr(&pDev->qdisc_sleeping, pPriv->ppSaved[0]);
684 ASMAtomicWritePtr(&pDev->qdisc, pPriv->ppSaved[0]);
685 pPriv->ppSaved[0] = NULL;
686 while (QDISC_IS_BUSY(pDev, pQdisc))
687 yield();
688 qdisc_destroy(pQdisc); /* Destroy reference */
689 }
690# else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27) */
691 for (i = 0; i < pDev->num_tx_queues; i++)
692 {
693 struct netdev_queue *pQueue = netdev_get_tx_queue(pDev, i);
694 if (pPriv->ppSaved[i])
695 {
696 Assert(pQueue->qdisc_sleeping == pQdisc);
697 ASMAtomicWritePtr(&pQueue->qdisc_sleeping, pPriv->ppSaved[i]);
698 ASMAtomicWritePtr(&pQueue->qdisc, pPriv->ppSaved[i]);
699 pPriv->ppSaved[i] = NULL;
700 while (QDISC_IS_BUSY(pDev, pQdisc))
701 yield();
702 qdisc_destroy(pQdisc); /* Destroy reference */
703 }
704 }
705 /* Newer kernels store the root qdisc in the netdev structure as well. */
706# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32)
707 ASMAtomicWritePtr(&pDev->qdisc, pPriv->ppSaved[pDev->num_tx_queues]);
708 pPriv->ppSaved[pDev->num_tx_queues] = NULL;
709 while (QDISC_IS_BUSY(pDev, pQdisc))
710 yield();
711 qdisc_destroy(pQdisc); /* Destroy reference */
712# endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32) */
713# endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27) */
714
715 /*
716 * At this point all references to our qdisc should be gone
717 * unless the user had installed it manually.
718 */
719 QDISC_LOG(("vboxNetFltLinuxRemoveQdisc: pThis=%p\n", pPriv->pVBoxNetFlt));
720}
721
722#endif /* VBOXNETFLT_WITH_QDISC */
723
724
725/**
726 * Initialize module.
727 *
728 * @returns appropriate status code.
729 */
730static int __init VBoxNetFltLinuxInit(void)
731{
732 int rc;
733 /*
734 * Initialize IPRT.
735 */
736 rc = RTR0Init(0);
737 if (RT_SUCCESS(rc))
738 {
739 Log(("VBoxNetFltLinuxInit\n"));
740
741 /*
742 * Initialize the globals and connect to the support driver.
743 *
744 * This will call back vboxNetFltOsOpenSupDrv (and maybe vboxNetFltOsCloseSupDrv)
745 * for establishing the connection to the support driver.
746 */
747 memset(&g_VBoxNetFltGlobals, 0, sizeof(g_VBoxNetFltGlobals));
748 rc = vboxNetFltInitGlobalsAndIdc(&g_VBoxNetFltGlobals);
749 if (RT_SUCCESS(rc))
750 {
751#ifdef VBOXNETFLT_WITH_QDISC
752 /*memcpy(&g_VBoxNetFltQDiscOps, &pfifo_qdisc_ops, sizeof(g_VBoxNetFltQDiscOps));
753 strcpy(g_VBoxNetFltQDiscOps.id, "vboxnetflt");
754 g_VBoxNetFltQDiscOps.owner = THIS_MODULE;*/
755 rc = register_qdisc(&g_VBoxNetFltQDiscOps);
756 if (rc)
757 {
758 LogRel(("VBoxNetFlt: Failed to registed qdisc: %d\n", rc));
759 return rc;
760 }
761#endif /* VBOXNETFLT_WITH_QDISC */
762 LogRel(("VBoxNetFlt: Successfully started.\n"));
763 return 0;
764 }
765
766 LogRel(("VBoxNetFlt: failed to initialize device extension (rc=%d)\n", rc));
767 RTR0Term();
768 }
769 else
770 LogRel(("VBoxNetFlt: failed to initialize IPRT (rc=%d)\n", rc));
771
772 memset(&g_VBoxNetFltGlobals, 0, sizeof(g_VBoxNetFltGlobals));
773 return -RTErrConvertToErrno(rc);
774}
775
776
777/**
778 * Unload the module.
779 *
780 * @todo We have to prevent this if we're busy!
781 */
782static void __exit VBoxNetFltLinuxUnload(void)
783{
784 int rc;
785 Log(("VBoxNetFltLinuxUnload\n"));
786 Assert(vboxNetFltCanUnload(&g_VBoxNetFltGlobals));
787
788#ifdef VBOXNETFLT_WITH_QDISC
789 unregister_qdisc(&g_VBoxNetFltQDiscOps);
790#endif /* VBOXNETFLT_WITH_QDISC */
791 /*
792 * Undo the work done during start (in reverse order).
793 */
794 rc = vboxNetFltTryDeleteIdcAndGlobals(&g_VBoxNetFltGlobals);
795 AssertRC(rc); NOREF(rc);
796
797 RTR0Term();
798
799 memset(&g_VBoxNetFltGlobals, 0, sizeof(g_VBoxNetFltGlobals));
800
801 Log(("VBoxNetFltLinuxUnload - done\n"));
802}
803
804
805/**
806 * Experiment where we filter traffic from the host to the internal network
807 * before it reaches the NIC driver.
808 *
809 * The current code uses a very ugly hack and only works on kernels using the
810 * net_device_ops (>= 2.6.29). It has been shown to give us a
811 * performance boost of 60-100% though. So, we have to find some less hacky way
812 * of getting this job done eventually.
813 *
814 * #define VBOXNETFLT_WITH_FILTER_HOST2GUEST_SKBS_EXPERIMENT
815 */
816#ifdef VBOXNETFLT_WITH_FILTER_HOST2GUEST_SKBS_EXPERIMENT
817
818/**
819 * The overridden net_device_ops of the device we're attached to.
820 *
821 * Requires Linux 2.6.29 or later.
822 *
823 * This is a very dirty hack that was created to explore how much we can improve
824 * host-to-guest transfers by not CC'ing the NIC.
825 */
826typedef struct VBoxNetDeviceOpsOverride
827{
828 /** Our overridden ops. */
829 struct net_device_ops Ops;
830 /** Magic word. */
831 uint32_t u32Magic;
832 /** Pointer to the original ops. */
833 struct net_device_ops const *pOrgOps;
834 /** Pointer to the net filter instance. */
835 PVBOXNETFLTINS pVBoxNetFlt;
837 /** The number of filtered packets. */
838 uint64_t cFiltered;
839 /** The total number of packets. */
839 uint64_t cTotal;
840} VBOXNETDEVICEOPSOVERRIDE, *PVBOXNETDEVICEOPSOVERRIDE;
841/** VBOXNETDEVICEOPSOVERRIDE::u32Magic value. */
842#define VBOXNETDEVICEOPSOVERRIDE_MAGIC UINT32_C(0x00c0ffee)
843
844/**
845 * ndo_start_xmit wrapper that drops packets that shouldn't go to the wire
846 * because they belong on the internal network.
847 *
848 * @returns NETDEV_TX_XXX.
849 * @param pSkb The socket buffer to transmit.
850 * @param pDev The net device.
851 */
852static int vboxNetFltLinuxStartXmitFilter(struct sk_buff *pSkb, struct net_device *pDev)
853{
854 PVBOXNETDEVICEOPSOVERRIDE pOverride = (PVBOXNETDEVICEOPSOVERRIDE)pDev->netdev_ops;
855 uint8_t abHdrBuf[sizeof(RTNETETHERHDR) + sizeof(uint32_t) + RTNETIPV4_MIN_LEN];
856 PCRTNETETHERHDR pEtherHdr;
857 PINTNETTRUNKSWPORT pSwitchPort;
858 uint32_t cbHdrs;
859
860
861 /*
862 * Validate the override structure.
863 *
864 * Note! We're racing vboxNetFltLinuxUnhookDev here. If this was supposed
865 * to be production quality code, we would have to be much more
866 * careful here and avoid the race.
867 */
868 if ( !VALID_PTR(pOverride)
869 || pOverride->u32Magic != VBOXNETDEVICEOPSOVERRIDE_MAGIC
870 || !VALID_PTR(pOverride->pOrgOps))
871 {
872 printk("vboxNetFltLinuxStartXmitFilter: bad override %p\n", pOverride);
873 dev_kfree_skb(pSkb);
874 return NETDEV_TX_OK;
875 }
876 pOverride->cTotal++;
877
878 /*
879 * Do the filtering based on the default OUI of our virtual NICs.
880 *
881 * Note! In a real solution, we would ask the switch whether the
882 * destination MAC is 100% certain to be on the internal network and
883 * only then drop it.
884 */
885 cbHdrs = skb_headlen(pSkb);
886 cbHdrs = RT_MIN(cbHdrs, sizeof(abHdrBuf));
887 pEtherHdr = (PCRTNETETHERHDR)skb_header_pointer(pSkb, 0, cbHdrs, &abHdrBuf[0]);
888 if ( pEtherHdr
889 && VALID_PTR(pOverride->pVBoxNetFlt)
890 && (pSwitchPort = pOverride->pVBoxNetFlt->pSwitchPort) != NULL
891 && VALID_PTR(pSwitchPort)
892 && cbHdrs >= 6)
893 {
894 INTNETSWDECISION enmDecision;
895
896 /** @todo consider reference counting, etc. */
897 enmDecision = pSwitchPort->pfnPreRecv(pSwitchPort, pEtherHdr, cbHdrs, INTNETTRUNKDIR_HOST);
898 if (enmDecision == INTNETSWDECISION_INTNET)
899 {
900 dev_kfree_skb(pSkb);
901 pOverride->cFiltered++;
902 return NETDEV_TX_OK;
903 }
904 }
905
906 return pOverride->pOrgOps->ndo_start_xmit(pSkb, pDev);
907}
908
909/**
910 * Hooks the device ndo_start_xmit operation of the device.
911 *
912 * @param pThis The net filter instance.
913 * @param pDev The net device.
914 */
915static void vboxNetFltLinuxHookDev(PVBOXNETFLTINS pThis, struct net_device *pDev)
916{
917 PVBOXNETDEVICEOPSOVERRIDE pOverride;
918 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
919
920 pOverride = RTMemAlloc(sizeof(*pOverride));
921 if (!pOverride)
922 return;
923 pOverride->pOrgOps = pDev->netdev_ops;
924 pOverride->Ops = *pDev->netdev_ops;
925 pOverride->Ops.ndo_start_xmit = vboxNetFltLinuxStartXmitFilter;
926 pOverride->u32Magic = VBOXNETDEVICEOPSOVERRIDE_MAGIC;
927 pOverride->cTotal = 0;
928 pOverride->cFiltered = 0;
929 pOverride->pVBoxNetFlt = pThis;
930
931 RTSpinlockAcquireNoInts(pThis->hSpinlock, &Tmp); /* (this isn't necessary, but so what) */
932 ASMAtomicWritePtr((void * volatile *)&pDev->netdev_ops, pOverride);
933 RTSpinlockReleaseNoInts(pThis->hSpinlock, &Tmp);
934}
935
936/**
937 * Undoes what vboxNetFltLinuxHookDev did.
938 *
939 * @param pThis The net filter instance.
940 * @param pDev The net device. Can be NULL, in which case
941 * we'll try to retrieve it from @a pThis.
942 */
943static void vboxNetFltLinuxUnhookDev(PVBOXNETFLTINS pThis, struct net_device *pDev)
944{
945 PVBOXNETDEVICEOPSOVERRIDE pOverride;
946 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
947
948 RTSpinlockAcquireNoInts(pThis->hSpinlock, &Tmp);
949 if (!pDev)
950 pDev = ASMAtomicUoReadPtrT(&pThis->u.s.pDev, struct net_device *);
951 if (VALID_PTR(pDev))
952 {
953 pOverride = (PVBOXNETDEVICEOPSOVERRIDE)pDev->netdev_ops;
954 if ( VALID_PTR(pOverride)
955 && pOverride->u32Magic == VBOXNETDEVICEOPSOVERRIDE_MAGIC
956 && VALID_PTR(pOverride->pOrgOps)
957 )
958 {
959 ASMAtomicWritePtr((void * volatile *)&pDev->netdev_ops, pOverride->pOrgOps);
960 ASMAtomicWriteU32(&pOverride->u32Magic, 0);
961 }
962 else
963 pOverride = NULL;
964 }
965 else
966 pOverride = NULL;
967 RTSpinlockReleaseNoInts(pThis->hSpinlock, &Tmp);
968
969 if (pOverride)
970 {
971 printk("vboxnetflt: dropped %llu out of %llu packets\n", pOverride->cFiltered, pOverride->cTotal);
972 RTMemFree(pOverride);
973 }
974}
975
976#endif /* VBOXNETFLT_WITH_FILTER_HOST2GUEST_SKBS_EXPERIMENT */
977
978
979/**
980 * Reads and retains the host interface handle.
981 *
982 * @returns The handle, NULL if detached.
983 * @param pThis
984 */
985DECLINLINE(struct net_device *) vboxNetFltLinuxRetainNetDev(PVBOXNETFLTINS pThis)
986{
987#if 0
988 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
989 struct net_device *pDev = NULL;
990
991 Log(("vboxNetFltLinuxRetainNetDev\n"));
992 /*
993 * Be careful here to avoid problems racing the detached callback.
994 */
995 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
996 if (!ASMAtomicUoReadBool(&pThis->fDisconnectedFromHost))
997 {
998 pDev = (struct net_device *)ASMAtomicUoReadPtr((void * volatile *)&pThis->u.s.pDev);
999 if (pDev)
1000 {
1001 dev_hold(pDev);
1002 Log(("vboxNetFltLinuxRetainNetDev: Device %p(%s) retained. ref=%d\n", pDev, pDev->name, atomic_read(&pDev->refcnt)));
1003 }
1004 }
1005 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1006
1007 Log(("vboxNetFltLinuxRetainNetDev - done\n"));
1008 return pDev;
1009#else
1010 return ASMAtomicUoReadPtrT(&pThis->u.s.pDev, struct net_device *);
1011#endif
1012}
1013
1014
1015/**
1016 * Release the host interface handle previously retained
1017 * by vboxNetFltLinuxRetainNetDev.
1018 *
1019 * @param pThis The instance.
1020 * @param pDev The vboxNetFltLinuxRetainNetDev
1021 * return value, NULL is fine.
1022 */
1023DECLINLINE(void) vboxNetFltLinuxReleaseNetDev(PVBOXNETFLTINS pThis, struct net_device *pDev)
1024{
1025#if 0
1026 Log(("vboxNetFltLinuxReleaseNetDev\n"));
1027 NOREF(pThis);
1028 if (pDev)
1029 {
1030 dev_put(pDev);
1031 Log(("vboxNetFltLinuxReleaseNetDev: Device %p(%s) released. ref=%d\n", pDev, pDev->name, atomic_read(&pDev->refcnt)));
1032 }
1033 Log(("vboxNetFltLinuxReleaseNetDev - done\n"));
1034#endif
1035}
1036
1037#define VBOXNETFLT_CB_TAG(skb) (0xA1C90000 | (skb->dev->ifindex & 0xFFFF))
1038#define VBOXNETFLT_SKB_TAG(skb) (*(uint32_t*)&((skb)->cb[sizeof((skb)->cb)-sizeof(uint32_t)]))
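/*
 * How the tag works: when we inject a buffer, vboxNetFltLinuxSkBufFromSG
 * stamps the last four bytes of skb->cb (scratch space owned by whoever
 * currently holds the skb) with a magic derived from the interface index:
 *
 *   VBOXNETFLT_SKB_TAG(pPkt) = VBOXNETFLT_CB_TAG(pPkt);
 *
 * The packet handler then uses vboxNetFltLinuxSkBufIsOur() below to skip
 * buffers we created ourselves.
 */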
1039
1040/**
1041 * Checks whether this is an sk_buff created by vboxNetFltLinuxSkBufFromSG,
1042 * i.e. a buffer which we're pushing and which should be ignored by the filter callbacks.
1043 *
1044 * @returns true / false accordingly.
1045 * @param pBuf The sk_buff.
1046 */
1047DECLINLINE(bool) vboxNetFltLinuxSkBufIsOur(struct sk_buff *pBuf)
1048{
1049 return VBOXNETFLT_SKB_TAG(pBuf) == VBOXNETFLT_CB_TAG(pBuf);
1050}
1051
1052
1053/**
1054 * Internal worker that creates a Linux sk_buff for a
1055 * (scatter/)gather list.
1056 *
1057 * @returns Pointer to the sk_buff.
1058 * @param pThis The instance.
1059 * @param pSG The (scatter/)gather list.
1060 * @param fDstWire Set if the destination is the wire.
1061 */
1062static struct sk_buff *vboxNetFltLinuxSkBufFromSG(PVBOXNETFLTINS pThis, PINTNETSG pSG, bool fDstWire)
1063{
1064 struct sk_buff *pPkt;
1065 struct net_device *pDev;
1066 unsigned fGsoType = 0;
1067
1068 if (pSG->cbTotal == 0)
1069 {
1070 LogRel(("VBoxNetFlt: Dropped empty packet coming from internal network.\n"));
1071 return NULL;
1072 }
1073
1074 /** @todo We should use fragments mapping the SG buffers with large packets.
1075 * 256 bytes seems to be a threshold commonly used for this. It
1076 * requires some nasty work on the intnet side though... */
1077 /*
1078 * Allocate a packet and copy over the data.
1079 */
1080 pDev = ASMAtomicUoReadPtrT(&pThis->u.s.pDev, struct net_device *);
1081 pPkt = dev_alloc_skb(pSG->cbTotal + NET_IP_ALIGN);
1082 if (RT_UNLIKELY(!pPkt))
1083 {
1084 Log(("vboxNetFltLinuxSkBufFromSG: Failed to allocate sk_buff(%u).\n", pSG->cbTotal));
1085 pSG->pvUserData = NULL;
1086 return NULL;
1087 }
1088 pPkt->dev = pDev;
1089 pPkt->ip_summed = CHECKSUM_NONE;
1090
1091 /* Align the IP header on a 16-byte boundary: 2 + 14 (Ethernet header size). */
1092 skb_reserve(pPkt, NET_IP_ALIGN);
1093
1094 /* Copy the segments. */
1095 skb_put(pPkt, pSG->cbTotal);
1096 IntNetSgRead(pSG, pPkt->data);
1097
1098#if defined(VBOXNETFLT_WITH_GSO_XMIT_WIRE) || defined(VBOXNETFLT_WITH_GSO_XMIT_HOST)
1099 /*
1100 * Setup GSO if used by this packet.
1101 */
1102 switch ((PDMNETWORKGSOTYPE)pSG->GsoCtx.u8Type)
1103 {
1104 default:
1105 AssertMsgFailed(("%u (%s)\n", pSG->GsoCtx.u8Type, PDMNetGsoTypeName((PDMNETWORKGSOTYPE)pSG->GsoCtx.u8Type) ));
1106 /* fall thru */
1107 case PDMNETWORKGSOTYPE_INVALID:
1108 fGsoType = 0;
1109 break;
1110 case PDMNETWORKGSOTYPE_IPV4_TCP:
1111 fGsoType = SKB_GSO_TCPV4;
1112 break;
1113 case PDMNETWORKGSOTYPE_IPV4_UDP:
1114 fGsoType = SKB_GSO_UDP;
1115 break;
1116 case PDMNETWORKGSOTYPE_IPV6_TCP:
1117 fGsoType = SKB_GSO_TCPV6;
1118 break;
1119 }
1120 if (fGsoType)
1121 {
1122 struct skb_shared_info *pShInfo = skb_shinfo(pPkt);
1123
1124 pShInfo->gso_type = fGsoType | SKB_GSO_DODGY;
1125 pShInfo->gso_size = pSG->GsoCtx.cbMaxSeg;
1126 pShInfo->gso_segs = PDMNetGsoCalcSegmentCount(&pSG->GsoCtx, pSG->cbTotal);
1127
1128 /*
1129 * We need to set checksum fields even if the packet goes to the host
1130 * directly, as it may be immediately forwarded by the IP layer, see @bugref{5020}.
1131 */
1132 Assert(skb_headlen(pPkt) >= pSG->GsoCtx.cbHdrs);
1133 pPkt->ip_summed = CHECKSUM_PARTIAL;
1134# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 22)
1135 pPkt->csum_start = skb_headroom(pPkt) + pSG->GsoCtx.offHdr2;
1136 if (fGsoType & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))
1137 pPkt->csum_offset = RT_OFFSETOF(RTNETTCP, th_sum);
1138 else
1139 pPkt->csum_offset = RT_OFFSETOF(RTNETUDP, uh_sum);
1140# else
1141 pPkt->h.raw = pPkt->data + pSG->GsoCtx.offHdr2;
1142 if (fGsoType & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))
1143 pPkt->csum = RT_OFFSETOF(RTNETTCP, th_sum);
1144 else
1145 pPkt->csum = RT_OFFSETOF(RTNETUDP, uh_sum);
1146# endif
1147 if (!fDstWire)
1148 PDMNetGsoPrepForDirectUse(&pSG->GsoCtx, pPkt->data, pSG->cbTotal, PDMNETCSUMTYPE_PSEUDO);
1149 }
1150#endif /* VBOXNETFLT_WITH_GSO_XMIT_WIRE || VBOXNETFLT_WITH_GSO_XMIT_HOST */
1151
1152 /*
1153 * Finish up the socket buffer.
1154 */
1155 pPkt->protocol = eth_type_trans(pPkt, pDev);
1156 if (fDstWire)
1157 {
1158 VBOX_SKB_RESET_NETWORK_HDR(pPkt);
1159
1160 /* Restore the Ethernet header. */
1161 skb_push(pPkt, ETH_HLEN); /** @todo VLAN: +4 if VLAN? */
1162 VBOX_SKB_RESET_MAC_HDR(pPkt);
1163 }
1164 VBOXNETFLT_SKB_TAG(pPkt) = VBOXNETFLT_CB_TAG(pPkt);
1165
1166 return pPkt;
1167}
1168
1169
1170/**
1171 * Initializes a SG list from an sk_buff.
1172 *
1173 * @returns Number of segments.
1174 * @param pThis The instance.
1175 * @param pBuf The sk_buff.
1176 * @param pSG The SG.
1177 * @param pvFrame The frame pointer, optional.
1178 * @param cSegs The number of segments allocated for the SG.
1179 * This should match the number in the sk_buff exactly!
1180 * @param fSrc The source of the frame.
1181 * @param pGsoCtx Pointer to the GSO context if it's a GSO
1182 * internal network frame. NULL if regular frame.
1183 */
1184DECLINLINE(void) vboxNetFltLinuxSkBufToSG(PVBOXNETFLTINS pThis, struct sk_buff *pBuf, PINTNETSG pSG,
1185 unsigned cSegs, uint32_t fSrc, PCPDMNETWORKGSO pGsoCtx)
1186{
1187 int i;
1188 NOREF(pThis);
1189
1190 Assert(!skb_shinfo(pBuf)->frag_list);
1191
1192 if (!pGsoCtx)
1193 IntNetSgInitTempSegs(pSG, pBuf->len, cSegs, 0 /*cSegsUsed*/);
1194 else
1195 IntNetSgInitTempSegsGso(pSG, pBuf->len, cSegs, 0 /*cSegsUsed*/, pGsoCtx);
1196
1197#ifdef VBOXNETFLT_SG_SUPPORT
1198 pSG->aSegs[0].cb = skb_headlen(pBuf);
1199 pSG->aSegs[0].pv = pBuf->data;
1200 pSG->aSegs[0].Phys = NIL_RTHCPHYS;
1201
1202 for (i = 0; i < skb_shinfo(pBuf)->nr_frags; i++)
1203 {
1204 skb_frag_t *pFrag = &skb_shinfo(pBuf)->frags[i];
1205 pSG->aSegs[i+1].cb = pFrag->size;
1206 pSG->aSegs[i+1].pv = kmap(pFrag->page);
1207 printk("%p = kmap()\n", pSG->aSegs[i+1].pv);
1208 pSG->aSegs[i+1].Phys = NIL_RTHCPHYS;
1209 }
1210 ++i;
1211
1212#else
1213 pSG->aSegs[0].cb = pBuf->len;
1214 pSG->aSegs[0].pv = pBuf->data;
1215 pSG->aSegs[0].Phys = NIL_RTHCPHYS;
1216 i = 1;
1217#endif
1218
1219 pSG->cSegsUsed = i;
1220
1221#ifdef PADD_RUNT_FRAMES_FROM_HOST
1222 /*
1223 * Add a trailer if the frame is too small. (The 60-byte minimum here
1224 * excludes the 4-byte FCS the NIC appends, i.e. Ethernet's 64-byte minimum frame.)
1225 * Since we're getting to the packet before it is framed, it has not
1226 * yet been padded. The current solution is to add a segment pointing
1227 * to a buffer containing all zeros and pray that works for all frames...
1228 */
1229 if (pSG->cbTotal < 60 && (fSrc & INTNETTRUNKDIR_HOST))
1230 {
1231 static uint8_t const s_abZero[128] = {0};
1232
1233 AssertReturnVoid(i < cSegs);
1234
1235 pSG->aSegs[i].Phys = NIL_RTHCPHYS;
1236 pSG->aSegs[i].pv = (void *)&s_abZero[0];
1237 pSG->aSegs[i].cb = 60 - pSG->cbTotal;
1238 pSG->cbTotal = 60;
1239 pSG->cSegsUsed++;
1240 Assert(i + 1 <= pSG->cSegsAlloc);
1241 }
1242#endif
1243
1244 Log4(("vboxNetFltLinuxSkBufToSG: allocated=%d, segments=%d frags=%d next=%p frag_list=%p pkt_type=%x fSrc=%x\n",
1245 pSG->cSegsAlloc, pSG->cSegsUsed, skb_shinfo(pBuf)->nr_frags, pBuf->next, skb_shinfo(pBuf)->frag_list, pBuf->pkt_type, fSrc));
1246 for (i = 0; i < pSG->cSegsUsed; i++)
1247 Log4(("vboxNetFltLinuxSkBufToSG: #%d: cb=%d pv=%p\n",
1248 i, pSG->aSegs[i].cb, pSG->aSegs[i].pv));
1249}
1250
1251/**
1252 * Packet handler; intercepts packets on the host interface we're attached to.
1253 *
1254 * @returns 0 (the return value is ignored by the kernel).
1255 * @param pBuf The sk_buff.
1256 * @param pSkbDev The device the packet was received on.
1257 * @param pPacketType Our packet type structure (member of VBOXNETFLTINS).
1258 * @param pOrigDev The device the packet originally arrived on
1259 * (2.6.14 and later only).
1260 */
1261#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 14)
1262static int vboxNetFltLinuxPacketHandler(struct sk_buff *pBuf,
1263 struct net_device *pSkbDev,
1264 struct packet_type *pPacketType,
1265 struct net_device *pOrigDev)
1266#else
1267static int vboxNetFltLinuxPacketHandler(struct sk_buff *pBuf,
1268 struct net_device *pSkbDev,
1269 struct packet_type *pPacketType)
1270#endif
1271{
1272 PVBOXNETFLTINS pThis;
1273 struct net_device *pDev;
1274 LogFlow(("vboxNetFltLinuxPacketHandler: pBuf=%p pSkbDev=%p pPacketType=%p\n",
1275 pBuf, pSkbDev, pPacketType));
1276#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 18)
1277 Log3(("vboxNetFltLinuxPacketHandler: skb len=%u data_len=%u truesize=%u next=%p nr_frags=%u gso_size=%u gso_seqs=%u gso_type=%x frag_list=%p pkt_type=%x\n",
1278 pBuf->len, pBuf->data_len, pBuf->truesize, pBuf->next, skb_shinfo(pBuf)->nr_frags, skb_shinfo(pBuf)->gso_size, skb_shinfo(pBuf)->gso_segs, skb_shinfo(pBuf)->gso_type, skb_shinfo(pBuf)->frag_list, pBuf->pkt_type));
1279 Log4(("vboxNetFltLinuxPacketHandler: packet dump follows:\n%.*Rhxd\n", pBuf->len-pBuf->data_len, skb_mac_header(pBuf)));
1280#else
1281 Log3(("vboxNetFltLinuxPacketHandler: skb len=%u data_len=%u truesize=%u next=%p nr_frags=%u tso_size=%u tso_seqs=%u frag_list=%p pkt_type=%x\n",
1282 pBuf->len, pBuf->data_len, pBuf->truesize, pBuf->next, skb_shinfo(pBuf)->nr_frags, skb_shinfo(pBuf)->tso_size, skb_shinfo(pBuf)->tso_segs, skb_shinfo(pBuf)->frag_list, pBuf->pkt_type));
1283#endif
1284 /*
1285 * Drop it immediately?
1286 */
1287 if (!pBuf)
1288 return 0;
1289
1290 pThis = VBOX_FLT_PT_TO_INST(pPacketType);
1291 pDev = ASMAtomicUoReadPtrT(&pThis->u.s.pDev, struct net_device *);
1292 if (pThis->u.s.pDev != pSkbDev)
1293 {
1294 Log(("vboxNetFltLinuxPacketHandler: Devices do not match, pThis may be wrong! pThis=%p\n", pThis));
1295 return 0;
1296 }
1297
1298 Log4(("vboxNetFltLinuxPacketHandler: pBuf->cb dump:\n%.*Rhxd\n", sizeof(pBuf->cb), pBuf->cb));
1299 if (vboxNetFltLinuxSkBufIsOur(pBuf))
1300 {
1301 Log2(("vboxNetFltLinuxPacketHandler: got our own sk_buff, drop it.\n"));
1302 dev_kfree_skb(pBuf);
1303 return 0;
1304 }
1305
1306#ifndef VBOXNETFLT_SG_SUPPORT
1307 {
1308 /*
1309 * Get rid of fragmented packets, they cause too much trouble.
1310 */
1311 struct sk_buff *pCopy = skb_copy(pBuf, GFP_ATOMIC);
1312 kfree_skb(pBuf);
1313 if (!pCopy)
1314 {
1315 LogRel(("VBoxNetFlt: Failed to allocate packet buffer, dropping the packet.\n"));
1316 return 0;
1317 }
1318 pBuf = pCopy;
1319# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 18)
1320 Log3(("vboxNetFltLinuxPacketHandler: skb copy len=%u data_len=%u truesize=%u next=%p nr_frags=%u gso_size=%u gso_seqs=%u gso_type=%x frag_list=%p pkt_type=%x\n",
1321 pBuf->len, pBuf->data_len, pBuf->truesize, pBuf->next, skb_shinfo(pBuf)->nr_frags, skb_shinfo(pBuf)->gso_size, skb_shinfo(pBuf)->gso_segs, skb_shinfo(pBuf)->gso_type, skb_shinfo(pBuf)->frag_list, pBuf->pkt_type));
1322 Log4(("vboxNetFltLinuxPacketHandler: packet dump follows:\n%.*Rhxd\n", pBuf->len-pBuf->data_len, skb_mac_header(pBuf)));
1323# else
1324 Log3(("vboxNetFltLinuxPacketHandler: skb copy len=%u data_len=%u truesize=%u next=%p nr_frags=%u tso_size=%u tso_seqs=%u frag_list=%p pkt_type=%x\n",
1325 pBuf->len, pBuf->data_len, pBuf->truesize, pBuf->next, skb_shinfo(pBuf)->nr_frags, skb_shinfo(pBuf)->tso_size, skb_shinfo(pBuf)->tso_segs, skb_shinfo(pBuf)->frag_list, pBuf->pkt_type));
1326# endif
1327 }
1328#endif
1329
1330#ifdef VBOXNETFLT_LINUX_NO_XMIT_QUEUE
1331 /* Forward it to the internal network. */
1332 vboxNetFltLinuxForwardToIntNet(pThis, pBuf);
1333#else
1334 /* Add the packet to transmit queue and schedule the bottom half. */
1335 skb_queue_tail(&pThis->u.s.XmitQueue, pBuf);
1336 schedule_work(&pThis->u.s.XmitTask);
1337 Log4(("vboxNetFltLinuxPacketHandler: scheduled work %p for sk_buff %p\n",
1338 &pThis->u.s.XmitTask, pBuf));
1339#endif
1340
1341 /* It does not really matter what we return, it is ignored by the kernel. */
1342 return 0;
1343}
1344
1345/**
1346 * Calculate the number of INTNETSEG segments the socket buffer will need.
1347 *
1348 * @returns Segment count.
1349 * @param pBuf The socket buffer.
1350 */
1351DECLINLINE(unsigned) vboxNetFltLinuxCalcSGSegments(struct sk_buff *pBuf)
1352{
1353#ifdef VBOXNETFLT_SG_SUPPORT
1354 unsigned cSegs = 1 + skb_shinfo(pBuf)->nr_frags;
1355#else
1356 unsigned cSegs = 1;
1357#endif
1358#ifdef PADD_RUNT_FRAMES_FROM_HOST
1359 /* vboxNetFltLinuxSkBufToSG adds a padding segment if it's a runt. */
1360 if (pBuf->len < 60)
1361 cSegs++;
1362#endif
1363 return cSegs;
1364}
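/*
 * Example (sketch): a 42-byte ARP request coming from the host, with
 * VBOXNETFLT_SG_SUPPORT disabled, yields cSegs = 1 (linear data)
 * + 1 (room for the runt padding segment) = 2.
 */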
1365
1366/**
1367 * Destroy the intnet scatter / gather buffer created by
1368 * vboxNetFltLinuxSkBufToSG.
1369 */
1370static void vboxNetFltLinuxDestroySG(PINTNETSG pSG)
1371{
1372#ifdef VBOXNETFLT_SG_SUPPORT
1373 int i;
1374
1375 for (i = 1; i < pSG->cSegsUsed; i++) /* segments 1+ hold the kmap'ed fragments */
1376 {
1377 printk("kunmap(%p)\n", pSG->aSegs[i].pv);
1378 kunmap(pSG->aSegs[i].pv);
1379 }
1380#endif
1381 NOREF(pSG);
1382}
1383
1384#ifdef LOG_ENABLED
1385/**
1386 * Logging helper.
1387 */
1388static void vboxNetFltDumpPacket(PINTNETSG pSG, bool fEgress, const char *pszWhere, int iIncrement)
1389{
1390 uint8_t *pInt, *pExt;
1391 static int iPacketNo = 1;
1392 iPacketNo += iIncrement;
1393 if (fEgress)
1394 {
1395 pExt = pSG->aSegs[0].pv;
1396 pInt = pExt + 6;
1397 }
1398 else
1399 {
1400 pInt = pSG->aSegs[0].pv;
1401 pExt = pInt + 6;
1402 }
1403 Log(("VBoxNetFlt: (int)%02x:%02x:%02x:%02x:%02x:%02x"
1404 " %s (%s)%02x:%02x:%02x:%02x:%02x:%02x (%u bytes) packet #%u\n",
1405 pInt[0], pInt[1], pInt[2], pInt[3], pInt[4], pInt[5],
1406 fEgress ? "-->" : "<--", pszWhere,
1407 pExt[0], pExt[1], pExt[2], pExt[3], pExt[4], pExt[5],
1408 pSG->cbTotal, iPacketNo));
1409 Log3(("%.*Rhxd\n", pSG->aSegs[0].cb, pSG->aSegs[0].pv));
1410}
1411#else
1412# define vboxNetFltDumpPacket(a, b, c, d) do {} while (0)
1413#endif
1414
1415#ifdef VBOXNETFLT_WITH_GSO_RECV
1416
1417/**
1418 * Worker for vboxNetFltLinuxForwardToIntNet that checks if we can forward a
1419 * GSO socket buffer without having to segment it.
1420 *
1421 * @returns true on success, false if needs segmenting.
1422 * @param pThis The net filter instance.
1423 * @param pSkb The GSO socket buffer.
1424 * @param fSrc The source.
1425 * @param pGsoCtx Where to return the GSO context on success.
1426 */
1427static bool vboxNetFltLinuxCanForwardAsGso(PVBOXNETFLTINS pThis, struct sk_buff *pSkb, uint32_t fSrc,
1428 PPDMNETWORKGSO pGsoCtx)
1429{
1430 PDMNETWORKGSOTYPE enmGsoType;
1431 uint16_t uEtherType;
1432 unsigned int cbTransport;
1433 unsigned int offTransport;
1434 unsigned int cbTransportHdr;
1435 unsigned uProtocol;
1436 union
1437 {
1438 RTNETIPV4 IPv4;
1439 RTNETIPV6 IPv6;
1440 RTNETTCP Tcp;
1441 uint8_t ab[40];
1442 uint16_t au16[40/2];
1443 uint32_t au32[40/4];
1444 } Buf;
1445
1446 /*
1447 * Check the GSO properties of the socket buffer and make sure it fits.
1448 */
1449 /** @todo Figure out how to handle SKB_GSO_TCP_ECN! */
1450 if (RT_UNLIKELY( skb_shinfo(pSkb)->gso_type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCPV6 | SKB_GSO_TCPV4) ))
1451 {
1452 Log5(("vboxNetFltLinuxCanForwardAsGso: gso_type=%#x\n", skb_shinfo(pSkb)->gso_type));
1453 return false;
1454 }
1455 if (RT_UNLIKELY( skb_shinfo(pSkb)->gso_size < 1
1456 || pSkb->len > VBOX_MAX_GSO_SIZE ))
1457 {
1458 Log5(("vboxNetFltLinuxCanForwardAsGso: gso_size=%#x skb_len=%#x (max=%#x)\n", skb_shinfo(pSkb)->gso_size, pSkb->len, VBOX_MAX_GSO_SIZE));
1459 return false;
1460 }
1461 /*
1462 * It is possible to receive GSO packets from the wire if GRO is enabled.
1463 */
1464 if (RT_UNLIKELY(fSrc & INTNETTRUNKDIR_WIRE))
1465 {
1466 Log5(("vboxNetFltLinuxCanForwardAsGso: fSrc=wire\n"));
1467#ifdef VBOXNETFLT_WITH_GRO
1468 /*
1469 * The packet came from the wire and the driver has already consumed
1470 * the MAC header. We need to restore it.
1471 */
1472 pSkb->mac_len = skb_network_header(pSkb) - skb_mac_header(pSkb);
1473 skb_push(pSkb, pSkb->mac_len);
1474 Log5(("vboxNetFltLinuxCanForwardAsGso: mac_len=%d data=%p mac_header=%p network_header=%p\n",
1475 pSkb->mac_len, pSkb->data, skb_mac_header(pSkb), skb_network_header(pSkb)));
1476#else /* !VBOXNETFLT_WITH_GRO */
1477 /* Older kernels didn't have GRO. */
1478 return false;
1479#endif /* !VBOXNETFLT_WITH_GRO */
1480 }
1481 else
1482 {
1483 /*
1484 * skb_gso_segment does the following. Do we need to do it as well?
1485 */
1486#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 22)
1487 skb_reset_mac_header(pSkb);
1488 pSkb->mac_len = pSkb->network_header - pSkb->mac_header;
1489#else
1490 pSkb->mac.raw = pSkb->data;
1491 pSkb->mac_len = pSkb->nh.raw - pSkb->data;
1492#endif
1493 }
1494
1495 /*
1496 * Switch on the ethertype.
1497 */
1498 uEtherType = pSkb->protocol;
1499 if ( uEtherType == RT_H2N_U16_C(RTNET_ETHERTYPE_VLAN)
1500 && pSkb->mac_len == sizeof(RTNETETHERHDR) + sizeof(uint32_t))
1501 {
1502 uint16_t const *puEtherType = skb_header_pointer(pSkb, sizeof(RTNETETHERHDR) + sizeof(uint16_t), sizeof(uint16_t), &Buf);
1503 if (puEtherType)
1504 uEtherType = *puEtherType;
1505 }
1506 switch (uEtherType)
1507 {
1508 case RT_H2N_U16_C(RTNET_ETHERTYPE_IPV4):
1509 {
1510 unsigned int cbHdr;
1511 PCRTNETIPV4 pIPv4 = (PCRTNETIPV4)skb_header_pointer(pSkb, pSkb->mac_len, sizeof(Buf.IPv4), &Buf);
1512 if (RT_UNLIKELY(!pIPv4))
1513 {
1514 Log5(("vboxNetFltLinuxCanForwardAsGso: failed to access IPv4 hdr\n"));
1515 return false;
1516 }
1517
1518 cbHdr = pIPv4->ip_hl * 4;
1519 cbTransport = RT_N2H_U16(pIPv4->ip_len);
1520 if (RT_UNLIKELY( cbHdr < RTNETIPV4_MIN_LEN
1521 || cbHdr > cbTransport ))
1522 {
1523 Log5(("vboxNetFltLinuxCanForwardAsGso: invalid IPv4 lengths: ip_hl=%u ip_len=%u\n", pIPv4->ip_hl, RT_N2H_U16(pIPv4->ip_len)));
1524 return false;
1525 }
1526 cbTransport -= cbHdr;
1527 offTransport = pSkb->mac_len + cbHdr;
1528 uProtocol = pIPv4->ip_p;
1529 if (uProtocol == RTNETIPV4_PROT_TCP)
1530 enmGsoType = PDMNETWORKGSOTYPE_IPV4_TCP;
1531 else if (uProtocol == RTNETIPV4_PROT_UDP)
1532 enmGsoType = PDMNETWORKGSOTYPE_IPV4_UDP;
1533 else /** @todo IPv6: 4to6 tunneling */
1534 enmGsoType = PDMNETWORKGSOTYPE_INVALID;
1535 break;
1536 }
1537
1538 case RT_H2N_U16_C(RTNET_ETHERTYPE_IPV6):
1539 {
1540 PCRTNETIPV6 pIPv6 = (PCRTNETIPV6)skb_header_pointer(pSkb, pSkb->mac_len, sizeof(Buf.IPv6), &Buf);
1541 if (RT_UNLIKELY(!pIPv6))
1542 {
1543 Log5(("vboxNetFltLinuxCanForwardAsGso: failed to access IPv6 hdr\n"));
1544 return false;
1545 }
1546
1547 cbTransport = RT_N2H_U16(pIPv6->ip6_plen);
1548 offTransport = pSkb->mac_len + sizeof(RTNETIPV6);
1549 uProtocol = pIPv6->ip6_nxt;
1550 /** @todo IPv6: Dig our way out of the other headers. */
1551 if (uProtocol == RTNETIPV4_PROT_TCP)
1552 enmGsoType = PDMNETWORKGSOTYPE_IPV6_TCP;
1553 else if (uProtocol == RTNETIPV4_PROT_UDP)
1554 enmGsoType = PDMNETWORKGSOTYPE_IPV6_UDP;
1555 else
1556 enmGsoType = PDMNETWORKGSOTYPE_INVALID;
1557 break;
1558 }
1559
1560 default:
1561 Log5(("vboxNetFltLinuxCanForwardAsGso: uEtherType=%#x\n", RT_H2N_U16(uEtherType)));
1562 return false;
1563 }
1564
1565 if (enmGsoType == PDMNETWORKGSOTYPE_INVALID)
1566 {
1567 Log5(("vboxNetFltLinuxCanForwardAsGso: Unsupported protocol %d\n", uProtocol));
1568 return false;
1569 }
1570
1571 if (RT_UNLIKELY( offTransport + cbTransport <= offTransport
1572 || offTransport + cbTransport > pSkb->len
1573 || cbTransport < (uProtocol == RTNETIPV4_PROT_TCP ? RTNETTCP_MIN_LEN : RTNETUDP_MIN_LEN)) )
1574 {
1575 Log5(("vboxNetFltLinuxCanForwardAsGso: Bad transport length; off=%#x + cb=%#x => %#x; skb_len=%#x (%s)\n",
1576 offTransport, cbTransport, offTransport + cbTransport, pSkb->len, PDMNetGsoTypeName(enmGsoType) ));
1577 return false;
1578 }
1579
1580 /*
1581 * Check the TCP/UDP bits.
1582 */
1583 if (uProtocol == RTNETIPV4_PROT_TCP)
1584 {
1585 PCRTNETTCP pTcp = (PCRTNETTCP)skb_header_pointer(pSkb, offTransport, sizeof(Buf.Tcp), &Buf);
1586 if (RT_UNLIKELY(!pTcp))
1587 {
1588 Log5(("vboxNetFltLinuxCanForwardAsGso: failed to access TCP hdr\n"));
1589 return false;
1590 }
1591
1592 cbTransportHdr = pTcp->th_off * 4;
1593 if (RT_UNLIKELY( cbTransportHdr < RTNETTCP_MIN_LEN
1594 || cbTransportHdr > cbTransport
1595 || offTransport + cbTransportHdr >= UINT8_MAX
1596 || offTransport + cbTransportHdr >= pSkb->len ))
1597 {
1598 Log5(("vboxNetFltLinuxCanForwardAsGso: No space for TCP header; off=%#x cb=%#x skb_len=%#x\n", offTransport, cbTransportHdr, pSkb->len));
1599 return false;
1600 }
1601
1602 }
1603 else
1604 {
1605 Assert(uProtocol == RTNETIPV4_PROT_UDP);
1606 cbTransportHdr = sizeof(RTNETUDP);
1607 if (RT_UNLIKELY( offTransport + cbTransportHdr >= UINT8_MAX
1608 || offTransport + cbTransportHdr >= pSkb->len ))
1609 {
1610 Log5(("vboxNetFltLinuxCanForwardAsGso: No space for UDP header; off=%#x skb_len=%#x\n", offTransport, pSkb->len));
1611 return false;
1612 }
1613 }
1614
1615 /*
1616 * We're good, init the GSO context.
1617 */
1618 pGsoCtx->u8Type = enmGsoType;
1619 pGsoCtx->cbHdrs = offTransport + cbTransportHdr;
1620 pGsoCtx->cbMaxSeg = skb_shinfo(pSkb)->gso_size;
1621 pGsoCtx->offHdr1 = pSkb->mac_len;
1622 pGsoCtx->offHdr2 = offTransport;
1623 pGsoCtx->au8Unused[0] = 0;
1624 pGsoCtx->au8Unused[1] = 0;
1625
1626 return true;
1627}
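/*
 * Worked example (sketch): for a plain Ethernet + IPv4 + TCP frame with no
 * options, mac_len = 14 and both headers are 20 bytes, giving
 * offHdr1 = 14, offHdr2 = offTransport = 14 + 20 = 34 and
 * cbHdrs = 34 + 20 = 54; cbMaxSeg is simply gso_size from the skb.
 */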
1628
1629/**
1630 * Forward the socket buffer as a GSO internal network frame.
1631 *
1632 * @returns IPRT status code.
1633 * @param pThis The net filter instance.
1634 * @param pSkb The GSO socket buffer.
1635 * @param fSrc The source.
1636 * @param pGsoCtx The GSO context prepared by vboxNetFltLinuxCanForwardAsGso.
1637 */
1638static int vboxNetFltLinuxForwardAsGso(PVBOXNETFLTINS pThis, struct sk_buff *pSkb, uint32_t fSrc, PCPDMNETWORKGSO pGsoCtx)
1639{
1640 int rc;
1641 unsigned cSegs = vboxNetFltLinuxCalcSGSegments(pSkb);
1642 if (RT_LIKELY(cSegs <= MAX_SKB_FRAGS + 1))
1643 {
1644 PINTNETSG pSG = (PINTNETSG)alloca(RT_OFFSETOF(INTNETSG, aSegs[cSegs]));
1645 if (RT_LIKELY(pSG))
1646 {
1647 vboxNetFltLinuxSkBufToSG(pThis, pSkb, pSG, cSegs, fSrc, pGsoCtx);
1648
1649 vboxNetFltDumpPacket(pSG, false, (fSrc & INTNETTRUNKDIR_HOST) ? "host" : "wire", 1);
1650 pThis->pSwitchPort->pfnRecv(pThis->pSwitchPort, NULL /* pvIf */, pSG, fSrc);
1651
1652 vboxNetFltLinuxDestroySG(pSG);
1653 rc = VINF_SUCCESS;
1654 }
1655 else
1656 {
1657 Log(("VBoxNetFlt: Dropping the sk_buff (failure case).\n"));
1658 rc = VERR_NO_MEMORY;
1659 }
1660 }
1661 else
1662 {
1663 Log(("VBoxNetFlt: Bad sk_buff? cSegs=%#x.\n", cSegs));
1664 rc = VERR_INTERNAL_ERROR_3;
1665 }
1666
1667 Log4(("VBoxNetFlt: Dropping the sk_buff.\n"));
1668 dev_kfree_skb(pSkb);
1669 return rc;
1670}
1671
1672#endif /* VBOXNETFLT_WITH_GSO_RECV */
1673
1674/**
1675 * Worker for vboxNetFltLinuxForwardToIntNet.
1676 *
1677 * @returns VINF_SUCCESS, VERR_NO_MEMORY or VERR_INTERNAL_ERROR_3.
1678 * @param pThis The net filter instance.
1679 * @param pBuf The socket buffer.
1680 * @param fSrc The source.
1681 */
1682static int vboxNetFltLinuxForwardSegment(PVBOXNETFLTINS pThis, struct sk_buff *pBuf, uint32_t fSrc)
1683{
1684 int rc;
1685 unsigned cSegs = vboxNetFltLinuxCalcSGSegments(pBuf);
1686 if (cSegs <= MAX_SKB_FRAGS + 1)
1687 {
1688 PINTNETSG pSG = (PINTNETSG)alloca(RT_OFFSETOF(INTNETSG, aSegs[cSegs]));
1689 if (RT_LIKELY(pSG))
1690 {
1691 if (fSrc & INTNETTRUNKDIR_WIRE)
1692 {
1693 /*
1694 * The packet came from the wire; the Ethernet header was removed by the device driver.
1695 * Restore it.
1696 */
1697 skb_push(pBuf, ETH_HLEN);
1698 }
1699
1700 vboxNetFltLinuxSkBufToSG(pThis, pBuf, pSG, cSegs, fSrc, NULL /*pGsoCtx*/);
1701
1702 vboxNetFltDumpPacket(pSG, false, (fSrc & INTNETTRUNKDIR_HOST) ? "host" : "wire", 1);
1703 pThis->pSwitchPort->pfnRecv(pThis->pSwitchPort, NULL /* pvIf */, pSG, fSrc);
1704
1705 vboxNetFltLinuxDestroySG(pSG);
1706 rc = VINF_SUCCESS;
1707 }
1708 else
1709 {
1710 Log(("VBoxNetFlt: Failed to allocate SG buffer.\n"));
1711 rc = VERR_NO_MEMORY;
1712 }
1713 }
1714 else
1715 {
1716 Log(("VBoxNetFlt: Bad sk_buff? cSegs=%#x.\n", cSegs));
1717 rc = VERR_INTERNAL_ERROR_3;
1718 }
1719
1720 Log4(("VBoxNetFlt: Dropping the sk_buff.\n"));
1721 dev_kfree_skb(pBuf);
1722 return rc;
1723}
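
/*
 * A note on the skb_push() above (sketch, not compiled): for an inbound
 * frame the driver has advanced pBuf->data past the 14-byte ethernet header,
 * so the push winds the data pointer back to make the header visible again.
 */
#if 0
/* before: pBuf->data -> IP header,        pBuf->len == cbPayload            */
skb_push(pBuf, ETH_HLEN);
/* after:  pBuf->data -> destination MAC,  pBuf->len == cbPayload + ETH_HLEN */
#endif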
1724
1725/**
1726 * Forwards the socket buffer to the internal network, segmenting it first if necessary.
1727 * @param pBuf The socket buffer. This is consumed by this function.
1728 */
1729static void vboxNetFltLinuxForwardToIntNet(PVBOXNETFLTINS pThis, struct sk_buff *pBuf)
1730{
1731 uint32_t fSrc = pBuf->pkt_type == PACKET_OUTGOING ? INTNETTRUNKDIR_HOST : INTNETTRUNKDIR_WIRE;
1732
1733#ifdef VBOXNETFLT_WITH_GSO
1734 if (skb_is_gso(pBuf))
1735 {
1736 PDMNETWORKGSO GsoCtx;
1737 Log3(("vboxNetFltLinuxForwardToIntNet: skb len=%u data_len=%u truesize=%u next=%p nr_frags=%u gso_size=%u gso_seqs=%u gso_type=%x frag_list=%p pkt_type=%x ip_summed=%d\n",
1738 pBuf->len, pBuf->data_len, pBuf->truesize, pBuf->next, skb_shinfo(pBuf)->nr_frags, skb_shinfo(pBuf)->gso_size, skb_shinfo(pBuf)->gso_segs, skb_shinfo(pBuf)->gso_type, skb_shinfo(pBuf)->frag_list, pBuf->pkt_type, pBuf->ip_summed));
1739# ifdef VBOXNETFLT_WITH_GSO_RECV
1740 if ( (skb_shinfo(pBuf)->gso_type & (SKB_GSO_UDP | SKB_GSO_TCPV6 | SKB_GSO_TCPV4))
1741 && vboxNetFltLinuxCanForwardAsGso(pThis, pBuf, fSrc, &GsoCtx) )
1742 vboxNetFltLinuxForwardAsGso(pThis, pBuf, fSrc, &GsoCtx);
1743 else
1744# endif
1745 {
1746 /* Need to segment the packet */
1747 struct sk_buff *pNext;
1748 struct sk_buff *pSegment = skb_gso_segment(pBuf, 0 /*supported features*/);
1749 if (IS_ERR(pSegment))
1750 {
1751 dev_kfree_skb(pBuf);
1752 LogRel(("VBoxNetFlt: Failed to segment a packet (%d).\n", PTR_ERR(pSegment)));
1753 return;
1754 }
1755
1756 for (; pSegment; pSegment = pNext)
1757 {
1758 Log3(("vboxNetFltLinuxForwardToIntNet: segment len=%u data_len=%u truesize=%u next=%p nr_frags=%u gso_size=%u gso_seqs=%u gso_type=%x frag_list=%p pkt_type=%x\n",
1759 pSegment->len, pSegment->data_len, pSegment->truesize, pSegment->next, skb_shinfo(pSegment)->nr_frags, skb_shinfo(pSegment)->gso_size, skb_shinfo(pSegment)->gso_segs, skb_shinfo(pSegment)->gso_type, skb_shinfo(pSegment)->frag_list, pSegment->pkt_type));
1760 pNext = pSegment->next;
1761 pSegment->next = NULL;
1762 vboxNetFltLinuxForwardSegment(pThis, pSegment, fSrc);
1763 }
1764 dev_kfree_skb(pBuf);
1765 }
1766 }
1767 else
1768#endif /* VBOXNETFLT_WITH_GSO */
1769 {
1770 if (pBuf->ip_summed == CHECKSUM_PARTIAL && pBuf->pkt_type == PACKET_OUTGOING)
1771 {
1772#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
1773 /*
1774 * Try to work around the problem with CentOS 4.7 and 5.2 (2.6.9
1775 * and 2.6.18 kernels): they pass a wrong 'h' pointer down. We take the
1776 * IP header length from the header itself and reconstruct the 'h'
1777 * pointer to the TCP (or whatever) header.
1778 */
1779 unsigned char *tmp = pBuf->h.raw;
1780 if (pBuf->h.raw == pBuf->nh.raw && pBuf->protocol == htons(ETH_P_IP))
1781 pBuf->h.raw = pBuf->nh.raw + pBuf->nh.iph->ihl * 4;
1782#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18) */
1783 if (VBOX_SKB_CHECKSUM_HELP(pBuf))
1784 {
1785 LogRel(("VBoxNetFlt: Failed to compute checksum, dropping the packet.\n"));
1786 dev_kfree_skb(pBuf);
1787 return;
1788 }
1789#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
1790 /* Restore the original (wrong) pointer. */
1791 pBuf->h.raw = tmp;
1792#endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18) */
1793 }
1794 vboxNetFltLinuxForwardSegment(pThis, pBuf, fSrc);
1795 }
1796}
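
/*
 * The software fallback above follows the usual skb_gso_segment() pattern. A
 * condensed sketch, with consumeSegment() standing in (hypothetically) for
 * whatever takes ownership of each segment:
 */
#if 0
struct sk_buff *pSeg = skb_gso_segment(pSkb, 0 /* no HW features: segment fully */);
if (!IS_ERR(pSeg))
{
    while (pSeg)
    {
        struct sk_buff *pNext = pSeg->next;
        pSeg->next = NULL;       /* detach before handing it over */
        consumeSegment(pSeg);    /* hypothetical consumer         */
        pSeg = pNext;
    }
    dev_kfree_skb(pSkb);         /* the original skb remains ours to free */
}
#endif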
1797
1798#ifndef VBOXNETFLT_LINUX_NO_XMIT_QUEUE
1799/**
1800 * Work queue handler that forwards the socket buffers queued by
1801 * vboxNetFltLinuxPacketHandler to the internal network.
1802 *
1803 * @param pWork The work item.
1804 */
1805# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20)
1806static void vboxNetFltLinuxXmitTask(struct work_struct *pWork)
1807# else
1808static void vboxNetFltLinuxXmitTask(void *pWork)
1809# endif
1810{
1811 PVBOXNETFLTINS pThis = VBOX_FLT_XT_TO_INST(pWork);
1812 struct sk_buff *pBuf;
1813
1814 Log4(("vboxNetFltLinuxXmitTask: Got work %p.\n", pWork));
1815
1816 /*
1817 * Active? Retain the instance and increment the busy counter.
1818 */
1819 if (vboxNetFltTryRetainBusyActive(pThis))
1820 {
1821 while ((pBuf = skb_dequeue(&pThis->u.s.XmitQueue)) != NULL)
1822 vboxNetFltLinuxForwardToIntNet(pThis, pBuf);
1823
1824 vboxNetFltRelease(pThis, true /* fBusy */);
1825 }
1826 else
1827 {
1828 /** @todo Shouldn't we just drop the packets here? There is little point in
1829 * making them accumulate when the VM is paused and it'll only waste
1830 * kernel memory anyway... Hmm, maybe wait a short while (2-5 secs)
1831 * before starting to drain the packets (goes for the intnet ring buf
1832 * too)? */
1833 }
1834}
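
/*
 * For reference, the producer side pairing with this handler lives in
 * vboxNetFltLinuxPacketHandler and amounts to roughly this (sketch, not
 * compiled):
 */
# if 0
skb_queue_tail(&pThis->u.s.XmitQueue, pSkb);  /* defer the skb...          */
schedule_work(&pThis->u.s.XmitTask);          /* ...and kick the work item */
# endif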
1835#endif /* !VBOXNETFLT_LINUX_NO_XMIT_QUEUE */
1836
1837/**
1838 * Reports the GSO capabilities of the hardware NIC.
1839 *
1840 * @param pThis The net filter instance. The caller holds a
1841 * reference to this.
1842 */
1843static void vboxNetFltLinuxReportNicGsoCapabilities(PVBOXNETFLTINS pThis)
1844{
1845#ifdef VBOXNETFLT_WITH_GSO_XMIT_WIRE
1846 if (vboxNetFltTryRetainBusyNotDisconnected(pThis))
1847 {
1848 struct net_device *pDev;
1849 PINTNETTRUNKSWPORT pSwitchPort;
1850 unsigned int fFeatures;
1851 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1852
1853 RTSpinlockAcquireNoInts(pThis->hSpinlock, &Tmp);
1854
1855 pSwitchPort = pThis->pSwitchPort; /* this doesn't need to be here, but it does no harm. */
1856 pDev = ASMAtomicUoReadPtrT(&pThis->u.s.pDev, struct net_device *);
1857 if (pDev)
1858 fFeatures = pDev->features;
1859 else
1860 fFeatures = 0;
1861
1862 RTSpinlockReleaseNoInts(pThis->hSpinlock, &Tmp);
1863
1864 if (pThis->pSwitchPort)
1865 {
1866 /* Set/update the GSO capabilities of the NIC. */
1867 uint32_t fGsoCapabilities = 0;
1868 if (fFeatures & NETIF_F_TSO)
1869 fGsoCapabilities |= RT_BIT_32(PDMNETWORKGSOTYPE_IPV4_TCP);
1870 if (fFeatures & NETIF_F_TSO6)
1871 fGsoCapabilities |= RT_BIT_32(PDMNETWORKGSOTYPE_IPV6_TCP);
1872# if 0 /** @todo GSO: Test UDP offloading (UFO) on linux. */
1873 if (fFeatures & NETIF_F_UFO)
1874 fGsoCapabilities |= RT_BIT_32(PDMNETWORKGSOTYPE_IPV4_UDP);
1875 if (fFeatures & NETIF_F_UFO)
1876 fGsoCapabilities |= RT_BIT_32(PDMNETWORKGSOTYPE_IPV6_UDP);
1877# endif
1878 pThis->pSwitchPort->pfnReportGsoCapabilities(pThis->pSwitchPort, fGsoCapabilities, INTNETTRUNKDIR_WIRE);
1879 }
1880
1881 vboxNetFltRelease(pThis, true /*fBusy*/);
1882 }
1883#endif /* VBOXNETFLT_WITH_GSO_XMIT_WIRE */
1884}
1885
1886/**
1887 * Helper that determines whether the host (ignoring us) is operating the
1888 * interface in promiscuous mode or not.
1889 */
1890static bool vboxNetFltLinuxPromiscuous(PVBOXNETFLTINS pThis)
1891{
1892 bool fRc = false;
1893 struct net_device * pDev = vboxNetFltLinuxRetainNetDev(pThis);
1894 if (pDev)
1895 {
1896 fRc = !!(pDev->promiscuity - (ASMAtomicUoReadBool(&pThis->u.s.fPromiscuousSet) & 1));
1897 LogFlow(("vboxNetFltPortOsIsPromiscuous: returns %d, pDev->promiscuity=%d, fPromiscuousSet=%d\n",
1898 fRc, pDev->promiscuity, pThis->u.s.fPromiscuousSet));
1899 vboxNetFltLinuxReleaseNetDev(pThis, pDev);
1900 }
1901 return fRc;
1902}
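
/*
 * pDev->promiscuity is a counter of promiscuity requests rather than a flag,
 * so the expression above subtracts our own contribution before testing.
 * Sketch with worked cases:
 */
#if 0
unsigned cOurs = ASMAtomicUoReadBool(&pThis->u.s.fPromiscuousSet) ? 1 : 0;
bool fHost = (pDev->promiscuity - cOurs) != 0;
/* promiscuity == 1, cOurs == 1 -> false: we are the only one who asked */
/* promiscuity == 2, cOurs == 1 -> true:  someone else asked as well    */
#endif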
1903
1904/**
1905 * Internal worker for vboxNetFltLinuxNotifierCallback.
1906 *
1907 * @returns VBox status code.
1908 * @param pThis The instance.
1909 * @param pDev The device to attach to. This function retains its
1910 * own reference to it (dev_hold).
1911 */
1912static int vboxNetFltLinuxAttachToInterface(PVBOXNETFLTINS pThis, struct net_device *pDev)
1913{
1914 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1915 LogFlow(("vboxNetFltLinuxAttachToInterface: pThis=%p (%s)\n", pThis, pThis->szName));
1916
1917 /*
1918 * Retain and store the device.
1919 */
1920 dev_hold(pDev);
1921
1922 RTSpinlockAcquireNoInts(pThis->hSpinlock, &Tmp);
1923 ASMAtomicUoWritePtr(&pThis->u.s.pDev, pDev);
1924 RTSpinlockReleaseNoInts(pThis->hSpinlock, &Tmp);
1925
1926 Log(("vboxNetFltLinuxAttachToInterface: Device %p(%s) retained. ref=%d\n", pDev, pDev->name, atomic_read(&pDev->refcnt)));
1927 Log(("vboxNetFltLinuxAttachToInterface: Got pDev=%p pThis=%p pThis->u.s.pDev=%p\n", pDev, pThis, ASMAtomicUoReadPtrT(&pThis->u.s.pDev, struct net_device *)));
1928
1929 /* Get the mac address while we still have a valid net_device reference. */
1930 memcpy(&pThis->u.s.MacAddr, pDev->dev_addr, sizeof(pThis->u.s.MacAddr));
1931
1932 /*
1933 * Install a packet filter for this device with a protocol wildcard (ETH_P_ALL).
1934 */
1935 pThis->u.s.PacketType.type = __constant_htons(ETH_P_ALL);
1936 pThis->u.s.PacketType.dev = pDev;
1937 pThis->u.s.PacketType.func = vboxNetFltLinuxPacketHandler;
1938 dev_add_pack(&pThis->u.s.PacketType);
1939
1940#ifdef VBOXNETFLT_WITH_FILTER_HOST2GUEST_SKBS_EXPERIMENT
1941 vboxNetFltLinuxHookDev(pThis, pDev);
1942#endif
1943#ifdef VBOXNETFLT_WITH_QDISC
1944 vboxNetFltLinuxQdiscInstall(pThis, pDev);
1945#endif /* VBOXNETFLT_WITH_QDISC */
1946
1947 /*
1948 * Set the indicators that require the spinlock. Be a bit paranoid about
1949 * racing the device notification handler.
1950 */
1951 RTSpinlockAcquireNoInts(pThis->hSpinlock, &Tmp);
1952 pDev = ASMAtomicUoReadPtrT(&pThis->u.s.pDev, struct net_device *);
1953 if (pDev)
1954 {
1955 ASMAtomicUoWriteBool(&pThis->fDisconnectedFromHost, false);
1956 ASMAtomicUoWriteBool(&pThis->u.s.fRegistered, true);
1957 pDev = NULL; /* don't dereference it */
1958 }
1959 RTSpinlockReleaseNoInts(pThis->hSpinlock, &Tmp);
1960 Log(("vboxNetFltLinuxAttachToInterface: this=%p: Packet handler installed.\n", pThis));
1961
1962 /*
1963 * If the above succeeded, report the GSO capabilities; if not, undo and
1964 * release the device.
1965 */
1966 if (!pDev)
1967 {
1968 Assert(pThis->pSwitchPort);
1969 if (vboxNetFltTryRetainBusyNotDisconnected(pThis))
1970 {
1971 vboxNetFltLinuxReportNicGsoCapabilities(pThis);
1972 pThis->pSwitchPort->pfnReportMacAddress(pThis->pSwitchPort, &pThis->u.s.MacAddr);
1973 pThis->pSwitchPort->pfnReportPromiscuousMode(pThis->pSwitchPort, vboxNetFltLinuxPromiscuous(pThis));
1974 pThis->pSwitchPort->pfnReportNoPreemptDsts(pThis->pSwitchPort, INTNETTRUNKDIR_WIRE | INTNETTRUNKDIR_HOST);
1975 vboxNetFltRelease(pThis, true /*fBusy*/);
1976 }
1977 }
1978 else
1979 {
1980#ifdef VBOXNETFLT_WITH_FILTER_HOST2GUEST_SKBS_EXPERIMENT
1981 vboxNetFltLinuxUnhookDev(pThis, pDev);
1982#endif
1983#ifdef VBOXNETFLT_WITH_QDISC
1984 vboxNetFltLinuxQdiscRemove(pThis, pDev);
1985#endif /* VBOXNETFLT_WITH_QDISC */
1986 RTSpinlockAcquireNoInts(pThis->hSpinlock, &Tmp);
1987 ASMAtomicUoWriteNullPtr(&pThis->u.s.pDev);
1988 RTSpinlockReleaseNoInts(pThis->hSpinlock, &Tmp);
1989 dev_put(pDev);
1990 Log(("vboxNetFltLinuxAttachToInterface: Device %p(%s) released. ref=%d\n", pDev, pDev->name, atomic_read(&pDev->refcnt)));
1991 }
1992
1993 LogRel(("VBoxNetFlt: attached to '%s' / %.*Rhxs\n", pThis->szName, sizeof(pThis->u.s.MacAddr), &pThis->u.s.MacAddr));
1994 return VINF_SUCCESS;
1995}
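
/*
 * The dev_add_pack() call above registers a protocol tap: with ETH_P_ALL as
 * the type and a specific dev, the kernel hands every frame seen on that
 * device, in both directions, to the func callback. A minimal sketch with a
 * hypothetical handler:
 */
#if 0
static struct packet_type g_ExampleTap;
g_ExampleTap.type = __constant_htons(ETH_P_ALL); /* wildcard: every protocol */
g_ExampleTap.dev  = pDev;                        /* tap only this device     */
g_ExampleTap.func = exampleHandler;              /* hypothetical callback    */
dev_add_pack(&g_ExampleTap);
/* ... dev_remove_pack(&g_ExampleTap) detaches the tap again. */
#endif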
1996
1997
1998static int vboxNetFltLinuxUnregisterDevice(PVBOXNETFLTINS pThis, struct net_device *pDev)
1999{
2000 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
2001
2002 Assert(!pThis->fDisconnectedFromHost);
2003
2004#ifdef VBOXNETFLT_WITH_FILTER_HOST2GUEST_SKBS_EXPERIMENT
2005 vboxNetFltLinuxUnhookDev(pThis, pDev);
2006#endif
2007#ifdef VBOXNETFLT_WITH_QDISC
2008 vboxNetFltLinuxQdiscRemove(pThis, pDev);
2009#endif /* VBOXNETFLT_WITH_QDISC */
2010
2011 RTSpinlockAcquireNoInts(pThis->hSpinlock, &Tmp);
2012 ASMAtomicWriteBool(&pThis->u.s.fRegistered, false);
2013 ASMAtomicWriteBool(&pThis->fDisconnectedFromHost, true);
2014 ASMAtomicUoWriteNullPtr(&pThis->u.s.pDev);
2015 RTSpinlockReleaseNoInts(pThis->hSpinlock, &Tmp);
2016
2017 dev_remove_pack(&pThis->u.s.PacketType);
2018#ifndef VBOXNETFLT_LINUX_NO_XMIT_QUEUE
2019 skb_queue_purge(&pThis->u.s.XmitQueue);
2020#endif
2021 Log(("vboxNetFltLinuxUnregisterDevice: this=%p: Packet handler removed, xmit queue purged.\n", pThis));
2022 Log(("vboxNetFltLinuxUnregisterDevice: Device %p(%s) released. ref=%d\n", pDev, pDev->name, atomic_read(&pDev->refcnt)));
2023 dev_put(pDev);
2024
2025 return NOTIFY_OK;
2026}
2027
2028static int vboxNetFltLinuxDeviceIsUp(PVBOXNETFLTINS pThis, struct net_device *pDev)
2029{
2030 /* Check if we are not suspended and promiscuous mode has not been set. */
2031 if ( pThis->enmTrunkState == INTNETTRUNKIFSTATE_ACTIVE
2032 && !ASMAtomicUoReadBool(&pThis->u.s.fPromiscuousSet))
2033 {
2034 /* Note that there is no need for locking here, as the kernel already holds the rtnl lock. */
2035 dev_set_promiscuity(pDev, 1);
2036 ASMAtomicWriteBool(&pThis->u.s.fPromiscuousSet, true);
2037 Log(("vboxNetFltLinuxDeviceIsUp: enabled promiscuous mode on %s (%d)\n", pThis->szName, pDev->promiscuity));
2038 }
2039 else
2040 Log(("vboxNetFltLinuxDeviceIsUp: no need to enable promiscuous mode on %s (%d)\n", pThis->szName, pDev->promiscuity));
2041 return NOTIFY_OK;
2042}
2043
2044static int vboxNetFltLinuxDeviceGoingDown(PVBOXNETFLTINS pThis, struct net_device *pDev)
2045{
2046 /* Undo promiscuous mode if we set it. */
2047 if (ASMAtomicUoReadBool(&pThis->u.s.fPromiscuousSet))
2048 {
2049 /* Note that there is no need for locking here, as the kernel already holds the rtnl lock. */
2050 dev_set_promiscuity(pDev, -1);
2051 ASMAtomicWriteBool(&pThis->u.s.fPromiscuousSet, false);
2052 Log(("vboxNetFltLinuxDeviceGoingDown: disabled promiscuous mode on %s (%d)\n", pThis->szName, pDev->promiscuity));
2053 }
2054 else
2055 Log(("vboxNetFltLinuxDeviceGoingDown: no need to disable promiscuous mode on %s (%d)\n", pThis->szName, pDev->promiscuity));
2056 return NOTIFY_OK;
2057}
2058
2059#ifdef LOG_ENABLED
2060/** Stringify the NETDEV_XXX constants. */
2061static const char *vboxNetFltLinuxGetNetDevEventName(unsigned long ulEventType)
2062{
2063 const char *pszEvent = "NETDEV_<unknown>";
2064 switch (ulEventType)
2065 {
2066 case NETDEV_REGISTER: pszEvent = "NETDEV_REGISTER"; break;
2067 case NETDEV_UNREGISTER: pszEvent = "NETDEV_UNREGISTER"; break;
2068 case NETDEV_UP: pszEvent = "NETDEV_UP"; break;
2069 case NETDEV_DOWN: pszEvent = "NETDEV_DOWN"; break;
2070 case NETDEV_REBOOT: pszEvent = "NETDEV_REBOOT"; break;
2071 case NETDEV_CHANGENAME: pszEvent = "NETDEV_CHANGENAME"; break;
2072 case NETDEV_CHANGE: pszEvent = "NETDEV_CHANGE"; break;
2073 case NETDEV_CHANGEMTU: pszEvent = "NETDEV_CHANGEMTU"; break;
2074 case NETDEV_CHANGEADDR: pszEvent = "NETDEV_CHANGEADDR"; break;
2075 case NETDEV_GOING_DOWN: pszEvent = "NETDEV_GOING_DOWN"; break;
2076# ifdef NETDEV_FEAT_CHANGE
2077 case NETDEV_FEAT_CHANGE: pszEvent = "NETDEV_FEAT_CHANGE"; break;
2078# endif
2079 }
2080 return pszEvent;
2081}
2082#endif /* LOG_ENABLED */
2083
2084/**
2085 * Callback for listening to netdevice events.
2086 *
2087 * This handles rediscovery, cleanup on unregistration, promiscuity on
2088 * up/down, and GSO feature changes from ethtool.
2089 *
2090 * @returns NOTIFY_OK
2091 * @param self Pointer to our notifier registration block.
2092 * @param ulEventType The event.
2093 * @param ptr Event specific, but it is usually the device it
2094 * relates to.
2095 */
2096static int vboxNetFltLinuxNotifierCallback(struct notifier_block *self, unsigned long ulEventType, void *ptr)
2098{
2099 PVBOXNETFLTINS pThis = VBOX_FLT_NB_TO_INST(self);
2100 struct net_device *pDev = (struct net_device *)ptr;
2101 int rc = NOTIFY_OK;
2102
2103 Log(("VBoxNetFlt: got event %s(0x%lx) on %s, pDev=%p pThis=%p pThis->u.s.pDev=%p\n",
2104 vboxNetFltLinuxGetNetDevEventName(ulEventType), ulEventType, pDev->name, pDev, pThis, ASMAtomicUoReadPtrT(&pThis->u.s.pDev, struct net_device *)));
2105 if ( ulEventType == NETDEV_REGISTER
2106 && !strcmp(pDev->name, pThis->szName))
2107 {
2108 vboxNetFltLinuxAttachToInterface(pThis, pDev);
2109 }
2110 else
2111 {
2112 pDev = ASMAtomicUoReadPtrT(&pThis->u.s.pDev, struct net_device *);
2113 if (pDev == ptr)
2114 {
2115 switch (ulEventType)
2116 {
2117 case NETDEV_UNREGISTER:
2118 rc = vboxNetFltLinuxUnregisterDevice(pThis, pDev);
2119 break;
2120 case NETDEV_UP:
2121 rc = vboxNetFltLinuxDeviceIsUp(pThis, pDev);
2122 break;
2123 case NETDEV_GOING_DOWN:
2124 rc = vboxNetFltLinuxDeviceGoingDown(pThis, pDev);
2125 break;
2126 case NETDEV_CHANGENAME:
2127 break;
2128#ifdef NETDEV_FEAT_CHANGE
2129 case NETDEV_FEAT_CHANGE:
2130 vboxNetFltLinuxReportNicGsoCapabilities(pThis);
2131 break;
2132#endif
2133 }
2134 }
2135 }
2136
2137 return rc;
2138}
2139
2140bool vboxNetFltOsMaybeRediscovered(PVBOXNETFLTINS pThis)
2141{
2142 return !ASMAtomicUoReadBool(&pThis->fDisconnectedFromHost);
2143}
2144
2145int vboxNetFltPortOsXmit(PVBOXNETFLTINS pThis, void *pvIfData, PINTNETSG pSG, uint32_t fDst)
2146{
2147 struct net_device * pDev;
2148 int err;
2149 int rc = VINF_SUCCESS;
2150 NOREF(pvIfData);
2151
2152 LogFlow(("vboxNetFltPortOsXmit: pThis=%p (%s)\n", pThis, pThis->szName));
2153
2154 pDev = vboxNetFltLinuxRetainNetDev(pThis);
2155 if (pDev)
2156 {
2157 /*
2158 * Create a sk_buff for the gather list and push it onto the wire.
2159 */
2160 if (fDst & INTNETTRUNKDIR_WIRE)
2161 {
2162 struct sk_buff *pBuf = vboxNetFltLinuxSkBufFromSG(pThis, pSG, true);
2163 if (pBuf)
2164 {
2165 vboxNetFltDumpPacket(pSG, true, "wire", 1);
2166 Log4(("vboxNetFltPortOsXmit: pBuf->cb dump:\n%.*Rhxd\n", sizeof(pBuf->cb), pBuf->cb));
2167 Log4(("vboxNetFltPortOsXmit: dev_queue_xmit(%p)\n", pBuf));
2168 err = dev_queue_xmit(pBuf);
2169 if (err)
2170 rc = RTErrConvertFromErrno(err);
2171 }
2172 else
2173 rc = VERR_NO_MEMORY;
2174 }
2175
2176 /*
2177 * Create a sk_buff for the gather list and push it onto the host stack.
2178 */
2179 if (fDst & INTNETTRUNKDIR_HOST)
2180 {
2181 struct sk_buff *pBuf = vboxNetFltLinuxSkBufFromSG(pThis, pSG, false);
2182 if (pBuf)
2183 {
2184 vboxNetFltDumpPacket(pSG, true, "host", (fDst & INTNETTRUNKDIR_WIRE) ? 0 : 1);
2185 Log4(("vboxNetFltPortOsXmit: pBuf->cb dump:\n%.*Rhxd\n", sizeof(pBuf->cb), pBuf->cb));
2186 Log4(("vboxNetFltPortOsXmit: netif_rx_ni(%p)\n", pBuf));
2187 err = netif_rx_ni(pBuf);
2188 if (err)
2189 rc = RTErrConvertFromErrno(err);
2190 }
2191 else
2192 rc = VERR_NO_MEMORY;
2193 }
2194
2195 vboxNetFltLinuxReleaseNetDev(pThis, pDev);
2196 }
2197
2198 return rc;
2199}
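
/*
 * The two injection primitives used above are not interchangeable, which is
 * why a separate sk_buff is built per destination when fDst has both bits
 * set (sketch, not compiled):
 */
#if 0
dev_queue_xmit(pBufWire);  /* enqueue on the device TX path, as if the host sent it */
netif_rx_ni(pBufHost);     /* inject into the RX path, as if it came off the wire   */
#endif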
2200
2201
2202void vboxNetFltPortOsSetActive(PVBOXNETFLTINS pThis, bool fActive)
2203{
2204 struct net_device * pDev;
2205
2206 LogFlow(("vboxNetFltPortOsSetActive: pThis=%p (%s), fActive=%s, fDisablePromiscuous=%s\n",
2207 pThis, pThis->szName, fActive?"true":"false",
2208 pThis->fDisablePromiscuous?"true":"false"));
2209
2210 if (pThis->fDisablePromiscuous)
2211 return;
2212
2213 pDev = vboxNetFltLinuxRetainNetDev(pThis);
2214 if (pDev)
2215 {
2216 /*
2217 * This API is a bit weird; the best reference is the code.
2218 *
2219 * Also, we have a few race conditions wrt the maintenance of the
2220 * host interface's promiscuity for vboxNetFltPortOsIsPromiscuous.
2221 */
2222#ifdef LOG_ENABLED
2223 u_int16_t fIf;
2224 unsigned const cPromiscBefore = pDev->promiscuity;
2225#endif
2226 if (fActive)
2227 {
2228 Assert(!pThis->u.s.fPromiscuousSet);
2229
2230 rtnl_lock();
2231 dev_set_promiscuity(pDev, 1);
2232 rtnl_unlock();
2233 pThis->u.s.fPromiscuousSet = true;
2234 Log(("vboxNetFltPortOsSetActive: enabled promiscuous mode on %s (%d)\n", pThis->szName, pDev->promiscuity));
2235 }
2236 else
2237 {
2238 if (pThis->u.s.fPromiscuousSet)
2239 {
2240 rtnl_lock();
2241 dev_set_promiscuity(pDev, -1);
2242 rtnl_unlock();
2243 Log(("vboxNetFltPortOsSetActive: disabled promiscuous mode on %s (%d)\n", pThis->szName, pDev->promiscuity));
2244 }
2245 pThis->u.s.fPromiscuousSet = false;
2246
2247#ifdef LOG_ENABLED
2248 fIf = dev_get_flags(pDev);
2249 Log(("VBoxNetFlt: fIf=%#x; %d->%d\n", fIf, cPromiscBefore, pDev->promiscuity));
2250#endif
2251 }
2252
2253 vboxNetFltLinuxReleaseNetDev(pThis, pDev);
2254 }
2255}
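
/*
 * dev_set_promiscuity() must be called under the rtnl lock. In the netdevice
 * notifier callbacks earlier in this file the kernel already holds it; here
 * we are entered from VirtualBox code and have to take it ourselves, i.e.
 * the pattern is (sketch):
 */
#if 0
rtnl_lock();
dev_set_promiscuity(pDev, fActive ? 1 : -1);  /* signed delta on a counter, not a bool */
rtnl_unlock();
#endif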
2256
2257
2258int vboxNetFltOsDisconnectIt(PVBOXNETFLTINS pThis)
2259{
2260#ifdef VBOXNETFLT_WITH_QDISC
2261 vboxNetFltLinuxQdiscRemove(pThis, NULL);
2262#endif /* VBOXNETFLT_WITH_QDISC */
2263 /*
2264 * Remove the packet handler when we get disconnected from the internal switch,
2265 * as we don't want the handler to forward packets to a disconnected switch.
2266 */
2267 dev_remove_pack(&pThis->u.s.PacketType);
2268 return VINF_SUCCESS;
2269}
2270
2271
2272int vboxNetFltOsConnectIt(PVBOXNETFLTINS pThis)
2273{
2274 /*
2275 * Report the GSO capabilities of the host and device (if connected).
2276 * Note! No need to mark ourselves busy here.
2277 */
2278 /** @todo duplicate work here now? Attach */
2279#if defined(VBOXNETFLT_WITH_GSO_XMIT_HOST)
2280 pThis->pSwitchPort->pfnReportGsoCapabilities(pThis->pSwitchPort,
2281 0
2282 | RT_BIT_32(PDMNETWORKGSOTYPE_IPV4_TCP)
2283 | RT_BIT_32(PDMNETWORKGSOTYPE_IPV6_TCP)
2284# if 0 /** @todo GSO: Test UDP offloading (UFO) on linux. */
2285 | RT_BIT_32(PDMNETWORKGSOTYPE_IPV4_UDP)
2286 | RT_BIT_32(PDMNETWORKGSOTYPE_IPV6_UDP)
2287# endif
2288 , INTNETTRUNKDIR_HOST);
2289
2290#endif
2291 vboxNetFltLinuxReportNicGsoCapabilities(pThis);
2292
2293 return VINF_SUCCESS;
2294}
2295
2296
2297void vboxNetFltOsDeleteInstance(PVBOXNETFLTINS pThis)
2298{
2299 struct net_device *pDev;
2300 bool fRegistered;
2301 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
2302
2303#ifdef VBOXNETFLT_WITH_FILTER_HOST2GUEST_SKBS_EXPERIMENT
2304 vboxNetFltLinuxUnhookDev(pThis, NULL);
2305#endif
2306
2307 /** @todo This code may race vboxNetFltLinuxUnregisterDevice (very very
2308 * unlikely, but nonetheless). Since it doesn't actually update the
2309 * state (just reads it), it is likely to panic in some interesting
2310 * ways. */
2311
2312 RTSpinlockAcquireNoInts(pThis->hSpinlock, &Tmp);
2313 pDev = ASMAtomicUoReadPtrT(&pThis->u.s.pDev, struct net_device *);
2314 fRegistered = ASMAtomicUoReadBool(&pThis->u.s.fRegistered);
2315 RTSpinlockReleaseNoInts(pThis->hSpinlock, &Tmp);
2316
2317 if (fRegistered)
2318 {
2319#ifndef VBOXNETFLT_LINUX_NO_XMIT_QUEUE
2320 skb_queue_purge(&pThis->u.s.XmitQueue);
2321#endif
2322 Log(("vboxNetFltOsDeleteInstance: this=%p: Packet handler removed, xmit queue purged.\n", pThis));
2323 Log(("vboxNetFltOsDeleteInstance: Device %p(%s) released. ref=%d\n", pDev, pDev->name, atomic_read(&pDev->refcnt)));
2324 dev_put(pDev);
2325 }
2326 Log(("vboxNetFltOsDeleteInstance: this=%p: Notifier removed.\n", pThis));
2327 unregister_netdevice_notifier(&pThis->u.s.Notifier);
2328 module_put(THIS_MODULE);
2329}
2330
2331
2332int vboxNetFltOsInitInstance(PVBOXNETFLTINS pThis, void *pvContext)
2333{
2334 int err;
2335 NOREF(pvContext);
2336
2337 pThis->u.s.Notifier.notifier_call = vboxNetFltLinuxNotifierCallback;
2338 err = register_netdevice_notifier(&pThis->u.s.Notifier);
2339 if (err)
2340 return VERR_INTNET_FLT_IF_FAILED;
2341 if (!pThis->u.s.fRegistered)
2342 {
2343 unregister_netdevice_notifier(&pThis->u.s.Notifier);
2344 LogRel(("VBoxNetFlt: failed to find %s.\n", pThis->szName));
2345 return VERR_INTNET_FLT_IF_NOT_FOUND;
2346 }
2347
2348 Log(("vboxNetFltOsInitInstance: this=%p: Notifier installed.\n", pThis));
2349 if ( pThis->fDisconnectedFromHost
2350 || !try_module_get(THIS_MODULE))
2351 return VERR_INTNET_FLT_IF_FAILED;
2352
2353 return VINF_SUCCESS;
2354}
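
/*
 * The fRegistered check above relies on register_netdevice_notifier()
 * replaying NETDEV_REGISTER (and NETDEV_UP) for every device that already
 * exists: if the named interface is present, the notifier callback has
 * attached to it by the time the registration call returns. Skeleton of the
 * pattern (sketch; exampleCallback is hypothetical):
 */
#if 0
static struct notifier_block g_ExampleNotifier;
g_ExampleNotifier.notifier_call = exampleCallback;
register_netdevice_notifier(&g_ExampleNotifier);   /* replays existing devices synchronously */
/* ... */
unregister_netdevice_notifier(&g_ExampleNotifier);
#endif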
2355
2356int vboxNetFltOsPreInitInstance(PVBOXNETFLTINS pThis)
2357{
2358 /*
2359 * Init the linux specific members.
2360 */
2361 pThis->u.s.pDev = NULL;
2362 pThis->u.s.fRegistered = false;
2363 pThis->u.s.fPromiscuousSet = false;
2364 memset(&pThis->u.s.PacketType, 0, sizeof(pThis->u.s.PacketType));
2365#ifndef VBOXNETFLT_LINUX_NO_XMIT_QUEUE
2366 skb_queue_head_init(&pThis->u.s.XmitQueue);
2367# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20)
2368 INIT_WORK(&pThis->u.s.XmitTask, vboxNetFltLinuxXmitTask);
2369# else
2370 INIT_WORK(&pThis->u.s.XmitTask, vboxNetFltLinuxXmitTask, &pThis->u.s.XmitTask);
2371# endif
2372#endif
2373
2374 return VINF_SUCCESS;
2375}
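
/*
 * On the 2.6.20 INIT_WORK() change handled above: newer kernels pass the
 * work_struct itself to the handler, which recovers its containing instance
 * via container_of() (which is what RT_FROM_MEMBER, and thus
 * VBOX_FLT_XT_TO_INST, boils down to); older kernels passed an opaque data
 * pointer instead. Sketch of the post-2.6.20 shape:
 */
#if 0
static void exampleXmitTask(struct work_struct *pWork)
{
    PVBOXNETFLTINS pThis = container_of(pWork, VBOXNETFLTINS, u.s.XmitTask);
    /* ... drain pThis->u.s.XmitQueue here ... */
}
#endif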
2376
2377
2378void vboxNetFltPortOsNotifyMacAddress(PVBOXNETFLTINS pThis, void *pvIfData, PCRTMAC pMac)
2379{
2380 NOREF(pThis); NOREF(pvIfData); NOREF(pMac);
2381}
2382
2383
2384int vboxNetFltPortOsConnectInterface(PVBOXNETFLTINS pThis, void *pvIf, void **pvIfData)
2385{
2386 /* Nothing to do */
2387 NOREF(pThis); NOREF(pvIf); NOREF(pvIfData);
2388 return VINF_SUCCESS;
2389}
2390
2391
2392int vboxNetFltPortOsDisconnectInterface(PVBOXNETFLTINS pThis, void *pvIfData)
2393{
2394 /* Nothing to do */
2395 NOREF(pThis); NOREF(pvIfData);
2396 return VINF_SUCCESS;
2397}
2398