VirtualBox

source: vbox/trunk/src/VBox/Devices/Network/slirp/ip_input.c@ 28587

Last change on this file since 28587 was 28449, checked in by vboxsync, 15 years ago

NAT: slirp file headers

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 17.0 KB
Line 
1/* $Id: ip_input.c 28449 2010-04-19 09:52:59Z vboxsync $ */
2/** @file
3 * NAT - IP input.
4 */
5
6/*
7 * Copyright (C) 2006-2010 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22/*
23 * This code is based on:
24 *
25 * Copyright (c) 1982, 1986, 1988, 1993
26 * The Regents of the University of California. All rights reserved.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 * 1. Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in the
35 * documentation and/or other materials provided with the distribution.
36 * 3. All advertising materials mentioning features or use of this software
37 * must display the following acknowledgement:
38 * This product includes software developed by the University of
39 * California, Berkeley and its contributors.
40 * 4. Neither the name of the University nor the names of its contributors
41 * may be used to endorse or promote products derived from this software
42 * without specific prior written permission.
43 *
44 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
45 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
46 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
47 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
48 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
49 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
50 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
51 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
52 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
53 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54 * SUCH DAMAGE.
55 *
56 * @(#)ip_input.c 8.2 (Berkeley) 1/4/94
57 * ip_input.c,v 1.11 1994/11/16 10:17:08 jkh Exp
58 */
59
60/*
61 * Changes and additions relating to SLiRP are
62 * Copyright (c) 1995 Danny Gasparovski.
63 *
64 * Please read the file COPYRIGHT for the
65 * terms and conditions of the copyright.
66 */
67
68#include <slirp.h>
69#include "ip_icmp.h"
70#include "alias.h"
71
72
73/*
74 * IP initialization: fill in IP protocol switch table.
75 * All protocols not implemented in kernel go to raw IP protocol handler.
76 */
77void
78ip_init(PNATState pData)
79{
80 int i = 0;
81 for (i = 0; i < IPREASS_NHASH; ++i)
82 TAILQ_INIT(&ipq[i]);
83 maxnipq = 100; /* ??? */
84 maxfragsperpacket = 16;
85 nipq = 0;
86 ip_currid = tt.tv_sec & 0xffff;
87 udp_init(pData);
88 tcp_init(pData);
89}
90
91static struct libalias *select_alias(PNATState pData, struct mbuf* m)
92{
93 struct libalias *la = pData->proxy_alias;
94 struct udphdr *udp = NULL;
95 struct ip *pip = NULL;
96
97#ifndef VBOX_WITH_SLIRP_BSD_MBUF
98 if (m->m_la)
99 return m->m_la;
100#else
101 struct m_tag *t;
102 if ((t = m_tag_find(m, PACKET_TAG_ALIAS, NULL)) != 0)
103 return (struct libalias *)&t[1];
104#endif
105
106 return la;
107}
108
109/*
110 * Ip input routine. Checksum and byte swap header. If fragmented
111 * try to reassemble. Process options. Pass to next level.
112 */
113void
114ip_input(PNATState pData, struct mbuf *m)
115{
116 register struct ip *ip;
117 int hlen = 0;
118 int mlen = 0;
119
120 STAM_PROFILE_START(&pData->StatIP_input, a);
121
122 DEBUG_CALL("ip_input");
123 DEBUG_ARG("m = %lx", (long)m);
124 ip = mtod(m, struct ip *);
125 Log2(("ip_dst=%R[IP4](len:%d) m_len = %d", &ip->ip_dst, RT_N2H_U16(ip->ip_len), m->m_len));
126 Log2(("ip_dst=%R[IP4](len:%d) m_len = %d\n", &ip->ip_dst, RT_N2H_U16(ip->ip_len), m->m_len));
127
128 ipstat.ips_total++;
129 {
130 int rc;
131 STAM_PROFILE_START(&pData->StatALIAS_input, b);
132 rc = LibAliasIn(select_alias(pData, m), mtod(m, char *), m->m_len);
133 STAM_PROFILE_STOP(&pData->StatALIAS_input, b);
134 Log2(("NAT: LibAlias return %d\n", rc));
135 if (m->m_len != RT_N2H_U16(ip->ip_len))
136 m->m_len = RT_N2H_U16(ip->ip_len);
137 }
138
139 mlen = m->m_len;
140
141 if (mlen < sizeof(struct ip))
142 {
143 ipstat.ips_toosmall++;
144 STAM_PROFILE_STOP(&pData->StatIP_input, a);
145 return;
146 }
147
148 ip = mtod(m, struct ip *);
149 if (ip->ip_v != IPVERSION)
150 {
151 ipstat.ips_badvers++;
152 goto bad;
153 }
154
155 hlen = ip->ip_hl << 2;
156 if ( hlen < sizeof(struct ip)
157 || hlen > m->m_len)
158 {
159 /* min header length */
160 ipstat.ips_badhlen++; /* or packet too short */
161 goto bad;
162 }
163
164 /* keep ip header intact for ICMP reply
165 * ip->ip_sum = cksum(m, hlen);
166 * if (ip->ip_sum) {
167 */
168 if (cksum(m, hlen))
169 {
170 ipstat.ips_badsum++;
171 goto bad;
172 }
173
174 /*
175 * Convert fields to host representation.
176 */
177 NTOHS(ip->ip_len);
178 if (ip->ip_len < hlen)
179 {
180 ipstat.ips_badlen++;
181 goto bad;
182 }
183
184 NTOHS(ip->ip_id);
185 NTOHS(ip->ip_off);
186
187 /*
188 * Check that the amount of data in the buffers
189 * is as at least much as the IP header would have us expect.
190 * Trim mbufs if longer than we expect.
191 * Drop packet if shorter than we expect.
192 */
193 if (mlen < ip->ip_len)
194 {
195 ipstat.ips_tooshort++;
196 goto bad;
197 }
198
199 /* Should drop packet if mbuf too long? hmmm... */
200 if (mlen > ip->ip_len)
201 m_adj(m, ip->ip_len - m->m_len);
202
203 /* check ip_ttl for a correct ICMP reply */
204 if (ip->ip_ttl==0 || ip->ip_ttl == 1)
205 {
206 icmp_error(pData, m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, "ttl");
207 goto bad;
208 }
209
210 ip->ip_ttl--;
211 /*
212 * If offset or IP_MF are set, must reassemble.
213 * Otherwise, nothing need be done.
214 * (We could look in the reassembly queue to see
215 * if the packet was previously fragmented,
216 * but it's not worth the time; just let them time out.)
217 *
218 */
219 if (ip->ip_off & (IP_MF | IP_OFFMASK))
220 {
221 m = ip_reass(pData, m);
222 if (m == NULL)
223 {
224 STAM_PROFILE_STOP(&pData->StatIP_input, a);
225 return;
226 }
227 ip = mtod(m, struct ip *);
228 hlen = ip->ip_hl << 2;
229 }
230 else
231 ip->ip_len -= hlen;
232
233 /*
234 * Switch out to protocol's input routine.
235 */
236 ipstat.ips_delivered++;
237 switch (ip->ip_p)
238 {
239 case IPPROTO_TCP:
240 tcp_input(pData, m, hlen, (struct socket *)NULL);
241 break;
242 case IPPROTO_UDP:
243 udp_input(pData, m, hlen);
244 break;
245 case IPPROTO_ICMP:
246 icmp_input(pData, m, hlen);
247 break;
248 default:
249 ipstat.ips_noproto++;
250 m_freem(pData, m);
251 }
252 STAM_PROFILE_STOP(&pData->StatIP_input, a);
253 return;
254
255bad:
256 Log2(("NAT: IP datagram to %R[IP4] with size(%d) claimed as bad\n",
257 &ip->ip_dst, ip->ip_len));
258 m_freem(pData, m);
259 STAM_PROFILE_STOP(&pData->StatIP_input, a);
260 return;
261}
262
263struct mbuf *
264ip_reass(PNATState pData, struct mbuf* m)
265{
266 struct ip *ip;
267 struct mbuf *p, *q, *nq;
268 struct ipq_t *fp = NULL;
269 struct ipqhead *head;
270 int i, hlen, next;
271 u_short hash;
272
273 /* If maxnipq or maxfragsperpacket are 0, never accept fragments. */
274 if ( maxnipq == 0
275 || maxfragsperpacket == 0)
276 {
277 ipstat.ips_fragments++;
278 ipstat.ips_fragdropped++;
279 m_freem(pData, m);
280 return (NULL);
281 }
282
283 ip = mtod(m, struct ip *);
284 hlen = ip->ip_hl << 2;
285
286 hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
287 head = &ipq[hash];
288
289 /*
290 * Look for queue of fragments
291 * of this datagram.
292 */
293 TAILQ_FOREACH(fp, head, ipq_list)
294 if (ip->ip_id == fp->ipq_id &&
295 ip->ip_src.s_addr == fp->ipq_src.s_addr &&
296 ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
297 ip->ip_p == fp->ipq_p)
298 goto found;
299
300 fp = NULL;
301
302 /*
303 * Attempt to trim the number of allocated fragment queues if it
304 * exceeds the administrative limit.
305 */
306 if ((nipq > maxnipq) && (maxnipq > 0))
307 {
308 /*
309 * drop something from the tail of the current queue
310 * before proceeding further
311 */
312 struct ipq_t *pHead = TAILQ_LAST(head, ipqhead);
313 if (pHead == NULL)
314 {
315 /* gak */
316 for (i = 0; i < IPREASS_NHASH; i++)
317 {
318 struct ipq_t *pTail = TAILQ_LAST(&ipq[i], ipqhead);
319 if (pTail)
320 {
321 ipstat.ips_fragtimeout += pTail->ipq_nfrags;
322 ip_freef(pData, &ipq[i], pTail);
323 break;
324 }
325 }
326 }
327 else
328 {
329 ipstat.ips_fragtimeout += pHead->ipq_nfrags;
330 ip_freef(pData, head, pHead);
331 }
332 }
333
334found:
335 /*
336 * Adjust ip_len to not reflect header,
337 * convert offset of this to bytes.
338 */
339 ip->ip_len -= hlen;
340 if (ip->ip_off & IP_MF)
341 {
342 /*
343 * Make sure that fragments have a data length
344 * that's a non-zero multiple of 8 bytes.
345 */
346 if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0)
347 {
348 ipstat.ips_toosmall++; /* XXX */
349 goto dropfrag;
350 }
351 m->m_flags |= M_FRAG;
352 }
353 else
354 m->m_flags &= ~M_FRAG;
355 ip->ip_off <<= 3;
356
357
358 /*
359 * Attempt reassembly; if it succeeds, proceed.
360 * ip_reass() will return a different mbuf.
361 */
362 ipstat.ips_fragments++;
363
364 /* Previous ip_reass() started here. */
365 /*
366 * Presence of header sizes in mbufs
367 * would confuse code below.
368 */
369 m->m_data += hlen;
370 m->m_len -= hlen;
371
372 /*
373 * If first fragment to arrive, create a reassembly queue.
374 */
375 if (fp == NULL)
376 {
377 fp = RTMemAlloc(sizeof(struct ipq_t));
378 if (fp == NULL)
379 goto dropfrag;
380 TAILQ_INSERT_HEAD(head, fp, ipq_list);
381 nipq++;
382 fp->ipq_nfrags = 1;
383 fp->ipq_ttl = IPFRAGTTL;
384 fp->ipq_p = ip->ip_p;
385 fp->ipq_id = ip->ip_id;
386 fp->ipq_src = ip->ip_src;
387 fp->ipq_dst = ip->ip_dst;
388 fp->ipq_frags = m;
389 m->m_nextpkt = NULL;
390 goto done;
391 }
392 else
393 {
394 fp->ipq_nfrags++;
395 }
396
397#ifndef VBOX_WITH_SLIRP_BSD_MBUF
398#define GETIP(m) ((struct ip*)(MBUF_IP_HEADER(m)))
399#else
400#define GETIP(m) ((struct ip*)((m)->m_pkthdr.header))
401#endif
402
403
404 /*
405 * Find a segment which begins after this one does.
406 */
407 for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
408 if (GETIP(q)->ip_off > ip->ip_off)
409 break;
410
411 /*
412 * If there is a preceding segment, it may provide some of
413 * our data already. If so, drop the data from the incoming
414 * segment. If it provides all of our data, drop us, otherwise
415 * stick new segment in the proper place.
416 *
417 * If some of the data is dropped from the the preceding
418 * segment, then it's checksum is invalidated.
419 */
420 if (p)
421 {
422 i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off;
423 if (i > 0)
424 {
425 if (i >= ip->ip_len)
426 goto dropfrag;
427 m_adj(m, i);
428 ip->ip_off += i;
429 ip->ip_len -= i;
430 }
431 m->m_nextpkt = p->m_nextpkt;
432 p->m_nextpkt = m;
433 }
434 else
435 {
436 m->m_nextpkt = fp->ipq_frags;
437 fp->ipq_frags = m;
438 }
439
440 /*
441 * While we overlap succeeding segments trim them or,
442 * if they are completely covered, dequeue them.
443 */
444 for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off;
445 q = nq)
446 {
447 i = (ip->ip_off + ip->ip_len) - GETIP(q)->ip_off;
448 if (i < GETIP(q)->ip_len)
449 {
450 GETIP(q)->ip_len -= i;
451 GETIP(q)->ip_off += i;
452 m_adj(q, i);
453 break;
454 }
455 nq = q->m_nextpkt;
456 m->m_nextpkt = nq;
457 ipstat.ips_fragdropped++;
458 fp->ipq_nfrags--;
459 m_freem(pData, q);
460 }
461
462 /*
463 * Check for complete reassembly and perform frag per packet
464 * limiting.
465 *
466 * Frag limiting is performed here so that the nth frag has
467 * a chance to complete the packet before we drop the packet.
468 * As a result, n+1 frags are actually allowed per packet, but
469 * only n will ever be stored. (n = maxfragsperpacket.)
470 *
471 */
472 next = 0;
473 for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
474 {
475 if (GETIP(q)->ip_off != next)
476 {
477 if (fp->ipq_nfrags > maxfragsperpacket)
478 {
479 ipstat.ips_fragdropped += fp->ipq_nfrags;
480 ip_freef(pData, head, fp);
481 }
482 goto done;
483 }
484 next += GETIP(q)->ip_len;
485 }
486 /* Make sure the last packet didn't have the IP_MF flag */
487 if (p->m_flags & M_FRAG)
488 {
489 if (fp->ipq_nfrags > maxfragsperpacket)
490 {
491 ipstat.ips_fragdropped += fp->ipq_nfrags;
492 ip_freef(pData, head, fp);
493 }
494 goto done;
495 }
496
497 /*
498 * Reassembly is complete. Make sure the packet is a sane size.
499 */
500 q = fp->ipq_frags;
501 ip = GETIP(q);
502 hlen = ip->ip_hl << 2;
503 if (next + hlen > IP_MAXPACKET)
504 {
505 ipstat.ips_fragdropped += fp->ipq_nfrags;
506 ip_freef(pData, head, fp);
507 goto done;
508 }
509
510 /*
511 * Concatenate fragments.
512 */
513 m = q;
514 nq = q->m_nextpkt;
515 q->m_nextpkt = NULL;
516 for (q = nq; q != NULL; q = nq)
517 {
518 nq = q->m_nextpkt;
519 q->m_nextpkt = NULL;
520 m_cat(pData, m, q);
521
522 m->m_len += hlen;
523 m->m_data -= hlen;
524 ip = mtod(m, struct ip *); /*update ip pointer */
525 hlen = ip->ip_hl << 2;
526 m->m_len -= hlen;
527 m->m_data += hlen;
528 }
529 m->m_len += hlen;
530 m->m_data -= hlen;
531
532 /*
533 * Create header for new ip packet by modifying header of first
534 * packet; dequeue and discard fragment reassembly header.
535 * Make header visible.
536 */
537
538 ip->ip_len = next;
539 ip->ip_src = fp->ipq_src;
540 ip->ip_dst = fp->ipq_dst;
541 TAILQ_REMOVE(head, fp, ipq_list);
542 nipq--;
543 RTMemFree(fp);
544
545 Assert((ip->ip_len == next));
546 /* some debugging cruft by sklower, below, will go away soon */
547#if 0
548 if (m->m_flags & M_PKTHDR) /* XXX this should be done elsewhere */
549 m_fixhdr(m);
550#endif
551 ipstat.ips_reassembled++;
552 return (m);
553
554dropfrag:
555 ipstat.ips_fragdropped++;
556 if (fp != NULL)
557 fp->ipq_nfrags--;
558 m_freem(pData, m);
559
560done:
561 return NULL;
562
563#undef GETIP
564}
565
566void
567ip_freef(PNATState pData, struct ipqhead *fhp, struct ipq_t *fp)
568{
569 struct mbuf *q;
570
571 while (fp->ipq_frags)
572 {
573 q = fp->ipq_frags;
574 fp->ipq_frags = q->m_nextpkt;
575 m_freem(pData, q);
576 }
577 TAILQ_REMOVE(fhp, fp, ipq_list);
578 RTMemFree(fp);
579 nipq--;
580}
581
582/*
583 * IP timer processing;
584 * if a timer expires on a reassembly
585 * queue, discard it.
586 */
587void
588ip_slowtimo(PNATState pData)
589{
590 register struct ipq_t *fp;
591
592 /* XXX: the fragment expiration is the same but requier
593 * additional loop see (see ip_input.c in FreeBSD tree)
594 */
595 int i;
596 DEBUG_CALL("ip_slowtimo");
597 for (i = 0; i < IPREASS_NHASH; i++)
598 {
599 for(fp = TAILQ_FIRST(&ipq[i]); fp;)
600 {
601 struct ipq_t *fpp;
602
603 fpp = fp;
604 fp = TAILQ_NEXT(fp, ipq_list);
605 if(--fpp->ipq_ttl == 0)
606 {
607 ipstat.ips_fragtimeout += fpp->ipq_nfrags;
608 ip_freef(pData, &ipq[i], fpp);
609 }
610 }
611 }
612 /*
613 * If we are over the maximum number of fragments
614 * (due to the limit being lowered), drain off
615 * enough to get down to the new limit.
616 */
617 if (maxnipq >= 0 && nipq > maxnipq)
618 {
619 for (i = 0; i < IPREASS_NHASH; i++)
620 {
621 while (nipq > maxnipq && !TAILQ_EMPTY(&ipq[i]))
622 {
623 ipstat.ips_fragdropped += TAILQ_FIRST(&ipq[i])->ipq_nfrags;
624 ip_freef(pData, &ipq[i], TAILQ_FIRST(&ipq[i]));
625 }
626 }
627 }
628}
629
630
631/*
632 * Strip out IP options, at higher
633 * level protocol in the kernel.
634 * Second argument is buffer to which options
635 * will be moved, and return value is their length.
636 * (XXX) should be deleted; last arg currently ignored.
637 */
638void
639ip_stripoptions(struct mbuf *m, struct mbuf *mopt)
640{
641 register int i;
642 struct ip *ip = mtod(m, struct ip *);
643 register caddr_t opts;
644 int olen;
645
646 olen = (ip->ip_hl<<2) - sizeof(struct ip);
647 opts = (caddr_t)(ip + 1);
648 i = m->m_len - (sizeof(struct ip) + olen);
649 memcpy(opts, opts + olen, (unsigned)i);
650 m->m_len -= olen;
651
652 ip->ip_hl = sizeof(struct ip) >> 2;
653}
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette