VirtualBox

source: vbox/trunk/src/VBox/Devices/Network/slirp/socket.c@ 28449

Last change on this file since 28449 was 28449, checked in by vboxsync, 15 years ago

NAT: slirp file headers

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 43.0 KB
Line 
1/* $Id: socket.c 28449 2010-04-19 09:52:59Z vboxsync $ */
2/** @file
3 * NAT - socket handling.
4 */
5
6/*
7 * Copyright (C) 2006-2010 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22/*
23 * This code is based on:
24 *
25 * Copyright (c) 1995 Danny Gasparovski.
26 *
27 * Please read the file COPYRIGHT for the
28 * terms and conditions of the copyright.
29 */
30
31#define WANT_SYS_IOCTL_H
32#include <slirp.h>
33#include "ip_icmp.h"
34#include "main.h"
35#ifdef __sun__
36#include <sys/filio.h>
37#endif
38#include <VBox/pdmdrv.h>
39#if defined (RT_OS_WINDOWS)
40#include <iphlpapi.h>
41#include <icmpapi.h>
42#endif
43
44
45static void send_icmp_to_guest(PNATState, char *, size_t, struct socket *, const struct sockaddr_in *);
46#ifdef RT_OS_WINDOWS
47static void sorecvfrom_icmp_win(PNATState, struct socket *);
48#else /* RT_OS_WINDOWS */
49static void sorecvfrom_icmp_unix(PNATState, struct socket *);
50#endif /* !RT_OS_WINDOWS */
51
/*
 * Socket module initialisation hook.
 * Nothing needs to be set up at present, so the body is empty.
 */
void so_init()
{
    /* intentionally empty */
}
56
57struct socket *
58solookup(struct socket *head, struct in_addr laddr,
59 u_int lport, struct in_addr faddr, u_int fport)
60{
61 struct socket *so;
62
63 for (so = head->so_next; so != head; so = so->so_next)
64 {
65 if ( so->so_lport == lport
66 && so->so_laddr.s_addr == laddr.s_addr
67 && so->so_faddr.s_addr == faddr.s_addr
68 && so->so_fport == fport)
69 return so;
70 }
71
72 return (struct socket *)NULL;
73}
74
75/*
76 * Create a new socket, initialise the fields
77 * It is the responsibility of the caller to
78 * insque() it into the correct linked-list
79 */
80struct socket *
81socreate()
82{
83 struct socket *so;
84
85 so = (struct socket *)RTMemAllocZ(sizeof(struct socket));
86 if (so)
87 {
88 so->so_state = SS_NOFDREF;
89 so->s = -1;
90#if !defined(RT_OS_WINDOWS)
91 so->so_poll_index = -1;
92#endif
93 }
94 return so;
95}
96
97/*
98 * remque and free a socket, clobber cache
99 * VBOX_WITH_SLIRP_MT: before sofree queue should be locked, because
100 * in sofree we don't know from which queue item beeing removed.
101 */
102void
103sofree(PNATState pData, struct socket *so)
104{
105 struct socket *so_prev = NULL;
106 if (so == tcp_last_so)
107 tcp_last_so = &tcb;
108 else if (so == udp_last_so)
109 udp_last_so = &udb;
110
111 /* check if mbuf haven't been already freed */
112 if (so->so_m != NULL)
113 m_freem(pData, so->so_m);
114#ifndef VBOX_WITH_SLIRP_MT
115 if (so->so_next && so->so_prev)
116 {
117 remque(pData, so); /* crashes if so is not in a queue */
118 NSOCK_DEC();
119 }
120
121 RTMemFree(so);
122#else
123 so->so_deleted = 1;
124#endif
125}
126
#ifdef VBOX_WITH_SLIRP_MT
/*
 * Wrapper around soread() that hands the result back through *ret
 * instead of returning it.
 */
void
soread_queue(PNATState pData, struct socket *so, int *ret)
{
    int cbRead = soread(pData, so);

    *ret = cbRead;
}
#endif
134
135/*
136 * Read from so's socket into sb_snd, updating all relevant sbuf fields
137 * NOTE: This will only be called if it is select()ed for reading, so
138 * a read() of 0 (or less) means it's disconnected
139 */
140int
141soread(PNATState pData, struct socket *so)
142{
143 int n, nn, lss, total;
144 struct sbuf *sb = &so->so_snd;
145 size_t len = sb->sb_datalen - sb->sb_cc;
146 struct iovec iov[2];
147 int mss = so->so_tcpcb->t_maxseg;
148
149 STAM_PROFILE_START(&pData->StatIOread, a);
150 STAM_COUNTER_RESET(&pData->StatIORead_in_1);
151 STAM_COUNTER_RESET(&pData->StatIORead_in_2);
152
153 QSOCKET_LOCK(tcb);
154 SOCKET_LOCK(so);
155 QSOCKET_UNLOCK(tcb);
156
157 DEBUG_CALL("soread");
158 DEBUG_ARG("so = %lx", (long)so);
159
160 /*
161 * No need to check if there's enough room to read.
162 * soread wouldn't have been called if there weren't
163 */
164
165 len = sb->sb_datalen - sb->sb_cc;
166
167 iov[0].iov_base = sb->sb_wptr;
168 iov[1].iov_base = 0;
169 iov[1].iov_len = 0;
170 if (sb->sb_wptr < sb->sb_rptr)
171 {
172 iov[0].iov_len = sb->sb_rptr - sb->sb_wptr;
173 /* Should never succeed, but... */
174 if (iov[0].iov_len > len)
175 iov[0].iov_len = len;
176 if (iov[0].iov_len > mss)
177 iov[0].iov_len -= iov[0].iov_len%mss;
178 n = 1;
179 }
180 else
181 {
182 iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr;
183 /* Should never succeed, but... */
184 if (iov[0].iov_len > len)
185 iov[0].iov_len = len;
186 len -= iov[0].iov_len;
187 if (len)
188 {
189 iov[1].iov_base = sb->sb_data;
190 iov[1].iov_len = sb->sb_rptr - sb->sb_data;
191 if (iov[1].iov_len > len)
192 iov[1].iov_len = len;
193 total = iov[0].iov_len + iov[1].iov_len;
194 if (total > mss)
195 {
196 lss = total % mss;
197 if (iov[1].iov_len > lss)
198 {
199 iov[1].iov_len -= lss;
200 n = 2;
201 }
202 else
203 {
204 lss -= iov[1].iov_len;
205 iov[0].iov_len -= lss;
206 n = 1;
207 }
208 }
209 else
210 n = 2;
211 }
212 else
213 {
214 if (iov[0].iov_len > mss)
215 iov[0].iov_len -= iov[0].iov_len%mss;
216 n = 1;
217 }
218 }
219
220#ifdef HAVE_READV
221 nn = readv(so->s, (struct iovec *)iov, n);
222 DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn));
223#else
224 nn = recv(so->s, iov[0].iov_base, iov[0].iov_len, (so->so_tcpcb->t_force? MSG_OOB:0));
225#endif
226 if (nn <= 0)
227 {
228 /*
229 * Special case for WSAEnumNetworkEvents: If we receive 0 bytes that
230 * _could_ mean that the connection is closed. But we will receive an
231 * FD_CLOSE event later if the connection was _really_ closed. With
232 * www.youtube.com I see this very often. Closing the socket too early
233 * would be dangerous.
234 */
235 int status;
236 unsigned long pending = 0;
237 status = ioctlsocket(so->s, FIONREAD, &pending);
238 if (status < 0)
239 LogRel(("NAT:error in WSAIoctl: %d\n", errno));
240 if (nn == 0 && (pending != 0))
241 {
242 SOCKET_UNLOCK(so);
243 STAM_PROFILE_STOP(&pData->StatIOread, a);
244 return 0;
245 }
246 if ( nn < 0
247 && ( errno == EINTR
248 || errno == EAGAIN
249 || errno == EWOULDBLOCK))
250 {
251 SOCKET_UNLOCK(so);
252 STAM_PROFILE_STOP(&pData->StatIOread, a);
253 return 0;
254 }
255 else
256 {
257 /* nn == 0 means peer has performed an orderly shutdown */
258 DEBUG_MISC((dfd, " --- soread() disconnected, nn = %d, errno = %d-%s\n",
259 nn, errno, strerror(errno)));
260 sofcantrcvmore(so);
261 tcp_sockclosed(pData, sototcpcb(so));
262 SOCKET_UNLOCK(so);
263 STAM_PROFILE_STOP(&pData->StatIOread, a);
264 return -1;
265 }
266 }
267 STAM_STATS(
268 if (n == 1)
269 {
270 STAM_COUNTER_INC(&pData->StatIORead_in_1);
271 STAM_COUNTER_ADD(&pData->StatIORead_in_1_bytes, nn);
272 }
273 else
274 {
275 STAM_COUNTER_INC(&pData->StatIORead_in_2);
276 STAM_COUNTER_ADD(&pData->StatIORead_in_2_1st_bytes, nn);
277 }
278 );
279
280#ifndef HAVE_READV
281 /*
282 * If there was no error, try and read the second time round
283 * We read again if n = 2 (ie, there's another part of the buffer)
284 * and we read as much as we could in the first read
285 * We don't test for <= 0 this time, because there legitimately
286 * might not be any more data (since the socket is non-blocking),
287 * a close will be detected on next iteration.
288 * A return of -1 wont (shouldn't) happen, since it didn't happen above
289 */
290 if (n == 2 && nn == iov[0].iov_len)
291 {
292 int ret;
293 ret = recv(so->s, iov[1].iov_base, iov[1].iov_len, 0);
294 if (ret > 0)
295 nn += ret;
296 STAM_STATS(
297 if (ret > 0)
298 {
299 STAM_COUNTER_INC(&pData->StatIORead_in_2);
300 STAM_COUNTER_ADD(&pData->StatIORead_in_2_2nd_bytes, ret);
301 }
302 );
303 }
304
305 DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn));
306#endif
307
308 /* Update fields */
309 sb->sb_cc += nn;
310 sb->sb_wptr += nn;
311 if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen))
312 sb->sb_wptr -= sb->sb_datalen;
313 STAM_PROFILE_STOP(&pData->StatIOread, a);
314 SOCKET_UNLOCK(so);
315 return nn;
316}
317
318/*
319 * Get urgent data
320 *
321 * When the socket is created, we set it SO_OOBINLINE,
322 * so when OOB data arrives, we soread() it and everything
323 * in the send buffer is sent as urgent data
324 */
325void
326sorecvoob(PNATState pData, struct socket *so)
327{
328 struct tcpcb *tp = sototcpcb(so);
329 ssize_t ret;
330
331 DEBUG_CALL("sorecvoob");
332 DEBUG_ARG("so = %lx", (long)so);
333
334 /*
335 * We take a guess at how much urgent data has arrived.
336 * In most situations, when urgent data arrives, the next
337 * read() should get all the urgent data. This guess will
338 * be wrong however if more data arrives just after the
339 * urgent data, or the read() doesn't return all the
340 * urgent data.
341 */
342 ret = soread(pData, so);
343 tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
344 tp->t_force = 1;
345 tcp_output(pData, tp);
346 tp->t_force = 0;
347}
348
349/*
350 * Send urgent data
351 * There's a lot duplicated code here, but...
352 */
353int
354sosendoob(struct socket *so)
355{
356 struct sbuf *sb = &so->so_rcv;
357 char buff[2048]; /* XXX Shouldn't be sending more oob data than this */
358
359 int n, len;
360
361 DEBUG_CALL("sosendoob");
362 DEBUG_ARG("so = %lx", (long)so);
363 DEBUG_ARG("sb->sb_cc = %d", sb->sb_cc);
364
365 if (so->so_urgc > sizeof(buff))
366 so->so_urgc = sizeof(buff); /* XXX */
367
368 if (sb->sb_rptr < sb->sb_wptr)
369 {
370 /* We can send it directly */
371 n = send(so->s, sb->sb_rptr, so->so_urgc, (MSG_OOB)); /* |MSG_DONTWAIT)); */
372 so->so_urgc -= n;
373
374 DEBUG_MISC((dfd, " --- sent %d bytes urgent data, %d urgent bytes left\n",
375 n, so->so_urgc));
376 }
377 else
378 {
379 /*
380 * Since there's no sendv or sendtov like writev,
381 * we must copy all data to a linear buffer then
382 * send it all
383 */
384 len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
385 if (len > so->so_urgc)
386 len = so->so_urgc;
387 memcpy(buff, sb->sb_rptr, len);
388 so->so_urgc -= len;
389 if (so->so_urgc)
390 {
391 n = sb->sb_wptr - sb->sb_data;
392 if (n > so->so_urgc)
393 n = so->so_urgc;
394 memcpy(buff + len, sb->sb_data, n);
395 so->so_urgc -= n;
396 len += n;
397 }
398 n = send(so->s, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */
399#ifdef DEBUG
400 if (n != len)
401 DEBUG_ERROR((dfd, "Didn't send all data urgently XXXXX\n"));
402#endif
403 DEBUG_MISC((dfd, " ---2 sent %d bytes urgent data, %d urgent bytes left\n",
404 n, so->so_urgc));
405 }
406
407 sb->sb_cc -= n;
408 sb->sb_rptr += n;
409 if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
410 sb->sb_rptr -= sb->sb_datalen;
411
412 return n;
413}
414
415/*
416 * Write data from so_rcv to so's socket,
417 * updating all sbuf field as necessary
418 */
419int
420sowrite(PNATState pData, struct socket *so)
421{
422 int n, nn;
423 struct sbuf *sb = &so->so_rcv;
424 size_t len = sb->sb_cc;
425 struct iovec iov[2];
426
427 STAM_PROFILE_START(&pData->StatIOwrite, a);
428 STAM_COUNTER_RESET(&pData->StatIOWrite_in_1);
429 STAM_COUNTER_RESET(&pData->StatIOWrite_in_1_bytes);
430 STAM_COUNTER_RESET(&pData->StatIOWrite_in_2);
431 STAM_COUNTER_RESET(&pData->StatIOWrite_in_2_1st_bytes);
432 STAM_COUNTER_RESET(&pData->StatIOWrite_in_2_2nd_bytes);
433 STAM_COUNTER_RESET(&pData->StatIOWrite_no_w);
434 STAM_COUNTER_RESET(&pData->StatIOWrite_rest);
435 STAM_COUNTER_RESET(&pData->StatIOWrite_rest_bytes);
436 DEBUG_CALL("sowrite");
437 DEBUG_ARG("so = %lx", (long)so);
438 QSOCKET_LOCK(tcb);
439 SOCKET_LOCK(so);
440 QSOCKET_UNLOCK(tcb);
441 if (so->so_urgc)
442 {
443 sosendoob(so);
444 if (sb->sb_cc == 0)
445 {
446 SOCKET_UNLOCK(so);
447 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
448 return 0;
449 }
450 }
451
452 /*
453 * No need to check if there's something to write,
454 * sowrite wouldn't have been called otherwise
455 */
456
457 len = sb->sb_cc;
458
459 iov[0].iov_base = sb->sb_rptr;
460 iov[1].iov_base = 0;
461 iov[1].iov_len = 0;
462 if (sb->sb_rptr < sb->sb_wptr)
463 {
464 iov[0].iov_len = sb->sb_wptr - sb->sb_rptr;
465 /* Should never succeed, but... */
466 if (iov[0].iov_len > len)
467 iov[0].iov_len = len;
468 n = 1;
469 }
470 else
471 {
472 iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
473 if (iov[0].iov_len > len)
474 iov[0].iov_len = len;
475 len -= iov[0].iov_len;
476 if (len)
477 {
478 iov[1].iov_base = sb->sb_data;
479 iov[1].iov_len = sb->sb_wptr - sb->sb_data;
480 if (iov[1].iov_len > len)
481 iov[1].iov_len = len;
482 n = 2;
483 }
484 else
485 n = 1;
486 }
487 STAM_STATS({
488 if (n == 1)
489 {
490 STAM_COUNTER_INC(&pData->StatIOWrite_in_1);
491 STAM_COUNTER_ADD(&pData->StatIOWrite_in_1_bytes, iov[0].iov_len);
492 }
493 else
494 {
495 STAM_COUNTER_INC(&pData->StatIOWrite_in_2);
496 STAM_COUNTER_ADD(&pData->StatIOWrite_in_2_1st_bytes, iov[0].iov_len);
497 STAM_COUNTER_ADD(&pData->StatIOWrite_in_2_2nd_bytes, iov[1].iov_len);
498 }
499 });
500 /* Check if there's urgent data to send, and if so, send it */
501#ifdef HAVE_READV
502 nn = writev(so->s, (const struct iovec *)iov, n);
503 DEBUG_MISC((dfd, " ... wrote nn = %d bytes\n", nn));
504#else
505 nn = send(so->s, iov[0].iov_base, iov[0].iov_len, 0);
506#endif
507 /* This should never happen, but people tell me it does *shrug* */
508 if ( nn < 0
509 && ( errno == EAGAIN
510 || errno == EINTR
511 || errno == EWOULDBLOCK))
512 {
513 SOCKET_UNLOCK(so);
514 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
515 return 0;
516 }
517
518 if (nn < 0 || (nn == 0 && iov[0].iov_len > 0))
519 {
520 DEBUG_MISC((dfd, " --- sowrite disconnected, so->so_state = %x, errno = %d\n",
521 so->so_state, errno));
522 sofcantsendmore(so);
523 tcp_sockclosed(pData, sototcpcb(so));
524 SOCKET_UNLOCK(so);
525 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
526 return -1;
527 }
528
529#ifndef HAVE_READV
530 if (n == 2 && nn == iov[0].iov_len)
531 {
532 int ret;
533 ret = send(so->s, iov[1].iov_base, iov[1].iov_len, 0);
534 if (ret > 0)
535 nn += ret;
536 STAM_STATS({
537 if (ret > 0 && ret != iov[1].iov_len)
538 {
539 STAM_COUNTER_INC(&pData->StatIOWrite_rest);
540 STAM_COUNTER_ADD(&pData->StatIOWrite_rest_bytes, (ret - iov[1].iov_len));
541 }
542 });
543 }
544 DEBUG_MISC((dfd, " ... wrote nn = %d bytes\n", nn));
545#endif
546
547 /* Update sbuf */
548 sb->sb_cc -= nn;
549 sb->sb_rptr += nn;
550 if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
551 sb->sb_rptr -= sb->sb_datalen;
552
553 /*
554 * If in DRAIN mode, and there's no more data, set
555 * it CANTSENDMORE
556 */
557 if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0)
558 sofcantsendmore(so);
559
560 SOCKET_UNLOCK(so);
561 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
562 return nn;
563}
564
565/*
566 * recvfrom() a UDP socket
567 */
568void
569sorecvfrom(PNATState pData, struct socket *so)
570{
571 ssize_t ret = 0;
572 struct sockaddr_in addr;
573 socklen_t addrlen = sizeof(struct sockaddr_in);
574
575 DEBUG_CALL("sorecvfrom");
576 DEBUG_ARG("so = %lx", (long)so);
577
578 if (so->so_type == IPPROTO_ICMP)
579 {
580 /* This is a "ping" reply */
581#ifdef RT_OS_WINDOWS
582 sorecvfrom_icmp_win(pData, so);
583#else /* RT_OS_WINDOWS */
584 sorecvfrom_icmp_unix(pData, so);
585#endif /* !RT_OS_WINDOWS */
586 udp_detach(pData, so);
587 }
588 else
589 {
590 /* A "normal" UDP packet */
591 struct mbuf *m;
592 ssize_t len;
593 u_long n = 0;
594#ifdef VBOX_WITH_SLIRP_BSD_MBUF
595 int size;
596#endif
597 int rc = 0;
598 static int signalled = 0;
599
600 QSOCKET_LOCK(udb);
601 SOCKET_LOCK(so);
602 QSOCKET_UNLOCK(udb);
603
604#ifndef VBOX_WITH_SLIRP_BSD_MBUF
605 if (!(m = m_get(pData)))
606 {
607 SOCKET_UNLOCK(so);
608 return;
609 }
610 /* adjust both parameters to maks M_FREEROOM calculate correct */
611 m->m_data += if_maxlinkhdr + sizeof(struct udphdr) + sizeof(struct ip);
612
613 /*
614 * XXX Shouldn't FIONREAD packets destined for port 53,
615 * but I don't know the max packet size for DNS lookups
616 */
617 len = M_FREEROOM(m);
618 /* if (so->so_fport != RT_H2N_U16_C(53)) */
619 rc = ioctlsocket(so->s, FIONREAD, &n);
620 if ( rc == -1
621 && ( errno == EAGAIN
622 || errno == EWOULDBLOCK
623 || errno == EINPROGRESS
624 || errno == ENOTCONN))
625 {
626 m_freem(pData, m);
627 return;
628 }
629
630 Log2(("NAT: %R[natsock] ioctlsocket before read "
631 "(rc:%d errno:%d, n:%d)\n", so, rc, errno, n));
632
633 if (rc == -1 && signalled == 0)
634 {
635 LogRel(("NAT: can't fetch amount of bytes on socket %R[natsock], so message will be truncated.\n", so));
636 signalled = 1;
637 m_freem(pData, m);
638 return;
639 }
640
641 if (rc != -1 && n > len)
642 {
643 n = (m->m_data - m->m_dat) + m->m_len + n + 1;
644 m_inc(m, n);
645 len = M_FREEROOM(m);
646 }
647 ret = recvfrom(so->s, m->m_data, len, 0,
648 (struct sockaddr *)&addr, &addrlen);
649 Log2(("NAT: %R[natsock] ioctlsocket after read "
650 "(rc:%d errno:%d, n:%d) ret:%d, len:%d\n", so,
651 rc, errno, n, ret, len));
652#else
653 /*How many data has been received ?*/
654 /*
655 * 1. calculate how much we can read
656 * 2. read as much as possible
657 * 3. attach buffer to allocated header mbuf
658 */
659 rc = ioctlsocket(so->s, FIONREAD, &n);
660 if (rc == -1 && signalled == 0)
661 {
662 LogRel(("NAT: can't fetch amount of bytes on socket %R[natsock], so message will be truncated.\n", so));
663 signalled = 1;
664 }
665
666 len = sizeof(struct udpiphdr) + ETH_HLEN;
667 if (n > (if_mtu - len))
668 {
669 n = if_mtu - len; /* can't read than we can put in the mbuf*/
670 }
671 len += n;
672
673 size = MCLBYTES;
674 if (len < MSIZE)
675 size = MCLBYTES;
676 else if (len < MCLBYTES)
677 size = MCLBYTES;
678 else if (len < MJUM9BYTES)
679 size = MJUM9BYTES;
680 else if (len < MJUM16BYTES)
681 size = MJUM16BYTES;
682 else
683 AssertMsgFailed(("Unsupported size"));
684
685 m = m_getjcl(pData, M_NOWAIT, MT_HEADER, M_PKTHDR, size);
686 m->m_data += ETH_HLEN;
687 m->m_pkthdr.header = mtod(m, void *);
688 m->m_data += sizeof(struct udpiphdr);
689 ret = recvfrom(so->s, mtod(m, char *), n, 0,
690 (struct sockaddr *)&addr, &addrlen);
691 /* @todo (vvl) check which flags and type should be passed */
692#endif
693 m->m_len = ret;
694 if (ret < 0)
695 {
696 u_char code = ICMP_UNREACH_PORT;
697
698 if (errno == EHOSTUNREACH)
699 code = ICMP_UNREACH_HOST;
700 else if (errno == ENETUNREACH)
701 code = ICMP_UNREACH_NET;
702
703 m_freem(pData, m);
704 if ( errno == EAGAIN
705 || errno == EWOULDBLOCK
706 || errno == EINPROGRESS
707 || errno == ENOTCONN)
708 {
709 return;
710 }
711
712 Log2((" rx error, tx icmp ICMP_UNREACH:%i\n", code));
713 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
714 so->so_m = NULL;
715 }
716 else
717 {
718 /*
719 * Hack: domain name lookup will be used the most for UDP,
720 * and since they'll only be used once there's no need
721 * for the 4 minute (or whatever) timeout... So we time them
722 * out much quicker (10 seconds for now...)
723 */
724 if (so->so_expire)
725 {
726 if (so->so_fport != RT_H2N_U16_C(53))
727 so->so_expire = curtime + SO_EXPIRE;
728 }
729 /*
730 * last argument should be changed if Slirp will inject IP attributes
731 * Note: Here we can't check if dnsproxy's sent initial request
732 */
733#ifndef VBOX_WITH_SLIRP_BSD_MBUF
734 if (so->so_fport == RT_H2N_U16_C(53))
735 dnsproxy_answer(pData, so, m);
736#endif
737
738#if 0
739 if (m->m_len == len)
740 {
741 m_inc(m, MINCSIZE);
742 m->m_len = 0;
743 }
744#endif
745
746 /*
747 * If this packet was destined for CTL_ADDR,
748 * make it look like that's where it came from, done by udp_output
749 */
750 udp_output(pData, so, m, &addr);
751 SOCKET_UNLOCK(so);
752 } /* rx error */
753 } /* if ping packet */
754}
755
756/*
757 * sendto() a socket
758 */
759int
760sosendto(PNATState pData, struct socket *so, struct mbuf *m)
761{
762 int ret;
763 struct sockaddr_in *paddr;
764 struct sockaddr addr;
765#if 0
766 struct sockaddr_in host_addr;
767#endif
768#ifdef VBOX_WITH_SLIRP_BSD_MBUF
769 caddr_t buf;
770 int mlen;
771#endif
772
773 DEBUG_CALL("sosendto");
774 DEBUG_ARG("so = %lx", (long)so);
775 DEBUG_ARG("m = %lx", (long)m);
776
777 memset(&addr, 0, sizeof(struct sockaddr));
778#ifdef RT_OS_DARWIN
779 addr.sa_len = sizeof(struct sockaddr_in);
780#endif
781 paddr = (struct sockaddr_in *)&addr;
782 paddr->sin_family = AF_INET;
783 if ((so->so_faddr.s_addr & RT_H2N_U32(pData->netmask)) == pData->special_addr.s_addr)
784 {
785 /* It's an alias */
786 uint32_t last_byte = RT_N2H_U32(so->so_faddr.s_addr) & ~pData->netmask;
787 switch(last_byte)
788 {
789#if 0
790 /* handle this case at 'default:' */
791 case CTL_BROADCAST:
792 addr.sin_addr.s_addr = INADDR_BROADCAST;
793 /* Send the packet to host to fully emulate broadcast */
794 /** @todo r=klaus: on Linux host this causes the host to receive
795 * the packet twice for some reason. And I cannot find any place
796 * in the man pages which states that sending a broadcast does not
797 * reach the host itself. */
798 host_addr.sin_family = AF_INET;
799 host_addr.sin_port = so->so_fport;
800 host_addr.sin_addr = our_addr;
801 sendto(so->s, m->m_data, m->m_len, 0,
802 (struct sockaddr *)&host_addr, sizeof (struct sockaddr));
803 break;
804#endif
805 case CTL_DNS:
806 case CTL_ALIAS:
807 default:
808 if (last_byte == ~pData->netmask)
809 paddr->sin_addr.s_addr = INADDR_BROADCAST;
810 else
811 paddr->sin_addr = loopback_addr;
812 break;
813 }
814 }
815 else
816 paddr->sin_addr = so->so_faddr;
817 paddr->sin_port = so->so_fport;
818
819 DEBUG_MISC((dfd, " sendto()ing, addr.sin_port=%d, addr.sin_addr.s_addr=%.16s\n",
820 RT_N2H_U16(paddr->sin_port), inet_ntoa(paddr->sin_addr)));
821
822 /* Don't care what port we get */
823#ifndef VBOX_WITH_SLIRP_BSD_MBUF
824 ret = sendto(so->s, m->m_data, m->m_len, 0, &addr, sizeof (struct sockaddr_in));
825#else
826 mlen = m_length(m, NULL);
827 buf = RTMemAlloc(mlen);
828 if (buf == NULL)
829 {
830 return -1;
831 }
832 m_copydata(m, 0, mlen, buf);
833 ret = sendto(so->s, buf, mlen, 0,
834 (struct sockaddr *)&addr, sizeof (struct sockaddr));
835#endif
836 if (ret < 0)
837 {
838 Log2(("UDP: sendto fails (%s)\n", strerror(errno)));
839 return -1;
840 }
841
842 /*
843 * Kill the socket if there's no reply in 4 minutes,
844 * but only if it's an expirable socket
845 */
846 if (so->so_expire)
847 so->so_expire = curtime + SO_EXPIRE;
848 so->so_state = SS_ISFCONNECTED; /* So that it gets select()ed */
849 return 0;
850}
851
852/*
853 * XXX This should really be tcp_listen
854 */
855struct socket *
856solisten(PNATState pData, u_int32_t bind_addr, u_int port, u_int32_t laddr, u_int lport, int flags)
857{
858 struct sockaddr_in addr;
859 struct socket *so;
860 socklen_t addrlen = sizeof(addr);
861 int s, opt = 1;
862 int status;
863
864 DEBUG_CALL("solisten");
865 DEBUG_ARG("port = %d", port);
866 DEBUG_ARG("laddr = %x", laddr);
867 DEBUG_ARG("lport = %d", lport);
868 DEBUG_ARG("flags = %x", flags);
869
870 if ((so = socreate()) == NULL)
871 {
872 /* RTMemFree(so); Not sofree() ??? free(NULL) == NOP */
873 return NULL;
874 }
875
876 /* Don't tcp_attach... we don't need so_snd nor so_rcv */
877 if ((so->so_tcpcb = tcp_newtcpcb(pData, so)) == NULL)
878 {
879 RTMemFree(so);
880 return NULL;
881 }
882
883 SOCKET_LOCK_CREATE(so);
884 SOCKET_LOCK(so);
885 QSOCKET_LOCK(tcb);
886 insque(pData, so,&tcb);
887 NSOCK_INC();
888 QSOCKET_UNLOCK(tcb);
889
890 /*
891 * SS_FACCEPTONCE sockets must time out.
892 */
893 if (flags & SS_FACCEPTONCE)
894 so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT*2;
895
896 so->so_state = (SS_FACCEPTCONN|flags);
897 so->so_lport = lport; /* Kept in network format */
898 so->so_laddr.s_addr = laddr; /* Ditto */
899
900 memset(&addr, 0, sizeof(addr));
901#ifdef RT_OS_DARWIN
902 addr.sin_len = sizeof(addr);
903#endif
904 addr.sin_family = AF_INET;
905 addr.sin_addr.s_addr = bind_addr;
906 addr.sin_port = port;
907
908 if ( ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0)
909 || (setsockopt(s, SOL_SOCKET, SO_REUSEADDR,(char *)&opt, sizeof(int)) < 0)
910 || (bind(s,(struct sockaddr *)&addr, sizeof(addr)) < 0)
911 || (listen(s, 1) < 0))
912 {
913#ifdef RT_OS_WINDOWS
914 int tmperrno = WSAGetLastError(); /* Don't clobber the real reason we failed */
915 closesocket(s);
916 QSOCKET_LOCK(tcb);
917 sofree(pData, so);
918 QSOCKET_UNLOCK(tcb);
919 /* Restore the real errno */
920 WSASetLastError(tmperrno);
921#else
922 int tmperrno = errno; /* Don't clobber the real reason we failed */
923 close(s);
924 QSOCKET_LOCK(tcb);
925 sofree(pData, so);
926 QSOCKET_UNLOCK(tcb);
927 /* Restore the real errno */
928 errno = tmperrno;
929#endif
930 return NULL;
931 }
932 fd_nonblock(s);
933 setsockopt(s, SOL_SOCKET, SO_OOBINLINE,(char *)&opt, sizeof(int));
934
935 getsockname(s,(struct sockaddr *)&addr,&addrlen);
936 so->so_fport = addr.sin_port;
937 /* set socket buffers */
938 opt = pData->socket_rcv;
939 status = setsockopt(s, SOL_SOCKET, SO_RCVBUF, (char *)&opt, sizeof(int));
940 if (status < 0)
941 {
942 LogRel(("NAT: Error(%d) while setting RCV capacity to (%d)\n", errno, opt));
943 goto no_sockopt;
944 }
945 opt = pData->socket_snd;
946 status = setsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *)&opt, sizeof(int));
947 if (status < 0)
948 {
949 LogRel(("NAT: Error(%d) while setting SND capacity to (%d)\n", errno, opt));
950 goto no_sockopt;
951 }
952no_sockopt:
953 if (addr.sin_addr.s_addr == 0 || addr.sin_addr.s_addr == loopback_addr.s_addr)
954 so->so_faddr = alias_addr;
955 else
956 so->so_faddr = addr.sin_addr;
957
958 so->s = s;
959 SOCKET_UNLOCK(so);
960 return so;
961}
962
/*
 * Notification that data is available in so_rcv.
 * The idea is to simply write() the data to the socket,
 * XXX but that is not done yet: the function currently does nothing.
 */
void
sorwakeup(struct socket *so)
{
#if 0
    sowrite(so);
    FD_CLR(so->s,&writefds);
#endif
    (void)so;
}
976
/*
 * Notification that data has been freed in so_snd, i.e. there is
 * room for a read() if we want one.
 * For now nothing is read here; it'll be done in the main loop.
 */
void
sowwakeup(struct socket *so)
{
    (void)so;
}
986
987/*
988 * Various session state calls
989 * XXX Should be #define's
990 * The socket state stuff needs work, these often get call 2 or 3
991 * times each when only 1 was needed
992 */
993void
994soisfconnecting(struct socket *so)
995{
996 so->so_state &= ~(SS_NOFDREF|SS_ISFCONNECTED|SS_FCANTRCVMORE|
997 SS_FCANTSENDMORE|SS_FWDRAIN);
998 so->so_state |= SS_ISFCONNECTING; /* Clobber other states */
999}
1000
1001void
1002soisfconnected(struct socket *so)
1003{
1004 so->so_state &= ~(SS_ISFCONNECTING|SS_FWDRAIN|SS_NOFDREF);
1005 so->so_state |= SS_ISFCONNECTED; /* Clobber other states */
1006}
1007
1008void
1009sofcantrcvmore(struct socket *so)
1010{
1011 if ((so->so_state & SS_NOFDREF) == 0)
1012 {
1013 shutdown(so->s, 0);
1014 }
1015 so->so_state &= ~(SS_ISFCONNECTING);
1016 if (so->so_state & SS_FCANTSENDMORE)
1017 so->so_state = SS_NOFDREF; /* Don't select it */
1018 /* XXX close() here as well? */
1019 else
1020 so->so_state |= SS_FCANTRCVMORE;
1021}
1022
1023void
1024sofcantsendmore(struct socket *so)
1025{
1026 if ((so->so_state & SS_NOFDREF) == 0)
1027 shutdown(so->s, 1); /* send FIN to fhost */
1028
1029 so->so_state &= ~(SS_ISFCONNECTING);
1030 if (so->so_state & SS_FCANTRCVMORE)
1031 so->so_state = SS_NOFDREF; /* as above */
1032 else
1033 so->so_state |= SS_FCANTSENDMORE;
1034}
1035
/*
 * Disconnect notification.  The real handling is compiled out,
 * so the function currently does nothing.
 */
void
soisfdisconnected(struct socket *so)
{
#if 0
    so->so_state &= ~(SS_ISFCONNECTING|SS_ISFCONNECTED);
    close(so->s);
    so->so_state = SS_ISFDISCONNECTED;
    /*
     * XXX Do nothing ... ?
     */
#endif
    (void)so;
}
1048
1049/*
1050 * Set write drain mode
1051 * Set CANTSENDMORE once all data has been write()n
1052 */
1053void
1054sofwdrain(struct socket *so)
1055{
1056 if (so->so_rcv.sb_cc)
1057 so->so_state |= SS_FWDRAIN;
1058 else
1059 sofcantsendmore(so);
1060}
1061
1062static void
1063send_icmp_to_guest(PNATState pData, char *buff, size_t len, struct socket *so, const struct sockaddr_in *addr)
1064{
1065 struct ip *ip;
1066 uint32_t dst, src;
1067 char ip_copy[256];
1068 struct icmp *icp;
1069 int old_ip_len = 0;
1070 int hlen, original_hlen = 0;
1071 struct mbuf *m;
1072 struct icmp_msg *icm;
1073 uint8_t proto;
1074 int type = 0;
1075#ifndef VBOX_WITH_SLIRP_BSD_MBUF
1076 int m_room;
1077#endif
1078
1079 ip = (struct ip *)buff;
1080 /* Fix ip->ip_len to contain the total packet length including the header
1081 * in _host_ byte order for all OSes. On Darwin, that value already is in
1082 * host byte order. Solaris and Darwin report only the payload. */
1083#ifndef RT_OS_DARWIN
1084 ip->ip_len = RT_N2H_U16(ip->ip_len);
1085#endif
1086 hlen = (ip->ip_hl << 2);
1087#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
1088 ip->ip_len += hlen;
1089#endif
1090 if (ip->ip_len < hlen + ICMP_MINLEN)
1091 {
1092 Log(("send_icmp_to_guest: ICMP header is too small to understand which type/subtype of the datagram\n"));
1093 return;
1094 }
1095 icp = (struct icmp *)((char *)ip + hlen);
1096
1097 Log(("ICMP:received msg(t:%d, c:%d)\n", icp->icmp_type, icp->icmp_code));
1098 if ( icp->icmp_type != ICMP_ECHOREPLY
1099 && icp->icmp_type != ICMP_TIMXCEED
1100 && icp->icmp_type != ICMP_UNREACH)
1101 {
1102 return;
1103 }
1104
1105 /*
1106 * ICMP_ECHOREPLY, ICMP_TIMXCEED, ICMP_UNREACH minimal header size is
1107 * ICMP_ECHOREPLY assuming data 0
1108 * icmp_{type(8), code(8), cksum(16),identifier(16),seqnum(16)}
1109 */
1110 if (ip->ip_len < hlen + 8)
1111 {
1112 Log(("send_icmp_to_guest: NAT accept ICMP_{ECHOREPLY, TIMXCEED, UNREACH} the minimum size is 64 (see rfc792)\n"));
1113 return;
1114 }
1115
1116 type = icp->icmp_type;
1117 if ( type == ICMP_TIMXCEED
1118 || type == ICMP_UNREACH)
1119 {
1120 /*
1121 * ICMP_TIMXCEED, ICMP_UNREACH minimal header size is
1122 * icmp_{type(8), code(8), cksum(16),unused(32)} + IP header + 64 bit of original datagram
1123 */
1124 if (ip->ip_len < hlen + 2*8 + sizeof(struct ip))
1125 {
1126 Log(("send_icmp_to_guest: NAT accept ICMP_{TIMXCEED, UNREACH} the minimum size of ipheader + 64 bit of data (see rfc792)\n"));
1127 return;
1128 }
1129 ip = &icp->icmp_ip;
1130 }
1131
1132 icm = icmp_find_original_mbuf(pData, ip);
1133 if (icm == NULL)
1134 {
1135 Log(("NAT: Can't find the corresponding packet for the received ICMP\n"));
1136 return;
1137 }
1138
1139 m = icm->im_m;
1140 Assert(m != NULL);
1141
1142 src = addr->sin_addr.s_addr;
1143 if (type == ICMP_ECHOREPLY)
1144 {
1145 struct ip *ip0 = mtod(m, struct ip *);
1146 struct icmp *icp0 = (struct icmp *)((char *)ip0 + (ip0->ip_hl << 2));
1147 if (icp0->icmp_type != ICMP_ECHO)
1148 {
1149 Log(("NAT: we haven't found echo for this reply\n"));
1150 return;
1151 }
1152 /*
1153 * while combining buffer to send (see ip_icmp.c) we control ICMP header only,
1154 * IP header combined by OS network stack, our local copy of IP header contians values
1155 * in host byte order so no byte order conversion is required. IP headers fields are converting
1156 * in ip_output0 routine only.
1157 */
1158 if ( (ip->ip_len - hlen)
1159 != (ip0->ip_len - (ip0->ip_hl << 2)))
1160 {
1161 Log(("NAT: ECHO(%d) lenght doesn't match ECHOREPLY(%d)\n",
1162 (ip->ip_len - hlen), (ip0->ip_len - (ip0->ip_hl << 2))));
1163 return;
1164 }
1165 }
1166
1167 /* ip points on origianal ip header */
1168 ip = mtod(m, struct ip *);
1169 proto = ip->ip_p;
1170 /* Now ip is pointing on header we've sent from guest */
1171 if ( icp->icmp_type == ICMP_TIMXCEED
1172 || icp->icmp_type == ICMP_UNREACH)
1173 {
1174 old_ip_len = (ip->ip_hl << 2) + 64;
1175 if (old_ip_len > sizeof(ip_copy))
1176 old_ip_len = sizeof(ip_copy);
1177 memcpy(ip_copy, ip, old_ip_len);
1178 }
1179
1180 /* source address from original IP packet*/
1181 dst = ip->ip_src.s_addr;
1182
1183 /* overide ther tail of old packet */
1184 ip = mtod(m, struct ip *); /* ip is from mbuf we've overrided */
1185 original_hlen = ip->ip_hl << 2;
1186 /* saves original ip header and options */
1187#ifdef VBOX_WITH_SLIRP_BSD_MBUF
1188 m_copyback(pData, m, original_hlen, len - hlen, buff + hlen);
1189 ip->ip_len = m_length(m, NULL);
1190#else
1191 /* m_room space in the saved m buffer */
1192 m_room = M_ROOM(m);
1193 if (m_room < len - hlen + original_hlen)
1194 {
1195 /* we need involve ether header length into new buffer buffer calculation */
1196 m_inc(m, if_maxlinkhdr + len - hlen + original_hlen);
1197 if (m->m_size < if_maxlinkhdr + len - hlen + original_hlen)
1198 {
1199 Log(("send_icmp_to_guest: extending buffer was failed (packet is dropped)\n"));
1200 return;
1201 }
1202 }
1203 memcpy(m->m_data + original_hlen, buff + hlen, len - hlen);
1204 m->m_len = len - hlen + original_hlen;
1205 ip->ip_len = m->m_len;
1206#endif
1207 ip->ip_p = IPPROTO_ICMP; /* the original package could be whatever, but we're response via ICMP*/
1208
1209 icp = (struct icmp *)((char *)ip + (ip->ip_hl << 2));
1210 type = icp->icmp_type;
1211 if ( type == ICMP_TIMXCEED
1212 || type == ICMP_UNREACH)
1213 {
1214 /* according RFC 793 error messages required copy of initial IP header + 64 bit */
1215 memcpy(&icp->icmp_ip, ip_copy, old_ip_len);
1216 ip->ip_tos = ((ip->ip_tos & 0x1E) | 0xC0); /* high priority for errors */
1217 }
1218
1219 ip->ip_src.s_addr = src;
1220 ip->ip_dst.s_addr = dst;
1221 icmp_reflect(pData, m);
1222 LIST_REMOVE(icm, im_list);
1223 /* Don't call m_free here*/
1224
1225 if ( type == ICMP_TIMXCEED
1226 || type == ICMP_UNREACH)
1227 {
1228 icm->im_so->so_m = NULL;
1229 switch (proto)
1230 {
1231 case IPPROTO_UDP:
1232 /*XXX: so->so_m already freed so we shouldn't call sofree */
1233 udp_detach(pData, icm->im_so);
1234 break;
1235 case IPPROTO_TCP:
1236 /*close tcp should be here */
1237 break;
1238 default:
1239 /* do nothing */
1240 break;
1241 }
1242 }
1243 RTMemFree(icm);
1244}
1245
1246#ifdef RT_OS_WINDOWS
1247static void
1248sorecvfrom_icmp_win(PNATState pData, struct socket *so)
1249{
1250 int len;
1251 int i;
1252 struct ip *ip;
1253 struct mbuf *m;
1254 struct icmp *icp;
1255 struct icmp_msg *icm;
1256 struct ip *ip_broken; /* ICMP returns header + 64 bit of packet */
1257 uint32_t src;
1258 ICMP_ECHO_REPLY *icr;
1259 int hlen = 0;
1260 int data_len = 0;
1261 int nbytes = 0;
1262 u_char code = ~0;
1263 int out_len;
1264 int size;
1265
1266 len = pData->pfIcmpParseReplies(pData->pvIcmpBuffer, pData->szIcmpBuffer);
1267 if (len < 0)
1268 {
1269 LogRel(("NAT: Error (%d) occurred on ICMP receiving\n", GetLastError()));
1270 return;
1271 }
1272 if (len == 0)
1273 return; /* no error */
1274
1275 icr = (ICMP_ECHO_REPLY *)pData->pvIcmpBuffer;
1276 for (i = 0; i < len; ++i)
1277 {
1278 switch(icr[i].Status)
1279 {
1280 case IP_DEST_HOST_UNREACHABLE:
1281 code = (code != ~0 ? code : ICMP_UNREACH_HOST);
1282 case IP_DEST_NET_UNREACHABLE:
1283 code = (code != ~0 ? code : ICMP_UNREACH_NET);
1284 case IP_DEST_PROT_UNREACHABLE:
1285 code = (code != ~0 ? code : ICMP_UNREACH_PROTOCOL);
1286 /* UNREACH error inject here */
1287 case IP_DEST_PORT_UNREACHABLE:
1288 code = (code != ~0 ? code : ICMP_UNREACH_PORT);
1289 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, "Error occurred!!!");
1290 so->so_m = NULL;
1291 break;
1292 case IP_SUCCESS: /* echo replied */
1293# ifndef VBOX_WITH_SLIRP_BSD_MBUF
1294 m = m_get(pData);
1295# else
1296 out_len = ETH_HLEN + sizeof(struct ip) + 8;
1297 size;
1298 size = MCLBYTES;
1299 if (out_len < MSIZE)
1300 size = MCLBYTES;
1301 else if (out_len < MCLBYTES)
1302 size = MCLBYTES;
1303 else if (out_len < MJUM9BYTES)
1304 size = MJUM9BYTES;
1305 else if (out_len < MJUM16BYTES)
1306 size = MJUM16BYTES;
1307 else
1308 AssertMsgFailed(("Unsupported size"));
1309
1310 m = m_getjcl(pData, M_NOWAIT, MT_HEADER, M_PKTHDR, size);
1311# endif
1312 m->m_len = 0;
1313 m->m_data += if_maxlinkhdr;
1314 ip = mtod(m, struct ip *);
1315 ip->ip_src.s_addr = icr[i].Address;
1316 ip->ip_p = IPPROTO_ICMP;
1317 ip->ip_dst.s_addr = so->so_laddr.s_addr; /*XXX: still the hack*/
1318 data_len = sizeof(struct ip);
1319 ip->ip_hl = data_len >> 2; /* requiered for icmp_reflect, no IP options */
1320 ip->ip_ttl = icr[i].Options.Ttl;
1321
1322 icp = (struct icmp *)&ip[1]; /* no options */
1323 icp->icmp_type = ICMP_ECHOREPLY;
1324 icp->icmp_code = 0;
1325 icp->icmp_id = so->so_icmp_id;
1326 icp->icmp_seq = so->so_icmp_seq;
1327
1328 data_len += ICMP_MINLEN;
1329
1330# ifndef VBOX_WITH_SLIRP_BSD_MBUF
1331 nbytes = (data_len + icr[i].DataSize > m->m_size? m->m_size - data_len: icr[i].DataSize);
1332 memcpy(icp->icmp_data, icr[i].Data, nbytes);
1333# else
1334 hlen = (ip->ip_hl << 2);
1335 m->m_pkthdr.header = mtod(m, void *);
1336 m->m_len = data_len;
1337
1338 m_copyback(pData, m, hlen + 8, icr[i].DataSize, icr[i].Data);
1339# endif
1340
1341 data_len += icr[i].DataSize;
1342
1343 ip->ip_len = data_len;
1344 m->m_len = ip->ip_len;
1345
1346 icmp_reflect(pData, m);
1347 break;
1348 case IP_TTL_EXPIRED_TRANSIT: /* TTL expired */
1349
1350 ip_broken = icr[i].Data;
1351 icm = icmp_find_original_mbuf(pData, ip_broken);
1352 if (icm == NULL) {
1353 Log(("ICMP: can't find original package (first double word %x)\n", *(uint32_t *)ip_broken));
1354 return;
1355 }
1356 m = icm->im_m;
1357 ip = mtod(m, struct ip *);
1358 ip->ip_ttl = icr[i].Options.Ttl;
1359 src = ip->ip_src.s_addr;
1360 ip->ip_dst.s_addr = src;
1361 ip->ip_dst.s_addr = icr[i].Address;
1362
1363 hlen = (ip->ip_hl << 2);
1364 icp = (struct icmp *)((char *)ip + hlen);
1365 ip_broken->ip_src.s_addr = src; /*it packet sent from host not from guest*/
1366 data_len = (ip_broken->ip_hl << 2) + 64;
1367
1368#ifndef VBOX_WITH_SLIRP_BSD_MBUF
1369 nbytes =(hlen + ICMP_MINLEN + data_len > m->m_size? m->m_size - (hlen + ICMP_MINLEN): data_len);
1370 memcpy(icp->icmp_data, ip_broken, nbytes);
1371#else
1372 m->m_len = data_len;
1373 m->m_pkthdr.header = mtod(m, void *);
1374 m_copyback(pData, m, ip->ip_hl >> 2, icr[i].DataSize, icr[i].Data);
1375#endif
1376 icmp_reflect(pData, m);
1377 break;
1378 default:
1379 Log(("ICMP(default): message with Status: %x was received from %x\n", icr[i].Status, icr[i].Address));
1380 break;
1381 }
1382 }
1383}
1384#else /* !RT_OS_WINDOWS */
1385static void sorecvfrom_icmp_unix(PNATState pData, struct socket *so)
1386{
1387 struct sockaddr_in addr;
1388 socklen_t addrlen = sizeof(struct sockaddr_in);
1389 struct ip ip;
1390 char *buff;
1391 int len = 0;
1392
1393 /* 1- step: read the ip header */
1394 len = recvfrom(so->s, &ip, sizeof(struct ip), MSG_PEEK,
1395 (struct sockaddr *)&addr, &addrlen);
1396 if ( len < 0
1397 && ( errno == EAGAIN
1398 || errno == EWOULDBLOCK
1399 || errno == EINPROGRESS
1400 || errno == ENOTCONN))
1401 {
1402 Log(("sorecvfrom_icmp_unix: 1 - step can't read IP datagramm (would block)\n"));
1403 return;
1404 }
1405
1406 if ( len < sizeof(struct ip)
1407 || len < 0
1408 || len == 0)
1409 {
1410 u_char code;
1411 code = ICMP_UNREACH_PORT;
1412
1413 if (errno == EHOSTUNREACH)
1414 code = ICMP_UNREACH_HOST;
1415 else if (errno == ENETUNREACH)
1416 code = ICMP_UNREACH_NET;
1417
1418 LogRel((" udp icmp rx errno = %d-%s\n",
1419 errno, strerror(errno)));
1420 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
1421 so->so_m = NULL;
1422 Log(("sorecvfrom_icmp_unix: 1 - step can't read IP datagramm \n"));
1423 return;
1424 }
1425 /* basic check of IP header */
1426 if ( ip.ip_v != IPVERSION
1427# ifndef RT_OS_DARWIN
1428 || ip.ip_p != IPPROTO_ICMP
1429# endif
1430 )
1431 {
1432 Log(("sorecvfrom_icmp_unix: 1 - step IP isn't IPv4 \n"));
1433 return;
1434 }
1435# ifndef RT_OS_DARWIN
1436 /* Darwin reports the IP length already in host byte order. */
1437 ip.ip_len = RT_N2H_U16(ip.ip_len);
1438# endif
1439# if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
1440 /* Solaris and Darwin report the payload only */
1441 ip.ip_len += (ip.ip_hl << 2);
1442# endif
1443 /* Note: ip->ip_len in host byte order (all OS) */
1444 len = ip.ip_len;
1445 buff = RTMemAlloc(len);
1446 if (buff == NULL)
1447 {
1448 Log(("sorecvfrom_icmp_unix: 1 - step can't allocate enought room for datagram\n"));
1449 return;
1450 }
1451 /* 2 - step: we're reading rest of the datagramm to the buffer */
1452 addrlen = sizeof(struct sockaddr_in);
1453 memset(&addr, 0, addrlen);
1454 len = recvfrom(so->s, buff, len, 0,
1455 (struct sockaddr *)&addr, &addrlen);
1456 if ( len < 0
1457 && ( errno == EAGAIN
1458 || errno == EWOULDBLOCK
1459 || errno == EINPROGRESS
1460 || errno == ENOTCONN))
1461 {
1462 Log(("sorecvfrom_icmp_unix: 2 - step can't read IP body (would block expected:%d)\n",
1463 ip.ip_len));
1464 RTMemFree(buff);
1465 return;
1466 }
1467 if ( len < 0
1468 || len == 0)
1469 {
1470 Log(("sorecvfrom_icmp_unix: 2 - step read of the rest of datagramm is fallen (errno:%d, len:%d expected: %d)\n",
1471 errno, len, (ip.ip_len - sizeof(struct ip))));
1472 RTMemFree(buff);
1473 return;
1474 }
1475 /* len is modified in 2nd read, when the rest of the datagramm was read */
1476 send_icmp_to_guest(pData, buff, len, so, &addr);
1477 RTMemFree(buff);
1478}
1479#endif /* !RT_OS_WINDOWS */
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette