VirtualBox

source: vbox/trunk/src/VBox/Devices/Network/slirp/socket.c@ 29980

Last change on this file since 29980 was 29968, checked in by vboxsync, 15 years ago

NAT: do dns proxy , if the mode is enabled.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 42.9 KB
Line 
1/* $Id: socket.c 29968 2010-06-02 04:44:43Z vboxsync $ */
2/** @file
3 * NAT - socket handling.
4 */
5
6/*
7 * Copyright (C) 2006-2010 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/*
19 * This code is based on:
20 *
21 * Copyright (c) 1995 Danny Gasparovski.
22 *
23 * Please read the file COPYRIGHT for the
24 * terms and conditions of the copyright.
25 */
26
27#define WANT_SYS_IOCTL_H
28#include <slirp.h>
29#include "ip_icmp.h"
30#include "main.h"
31#ifdef __sun__
32#include <sys/filio.h>
33#endif
34#include <VBox/pdmdrv.h>
35#if defined (RT_OS_WINDOWS)
36#include <iphlpapi.h>
37#include <icmpapi.h>
38#endif
39
40
41static void send_icmp_to_guest(PNATState, char *, size_t, struct socket *, const struct sockaddr_in *);
42#ifdef RT_OS_WINDOWS
43static void sorecvfrom_icmp_win(PNATState, struct socket *);
44#else /* RT_OS_WINDOWS */
45static void sorecvfrom_icmp_unix(PNATState, struct socket *);
46#endif /* !RT_OS_WINDOWS */
47
/*
 * Initialisation hook of the socket layer.
 * The body is empty: nothing is set up here.
 */
void
so_init()
{
}
52
53struct socket *
54solookup(struct socket *head, struct in_addr laddr,
55 u_int lport, struct in_addr faddr, u_int fport)
56{
57 struct socket *so;
58
59 for (so = head->so_next; so != head; so = so->so_next)
60 {
61 if ( so->so_lport == lport
62 && so->so_laddr.s_addr == laddr.s_addr
63 && so->so_faddr.s_addr == faddr.s_addr
64 && so->so_fport == fport)
65 return so;
66 }
67
68 return (struct socket *)NULL;
69}
70
71/*
72 * Create a new socket, initialise the fields
73 * It is the responsibility of the caller to
74 * insque() it into the correct linked-list
75 */
76struct socket *
77socreate()
78{
79 struct socket *so;
80
81 so = (struct socket *)RTMemAllocZ(sizeof(struct socket));
82 if (so)
83 {
84 so->so_state = SS_NOFDREF;
85 so->s = -1;
86#if !defined(RT_OS_WINDOWS)
87 so->so_poll_index = -1;
88#endif
89 }
90 return so;
91}
92
93/*
94 * remque and free a socket, clobber cache
95 * VBOX_WITH_SLIRP_MT: before sofree queue should be locked, because
96 * in sofree we don't know from which queue item beeing removed.
97 */
98void
99sofree(PNATState pData, struct socket *so)
100{
101 struct socket *so_prev = NULL;
102 if (so == tcp_last_so)
103 tcp_last_so = &tcb;
104 else if (so == udp_last_so)
105 udp_last_so = &udb;
106
107 /* check if mbuf haven't been already freed */
108 if (so->so_m != NULL)
109 m_freem(pData, so->so_m);
110#ifndef VBOX_WITH_SLIRP_MT
111 if (so->so_next && so->so_prev)
112 {
113 remque(pData, so); /* crashes if so is not in a queue */
114 NSOCK_DEC();
115 }
116
117 RTMemFree(so);
118#else
119 so->so_deleted = 1;
120#endif
121}
122
#ifdef VBOX_WITH_SLIRP_MT
/*
 * Variant of soread() that hands the result back through *ret
 * instead of returning it.
 */
void
soread_queue(PNATState pData, struct socket *so, int *ret)
{
    int cbRead = soread(pData, so);

    *ret = cbRead;
}
#endif
130
131/*
132 * Read from so's socket into sb_snd, updating all relevant sbuf fields
133 * NOTE: This will only be called if it is select()ed for reading, so
134 * a read() of 0 (or less) means it's disconnected
135 */
136int
137soread(PNATState pData, struct socket *so)
138{
139 int n, nn, lss, total;
140 struct sbuf *sb = &so->so_snd;
141 size_t len = sb->sb_datalen - sb->sb_cc;
142 struct iovec iov[2];
143 int mss = so->so_tcpcb->t_maxseg;
144
145 STAM_PROFILE_START(&pData->StatIOread, a);
146 STAM_COUNTER_RESET(&pData->StatIORead_in_1);
147 STAM_COUNTER_RESET(&pData->StatIORead_in_2);
148
149 QSOCKET_LOCK(tcb);
150 SOCKET_LOCK(so);
151 QSOCKET_UNLOCK(tcb);
152
153 DEBUG_CALL("soread");
154 DEBUG_ARG("so = %lx", (long)so);
155
156 /*
157 * No need to check if there's enough room to read.
158 * soread wouldn't have been called if there weren't
159 */
160
161 len = sb->sb_datalen - sb->sb_cc;
162
163 iov[0].iov_base = sb->sb_wptr;
164 iov[1].iov_base = 0;
165 iov[1].iov_len = 0;
166 if (sb->sb_wptr < sb->sb_rptr)
167 {
168 iov[0].iov_len = sb->sb_rptr - sb->sb_wptr;
169 /* Should never succeed, but... */
170 if (iov[0].iov_len > len)
171 iov[0].iov_len = len;
172 if (iov[0].iov_len > mss)
173 iov[0].iov_len -= iov[0].iov_len%mss;
174 n = 1;
175 }
176 else
177 {
178 iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr;
179 /* Should never succeed, but... */
180 if (iov[0].iov_len > len)
181 iov[0].iov_len = len;
182 len -= iov[0].iov_len;
183 if (len)
184 {
185 iov[1].iov_base = sb->sb_data;
186 iov[1].iov_len = sb->sb_rptr - sb->sb_data;
187 if (iov[1].iov_len > len)
188 iov[1].iov_len = len;
189 total = iov[0].iov_len + iov[1].iov_len;
190 if (total > mss)
191 {
192 lss = total % mss;
193 if (iov[1].iov_len > lss)
194 {
195 iov[1].iov_len -= lss;
196 n = 2;
197 }
198 else
199 {
200 lss -= iov[1].iov_len;
201 iov[0].iov_len -= lss;
202 n = 1;
203 }
204 }
205 else
206 n = 2;
207 }
208 else
209 {
210 if (iov[0].iov_len > mss)
211 iov[0].iov_len -= iov[0].iov_len%mss;
212 n = 1;
213 }
214 }
215
216#ifdef HAVE_READV
217 nn = readv(so->s, (struct iovec *)iov, n);
218 DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn));
219#else
220 nn = recv(so->s, iov[0].iov_base, iov[0].iov_len, (so->so_tcpcb->t_force? MSG_OOB:0));
221#endif
222 if (nn <= 0)
223 {
224 /*
225 * Special case for WSAEnumNetworkEvents: If we receive 0 bytes that
226 * _could_ mean that the connection is closed. But we will receive an
227 * FD_CLOSE event later if the connection was _really_ closed. With
228 * www.youtube.com I see this very often. Closing the socket too early
229 * would be dangerous.
230 */
231 int status;
232 unsigned long pending = 0;
233 status = ioctlsocket(so->s, FIONREAD, &pending);
234 if (status < 0)
235 LogRel(("NAT:error in WSAIoctl: %d\n", errno));
236 if (nn == 0 && (pending != 0))
237 {
238 SOCKET_UNLOCK(so);
239 STAM_PROFILE_STOP(&pData->StatIOread, a);
240 return 0;
241 }
242 if ( nn < 0
243 && ( errno == EINTR
244 || errno == EAGAIN
245 || errno == EWOULDBLOCK))
246 {
247 SOCKET_UNLOCK(so);
248 STAM_PROFILE_STOP(&pData->StatIOread, a);
249 return 0;
250 }
251 else
252 {
253 /* nn == 0 means peer has performed an orderly shutdown */
254 DEBUG_MISC((dfd, " --- soread() disconnected, nn = %d, errno = %d-%s\n",
255 nn, errno, strerror(errno)));
256 sofcantrcvmore(so);
257 tcp_sockclosed(pData, sototcpcb(so));
258 SOCKET_UNLOCK(so);
259 STAM_PROFILE_STOP(&pData->StatIOread, a);
260 return -1;
261 }
262 }
263 STAM_STATS(
264 if (n == 1)
265 {
266 STAM_COUNTER_INC(&pData->StatIORead_in_1);
267 STAM_COUNTER_ADD(&pData->StatIORead_in_1_bytes, nn);
268 }
269 else
270 {
271 STAM_COUNTER_INC(&pData->StatIORead_in_2);
272 STAM_COUNTER_ADD(&pData->StatIORead_in_2_1st_bytes, nn);
273 }
274 );
275
276#ifndef HAVE_READV
277 /*
278 * If there was no error, try and read the second time round
279 * We read again if n = 2 (ie, there's another part of the buffer)
280 * and we read as much as we could in the first read
281 * We don't test for <= 0 this time, because there legitimately
282 * might not be any more data (since the socket is non-blocking),
283 * a close will be detected on next iteration.
284 * A return of -1 wont (shouldn't) happen, since it didn't happen above
285 */
286 if (n == 2 && nn == iov[0].iov_len)
287 {
288 int ret;
289 ret = recv(so->s, iov[1].iov_base, iov[1].iov_len, 0);
290 if (ret > 0)
291 nn += ret;
292 STAM_STATS(
293 if (ret > 0)
294 {
295 STAM_COUNTER_INC(&pData->StatIORead_in_2);
296 STAM_COUNTER_ADD(&pData->StatIORead_in_2_2nd_bytes, ret);
297 }
298 );
299 }
300
301 DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn));
302#endif
303
304 /* Update fields */
305 sb->sb_cc += nn;
306 sb->sb_wptr += nn;
307 if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen))
308 sb->sb_wptr -= sb->sb_datalen;
309 STAM_PROFILE_STOP(&pData->StatIOread, a);
310 SOCKET_UNLOCK(so);
311 return nn;
312}
313
314/*
315 * Get urgent data
316 *
317 * When the socket is created, we set it SO_OOBINLINE,
318 * so when OOB data arrives, we soread() it and everything
319 * in the send buffer is sent as urgent data
320 */
321void
322sorecvoob(PNATState pData, struct socket *so)
323{
324 struct tcpcb *tp = sototcpcb(so);
325 ssize_t ret;
326
327 DEBUG_CALL("sorecvoob");
328 DEBUG_ARG("so = %lx", (long)so);
329
330 /*
331 * We take a guess at how much urgent data has arrived.
332 * In most situations, when urgent data arrives, the next
333 * read() should get all the urgent data. This guess will
334 * be wrong however if more data arrives just after the
335 * urgent data, or the read() doesn't return all the
336 * urgent data.
337 */
338 ret = soread(pData, so);
339 tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
340 tp->t_force = 1;
341 tcp_output(pData, tp);
342 tp->t_force = 0;
343}
344
345/*
346 * Send urgent data
347 * There's a lot duplicated code here, but...
348 */
349int
350sosendoob(struct socket *so)
351{
352 struct sbuf *sb = &so->so_rcv;
353 char buff[2048]; /* XXX Shouldn't be sending more oob data than this */
354
355 int n, len;
356
357 DEBUG_CALL("sosendoob");
358 DEBUG_ARG("so = %lx", (long)so);
359 DEBUG_ARG("sb->sb_cc = %d", sb->sb_cc);
360
361 if (so->so_urgc > sizeof(buff))
362 so->so_urgc = sizeof(buff); /* XXX */
363
364 if (sb->sb_rptr < sb->sb_wptr)
365 {
366 /* We can send it directly */
367 n = send(so->s, sb->sb_rptr, so->so_urgc, (MSG_OOB)); /* |MSG_DONTWAIT)); */
368 so->so_urgc -= n;
369
370 DEBUG_MISC((dfd, " --- sent %d bytes urgent data, %d urgent bytes left\n",
371 n, so->so_urgc));
372 }
373 else
374 {
375 /*
376 * Since there's no sendv or sendtov like writev,
377 * we must copy all data to a linear buffer then
378 * send it all
379 */
380 len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
381 if (len > so->so_urgc)
382 len = so->so_urgc;
383 memcpy(buff, sb->sb_rptr, len);
384 so->so_urgc -= len;
385 if (so->so_urgc)
386 {
387 n = sb->sb_wptr - sb->sb_data;
388 if (n > so->so_urgc)
389 n = so->so_urgc;
390 memcpy(buff + len, sb->sb_data, n);
391 so->so_urgc -= n;
392 len += n;
393 }
394 n = send(so->s, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */
395#ifdef DEBUG
396 if (n != len)
397 DEBUG_ERROR((dfd, "Didn't send all data urgently XXXXX\n"));
398#endif
399 DEBUG_MISC((dfd, " ---2 sent %d bytes urgent data, %d urgent bytes left\n",
400 n, so->so_urgc));
401 }
402
403 sb->sb_cc -= n;
404 sb->sb_rptr += n;
405 if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
406 sb->sb_rptr -= sb->sb_datalen;
407
408 return n;
409}
410
/*
 * Write data from so_rcv to so's socket,
 * updating all sbuf field as necessary
 *
 * Pending urgent data (so_urgc != 0) is pushed out first through
 * sosendoob().  The buffered data is then described by up to two iovecs
 * (the circular buffer may wrap) and written with writev() when HAVE_READV
 * is defined, otherwise with one or two send() calls.
 *
 * Returns the number of bytes written, 0 when the urgent send emptied the
 * buffer or the write was interrupted / would block, or -1 after the socket
 * has been marked as unable to send and tcp_sockclosed() has been called.
 */
int
sowrite(PNATState pData, struct socket *so)
{
    int n, nn;
    struct sbuf *sb = &so->so_rcv;
    size_t len = sb->sb_cc;
    struct iovec iov[2];

    STAM_PROFILE_START(&pData->StatIOwrite, a);
    STAM_COUNTER_RESET(&pData->StatIOWrite_in_1);
    STAM_COUNTER_RESET(&pData->StatIOWrite_in_1_bytes);
    STAM_COUNTER_RESET(&pData->StatIOWrite_in_2);
    STAM_COUNTER_RESET(&pData->StatIOWrite_in_2_1st_bytes);
    STAM_COUNTER_RESET(&pData->StatIOWrite_in_2_2nd_bytes);
    STAM_COUNTER_RESET(&pData->StatIOWrite_no_w);
    STAM_COUNTER_RESET(&pData->StatIOWrite_rest);
    STAM_COUNTER_RESET(&pData->StatIOWrite_rest_bytes);
    DEBUG_CALL("sowrite");
    DEBUG_ARG("so = %lx", (long)so);
    QSOCKET_LOCK(tcb);
    SOCKET_LOCK(so);
    QSOCKET_UNLOCK(tcb);
    /* Urgent data goes out first; its return value is not examined here. */
    if (so->so_urgc)
    {
        sosendoob(so);
        if (sb->sb_cc == 0)
        {
            SOCKET_UNLOCK(so);
            STAM_PROFILE_STOP(&pData->StatIOwrite, a);
            return 0;
        }
    }

    /*
     * No need to check if there's something to write,
     * sowrite wouldn't have been called otherwise
     */

    len = sb->sb_cc;

    iov[0].iov_base = sb->sb_rptr;
    iov[1].iov_base = 0;
    iov[1].iov_len  = 0;
    if (sb->sb_rptr < sb->sb_wptr)
    {
        /* Data is one contiguous region between rptr and wptr. */
        iov[0].iov_len = sb->sb_wptr - sb->sb_rptr;
        /* Should never succeed, but... */
        if (iov[0].iov_len > len)
            iov[0].iov_len = len;
        n = 1;
    }
    else
    {
        /* Data may wrap: tail of the buffer first, then its start. */
        iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
        if (iov[0].iov_len > len)
            iov[0].iov_len = len;
        len -= iov[0].iov_len;
        if (len)
        {
            iov[1].iov_base = sb->sb_data;
            iov[1].iov_len = sb->sb_wptr - sb->sb_data;
            if (iov[1].iov_len > len)
                iov[1].iov_len = len;
            n = 2;
        }
        else
            n = 1;
    }
    STAM_STATS({
        if (n == 1)
        {
            STAM_COUNTER_INC(&pData->StatIOWrite_in_1);
            STAM_COUNTER_ADD(&pData->StatIOWrite_in_1_bytes, iov[0].iov_len);
        }
        else
        {
            STAM_COUNTER_INC(&pData->StatIOWrite_in_2);
            STAM_COUNTER_ADD(&pData->StatIOWrite_in_2_1st_bytes, iov[0].iov_len);
            STAM_COUNTER_ADD(&pData->StatIOWrite_in_2_2nd_bytes, iov[1].iov_len);
        }
    });
    /* Write out the region(s) prepared above (urgent data was handled earlier) */
#ifdef HAVE_READV
    nn = writev(so->s, (const struct iovec *)iov, n);
    DEBUG_MISC((dfd, "  ... wrote nn = %d bytes\n", nn));
#else
    nn = send(so->s, iov[0].iov_base, iov[0].iov_len, 0);
#endif
    /* This should never happen, but people tell me it does *shrug* */
    if (   nn < 0
        && (   errno == EAGAIN
            || errno == EINTR
            || errno == EWOULDBLOCK))
    {
        SOCKET_UNLOCK(so);
        STAM_PROFILE_STOP(&pData->StatIOwrite, a);
        return 0;
    }

    /* Any other error, or nothing written although data was offered:
     * treat the connection as gone. */
    if (nn < 0 || (nn == 0 && iov[0].iov_len > 0))
    {
        DEBUG_MISC((dfd, " --- sowrite disconnected, so->so_state = %x, errno = %d\n",
                    so->so_state, errno));
        sofcantsendmore(so);
        tcp_sockclosed(pData, sototcpcb(so));
        SOCKET_UNLOCK(so);
        STAM_PROFILE_STOP(&pData->StatIOwrite, a);
        return -1;
    }

#ifndef HAVE_READV
    /* Without writev(): send the wrapped part only if the first part
     * went out completely. */
    if (n == 2 && nn == iov[0].iov_len)
    {
        int ret;
        ret = send(so->s, iov[1].iov_base, iov[1].iov_len, 0);
        if (ret > 0)
            nn += ret;
        STAM_STATS({
            if (ret > 0 && ret != iov[1].iov_len)
            {
                STAM_COUNTER_INC(&pData->StatIOWrite_rest);
                STAM_COUNTER_ADD(&pData->StatIOWrite_rest_bytes, (ret - iov[1].iov_len));
            }
        });
    }
    DEBUG_MISC((dfd, "  ... wrote nn = %d bytes\n", nn));
#endif

    /* Update sbuf */
    sb->sb_cc -= nn;
    sb->sb_rptr += nn;
    if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
        sb->sb_rptr -= sb->sb_datalen;

    /*
     * If in DRAIN mode, and there's no more data, set
     * it CANTSENDMORE
     */
    if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0)
        sofcantsendmore(so);

    SOCKET_UNLOCK(so);
    STAM_PROFILE_STOP(&pData->StatIOwrite, a);
    return nn;
}
560
561/*
562 * recvfrom() a UDP socket
563 */
564void
565sorecvfrom(PNATState pData, struct socket *so)
566{
567 ssize_t ret = 0;
568 struct sockaddr_in addr;
569 socklen_t addrlen = sizeof(struct sockaddr_in);
570
571 DEBUG_CALL("sorecvfrom");
572 DEBUG_ARG("so = %lx", (long)so);
573
574 if (so->so_type == IPPROTO_ICMP)
575 {
576 /* This is a "ping" reply */
577#ifdef RT_OS_WINDOWS
578 sorecvfrom_icmp_win(pData, so);
579#else /* RT_OS_WINDOWS */
580 sorecvfrom_icmp_unix(pData, so);
581#endif /* !RT_OS_WINDOWS */
582 udp_detach(pData, so);
583 }
584 else
585 {
586 /* A "normal" UDP packet */
587 struct mbuf *m;
588 ssize_t len;
589 u_long n = 0;
590#ifdef VBOX_WITH_SLIRP_BSD_MBUF
591 int size;
592#endif
593 int rc = 0;
594 static int signalled = 0;
595
596 QSOCKET_LOCK(udb);
597 SOCKET_LOCK(so);
598 QSOCKET_UNLOCK(udb);
599
600#ifndef VBOX_WITH_SLIRP_BSD_MBUF
601 if (!(m = m_get(pData)))
602 {
603 SOCKET_UNLOCK(so);
604 return;
605 }
606 /* adjust both parameters to maks M_FREEROOM calculate correct */
607 m->m_data += if_maxlinkhdr + sizeof(struct udphdr) + sizeof(struct ip);
608
609 /*
610 * XXX Shouldn't FIONREAD packets destined for port 53,
611 * but I don't know the max packet size for DNS lookups
612 */
613 len = M_FREEROOM(m);
614 /* if (so->so_fport != RT_H2N_U16_C(53)) */
615 rc = ioctlsocket(so->s, FIONREAD, &n);
616 if ( rc == -1
617 && ( errno == EAGAIN
618 || errno == EWOULDBLOCK
619 || errno == EINPROGRESS
620 || errno == ENOTCONN))
621 {
622 m_freem(pData, m);
623 return;
624 }
625
626 Log2(("NAT: %R[natsock] ioctlsocket before read "
627 "(rc:%d errno:%d, n:%d)\n", so, rc, errno, n));
628
629 if (rc == -1 && signalled == 0)
630 {
631 LogRel(("NAT: can't fetch amount of bytes on socket %R[natsock], so message will be truncated.\n", so));
632 signalled = 1;
633 m_freem(pData, m);
634 return;
635 }
636
637 if (rc != -1 && n > len)
638 {
639 n = (m->m_data - m->m_dat) + m->m_len + n + 1;
640 m_inc(m, n);
641 len = M_FREEROOM(m);
642 }
643 ret = recvfrom(so->s, m->m_data, len, 0,
644 (struct sockaddr *)&addr, &addrlen);
645 Log2(("NAT: %R[natsock] ioctlsocket after read "
646 "(rc:%d errno:%d, n:%d) ret:%d, len:%d\n", so,
647 rc, errno, n, ret, len));
648#else
649 /*How many data has been received ?*/
650 /*
651 * 1. calculate how much we can read
652 * 2. read as much as possible
653 * 3. attach buffer to allocated header mbuf
654 */
655 rc = ioctlsocket(so->s, FIONREAD, &n);
656 if (rc == -1 && signalled == 0)
657 {
658 LogRel(("NAT: can't fetch amount of bytes on socket %R[natsock], so message will be truncated.\n", so));
659 signalled = 1;
660 }
661
662 len = sizeof(struct udpiphdr) + ETH_HLEN;
663 if (n > (if_mtu - len))
664 {
665 n = if_mtu - len; /* can't read than we can put in the mbuf*/
666 }
667 len += n;
668
669 size = MCLBYTES;
670 if (len < MSIZE)
671 size = MCLBYTES;
672 else if (len < MCLBYTES)
673 size = MCLBYTES;
674 else if (len < MJUM9BYTES)
675 size = MJUM9BYTES;
676 else if (len < MJUM16BYTES)
677 size = MJUM16BYTES;
678 else
679 AssertMsgFailed(("Unsupported size"));
680
681 m = m_getjcl(pData, M_NOWAIT, MT_HEADER, M_PKTHDR, size);
682 if (m == NULL)
683 return;
684 m->m_data += ETH_HLEN;
685 m->m_pkthdr.header = mtod(m, void *);
686 m->m_data += sizeof(struct udpiphdr);
687 ret = recvfrom(so->s, mtod(m, char *), n, 0,
688 (struct sockaddr *)&addr, &addrlen);
689 /* @todo (vvl) check which flags and type should be passed */
690#endif
691 m->m_len = ret;
692 if (ret < 0)
693 {
694 u_char code = ICMP_UNREACH_PORT;
695
696 if (errno == EHOSTUNREACH)
697 code = ICMP_UNREACH_HOST;
698 else if (errno == ENETUNREACH)
699 code = ICMP_UNREACH_NET;
700
701 m_freem(pData, m);
702 if ( errno == EAGAIN
703 || errno == EWOULDBLOCK
704 || errno == EINPROGRESS
705 || errno == ENOTCONN)
706 {
707 return;
708 }
709
710 Log2((" rx error, tx icmp ICMP_UNREACH:%i\n", code));
711 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
712 so->so_m = NULL;
713 }
714 else
715 {
716 /*
717 * Hack: domain name lookup will be used the most for UDP,
718 * and since they'll only be used once there's no need
719 * for the 4 minute (or whatever) timeout... So we time them
720 * out much quicker (10 seconds for now...)
721 */
722 if (so->so_expire)
723 {
724 if (so->so_fport != RT_H2N_U16_C(53))
725 so->so_expire = curtime + SO_EXPIRE;
726 }
727 /*
728 * last argument should be changed if Slirp will inject IP attributes
729 * Note: Here we can't check if dnsproxy's sent initial request
730 */
731 if ( pData->fUseDnsProxy
732 && so->so_fport == RT_H2N_U16_C(53))
733 dnsproxy_answer(pData, so, m);
734
735#if 0
736 if (m->m_len == len)
737 {
738 m_inc(m, MINCSIZE);
739 m->m_len = 0;
740 }
741#endif
742
743 /*
744 * If this packet was destined for CTL_ADDR,
745 * make it look like that's where it came from, done by udp_output
746 */
747 udp_output(pData, so, m, &addr);
748 SOCKET_UNLOCK(so);
749 } /* rx error */
750 } /* if ping packet */
751}
752
753/*
754 * sendto() a socket
755 */
756int
757sosendto(PNATState pData, struct socket *so, struct mbuf *m)
758{
759 int ret;
760 struct sockaddr_in *paddr;
761 struct sockaddr addr;
762#if 0
763 struct sockaddr_in host_addr;
764#endif
765#ifdef VBOX_WITH_SLIRP_BSD_MBUF
766 caddr_t buf;
767 int mlen;
768#endif
769
770 DEBUG_CALL("sosendto");
771 DEBUG_ARG("so = %lx", (long)so);
772 DEBUG_ARG("m = %lx", (long)m);
773
774 memset(&addr, 0, sizeof(struct sockaddr));
775#ifdef RT_OS_DARWIN
776 addr.sa_len = sizeof(struct sockaddr_in);
777#endif
778 paddr = (struct sockaddr_in *)&addr;
779 paddr->sin_family = AF_INET;
780 if ((so->so_faddr.s_addr & RT_H2N_U32(pData->netmask)) == pData->special_addr.s_addr)
781 {
782 /* It's an alias */
783 uint32_t last_byte = RT_N2H_U32(so->so_faddr.s_addr) & ~pData->netmask;
784 switch(last_byte)
785 {
786#if 0
787 /* handle this case at 'default:' */
788 case CTL_BROADCAST:
789 addr.sin_addr.s_addr = INADDR_BROADCAST;
790 /* Send the packet to host to fully emulate broadcast */
791 /** @todo r=klaus: on Linux host this causes the host to receive
792 * the packet twice for some reason. And I cannot find any place
793 * in the man pages which states that sending a broadcast does not
794 * reach the host itself. */
795 host_addr.sin_family = AF_INET;
796 host_addr.sin_port = so->so_fport;
797 host_addr.sin_addr = our_addr;
798 sendto(so->s, m->m_data, m->m_len, 0,
799 (struct sockaddr *)&host_addr, sizeof (struct sockaddr));
800 break;
801#endif
802 case CTL_DNS:
803 case CTL_ALIAS:
804 default:
805 if (last_byte == ~pData->netmask)
806 paddr->sin_addr.s_addr = INADDR_BROADCAST;
807 else
808 paddr->sin_addr = loopback_addr;
809 break;
810 }
811 }
812 else
813 paddr->sin_addr = so->so_faddr;
814 paddr->sin_port = so->so_fport;
815
816 DEBUG_MISC((dfd, " sendto()ing, addr.sin_port=%d, addr.sin_addr.s_addr=%.16s\n",
817 RT_N2H_U16(paddr->sin_port), inet_ntoa(paddr->sin_addr)));
818
819 /* Don't care what port we get */
820#ifndef VBOX_WITH_SLIRP_BSD_MBUF
821 ret = sendto(so->s, m->m_data, m->m_len, 0, &addr, sizeof (struct sockaddr_in));
822#else
823 mlen = m_length(m, NULL);
824 buf = RTMemAlloc(mlen);
825 if (buf == NULL)
826 {
827 return -1;
828 }
829 m_copydata(m, 0, mlen, buf);
830 ret = sendto(so->s, buf, mlen, 0,
831 (struct sockaddr *)&addr, sizeof (struct sockaddr));
832#endif
833 if (ret < 0)
834 {
835 Log2(("UDP: sendto fails (%s)\n", strerror(errno)));
836 return -1;
837 }
838
839 /*
840 * Kill the socket if there's no reply in 4 minutes,
841 * but only if it's an expirable socket
842 */
843 if (so->so_expire)
844 so->so_expire = curtime + SO_EXPIRE;
845 so->so_state = SS_ISFCONNECTED; /* So that it gets select()ed */
846 return 0;
847}
848
849/*
850 * XXX This should really be tcp_listen
851 */
852struct socket *
853solisten(PNATState pData, u_int32_t bind_addr, u_int port, u_int32_t laddr, u_int lport, int flags)
854{
855 struct sockaddr_in addr;
856 struct socket *so;
857 socklen_t addrlen = sizeof(addr);
858 int s, opt = 1;
859 int status;
860
861 DEBUG_CALL("solisten");
862 DEBUG_ARG("port = %d", port);
863 DEBUG_ARG("laddr = %x", laddr);
864 DEBUG_ARG("lport = %d", lport);
865 DEBUG_ARG("flags = %x", flags);
866
867 if ((so = socreate()) == NULL)
868 {
869 /* RTMemFree(so); Not sofree() ??? free(NULL) == NOP */
870 return NULL;
871 }
872
873 /* Don't tcp_attach... we don't need so_snd nor so_rcv */
874 if ((so->so_tcpcb = tcp_newtcpcb(pData, so)) == NULL)
875 {
876 RTMemFree(so);
877 return NULL;
878 }
879
880 SOCKET_LOCK_CREATE(so);
881 SOCKET_LOCK(so);
882 QSOCKET_LOCK(tcb);
883 insque(pData, so,&tcb);
884 NSOCK_INC();
885 QSOCKET_UNLOCK(tcb);
886
887 /*
888 * SS_FACCEPTONCE sockets must time out.
889 */
890 if (flags & SS_FACCEPTONCE)
891 so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT*2;
892
893 so->so_state = (SS_FACCEPTCONN|flags);
894 so->so_lport = lport; /* Kept in network format */
895 so->so_laddr.s_addr = laddr; /* Ditto */
896
897 memset(&addr, 0, sizeof(addr));
898#ifdef RT_OS_DARWIN
899 addr.sin_len = sizeof(addr);
900#endif
901 addr.sin_family = AF_INET;
902 addr.sin_addr.s_addr = bind_addr;
903 addr.sin_port = port;
904
905 if ( ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0)
906 || (setsockopt(s, SOL_SOCKET, SO_REUSEADDR,(char *)&opt, sizeof(int)) < 0)
907 || (bind(s,(struct sockaddr *)&addr, sizeof(addr)) < 0)
908 || (listen(s, 1) < 0))
909 {
910#ifdef RT_OS_WINDOWS
911 int tmperrno = WSAGetLastError(); /* Don't clobber the real reason we failed */
912 closesocket(s);
913 QSOCKET_LOCK(tcb);
914 sofree(pData, so);
915 QSOCKET_UNLOCK(tcb);
916 /* Restore the real errno */
917 WSASetLastError(tmperrno);
918#else
919 int tmperrno = errno; /* Don't clobber the real reason we failed */
920 close(s);
921 QSOCKET_LOCK(tcb);
922 sofree(pData, so);
923 QSOCKET_UNLOCK(tcb);
924 /* Restore the real errno */
925 errno = tmperrno;
926#endif
927 return NULL;
928 }
929 fd_nonblock(s);
930 setsockopt(s, SOL_SOCKET, SO_OOBINLINE,(char *)&opt, sizeof(int));
931
932 getsockname(s,(struct sockaddr *)&addr,&addrlen);
933 so->so_fport = addr.sin_port;
934 /* set socket buffers */
935 opt = pData->socket_rcv;
936 status = setsockopt(s, SOL_SOCKET, SO_RCVBUF, (char *)&opt, sizeof(int));
937 if (status < 0)
938 {
939 LogRel(("NAT: Error(%d) while setting RCV capacity to (%d)\n", errno, opt));
940 goto no_sockopt;
941 }
942 opt = pData->socket_snd;
943 status = setsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *)&opt, sizeof(int));
944 if (status < 0)
945 {
946 LogRel(("NAT: Error(%d) while setting SND capacity to (%d)\n", errno, opt));
947 goto no_sockopt;
948 }
949no_sockopt:
950 if (addr.sin_addr.s_addr == 0 || addr.sin_addr.s_addr == loopback_addr.s_addr)
951 so->so_faddr = alias_addr;
952 else
953 so->so_faddr = addr.sin_addr;
954
955 so->s = s;
956 SOCKET_UNLOCK(so);
957 return so;
958}
959
/*
 * Data is available in so_rcv
 * Just write() the data to the socket
 * XXX not yet...
 *
 * Currently a no-op: the only code in the body is compiled out with #if 0.
 */
void
sorwakeup(struct socket *so)
{
#if 0
    sowrite(so);
    FD_CLR(so->s,&writefds);
#endif
}
973
/*
 * Data has been freed in so_snd
 * We have room for a read() if we want to
 * For now, don't read, it'll be done in the main loop
 *
 * Currently a no-op: the body is empty.
 */
void
sowwakeup(struct socket *so)
{
}
983
984/*
985 * Various session state calls
986 * XXX Should be #define's
987 * The socket state stuff needs work, these often get call 2 or 3
988 * times each when only 1 was needed
989 */
990void
991soisfconnecting(struct socket *so)
992{
993 so->so_state &= ~(SS_NOFDREF|SS_ISFCONNECTED|SS_FCANTRCVMORE|
994 SS_FCANTSENDMORE|SS_FWDRAIN);
995 so->so_state |= SS_ISFCONNECTING; /* Clobber other states */
996}
997
998void
999soisfconnected(struct socket *so)
1000{
1001 so->so_state &= ~(SS_ISFCONNECTING|SS_FWDRAIN|SS_NOFDREF);
1002 so->so_state |= SS_ISFCONNECTED; /* Clobber other states */
1003}
1004
1005void
1006sofcantrcvmore(struct socket *so)
1007{
1008 if ((so->so_state & SS_NOFDREF) == 0)
1009 {
1010 shutdown(so->s, 0);
1011 }
1012 so->so_state &= ~(SS_ISFCONNECTING);
1013 if (so->so_state & SS_FCANTSENDMORE)
1014 so->so_state = SS_NOFDREF; /* Don't select it */
1015 /* XXX close() here as well? */
1016 else
1017 so->so_state |= SS_FCANTRCVMORE;
1018}
1019
1020void
1021sofcantsendmore(struct socket *so)
1022{
1023 if ((so->so_state & SS_NOFDREF) == 0)
1024 shutdown(so->s, 1); /* send FIN to fhost */
1025
1026 so->so_state &= ~(SS_ISFCONNECTING);
1027 if (so->so_state & SS_FCANTRCVMORE)
1028 so->so_state = SS_NOFDREF; /* as above */
1029 else
1030 so->so_state |= SS_FCANTSENDMORE;
1031}
1032
/*
 * Session state call for a disconnected socket.
 * Currently a no-op: the only code in the body is compiled out with #if 0.
 */
void
soisfdisconnected(struct socket *so)
{
#if 0
    so->so_state &= ~(SS_ISFCONNECTING|SS_ISFCONNECTED);
    close(so->s);
    so->so_state = SS_ISFDISCONNECTED;
    /*
     * XXX Do nothing ... ?
     */
#endif
}
1045
1046/*
1047 * Set write drain mode
1048 * Set CANTSENDMORE once all data has been write()n
1049 */
1050void
1051sofwdrain(struct socket *so)
1052{
1053 if (so->so_rcv.sb_cc)
1054 so->so_state |= SS_FWDRAIN;
1055 else
1056 sofcantsendmore(so);
1057}
1058
/*
 * Forward an ICMP message received from the host side to the guest.
 *
 * buff/len  the received datagram, starting with its IP header
 * so        not referenced in this function body
 * addr      its sin_addr becomes the source address of the forwarded packet
 *
 * Only ICMP_ECHOREPLY, ICMP_TIMXCEED and ICMP_UNREACH are handled; anything
 * else, a too short datagram, or a message for which
 * icmp_find_original_mbuf() finds no recorded guest packet is dropped.
 * The mbuf of the recorded guest packet is reused: its payload is
 * overwritten with the received ICMP message and it is sent with
 * icmp_reflect().  The bookkeeping entry is removed from its list and freed.
 */
static void
send_icmp_to_guest(PNATState pData, char *buff, size_t len, struct socket *so, const struct sockaddr_in *addr)
{
    struct ip *ip;
    uint32_t dst, src;
    char ip_copy[256];
    struct icmp *icp;
    int old_ip_len = 0;
    int hlen, original_hlen = 0;
    struct mbuf *m;
    struct icmp_msg *icm;
    uint8_t proto;
    int type = 0;
#ifndef VBOX_WITH_SLIRP_BSD_MBUF
    int m_room;
#endif

    ip = (struct ip *)buff;
    /* Fix ip->ip_len to contain the total packet length including the header
     * in _host_ byte order for all OSes. On Darwin, that value already is in
     * host byte order. Solaris and Darwin report only the payload. */
#ifndef RT_OS_DARWIN
    ip->ip_len = RT_N2H_U16(ip->ip_len);
#endif
    hlen = (ip->ip_hl << 2);
#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
    ip->ip_len += hlen;
#endif
    if (ip->ip_len < hlen + ICMP_MINLEN)
    {
        Log(("send_icmp_to_guest: ICMP header is too small to understand which type/subtype of the datagram\n"));
        return;
    }
    icp = (struct icmp *)((char *)ip + hlen);

    Log(("ICMP:received msg(t:%d, c:%d)\n", icp->icmp_type, icp->icmp_code));
    /* Only these three ICMP types are forwarded to the guest. */
    if (   icp->icmp_type != ICMP_ECHOREPLY
        && icp->icmp_type != ICMP_TIMXCEED
        && icp->icmp_type != ICMP_UNREACH)
    {
        return;
    }

    /*
     * ICMP_ECHOREPLY, ICMP_TIMXCEED, ICMP_UNREACH minimal header size is
     * ICMP_ECHOREPLY assuming data 0
     * icmp_{type(8), code(8), cksum(16),identifier(16),seqnum(16)}
     */
    if (ip->ip_len < hlen + 8)
    {
        Log(("send_icmp_to_guest: NAT accept ICMP_{ECHOREPLY, TIMXCEED, UNREACH} the minimum size is 64 (see rfc792)\n"));
        return;
    }

    type = icp->icmp_type;
    if (   type == ICMP_TIMXCEED
        || type == ICMP_UNREACH)
    {
        /*
         * ICMP_TIMXCEED, ICMP_UNREACH minimal header size is
         * icmp_{type(8), code(8), cksum(16),unused(32)} + IP header + 64 bit of original datagram
         */
        if (ip->ip_len < hlen + 2*8 + sizeof(struct ip))
        {
            Log(("send_icmp_to_guest: NAT accept ICMP_{TIMXCEED, UNREACH} the minimum size of ipheader + 64 bit of data (see rfc792)\n"));
            return;
        }
        /* For error messages, look up by the IP header embedded in the ICMP payload. */
        ip = &icp->icmp_ip;
    }

    icm = icmp_find_original_mbuf(pData, ip);
    if (icm == NULL)
    {
        Log(("NAT: Can't find the corresponding packet for the received ICMP\n"));
        return;
    }

    m = icm->im_m;
    Assert(m != NULL);

    src = addr->sin_addr.s_addr;
    if (type == ICMP_ECHOREPLY)
    {
        struct ip *ip0 = mtod(m, struct ip *);
        struct icmp *icp0 = (struct icmp *)((char *)ip0 + (ip0->ip_hl << 2));
        if (icp0->icmp_type != ICMP_ECHO)
        {
            Log(("NAT: we haven't found echo for this reply\n"));
            return;
        }
        /*
         * While combining the buffer to send (see ip_icmp.c) we control the ICMP header only;
         * the IP header is combined by the OS network stack. Our local copy of the IP header
         * contains values in host byte order, so no byte order conversion is required.
         * IP header fields are converted in the ip_output0 routine only.
         */
        if (   (ip->ip_len - hlen)
            != (ip0->ip_len - (ip0->ip_hl << 2)))
        {
            Log(("NAT: ECHO(%d) lenght doesn't match ECHOREPLY(%d)\n",
                (ip->ip_len - hlen), (ip0->ip_len - (ip0->ip_hl << 2))));
            return;
        }
    }

    /* ip points to the original IP header */
    ip = mtod(m, struct ip *);
    proto = ip->ip_p;
    /* Now ip is pointing to the header we've sent from the guest */
    if (   icp->icmp_type == ICMP_TIMXCEED
        || icp->icmp_type == ICMP_UNREACH)
    {
        /* Keep a copy of the guest's header (bounded by sizeof(ip_copy)) before
         * the mbuf payload is overwritten below. */
        old_ip_len = (ip->ip_hl << 2) + 64;
        if (old_ip_len > sizeof(ip_copy))
            old_ip_len = sizeof(ip_copy);
        memcpy(ip_copy, ip, old_ip_len);
    }

    /* source address from original IP packet */
    dst = ip->ip_src.s_addr;

    /* override the tail of the old packet */
    ip = mtod(m, struct ip *); /* ip is from the mbuf we are overriding */
    original_hlen = ip->ip_hl << 2;
    /* keeps the original IP header and options; only the data after them is replaced */
#ifdef VBOX_WITH_SLIRP_BSD_MBUF
    m_copyback(pData, m, original_hlen, len - hlen, buff + hlen);
    ip->ip_len = m_length(m, NULL);
#else
    /* m_room space in the saved m buffer */
    m_room = M_ROOM(m);
    if (m_room < len - hlen + original_hlen)
    {
        /* we need to include the ether header length in the new buffer size calculation */
        m_inc(m, if_maxlinkhdr + len - hlen + original_hlen);
        if (m->m_size < if_maxlinkhdr + len - hlen + original_hlen)
        {
            Log(("send_icmp_to_guest: extending buffer was failed (packet is dropped)\n"));
            return;
        }
    }
    memcpy(m->m_data + original_hlen, buff + hlen, len - hlen);
    m->m_len = len - hlen + original_hlen;
    ip->ip_len = m->m_len;
#endif
    ip->ip_p = IPPROTO_ICMP; /* the original packet could be whatever, but we respond via ICMP */

    icp = (struct icmp *)((char *)ip + (ip->ip_hl << 2));
    type = icp->icmp_type;
    if (   type == ICMP_TIMXCEED
        || type == ICMP_UNREACH)
    {
        /* according to RFC 792 error messages carry a copy of the initial IP header + 64 bit */
        memcpy(&icp->icmp_ip, ip_copy, old_ip_len);
        ip->ip_tos = ((ip->ip_tos & 0x1E) | 0xC0);  /* high priority for errors */
    }

    ip->ip_src.s_addr = src;
    ip->ip_dst.s_addr = dst;
    icmp_reflect(pData, m);
    LIST_REMOVE(icm, im_list);
    /* Don't call m_free here*/

    if (   type == ICMP_TIMXCEED
        || type == ICMP_UNREACH)
    {
        icm->im_so->so_m = NULL;
        switch (proto)
        {
            case IPPROTO_UDP:
                /*XXX: so->so_m already freed so we shouldn't call sofree */
                udp_detach(pData, icm->im_so);
                break;
            case IPPROTO_TCP:
                /*close tcp should be here */
                break;
            default:
                /* do nothing */
                break;
        }
    }
    RTMemFree(icm);
}
1242
1243#ifdef RT_OS_WINDOWS
1244static void
1245sorecvfrom_icmp_win(PNATState pData, struct socket *so)
1246{
1247 int len;
1248 int i;
1249 struct ip *ip;
1250 struct mbuf *m;
1251 struct icmp *icp;
1252 struct icmp_msg *icm;
1253 struct ip *ip_broken; /* ICMP returns header + 64 bit of packet */
1254 uint32_t src;
1255 ICMP_ECHO_REPLY *icr;
1256 int hlen = 0;
1257 int data_len = 0;
1258 int nbytes = 0;
1259 u_char code = ~0;
1260 int out_len;
1261 int size;
1262
1263 len = pData->pfIcmpParseReplies(pData->pvIcmpBuffer, pData->szIcmpBuffer);
1264 if (len < 0)
1265 {
1266 LogRel(("NAT: Error (%d) occurred on ICMP receiving\n", GetLastError()));
1267 return;
1268 }
1269 if (len == 0)
1270 return; /* no error */
1271
1272 icr = (ICMP_ECHO_REPLY *)pData->pvIcmpBuffer;
1273 for (i = 0; i < len; ++i)
1274 {
1275 switch(icr[i].Status)
1276 {
1277 case IP_DEST_HOST_UNREACHABLE:
1278 code = (code != ~0 ? code : ICMP_UNREACH_HOST);
1279 case IP_DEST_NET_UNREACHABLE:
1280 code = (code != ~0 ? code : ICMP_UNREACH_NET);
1281 case IP_DEST_PROT_UNREACHABLE:
1282 code = (code != ~0 ? code : ICMP_UNREACH_PROTOCOL);
1283 /* UNREACH error inject here */
1284 case IP_DEST_PORT_UNREACHABLE:
1285 code = (code != ~0 ? code : ICMP_UNREACH_PORT);
1286 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, "Error occurred!!!");
1287 so->so_m = NULL;
1288 break;
1289 case IP_SUCCESS: /* echo replied */
1290# ifndef VBOX_WITH_SLIRP_BSD_MBUF
1291 m = m_get(pData);
1292# else
1293 out_len = ETH_HLEN + sizeof(struct ip) + 8;
1294 size;
1295 size = MCLBYTES;
1296 if (out_len < MSIZE)
1297 size = MCLBYTES;
1298 else if (out_len < MCLBYTES)
1299 size = MCLBYTES;
1300 else if (out_len < MJUM9BYTES)
1301 size = MJUM9BYTES;
1302 else if (out_len < MJUM16BYTES)
1303 size = MJUM16BYTES;
1304 else
1305 AssertMsgFailed(("Unsupported size"));
1306
1307 m = m_getjcl(pData, M_NOWAIT, MT_HEADER, M_PKTHDR, size);
1308 if (m == NULL)
1309 return;
1310# endif
1311 m->m_len = 0;
1312 m->m_data += if_maxlinkhdr;
1313 ip = mtod(m, struct ip *);
1314 ip->ip_src.s_addr = icr[i].Address;
1315 ip->ip_p = IPPROTO_ICMP;
1316 ip->ip_dst.s_addr = so->so_laddr.s_addr; /*XXX: still the hack*/
1317 data_len = sizeof(struct ip);
1318 ip->ip_hl = data_len >> 2; /* requiered for icmp_reflect, no IP options */
1319 ip->ip_ttl = icr[i].Options.Ttl;
1320
1321 icp = (struct icmp *)&ip[1]; /* no options */
1322 icp->icmp_type = ICMP_ECHOREPLY;
1323 icp->icmp_code = 0;
1324 icp->icmp_id = so->so_icmp_id;
1325 icp->icmp_seq = so->so_icmp_seq;
1326
1327 data_len += ICMP_MINLEN;
1328
1329# ifndef VBOX_WITH_SLIRP_BSD_MBUF
1330 nbytes = (data_len + icr[i].DataSize > m->m_size? m->m_size - data_len: icr[i].DataSize);
1331 memcpy(icp->icmp_data, icr[i].Data, nbytes);
1332# else
1333 hlen = (ip->ip_hl << 2);
1334 m->m_pkthdr.header = mtod(m, void *);
1335 m->m_len = data_len;
1336
1337 m_copyback(pData, m, hlen + 8, icr[i].DataSize, icr[i].Data);
1338# endif
1339
1340 data_len += icr[i].DataSize;
1341
1342 ip->ip_len = data_len;
1343 m->m_len = ip->ip_len;
1344
1345 icmp_reflect(pData, m);
1346 break;
1347 case IP_TTL_EXPIRED_TRANSIT: /* TTL expired */
1348
1349 ip_broken = icr[i].Data;
1350 icm = icmp_find_original_mbuf(pData, ip_broken);
1351 if (icm == NULL) {
1352 Log(("ICMP: can't find original package (first double word %x)\n", *(uint32_t *)ip_broken));
1353 return;
1354 }
1355 m = icm->im_m;
1356 ip = mtod(m, struct ip *);
1357 ip->ip_ttl = icr[i].Options.Ttl;
1358 src = ip->ip_src.s_addr;
1359 ip->ip_dst.s_addr = src;
1360 ip->ip_dst.s_addr = icr[i].Address;
1361
1362 hlen = (ip->ip_hl << 2);
1363 icp = (struct icmp *)((char *)ip + hlen);
1364 ip_broken->ip_src.s_addr = src; /*it packet sent from host not from guest*/
1365 data_len = (ip_broken->ip_hl << 2) + 64;
1366
1367#ifndef VBOX_WITH_SLIRP_BSD_MBUF
1368 nbytes =(hlen + ICMP_MINLEN + data_len > m->m_size? m->m_size - (hlen + ICMP_MINLEN): data_len);
1369 memcpy(icp->icmp_data, ip_broken, nbytes);
1370#else
1371 m->m_len = data_len;
1372 m->m_pkthdr.header = mtod(m, void *);
1373 m_copyback(pData, m, ip->ip_hl >> 2, icr[i].DataSize, icr[i].Data);
1374#endif
1375 icmp_reflect(pData, m);
1376 break;
1377 default:
1378 Log(("ICMP(default): message with Status: %x was received from %x\n", icr[i].Status, icr[i].Address));
1379 break;
1380 }
1381 }
1382}
1383#else /* !RT_OS_WINDOWS */
1384static void sorecvfrom_icmp_unix(PNATState pData, struct socket *so)
1385{
1386 struct sockaddr_in addr;
1387 socklen_t addrlen = sizeof(struct sockaddr_in);
1388 struct ip ip;
1389 char *buff;
1390 int len = 0;
1391
1392 /* 1- step: read the ip header */
1393 len = recvfrom(so->s, &ip, sizeof(struct ip), MSG_PEEK,
1394 (struct sockaddr *)&addr, &addrlen);
1395 if ( len < 0
1396 && ( errno == EAGAIN
1397 || errno == EWOULDBLOCK
1398 || errno == EINPROGRESS
1399 || errno == ENOTCONN))
1400 {
1401 Log(("sorecvfrom_icmp_unix: 1 - step can't read IP datagramm (would block)\n"));
1402 return;
1403 }
1404
1405 if ( len < sizeof(struct ip)
1406 || len < 0
1407 || len == 0)
1408 {
1409 u_char code;
1410 code = ICMP_UNREACH_PORT;
1411
1412 if (errno == EHOSTUNREACH)
1413 code = ICMP_UNREACH_HOST;
1414 else if (errno == ENETUNREACH)
1415 code = ICMP_UNREACH_NET;
1416
1417 LogRel((" udp icmp rx errno = %d-%s\n",
1418 errno, strerror(errno)));
1419 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
1420 so->so_m = NULL;
1421 Log(("sorecvfrom_icmp_unix: 1 - step can't read IP datagramm \n"));
1422 return;
1423 }
1424 /* basic check of IP header */
1425 if ( ip.ip_v != IPVERSION
1426# ifndef RT_OS_DARWIN
1427 || ip.ip_p != IPPROTO_ICMP
1428# endif
1429 )
1430 {
1431 Log(("sorecvfrom_icmp_unix: 1 - step IP isn't IPv4 \n"));
1432 return;
1433 }
1434# ifndef RT_OS_DARWIN
1435 /* Darwin reports the IP length already in host byte order. */
1436 ip.ip_len = RT_N2H_U16(ip.ip_len);
1437# endif
1438# if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
1439 /* Solaris and Darwin report the payload only */
1440 ip.ip_len += (ip.ip_hl << 2);
1441# endif
1442 /* Note: ip->ip_len in host byte order (all OS) */
1443 len = ip.ip_len;
1444 buff = RTMemAlloc(len);
1445 if (buff == NULL)
1446 {
1447 Log(("sorecvfrom_icmp_unix: 1 - step can't allocate enought room for datagram\n"));
1448 return;
1449 }
1450 /* 2 - step: we're reading rest of the datagramm to the buffer */
1451 addrlen = sizeof(struct sockaddr_in);
1452 memset(&addr, 0, addrlen);
1453 len = recvfrom(so->s, buff, len, 0,
1454 (struct sockaddr *)&addr, &addrlen);
1455 if ( len < 0
1456 && ( errno == EAGAIN
1457 || errno == EWOULDBLOCK
1458 || errno == EINPROGRESS
1459 || errno == ENOTCONN))
1460 {
1461 Log(("sorecvfrom_icmp_unix: 2 - step can't read IP body (would block expected:%d)\n",
1462 ip.ip_len));
1463 RTMemFree(buff);
1464 return;
1465 }
1466 if ( len < 0
1467 || len == 0)
1468 {
1469 Log(("sorecvfrom_icmp_unix: 2 - step read of the rest of datagramm is fallen (errno:%d, len:%d expected: %d)\n",
1470 errno, len, (ip.ip_len - sizeof(struct ip))));
1471 RTMemFree(buff);
1472 return;
1473 }
1474 /* len is modified in 2nd read, when the rest of the datagramm was read */
1475 send_icmp_to_guest(pData, buff, len, so, &addr);
1476 RTMemFree(buff);
1477}
1478#endif /* !RT_OS_WINDOWS */
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette