VirtualBox

source: vbox/trunk/src/VBox/Devices/Network/slirp/socket.c@ 34042

Last change on this file since 34042 was 34042, checked in by vboxsync, 14 years ago

NAT: warning.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 44.4 KB
Line 
1/* $Id: socket.c 34042 2010-11-12 19:03:55Z vboxsync $ */
2/** @file
3 * NAT - socket handling.
4 */
5
6/*
7 * Copyright (C) 2006-2010 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/*
19 * This code is based on:
20 *
21 * Copyright (c) 1995 Danny Gasparovski.
22 *
23 * Please read the file COPYRIGHT for the
24 * terms and conditions of the copyright.
25 */
26
27#define WANT_SYS_IOCTL_H
28#include <slirp.h>
29#include "ip_icmp.h"
30#include "main.h"
31#ifdef __sun__
32#include <sys/filio.h>
33#endif
34#include <VBox/pdmdrv.h>
35#if defined (RT_OS_WINDOWS)
36#include <iphlpapi.h>
37#include <icmpapi.h>
38#endif
39
40
41static void send_icmp_to_guest(PNATState, char *, size_t, struct socket *, const struct sockaddr_in *);
42#ifdef RT_OS_WINDOWS
43static void sorecvfrom_icmp_win(PNATState, struct socket *);
44#else /* RT_OS_WINDOWS */
45static void sorecvfrom_icmp_unix(PNATState, struct socket *);
46#endif /* !RT_OS_WINDOWS */
47
/*
 * Initialisation hook of the socket layer.
 *
 * There is currently nothing to set up, so the function is a no-op.  It takes
 * no arguments; the explicit (void) makes this a real prototype instead of an
 * old-style "unspecified arguments" definition.
 */
void
so_init(void)
{
}
52
53struct socket *
54solookup(struct socket *head, struct in_addr laddr,
55 u_int lport, struct in_addr faddr, u_int fport)
56{
57 struct socket *so;
58
59 for (so = head->so_next; so != head; so = so->so_next)
60 {
61 if ( so->so_lport == lport
62 && so->so_laddr.s_addr == laddr.s_addr
63 && so->so_faddr.s_addr == faddr.s_addr
64 && so->so_fport == fport)
65 return so;
66 }
67
68 return (struct socket *)NULL;
69}
70
71/*
72 * Create a new socket, initialise the fields
73 * It is the responsibility of the caller to
74 * insque() it into the correct linked-list
75 */
76struct socket *
77socreate()
78{
79 struct socket *so;
80
81 so = (struct socket *)RTMemAllocZ(sizeof(struct socket));
82 if (so)
83 {
84 so->so_state = SS_NOFDREF;
85 so->s = -1;
86#if !defined(RT_OS_WINDOWS)
87 so->so_poll_index = -1;
88#endif
89 }
90 return so;
91}
92
93/*
94 * remque and free a socket, clobber cache
95 * VBOX_WITH_SLIRP_MT: before sofree queue should be locked, because
96 * in sofree we don't know from which queue item beeing removed.
97 */
98void
99sofree(PNATState pData, struct socket *so)
100{
101 struct socket *so_prev = NULL;
102 if (so == tcp_last_so)
103 tcp_last_so = &tcb;
104 else if (so == udp_last_so)
105 udp_last_so = &udb;
106
107 /* check if mbuf haven't been already freed */
108 if (so->so_m != NULL)
109 m_freem(pData, so->so_m);
110#ifndef VBOX_WITH_SLIRP_MT
111 if (so->so_next && so->so_prev)
112 {
113 remque(pData, so); /* crashes if so is not in a queue */
114 NSOCK_DEC();
115 }
116
117 RTMemFree(so);
118#else
119 so->so_deleted = 1;
120#endif
121}
122
123#ifdef VBOX_WITH_SLIRP_MT
124void
125soread_queue(PNATState pData, struct socket *so, int *ret)
126{
127 *ret = soread(pData, so);
128}
129#endif
130
131/*
132 * Read from so's socket into sb_snd, updating all relevant sbuf fields
133 * NOTE: This will only be called if it is select()ed for reading, so
134 * a read() of 0 (or less) means it's disconnected
135 */
136#ifndef VBOX_WITH_SLIRP_BSD_SBUF
137int
138soread(PNATState pData, struct socket *so)
139{
140 int n, nn, lss, total;
141 struct sbuf *sb = &so->so_snd;
142 size_t len = sb->sb_datalen - sb->sb_cc;
143 struct iovec iov[2];
144 int mss = so->so_tcpcb->t_maxseg;
145
146 STAM_PROFILE_START(&pData->StatIOread, a);
147 STAM_COUNTER_RESET(&pData->StatIORead_in_1);
148 STAM_COUNTER_RESET(&pData->StatIORead_in_2);
149
150 QSOCKET_LOCK(tcb);
151 SOCKET_LOCK(so);
152 QSOCKET_UNLOCK(tcb);
153
154 DEBUG_CALL("soread");
155 DEBUG_ARG("so = %lx", (long)so);
156
157 /*
158 * No need to check if there's enough room to read.
159 * soread wouldn't have been called if there weren't
160 */
161
162 len = sb->sb_datalen - sb->sb_cc;
163
164 iov[0].iov_base = sb->sb_wptr;
165 iov[1].iov_base = 0;
166 iov[1].iov_len = 0;
167 if (sb->sb_wptr < sb->sb_rptr)
168 {
169 iov[0].iov_len = sb->sb_rptr - sb->sb_wptr;
170 /* Should never succeed, but... */
171 if (iov[0].iov_len > len)
172 iov[0].iov_len = len;
173 if (iov[0].iov_len > mss)
174 iov[0].iov_len -= iov[0].iov_len%mss;
175 n = 1;
176 }
177 else
178 {
179 iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr;
180 /* Should never succeed, but... */
181 if (iov[0].iov_len > len)
182 iov[0].iov_len = len;
183 len -= iov[0].iov_len;
184 if (len)
185 {
186 iov[1].iov_base = sb->sb_data;
187 iov[1].iov_len = sb->sb_rptr - sb->sb_data;
188 if (iov[1].iov_len > len)
189 iov[1].iov_len = len;
190 total = iov[0].iov_len + iov[1].iov_len;
191 if (total > mss)
192 {
193 lss = total % mss;
194 if (iov[1].iov_len > lss)
195 {
196 iov[1].iov_len -= lss;
197 n = 2;
198 }
199 else
200 {
201 lss -= iov[1].iov_len;
202 iov[0].iov_len -= lss;
203 n = 1;
204 }
205 }
206 else
207 n = 2;
208 }
209 else
210 {
211 if (iov[0].iov_len > mss)
212 iov[0].iov_len -= iov[0].iov_len%mss;
213 n = 1;
214 }
215 }
216
217#ifdef HAVE_READV
218 nn = readv(so->s, (struct iovec *)iov, n);
219 DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn));
220#else
221 nn = recv(so->s, iov[0].iov_base, iov[0].iov_len, (so->so_tcpcb->t_force? MSG_OOB:0));
222#endif
223 if (nn <= 0)
224 {
225 /*
226 * Special case for WSAEnumNetworkEvents: If we receive 0 bytes that
227 * _could_ mean that the connection is closed. But we will receive an
228 * FD_CLOSE event later if the connection was _really_ closed. With
229 * www.youtube.com I see this very often. Closing the socket too early
230 * would be dangerous.
231 */
232 int status;
233 unsigned long pending = 0;
234 status = ioctlsocket(so->s, FIONREAD, &pending);
235 if (status < 0)
236 Log(("NAT:error in WSAIoctl: %d\n", errno));
237 if (nn == 0 && (pending != 0))
238 {
239 SOCKET_UNLOCK(so);
240 STAM_PROFILE_STOP(&pData->StatIOread, a);
241 return 0;
242 }
243 if ( nn < 0
244 && ( errno == EINTR
245 || errno == EAGAIN
246 || errno == EWOULDBLOCK))
247 {
248 SOCKET_UNLOCK(so);
249 STAM_PROFILE_STOP(&pData->StatIOread, a);
250 return 0;
251 }
252 else
253 {
254 /* nn == 0 means peer has performed an orderly shutdown */
255 DEBUG_MISC((dfd, " --- soread() disconnected, nn = %d, errno = %d-%s\n",
256 nn, errno, strerror(errno)));
257 sofcantrcvmore(so);
258 tcp_sockclosed(pData, sototcpcb(so));
259 SOCKET_UNLOCK(so);
260 STAM_PROFILE_STOP(&pData->StatIOread, a);
261 return -1;
262 }
263 }
264 STAM_STATS(
265 if (n == 1)
266 {
267 STAM_COUNTER_INC(&pData->StatIORead_in_1);
268 STAM_COUNTER_ADD(&pData->StatIORead_in_1_bytes, nn);
269 }
270 else
271 {
272 STAM_COUNTER_INC(&pData->StatIORead_in_2);
273 STAM_COUNTER_ADD(&pData->StatIORead_in_2_1st_bytes, nn);
274 }
275 );
276
277#ifndef HAVE_READV
278 /*
279 * If there was no error, try and read the second time round
280 * We read again if n = 2 (ie, there's another part of the buffer)
281 * and we read as much as we could in the first read
282 * We don't test for <= 0 this time, because there legitimately
283 * might not be any more data (since the socket is non-blocking),
284 * a close will be detected on next iteration.
285 * A return of -1 wont (shouldn't) happen, since it didn't happen above
286 */
287 if (n == 2 && nn == iov[0].iov_len)
288 {
289 int ret;
290 ret = recv(so->s, iov[1].iov_base, iov[1].iov_len, 0);
291 if (ret > 0)
292 nn += ret;
293 STAM_STATS(
294 if (ret > 0)
295 {
296 STAM_COUNTER_INC(&pData->StatIORead_in_2);
297 STAM_COUNTER_ADD(&pData->StatIORead_in_2_2nd_bytes, ret);
298 }
299 );
300 }
301
302 DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn));
303#endif
304
305 /* Update fields */
306 sb->sb_cc += nn;
307 sb->sb_wptr += nn;
308 if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen))
309 sb->sb_wptr -= sb->sb_datalen;
310 STAM_PROFILE_STOP(&pData->StatIOread, a);
311 SOCKET_UNLOCK(so);
312 return nn;
313}
314#else /* VBOX_WITH_SLIRP_BSD_SBUF */
315int
316soread(PNATState pData, struct socket *so)
317{
318 int n;
319 char *buf;
320 struct sbuf *sb = &so->so_snd;
321 size_t len = sbspace(sb);
322 int mss = so->so_tcpcb->t_maxseg;
323
324 STAM_PROFILE_START(&pData->StatIOread, a);
325 STAM_COUNTER_RESET(&pData->StatIORead_in_1);
326 STAM_COUNTER_RESET(&pData->StatIORead_in_2);
327
328 QSOCKET_LOCK(tcb);
329 SOCKET_LOCK(so);
330 QSOCKET_UNLOCK(tcb);
331
332 DEBUG_CALL("soread");
333 DEBUG_ARG("so = %lx", (long)so);
334
335 if (len > mss)
336 len -= len % mss;
337 buf = RTMemAlloc(len);
338 if (buf == NULL)
339 {
340 Log(("NAT: can't alloc enough memory\n"));
341 return -1;
342 }
343
344 n = recv(so->s, buf, len, (so->so_tcpcb->t_force? MSG_OOB:0));
345 if (n <= 0)
346 {
347 /*
348 * Special case for WSAEnumNetworkEvents: If we receive 0 bytes that
349 * _could_ mean that the connection is closed. But we will receive an
350 * FD_CLOSE event later if the connection was _really_ closed. With
351 * www.youtube.com I see this very often. Closing the socket too early
352 * would be dangerous.
353 */
354 int status;
355 unsigned long pending = 0;
356 status = ioctlsocket(so->s, FIONREAD, &pending);
357 if (status < 0)
358 Log(("NAT:error in WSAIoctl: %d\n", errno));
359 if (n == 0 && (pending != 0))
360 {
361 SOCKET_UNLOCK(so);
362 STAM_PROFILE_STOP(&pData->StatIOread, a);
363 RTMemFree(buf);
364 return 0;
365 }
366 if ( n < 0
367 && ( errno == EINTR
368 || errno == EAGAIN
369 || errno == EWOULDBLOCK))
370 {
371 SOCKET_UNLOCK(so);
372 STAM_PROFILE_STOP(&pData->StatIOread, a);
373 RTMemFree(buf);
374 return 0;
375 }
376 else
377 {
378 DEBUG_MISC((dfd, " --- soread() disconnected, n = %d, errno = %d-%s\n",
379 n, errno, strerror(errno)));
380 sofcantrcvmore(so);
381 tcp_sockclosed(pData, sototcpcb(so));
382 SOCKET_UNLOCK(so);
383 STAM_PROFILE_STOP(&pData->StatIOread, a);
384 RTMemFree(buf);
385 return -1;
386 }
387 }
388
389 sbuf_bcat(sb, buf, n);
390 RTMemFree(buf);
391 return n;
392}
393#endif
394
395/*
396 * Get urgent data
397 *
398 * When the socket is created, we set it SO_OOBINLINE,
399 * so when OOB data arrives, we soread() it and everything
400 * in the send buffer is sent as urgent data
401 */
402void
403sorecvoob(PNATState pData, struct socket *so)
404{
405 struct tcpcb *tp = sototcpcb(so);
406 ssize_t ret;
407
408 DEBUG_CALL("sorecvoob");
409 DEBUG_ARG("so = %lx", (long)so);
410
411 /*
412 * We take a guess at how much urgent data has arrived.
413 * In most situations, when urgent data arrives, the next
414 * read() should get all the urgent data. This guess will
415 * be wrong however if more data arrives just after the
416 * urgent data, or the read() doesn't return all the
417 * urgent data.
418 */
419 ret = soread(pData, so);
420 tp->snd_up = tp->snd_una + SBUF_LEN(&so->so_snd);
421 tp->t_force = 1;
422 tcp_output(pData, tp);
423 tp->t_force = 0;
424}
425#ifndef VBOX_WITH_SLIRP_BSD_SBUF
426/*
427 * Send urgent data
428 * There's a lot duplicated code here, but...
429 */
430int
431sosendoob(struct socket *so)
432{
433 struct sbuf *sb = &so->so_rcv;
434 char buff[2048]; /* XXX Shouldn't be sending more oob data than this */
435
436 int n, len;
437
438 DEBUG_CALL("sosendoob");
439 DEBUG_ARG("so = %lx", (long)so);
440
441 if (so->so_urgc > sizeof(buff))
442 so->so_urgc = sizeof(buff); /* XXX */
443
444 if (sb->sb_rptr < sb->sb_wptr)
445 {
446 /* We can send it directly */
447 n = send(so->s, sb->sb_rptr, so->so_urgc, (MSG_OOB)); /* |MSG_DONTWAIT)); */
448 so->so_urgc -= n;
449
450 DEBUG_MISC((dfd, " --- sent %d bytes urgent data, %d urgent bytes left\n",
451 n, so->so_urgc));
452 }
453 else
454 {
455 /*
456 * Since there's no sendv or sendtov like writev,
457 * we must copy all data to a linear buffer then
458 * send it all
459 */
460 len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
461 if (len > so->so_urgc)
462 len = so->so_urgc;
463 memcpy(buff, sb->sb_rptr, len);
464 so->so_urgc -= len;
465 if (so->so_urgc)
466 {
467 n = sb->sb_wptr - sb->sb_data;
468 if (n > so->so_urgc)
469 n = so->so_urgc;
470 memcpy(buff + len, sb->sb_data, n);
471 so->so_urgc -= n;
472 len += n;
473 }
474 n = send(so->s, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */
475#ifdef DEBUG
476 if (n != len)
477 DEBUG_ERROR((dfd, "Didn't send all data urgently XXXXX\n"));
478#endif
479 DEBUG_MISC((dfd, " ---2 sent %d bytes urgent data, %d urgent bytes left\n",
480 n, so->so_urgc));
481 }
482
483 sb->sb_cc -= n;
484 sb->sb_rptr += n;
485 if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
486 sb->sb_rptr -= sb->sb_datalen;
487
488 return n;
489}
490
491/*
492 * Write data from so_rcv to so's socket,
493 * updating all sbuf field as necessary
494 */
495int
496sowrite(PNATState pData, struct socket *so)
497{
498 int n, nn;
499 struct sbuf *sb = &so->so_rcv;
500 size_t len = sb->sb_cc;
501 struct iovec iov[2];
502
503 STAM_PROFILE_START(&pData->StatIOwrite, a);
504 STAM_COUNTER_RESET(&pData->StatIOWrite_in_1);
505 STAM_COUNTER_RESET(&pData->StatIOWrite_in_1_bytes);
506 STAM_COUNTER_RESET(&pData->StatIOWrite_in_2);
507 STAM_COUNTER_RESET(&pData->StatIOWrite_in_2_1st_bytes);
508 STAM_COUNTER_RESET(&pData->StatIOWrite_in_2_2nd_bytes);
509 STAM_COUNTER_RESET(&pData->StatIOWrite_no_w);
510 STAM_COUNTER_RESET(&pData->StatIOWrite_rest);
511 STAM_COUNTER_RESET(&pData->StatIOWrite_rest_bytes);
512 DEBUG_CALL("sowrite");
513 DEBUG_ARG("so = %lx", (long)so);
514 QSOCKET_LOCK(tcb);
515 SOCKET_LOCK(so);
516 QSOCKET_UNLOCK(tcb);
517 if (so->so_urgc)
518 {
519 sosendoob(so);
520 if (sb->sb_cc == 0)
521 {
522 SOCKET_UNLOCK(so);
523 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
524 return 0;
525 }
526 }
527
528 /*
529 * No need to check if there's something to write,
530 * sowrite wouldn't have been called otherwise
531 */
532
533 len = sb->sb_cc;
534
535 iov[0].iov_base = sb->sb_rptr;
536 iov[1].iov_base = 0;
537 iov[1].iov_len = 0;
538 if (sb->sb_rptr < sb->sb_wptr)
539 {
540 iov[0].iov_len = sb->sb_wptr - sb->sb_rptr;
541 /* Should never succeed, but... */
542 if (iov[0].iov_len > len)
543 iov[0].iov_len = len;
544 n = 1;
545 }
546 else
547 {
548 iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
549 if (iov[0].iov_len > len)
550 iov[0].iov_len = len;
551 len -= iov[0].iov_len;
552 if (len)
553 {
554 iov[1].iov_base = sb->sb_data;
555 iov[1].iov_len = sb->sb_wptr - sb->sb_data;
556 if (iov[1].iov_len > len)
557 iov[1].iov_len = len;
558 n = 2;
559 }
560 else
561 n = 1;
562 }
563 STAM_STATS({
564 if (n == 1)
565 {
566 STAM_COUNTER_INC(&pData->StatIOWrite_in_1);
567 STAM_COUNTER_ADD(&pData->StatIOWrite_in_1_bytes, iov[0].iov_len);
568 }
569 else
570 {
571 STAM_COUNTER_INC(&pData->StatIOWrite_in_2);
572 STAM_COUNTER_ADD(&pData->StatIOWrite_in_2_1st_bytes, iov[0].iov_len);
573 STAM_COUNTER_ADD(&pData->StatIOWrite_in_2_2nd_bytes, iov[1].iov_len);
574 }
575 });
576 /* Check if there's urgent data to send, and if so, send it */
577#ifdef HAVE_READV
578 nn = writev(so->s, (const struct iovec *)iov, n);
579 DEBUG_MISC((dfd, " ... wrote nn = %d bytes\n", nn));
580#else
581 nn = send(so->s, iov[0].iov_base, iov[0].iov_len, 0);
582#endif
583 /* This should never happen, but people tell me it does *shrug* */
584 if ( nn < 0
585 && ( errno == EAGAIN
586 || errno == EINTR
587 || errno == EWOULDBLOCK))
588 {
589 SOCKET_UNLOCK(so);
590 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
591 return 0;
592 }
593
594 if (nn < 0 || (nn == 0 && iov[0].iov_len > 0))
595 {
596 DEBUG_MISC((dfd, " --- sowrite disconnected, so->so_state = %x, errno = %d\n",
597 so->so_state, errno));
598 sofcantsendmore(so);
599 tcp_sockclosed(pData, sototcpcb(so));
600 SOCKET_UNLOCK(so);
601 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
602 return -1;
603 }
604
605#ifndef HAVE_READV
606 if (n == 2 && nn == iov[0].iov_len)
607 {
608 int ret;
609 ret = send(so->s, iov[1].iov_base, iov[1].iov_len, 0);
610 if (ret > 0)
611 nn += ret;
612 STAM_STATS({
613 if (ret > 0 && ret != iov[1].iov_len)
614 {
615 STAM_COUNTER_INC(&pData->StatIOWrite_rest);
616 STAM_COUNTER_ADD(&pData->StatIOWrite_rest_bytes, (iov[1].iov_len - ret));
617 }
618 });
619 }
620 DEBUG_MISC((dfd, " ... wrote nn = %d bytes\n", nn));
621#endif
622
623 /* Update sbuf */
624 sb->sb_cc -= nn;
625 sb->sb_rptr += nn;
626 if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
627 sb->sb_rptr -= sb->sb_datalen;
628
629 /*
630 * If in DRAIN mode, and there's no more data, set
631 * it CANTSENDMORE
632 */
633 if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0)
634 sofcantsendmore(so);
635
636 SOCKET_UNLOCK(so);
637 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
638 return nn;
639}
640#else /* VBOX_WITH_SLIRP_BSD_SBUF */
641static int
642do_sosend(struct socket *so, int fUrg)
643{
644 struct sbuf *sb = &so->so_rcv;
645
646 int n, len;
647
648 DEBUG_CALL("sosendoob");
649 DEBUG_ARG("so = %lx", (long)so);
650
651 len = sbuf_len(sb);
652
653 n = send(so->s, sbuf_data(sb), len, (fUrg ? MSG_OOB : 0));
654 if (n < 0)
655 Log(("NAT: Can't sent sbuf via socket.\n"));
656 if (fUrg)
657 so->so_urgc -= n;
658 if (n > 0 && n < len)
659 {
660 char *ptr;
661 char *buff;
662 buff = RTMemAlloc(len);
663 if (buff == NULL)
664 {
665 Log(("NAT: No space to allocate temporal buffer\n"));
666 return -1;
667 }
668 ptr = sbuf_data(sb);
669 memcpy(buff, &ptr[n], len - n);
670 sbuf_bcpy(sb, buff, len - n);
671 RTMemFree(buff);
672 return n;
673 }
674 sbuf_clear(sb);
675 return n;
676}
/*
 * Send urgent data: forwards to do_sosend() with the urgent flag set and
 * returns its result.
 */
int
sosendoob(struct socket *so)
{
    int cbSent = do_sosend(so, 1 /* fUrg */);

    return cbSent;
}
682
683/*
684 * Write data from so_rcv to so's socket,
685 * updating all sbuf field as necessary
686 */
687int
688sowrite(PNATState pData, struct socket *so)
689{
690 return do_sosend(so, 0);
691}
692#endif
693
694/*
695 * recvfrom() a UDP socket
696 */
697void
698sorecvfrom(PNATState pData, struct socket *so)
699{
700 ssize_t ret = 0;
701 struct sockaddr_in addr;
702 socklen_t addrlen = sizeof(struct sockaddr_in);
703
704 DEBUG_CALL("sorecvfrom");
705 DEBUG_ARG("so = %lx", (long)so);
706
707 if (so->so_type == IPPROTO_ICMP)
708 {
709 /* This is a "ping" reply */
710#ifdef RT_OS_WINDOWS
711 sorecvfrom_icmp_win(pData, so);
712#else /* RT_OS_WINDOWS */
713 sorecvfrom_icmp_unix(pData, so);
714#endif /* !RT_OS_WINDOWS */
715 udp_detach(pData, so);
716 }
717 else
718 {
719 /* A "normal" UDP packet */
720 struct mbuf *m;
721 ssize_t len;
722 u_long n = 0;
723 int size;
724 int rc = 0;
725 static int signalled = 0;
726 char *pchBuffer = NULL;
727 bool fWithTemporalBuffer = false;
728
729 QSOCKET_LOCK(udb);
730 SOCKET_LOCK(so);
731 QSOCKET_UNLOCK(udb);
732
733 /*How many data has been received ?*/
734 /*
735 * 1. calculate how much we can read
736 * 2. read as much as possible
737 * 3. attach buffer to allocated header mbuf
738 */
739 rc = ioctlsocket(so->s, FIONREAD, &n);
740 if (rc == -1)
741 {
742 if ( errno == EAGAIN
743 || errno == EWOULDBLOCK
744 || errno == EINPROGRESS
745 || errno == ENOTCONN)
746 return;
747 else if (signalled == 0)
748 {
749 LogRel(("NAT: can't fetch amount of bytes on socket %R[natsock], so message will be truncated.\n", so));
750 signalled = 1;
751 }
752 return;
753 }
754
755 len = sizeof(struct udpiphdr);
756 m = m_getjcl(pData, M_NOWAIT, MT_HEADER, M_PKTHDR, slirp_size(pData));
757 if (m == NULL)
758 return;
759
760 len += n;
761 m->m_data += ETH_HLEN;
762 m->m_pkthdr.header = mtod(m, void *);
763 m->m_data += sizeof(struct udpiphdr);
764
765 pchBuffer = mtod(m, char *);
766 fWithTemporalBuffer = false;
767 /*
768 * Even if amounts of bytes on socket is greater than MTU value
769 * Slirp will able fragment it, but we won't create temporal location
770 * here.
771 */
772 if (n > (slirp_size(pData) - sizeof(struct udpiphdr)))
773 {
774 pchBuffer = RTMemAlloc((n) * sizeof(char));
775 if (!pchBuffer)
776 {
777 m_freem(pData, m);
778 return;
779 }
780 fWithTemporalBuffer = true;
781 }
782 ret = recvfrom(so->s, pchBuffer, n, 0,
783 (struct sockaddr *)&addr, &addrlen);
784 if (fWithTemporalBuffer)
785 {
786 if (ret > 0)
787 {
788 m_copyback(pData, m, 0, ret, pchBuffer);
789 /*
790 * If we've met comporison below our size prediction was failed
791 * it's not fatal just we've allocated for nothing. (@todo add counter here
792 * to calculate how rare we here)
793 */
794 if(ret < slirp_size(pData) && !m->m_next)
795 Log(("NAT:udp: Expected size(%d) lesser than real(%d) and less minimal mbuf size(%d) \n",
796 n, ret, slirp_size(pData)));
797 }
798 /* we're freeing buffer anyway */
799 RTMemFree(pchBuffer);
800 }
801 else
802 m->m_len = ret;
803
804 if (ret < 0)
805 {
806 u_char code = ICMP_UNREACH_PORT;
807
808 if (errno == EHOSTUNREACH)
809 code = ICMP_UNREACH_HOST;
810 else if (errno == ENETUNREACH)
811 code = ICMP_UNREACH_NET;
812
813 m_freem(pData, m);
814 if ( errno == EAGAIN
815 || errno == EWOULDBLOCK
816 || errno == EINPROGRESS
817 || errno == ENOTCONN)
818 {
819 return;
820 }
821
822 Log2((" rx error, tx icmp ICMP_UNREACH:%i\n", code));
823 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
824 m_freem(pData, so->so_m);
825 so->so_m = NULL;
826 }
827 else
828 {
829 Assert((m_length(m,NULL) == ret));
830 /*
831 * Hack: domain name lookup will be used the most for UDP,
832 * and since they'll only be used once there's no need
833 * for the 4 minute (or whatever) timeout... So we time them
834 * out much quicker (10 seconds for now...)
835 */
836 if (so->so_expire)
837 {
838 if (so->so_fport != RT_H2N_U16_C(53))
839 so->so_expire = curtime + SO_EXPIRE;
840 }
841 /*
842 * last argument should be changed if Slirp will inject IP attributes
843 * Note: Here we can't check if dnsproxy's sent initial request
844 */
845 if ( pData->fUseDnsProxy
846 && so->so_fport == RT_H2N_U16_C(53))
847 dnsproxy_answer(pData, so, m);
848
849#if 0
850 if (m->m_len == len)
851 {
852 m_inc(m, MINCSIZE);
853 m->m_len = 0;
854 }
855#endif
856
857 /*
858 * If this packet was destined for CTL_ADDR,
859 * make it look like that's where it came from, done by udp_output
860 */
861 udp_output(pData, so, m, &addr);
862 SOCKET_UNLOCK(so);
863 } /* rx error */
864 } /* if ping packet */
865}
866
867/*
868 * sendto() a socket
869 */
870int
871sosendto(PNATState pData, struct socket *so, struct mbuf *m)
872{
873 int ret;
874 struct sockaddr_in *paddr;
875 struct sockaddr addr;
876#if 0
877 struct sockaddr_in host_addr;
878#endif
879 caddr_t buf;
880 int mlen;
881
882 DEBUG_CALL("sosendto");
883 DEBUG_ARG("so = %lx", (long)so);
884 DEBUG_ARG("m = %lx", (long)m);
885
886 memset(&addr, 0, sizeof(struct sockaddr));
887#ifdef RT_OS_DARWIN
888 addr.sa_len = sizeof(struct sockaddr_in);
889#endif
890 paddr = (struct sockaddr_in *)&addr;
891 paddr->sin_family = AF_INET;
892 if ((so->so_faddr.s_addr & RT_H2N_U32(pData->netmask)) == pData->special_addr.s_addr)
893 {
894 /* It's an alias */
895 uint32_t last_byte = RT_N2H_U32(so->so_faddr.s_addr) & ~pData->netmask;
896 switch(last_byte)
897 {
898#if 0
899 /* handle this case at 'default:' */
900 case CTL_BROADCAST:
901 addr.sin_addr.s_addr = INADDR_BROADCAST;
902 /* Send the packet to host to fully emulate broadcast */
903 /** @todo r=klaus: on Linux host this causes the host to receive
904 * the packet twice for some reason. And I cannot find any place
905 * in the man pages which states that sending a broadcast does not
906 * reach the host itself. */
907 host_addr.sin_family = AF_INET;
908 host_addr.sin_port = so->so_fport;
909 host_addr.sin_addr = our_addr;
910 sendto(so->s, m->m_data, m->m_len, 0,
911 (struct sockaddr *)&host_addr, sizeof (struct sockaddr));
912 break;
913#endif
914 case CTL_DNS:
915 case CTL_ALIAS:
916 default:
917 if (last_byte == ~pData->netmask)
918 paddr->sin_addr.s_addr = INADDR_BROADCAST;
919 else
920 paddr->sin_addr = loopback_addr;
921 break;
922 }
923 }
924 else
925 paddr->sin_addr = so->so_faddr;
926 paddr->sin_port = so->so_fport;
927
928 DEBUG_MISC((dfd, " sendto()ing, addr.sin_port=%d, addr.sin_addr.s_addr=%.16s\n",
929 RT_N2H_U16(paddr->sin_port), inet_ntoa(paddr->sin_addr)));
930
931 /* Don't care what port we get */
932 mlen = m_length(m, NULL);
933 buf = RTMemAlloc(mlen);
934 if (buf == NULL)
935 {
936 return -1;
937 }
938 m_copydata(m, 0, mlen, buf);
939 ret = sendto(so->s, buf, mlen, 0,
940 (struct sockaddr *)&addr, sizeof (struct sockaddr));
941 RTMemFree(buf);
942 if (ret < 0)
943 {
944 Log2(("UDP: sendto fails (%s)\n", strerror(errno)));
945 return -1;
946 }
947
948 /*
949 * Kill the socket if there's no reply in 4 minutes,
950 * but only if it's an expirable socket
951 */
952 if (so->so_expire)
953 so->so_expire = curtime + SO_EXPIRE;
954 so->so_state = SS_ISFCONNECTED; /* So that it gets select()ed */
955 return 0;
956}
957
958/*
959 * XXX This should really be tcp_listen
960 */
961struct socket *
962solisten(PNATState pData, u_int32_t bind_addr, u_int port, u_int32_t laddr, u_int lport, int flags)
963{
964 struct sockaddr_in addr;
965 struct socket *so;
966 socklen_t addrlen = sizeof(addr);
967 int s, opt = 1;
968 int status;
969
970 DEBUG_CALL("solisten");
971 DEBUG_ARG("port = %d", port);
972 DEBUG_ARG("laddr = %x", laddr);
973 DEBUG_ARG("lport = %d", lport);
974 DEBUG_ARG("flags = %x", flags);
975
976 if ((so = socreate()) == NULL)
977 {
978 /* RTMemFree(so); Not sofree() ??? free(NULL) == NOP */
979 return NULL;
980 }
981
982 /* Don't tcp_attach... we don't need so_snd nor so_rcv */
983 if ((so->so_tcpcb = tcp_newtcpcb(pData, so)) == NULL)
984 {
985 RTMemFree(so);
986 return NULL;
987 }
988
989 SOCKET_LOCK_CREATE(so);
990 SOCKET_LOCK(so);
991 QSOCKET_LOCK(tcb);
992 insque(pData, so,&tcb);
993 NSOCK_INC();
994 QSOCKET_UNLOCK(tcb);
995
996 /*
997 * SS_FACCEPTONCE sockets must time out.
998 */
999 if (flags & SS_FACCEPTONCE)
1000 so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT*2;
1001
1002 so->so_state = (SS_FACCEPTCONN|flags);
1003 so->so_lport = lport; /* Kept in network format */
1004 so->so_laddr.s_addr = laddr; /* Ditto */
1005
1006 memset(&addr, 0, sizeof(addr));
1007#ifdef RT_OS_DARWIN
1008 addr.sin_len = sizeof(addr);
1009#endif
1010 addr.sin_family = AF_INET;
1011 addr.sin_addr.s_addr = bind_addr;
1012 addr.sin_port = port;
1013
1014 if ( ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0)
1015 || (setsockopt(s, SOL_SOCKET, SO_REUSEADDR,(char *)&opt, sizeof(int)) < 0)
1016 || (bind(s,(struct sockaddr *)&addr, sizeof(addr)) < 0)
1017 || (listen(s, 1) < 0))
1018 {
1019#ifdef RT_OS_WINDOWS
1020 int tmperrno = WSAGetLastError(); /* Don't clobber the real reason we failed */
1021 closesocket(s);
1022 QSOCKET_LOCK(tcb);
1023 sofree(pData, so);
1024 QSOCKET_UNLOCK(tcb);
1025 /* Restore the real errno */
1026 WSASetLastError(tmperrno);
1027#else
1028 int tmperrno = errno; /* Don't clobber the real reason we failed */
1029 close(s);
1030 QSOCKET_LOCK(tcb);
1031 sofree(pData, so);
1032 QSOCKET_UNLOCK(tcb);
1033 /* Restore the real errno */
1034 errno = tmperrno;
1035#endif
1036 return NULL;
1037 }
1038 fd_nonblock(s);
1039 setsockopt(s, SOL_SOCKET, SO_OOBINLINE,(char *)&opt, sizeof(int));
1040
1041 getsockname(s,(struct sockaddr *)&addr,&addrlen);
1042 so->so_fport = addr.sin_port;
1043 /* set socket buffers */
1044 opt = pData->socket_rcv;
1045 status = setsockopt(s, SOL_SOCKET, SO_RCVBUF, (char *)&opt, sizeof(int));
1046 if (status < 0)
1047 {
1048 LogRel(("NAT: Error(%d) while setting RCV capacity to (%d)\n", errno, opt));
1049 goto no_sockopt;
1050 }
1051 opt = pData->socket_snd;
1052 status = setsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *)&opt, sizeof(int));
1053 if (status < 0)
1054 {
1055 LogRel(("NAT: Error(%d) while setting SND capacity to (%d)\n", errno, opt));
1056 goto no_sockopt;
1057 }
1058no_sockopt:
1059 if (addr.sin_addr.s_addr == 0 || addr.sin_addr.s_addr == loopback_addr.s_addr)
1060 so->so_faddr = alias_addr;
1061 else
1062 so->so_faddr = addr.sin_addr;
1063
1064 so->s = s;
1065 SOCKET_UNLOCK(so);
1066 return so;
1067}
1068
/*
 * Notification that data is available in so_rcv.
 *
 * The idea is to simply write() the data to the socket here, but that is not
 * done yet (XXX): the code is disabled and the function does nothing.
 */
void
sorwakeup(struct socket *so)
{
    (void)so; /* unused until the code below is enabled */
#if 0
    sowrite(so);
    FD_CLR(so->s,&writefds);
#endif
}
1082
/*
 * Notification that data has been freed in so_snd, i.e. there is room for a
 * read() if we want to.
 *
 * For now nothing is read here, it will be done in the main loop; the
 * function is intentionally empty.
 */
void
sowwakeup(struct socket *so)
{
    (void)so; /* intentionally unused */
}
1092
1093/*
1094 * Various session state calls
1095 * XXX Should be #define's
1096 * The socket state stuff needs work, these often get call 2 or 3
1097 * times each when only 1 was needed
1098 */
1099void
1100soisfconnecting(struct socket *so)
1101{
1102 so->so_state &= ~(SS_NOFDREF|SS_ISFCONNECTED|SS_FCANTRCVMORE|
1103 SS_FCANTSENDMORE|SS_FWDRAIN);
1104 so->so_state |= SS_ISFCONNECTING; /* Clobber other states */
1105}
1106
1107void
1108soisfconnected(struct socket *so)
1109{
1110 so->so_state &= ~(SS_ISFCONNECTING|SS_FWDRAIN|SS_NOFDREF);
1111 so->so_state |= SS_ISFCONNECTED; /* Clobber other states */
1112}
1113
1114void
1115sofcantrcvmore(struct socket *so)
1116{
1117 if ((so->so_state & SS_NOFDREF) == 0)
1118 {
1119 shutdown(so->s, 0);
1120 }
1121 so->so_state &= ~(SS_ISFCONNECTING);
1122 if (so->so_state & SS_FCANTSENDMORE)
1123 so->so_state = SS_NOFDREF; /* Don't select it */
1124 /* XXX close() here as well? */
1125 else
1126 so->so_state |= SS_FCANTRCVMORE;
1127}
1128
1129void
1130sofcantsendmore(struct socket *so)
1131{
1132 if ((so->so_state & SS_NOFDREF) == 0)
1133 shutdown(so->s, 1); /* send FIN to fhost */
1134
1135 so->so_state &= ~(SS_ISFCONNECTING);
1136 if (so->so_state & SS_FCANTRCVMORE)
1137 so->so_state = SS_NOFDREF; /* as above */
1138 else
1139 so->so_state |= SS_FCANTSENDMORE;
1140}
1141
/*
 * Notification that the foreign side of the socket is disconnected.
 *
 * The state handling below is disabled, so the function currently does
 * nothing.
 */
void
soisfdisconnected(struct socket *so)
{
    (void)so; /* unused while the code below is disabled */
#if 0
    so->so_state &= ~(SS_ISFCONNECTING|SS_ISFCONNECTED);
    close(so->s);
    so->so_state = SS_ISFDISCONNECTED;
    /*
     * XXX Do nothing ... ?
     */
#endif
}
1154
1155/*
1156 * Set write drain mode
1157 * Set CANTSENDMORE once all data has been write()n
1158 */
1159void
1160sofwdrain(struct socket *so)
1161{
1162 if (SBUF_LEN(&so->so_rcv))
1163 so->so_state |= SS_FWDRAIN;
1164 else
1165 sofcantsendmore(so);
1166}
1167
1168static void
1169send_icmp_to_guest(PNATState pData, char *buff, size_t len, struct socket *so, const struct sockaddr_in *addr)
1170{
1171 struct ip *ip;
1172 uint32_t dst, src;
1173 char ip_copy[256];
1174 struct icmp *icp;
1175 int old_ip_len = 0;
1176 int hlen, original_hlen = 0;
1177 struct mbuf *m;
1178 struct icmp_msg *icm;
1179 uint8_t proto;
1180 int type = 0;
1181
1182 ip = (struct ip *)buff;
1183 /* Fix ip->ip_len to contain the total packet length including the header
1184 * in _host_ byte order for all OSes. On Darwin, that value already is in
1185 * host byte order. Solaris and Darwin report only the payload. */
1186#ifndef RT_OS_DARWIN
1187 ip->ip_len = RT_N2H_U16(ip->ip_len);
1188#endif
1189 hlen = (ip->ip_hl << 2);
1190#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
1191 ip->ip_len += hlen;
1192#endif
1193 if (ip->ip_len < hlen + ICMP_MINLEN)
1194 {
1195 Log(("send_icmp_to_guest: ICMP header is too small to understand which type/subtype of the datagram\n"));
1196 return;
1197 }
1198 icp = (struct icmp *)((char *)ip + hlen);
1199
1200 Log(("ICMP:received msg(t:%d, c:%d)\n", icp->icmp_type, icp->icmp_code));
1201 if ( icp->icmp_type != ICMP_ECHOREPLY
1202 && icp->icmp_type != ICMP_TIMXCEED
1203 && icp->icmp_type != ICMP_UNREACH)
1204 {
1205 return;
1206 }
1207
1208 /*
1209 * ICMP_ECHOREPLY, ICMP_TIMXCEED, ICMP_UNREACH minimal header size is
1210 * ICMP_ECHOREPLY assuming data 0
1211 * icmp_{type(8), code(8), cksum(16),identifier(16),seqnum(16)}
1212 */
1213 if (ip->ip_len < hlen + 8)
1214 {
1215 Log(("send_icmp_to_guest: NAT accept ICMP_{ECHOREPLY, TIMXCEED, UNREACH} the minimum size is 64 (see rfc792)\n"));
1216 return;
1217 }
1218
1219 type = icp->icmp_type;
1220 if ( type == ICMP_TIMXCEED
1221 || type == ICMP_UNREACH)
1222 {
1223 /*
1224 * ICMP_TIMXCEED, ICMP_UNREACH minimal header size is
1225 * icmp_{type(8), code(8), cksum(16),unused(32)} + IP header + 64 bit of original datagram
1226 */
1227 if (ip->ip_len < hlen + 2*8 + sizeof(struct ip))
1228 {
1229 Log(("send_icmp_to_guest: NAT accept ICMP_{TIMXCEED, UNREACH} the minimum size of ipheader + 64 bit of data (see rfc792)\n"));
1230 return;
1231 }
1232 ip = &icp->icmp_ip;
1233 }
1234
1235 icm = icmp_find_original_mbuf(pData, ip);
1236 if (icm == NULL)
1237 {
1238 Log(("NAT: Can't find the corresponding packet for the received ICMP\n"));
1239 return;
1240 }
1241
1242 m = icm->im_m;
1243 Assert(m != NULL);
1244
1245 src = addr->sin_addr.s_addr;
1246 if (type == ICMP_ECHOREPLY)
1247 {
1248 struct ip *ip0 = mtod(m, struct ip *);
1249 struct icmp *icp0 = (struct icmp *)((char *)ip0 + (ip0->ip_hl << 2));
1250 if (icp0->icmp_type != ICMP_ECHO)
1251 {
1252 Log(("NAT: we haven't found echo for this reply\n"));
1253 return;
1254 }
1255 /*
1256 * while combining buffer to send (see ip_icmp.c) we control ICMP header only,
1257 * IP header combined by OS network stack, our local copy of IP header contians values
1258 * in host byte order so no byte order conversion is required. IP headers fields are converting
1259 * in ip_output0 routine only.
1260 */
1261 if ( (ip->ip_len - hlen)
1262 != (ip0->ip_len - (ip0->ip_hl << 2)))
1263 {
1264 Log(("NAT: ECHO(%d) lenght doesn't match ECHOREPLY(%d)\n",
1265 (ip->ip_len - hlen), (ip0->ip_len - (ip0->ip_hl << 2))));
1266 return;
1267 }
1268 }
1269
1270 /* ip points on origianal ip header */
1271 ip = mtod(m, struct ip *);
1272 proto = ip->ip_p;
1273 /* Now ip is pointing on header we've sent from guest */
1274 if ( icp->icmp_type == ICMP_TIMXCEED
1275 || icp->icmp_type == ICMP_UNREACH)
1276 {
1277 old_ip_len = (ip->ip_hl << 2) + 64;
1278 if (old_ip_len > sizeof(ip_copy))
1279 old_ip_len = sizeof(ip_copy);
1280 memcpy(ip_copy, ip, old_ip_len);
1281 }
1282
1283 /* source address from original IP packet*/
1284 dst = ip->ip_src.s_addr;
1285
1286 /* overide ther tail of old packet */
1287 ip = mtod(m, struct ip *); /* ip is from mbuf we've overrided */
1288 original_hlen = ip->ip_hl << 2;
1289 /* saves original ip header and options */
1290 m_copyback(pData, m, original_hlen, len - hlen, buff + hlen);
1291 ip->ip_len = m_length(m, NULL);
1292 ip->ip_p = IPPROTO_ICMP; /* the original package could be whatever, but we're response via ICMP*/
1293
1294 icp = (struct icmp *)((char *)ip + (ip->ip_hl << 2));
1295 type = icp->icmp_type;
1296 if ( type == ICMP_TIMXCEED
1297 || type == ICMP_UNREACH)
1298 {
1299 /* according RFC 793 error messages required copy of initial IP header + 64 bit */
1300 memcpy(&icp->icmp_ip, ip_copy, old_ip_len);
1301 ip->ip_tos = ((ip->ip_tos & 0x1E) | 0xC0); /* high priority for errors */
1302 }
1303
1304 ip->ip_src.s_addr = src;
1305 ip->ip_dst.s_addr = dst;
1306 icmp_reflect(pData, m);
1307 LIST_REMOVE(icm, im_list);
1308 /* Don't call m_free here*/
1309
1310 if ( type == ICMP_TIMXCEED
1311 || type == ICMP_UNREACH)
1312 {
1313 icm->im_so->so_m = NULL;
1314 switch (proto)
1315 {
1316 case IPPROTO_UDP:
1317 /*XXX: so->so_m already freed so we shouldn't call sofree */
1318 udp_detach(pData, icm->im_so);
1319 break;
1320 case IPPROTO_TCP:
1321 /*close tcp should be here */
1322 break;
1323 default:
1324 /* do nothing */
1325 break;
1326 }
1327 }
1328 RTMemFree(icm);
1329}
1330
1331#ifdef RT_OS_WINDOWS
1332static void
1333sorecvfrom_icmp_win(PNATState pData, struct socket *so)
1334{
1335 int len;
1336 int i;
1337 struct ip *ip;
1338 struct mbuf *m;
1339 struct icmp *icp;
1340 struct icmp_msg *icm;
1341 struct ip *ip_broken; /* ICMP returns header + 64 bit of packet */
1342 uint32_t src;
1343 ICMP_ECHO_REPLY *icr;
1344 int hlen = 0;
1345 int data_len = 0;
1346 int nbytes = 0;
1347 u_char code = ~0;
1348 int out_len;
1349 int size;
1350
1351 len = pData->pfIcmpParseReplies(pData->pvIcmpBuffer, pData->szIcmpBuffer);
1352 if (len < 0)
1353 {
1354 LogRel(("NAT: Error (%d) occurred on ICMP receiving\n", GetLastError()));
1355 return;
1356 }
1357 if (len == 0)
1358 return; /* no error */
1359
1360 icr = (ICMP_ECHO_REPLY *)pData->pvIcmpBuffer;
1361 for (i = 0; i < len; ++i)
1362 {
1363 switch(icr[i].Status)
1364 {
1365 case IP_DEST_HOST_UNREACHABLE:
1366 code = (code != ~0 ? code : ICMP_UNREACH_HOST);
1367 case IP_DEST_NET_UNREACHABLE:
1368 code = (code != ~0 ? code : ICMP_UNREACH_NET);
1369 case IP_DEST_PROT_UNREACHABLE:
1370 code = (code != ~0 ? code : ICMP_UNREACH_PROTOCOL);
1371 /* UNREACH error inject here */
1372 case IP_DEST_PORT_UNREACHABLE:
1373 code = (code != ~0 ? code : ICMP_UNREACH_PORT);
1374 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, "Error occurred!!!");
1375 m_freem(pData, so->so_m);
1376 so->so_m = NULL;
1377 break;
1378 case IP_SUCCESS: /* echo replied */
1379 out_len = ETH_HLEN + sizeof(struct ip) + 8;
1380 size;
1381 size = MCLBYTES;
1382 if (out_len < MSIZE)
1383 size = MCLBYTES;
1384 else if (out_len < MCLBYTES)
1385 size = MCLBYTES;
1386 else if (out_len < MJUM9BYTES)
1387 size = MJUM9BYTES;
1388 else if (out_len < MJUM16BYTES)
1389 size = MJUM16BYTES;
1390 else
1391 AssertMsgFailed(("Unsupported size"));
1392
1393 m = m_getjcl(pData, M_NOWAIT, MT_HEADER, M_PKTHDR, size);
1394 if (m == NULL)
1395 return;
1396 m->m_len = 0;
1397 m->m_data += if_maxlinkhdr;
1398 ip = mtod(m, struct ip *);
1399 ip->ip_src.s_addr = icr[i].Address;
1400 ip->ip_p = IPPROTO_ICMP;
1401 ip->ip_dst.s_addr = so->so_laddr.s_addr; /*XXX: still the hack*/
1402 data_len = sizeof(struct ip);
1403 ip->ip_hl = data_len >> 2; /* requiered for icmp_reflect, no IP options */
1404 ip->ip_ttl = icr[i].Options.Ttl;
1405
1406 icp = (struct icmp *)&ip[1]; /* no options */
1407 icp->icmp_type = ICMP_ECHOREPLY;
1408 icp->icmp_code = 0;
1409 icp->icmp_id = so->so_icmp_id;
1410 icp->icmp_seq = so->so_icmp_seq;
1411
1412 data_len += ICMP_MINLEN;
1413
1414 hlen = (ip->ip_hl << 2);
1415 m->m_pkthdr.header = mtod(m, void *);
1416 m->m_len = data_len;
1417
1418 m_copyback(pData, m, hlen + 8, icr[i].DataSize, icr[i].Data);
1419
1420 data_len += icr[i].DataSize;
1421
1422 ip->ip_len = data_len;
1423 m->m_len = ip->ip_len;
1424
1425 icmp_reflect(pData, m);
1426 break;
1427 case IP_TTL_EXPIRED_TRANSIT: /* TTL expired */
1428
1429 ip_broken = icr[i].Data;
1430 icm = icmp_find_original_mbuf(pData, ip_broken);
1431 if (icm == NULL) {
1432 Log(("ICMP: can't find original package (first double word %x)\n", *(uint32_t *)ip_broken));
1433 return;
1434 }
1435 m = icm->im_m;
1436 ip = mtod(m, struct ip *);
1437 ip->ip_ttl = icr[i].Options.Ttl;
1438 src = ip->ip_src.s_addr;
1439 ip->ip_dst.s_addr = src;
1440 ip->ip_dst.s_addr = icr[i].Address;
1441
1442 hlen = (ip->ip_hl << 2);
1443 icp = (struct icmp *)((char *)ip + hlen);
1444 ip_broken->ip_src.s_addr = src; /*it packet sent from host not from guest*/
1445 data_len = (ip_broken->ip_hl << 2) + 64;
1446
1447 m->m_len = data_len;
1448 m->m_pkthdr.header = mtod(m, void *);
1449 m_copyback(pData, m, ip->ip_hl >> 2, icr[i].DataSize, icr[i].Data);
1450 icmp_reflect(pData, m);
1451 break;
1452 default:
1453 Log(("ICMP(default): message with Status: %x was received from %x\n", icr[i].Status, icr[i].Address));
1454 break;
1455 }
1456 }
1457}
1458#else /* !RT_OS_WINDOWS */
1459static void sorecvfrom_icmp_unix(PNATState pData, struct socket *so)
1460{
1461 struct sockaddr_in addr;
1462 socklen_t addrlen = sizeof(struct sockaddr_in);
1463 struct ip ip;
1464 char *buff;
1465 int len = 0;
1466
1467 /* 1- step: read the ip header */
1468 len = recvfrom(so->s, &ip, sizeof(struct ip), MSG_PEEK,
1469 (struct sockaddr *)&addr, &addrlen);
1470 if ( len < 0
1471 && ( errno == EAGAIN
1472 || errno == EWOULDBLOCK
1473 || errno == EINPROGRESS
1474 || errno == ENOTCONN))
1475 {
1476 Log(("sorecvfrom_icmp_unix: 1 - step can't read IP datagramm (would block)\n"));
1477 return;
1478 }
1479
1480 if ( len < sizeof(struct ip)
1481 || len < 0
1482 || len == 0)
1483 {
1484 u_char code;
1485 code = ICMP_UNREACH_PORT;
1486
1487 if (errno == EHOSTUNREACH)
1488 code = ICMP_UNREACH_HOST;
1489 else if (errno == ENETUNREACH)
1490 code = ICMP_UNREACH_NET;
1491
1492 LogRel((" udp icmp rx errno = %d-%s\n",
1493 errno, strerror(errno)));
1494 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
1495 m_freem(pData, so->so_m);
1496 so->so_m = NULL;
1497 Log(("sorecvfrom_icmp_unix: 1 - step can't read IP datagramm \n"));
1498 return;
1499 }
1500 /* basic check of IP header */
1501 if ( ip.ip_v != IPVERSION
1502# ifndef RT_OS_DARWIN
1503 || ip.ip_p != IPPROTO_ICMP
1504# endif
1505 )
1506 {
1507 Log(("sorecvfrom_icmp_unix: 1 - step IP isn't IPv4 \n"));
1508 return;
1509 }
1510# ifndef RT_OS_DARWIN
1511 /* Darwin reports the IP length already in host byte order. */
1512 ip.ip_len = RT_N2H_U16(ip.ip_len);
1513# endif
1514# if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
1515 /* Solaris and Darwin report the payload only */
1516 ip.ip_len += (ip.ip_hl << 2);
1517# endif
1518 /* Note: ip->ip_len in host byte order (all OS) */
1519 len = ip.ip_len;
1520 buff = RTMemAlloc(len);
1521 if (buff == NULL)
1522 {
1523 Log(("sorecvfrom_icmp_unix: 1 - step can't allocate enought room for datagram\n"));
1524 return;
1525 }
1526 /* 2 - step: we're reading rest of the datagramm to the buffer */
1527 addrlen = sizeof(struct sockaddr_in);
1528 memset(&addr, 0, addrlen);
1529 len = recvfrom(so->s, buff, len, 0,
1530 (struct sockaddr *)&addr, &addrlen);
1531 if ( len < 0
1532 && ( errno == EAGAIN
1533 || errno == EWOULDBLOCK
1534 || errno == EINPROGRESS
1535 || errno == ENOTCONN))
1536 {
1537 Log(("sorecvfrom_icmp_unix: 2 - step can't read IP body (would block expected:%d)\n",
1538 ip.ip_len));
1539 RTMemFree(buff);
1540 return;
1541 }
1542 if ( len < 0
1543 || len == 0)
1544 {
1545 Log(("sorecvfrom_icmp_unix: 2 - step read of the rest of datagramm is fallen (errno:%d, len:%d expected: %d)\n",
1546 errno, len, (ip.ip_len - sizeof(struct ip))));
1547 RTMemFree(buff);
1548 return;
1549 }
1550 /* len is modified in 2nd read, when the rest of the datagramm was read */
1551 send_icmp_to_guest(pData, buff, len, so, &addr);
1552 RTMemFree(buff);
1553}
1554#endif /* !RT_OS_WINDOWS */
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette