VirtualBox

source: vbox/trunk/src/VBox/Devices/Network/slirp/socket.c@ 37423

Last change on this file since 37423 was 37423, checked in by vboxsync, 13 years ago

Ran the source code massager (scm).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 44.1 KB
Line 
1/* $Id: socket.c 37423 2011-06-12 18:37:56Z vboxsync $ */
2/** @file
3 * NAT - socket handling.
4 */
5
6/*
7 * Copyright (C) 2006-2010 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/*
19 * This code is based on:
20 *
21 * Copyright (c) 1995 Danny Gasparovski.
22 *
23 * Please read the file COPYRIGHT for the
24 * terms and conditions of the copyright.
25 */
26
27#define WANT_SYS_IOCTL_H
28#include <slirp.h>
29#include "ip_icmp.h"
30#include "main.h"
31#ifdef __sun__
32#include <sys/filio.h>
33#endif
34#include <VBox/vmm/pdmdrv.h>
35#if defined (RT_OS_WINDOWS)
36#include <iphlpapi.h>
37#include <icmpapi.h>
38#endif
39
40
41static void send_icmp_to_guest(PNATState, char *, size_t, struct socket *, const struct sockaddr_in *);
42#ifdef RT_OS_WINDOWS
43static void sorecvfrom_icmp_win(PNATState, struct socket *);
44#else /* RT_OS_WINDOWS */
45static void sorecvfrom_icmp_unix(PNATState, struct socket *);
46#endif /* !RT_OS_WINDOWS */
47
/*
 * Initialisation hook of the socket layer.
 * The body is empty: this file performs no one-time setup here.
 */
void
so_init()
{
    return;
}
52
53struct socket *
54solookup(struct socket *head, struct in_addr laddr,
55 u_int lport, struct in_addr faddr, u_int fport)
56{
57 struct socket *so;
58
59 for (so = head->so_next; so != head; so = so->so_next)
60 {
61 if ( so->so_lport == lport
62 && so->so_laddr.s_addr == laddr.s_addr
63 && so->so_faddr.s_addr == faddr.s_addr
64 && so->so_fport == fport)
65 return so;
66 }
67
68 return (struct socket *)NULL;
69}
70
71/*
72 * Create a new socket, initialise the fields
73 * It is the responsibility of the caller to
74 * insque() it into the correct linked-list
75 */
76struct socket *
77socreate()
78{
79 struct socket *so;
80
81 so = (struct socket *)RTMemAllocZ(sizeof(struct socket));
82 if (so)
83 {
84 so->so_state = SS_NOFDREF;
85 so->s = -1;
86#if !defined(RT_OS_WINDOWS)
87 so->so_poll_index = -1;
88#endif
89 }
90 return so;
91}
92
93/*
94 * remque and free a socket, clobber cache
95 * VBOX_WITH_SLIRP_MT: before sofree queue should be locked, because
96 * in sofree we don't know from which queue item beeing removed.
97 */
98void
99sofree(PNATState pData, struct socket *so)
100{
101 struct socket *so_prev = NULL;
102 if (so == tcp_last_so)
103 tcp_last_so = &tcb;
104 else if (so == udp_last_so)
105 udp_last_so = &udb;
106
107 /* check if mbuf haven't been already freed */
108 if (so->so_m != NULL)
109 m_freem(pData, so->so_m);
110#ifndef VBOX_WITH_SLIRP_MT
111 if (so->so_next && so->so_prev)
112 {
113 remque(pData, so); /* crashes if so is not in a queue */
114 NSOCK_DEC();
115 }
116
117 RTMemFree(so);
118#else
119 so->so_deleted = 1;
120#endif
121}
122
123#ifdef VBOX_WITH_SLIRP_MT
124void
125soread_queue(PNATState pData, struct socket *so, int *ret)
126{
127 *ret = soread(pData, so);
128}
129#endif
130
131/*
132 * Read from so's socket into sb_snd, updating all relevant sbuf fields
133 * NOTE: This will only be called if it is select()ed for reading, so
134 * a read() of 0 (or less) means it's disconnected
135 */
136#ifndef VBOX_WITH_SLIRP_BSD_SBUF
137int
138soread(PNATState pData, struct socket *so)
139{
140 int n, nn, lss, total;
141 struct sbuf *sb = &so->so_snd;
142 size_t len = sb->sb_datalen - sb->sb_cc;
143 struct iovec iov[2];
144 int mss = so->so_tcpcb->t_maxseg;
145
146 STAM_PROFILE_START(&pData->StatIOread, a);
147 STAM_COUNTER_RESET(&pData->StatIORead_in_1);
148 STAM_COUNTER_RESET(&pData->StatIORead_in_2);
149
150 QSOCKET_LOCK(tcb);
151 SOCKET_LOCK(so);
152 QSOCKET_UNLOCK(tcb);
153
154 LogFlow(("soread: so = %lx\n", (long)so));
155
156 /*
157 * No need to check if there's enough room to read.
158 * soread wouldn't have been called if there weren't
159 */
160
161 len = sb->sb_datalen - sb->sb_cc;
162
163 iov[0].iov_base = sb->sb_wptr;
164 iov[1].iov_base = 0;
165 iov[1].iov_len = 0;
166 if (sb->sb_wptr < sb->sb_rptr)
167 {
168 iov[0].iov_len = sb->sb_rptr - sb->sb_wptr;
169 /* Should never succeed, but... */
170 if (iov[0].iov_len > len)
171 iov[0].iov_len = len;
172 if (iov[0].iov_len > mss)
173 iov[0].iov_len -= iov[0].iov_len%mss;
174 n = 1;
175 }
176 else
177 {
178 iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr;
179 /* Should never succeed, but... */
180 if (iov[0].iov_len > len)
181 iov[0].iov_len = len;
182 len -= iov[0].iov_len;
183 if (len)
184 {
185 iov[1].iov_base = sb->sb_data;
186 iov[1].iov_len = sb->sb_rptr - sb->sb_data;
187 if (iov[1].iov_len > len)
188 iov[1].iov_len = len;
189 total = iov[0].iov_len + iov[1].iov_len;
190 if (total > mss)
191 {
192 lss = total % mss;
193 if (iov[1].iov_len > lss)
194 {
195 iov[1].iov_len -= lss;
196 n = 2;
197 }
198 else
199 {
200 lss -= iov[1].iov_len;
201 iov[0].iov_len -= lss;
202 n = 1;
203 }
204 }
205 else
206 n = 2;
207 }
208 else
209 {
210 if (iov[0].iov_len > mss)
211 iov[0].iov_len -= iov[0].iov_len%mss;
212 n = 1;
213 }
214 }
215
216#ifdef HAVE_READV
217 nn = readv(so->s, (struct iovec *)iov, n);
218 Log2((" ... read nn = %d bytes\n", nn));
219#else
220 nn = recv(so->s, iov[0].iov_base, iov[0].iov_len, (so->so_tcpcb->t_force? MSG_OOB:0));
221#endif
222 if (nn <= 0)
223 {
224 /*
225 * Special case for WSAEnumNetworkEvents: If we receive 0 bytes that
226 * _could_ mean that the connection is closed. But we will receive an
227 * FD_CLOSE event later if the connection was _really_ closed. With
228 * www.youtube.com I see this very often. Closing the socket too early
229 * would be dangerous.
230 */
231 int status;
232 unsigned long pending = 0;
233 status = ioctlsocket(so->s, FIONREAD, &pending);
234 if (status < 0)
235 Log(("NAT:error in WSAIoctl: %d\n", errno));
236 if (nn == 0 && (pending != 0))
237 {
238 SOCKET_UNLOCK(so);
239 STAM_PROFILE_STOP(&pData->StatIOread, a);
240 return 0;
241 }
242 if ( nn < 0
243 && ( errno == EINTR
244 || errno == EAGAIN
245 || errno == EWOULDBLOCK))
246 {
247 SOCKET_UNLOCK(so);
248 STAM_PROFILE_STOP(&pData->StatIOread, a);
249 return 0;
250 }
251 else
252 {
253 /* nn == 0 means peer has performed an orderly shutdown */
254 Log2((" --- soread() disconnected, nn = %d, errno = %d (%s)\n",
255 nn, errno, strerror(errno)));
256 sofcantrcvmore(so);
257 tcp_sockclosed(pData, sototcpcb(so));
258 SOCKET_UNLOCK(so);
259 STAM_PROFILE_STOP(&pData->StatIOread, a);
260 return -1;
261 }
262 }
263 STAM_STATS(
264 if (n == 1)
265 {
266 STAM_COUNTER_INC(&pData->StatIORead_in_1);
267 STAM_COUNTER_ADD(&pData->StatIORead_in_1_bytes, nn);
268 }
269 else
270 {
271 STAM_COUNTER_INC(&pData->StatIORead_in_2);
272 STAM_COUNTER_ADD(&pData->StatIORead_in_2_1st_bytes, nn);
273 }
274 );
275
276#ifndef HAVE_READV
277 /*
278 * If there was no error, try and read the second time round
279 * We read again if n = 2 (ie, there's another part of the buffer)
280 * and we read as much as we could in the first read
281 * We don't test for <= 0 this time, because there legitimately
282 * might not be any more data (since the socket is non-blocking),
283 * a close will be detected on next iteration.
284 * A return of -1 wont (shouldn't) happen, since it didn't happen above
285 */
286 if (n == 2 && nn == iov[0].iov_len)
287 {
288 int ret;
289 ret = recv(so->s, iov[1].iov_base, iov[1].iov_len, 0);
290 if (ret > 0)
291 nn += ret;
292 STAM_STATS(
293 if (ret > 0)
294 {
295 STAM_COUNTER_INC(&pData->StatIORead_in_2);
296 STAM_COUNTER_ADD(&pData->StatIORead_in_2_2nd_bytes, ret);
297 }
298 );
299 }
300
301 Log2((" ... read nn = %d bytes\n", nn));
302#endif
303
304 /* Update fields */
305 sb->sb_cc += nn;
306 sb->sb_wptr += nn;
307 if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen))
308 sb->sb_wptr -= sb->sb_datalen;
309 STAM_PROFILE_STOP(&pData->StatIOread, a);
310 SOCKET_UNLOCK(so);
311 return nn;
312}
313#else /* VBOX_WITH_SLIRP_BSD_SBUF */
314int
315soread(PNATState pData, struct socket *so)
316{
317 int n;
318 char *buf;
319 struct sbuf *sb = &so->so_snd;
320 size_t len = sbspace(sb);
321 int mss = so->so_tcpcb->t_maxseg;
322
323 STAM_PROFILE_START(&pData->StatIOread, a);
324 STAM_COUNTER_RESET(&pData->StatIORead_in_1);
325 STAM_COUNTER_RESET(&pData->StatIORead_in_2);
326
327 QSOCKET_LOCK(tcb);
328 SOCKET_LOCK(so);
329 QSOCKET_UNLOCK(tcb);
330
331 LogFlow(("soread: so = %lx\n", (long)so));
332
333 if (len > mss)
334 len -= len % mss;
335 buf = RTMemAlloc(len);
336 if (buf == NULL)
337 {
338 Log(("NAT: can't alloc enough memory\n"));
339 return -1;
340 }
341
342 n = recv(so->s, buf, len, (so->so_tcpcb->t_force? MSG_OOB:0));
343 if (n <= 0)
344 {
345 /*
346 * Special case for WSAEnumNetworkEvents: If we receive 0 bytes that
347 * _could_ mean that the connection is closed. But we will receive an
348 * FD_CLOSE event later if the connection was _really_ closed. With
349 * www.youtube.com I see this very often. Closing the socket too early
350 * would be dangerous.
351 */
352 int status;
353 unsigned long pending = 0;
354 status = ioctlsocket(so->s, FIONREAD, &pending);
355 if (status < 0)
356 Log(("NAT:error in WSAIoctl: %d\n", errno));
357 if (n == 0 && (pending != 0))
358 {
359 SOCKET_UNLOCK(so);
360 STAM_PROFILE_STOP(&pData->StatIOread, a);
361 RTMemFree(buf);
362 return 0;
363 }
364 if ( n < 0
365 && ( errno == EINTR
366 || errno == EAGAIN
367 || errno == EWOULDBLOCK))
368 {
369 SOCKET_UNLOCK(so);
370 STAM_PROFILE_STOP(&pData->StatIOread, a);
371 RTMemFree(buf);
372 return 0;
373 }
374 else
375 {
376 Log2((" --- soread() disconnected, n = %d, errno = %d (%s)\n",
377 n, errno, strerror(errno)));
378 sofcantrcvmore(so);
379 tcp_sockclosed(pData, sototcpcb(so));
380 SOCKET_UNLOCK(so);
381 STAM_PROFILE_STOP(&pData->StatIOread, a);
382 RTMemFree(buf);
383 return -1;
384 }
385 }
386
387 sbuf_bcat(sb, buf, n);
388 RTMemFree(buf);
389 return n;
390}
391#endif
392
393/*
394 * Get urgent data
395 *
396 * When the socket is created, we set it SO_OOBINLINE,
397 * so when OOB data arrives, we soread() it and everything
398 * in the send buffer is sent as urgent data
399 */
400void
401sorecvoob(PNATState pData, struct socket *so)
402{
403 struct tcpcb *tp = sototcpcb(so);
404 ssize_t ret;
405
406 LogFlow(("sorecvoob: so = %lx\n", (long)so));
407
408 /*
409 * We take a guess at how much urgent data has arrived.
410 * In most situations, when urgent data arrives, the next
411 * read() should get all the urgent data. This guess will
412 * be wrong however if more data arrives just after the
413 * urgent data, or the read() doesn't return all the
414 * urgent data.
415 */
416 ret = soread(pData, so);
417 tp->snd_up = tp->snd_una + SBUF_LEN(&so->so_snd);
418 tp->t_force = 1;
419 tcp_output(pData, tp);
420 tp->t_force = 0;
421}
422#ifndef VBOX_WITH_SLIRP_BSD_SBUF
423/*
424 * Send urgent data
425 * There's a lot duplicated code here, but...
426 */
427int
428sosendoob(struct socket *so)
429{
430 struct sbuf *sb = &so->so_rcv;
431 char buff[2048]; /* XXX Shouldn't be sending more oob data than this */
432
433 int n, len;
434
435 LogFlow(("sosendoob so = %lx\n", (long)so));
436
437 if (so->so_urgc > sizeof(buff))
438 so->so_urgc = sizeof(buff); /* XXX */
439
440 if (sb->sb_rptr < sb->sb_wptr)
441 {
442 /* We can send it directly */
443 n = send(so->s, sb->sb_rptr, so->so_urgc, (MSG_OOB)); /* |MSG_DONTWAIT)); */
444 so->so_urgc -= n;
445
446 Log2((" --- sent %d bytes urgent data, %d urgent bytes left\n",
447 n, so->so_urgc));
448 }
449 else
450 {
451 /*
452 * Since there's no sendv or sendtov like writev,
453 * we must copy all data to a linear buffer then
454 * send it all
455 */
456 len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
457 if (len > so->so_urgc)
458 len = so->so_urgc;
459 memcpy(buff, sb->sb_rptr, len);
460 so->so_urgc -= len;
461 if (so->so_urgc)
462 {
463 n = sb->sb_wptr - sb->sb_data;
464 if (n > so->so_urgc)
465 n = so->so_urgc;
466 memcpy(buff + len, sb->sb_data, n);
467 so->so_urgc -= n;
468 len += n;
469 }
470 n = send(so->s, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */
471#ifdef DEBUG
472 if (n != len)
473 Log(("Didn't send all data urgently XXXXX\n"));
474#endif
475 Log2((" ---2 sent %d bytes urgent data, %d urgent bytes left\n",
476 n, so->so_urgc));
477 }
478
479 sb->sb_cc -= n;
480 sb->sb_rptr += n;
481 if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
482 sb->sb_rptr -= sb->sb_datalen;
483
484 return n;
485}
486
487/*
488 * Write data from so_rcv to so's socket,
489 * updating all sbuf field as necessary
490 */
491int
492sowrite(PNATState pData, struct socket *so)
493{
494 int n, nn;
495 struct sbuf *sb = &so->so_rcv;
496 size_t len = sb->sb_cc;
497 struct iovec iov[2];
498
499 STAM_PROFILE_START(&pData->StatIOwrite, a);
500 STAM_COUNTER_RESET(&pData->StatIOWrite_in_1);
501 STAM_COUNTER_RESET(&pData->StatIOWrite_in_1_bytes);
502 STAM_COUNTER_RESET(&pData->StatIOWrite_in_2);
503 STAM_COUNTER_RESET(&pData->StatIOWrite_in_2_1st_bytes);
504 STAM_COUNTER_RESET(&pData->StatIOWrite_in_2_2nd_bytes);
505 STAM_COUNTER_RESET(&pData->StatIOWrite_no_w);
506 STAM_COUNTER_RESET(&pData->StatIOWrite_rest);
507 STAM_COUNTER_RESET(&pData->StatIOWrite_rest_bytes);
508 LogFlow(("sowrite: so = %lx\n", (long)so));
509 QSOCKET_LOCK(tcb);
510 SOCKET_LOCK(so);
511 QSOCKET_UNLOCK(tcb);
512 if (so->so_urgc)
513 {
514 sosendoob(so);
515 if (sb->sb_cc == 0)
516 {
517 SOCKET_UNLOCK(so);
518 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
519 return 0;
520 }
521 }
522
523 /*
524 * No need to check if there's something to write,
525 * sowrite wouldn't have been called otherwise
526 */
527
528 len = sb->sb_cc;
529
530 iov[0].iov_base = sb->sb_rptr;
531 iov[1].iov_base = 0;
532 iov[1].iov_len = 0;
533 if (sb->sb_rptr < sb->sb_wptr)
534 {
535 iov[0].iov_len = sb->sb_wptr - sb->sb_rptr;
536 /* Should never succeed, but... */
537 if (iov[0].iov_len > len)
538 iov[0].iov_len = len;
539 n = 1;
540 }
541 else
542 {
543 iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
544 if (iov[0].iov_len > len)
545 iov[0].iov_len = len;
546 len -= iov[0].iov_len;
547 if (len)
548 {
549 iov[1].iov_base = sb->sb_data;
550 iov[1].iov_len = sb->sb_wptr - sb->sb_data;
551 if (iov[1].iov_len > len)
552 iov[1].iov_len = len;
553 n = 2;
554 }
555 else
556 n = 1;
557 }
558 STAM_STATS({
559 if (n == 1)
560 {
561 STAM_COUNTER_INC(&pData->StatIOWrite_in_1);
562 STAM_COUNTER_ADD(&pData->StatIOWrite_in_1_bytes, iov[0].iov_len);
563 }
564 else
565 {
566 STAM_COUNTER_INC(&pData->StatIOWrite_in_2);
567 STAM_COUNTER_ADD(&pData->StatIOWrite_in_2_1st_bytes, iov[0].iov_len);
568 STAM_COUNTER_ADD(&pData->StatIOWrite_in_2_2nd_bytes, iov[1].iov_len);
569 }
570 });
571 /* Check if there's urgent data to send, and if so, send it */
572#ifdef HAVE_READV
573 nn = writev(so->s, (const struct iovec *)iov, n);
574 Log2((" ... wrote nn = %d bytes\n", nn));
575#else
576 nn = send(so->s, iov[0].iov_base, iov[0].iov_len, 0);
577#endif
578 /* This should never happen, but people tell me it does *shrug* */
579 if ( nn < 0
580 && ( errno == EAGAIN
581 || errno == EINTR
582 || errno == EWOULDBLOCK))
583 {
584 SOCKET_UNLOCK(so);
585 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
586 return 0;
587 }
588
589 if (nn < 0 || (nn == 0 && iov[0].iov_len > 0))
590 {
591 Log2((" --- sowrite disconnected, so->so_state = %x, errno = %d\n",
592 so->so_state, errno));
593 sofcantsendmore(so);
594 tcp_sockclosed(pData, sototcpcb(so));
595 SOCKET_UNLOCK(so);
596 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
597 return -1;
598 }
599
600#ifndef HAVE_READV
601 if (n == 2 && nn == iov[0].iov_len)
602 {
603 int ret;
604 ret = send(so->s, iov[1].iov_base, iov[1].iov_len, 0);
605 if (ret > 0)
606 nn += ret;
607 STAM_STATS({
608 if (ret > 0 && ret != iov[1].iov_len)
609 {
610 STAM_COUNTER_INC(&pData->StatIOWrite_rest);
611 STAM_COUNTER_ADD(&pData->StatIOWrite_rest_bytes, (iov[1].iov_len - ret));
612 }
613 });
614 }
615 Log2((" ... wrote nn = %d bytes\n", nn));
616#endif
617
618 /* Update sbuf */
619 sb->sb_cc -= nn;
620 sb->sb_rptr += nn;
621 if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
622 sb->sb_rptr -= sb->sb_datalen;
623
624 /*
625 * If in DRAIN mode, and there's no more data, set
626 * it CANTSENDMORE
627 */
628 if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0)
629 sofcantsendmore(so);
630
631 SOCKET_UNLOCK(so);
632 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
633 return nn;
634}
635#else /* VBOX_WITH_SLIRP_BSD_SBUF */
636static int
637do_sosend(struct socket *so, int fUrg)
638{
639 struct sbuf *sb = &so->so_rcv;
640
641 int n, len;
642
643 LogFlow(("sosendoob: so = %lx\n", (long)so));
644
645 len = sbuf_len(sb);
646
647 n = send(so->s, sbuf_data(sb), len, (fUrg ? MSG_OOB : 0));
648 if (n < 0)
649 Log(("NAT: Can't sent sbuf via socket.\n"));
650 if (fUrg)
651 so->so_urgc -= n;
652 if (n > 0 && n < len)
653 {
654 char *ptr;
655 char *buff;
656 buff = RTMemAlloc(len);
657 if (buff == NULL)
658 {
659 Log(("NAT: No space to allocate temporal buffer\n"));
660 return -1;
661 }
662 ptr = sbuf_data(sb);
663 memcpy(buff, &ptr[n], len - n);
664 sbuf_bcpy(sb, buff, len - n);
665 RTMemFree(buff);
666 return n;
667 }
668 sbuf_clear(sb);
669 return n;
670}
671int
672sosendoob(struct socket *so)
673{
674 return do_sosend(so, 1);
675}
676
677/*
678 * Write data from so_rcv to so's socket,
679 * updating all sbuf field as necessary
680 */
681int
682sowrite(PNATState pData, struct socket *so)
683{
684 return do_sosend(so, 0);
685}
686#endif
687
688/*
689 * recvfrom() a UDP socket
690 */
691void
692sorecvfrom(PNATState pData, struct socket *so)
693{
694 ssize_t ret = 0;
695 struct sockaddr_in addr;
696 socklen_t addrlen = sizeof(struct sockaddr_in);
697
698 LogFlow(("sorecvfrom: so = %lx\n", (long)so));
699
700 if (so->so_type == IPPROTO_ICMP)
701 {
702 /* This is a "ping" reply */
703#ifdef RT_OS_WINDOWS
704 sorecvfrom_icmp_win(pData, so);
705#else /* RT_OS_WINDOWS */
706 sorecvfrom_icmp_unix(pData, so);
707#endif /* !RT_OS_WINDOWS */
708 udp_detach(pData, so);
709 }
710 else
711 {
712 /* A "normal" UDP packet */
713 struct mbuf *m;
714 ssize_t len;
715 u_long n = 0;
716 int size;
717 int rc = 0;
718 static int signalled = 0;
719 char *pchBuffer = NULL;
720 bool fWithTemporalBuffer = false;
721
722 QSOCKET_LOCK(udb);
723 SOCKET_LOCK(so);
724 QSOCKET_UNLOCK(udb);
725
726 /*How many data has been received ?*/
727 /*
728 * 1. calculate how much we can read
729 * 2. read as much as possible
730 * 3. attach buffer to allocated header mbuf
731 */
732 rc = ioctlsocket(so->s, FIONREAD, &n);
733 if (rc == -1)
734 {
735 if ( errno == EAGAIN
736 || errno == EWOULDBLOCK
737 || errno == EINPROGRESS
738 || errno == ENOTCONN)
739 return;
740 else if (signalled == 0)
741 {
742 LogRel(("NAT: can't fetch amount of bytes on socket %R[natsock], so message will be truncated.\n", so));
743 signalled = 1;
744 }
745 return;
746 }
747
748 len = sizeof(struct udpiphdr);
749 m = m_getjcl(pData, M_NOWAIT, MT_HEADER, M_PKTHDR, slirp_size(pData));
750 if (m == NULL)
751 return;
752
753 len += n;
754 m->m_data += ETH_HLEN;
755 m->m_pkthdr.header = mtod(m, void *);
756 m->m_data += sizeof(struct udpiphdr);
757
758 pchBuffer = mtod(m, char *);
759 fWithTemporalBuffer = false;
760 /*
761 * Even if amounts of bytes on socket is greater than MTU value
762 * Slirp will able fragment it, but we won't create temporal location
763 * here.
764 */
765 if (n > (slirp_size(pData) - sizeof(struct udpiphdr)))
766 {
767 pchBuffer = RTMemAlloc((n) * sizeof(char));
768 if (!pchBuffer)
769 {
770 m_freem(pData, m);
771 return;
772 }
773 fWithTemporalBuffer = true;
774 }
775 ret = recvfrom(so->s, pchBuffer, n, 0,
776 (struct sockaddr *)&addr, &addrlen);
777 if (fWithTemporalBuffer)
778 {
779 if (ret > 0)
780 {
781 m_copyback(pData, m, 0, ret, pchBuffer);
782 /*
783 * If we've met comporison below our size prediction was failed
784 * it's not fatal just we've allocated for nothing. (@todo add counter here
785 * to calculate how rare we here)
786 */
787 if(ret < slirp_size(pData) && !m->m_next)
788 Log(("NAT:udp: Expected size(%d) lesser than real(%d) and less minimal mbuf size(%d)\n",
789 n, ret, slirp_size(pData)));
790 }
791 /* we're freeing buffer anyway */
792 RTMemFree(pchBuffer);
793 }
794 else
795 m->m_len = ret;
796
797 if (ret < 0)
798 {
799 u_char code = ICMP_UNREACH_PORT;
800
801 if (errno == EHOSTUNREACH)
802 code = ICMP_UNREACH_HOST;
803 else if (errno == ENETUNREACH)
804 code = ICMP_UNREACH_NET;
805
806 m_freem(pData, m);
807 if ( errno == EAGAIN
808 || errno == EWOULDBLOCK
809 || errno == EINPROGRESS
810 || errno == ENOTCONN)
811 {
812 return;
813 }
814
815 Log2((" rx error, tx icmp ICMP_UNREACH:%i\n", code));
816 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
817 so->so_m = NULL;
818 }
819 else
820 {
821 Assert((m_length(m,NULL) == ret));
822 /*
823 * Hack: domain name lookup will be used the most for UDP,
824 * and since they'll only be used once there's no need
825 * for the 4 minute (or whatever) timeout... So we time them
826 * out much quicker (10 seconds for now...)
827 */
828 if (so->so_expire)
829 {
830 if (so->so_fport != RT_H2N_U16_C(53))
831 so->so_expire = curtime + SO_EXPIRE;
832 }
833 /*
834 * last argument should be changed if Slirp will inject IP attributes
835 * Note: Here we can't check if dnsproxy's sent initial request
836 */
837 if ( pData->fUseDnsProxy
838 && so->so_fport == RT_H2N_U16_C(53))
839 dnsproxy_answer(pData, so, m);
840
841#if 0
842 if (m->m_len == len)
843 {
844 m_inc(m, MINCSIZE);
845 m->m_len = 0;
846 }
847#endif
848
849 /*
850 * If this packet was destined for CTL_ADDR,
851 * make it look like that's where it came from, done by udp_output
852 */
853 udp_output(pData, so, m, &addr);
854 SOCKET_UNLOCK(so);
855 } /* rx error */
856 } /* if ping packet */
857}
858
859/*
860 * sendto() a socket
861 */
862int
863sosendto(PNATState pData, struct socket *so, struct mbuf *m)
864{
865 int ret;
866 struct sockaddr_in *paddr;
867 struct sockaddr addr;
868#if 0
869 struct sockaddr_in host_addr;
870#endif
871 caddr_t buf = 0;
872 int mlen;
873
874 LogFlow(("sosendto: so = %lx, m = %lx\n", (long)so, (long)m));
875
876 memset(&addr, 0, sizeof(struct sockaddr));
877#ifdef RT_OS_DARWIN
878 addr.sa_len = sizeof(struct sockaddr_in);
879#endif
880 paddr = (struct sockaddr_in *)&addr;
881 paddr->sin_family = AF_INET;
882 if ((so->so_faddr.s_addr & RT_H2N_U32(pData->netmask)) == pData->special_addr.s_addr)
883 {
884 /* It's an alias */
885 uint32_t last_byte = RT_N2H_U32(so->so_faddr.s_addr) & ~pData->netmask;
886 switch(last_byte)
887 {
888#if 0
889 /* handle this case at 'default:' */
890 case CTL_BROADCAST:
891 addr.sin_addr.s_addr = INADDR_BROADCAST;
892 /* Send the packet to host to fully emulate broadcast */
893 /** @todo r=klaus: on Linux host this causes the host to receive
894 * the packet twice for some reason. And I cannot find any place
895 * in the man pages which states that sending a broadcast does not
896 * reach the host itself. */
897 host_addr.sin_family = AF_INET;
898 host_addr.sin_port = so->so_fport;
899 host_addr.sin_addr = our_addr;
900 sendto(so->s, m->m_data, m->m_len, 0,
901 (struct sockaddr *)&host_addr, sizeof (struct sockaddr));
902 break;
903#endif
904 case CTL_DNS:
905 case CTL_ALIAS:
906 default:
907 if (last_byte == ~pData->netmask)
908 paddr->sin_addr.s_addr = INADDR_BROADCAST;
909 else
910 paddr->sin_addr = loopback_addr;
911 break;
912 }
913 }
914 else
915 paddr->sin_addr = so->so_faddr;
916 paddr->sin_port = so->so_fport;
917
918 Log2((" sendto()ing, addr.sin_port=%d, addr.sin_addr.s_addr=%.16s\n",
919 RT_N2H_U16(paddr->sin_port), inet_ntoa(paddr->sin_addr)));
920
921 /* Don't care what port we get */
922 /*
923 * > nmap -sV -T4 -O -A -v -PU3483 255.255.255.255
924 * generates bodyless messages, annoying memmory management system.
925 */
926 mlen = m_length(m, NULL);
927 if (mlen > 0)
928 {
929 buf = RTMemAlloc(mlen);
930 if (buf == NULL)
931 {
932 return -1;
933 }
934 m_copydata(m, 0, mlen, buf);
935 }
936 ret = sendto(so->s, buf, mlen, 0,
937 (struct sockaddr *)&addr, sizeof (struct sockaddr));
938 if (buf)
939 RTMemFree(buf);
940 if (ret < 0)
941 {
942 Log2(("UDP: sendto fails (%s)\n", strerror(errno)));
943 return -1;
944 }
945
946 /*
947 * Kill the socket if there's no reply in 4 minutes,
948 * but only if it's an expirable socket
949 */
950 if (so->so_expire)
951 so->so_expire = curtime + SO_EXPIRE;
952 so->so_state = SS_ISFCONNECTED; /* So that it gets select()ed */
953 return 0;
954}
955
956/*
957 * XXX This should really be tcp_listen
958 */
959struct socket *
960solisten(PNATState pData, u_int32_t bind_addr, u_int port, u_int32_t laddr, u_int lport, int flags)
961{
962 struct sockaddr_in addr;
963 struct socket *so;
964 socklen_t addrlen = sizeof(addr);
965 int s, opt = 1;
966 int status;
967
968 LogFlow(("solisten: port = %d, laddr = %x, lport = %d, flags = %x\n", port, laddr, lport, flags));
969
970 if ((so = socreate()) == NULL)
971 {
972 /* RTMemFree(so); Not sofree() ??? free(NULL) == NOP */
973 return NULL;
974 }
975
976 /* Don't tcp_attach... we don't need so_snd nor so_rcv */
977 if ((so->so_tcpcb = tcp_newtcpcb(pData, so)) == NULL)
978 {
979 RTMemFree(so);
980 return NULL;
981 }
982
983 SOCKET_LOCK_CREATE(so);
984 SOCKET_LOCK(so);
985 QSOCKET_LOCK(tcb);
986 insque(pData, so,&tcb);
987 NSOCK_INC();
988 QSOCKET_UNLOCK(tcb);
989
990 /*
991 * SS_FACCEPTONCE sockets must time out.
992 */
993 if (flags & SS_FACCEPTONCE)
994 so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT*2;
995
996 so->so_state = (SS_FACCEPTCONN|flags);
997 so->so_lport = lport; /* Kept in network format */
998 so->so_laddr.s_addr = laddr; /* Ditto */
999
1000 memset(&addr, 0, sizeof(addr));
1001#ifdef RT_OS_DARWIN
1002 addr.sin_len = sizeof(addr);
1003#endif
1004 addr.sin_family = AF_INET;
1005 addr.sin_addr.s_addr = bind_addr;
1006 addr.sin_port = port;
1007
1008 if ( ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0)
1009 || (setsockopt(s, SOL_SOCKET, SO_REUSEADDR,(char *)&opt, sizeof(int)) < 0)
1010 || (bind(s,(struct sockaddr *)&addr, sizeof(addr)) < 0)
1011 || (listen(s, 1) < 0))
1012 {
1013#ifdef RT_OS_WINDOWS
1014 int tmperrno = WSAGetLastError(); /* Don't clobber the real reason we failed */
1015 closesocket(s);
1016 QSOCKET_LOCK(tcb);
1017 sofree(pData, so);
1018 QSOCKET_UNLOCK(tcb);
1019 /* Restore the real errno */
1020 WSASetLastError(tmperrno);
1021#else
1022 int tmperrno = errno; /* Don't clobber the real reason we failed */
1023 close(s);
1024 QSOCKET_LOCK(tcb);
1025 sofree(pData, so);
1026 QSOCKET_UNLOCK(tcb);
1027 /* Restore the real errno */
1028 errno = tmperrno;
1029#endif
1030 return NULL;
1031 }
1032 fd_nonblock(s);
1033 setsockopt(s, SOL_SOCKET, SO_OOBINLINE,(char *)&opt, sizeof(int));
1034
1035 getsockname(s,(struct sockaddr *)&addr,&addrlen);
1036 so->so_fport = addr.sin_port;
1037 /* set socket buffers */
1038 opt = pData->socket_rcv;
1039 status = setsockopt(s, SOL_SOCKET, SO_RCVBUF, (char *)&opt, sizeof(int));
1040 if (status < 0)
1041 {
1042 LogRel(("NAT: Error(%d) while setting RCV capacity to (%d)\n", errno, opt));
1043 goto no_sockopt;
1044 }
1045 opt = pData->socket_snd;
1046 status = setsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *)&opt, sizeof(int));
1047 if (status < 0)
1048 {
1049 LogRel(("NAT: Error(%d) while setting SND capacity to (%d)\n", errno, opt));
1050 goto no_sockopt;
1051 }
1052no_sockopt:
1053 if (addr.sin_addr.s_addr == 0 || addr.sin_addr.s_addr == loopback_addr.s_addr)
1054 so->so_faddr = alias_addr;
1055 else
1056 so->so_faddr = addr.sin_addr;
1057
1058 so->s = s;
1059 SOCKET_UNLOCK(so);
1060 return so;
1061}
1062
/*
 * Data is available in so_rcv.
 * The idea is to write() the data to the socket right away, but that is
 * not done yet: this function currently does nothing.
 */
void
sorwakeup(struct socket *so)
{
    (void)so;   /* intentionally unused */
}
1076
/*
 * Data has been freed in so_snd, so there is room for a read() if we
 * want one.  For now nothing is read here; that is left to the main loop.
 */
void
sowwakeup(struct socket *so)
{
    (void)so;   /* intentionally unused */
}
1086
1087/*
1088 * Various session state calls
1089 * XXX Should be #define's
1090 * The socket state stuff needs work, these often get call 2 or 3
1091 * times each when only 1 was needed
1092 */
1093void
1094soisfconnecting(struct socket *so)
1095{
1096 so->so_state &= ~(SS_NOFDREF|SS_ISFCONNECTED|SS_FCANTRCVMORE|
1097 SS_FCANTSENDMORE|SS_FWDRAIN);
1098 so->so_state |= SS_ISFCONNECTING; /* Clobber other states */
1099}
1100
1101void
1102soisfconnected(struct socket *so)
1103{
1104 so->so_state &= ~(SS_ISFCONNECTING|SS_FWDRAIN|SS_NOFDREF);
1105 so->so_state |= SS_ISFCONNECTED; /* Clobber other states */
1106}
1107
1108void
1109sofcantrcvmore(struct socket *so)
1110{
1111 if ((so->so_state & SS_NOFDREF) == 0)
1112 {
1113 shutdown(so->s, 0);
1114 }
1115 so->so_state &= ~(SS_ISFCONNECTING);
1116 if (so->so_state & SS_FCANTSENDMORE)
1117 so->so_state = SS_NOFDREF; /* Don't select it */
1118 /* XXX close() here as well? */
1119 else
1120 so->so_state |= SS_FCANTRCVMORE;
1121}
1122
1123void
1124sofcantsendmore(struct socket *so)
1125{
1126 if ((so->so_state & SS_NOFDREF) == 0)
1127 shutdown(so->s, 1); /* send FIN to fhost */
1128
1129 so->so_state &= ~(SS_ISFCONNECTING);
1130 if (so->so_state & SS_FCANTRCVMORE)
1131 so->so_state = SS_NOFDREF; /* as above */
1132 else
1133 so->so_state |= SS_FCANTSENDMORE;
1134}
1135
/*
 * Notification that the foreign side is disconnected.
 * XXX Does nothing at present; closing the descriptor and switching to a
 * "disconnected" state here is not enabled.
 */
void
soisfdisconnected(struct socket *so)
{
    (void)so;   /* intentionally unused */
}
1148
1149/*
1150 * Set write drain mode
1151 * Set CANTSENDMORE once all data has been write()n
1152 */
1153void
1154sofwdrain(struct socket *so)
1155{
1156 if (SBUF_LEN(&so->so_rcv))
1157 so->so_state |= SS_FWDRAIN;
1158 else
1159 sofcantsendmore(so);
1160}
1161
1162static void
1163send_icmp_to_guest(PNATState pData, char *buff, size_t len, struct socket *so, const struct sockaddr_in *addr)
1164{
1165 struct ip *ip;
1166 uint32_t dst, src;
1167 char ip_copy[256];
1168 struct icmp *icp;
1169 int old_ip_len = 0;
1170 int hlen, original_hlen = 0;
1171 struct mbuf *m;
1172 struct icmp_msg *icm;
1173 uint8_t proto;
1174 int type = 0;
1175
1176 ip = (struct ip *)buff;
1177 /* Fix ip->ip_len to contain the total packet length including the header
1178 * in _host_ byte order for all OSes. On Darwin, that value already is in
1179 * host byte order. Solaris and Darwin report only the payload. */
1180#ifndef RT_OS_DARWIN
1181 ip->ip_len = RT_N2H_U16(ip->ip_len);
1182#endif
1183 hlen = (ip->ip_hl << 2);
1184#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
1185 ip->ip_len += hlen;
1186#endif
1187 if (ip->ip_len < hlen + ICMP_MINLEN)
1188 {
1189 Log(("send_icmp_to_guest: ICMP header is too small to understand which type/subtype of the datagram\n"));
1190 return;
1191 }
1192 icp = (struct icmp *)((char *)ip + hlen);
1193
1194 Log(("ICMP:received msg(t:%d, c:%d)\n", icp->icmp_type, icp->icmp_code));
1195 if ( icp->icmp_type != ICMP_ECHOREPLY
1196 && icp->icmp_type != ICMP_TIMXCEED
1197 && icp->icmp_type != ICMP_UNREACH)
1198 {
1199 return;
1200 }
1201
1202 /*
1203 * ICMP_ECHOREPLY, ICMP_TIMXCEED, ICMP_UNREACH minimal header size is
1204 * ICMP_ECHOREPLY assuming data 0
1205 * icmp_{type(8), code(8), cksum(16),identifier(16),seqnum(16)}
1206 */
1207 if (ip->ip_len < hlen + 8)
1208 {
1209 Log(("send_icmp_to_guest: NAT accept ICMP_{ECHOREPLY, TIMXCEED, UNREACH} the minimum size is 64 (see rfc792)\n"));
1210 return;
1211 }
1212
1213 type = icp->icmp_type;
1214 if ( type == ICMP_TIMXCEED
1215 || type == ICMP_UNREACH)
1216 {
1217 /*
1218 * ICMP_TIMXCEED, ICMP_UNREACH minimal header size is
1219 * icmp_{type(8), code(8), cksum(16),unused(32)} + IP header + 64 bit of original datagram
1220 */
1221 if (ip->ip_len < hlen + 2*8 + sizeof(struct ip))
1222 {
1223 Log(("send_icmp_to_guest: NAT accept ICMP_{TIMXCEED, UNREACH} the minimum size of ipheader + 64 bit of data (see rfc792)\n"));
1224 return;
1225 }
1226 ip = &icp->icmp_ip;
1227 }
1228
1229 icm = icmp_find_original_mbuf(pData, ip);
1230 if (icm == NULL)
1231 {
1232 Log(("NAT: Can't find the corresponding packet for the received ICMP\n"));
1233 return;
1234 }
1235
1236 m = icm->im_m;
1237 Assert(m != NULL);
1238
1239 src = addr->sin_addr.s_addr;
1240 if (type == ICMP_ECHOREPLY)
1241 {
1242 struct ip *ip0 = mtod(m, struct ip *);
1243 struct icmp *icp0 = (struct icmp *)((char *)ip0 + (ip0->ip_hl << 2));
1244 if (icp0->icmp_type != ICMP_ECHO)
1245 {
1246 Log(("NAT: we haven't found echo for this reply\n"));
1247 return;
1248 }
1249 /*
1250 * while combining buffer to send (see ip_icmp.c) we control ICMP header only,
1251 * IP header combined by OS network stack, our local copy of IP header contians values
1252 * in host byte order so no byte order conversion is required. IP headers fields are converting
1253 * in ip_output0 routine only.
1254 */
1255 if ( (ip->ip_len - hlen)
1256 != (ip0->ip_len - (ip0->ip_hl << 2)))
1257 {
1258 Log(("NAT: ECHO(%d) lenght doesn't match ECHOREPLY(%d)\n",
1259 (ip->ip_len - hlen), (ip0->ip_len - (ip0->ip_hl << 2))));
1260 return;
1261 }
1262 }
1263
1264 /* ip points on origianal ip header */
1265 ip = mtod(m, struct ip *);
1266 proto = ip->ip_p;
1267 /* Now ip is pointing on header we've sent from guest */
1268 if ( icp->icmp_type == ICMP_TIMXCEED
1269 || icp->icmp_type == ICMP_UNREACH)
1270 {
1271 old_ip_len = (ip->ip_hl << 2) + 64;
1272 if (old_ip_len > sizeof(ip_copy))
1273 old_ip_len = sizeof(ip_copy);
1274 memcpy(ip_copy, ip, old_ip_len);
1275 }
1276
1277 /* source address from original IP packet*/
1278 dst = ip->ip_src.s_addr;
1279
1280 /* overide ther tail of old packet */
1281 ip = mtod(m, struct ip *); /* ip is from mbuf we've overrided */
1282 original_hlen = ip->ip_hl << 2;
1283 /* saves original ip header and options */
1284 m_copyback(pData, m, original_hlen, len - hlen, buff + hlen);
1285 ip->ip_len = m_length(m, NULL);
1286 ip->ip_p = IPPROTO_ICMP; /* the original package could be whatever, but we're response via ICMP*/
1287
1288 icp = (struct icmp *)((char *)ip + (ip->ip_hl << 2));
1289 type = icp->icmp_type;
1290 if ( type == ICMP_TIMXCEED
1291 || type == ICMP_UNREACH)
1292 {
1293 /* according RFC 793 error messages required copy of initial IP header + 64 bit */
1294 memcpy(&icp->icmp_ip, ip_copy, old_ip_len);
1295 ip->ip_tos = ((ip->ip_tos & 0x1E) | 0xC0); /* high priority for errors */
1296 }
1297
1298 ip->ip_src.s_addr = src;
1299 ip->ip_dst.s_addr = dst;
1300 icmp_reflect(pData, m);
1301 LIST_REMOVE(icm, im_list);
1302 /* Don't call m_free here*/
1303
1304 if ( type == ICMP_TIMXCEED
1305 || type == ICMP_UNREACH)
1306 {
1307 icm->im_so->so_m = NULL;
1308 switch (proto)
1309 {
1310 case IPPROTO_UDP:
1311 /*XXX: so->so_m already freed so we shouldn't call sofree */
1312 udp_detach(pData, icm->im_so);
1313 break;
1314 case IPPROTO_TCP:
1315 /*close tcp should be here */
1316 break;
1317 default:
1318 /* do nothing */
1319 break;
1320 }
1321 }
1322 RTMemFree(icm);
1323}
1324
1325#ifdef RT_OS_WINDOWS
1326static void
1327sorecvfrom_icmp_win(PNATState pData, struct socket *so)
1328{
1329 int len;
1330 int i;
1331 struct ip *ip;
1332 struct mbuf *m;
1333 struct icmp *icp;
1334 struct icmp_msg *icm;
1335 struct ip *ip_broken; /* ICMP returns header + 64 bit of packet */
1336 uint32_t src;
1337 ICMP_ECHO_REPLY *icr;
1338 int hlen = 0;
1339 int data_len = 0;
1340 int nbytes = 0;
1341 u_char code = ~0;
1342 int out_len;
1343 int size;
1344
1345 len = pData->pfIcmpParseReplies(pData->pvIcmpBuffer, pData->szIcmpBuffer);
1346 if (len < 0)
1347 {
1348 LogRel(("NAT: Error (%d) occurred on ICMP receiving\n", GetLastError()));
1349 return;
1350 }
1351 if (len == 0)
1352 return; /* no error */
1353
1354 icr = (ICMP_ECHO_REPLY *)pData->pvIcmpBuffer;
1355 for (i = 0; i < len; ++i)
1356 {
1357 switch(icr[i].Status)
1358 {
1359 case IP_DEST_HOST_UNREACHABLE:
1360 code = (code != ~0 ? code : ICMP_UNREACH_HOST);
1361 case IP_DEST_NET_UNREACHABLE:
1362 code = (code != ~0 ? code : ICMP_UNREACH_NET);
1363 case IP_DEST_PROT_UNREACHABLE:
1364 code = (code != ~0 ? code : ICMP_UNREACH_PROTOCOL);
1365 /* UNREACH error inject here */
1366 case IP_DEST_PORT_UNREACHABLE:
1367 code = (code != ~0 ? code : ICMP_UNREACH_PORT);
1368 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, "Error occurred!!!");
1369 so->so_m = NULL;
1370 break;
1371 case IP_SUCCESS: /* echo replied */
1372 out_len = ETH_HLEN + sizeof(struct ip) + 8;
1373 size;
1374 size = MCLBYTES;
1375 if (out_len < MSIZE)
1376 size = MCLBYTES;
1377 else if (out_len < MCLBYTES)
1378 size = MCLBYTES;
1379 else if (out_len < MJUM9BYTES)
1380 size = MJUM9BYTES;
1381 else if (out_len < MJUM16BYTES)
1382 size = MJUM16BYTES;
1383 else
1384 AssertMsgFailed(("Unsupported size"));
1385
1386 m = m_getjcl(pData, M_NOWAIT, MT_HEADER, M_PKTHDR, size);
1387 if (m == NULL)
1388 return;
1389 m->m_len = 0;
1390 m->m_data += if_maxlinkhdr;
1391 ip = mtod(m, struct ip *);
1392 ip->ip_src.s_addr = icr[i].Address;
1393 ip->ip_p = IPPROTO_ICMP;
1394 ip->ip_dst.s_addr = so->so_laddr.s_addr; /*XXX: still the hack*/
1395 data_len = sizeof(struct ip);
1396 ip->ip_hl = data_len >> 2; /* requiered for icmp_reflect, no IP options */
1397 ip->ip_ttl = icr[i].Options.Ttl;
1398
1399 icp = (struct icmp *)&ip[1]; /* no options */
1400 icp->icmp_type = ICMP_ECHOREPLY;
1401 icp->icmp_code = 0;
1402 icp->icmp_id = so->so_icmp_id;
1403 icp->icmp_seq = so->so_icmp_seq;
1404
1405 data_len += ICMP_MINLEN;
1406
1407 hlen = (ip->ip_hl << 2);
1408 m->m_pkthdr.header = mtod(m, void *);
1409 m->m_len = data_len;
1410
1411 m_copyback(pData, m, hlen + 8, icr[i].DataSize, icr[i].Data);
1412
1413 data_len += icr[i].DataSize;
1414
1415 ip->ip_len = data_len;
1416 m->m_len = ip->ip_len;
1417
1418 icmp_reflect(pData, m);
1419 break;
1420 case IP_TTL_EXPIRED_TRANSIT: /* TTL expired */
1421
1422 ip_broken = icr[i].Data;
1423 icm = icmp_find_original_mbuf(pData, ip_broken);
1424 if (icm == NULL) {
1425 Log(("ICMP: can't find original package (first double word %x)\n", *(uint32_t *)ip_broken));
1426 return;
1427 }
1428 m = icm->im_m;
1429 ip = mtod(m, struct ip *);
1430 ip->ip_ttl = icr[i].Options.Ttl;
1431 src = ip->ip_src.s_addr;
1432 ip->ip_dst.s_addr = src;
1433 ip->ip_dst.s_addr = icr[i].Address;
1434
1435 hlen = (ip->ip_hl << 2);
1436 icp = (struct icmp *)((char *)ip + hlen);
1437 ip_broken->ip_src.s_addr = src; /*it packet sent from host not from guest*/
1438 data_len = (ip_broken->ip_hl << 2) + 64;
1439
1440 m->m_len = data_len;
1441 m->m_pkthdr.header = mtod(m, void *);
1442 m_copyback(pData, m, ip->ip_hl >> 2, icr[i].DataSize, icr[i].Data);
1443 icmp_reflect(pData, m);
1444 break;
1445 default:
1446 Log(("ICMP(default): message with Status: %x was received from %x\n", icr[i].Status, icr[i].Address));
1447 break;
1448 }
1449 }
1450}
1451#else /* !RT_OS_WINDOWS */
1452static void sorecvfrom_icmp_unix(PNATState pData, struct socket *so)
1453{
1454 struct sockaddr_in addr;
1455 socklen_t addrlen = sizeof(struct sockaddr_in);
1456 struct ip ip;
1457 char *buff;
1458 int len = 0;
1459
1460 /* 1- step: read the ip header */
1461 len = recvfrom(so->s, &ip, sizeof(struct ip), MSG_PEEK,
1462 (struct sockaddr *)&addr, &addrlen);
1463 if ( len < 0
1464 && ( errno == EAGAIN
1465 || errno == EWOULDBLOCK
1466 || errno == EINPROGRESS
1467 || errno == ENOTCONN))
1468 {
1469 Log(("sorecvfrom_icmp_unix: 1 - step can't read IP datagramm (would block)\n"));
1470 return;
1471 }
1472
1473 if ( len < sizeof(struct ip)
1474 || len < 0
1475 || len == 0)
1476 {
1477 u_char code;
1478 code = ICMP_UNREACH_PORT;
1479
1480 if (errno == EHOSTUNREACH)
1481 code = ICMP_UNREACH_HOST;
1482 else if (errno == ENETUNREACH)
1483 code = ICMP_UNREACH_NET;
1484
1485 LogRel((" udp icmp rx errno = %d (%s)\n", errno, strerror(errno)));
1486 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
1487 so->so_m = NULL;
1488 Log(("sorecvfrom_icmp_unix: 1 - step can't read IP datagramm\n"));
1489 return;
1490 }
1491 /* basic check of IP header */
1492 if ( ip.ip_v != IPVERSION
1493# ifndef RT_OS_DARWIN
1494 || ip.ip_p != IPPROTO_ICMP
1495# endif
1496 )
1497 {
1498 Log(("sorecvfrom_icmp_unix: 1 - step IP isn't IPv4\n"));
1499 return;
1500 }
1501# ifndef RT_OS_DARWIN
1502 /* Darwin reports the IP length already in host byte order. */
1503 ip.ip_len = RT_N2H_U16(ip.ip_len);
1504# endif
1505# if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
1506 /* Solaris and Darwin report the payload only */
1507 ip.ip_len += (ip.ip_hl << 2);
1508# endif
1509 /* Note: ip->ip_len in host byte order (all OS) */
1510 len = ip.ip_len;
1511 buff = RTMemAlloc(len);
1512 if (buff == NULL)
1513 {
1514 Log(("sorecvfrom_icmp_unix: 1 - step can't allocate enought room for datagram\n"));
1515 return;
1516 }
1517 /* 2 - step: we're reading rest of the datagramm to the buffer */
1518 addrlen = sizeof(struct sockaddr_in);
1519 memset(&addr, 0, addrlen);
1520 len = recvfrom(so->s, buff, len, 0,
1521 (struct sockaddr *)&addr, &addrlen);
1522 if ( len < 0
1523 && ( errno == EAGAIN
1524 || errno == EWOULDBLOCK
1525 || errno == EINPROGRESS
1526 || errno == ENOTCONN))
1527 {
1528 Log(("sorecvfrom_icmp_unix: 2 - step can't read IP body (would block expected:%d)\n",
1529 ip.ip_len));
1530 RTMemFree(buff);
1531 return;
1532 }
1533 if ( len < 0
1534 || len == 0)
1535 {
1536 Log(("sorecvfrom_icmp_unix: 2 - step read of the rest of datagramm is fallen (errno:%d, len:%d expected: %d)\n",
1537 errno, len, (ip.ip_len - sizeof(struct ip))));
1538 RTMemFree(buff);
1539 return;
1540 }
1541 /* len is modified in 2nd read, when the rest of the datagramm was read */
1542 send_icmp_to_guest(pData, buff, len, so, &addr);
1543 RTMemFree(buff);
1544}
1545#endif /* !RT_OS_WINDOWS */
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette