VirtualBox

source: vbox/trunk/src/VBox/Devices/Network/slirp/socket.c@ 74950

Last change on this file since 74950 was 72292, checked in by vboxsync, 7 years ago

NAT: sobind - untabify.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 43.1 KB
Line 
1/* $Id: socket.c 72292 2018-05-22 23:42:27Z vboxsync $ */
2/** @file
3 * NAT - socket handling.
4 */
5
6/*
7 * Copyright (C) 2006-2017 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/*
19 * This code is based on:
20 *
21 * Copyright (c) 1995 Danny Gasparovski.
22 *
23 * Please read the file COPYRIGHT for the
24 * terms and conditions of the copyright.
25 */
26
27#include <slirp.h>
28#include "ip_icmp.h"
29#include "main.h"
30#ifdef __sun__
31#include <sys/filio.h>
32#endif
33#include <VBox/vmm/pdmdrv.h>
34#if defined (RT_OS_WINDOWS)
35#include <iprt/win/iphlpapi.h>
36#include <icmpapi.h>
37#endif
38#include <alias.h>
39
40#if defined(DECLARE_IOVEC) && defined(RT_OS_WINDOWS)
41AssertCompileMembersSameSizeAndOffset(struct iovec, iov_base, WSABUF, buf);
42AssertCompileMembersSameSizeAndOffset(struct iovec, iov_len, WSABUF, len);
43#endif
44
#ifdef VBOX_WITH_NAT_SEND2HOME
/**
 * Sends one datagram to every address in pData->pInSockAddrHomeAddress.
 *
 * For each configured home address a UDP socket is cloned from @a pSo via
 * soCloneUDPSocketWithForegnAddr() and the payload is sent through it with
 * sendto().  Individual send failures are only logged.
 *
 * @returns true if at least one sendto() transmitted more than zero bytes,
 *          false otherwise.  Also returns false right away (after asserting)
 *          if a socket clone could not be created, even if earlier sends
 *          succeeded.
 * @param   pData   NAT state; supplies the home address table and its size.
 * @param   pSo     Socket to clone; its so_fport is used as destination port.
 * @param   pvBuf   Payload to send.
 * @param   cbBuf   Payload size in bytes.
 * @param   iFlags  Flags passed through to sendto().
 *
 * @note    NOTE(review): who releases the cloned sockets is not visible in
 *          this function - confirm against soCloneUDPSocketWithForegnAddr().
 */
DECLINLINE(bool) slirpSend2Home(PNATState pData, struct socket *pSo, const void *pvBuf, uint32_t cbBuf, int iFlags)
{
    int idxAddr;
    int ret = 0;
    bool fSendDone = false;
    LogFlowFunc(("Enter pSo:%R[natsock] pvBuf: %p, cbBuf: %d, iFlags: %d\n", pSo, pvBuf, cbBuf, iFlags));
    for (idxAddr = 0; idxAddr < pData->cInHomeAddressSize; ++idxAddr)
    {
        struct socket *pNewSocket = soCloneUDPSocketWithForegnAddr(pData, pSo, pData->pInSockAddrHomeAddress[idxAddr].sin_addr);
        /* AssertReturn takes (expression, return value) as two separate arguments. */
        AssertReturn(pNewSocket, false);
        pData->pInSockAddrHomeAddress[idxAddr].sin_port = pSo->so_fport;
        /** @todo more verbose on errors,
         * @note: we shouldn't care if this send fail or not (we're in broadcast).
         */
        LogFunc(("send %d bytes to %RTnaipv4 from %R[natsock]\n", cbBuf, pData->pInSockAddrHomeAddress[idxAddr].sin_addr.s_addr, pNewSocket));
        ret = sendto(pNewSocket->s, pvBuf, cbBuf, iFlags, (struct sockaddr *)&pData->pInSockAddrHomeAddress[idxAddr], sizeof(struct sockaddr_in));
        if (ret < 0)
            LogFunc(("Failed to send %d bytes to %RTnaipv4\n", cbBuf, pData->pInSockAddrHomeAddress[idxAddr].sin_addr.s_addr));
        fSendDone |= ret > 0;
    }
    LogFlowFunc(("Leave %RTbool\n", fSendDone));
    return fSendDone;
}
#endif /* VBOX_WITH_NAT_SEND2HOME */
71
72#if !defined(RT_OS_WINDOWS)
73static void send_icmp_to_guest(PNATState, char *, size_t, const struct sockaddr_in *);
74static void sorecvfrom_icmp_unix(PNATState, struct socket *);
75#endif /* !RT_OS_WINDOWS */
76
/**
 * Initialisation hook of the socket layer.
 *
 * Intentionally empty: nothing is set up here.
 */
void so_init(void)
{
    /* nothing to do */
}
81
82struct socket *
83solookup(struct socket *head, struct in_addr laddr,
84 u_int lport, struct in_addr faddr, u_int fport)
85{
86 struct socket *so;
87
88 for (so = head->so_next; so != head; so = so->so_next)
89 {
90 if ( so->so_lport == lport
91 && so->so_laddr.s_addr == laddr.s_addr
92 && so->so_faddr.s_addr == faddr.s_addr
93 && so->so_fport == fport)
94 return so;
95 }
96
97 return (struct socket *)NULL;
98}
99
100/*
101 * Create a new socket, initialise the fields
102 * It is the responsibility of the caller to
103 * insque() it into the correct linked-list
104 */
105struct socket *
106socreate(void)
107{
108 struct socket *so;
109
110 so = (struct socket *)RTMemAllocZ(sizeof(struct socket));
111 if (so)
112 {
113 so->so_state = SS_NOFDREF;
114 so->s = -1;
115#if !defined(RT_OS_WINDOWS)
116 so->so_poll_index = -1;
117#endif
118 }
119 return so;
120}
121
122/*
123 * remque and free a socket, clobber cache
124 */
125void
126sofree(PNATState pData, struct socket *so)
127{
128 LogFlowFunc(("ENTER:%R[natsock]\n", so));
129 /*
130 * We should not remove socket when polling routine do the polling
131 * instead we mark it for deletion.
132 */
133 if (so->fUnderPolling)
134 {
135 so->fShouldBeRemoved = 1;
136 LogFlowFunc(("LEAVE:%R[natsock] postponed deletion\n", so));
137 return;
138 }
139 /**
140 * Check that we don't freeng socket with tcbcb
141 */
142 Assert(!sototcpcb(so));
143 /* udp checks */
144 Assert(!so->so_timeout);
145 Assert(!so->so_timeout_arg);
146 if (so == tcp_last_so)
147 tcp_last_so = &tcb;
148 else if (so == udp_last_so)
149 udp_last_so = &udb;
150
151 /* check if mbuf haven't been already freed */
152 if (so->so_m != NULL)
153 {
154 m_freem(pData, so->so_m);
155 so->so_m = NULL;
156 }
157
158 if (so->so_ohdr != NULL)
159 {
160 RTMemFree(so->so_ohdr);
161 so->so_ohdr = NULL;
162 }
163
164 if (so->so_next && so->so_prev)
165 {
166 remque(pData, so); /* crashes if so is not in a queue */
167 NSOCK_DEC();
168 }
169
170 RTMemFree(so);
171 LogFlowFuncLeave();
172}
173
174
175/*
176 * Worker for sobind() below.
177 */
178static int
179sobindto(struct socket *so, uint32_t addr, uint16_t port)
180{
181 struct sockaddr_in self;
182 int status;
183
184 if (addr == INADDR_ANY && port == 0 && so->so_type != IPPROTO_UDP)
185 {
186 /* TCP sockets without constraints don't need to be bound */
187 Log2(("NAT: sobind: %s guest %RTnaipv4:%d - nothing to do\n",
188 so->so_type == IPPROTO_UDP ? "udp" : "tcp",
189 so->so_laddr.s_addr, ntohs(so->so_lport)));
190 return 0;
191 }
192
193 RT_ZERO(self);
194#ifdef RT_OS_DARWIN
195 self.sin_len = sizeof(self);
196#endif
197 self.sin_family = AF_INET;
198 self.sin_addr.s_addr = addr;
199 self.sin_port = port;
200
201 status = bind(so->s, (struct sockaddr *)&self, sizeof(self));
202 if (status == 0)
203 {
204 Log2(("NAT: sobind: %s guest %RTnaipv4:%d to host %RTnaipv4:%d\n",
205 so->so_type == IPPROTO_UDP ? "udp" : "tcp",
206 so->so_laddr.s_addr, ntohs(so->so_lport), addr, ntohs(port)));
207 return 0;
208 }
209
210 Log2(("NAT: sobind: %s guest %RTnaipv4:%d to host %RTnaipv4:%d error %d%s\n",
211 so->so_type == IPPROTO_UDP ? "udp" : "tcp",
212 so->so_laddr.s_addr, ntohs(so->so_lport),
213 addr, ntohs(port),
214 errno, port ? " (will retry with random port)" : ""));
215
216 if (port) /* retry without */
217 status = sobindto(so, addr, 0);
218
219 if (addr)
220 return status;
221 else
222 return 0;
223}
224
225
226/*
227 * Bind the socket to specific host address and/or port if necessary.
228 * We also always bind udp sockets to force the local port to be
229 * allocated and known in advance.
230 */
231int
232sobind(PNATState pData, struct socket *so)
233{
234 uint32_t addr = pData->bindIP.s_addr; /* may be INADDR_ANY */
235 bool fSamePorts = !!(pData->i32AliasMode & PKT_ALIAS_SAME_PORTS);
236 uint16_t port;
237 int status;
238
239 if (fSamePorts)
240 {
241 int opt = 1;
242 setsockopt(so->s, SOL_SOCKET, SO_REUSEADDR, (char *)&opt, sizeof(opt));
243 port = so->so_lport;
244 }
245 else
246 {
247 port = 0;
248 }
249
250 status = sobindto(so, addr, port);
251 return status;
252}
253
254
255/*
256 * Read from so's socket into sb_snd, updating all relevant sbuf fields
257 * NOTE: This will only be called if it is select()ed for reading, so
258 * a read() of 0 (or less) means it's disconnected
259 */
260int
261soread(PNATState pData, struct socket *so)
262{
263 int n, nn, lss, total;
264 struct sbuf *sb = &so->so_snd;
265 u_int len = sb->sb_datalen - sb->sb_cc;
266 struct iovec iov[2];
267 int mss = so->so_tcpcb->t_maxseg;
268 int sockerr;
269
270 STAM_PROFILE_START(&pData->StatIOread, a);
271 STAM_COUNTER_RESET(&pData->StatIORead_in_1);
272 STAM_COUNTER_RESET(&pData->StatIORead_in_2);
273
274 QSOCKET_LOCK(tcb);
275 SOCKET_LOCK(so);
276 QSOCKET_UNLOCK(tcb);
277
278 LogFlow(("soread: so = %R[natsock]\n", so));
279 Log2(("%s: so = %R[natsock] so->so_snd = %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, so, sb));
280
281 /*
282 * No need to check if there's enough room to read.
283 * soread wouldn't have been called if there weren't
284 */
285
286 len = sb->sb_datalen - sb->sb_cc;
287
288 iov[0].iov_base = sb->sb_wptr;
289 iov[1].iov_base = 0;
290 iov[1].iov_len = 0;
291 if (sb->sb_wptr < sb->sb_rptr)
292 {
293 iov[0].iov_len = sb->sb_rptr - sb->sb_wptr;
294 /* Should never succeed, but... */
295 if (iov[0].iov_len > len)
296 iov[0].iov_len = len;
297 if (iov[0].iov_len > mss)
298 iov[0].iov_len -= iov[0].iov_len%mss;
299 n = 1;
300 }
301 else
302 {
303 iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr;
304 /* Should never succeed, but... */
305 if (iov[0].iov_len > len)
306 iov[0].iov_len = len;
307 len -= iov[0].iov_len;
308 if (len)
309 {
310 iov[1].iov_base = sb->sb_data;
311 iov[1].iov_len = sb->sb_rptr - sb->sb_data;
312 if (iov[1].iov_len > len)
313 iov[1].iov_len = len;
314 total = iov[0].iov_len + iov[1].iov_len;
315 if (total > mss)
316 {
317 lss = total % mss;
318 if (iov[1].iov_len > lss)
319 {
320 iov[1].iov_len -= lss;
321 n = 2;
322 }
323 else
324 {
325 lss -= iov[1].iov_len;
326 iov[0].iov_len -= lss;
327 n = 1;
328 }
329 }
330 else
331 n = 2;
332 }
333 else
334 {
335 if (iov[0].iov_len > mss)
336 iov[0].iov_len -= iov[0].iov_len%mss;
337 n = 1;
338 }
339 }
340
341#ifdef HAVE_READV
342 nn = readv(so->s, (struct iovec *)iov, n);
343#else
344 nn = recv(so->s, iov[0].iov_base, iov[0].iov_len, (so->so_tcpcb->t_force? MSG_OOB:0));
345#endif
346 if (nn < 0)
347 sockerr = errno; /* save it, as it may be clobbered by logging */
348 else
349 sockerr = 0;
350
351 Log2(("%s: read(1) nn = %d bytes\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn));
352 Log2(("%s: so = %R[natsock] so->so_snd = %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, so, sb));
353 if (nn <= 0)
354 {
355#ifdef RT_OS_WINDOWS
356 /*
357 * Windows reports ESHUTDOWN after SHUT_RD (SD_RECEIVE)
358 * instead of just returning EOF indication.
359 */
360 if (nn < 0 && sockerr == ESHUTDOWN)
361 {
362 nn = 0;
363 sockerr = 0;
364 }
365#endif
366
367 if (nn == 0) /* XXX: should this be inside #if defined(RT_OS_WINDOWS)? */
368 {
369 /*
370 * Special case for WSAEnumNetworkEvents: If we receive 0 bytes that
371 * _could_ mean that the connection is closed. But we will receive an
372 * FD_CLOSE event later if the connection was _really_ closed. With
373 * www.youtube.com I see this very often. Closing the socket too early
374 * would be dangerous.
375 */
376 int status;
377 unsigned long pending = 0;
378 status = ioctlsocket(so->s, FIONREAD, &pending);
379 if (status < 0)
380 Log(("NAT:%s: error in WSAIoctl: %d\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, errno));
381 if (pending != 0)
382 {
383 SOCKET_UNLOCK(so);
384 STAM_PROFILE_STOP(&pData->StatIOread, a);
385 return 0;
386 }
387 }
388
389 if ( nn < 0
390 && soIgnorableErrorCode(sockerr))
391 {
392 SOCKET_UNLOCK(so);
393 STAM_PROFILE_STOP(&pData->StatIOread, a);
394 return 0;
395 }
396 else
397 {
398 int fUninitializedTemplate = 0;
399 int shuterr;
400
401 fUninitializedTemplate = RT_BOOL(( sototcpcb(so)
402 && ( sototcpcb(so)->t_template.ti_src.s_addr == INADDR_ANY
403 || sototcpcb(so)->t_template.ti_dst.s_addr == INADDR_ANY)));
404 /* nn == 0 means peer has performed an orderly shutdown */
405 Log2(("%s: disconnected, nn = %d, errno = %d (%s)\n",
406 RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn, sockerr, strerror(sockerr)));
407
408 shuterr = sofcantrcvmore(so);
409 if (!sockerr && !shuterr && !fUninitializedTemplate)
410 tcp_sockclosed(pData, sototcpcb(so));
411 else
412 {
413 LogRel2(("NAT: sockerr %d, shuterr %d - %R[natsock]\n", sockerr, shuterr, so));
414 tcp_drop(pData, sototcpcb(so), sockerr);
415 }
416 SOCKET_UNLOCK(so);
417 STAM_PROFILE_STOP(&pData->StatIOread, a);
418 return -1;
419 }
420 }
421 STAM_STATS(
422 if (n == 1)
423 {
424 STAM_COUNTER_INC(&pData->StatIORead_in_1);
425 STAM_COUNTER_ADD(&pData->StatIORead_in_1_bytes, nn);
426 }
427 else
428 {
429 STAM_COUNTER_INC(&pData->StatIORead_in_2);
430 STAM_COUNTER_ADD(&pData->StatIORead_in_2_1st_bytes, nn);
431 }
432 );
433
434#ifndef HAVE_READV
435 /*
436 * If there was no error, try and read the second time round
437 * We read again if n = 2 (ie, there's another part of the buffer)
438 * and we read as much as we could in the first read
439 * We don't test for <= 0 this time, because there legitimately
440 * might not be any more data (since the socket is non-blocking),
441 * a close will be detected on next iteration.
442 * A return of -1 wont (shouldn't) happen, since it didn't happen above
443 */
444 if (n == 2 && (unsigned)nn == iov[0].iov_len)
445 {
446 int ret;
447 ret = recv(so->s, iov[1].iov_base, iov[1].iov_len, 0);
448 if (ret > 0)
449 nn += ret;
450 STAM_STATS(
451 if (ret > 0)
452 {
453 STAM_COUNTER_INC(&pData->StatIORead_in_2);
454 STAM_COUNTER_ADD(&pData->StatIORead_in_2_2nd_bytes, ret);
455 }
456 );
457 }
458
459 Log2(("%s: read(2) nn = %d bytes\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn));
460#endif
461
462 /* Update fields */
463 sb->sb_cc += nn;
464 sb->sb_wptr += nn;
465 Log2(("%s: update so_snd (readed nn = %d) %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn, sb));
466 if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen))
467 {
468 sb->sb_wptr -= sb->sb_datalen;
469 Log2(("%s: alter sb_wptr so_snd = %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, sb));
470 }
471 STAM_PROFILE_STOP(&pData->StatIOread, a);
472 SOCKET_UNLOCK(so);
473 return nn;
474}
475
476/*
477 * Get urgent data
478 *
479 * When the socket is created, we set it SO_OOBINLINE,
480 * so when OOB data arrives, we soread() it and everything
481 * in the send buffer is sent as urgent data
482 */
483void
484sorecvoob(PNATState pData, struct socket *so)
485{
486 struct tcpcb *tp = sototcpcb(so);
487 ssize_t ret;
488
489 LogFlowFunc(("sorecvoob: so = %R[natsock]\n", so));
490
491 /*
492 * We take a guess at how much urgent data has arrived.
493 * In most situations, when urgent data arrives, the next
494 * read() should get all the urgent data. This guess will
495 * be wrong however if more data arrives just after the
496 * urgent data, or the read() doesn't return all the
497 * urgent data.
498 */
499 ret = soread(pData, so);
500 if (RT_LIKELY(ret > 0))
501 {
502 tp->snd_up = tp->snd_una + SBUF_LEN(&so->so_snd);
503 tp->t_force = 1;
504 tcp_output(pData, tp);
505 tp->t_force = 0;
506 }
507}
508
509/*
510 * Send urgent data
511 * There's a lot duplicated code here, but...
512 */
513int
514sosendoob(struct socket *so)
515{
516 struct sbuf *sb = &so->so_rcv;
517 char buff[2048]; /* XXX Shouldn't be sending more oob data than this */
518
519 int n, len;
520
521 LogFlowFunc(("sosendoob so = %R[natsock]\n", so));
522
523 if (so->so_urgc > sizeof(buff))
524 so->so_urgc = sizeof(buff); /* XXX */
525
526 if (sb->sb_rptr < sb->sb_wptr)
527 {
528 /* We can send it directly */
529 n = send(so->s, sb->sb_rptr, so->so_urgc, (MSG_OOB)); /* |MSG_DONTWAIT)); */
530 so->so_urgc -= n;
531
532 Log2((" --- sent %d bytes urgent data, %d urgent bytes left\n",
533 n, so->so_urgc));
534 }
535 else
536 {
537 /*
538 * Since there's no sendv or sendtov like writev,
539 * we must copy all data to a linear buffer then
540 * send it all
541 */
542 len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
543 if (len > so->so_urgc)
544 len = so->so_urgc;
545 memcpy(buff, sb->sb_rptr, len);
546 so->so_urgc -= len;
547 if (so->so_urgc)
548 {
549 n = sb->sb_wptr - sb->sb_data;
550 if (n > so->so_urgc)
551 n = so->so_urgc;
552 memcpy(buff + len, sb->sb_data, n);
553 so->so_urgc -= n;
554 len += n;
555 }
556 n = send(so->s, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */
557#ifdef DEBUG
558 if (n != len)
559 Log(("Didn't send all data urgently XXXXX\n"));
560#endif
561 Log2((" ---2 sent %d bytes urgent data, %d urgent bytes left\n",
562 n, so->so_urgc));
563 }
564
565 sb->sb_cc -= n;
566 sb->sb_rptr += n;
567 if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
568 sb->sb_rptr -= sb->sb_datalen;
569
570 return n;
571}
572
573/*
574 * Write data from so_rcv to so's socket,
575 * updating all sbuf field as necessary
576 */
577int
578sowrite(PNATState pData, struct socket *so)
579{
580 int n, nn;
581 struct sbuf *sb = &so->so_rcv;
582 u_int len = sb->sb_cc;
583 struct iovec iov[2];
584
585 STAM_PROFILE_START(&pData->StatIOwrite, a);
586 STAM_COUNTER_RESET(&pData->StatIOWrite_in_1);
587 STAM_COUNTER_RESET(&pData->StatIOWrite_in_1_bytes);
588 STAM_COUNTER_RESET(&pData->StatIOWrite_in_2);
589 STAM_COUNTER_RESET(&pData->StatIOWrite_in_2_1st_bytes);
590 STAM_COUNTER_RESET(&pData->StatIOWrite_in_2_2nd_bytes);
591 STAM_COUNTER_RESET(&pData->StatIOWrite_no_w);
592 STAM_COUNTER_RESET(&pData->StatIOWrite_rest);
593 STAM_COUNTER_RESET(&pData->StatIOWrite_rest_bytes);
594 LogFlowFunc(("so = %R[natsock]\n", so));
595 Log2(("%s: so = %R[natsock] so->so_rcv = %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, so, sb));
596 QSOCKET_LOCK(tcb);
597 SOCKET_LOCK(so);
598 QSOCKET_UNLOCK(tcb);
599 if (so->so_urgc)
600 {
601 sosendoob(so);
602 if (sb->sb_cc == 0)
603 {
604 SOCKET_UNLOCK(so);
605 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
606 return 0;
607 }
608 }
609
610 /*
611 * No need to check if there's something to write,
612 * sowrite wouldn't have been called otherwise
613 */
614
615 len = sb->sb_cc;
616
617 iov[0].iov_base = sb->sb_rptr;
618 iov[1].iov_base = 0;
619 iov[1].iov_len = 0;
620 if (sb->sb_rptr < sb->sb_wptr)
621 {
622 iov[0].iov_len = sb->sb_wptr - sb->sb_rptr;
623 /* Should never succeed, but... */
624 if (iov[0].iov_len > len)
625 iov[0].iov_len = len;
626 n = 1;
627 }
628 else
629 {
630 iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
631 if (iov[0].iov_len > len)
632 iov[0].iov_len = len;
633 len -= iov[0].iov_len;
634 if (len)
635 {
636 iov[1].iov_base = sb->sb_data;
637 iov[1].iov_len = sb->sb_wptr - sb->sb_data;
638 if (iov[1].iov_len > len)
639 iov[1].iov_len = len;
640 n = 2;
641 }
642 else
643 n = 1;
644 }
645 STAM_STATS({
646 if (n == 1)
647 {
648 STAM_COUNTER_INC(&pData->StatIOWrite_in_1);
649 STAM_COUNTER_ADD(&pData->StatIOWrite_in_1_bytes, iov[0].iov_len);
650 }
651 else
652 {
653 STAM_COUNTER_INC(&pData->StatIOWrite_in_2);
654 STAM_COUNTER_ADD(&pData->StatIOWrite_in_2_1st_bytes, iov[0].iov_len);
655 STAM_COUNTER_ADD(&pData->StatIOWrite_in_2_2nd_bytes, iov[1].iov_len);
656 }
657 });
658 /* Check if there's urgent data to send, and if so, send it */
659#ifdef HAVE_READV
660 nn = writev(so->s, (const struct iovec *)iov, n);
661#else
662 nn = send(so->s, iov[0].iov_base, iov[0].iov_len, 0);
663#endif
664 Log2(("%s: wrote(1) nn = %d bytes\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn));
665 /* This should never happen, but people tell me it does *shrug* */
666 if ( nn < 0
667 && soIgnorableErrorCode(errno))
668 {
669 SOCKET_UNLOCK(so);
670 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
671 return 0;
672 }
673
674 if (nn < 0 || (nn == 0 && iov[0].iov_len > 0))
675 {
676 Log2(("%s: disconnected, so->so_state = %x, errno = %d\n",
677 RT_GCC_EXTENSION __PRETTY_FUNCTION__, so->so_state, errno));
678 sofcantsendmore(so);
679 tcp_sockclosed(pData, sototcpcb(so));
680 SOCKET_UNLOCK(so);
681 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
682 return -1;
683 }
684
685#ifndef HAVE_READV
686 if (n == 2 && (unsigned)nn == iov[0].iov_len)
687 {
688 int ret;
689 ret = send(so->s, iov[1].iov_base, iov[1].iov_len, 0);
690 if (ret > 0)
691 nn += ret;
692# ifdef VBOX_WITH_STATISTICS
693 if (ret > 0 && ret != (ssize_t)iov[1].iov_len)
694 {
695 STAM_COUNTER_INC(&pData->StatIOWrite_rest);
696 STAM_COUNTER_ADD(&pData->StatIOWrite_rest_bytes, (iov[1].iov_len - ret));
697 }
698#endif
699 }
700 Log2(("%s: wrote(2) nn = %d bytes\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn));
701#endif
702
703 /* Update sbuf */
704 sb->sb_cc -= nn;
705 sb->sb_rptr += nn;
706 Log2(("%s: update so_rcv (written nn = %d) %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn, sb));
707 if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
708 {
709 sb->sb_rptr -= sb->sb_datalen;
710 Log2(("%s: alter sb_rptr of so_rcv %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, sb));
711 }
712
713 /*
714 * If in DRAIN mode, and there's no more data, set
715 * it CANTSENDMORE
716 */
717 if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0)
718 sofcantsendmore(so);
719
720 SOCKET_UNLOCK(so);
721 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
722 return nn;
723}
724
725/*
726 * recvfrom() a UDP socket
727 */
728void
729sorecvfrom(PNATState pData, struct socket *so)
730{
731 LogFlowFunc(("sorecvfrom: so = %p\n", so));
732
733#ifdef RT_OS_WINDOWS
734 /* ping is handled with ICMP API in ip_icmpwin.c */
735 Assert(so->so_type == IPPROTO_UDP);
736#else
737 if (so->so_type == IPPROTO_ICMP)
738 {
739 /* This is a "ping" reply */
740 sorecvfrom_icmp_unix(pData, so);
741 udp_detach(pData, so);
742 }
743 else
744#endif /* !RT_OS_WINDOWS */
745 {
746 static char achBuf[64 * 1024];
747
748 /* A "normal" UDP packet */
749 struct sockaddr_in addr;
750 socklen_t addrlen = sizeof(struct sockaddr_in);
751 struct iovec iov[2];
752 ssize_t nread;
753 struct mbuf *m;
754
755 QSOCKET_LOCK(udb);
756 SOCKET_LOCK(so);
757 QSOCKET_UNLOCK(udb);
758
759 m = m_getjcl(pData, M_NOWAIT, MT_HEADER, M_PKTHDR, slirp_size(pData));
760 if (m == NULL)
761 {
762 SOCKET_UNLOCK(so);
763 return;
764 }
765
766 m->m_data += ETH_HLEN;
767 m->m_pkthdr.header = mtod(m, void *);
768
769 m->m_data += sizeof(struct udpiphdr);
770
771 /* small packets will fit without copying */
772 iov[0].iov_base = mtod(m, char *);
773 iov[0].iov_len = M_TRAILINGSPACE(m);
774
775 /* large packets will spill into a temp buffer */
776 iov[1].iov_base = achBuf;
777 iov[1].iov_len = sizeof(achBuf);
778
779#if !defined(RT_OS_WINDOWS)
780 {
781 struct msghdr mh;
782 memset(&mh, 0, sizeof(mh));
783
784 mh.msg_iov = iov;
785 mh.msg_iovlen = 2;
786 mh.msg_name = &addr;
787 mh.msg_namelen = addrlen;
788
789 nread = recvmsg(so->s, &mh, 0);
790 }
791#else /* RT_OS_WINDOWS */
792 {
793 DWORD nbytes; /* NB: can't use nread b/c of different size */
794 DWORD flags = 0;
795 int status;
796 AssertCompile(sizeof(WSABUF) == sizeof(struct iovec));
797 AssertCompileMembersSameSizeAndOffset(WSABUF, len, struct iovec, iov_len);
798 AssertCompileMembersSameSizeAndOffset(WSABUF, buf, struct iovec, iov_base);
799 status = WSARecvFrom(so->s, (WSABUF *)&iov[0], 2, &nbytes, &flags,
800 (struct sockaddr *)&addr, &addrlen,
801 NULL, NULL);
802 if (status != SOCKET_ERROR)
803 nread = nbytes;
804 else
805 nread = -1;
806 }
807#endif
808 if (nread >= 0)
809 {
810 if (nread <= iov[0].iov_len)
811 m->m_len = nread;
812 else
813 {
814 m->m_len = iov[0].iov_len;
815 m_append(pData, m, nread - iov[0].iov_len, iov[1].iov_base);
816 }
817 Assert(m_length(m, NULL) == (size_t)nread);
818
819 /*
820 * Hack: domain name lookup will be used the most for UDP,
821 * and since they'll only be used once there's no need
822 * for the 4 minute (or whatever) timeout... So we time them
823 * out much quicker (10 seconds for now...)
824 */
825 if (so->so_expire)
826 {
827 if (so->so_fport != RT_H2N_U16_C(53))
828 so->so_expire = curtime + SO_EXPIRE;
829 }
830
831 /*
832 * DNS proxy requests are forwarded to the real resolver,
833 * but its socket's so_faddr is that of the DNS proxy
834 * itself.
835 *
836 * last argument should be changed if Slirp will inject IP attributes
837 */
838 if ( pData->fUseDnsProxy
839 && so->so_fport == RT_H2N_U16_C(53)
840 && CTL_CHECK(so->so_faddr.s_addr, CTL_DNS))
841 dnsproxy_answer(pData, so, m);
842
843 /* packets definetly will be fragmented, could confuse receiver peer. */
844 if (nread > if_mtu)
845 m->m_flags |= M_SKIP_FIREWALL;
846
847 /*
848 * If this packet was destined for CTL_ADDR,
849 * make it look like that's where it came from, done by udp_output
850 */
851 udp_output(pData, so, m, &addr);
852 }
853 else
854 {
855 m_freem(pData, m);
856
857 if (!soIgnorableErrorCode(errno))
858 {
859 u_char code;
860 if (errno == EHOSTUNREACH)
861 code = ICMP_UNREACH_HOST;
862 else if (errno == ENETUNREACH)
863 code = ICMP_UNREACH_NET;
864 else
865 code = ICMP_UNREACH_PORT;
866
867 Log2((" rx error, tx icmp ICMP_UNREACH:%i\n", code));
868 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
869 so->so_m = NULL;
870 }
871 }
872
873 SOCKET_UNLOCK(so);
874 }
875}
876
877/*
878 * sendto() a socket
879 */
880int
881sosendto(PNATState pData, struct socket *so, struct mbuf *m)
882{
883 int ret;
884 struct sockaddr_in *paddr;
885 struct sockaddr addr;
886#if 0
887 struct sockaddr_in host_addr;
888#endif
889 caddr_t buf = 0;
890 int mlen;
891
892 LogFlowFunc(("sosendto: so = %R[natsock], m = %p\n", so, m));
893
894 memset(&addr, 0, sizeof(struct sockaddr));
895#ifdef RT_OS_DARWIN
896 addr.sa_len = sizeof(struct sockaddr_in);
897#endif
898 paddr = (struct sockaddr_in *)&addr;
899 paddr->sin_family = AF_INET;
900 if ((so->so_faddr.s_addr & RT_H2N_U32(pData->netmask)) == pData->special_addr.s_addr)
901 {
902 /* It's an alias */
903 uint32_t last_byte = RT_N2H_U32(so->so_faddr.s_addr) & ~pData->netmask;
904 switch(last_byte)
905 {
906#if 0
907 /* handle this case at 'default:' */
908 case CTL_BROADCAST:
909 addr.sin_addr.s_addr = INADDR_BROADCAST;
910 /* Send the packet to host to fully emulate broadcast */
911 /** @todo r=klaus: on Linux host this causes the host to receive
912 * the packet twice for some reason. And I cannot find any place
913 * in the man pages which states that sending a broadcast does not
914 * reach the host itself. */
915 host_addr.sin_family = AF_INET;
916 host_addr.sin_port = so->so_fport;
917 host_addr.sin_addr = our_addr;
918 sendto(so->s, m->m_data, m->m_len, 0,
919 (struct sockaddr *)&host_addr, sizeof (struct sockaddr));
920 break;
921#endif
922 case CTL_DNS:
923 case CTL_ALIAS:
924 default:
925 if (last_byte == ~pData->netmask)
926 paddr->sin_addr.s_addr = INADDR_BROADCAST;
927 else
928 paddr->sin_addr = loopback_addr;
929 break;
930 }
931 }
932 else
933 paddr->sin_addr = so->so_faddr;
934 paddr->sin_port = so->so_fport;
935
936 Log2((" sendto()ing, addr.sin_port=%d, addr.sin_addr.s_addr=%.16s\n",
937 RT_N2H_U16(paddr->sin_port), inet_ntoa(paddr->sin_addr)));
938
939 /* Don't care what port we get */
940 /*
941 * > nmap -sV -T4 -O -A -v -PU3483 255.255.255.255
942 * generates bodyless messages, annoying memmory management system.
943 */
944 mlen = m_length(m, NULL);
945 if (mlen > 0)
946 {
947 buf = RTMemAlloc(mlen);
948 if (buf == NULL)
949 {
950 return -1;
951 }
952 m_copydata(m, 0, mlen, buf);
953 }
954 ret = sendto(so->s, buf, mlen, 0,
955 (struct sockaddr *)&addr, sizeof (struct sockaddr));
956#ifdef VBOX_WITH_NAT_SEND2HOME
957 if (slirpIsWideCasting(pData, so->so_faddr.s_addr))
958 {
959 slirpSend2Home(pData, so, buf, mlen, 0);
960 }
961#endif
962 if (buf)
963 RTMemFree(buf);
964 if (ret < 0)
965 {
966 Log2(("UDP: sendto fails (%s)\n", strerror(errno)));
967 return -1;
968 }
969
970 /*
971 * Kill the socket if there's no reply in 4 minutes,
972 * but only if it's an expirable socket
973 */
974 if (so->so_expire)
975 so->so_expire = curtime + SO_EXPIRE;
976 so->so_state = SS_ISFCONNECTED; /* So that it gets select()ed */
977 return 0;
978}
979
980/*
981 * XXX This should really be tcp_listen
982 */
983struct socket *
984solisten(PNATState pData, u_int32_t bind_addr, u_int port, u_int32_t laddr, u_int lport, int flags)
985{
986 struct sockaddr_in addr;
987 struct socket *so;
988 socklen_t addrlen = sizeof(addr);
989 int s, opt = 1;
990 int status;
991
992 LogFlowFunc(("solisten: port = %d, laddr = %x, lport = %d, flags = %x\n", port, laddr, lport, flags));
993
994 if ((so = socreate()) == NULL)
995 {
996 /* RTMemFree(so); Not sofree() ??? free(NULL) == NOP */
997 return NULL;
998 }
999
1000 /* Don't tcp_attach... we don't need so_snd nor so_rcv */
1001 if ((so->so_tcpcb = tcp_newtcpcb(pData, so)) == NULL)
1002 {
1003 RTMemFree(so);
1004 return NULL;
1005 }
1006
1007 SOCKET_LOCK_CREATE(so);
1008 SOCKET_LOCK(so);
1009 QSOCKET_LOCK(tcb);
1010 insque(pData, so,&tcb);
1011 NSOCK_INC();
1012 QSOCKET_UNLOCK(tcb);
1013
1014 /*
1015 * SS_FACCEPTONCE sockets must time out.
1016 */
1017 if (flags & SS_FACCEPTONCE)
1018 so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT*2;
1019
1020 so->so_state = (SS_FACCEPTCONN|flags);
1021 so->so_lport = lport; /* Kept in network format */
1022 so->so_laddr.s_addr = laddr; /* Ditto */
1023
1024 memset(&addr, 0, sizeof(addr));
1025#ifdef RT_OS_DARWIN
1026 addr.sin_len = sizeof(addr);
1027#endif
1028 addr.sin_family = AF_INET;
1029 addr.sin_addr.s_addr = bind_addr;
1030 addr.sin_port = port;
1031
1032 /**
1033 * changing listen(,1->SOMAXCONN) shouldn't be harmful for NAT's TCP/IP stack,
1034 * kernel will choose the optimal value for requests queue length.
1035 * @note: MSDN recommends low (2-4) values for bluetooth networking devices.
1036 */
1037 if ( ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0)
1038 || (setsockopt(s, SOL_SOCKET, SO_REUSEADDR,(char *)&opt, sizeof(int)) < 0)
1039 || (bind(s,(struct sockaddr *)&addr, sizeof(addr)) < 0)
1040 || (listen(s, pData->soMaxConn) < 0))
1041 {
1042#ifdef RT_OS_WINDOWS
1043 int tmperrno = WSAGetLastError(); /* Don't clobber the real reason we failed */
1044 closesocket(s);
1045 QSOCKET_LOCK(tcb);
1046 sofree(pData, so);
1047 QSOCKET_UNLOCK(tcb);
1048 /* Restore the real errno */
1049 WSASetLastError(tmperrno);
1050#else
1051 int tmperrno = errno; /* Don't clobber the real reason we failed */
1052 close(s);
1053 if (sototcpcb(so))
1054 tcp_close(pData, sototcpcb(so));
1055 else
1056 sofree(pData, so);
1057 /* Restore the real errno */
1058 errno = tmperrno;
1059#endif
1060 return NULL;
1061 }
1062 fd_nonblock(s);
1063 setsockopt(s, SOL_SOCKET, SO_OOBINLINE,(char *)&opt, sizeof(int));
1064
1065 getsockname(s,(struct sockaddr *)&addr,&addrlen);
1066 so->so_fport = addr.sin_port;
1067 /* set socket buffers */
1068 opt = pData->socket_rcv;
1069 status = setsockopt(s, SOL_SOCKET, SO_RCVBUF, (char *)&opt, sizeof(int));
1070 if (status < 0)
1071 {
1072 LogRel(("NAT: Error(%d) while setting RCV capacity to (%d)\n", errno, opt));
1073 goto no_sockopt;
1074 }
1075 opt = pData->socket_snd;
1076 status = setsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *)&opt, sizeof(int));
1077 if (status < 0)
1078 {
1079 LogRel(("NAT: Error(%d) while setting SND capacity to (%d)\n", errno, opt));
1080 goto no_sockopt;
1081 }
1082no_sockopt:
1083 if (addr.sin_addr.s_addr == 0 || addr.sin_addr.s_addr == loopback_addr.s_addr)
1084 so->so_faddr = alias_addr;
1085 else
1086 so->so_faddr = addr.sin_addr;
1087
1088 so->s = s;
1089 SOCKET_UNLOCK(so);
1090 return so;
1091}
1092
/**
 * Notification that data is available in so_rcv.
 *
 * Currently a no-op: the code that would write the data to the socket right
 * away is disabled.
 *
 * @todo do we really need this function, what it's intended to do?
 * @param   so      Socket with new data in so_rcv (unused).
 */
void sorwakeup(struct socket *so)
{
    NOREF(so);
#if 0
    sowrite(so);
    FD_CLR(so->s,&writefds);
#endif
}
1108
/**
 * Notification that space has been freed in so_snd, i.e. there is room for
 * another read.
 *
 * Nothing is read here; for now that is left to the main loop.
 *
 * @param   so      Socket whose so_snd has room again (unused).
 */
void sowwakeup(struct socket *so)
{
    NOREF(so);
}
1119
1120/*
1121 * Various session state calls
1122 * XXX Should be #define's
1123 * The socket state stuff needs work, these often get call 2 or 3
1124 * times each when only 1 was needed
1125 */
1126void
1127soisfconnecting(struct socket *so)
1128{
1129 so->so_state &= ~(SS_NOFDREF|SS_ISFCONNECTED|SS_FCANTRCVMORE|
1130 SS_FCANTSENDMORE|SS_FWDRAIN);
1131 so->so_state |= SS_ISFCONNECTING; /* Clobber other states */
1132}
1133
1134void
1135soisfconnected(struct socket *so)
1136{
1137 LogFlowFunc(("ENTER: so:%R[natsock]\n", so));
1138 so->so_state &= ~(SS_ISFCONNECTING|SS_FWDRAIN|SS_NOFDREF);
1139 so->so_state |= SS_ISFCONNECTED; /* Clobber other states */
1140 LogFlowFunc(("LEAVE: so:%R[natsock]\n", so));
1141}
1142
1143int
1144sofcantrcvmore(struct socket *so)
1145{
1146 int err = 0;
1147
1148 LogFlowFunc(("ENTER: so:%R[natsock]\n", so));
1149 if ((so->so_state & SS_NOFDREF) == 0)
1150 {
1151 /*
1152 * If remote closes first and then sends an RST, the recv() in
1153 * soread() will keep reporting EOF without any error
1154 * indication. As far as I can tell the only way to detect
1155 * this on Linux is to check if shutdown() succeeds here (but
1156 * see below).
1157 *
1158 * OTOH on OS X shutdown() "helpfully" checks if remote has
1159 * already closed and then always returns ENOTCONN
1160 * immediately.
1161 */
1162 int status = shutdown(so->s, SHUT_RD);
1163#if defined(RT_OS_LINUX)
1164 if (status < 0)
1165 err = errno;
1166#else
1167 RT_NOREF(status);
1168#endif
1169 }
1170 so->so_state &= ~(SS_ISFCONNECTING);
1171 if (so->so_state & SS_FCANTSENDMORE)
1172 {
1173#if defined(RT_OS_LINUX)
1174 /*
1175 * If we have closed first, and remote closes, shutdown will
1176 * return ENOTCONN, but this is expected. Don't tell the
1177 * caller there was an error.
1178 */
1179 if (err == ENOTCONN)
1180 err = 0;
1181#endif
1182 so->so_state = SS_NOFDREF; /* Don't select it */
1183 /* XXX close() here as well? */
1184 }
1185 else
1186 so->so_state |= SS_FCANTRCVMORE;
1187
1188 LogFlowFunc(("LEAVE: %d\n", err));
1189 return err;
1190}
1191
1192void
1193sofcantsendmore(struct socket *so)
1194{
1195 LogFlowFunc(("ENTER: so:%R[natsock]\n", so));
1196 if ((so->so_state & SS_NOFDREF) == 0)
1197 shutdown(so->s, 1); /* send FIN to fhost */
1198
1199 so->so_state &= ~(SS_ISFCONNECTING);
1200 if (so->so_state & SS_FCANTRCVMORE)
1201 so->so_state = SS_NOFDREF; /* as above */
1202 else
1203 so->so_state |= SS_FCANTSENDMORE;
1204 LogFlowFuncLeave();
1205}
1206
/**
 * Notification that the foreign side of the socket is disconnected.
 *
 * Currently a no-op.  A never-enabled draft cleared the connecting and
 * connected flags, closed the host socket and set the state to
 * SS_ISFDISCONNECTED; it carried the remark "XXX Do nothing ... ?".
 *
 * @param   so      The socket concerned (unused).
 */
void
soisfdisconnected(struct socket *so)
{
    NOREF(so);
}
1220
1221/*
1222 * Set write drain mode
1223 * Set CANTSENDMORE once all data has been write()n
1224 */
1225void
1226sofwdrain(struct socket *so)
1227{
1228 if (SBUF_LEN(&so->so_rcv))
1229 so->so_state |= SS_FWDRAIN;
1230 else
1231 sofcantsendmore(so);
1232}
1233
1234#if !defined(RT_OS_WINDOWS)
1235static void
1236send_icmp_to_guest(PNATState pData, char *buff, size_t len, const struct sockaddr_in *addr)
1237{
1238 struct ip *ip;
1239 uint32_t dst, src;
1240 char ip_copy[256];
1241 struct icmp *icp;
1242 int old_ip_len = 0;
1243 int hlen, original_hlen = 0;
1244 struct mbuf *m;
1245 struct icmp_msg *icm;
1246 uint8_t proto;
1247 int type = 0;
1248
1249 ip = (struct ip *)buff;
1250 /* Fix ip->ip_len to contain the total packet length including the header
1251 * in _host_ byte order for all OSes. On Darwin, that value already is in
1252 * host byte order. Solaris and Darwin report only the payload. */
1253#ifndef RT_OS_DARWIN
1254 ip->ip_len = RT_N2H_U16(ip->ip_len);
1255#endif
1256 hlen = (ip->ip_hl << 2);
1257#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
1258 ip->ip_len += hlen;
1259#endif
1260 if (ip->ip_len < hlen + ICMP_MINLEN)
1261 {
1262 Log(("send_icmp_to_guest: ICMP header is too small to understand which type/subtype of the datagram\n"));
1263 return;
1264 }
1265 icp = (struct icmp *)((char *)ip + hlen);
1266
1267 Log(("ICMP:received msg(t:%d, c:%d)\n", icp->icmp_type, icp->icmp_code));
1268 if ( icp->icmp_type != ICMP_ECHOREPLY
1269 && icp->icmp_type != ICMP_TIMXCEED
1270 && icp->icmp_type != ICMP_UNREACH)
1271 {
1272 return;
1273 }
1274
1275 /*
1276 * ICMP_ECHOREPLY, ICMP_TIMXCEED, ICMP_UNREACH minimal header size is
1277 * ICMP_ECHOREPLY assuming data 0
1278 * icmp_{type(8), code(8), cksum(16),identifier(16),seqnum(16)}
1279 */
1280 if (ip->ip_len < hlen + 8)
1281 {
1282 Log(("send_icmp_to_guest: NAT accept ICMP_{ECHOREPLY, TIMXCEED, UNREACH} the minimum size is 64 (see rfc792)\n"));
1283 return;
1284 }
1285
1286 type = icp->icmp_type;
1287 if ( type == ICMP_TIMXCEED
1288 || type == ICMP_UNREACH)
1289 {
1290 /*
1291 * ICMP_TIMXCEED, ICMP_UNREACH minimal header size is
1292 * icmp_{type(8), code(8), cksum(16),unused(32)} + IP header + 64 bit of original datagram
1293 */
1294 if (ip->ip_len < hlen + 2*8 + sizeof(struct ip))
1295 {
1296 Log(("send_icmp_to_guest: NAT accept ICMP_{TIMXCEED, UNREACH} the minimum size of ipheader + 64 bit of data (see rfc792)\n"));
1297 return;
1298 }
1299 ip = &icp->icmp_ip;
1300 }
1301
1302 icm = icmp_find_original_mbuf(pData, ip);
1303 if (icm == NULL)
1304 {
1305 Log(("NAT: Can't find the corresponding packet for the received ICMP\n"));
1306 return;
1307 }
1308
1309 m = icm->im_m;
1310 if (!m)
1311 {
1312 LogFunc(("%R[natsock] hasn't stored it's mbuf on sent\n", icm->im_so));
1313 goto done;
1314 }
1315
1316 src = addr->sin_addr.s_addr;
1317 if (type == ICMP_ECHOREPLY)
1318 {
1319 struct ip *ip0 = mtod(m, struct ip *);
1320 struct icmp *icp0 = (struct icmp *)((char *)ip0 + (ip0->ip_hl << 2));
1321 if (icp0->icmp_type != ICMP_ECHO)
1322 {
1323 Log(("NAT: we haven't found echo for this reply\n"));
1324 goto done;
1325 }
1326 /*
1327 * while combining buffer to send (see ip_icmp.c) we control ICMP header only,
1328 * IP header combined by OS network stack, our local copy of IP header contians values
1329 * in host byte order so no byte order conversion is required. IP headers fields are converting
1330 * in ip_output0 routine only.
1331 */
1332 if ( (ip->ip_len - hlen)
1333 != (ip0->ip_len - (ip0->ip_hl << 2)))
1334 {
1335 Log(("NAT: ECHO(%d) lenght doesn't match ECHOREPLY(%d)\n",
1336 (ip->ip_len - hlen), (ip0->ip_len - (ip0->ip_hl << 2))));
1337 goto done;
1338 }
1339 }
1340
1341 /* ip points on origianal ip header */
1342 ip = mtod(m, struct ip *);
1343 proto = ip->ip_p;
1344 /* Now ip is pointing on header we've sent from guest */
1345 if ( icp->icmp_type == ICMP_TIMXCEED
1346 || icp->icmp_type == ICMP_UNREACH)
1347 {
1348 old_ip_len = (ip->ip_hl << 2) + 64;
1349 if (old_ip_len > sizeof(ip_copy))
1350 old_ip_len = sizeof(ip_copy);
1351 memcpy(ip_copy, ip, old_ip_len);
1352 }
1353
1354 /* source address from original IP packet*/
1355 dst = ip->ip_src.s_addr;
1356
1357 /* overide ther tail of old packet */
1358 ip = mtod(m, struct ip *); /* ip is from mbuf we've overrided */
1359 original_hlen = ip->ip_hl << 2;
1360 /* saves original ip header and options */
1361 m_copyback(pData, m, original_hlen, len - hlen, buff + hlen);
1362 ip->ip_len = m_length(m, NULL);
1363 ip->ip_p = IPPROTO_ICMP; /* the original package could be whatever, but we're response via ICMP*/
1364
1365 icp = (struct icmp *)((char *)ip + (ip->ip_hl << 2));
1366 type = icp->icmp_type;
1367 if ( type == ICMP_TIMXCEED
1368 || type == ICMP_UNREACH)
1369 {
1370 /* according RFC 793 error messages required copy of initial IP header + 64 bit */
1371 memcpy(&icp->icmp_ip, ip_copy, old_ip_len);
1372
1373 /* undo byte order conversions done in ip_input() */
1374 HTONS(icp->icmp_ip.ip_len);
1375 HTONS(icp->icmp_ip.ip_id);
1376 HTONS(icp->icmp_ip.ip_off);
1377
1378 ip->ip_tos = ((ip->ip_tos & 0x1E) | 0xC0); /* high priority for errors */
1379 }
1380
1381 ip->ip_src.s_addr = src;
1382 ip->ip_dst.s_addr = dst;
1383 icmp_reflect(pData, m);
1384 /* m was freed */
1385 icm->im_m = NULL;
1386
1387 done:
1388 icmp_msg_delete(pData, icm);
1389}
1390
1391static void sorecvfrom_icmp_unix(PNATState pData, struct socket *so)
1392{
1393 struct sockaddr_in addr;
1394 socklen_t addrlen = sizeof(struct sockaddr_in);
1395 struct ip ip;
1396 char *buff;
1397 int len = 0;
1398
1399 /* 1- step: read the ip header */
1400 len = recvfrom(so->s, &ip, sizeof(struct ip), MSG_PEEK,
1401 (struct sockaddr *)&addr, &addrlen);
1402 if ( len < 0
1403 && ( soIgnorableErrorCode(errno)
1404 || errno == ENOTCONN))
1405 {
1406 Log(("sorecvfrom_icmp_unix: 1 - step can't read IP datagramm (would block)\n"));
1407 return;
1408 }
1409
1410 if ( len < sizeof(struct ip)
1411 || len < 0
1412 || len == 0)
1413 {
1414 u_char code;
1415 code = ICMP_UNREACH_PORT;
1416
1417 if (errno == EHOSTUNREACH)
1418 code = ICMP_UNREACH_HOST;
1419 else if (errno == ENETUNREACH)
1420 code = ICMP_UNREACH_NET;
1421
1422 LogRel(("NAT: UDP ICMP rx errno=%d (%s)\n", errno, strerror(errno)));
1423 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
1424 so->so_m = NULL;
1425 Log(("sorecvfrom_icmp_unix: 1 - step can't read IP datagramm\n"));
1426 return;
1427 }
1428 /* basic check of IP header */
1429 if ( ip.ip_v != IPVERSION
1430# ifndef RT_OS_DARWIN
1431 || ip.ip_p != IPPROTO_ICMP
1432# endif
1433 )
1434 {
1435 Log(("sorecvfrom_icmp_unix: 1 - step IP isn't IPv4\n"));
1436 return;
1437 }
1438# ifndef RT_OS_DARWIN
1439 /* Darwin reports the IP length already in host byte order. */
1440 ip.ip_len = RT_N2H_U16(ip.ip_len);
1441# endif
1442# if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
1443 /* Solaris and Darwin report the payload only */
1444 ip.ip_len += (ip.ip_hl << 2);
1445# endif
1446 /* Note: ip->ip_len in host byte order (all OS) */
1447 len = ip.ip_len;
1448 buff = RTMemAlloc(len);
1449 if (buff == NULL)
1450 {
1451 Log(("sorecvfrom_icmp_unix: 1 - step can't allocate enought room for datagram\n"));
1452 return;
1453 }
1454 /* 2 - step: we're reading rest of the datagramm to the buffer */
1455 addrlen = sizeof(struct sockaddr_in);
1456 memset(&addr, 0, addrlen);
1457 len = recvfrom(so->s, buff, len, 0,
1458 (struct sockaddr *)&addr, &addrlen);
1459 if ( len < 0
1460 && ( soIgnorableErrorCode(errno)
1461 || errno == ENOTCONN))
1462 {
1463 Log(("sorecvfrom_icmp_unix: 2 - step can't read IP body (would block expected:%d)\n",
1464 ip.ip_len));
1465 RTMemFree(buff);
1466 return;
1467 }
1468 if ( len < 0
1469 || len == 0)
1470 {
1471 Log(("sorecvfrom_icmp_unix: 2 - step read of the rest of datagramm is fallen (errno:%d, len:%d expected: %d)\n",
1472 errno, len, (ip.ip_len - sizeof(struct ip))));
1473 RTMemFree(buff);
1474 return;
1475 }
1476 /* len is modified in 2nd read, when the rest of the datagramm was read */
1477 send_icmp_to_guest(pData, buff, len, &addr);
1478 RTMemFree(buff);
1479}
1480#endif /* !RT_OS_WINDOWS */
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette