VirtualBox

source: vbox/trunk/src/VBox/Devices/Network/slirp/socket.c@ 71984

Last change on this file since 71984 was 71984, checked in by vboxsync, 7 years ago

NAT: trailing spaces

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 43.2 KB
Line 
1/* $Id: socket.c 71984 2018-04-23 15:07:03Z vboxsync $ */
2/** @file
3 * NAT - socket handling.
4 */
5
6/*
7 * Copyright (C) 2006-2017 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/*
19 * This code is based on:
20 *
21 * Copyright (c) 1995 Danny Gasparovski.
22 *
23 * Please read the file COPYRIGHT for the
24 * terms and conditions of the copyright.
25 */
26
27#include <slirp.h>
28#include "ip_icmp.h"
29#include "main.h"
30#ifdef __sun__
31#include <sys/filio.h>
32#endif
33#include <VBox/vmm/pdmdrv.h>
34#if defined (RT_OS_WINDOWS)
35#include <iprt/win/iphlpapi.h>
36#include <icmpapi.h>
37#endif
38#include <alias.h>
39
40#if defined(DECLARE_IOVEC) && defined(RT_OS_WINDOWS)
41AssertCompileMembersSameSizeAndOffset(struct iovec, iov_base, WSABUF, buf);
42AssertCompileMembersSameSizeAndOffset(struct iovec, iov_len, WSABUF, len);
43#endif
44
#ifdef VBOX_WITH_NAT_SEND2HOME
/**
 * Sends a copy of a datagram to every configured "home" address.
 *
 * For each entry of pData->pInSockAddrHomeAddress a clone of @a pSo is
 * obtained from soCloneUDPSocketWithForegnAddr() and the payload is sent to
 * that address, using the foreign port of @a pSo as destination port.
 * Individual send failures are only logged (this is a broadcast-like path).
 *
 * @returns true if at least one sendto() call reported more than zero bytes
 *          sent; false otherwise, including when cloning a socket fails.
 * @param   pData   NAT state; supplies the home address table.
 * @param   pSo     Socket that is cloned for each destination.
 * @param   pvBuf   Payload to send.
 * @param   cbBuf   Payload size in bytes.
 * @param   iFlags  Flags passed unchanged to sendto().
 */
DECLINLINE(bool) slirpSend2Home(PNATState pData, struct socket *pSo, const void *pvBuf, uint32_t cbBuf, int iFlags)
{
    int idxAddr;
    int ret = 0;
    bool fSendDone = false;
    LogFlowFunc(("Enter pSo:%R[natsock] pvBuf: %p, cbBuf: %d, iFlags: %d\n", pSo, pvBuf, cbBuf, iFlags));
    for (idxAddr = 0; idxAddr < pData->cInHomeAddressSize; ++idxAddr)
    {
        struct socket *pNewSocket = soCloneUDPSocketWithForegnAddr(pData, pSo, pData->pInSockAddrHomeAddress[idxAddr].sin_addr);
        /* AssertReturn takes (expression, return value) as two separate arguments. */
        AssertReturn(pNewSocket, false);
        pData->pInSockAddrHomeAddress[idxAddr].sin_port = pSo->so_fport;
        /** @todo more verbose on errors,
         * @note: we shouldn't care if this send fail or not (we're in broadcast).
         */
        LogFunc(("send %d bytes to %RTnaipv4 from %R[natsock]\n", cbBuf, pData->pInSockAddrHomeAddress[idxAddr].sin_addr.s_addr, pNewSocket));
        ret = sendto(pNewSocket->s, pvBuf, cbBuf, iFlags, (struct sockaddr *)&pData->pInSockAddrHomeAddress[idxAddr], sizeof(struct sockaddr_in));
        if (ret < 0)
            LogFunc(("Failed to send %d bytes to %RTnaipv4\n", cbBuf, pData->pInSockAddrHomeAddress[idxAddr].sin_addr.s_addr));
        fSendDone |= ret > 0;
    }
    LogFlowFunc(("Leave %RTbool\n", fSendDone));
    return fSendDone;
}
#endif /* VBOX_WITH_NAT_SEND2HOME */
71
72#if !defined(RT_OS_WINDOWS)
73static void send_icmp_to_guest(PNATState, char *, size_t, const struct sockaddr_in *);
74static void sorecvfrom_icmp_unix(PNATState, struct socket *);
75#endif /* !RT_OS_WINDOWS */
76
/**
 * Initialisation hook of the socket layer.
 *
 * Nothing needs to be set up here; the function body is intentionally empty.
 */
void so_init(void)
{
    /* no-op */
}
81
82struct socket *
83solookup(struct socket *head, struct in_addr laddr,
84 u_int lport, struct in_addr faddr, u_int fport)
85{
86 struct socket *so;
87
88 for (so = head->so_next; so != head; so = so->so_next)
89 {
90 if ( so->so_lport == lport
91 && so->so_laddr.s_addr == laddr.s_addr
92 && so->so_faddr.s_addr == faddr.s_addr
93 && so->so_fport == fport)
94 return so;
95 }
96
97 return (struct socket *)NULL;
98}
99
100/*
101 * Create a new socket, initialise the fields
102 * It is the responsibility of the caller to
103 * insque() it into the correct linked-list
104 */
105struct socket *
106socreate(void)
107{
108 struct socket *so;
109
110 so = (struct socket *)RTMemAllocZ(sizeof(struct socket));
111 if (so)
112 {
113 so->so_state = SS_NOFDREF;
114 so->s = -1;
115#if !defined(RT_OS_WINDOWS)
116 so->so_poll_index = -1;
117#endif
118 }
119 return so;
120}
121
122/*
123 * remque and free a socket, clobber cache
124 */
125void
126sofree(PNATState pData, struct socket *so)
127{
128 LogFlowFunc(("ENTER:%R[natsock]\n", so));
129 /*
130 * We should not remove socket when polling routine do the polling
131 * instead we mark it for deletion.
132 */
133 if (so->fUnderPolling)
134 {
135 so->fShouldBeRemoved = 1;
136 LogFlowFunc(("LEAVE:%R[natsock] postponed deletion\n", so));
137 return;
138 }
139 /**
140 * Check that we don't freeng socket with tcbcb
141 */
142 Assert(!sototcpcb(so));
143 /* udp checks */
144 Assert(!so->so_timeout);
145 Assert(!so->so_timeout_arg);
146 if (so == tcp_last_so)
147 tcp_last_so = &tcb;
148 else if (so == udp_last_so)
149 udp_last_so = &udb;
150
151 /* check if mbuf haven't been already freed */
152 if (so->so_m != NULL)
153 {
154 m_freem(pData, so->so_m);
155 so->so_m = NULL;
156 }
157
158 if (so->so_ohdr != NULL)
159 {
160 RTMemFree(so->so_ohdr);
161 so->so_ohdr = NULL;
162 }
163
164 if (so->so_next && so->so_prev)
165 {
166 remque(pData, so); /* crashes if so is not in a queue */
167 NSOCK_DEC();
168 }
169
170 RTMemFree(so);
171 LogFlowFuncLeave();
172}
173
174
175int
176sobind(PNATState pData, struct socket *so)
177{
178 bool fSamePorts = !!(pData->i32AliasMode & PKT_ALIAS_SAME_PORTS);
179 struct sockaddr_in self;
180 int opt;
181 int binderr;
182 int ret;
183
184 /* do we need to bind the socket to specific host address/port? */
185 if (pData->bindIP.s_addr == INADDR_ANY && !fSamePorts)
186 return 0;
187
188 opt = 1;
189 setsockopt(so->s, SOL_SOCKET, SO_REUSEADDR, (char *)&opt, sizeof(opt));
190
191 RT_ZERO(self);
192#ifdef RT_OS_DARWIN
193 self.sin_len = sizeof(self);
194#endif
195 self.sin_family = AF_INET;
196 self.sin_addr = pData->bindIP;
197 self.sin_port = fSamePorts ? so->so_lport : 0;
198
199 Log2(("NAT: binding guest %RTnaipv4:%d to host %RTnaipv4:%d\n",
200 so->so_laddr.s_addr, ntohs(so->so_lport),
201 self.sin_addr.s_addr, ntohs(self.sin_port)));
202
203 ret = bind(so->s, (struct sockaddr *)&self, sizeof(self));
204 if (ret == 0)
205 {
206 Log2(("NAT: ... bind ok\n"));
207 return 0;
208 }
209
210 if (self.sin_port != 0)
211 {
212 if (pData->bindIP.s_addr != INADDR_ANY)
213 {
214 Log2(("NAT: failed to bind to %RTnaipv4:%d (bindip,sameports),"
215 " retrying with random port\n",
216 self.sin_addr.s_addr, self.sin_port));
217
218 /*
219 * don't try to guess why bind() failed, retry without
220 * requesting the same port port
221 */
222 self.sin_port = 0;
223
224 ret = bind(so->s, (struct sockaddr *)&self, sizeof(self));
225 if (ret == 0) /* bindIP ok (but port is not the same) */
226 {
227 Log2(("NAT: ... bind ok (without sameports)\n"));
228 return 0;
229 }
230 }
231 else
232 {
233 Log2(("NAT: failed to bind to 0.0.0.0:%d (sameports),"
234 " ignoring sameports\n",
235 self.sin_port));
236
237 /* it's ok if we failed to get the same port */
238 return 0;
239 }
240 }
241
242 binderr = errno;
243
244 Log2(("NAT: failed to bind to %RTnaipv4 (bindip)\n",
245 self.sin_addr.s_addr));
246
247 closesocket(so->s);
248 so->s = -1;
249#ifdef RT_OS_WINDOWS
250 WSASetLastError(binderr);
251#else
252 errno = binderr;
253#endif
254 return -1;
255}
256
257
258/*
259 * Read from so's socket into sb_snd, updating all relevant sbuf fields
260 * NOTE: This will only be called if it is select()ed for reading, so
261 * a read() of 0 (or less) means it's disconnected
262 */
263int
264soread(PNATState pData, struct socket *so)
265{
266 int n, nn, lss, total;
267 struct sbuf *sb = &so->so_snd;
268 u_int len = sb->sb_datalen - sb->sb_cc;
269 struct iovec iov[2];
270 int mss = so->so_tcpcb->t_maxseg;
271 int sockerr;
272
273 STAM_PROFILE_START(&pData->StatIOread, a);
274 STAM_COUNTER_RESET(&pData->StatIORead_in_1);
275 STAM_COUNTER_RESET(&pData->StatIORead_in_2);
276
277 QSOCKET_LOCK(tcb);
278 SOCKET_LOCK(so);
279 QSOCKET_UNLOCK(tcb);
280
281 LogFlow(("soread: so = %R[natsock]\n", so));
282 Log2(("%s: so = %R[natsock] so->so_snd = %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, so, sb));
283
284 /*
285 * No need to check if there's enough room to read.
286 * soread wouldn't have been called if there weren't
287 */
288
289 len = sb->sb_datalen - sb->sb_cc;
290
291 iov[0].iov_base = sb->sb_wptr;
292 iov[1].iov_base = 0;
293 iov[1].iov_len = 0;
294 if (sb->sb_wptr < sb->sb_rptr)
295 {
296 iov[0].iov_len = sb->sb_rptr - sb->sb_wptr;
297 /* Should never succeed, but... */
298 if (iov[0].iov_len > len)
299 iov[0].iov_len = len;
300 if (iov[0].iov_len > mss)
301 iov[0].iov_len -= iov[0].iov_len%mss;
302 n = 1;
303 }
304 else
305 {
306 iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr;
307 /* Should never succeed, but... */
308 if (iov[0].iov_len > len)
309 iov[0].iov_len = len;
310 len -= iov[0].iov_len;
311 if (len)
312 {
313 iov[1].iov_base = sb->sb_data;
314 iov[1].iov_len = sb->sb_rptr - sb->sb_data;
315 if (iov[1].iov_len > len)
316 iov[1].iov_len = len;
317 total = iov[0].iov_len + iov[1].iov_len;
318 if (total > mss)
319 {
320 lss = total % mss;
321 if (iov[1].iov_len > lss)
322 {
323 iov[1].iov_len -= lss;
324 n = 2;
325 }
326 else
327 {
328 lss -= iov[1].iov_len;
329 iov[0].iov_len -= lss;
330 n = 1;
331 }
332 }
333 else
334 n = 2;
335 }
336 else
337 {
338 if (iov[0].iov_len > mss)
339 iov[0].iov_len -= iov[0].iov_len%mss;
340 n = 1;
341 }
342 }
343
344#ifdef HAVE_READV
345 nn = readv(so->s, (struct iovec *)iov, n);
346#else
347 nn = recv(so->s, iov[0].iov_base, iov[0].iov_len, (so->so_tcpcb->t_force? MSG_OOB:0));
348#endif
349 if (nn < 0)
350 sockerr = errno; /* save it, as it may be clobbered by logging */
351 else
352 sockerr = 0;
353
354 Log2(("%s: read(1) nn = %d bytes\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn));
355 Log2(("%s: so = %R[natsock] so->so_snd = %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, so, sb));
356 if (nn <= 0)
357 {
358#ifdef RT_OS_WINDOWS
359 /*
360 * Windows reports ESHUTDOWN after SHUT_RD (SD_RECEIVE)
361 * instead of just returning EOF indication.
362 */
363 if (nn < 0 && sockerr == ESHUTDOWN)
364 {
365 nn = 0;
366 sockerr = 0;
367 }
368#endif
369
370 if (nn == 0) /* XXX: should this be inside #if defined(RT_OS_WINDOWS)? */
371 {
372 /*
373 * Special case for WSAEnumNetworkEvents: If we receive 0 bytes that
374 * _could_ mean that the connection is closed. But we will receive an
375 * FD_CLOSE event later if the connection was _really_ closed. With
376 * www.youtube.com I see this very often. Closing the socket too early
377 * would be dangerous.
378 */
379 int status;
380 unsigned long pending = 0;
381 status = ioctlsocket(so->s, FIONREAD, &pending);
382 if (status < 0)
383 Log(("NAT:%s: error in WSAIoctl: %d\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, errno));
384 if (pending != 0)
385 {
386 SOCKET_UNLOCK(so);
387 STAM_PROFILE_STOP(&pData->StatIOread, a);
388 return 0;
389 }
390 }
391
392 if ( nn < 0
393 && soIgnorableErrorCode(sockerr))
394 {
395 SOCKET_UNLOCK(so);
396 STAM_PROFILE_STOP(&pData->StatIOread, a);
397 return 0;
398 }
399 else
400 {
401 int fUninitializedTemplate = 0;
402 int shuterr;
403
404 fUninitializedTemplate = RT_BOOL(( sototcpcb(so)
405 && ( sototcpcb(so)->t_template.ti_src.s_addr == INADDR_ANY
406 || sototcpcb(so)->t_template.ti_dst.s_addr == INADDR_ANY)));
407 /* nn == 0 means peer has performed an orderly shutdown */
408 Log2(("%s: disconnected, nn = %d, errno = %d (%s)\n",
409 RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn, sockerr, strerror(sockerr)));
410
411 shuterr = sofcantrcvmore(so);
412 if (!sockerr && !shuterr && !fUninitializedTemplate)
413 tcp_sockclosed(pData, sototcpcb(so));
414 else
415 {
416 LogRel2(("NAT: sockerr %d, shuterr %d - %R[natsock]\n", sockerr, shuterr, so));
417 tcp_drop(pData, sototcpcb(so), sockerr);
418 }
419 SOCKET_UNLOCK(so);
420 STAM_PROFILE_STOP(&pData->StatIOread, a);
421 return -1;
422 }
423 }
424 STAM_STATS(
425 if (n == 1)
426 {
427 STAM_COUNTER_INC(&pData->StatIORead_in_1);
428 STAM_COUNTER_ADD(&pData->StatIORead_in_1_bytes, nn);
429 }
430 else
431 {
432 STAM_COUNTER_INC(&pData->StatIORead_in_2);
433 STAM_COUNTER_ADD(&pData->StatIORead_in_2_1st_bytes, nn);
434 }
435 );
436
437#ifndef HAVE_READV
438 /*
439 * If there was no error, try and read the second time round
440 * We read again if n = 2 (ie, there's another part of the buffer)
441 * and we read as much as we could in the first read
442 * We don't test for <= 0 this time, because there legitimately
443 * might not be any more data (since the socket is non-blocking),
444 * a close will be detected on next iteration.
445 * A return of -1 wont (shouldn't) happen, since it didn't happen above
446 */
447 if (n == 2 && (unsigned)nn == iov[0].iov_len)
448 {
449 int ret;
450 ret = recv(so->s, iov[1].iov_base, iov[1].iov_len, 0);
451 if (ret > 0)
452 nn += ret;
453 STAM_STATS(
454 if (ret > 0)
455 {
456 STAM_COUNTER_INC(&pData->StatIORead_in_2);
457 STAM_COUNTER_ADD(&pData->StatIORead_in_2_2nd_bytes, ret);
458 }
459 );
460 }
461
462 Log2(("%s: read(2) nn = %d bytes\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn));
463#endif
464
465 /* Update fields */
466 sb->sb_cc += nn;
467 sb->sb_wptr += nn;
468 Log2(("%s: update so_snd (readed nn = %d) %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn, sb));
469 if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen))
470 {
471 sb->sb_wptr -= sb->sb_datalen;
472 Log2(("%s: alter sb_wptr so_snd = %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, sb));
473 }
474 STAM_PROFILE_STOP(&pData->StatIOread, a);
475 SOCKET_UNLOCK(so);
476 return nn;
477}
478
479/*
480 * Get urgent data
481 *
482 * When the socket is created, we set it SO_OOBINLINE,
483 * so when OOB data arrives, we soread() it and everything
484 * in the send buffer is sent as urgent data
485 */
486void
487sorecvoob(PNATState pData, struct socket *so)
488{
489 struct tcpcb *tp = sototcpcb(so);
490 ssize_t ret;
491
492 LogFlowFunc(("sorecvoob: so = %R[natsock]\n", so));
493
494 /*
495 * We take a guess at how much urgent data has arrived.
496 * In most situations, when urgent data arrives, the next
497 * read() should get all the urgent data. This guess will
498 * be wrong however if more data arrives just after the
499 * urgent data, or the read() doesn't return all the
500 * urgent data.
501 */
502 ret = soread(pData, so);
503 if (RT_LIKELY(ret > 0))
504 {
505 tp->snd_up = tp->snd_una + SBUF_LEN(&so->so_snd);
506 tp->t_force = 1;
507 tcp_output(pData, tp);
508 tp->t_force = 0;
509 }
510}
511
512/*
513 * Send urgent data
514 * There's a lot duplicated code here, but...
515 */
516int
517sosendoob(struct socket *so)
518{
519 struct sbuf *sb = &so->so_rcv;
520 char buff[2048]; /* XXX Shouldn't be sending more oob data than this */
521
522 int n, len;
523
524 LogFlowFunc(("sosendoob so = %R[natsock]\n", so));
525
526 if (so->so_urgc > sizeof(buff))
527 so->so_urgc = sizeof(buff); /* XXX */
528
529 if (sb->sb_rptr < sb->sb_wptr)
530 {
531 /* We can send it directly */
532 n = send(so->s, sb->sb_rptr, so->so_urgc, (MSG_OOB)); /* |MSG_DONTWAIT)); */
533 so->so_urgc -= n;
534
535 Log2((" --- sent %d bytes urgent data, %d urgent bytes left\n",
536 n, so->so_urgc));
537 }
538 else
539 {
540 /*
541 * Since there's no sendv or sendtov like writev,
542 * we must copy all data to a linear buffer then
543 * send it all
544 */
545 len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
546 if (len > so->so_urgc)
547 len = so->so_urgc;
548 memcpy(buff, sb->sb_rptr, len);
549 so->so_urgc -= len;
550 if (so->so_urgc)
551 {
552 n = sb->sb_wptr - sb->sb_data;
553 if (n > so->so_urgc)
554 n = so->so_urgc;
555 memcpy(buff + len, sb->sb_data, n);
556 so->so_urgc -= n;
557 len += n;
558 }
559 n = send(so->s, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */
560#ifdef DEBUG
561 if (n != len)
562 Log(("Didn't send all data urgently XXXXX\n"));
563#endif
564 Log2((" ---2 sent %d bytes urgent data, %d urgent bytes left\n",
565 n, so->so_urgc));
566 }
567
568 sb->sb_cc -= n;
569 sb->sb_rptr += n;
570 if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
571 sb->sb_rptr -= sb->sb_datalen;
572
573 return n;
574}
575
576/*
577 * Write data from so_rcv to so's socket,
578 * updating all sbuf field as necessary
579 */
580int
581sowrite(PNATState pData, struct socket *so)
582{
583 int n, nn;
584 struct sbuf *sb = &so->so_rcv;
585 u_int len = sb->sb_cc;
586 struct iovec iov[2];
587
588 STAM_PROFILE_START(&pData->StatIOwrite, a);
589 STAM_COUNTER_RESET(&pData->StatIOWrite_in_1);
590 STAM_COUNTER_RESET(&pData->StatIOWrite_in_1_bytes);
591 STAM_COUNTER_RESET(&pData->StatIOWrite_in_2);
592 STAM_COUNTER_RESET(&pData->StatIOWrite_in_2_1st_bytes);
593 STAM_COUNTER_RESET(&pData->StatIOWrite_in_2_2nd_bytes);
594 STAM_COUNTER_RESET(&pData->StatIOWrite_no_w);
595 STAM_COUNTER_RESET(&pData->StatIOWrite_rest);
596 STAM_COUNTER_RESET(&pData->StatIOWrite_rest_bytes);
597 LogFlowFunc(("so = %R[natsock]\n", so));
598 Log2(("%s: so = %R[natsock] so->so_rcv = %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, so, sb));
599 QSOCKET_LOCK(tcb);
600 SOCKET_LOCK(so);
601 QSOCKET_UNLOCK(tcb);
602 if (so->so_urgc)
603 {
604 sosendoob(so);
605 if (sb->sb_cc == 0)
606 {
607 SOCKET_UNLOCK(so);
608 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
609 return 0;
610 }
611 }
612
613 /*
614 * No need to check if there's something to write,
615 * sowrite wouldn't have been called otherwise
616 */
617
618 len = sb->sb_cc;
619
620 iov[0].iov_base = sb->sb_rptr;
621 iov[1].iov_base = 0;
622 iov[1].iov_len = 0;
623 if (sb->sb_rptr < sb->sb_wptr)
624 {
625 iov[0].iov_len = sb->sb_wptr - sb->sb_rptr;
626 /* Should never succeed, but... */
627 if (iov[0].iov_len > len)
628 iov[0].iov_len = len;
629 n = 1;
630 }
631 else
632 {
633 iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
634 if (iov[0].iov_len > len)
635 iov[0].iov_len = len;
636 len -= iov[0].iov_len;
637 if (len)
638 {
639 iov[1].iov_base = sb->sb_data;
640 iov[1].iov_len = sb->sb_wptr - sb->sb_data;
641 if (iov[1].iov_len > len)
642 iov[1].iov_len = len;
643 n = 2;
644 }
645 else
646 n = 1;
647 }
648 STAM_STATS({
649 if (n == 1)
650 {
651 STAM_COUNTER_INC(&pData->StatIOWrite_in_1);
652 STAM_COUNTER_ADD(&pData->StatIOWrite_in_1_bytes, iov[0].iov_len);
653 }
654 else
655 {
656 STAM_COUNTER_INC(&pData->StatIOWrite_in_2);
657 STAM_COUNTER_ADD(&pData->StatIOWrite_in_2_1st_bytes, iov[0].iov_len);
658 STAM_COUNTER_ADD(&pData->StatIOWrite_in_2_2nd_bytes, iov[1].iov_len);
659 }
660 });
661 /* Check if there's urgent data to send, and if so, send it */
662#ifdef HAVE_READV
663 nn = writev(so->s, (const struct iovec *)iov, n);
664#else
665 nn = send(so->s, iov[0].iov_base, iov[0].iov_len, 0);
666#endif
667 Log2(("%s: wrote(1) nn = %d bytes\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn));
668 /* This should never happen, but people tell me it does *shrug* */
669 if ( nn < 0
670 && soIgnorableErrorCode(errno))
671 {
672 SOCKET_UNLOCK(so);
673 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
674 return 0;
675 }
676
677 if (nn < 0 || (nn == 0 && iov[0].iov_len > 0))
678 {
679 Log2(("%s: disconnected, so->so_state = %x, errno = %d\n",
680 RT_GCC_EXTENSION __PRETTY_FUNCTION__, so->so_state, errno));
681 sofcantsendmore(so);
682 tcp_sockclosed(pData, sototcpcb(so));
683 SOCKET_UNLOCK(so);
684 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
685 return -1;
686 }
687
688#ifndef HAVE_READV
689 if (n == 2 && (unsigned)nn == iov[0].iov_len)
690 {
691 int ret;
692 ret = send(so->s, iov[1].iov_base, iov[1].iov_len, 0);
693 if (ret > 0)
694 nn += ret;
695# ifdef VBOX_WITH_STATISTICS
696 if (ret > 0 && ret != (ssize_t)iov[1].iov_len)
697 {
698 STAM_COUNTER_INC(&pData->StatIOWrite_rest);
699 STAM_COUNTER_ADD(&pData->StatIOWrite_rest_bytes, (iov[1].iov_len - ret));
700 }
701#endif
702 }
703 Log2(("%s: wrote(2) nn = %d bytes\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn));
704#endif
705
706 /* Update sbuf */
707 sb->sb_cc -= nn;
708 sb->sb_rptr += nn;
709 Log2(("%s: update so_rcv (written nn = %d) %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn, sb));
710 if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
711 {
712 sb->sb_rptr -= sb->sb_datalen;
713 Log2(("%s: alter sb_rptr of so_rcv %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, sb));
714 }
715
716 /*
717 * If in DRAIN mode, and there's no more data, set
718 * it CANTSENDMORE
719 */
720 if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0)
721 sofcantsendmore(so);
722
723 SOCKET_UNLOCK(so);
724 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
725 return nn;
726}
727
728/*
729 * recvfrom() a UDP socket
730 */
731void
732sorecvfrom(PNATState pData, struct socket *so)
733{
734 LogFlowFunc(("sorecvfrom: so = %p\n", so));
735
736#ifdef RT_OS_WINDOWS
737 /* ping is handled with ICMP API in ip_icmpwin.c */
738 Assert(so->so_type == IPPROTO_UDP);
739#else
740 if (so->so_type == IPPROTO_ICMP)
741 {
742 /* This is a "ping" reply */
743 sorecvfrom_icmp_unix(pData, so);
744 udp_detach(pData, so);
745 }
746 else
747#endif /* !RT_OS_WINDOWS */
748 {
749 static char achBuf[64 * 1024];
750
751 /* A "normal" UDP packet */
752 struct sockaddr_in addr;
753 socklen_t addrlen = sizeof(struct sockaddr_in);
754 struct iovec iov[2];
755 ssize_t nread;
756 struct mbuf *m;
757
758 QSOCKET_LOCK(udb);
759 SOCKET_LOCK(so);
760 QSOCKET_UNLOCK(udb);
761
762 m = m_getjcl(pData, M_NOWAIT, MT_HEADER, M_PKTHDR, slirp_size(pData));
763 if (m == NULL)
764 {
765 SOCKET_UNLOCK(so);
766 return;
767 }
768
769 m->m_data += ETH_HLEN;
770 m->m_pkthdr.header = mtod(m, void *);
771
772 m->m_data += sizeof(struct udpiphdr);
773
774 /* small packets will fit without copying */
775 iov[0].iov_base = mtod(m, char *);
776 iov[0].iov_len = M_TRAILINGSPACE(m);
777
778 /* large packets will spill into a temp buffer */
779 iov[1].iov_base = achBuf;
780 iov[1].iov_len = sizeof(achBuf);
781
782#if !defined(RT_OS_WINDOWS)
783 {
784 struct msghdr mh;
785 memset(&mh, 0, sizeof(mh));
786
787 mh.msg_iov = iov;
788 mh.msg_iovlen = 2;
789 mh.msg_name = &addr;
790 mh.msg_namelen = addrlen;
791
792 nread = recvmsg(so->s, &mh, 0);
793 }
794#else /* RT_OS_WINDOWS */
795 {
796 DWORD nbytes; /* NB: can't use nread b/c of different size */
797 DWORD flags = 0;
798 int status;
799 AssertCompile(sizeof(WSABUF) == sizeof(struct iovec));
800 AssertCompileMembersSameSizeAndOffset(WSABUF, len, struct iovec, iov_len);
801 AssertCompileMembersSameSizeAndOffset(WSABUF, buf, struct iovec, iov_base);
802 status = WSARecvFrom(so->s, (WSABUF *)&iov[0], 2, &nbytes, &flags,
803 (struct sockaddr *)&addr, &addrlen,
804 NULL, NULL);
805 if (status != SOCKET_ERROR)
806 nread = nbytes;
807 else
808 nread = -1;
809 }
810#endif
811 if (nread >= 0)
812 {
813 if (nread <= iov[0].iov_len)
814 m->m_len = nread;
815 else
816 {
817 m->m_len = iov[0].iov_len;
818 m_append(pData, m, nread - iov[0].iov_len, iov[1].iov_base);
819 }
820 Assert(m_length(m, NULL) == (size_t)nread);
821
822 /*
823 * Hack: domain name lookup will be used the most for UDP,
824 * and since they'll only be used once there's no need
825 * for the 4 minute (or whatever) timeout... So we time them
826 * out much quicker (10 seconds for now...)
827 */
828 if (so->so_expire)
829 {
830 if (so->so_fport != RT_H2N_U16_C(53))
831 so->so_expire = curtime + SO_EXPIRE;
832 }
833
834 /*
835 * DNS proxy requests are forwarded to the real resolver,
836 * but its socket's so_faddr is that of the DNS proxy
837 * itself.
838 *
839 * last argument should be changed if Slirp will inject IP attributes
840 */
841 if ( pData->fUseDnsProxy
842 && so->so_fport == RT_H2N_U16_C(53)
843 && CTL_CHECK(so->so_faddr.s_addr, CTL_DNS))
844 dnsproxy_answer(pData, so, m);
845
846 /* packets definetly will be fragmented, could confuse receiver peer. */
847 if (nread > if_mtu)
848 m->m_flags |= M_SKIP_FIREWALL;
849
850 /*
851 * If this packet was destined for CTL_ADDR,
852 * make it look like that's where it came from, done by udp_output
853 */
854 udp_output(pData, so, m, &addr);
855 }
856 else
857 {
858 m_freem(pData, m);
859
860 if (!soIgnorableErrorCode(errno))
861 {
862 u_char code;
863 if (errno == EHOSTUNREACH)
864 code = ICMP_UNREACH_HOST;
865 else if (errno == ENETUNREACH)
866 code = ICMP_UNREACH_NET;
867 else
868 code = ICMP_UNREACH_PORT;
869
870 Log2((" rx error, tx icmp ICMP_UNREACH:%i\n", code));
871 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
872 so->so_m = NULL;
873 }
874 }
875
876 SOCKET_UNLOCK(so);
877 }
878}
879
880/*
881 * sendto() a socket
882 */
883int
884sosendto(PNATState pData, struct socket *so, struct mbuf *m)
885{
886 int ret;
887 struct sockaddr_in *paddr;
888 struct sockaddr addr;
889#if 0
890 struct sockaddr_in host_addr;
891#endif
892 caddr_t buf = 0;
893 int mlen;
894
895 LogFlowFunc(("sosendto: so = %R[natsock], m = %p\n", so, m));
896
897 memset(&addr, 0, sizeof(struct sockaddr));
898#ifdef RT_OS_DARWIN
899 addr.sa_len = sizeof(struct sockaddr_in);
900#endif
901 paddr = (struct sockaddr_in *)&addr;
902 paddr->sin_family = AF_INET;
903 if ((so->so_faddr.s_addr & RT_H2N_U32(pData->netmask)) == pData->special_addr.s_addr)
904 {
905 /* It's an alias */
906 uint32_t last_byte = RT_N2H_U32(so->so_faddr.s_addr) & ~pData->netmask;
907 switch(last_byte)
908 {
909#if 0
910 /* handle this case at 'default:' */
911 case CTL_BROADCAST:
912 addr.sin_addr.s_addr = INADDR_BROADCAST;
913 /* Send the packet to host to fully emulate broadcast */
914 /** @todo r=klaus: on Linux host this causes the host to receive
915 * the packet twice for some reason. And I cannot find any place
916 * in the man pages which states that sending a broadcast does not
917 * reach the host itself. */
918 host_addr.sin_family = AF_INET;
919 host_addr.sin_port = so->so_fport;
920 host_addr.sin_addr = our_addr;
921 sendto(so->s, m->m_data, m->m_len, 0,
922 (struct sockaddr *)&host_addr, sizeof (struct sockaddr));
923 break;
924#endif
925 case CTL_DNS:
926 case CTL_ALIAS:
927 default:
928 if (last_byte == ~pData->netmask)
929 paddr->sin_addr.s_addr = INADDR_BROADCAST;
930 else
931 paddr->sin_addr = loopback_addr;
932 break;
933 }
934 }
935 else
936 paddr->sin_addr = so->so_faddr;
937 paddr->sin_port = so->so_fport;
938
939 Log2((" sendto()ing, addr.sin_port=%d, addr.sin_addr.s_addr=%.16s\n",
940 RT_N2H_U16(paddr->sin_port), inet_ntoa(paddr->sin_addr)));
941
942 /* Don't care what port we get */
943 /*
944 * > nmap -sV -T4 -O -A -v -PU3483 255.255.255.255
945 * generates bodyless messages, annoying memmory management system.
946 */
947 mlen = m_length(m, NULL);
948 if (mlen > 0)
949 {
950 buf = RTMemAlloc(mlen);
951 if (buf == NULL)
952 {
953 return -1;
954 }
955 m_copydata(m, 0, mlen, buf);
956 }
957 ret = sendto(so->s, buf, mlen, 0,
958 (struct sockaddr *)&addr, sizeof (struct sockaddr));
959#ifdef VBOX_WITH_NAT_SEND2HOME
960 if (slirpIsWideCasting(pData, so->so_faddr.s_addr))
961 {
962 slirpSend2Home(pData, so, buf, mlen, 0);
963 }
964#endif
965 if (buf)
966 RTMemFree(buf);
967 if (ret < 0)
968 {
969 Log2(("UDP: sendto fails (%s)\n", strerror(errno)));
970 return -1;
971 }
972
973 /*
974 * Kill the socket if there's no reply in 4 minutes,
975 * but only if it's an expirable socket
976 */
977 if (so->so_expire)
978 so->so_expire = curtime + SO_EXPIRE;
979 so->so_state = SS_ISFCONNECTED; /* So that it gets select()ed */
980 return 0;
981}
982
983/*
984 * XXX This should really be tcp_listen
985 */
986struct socket *
987solisten(PNATState pData, u_int32_t bind_addr, u_int port, u_int32_t laddr, u_int lport, int flags)
988{
989 struct sockaddr_in addr;
990 struct socket *so;
991 socklen_t addrlen = sizeof(addr);
992 int s, opt = 1;
993 int status;
994
995 LogFlowFunc(("solisten: port = %d, laddr = %x, lport = %d, flags = %x\n", port, laddr, lport, flags));
996
997 if ((so = socreate()) == NULL)
998 {
999 /* RTMemFree(so); Not sofree() ??? free(NULL) == NOP */
1000 return NULL;
1001 }
1002
1003 /* Don't tcp_attach... we don't need so_snd nor so_rcv */
1004 if ((so->so_tcpcb = tcp_newtcpcb(pData, so)) == NULL)
1005 {
1006 RTMemFree(so);
1007 return NULL;
1008 }
1009
1010 SOCKET_LOCK_CREATE(so);
1011 SOCKET_LOCK(so);
1012 QSOCKET_LOCK(tcb);
1013 insque(pData, so,&tcb);
1014 NSOCK_INC();
1015 QSOCKET_UNLOCK(tcb);
1016
1017 /*
1018 * SS_FACCEPTONCE sockets must time out.
1019 */
1020 if (flags & SS_FACCEPTONCE)
1021 so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT*2;
1022
1023 so->so_state = (SS_FACCEPTCONN|flags);
1024 so->so_lport = lport; /* Kept in network format */
1025 so->so_laddr.s_addr = laddr; /* Ditto */
1026
1027 memset(&addr, 0, sizeof(addr));
1028#ifdef RT_OS_DARWIN
1029 addr.sin_len = sizeof(addr);
1030#endif
1031 addr.sin_family = AF_INET;
1032 addr.sin_addr.s_addr = bind_addr;
1033 addr.sin_port = port;
1034
1035 /**
1036 * changing listen(,1->SOMAXCONN) shouldn't be harmful for NAT's TCP/IP stack,
1037 * kernel will choose the optimal value for requests queue length.
1038 * @note: MSDN recommends low (2-4) values for bluetooth networking devices.
1039 */
1040 if ( ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0)
1041 || (setsockopt(s, SOL_SOCKET, SO_REUSEADDR,(char *)&opt, sizeof(int)) < 0)
1042 || (bind(s,(struct sockaddr *)&addr, sizeof(addr)) < 0)
1043 || (listen(s, pData->soMaxConn) < 0))
1044 {
1045#ifdef RT_OS_WINDOWS
1046 int tmperrno = WSAGetLastError(); /* Don't clobber the real reason we failed */
1047 closesocket(s);
1048 QSOCKET_LOCK(tcb);
1049 sofree(pData, so);
1050 QSOCKET_UNLOCK(tcb);
1051 /* Restore the real errno */
1052 WSASetLastError(tmperrno);
1053#else
1054 int tmperrno = errno; /* Don't clobber the real reason we failed */
1055 close(s);
1056 if (sototcpcb(so))
1057 tcp_close(pData, sototcpcb(so));
1058 else
1059 sofree(pData, so);
1060 /* Restore the real errno */
1061 errno = tmperrno;
1062#endif
1063 return NULL;
1064 }
1065 fd_nonblock(s);
1066 setsockopt(s, SOL_SOCKET, SO_OOBINLINE,(char *)&opt, sizeof(int));
1067
1068 getsockname(s,(struct sockaddr *)&addr,&addrlen);
1069 so->so_fport = addr.sin_port;
1070 /* set socket buffers */
1071 opt = pData->socket_rcv;
1072 status = setsockopt(s, SOL_SOCKET, SO_RCVBUF, (char *)&opt, sizeof(int));
1073 if (status < 0)
1074 {
1075 LogRel(("NAT: Error(%d) while setting RCV capacity to (%d)\n", errno, opt));
1076 goto no_sockopt;
1077 }
1078 opt = pData->socket_snd;
1079 status = setsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *)&opt, sizeof(int));
1080 if (status < 0)
1081 {
1082 LogRel(("NAT: Error(%d) while setting SND capacity to (%d)\n", errno, opt));
1083 goto no_sockopt;
1084 }
1085no_sockopt:
1086 if (addr.sin_addr.s_addr == 0 || addr.sin_addr.s_addr == loopback_addr.s_addr)
1087 so->so_faddr = alias_addr;
1088 else
1089 so->so_faddr = addr.sin_addr;
1090
1091 so->s = s;
1092 SOCKET_UNLOCK(so);
1093 return so;
1094}
1095
/**
 * Notification that data is available in so_rcv.
 *
 * Currently a no-op: writing the data to the host socket right here
 * (sowrite + clearing the descriptor from the write set) is not done yet.
 *
 * @todo do we really need this function, what it's intended to do?
 *
 * @param   so      Socket with new data in so_rcv (unused).
 */
void
sorwakeup(struct socket *so)
{
    RT_NOREF(so);
}
1111
/**
 * Notification that space has been freed in so_snd.
 *
 * There would be room for a read() now, but nothing is read here; that is
 * left to the main loop.
 *
 * @param   so      Socket with newly freed space in so_snd (unused).
 */
void
sowwakeup(struct socket *so)
{
    RT_NOREF(so);
}
1122
1123/*
1124 * Various session state calls
1125 * XXX Should be #define's
1126 * The socket state stuff needs work, these often get call 2 or 3
1127 * times each when only 1 was needed
1128 */
1129void
1130soisfconnecting(struct socket *so)
1131{
1132 so->so_state &= ~(SS_NOFDREF|SS_ISFCONNECTED|SS_FCANTRCVMORE|
1133 SS_FCANTSENDMORE|SS_FWDRAIN);
1134 so->so_state |= SS_ISFCONNECTING; /* Clobber other states */
1135}
1136
1137void
1138soisfconnected(struct socket *so)
1139{
1140 LogFlowFunc(("ENTER: so:%R[natsock]\n", so));
1141 so->so_state &= ~(SS_ISFCONNECTING|SS_FWDRAIN|SS_NOFDREF);
1142 so->so_state |= SS_ISFCONNECTED; /* Clobber other states */
1143 LogFlowFunc(("LEAVE: so:%R[natsock]\n", so));
1144}
1145
1146int
1147sofcantrcvmore(struct socket *so)
1148{
1149 int err = 0;
1150
1151 LogFlowFunc(("ENTER: so:%R[natsock]\n", so));
1152 if ((so->so_state & SS_NOFDREF) == 0)
1153 {
1154 /*
1155 * If remote closes first and then sends an RST, the recv() in
1156 * soread() will keep reporting EOF without any error
1157 * indication. As far as I can tell the only way to detect
1158 * this on Linux is to check if shutdown() succeeds here (but
1159 * see below).
1160 *
1161 * OTOH on OS X shutdown() "helpfully" checks if remote has
1162 * already closed and then always returns ENOTCONN
1163 * immediately.
1164 */
1165 int status = shutdown(so->s, SHUT_RD);
1166#if defined(RT_OS_LINUX)
1167 if (status < 0)
1168 err = errno;
1169#else
1170 RT_NOREF(status);
1171#endif
1172 }
1173 so->so_state &= ~(SS_ISFCONNECTING);
1174 if (so->so_state & SS_FCANTSENDMORE)
1175 {
1176#if defined(RT_OS_LINUX)
1177 /*
1178 * If we have closed first, and remote closes, shutdown will
1179 * return ENOTCONN, but this is expected. Don't tell the
1180 * caller there was an error.
1181 */
1182 if (err == ENOTCONN)
1183 err = 0;
1184#endif
1185 so->so_state = SS_NOFDREF; /* Don't select it */
1186 /* XXX close() here as well? */
1187 }
1188 else
1189 so->so_state |= SS_FCANTRCVMORE;
1190
1191 LogFlowFunc(("LEAVE: %d\n", err));
1192 return err;
1193}
1194
1195void
1196sofcantsendmore(struct socket *so)
1197{
1198 LogFlowFunc(("ENTER: so:%R[natsock]\n", so));
1199 if ((so->so_state & SS_NOFDREF) == 0)
1200 shutdown(so->s, 1); /* send FIN to fhost */
1201
1202 so->so_state &= ~(SS_ISFCONNECTING);
1203 if (so->so_state & SS_FCANTRCVMORE)
1204 so->so_state = SS_NOFDREF; /* as above */
1205 else
1206 so->so_state |= SS_FCANTSENDMORE;
1207 LogFlowFuncLeave();
1208}
1209
/*
 * Foreign-side disconnect hook.
 *
 * Currently does nothing: the code that would close the host socket and
 * switch the state to SS_ISFDISCONNECTED is compiled out.
 */
void soisfdisconnected(struct socket *so)
{
    /* The parameter is only referenced by the disabled code. */
    NOREF(so);
#if 0
    so->so_state &= ~(SS_ISFCONNECTING|SS_ISFCONNECTED);
    close(so->s);
    so->so_state = SS_ISFDISCONNECTED;
    /*
     * XXX Do nothing ... ?
     */
#endif
}
1223
1224/*
1225 * Set write drain mode
1226 * Set CANTSENDMORE once all data has been write()n
1227 */
1228void
1229sofwdrain(struct socket *so)
1230{
1231 if (SBUF_LEN(&so->so_rcv))
1232 so->so_state |= SS_FWDRAIN;
1233 else
1234 sofcantsendmore(so);
1235}
1236
1237#if !defined(RT_OS_WINDOWS)
1238static void
1239send_icmp_to_guest(PNATState pData, char *buff, size_t len, const struct sockaddr_in *addr)
1240{
1241 struct ip *ip;
1242 uint32_t dst, src;
1243 char ip_copy[256];
1244 struct icmp *icp;
1245 int old_ip_len = 0;
1246 int hlen, original_hlen = 0;
1247 struct mbuf *m;
1248 struct icmp_msg *icm;
1249 uint8_t proto;
1250 int type = 0;
1251
1252 ip = (struct ip *)buff;
1253 /* Fix ip->ip_len to contain the total packet length including the header
1254 * in _host_ byte order for all OSes. On Darwin, that value already is in
1255 * host byte order. Solaris and Darwin report only the payload. */
1256#ifndef RT_OS_DARWIN
1257 ip->ip_len = RT_N2H_U16(ip->ip_len);
1258#endif
1259 hlen = (ip->ip_hl << 2);
1260#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
1261 ip->ip_len += hlen;
1262#endif
1263 if (ip->ip_len < hlen + ICMP_MINLEN)
1264 {
1265 Log(("send_icmp_to_guest: ICMP header is too small to understand which type/subtype of the datagram\n"));
1266 return;
1267 }
1268 icp = (struct icmp *)((char *)ip + hlen);
1269
1270 Log(("ICMP:received msg(t:%d, c:%d)\n", icp->icmp_type, icp->icmp_code));
1271 if ( icp->icmp_type != ICMP_ECHOREPLY
1272 && icp->icmp_type != ICMP_TIMXCEED
1273 && icp->icmp_type != ICMP_UNREACH)
1274 {
1275 return;
1276 }
1277
1278 /*
1279 * ICMP_ECHOREPLY, ICMP_TIMXCEED, ICMP_UNREACH minimal header size is
1280 * ICMP_ECHOREPLY assuming data 0
1281 * icmp_{type(8), code(8), cksum(16),identifier(16),seqnum(16)}
1282 */
1283 if (ip->ip_len < hlen + 8)
1284 {
1285 Log(("send_icmp_to_guest: NAT accept ICMP_{ECHOREPLY, TIMXCEED, UNREACH} the minimum size is 64 (see rfc792)\n"));
1286 return;
1287 }
1288
1289 type = icp->icmp_type;
1290 if ( type == ICMP_TIMXCEED
1291 || type == ICMP_UNREACH)
1292 {
1293 /*
1294 * ICMP_TIMXCEED, ICMP_UNREACH minimal header size is
1295 * icmp_{type(8), code(8), cksum(16),unused(32)} + IP header + 64 bit of original datagram
1296 */
1297 if (ip->ip_len < hlen + 2*8 + sizeof(struct ip))
1298 {
1299 Log(("send_icmp_to_guest: NAT accept ICMP_{TIMXCEED, UNREACH} the minimum size of ipheader + 64 bit of data (see rfc792)\n"));
1300 return;
1301 }
1302 ip = &icp->icmp_ip;
1303 }
1304
1305 icm = icmp_find_original_mbuf(pData, ip);
1306 if (icm == NULL)
1307 {
1308 Log(("NAT: Can't find the corresponding packet for the received ICMP\n"));
1309 return;
1310 }
1311
1312 m = icm->im_m;
1313 if (!m)
1314 {
1315 LogFunc(("%R[natsock] hasn't stored it's mbuf on sent\n", icm->im_so));
1316 goto done;
1317 }
1318
1319 src = addr->sin_addr.s_addr;
1320 if (type == ICMP_ECHOREPLY)
1321 {
1322 struct ip *ip0 = mtod(m, struct ip *);
1323 struct icmp *icp0 = (struct icmp *)((char *)ip0 + (ip0->ip_hl << 2));
1324 if (icp0->icmp_type != ICMP_ECHO)
1325 {
1326 Log(("NAT: we haven't found echo for this reply\n"));
1327 goto done;
1328 }
1329 /*
1330 * while combining buffer to send (see ip_icmp.c) we control ICMP header only,
1331 * IP header combined by OS network stack, our local copy of IP header contians values
1332 * in host byte order so no byte order conversion is required. IP headers fields are converting
1333 * in ip_output0 routine only.
1334 */
1335 if ( (ip->ip_len - hlen)
1336 != (ip0->ip_len - (ip0->ip_hl << 2)))
1337 {
1338 Log(("NAT: ECHO(%d) lenght doesn't match ECHOREPLY(%d)\n",
1339 (ip->ip_len - hlen), (ip0->ip_len - (ip0->ip_hl << 2))));
1340 goto done;
1341 }
1342 }
1343
1344 /* ip points on origianal ip header */
1345 ip = mtod(m, struct ip *);
1346 proto = ip->ip_p;
1347 /* Now ip is pointing on header we've sent from guest */
1348 if ( icp->icmp_type == ICMP_TIMXCEED
1349 || icp->icmp_type == ICMP_UNREACH)
1350 {
1351 old_ip_len = (ip->ip_hl << 2) + 64;
1352 if (old_ip_len > sizeof(ip_copy))
1353 old_ip_len = sizeof(ip_copy);
1354 memcpy(ip_copy, ip, old_ip_len);
1355 }
1356
1357 /* source address from original IP packet*/
1358 dst = ip->ip_src.s_addr;
1359
1360 /* overide ther tail of old packet */
1361 ip = mtod(m, struct ip *); /* ip is from mbuf we've overrided */
1362 original_hlen = ip->ip_hl << 2;
1363 /* saves original ip header and options */
1364 m_copyback(pData, m, original_hlen, len - hlen, buff + hlen);
1365 ip->ip_len = m_length(m, NULL);
1366 ip->ip_p = IPPROTO_ICMP; /* the original package could be whatever, but we're response via ICMP*/
1367
1368 icp = (struct icmp *)((char *)ip + (ip->ip_hl << 2));
1369 type = icp->icmp_type;
1370 if ( type == ICMP_TIMXCEED
1371 || type == ICMP_UNREACH)
1372 {
1373 /* according RFC 793 error messages required copy of initial IP header + 64 bit */
1374 memcpy(&icp->icmp_ip, ip_copy, old_ip_len);
1375
1376 /* undo byte order conversions done in ip_input() */
1377 HTONS(icp->icmp_ip.ip_len);
1378 HTONS(icp->icmp_ip.ip_id);
1379 HTONS(icp->icmp_ip.ip_off);
1380
1381 ip->ip_tos = ((ip->ip_tos & 0x1E) | 0xC0); /* high priority for errors */
1382 }
1383
1384 ip->ip_src.s_addr = src;
1385 ip->ip_dst.s_addr = dst;
1386 icmp_reflect(pData, m);
1387 /* m was freed */
1388 icm->im_m = NULL;
1389
1390 done:
1391 icmp_msg_delete(pData, icm);
1392}
1393
1394static void sorecvfrom_icmp_unix(PNATState pData, struct socket *so)
1395{
1396 struct sockaddr_in addr;
1397 socklen_t addrlen = sizeof(struct sockaddr_in);
1398 struct ip ip;
1399 char *buff;
1400 int len = 0;
1401
1402 /* 1- step: read the ip header */
1403 len = recvfrom(so->s, &ip, sizeof(struct ip), MSG_PEEK,
1404 (struct sockaddr *)&addr, &addrlen);
1405 if ( len < 0
1406 && ( soIgnorableErrorCode(errno)
1407 || errno == ENOTCONN))
1408 {
1409 Log(("sorecvfrom_icmp_unix: 1 - step can't read IP datagramm (would block)\n"));
1410 return;
1411 }
1412
1413 if ( len < sizeof(struct ip)
1414 || len < 0
1415 || len == 0)
1416 {
1417 u_char code;
1418 code = ICMP_UNREACH_PORT;
1419
1420 if (errno == EHOSTUNREACH)
1421 code = ICMP_UNREACH_HOST;
1422 else if (errno == ENETUNREACH)
1423 code = ICMP_UNREACH_NET;
1424
1425 LogRel(("NAT: UDP ICMP rx errno=%d (%s)\n", errno, strerror(errno)));
1426 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
1427 so->so_m = NULL;
1428 Log(("sorecvfrom_icmp_unix: 1 - step can't read IP datagramm\n"));
1429 return;
1430 }
1431 /* basic check of IP header */
1432 if ( ip.ip_v != IPVERSION
1433# ifndef RT_OS_DARWIN
1434 || ip.ip_p != IPPROTO_ICMP
1435# endif
1436 )
1437 {
1438 Log(("sorecvfrom_icmp_unix: 1 - step IP isn't IPv4\n"));
1439 return;
1440 }
1441# ifndef RT_OS_DARWIN
1442 /* Darwin reports the IP length already in host byte order. */
1443 ip.ip_len = RT_N2H_U16(ip.ip_len);
1444# endif
1445# if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
1446 /* Solaris and Darwin report the payload only */
1447 ip.ip_len += (ip.ip_hl << 2);
1448# endif
1449 /* Note: ip->ip_len in host byte order (all OS) */
1450 len = ip.ip_len;
1451 buff = RTMemAlloc(len);
1452 if (buff == NULL)
1453 {
1454 Log(("sorecvfrom_icmp_unix: 1 - step can't allocate enought room for datagram\n"));
1455 return;
1456 }
1457 /* 2 - step: we're reading rest of the datagramm to the buffer */
1458 addrlen = sizeof(struct sockaddr_in);
1459 memset(&addr, 0, addrlen);
1460 len = recvfrom(so->s, buff, len, 0,
1461 (struct sockaddr *)&addr, &addrlen);
1462 if ( len < 0
1463 && ( soIgnorableErrorCode(errno)
1464 || errno == ENOTCONN))
1465 {
1466 Log(("sorecvfrom_icmp_unix: 2 - step can't read IP body (would block expected:%d)\n",
1467 ip.ip_len));
1468 RTMemFree(buff);
1469 return;
1470 }
1471 if ( len < 0
1472 || len == 0)
1473 {
1474 Log(("sorecvfrom_icmp_unix: 2 - step read of the rest of datagramm is fallen (errno:%d, len:%d expected: %d)\n",
1475 errno, len, (ip.ip_len - sizeof(struct ip))));
1476 RTMemFree(buff);
1477 return;
1478 }
1479 /* len is modified in 2nd read, when the rest of the datagramm was read */
1480 send_icmp_to_guest(pData, buff, len, &addr);
1481 RTMemFree(buff);
1482}
1483#endif /* !RT_OS_WINDOWS */
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette