VirtualBox

source: vbox/trunk/src/VBox/Devices/Network/slirp/socket.c@ 71945

Last change on this file since 71945 was 71945, checked in by vboxsync, 7 years ago

NAT: unifdef -U VBOX_WITH_NAT_UDP_SOCKET_CLONE - g/c old experiment.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 41.0 KB
Line 
1/* $Id: socket.c 71945 2018-04-20 14:10:36Z vboxsync $ */
2/** @file
3 * NAT - socket handling.
4 */
5
6/*
7 * Copyright (C) 2006-2017 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/*
19 * This code is based on:
20 *
21 * Copyright (c) 1995 Danny Gasparovski.
22 *
23 * Please read the file COPYRIGHT for the
24 * terms and conditions of the copyright.
25 */
26
27#include <slirp.h>
28#include "ip_icmp.h"
29#include "main.h"
30#ifdef __sun__
31#include <sys/filio.h>
32#endif
33#include <VBox/vmm/pdmdrv.h>
34#if defined (RT_OS_WINDOWS)
35#include <iprt/win/iphlpapi.h>
36#include <icmpapi.h>
37#endif
38
39#if defined(DECLARE_IOVEC) && defined(RT_OS_WINDOWS)
40AssertCompileMembersSameSizeAndOffset(struct iovec, iov_base, WSABUF, buf);
41AssertCompileMembersSameSizeAndOffset(struct iovec, iov_len, WSABUF, len);
42#endif
43
#ifdef VBOX_WITH_NAT_SEND2HOME
/**
 * Re-sends a UDP payload to every configured "home" address.
 *
 * For each entry of pData->pInSockAddrHomeAddress a socket is obtained from
 * soCloneUDPSocketWithForegnAddr() and the payload is sent through it to that
 * entry, whose port is first overwritten with the foreign port of @a pSo.
 * A failing sendto() is only logged (the traffic is broadcast-like, so a
 * single failure is not fatal).
 *
 * @returns true if at least one sendto() reported more than zero bytes sent;
 *          false otherwise, including when obtaining a socket clone failed
 *          (in which case the loop is abandoned immediately).
 * @param   pData   NAT state holding the home address table.
 * @param   pSo     Socket the datagram originally belongs to.
 * @param   pvBuf   Payload to send.
 * @param   cbBuf   Payload size in bytes.
 * @param   iFlags  Flags handed to sendto() unchanged.
 */
DECLINLINE(bool) slirpSend2Home(PNATState pData, struct socket *pSo, const void *pvBuf, uint32_t cbBuf, int iFlags)
{
    int  idxAddr;
    int  ret = 0;
    bool fSendDone = false;

    LogFlowFunc(("Enter pSo:%R[natsock] pvBuf: %p, cbBuf: %u, iFlags: %d\n", pSo, pvBuf, cbBuf, iFlags));
    for (idxAddr = 0; idxAddr < pData->cInHomeAddressSize; ++idxAddr)
    {
        struct socket *pNewSocket = soCloneUDPSocketWithForegnAddr(pData, pSo,
                                                                   pData->pInSockAddrHomeAddress[idxAddr].sin_addr);
        /* AssertReturn takes (expression, return value) as two separate arguments. */
        AssertReturn(pNewSocket, false);

        pData->pInSockAddrHomeAddress[idxAddr].sin_port = pSo->so_fport;
        /** @todo more verbose on errors,
         * @note: we shouldn't care if this send fail or not (we're in broadcast).
         */
        LogFunc(("send %u bytes to %RTnaipv4 from %R[natsock]\n",
                 cbBuf, pData->pInSockAddrHomeAddress[idxAddr].sin_addr.s_addr, pNewSocket));
        ret = sendto(pNewSocket->s, pvBuf, cbBuf, iFlags,
                     (struct sockaddr *)&pData->pInSockAddrHomeAddress[idxAddr], sizeof(struct sockaddr_in));
        if (ret < 0)
            LogFunc(("Failed to send %u bytes to %RTnaipv4\n",
                     cbBuf, pData->pInSockAddrHomeAddress[idxAddr].sin_addr.s_addr));
        else if (ret > 0)
            fSendDone = true;
    }
    LogFlowFunc(("Leave %RTbool\n", fSendDone));
    return fSendDone;
}
#endif /* VBOX_WITH_NAT_SEND2HOME */
70
71#if !defined(RT_OS_WINDOWS)
72static void send_icmp_to_guest(PNATState, char *, size_t, const struct sockaddr_in *);
73static void sorecvfrom_icmp_unix(PNATState, struct socket *);
74#endif /* !RT_OS_WINDOWS */
75
/**
 * Socket module initialisation hook.
 *
 * There is currently nothing to set up, so the body is intentionally empty.
 */
void so_init(void)
{
    /* nothing to do */
}
80
81struct socket *
82solookup(struct socket *head, struct in_addr laddr,
83 u_int lport, struct in_addr faddr, u_int fport)
84{
85 struct socket *so;
86
87 for (so = head->so_next; so != head; so = so->so_next)
88 {
89 if ( so->so_lport == lport
90 && so->so_laddr.s_addr == laddr.s_addr
91 && so->so_faddr.s_addr == faddr.s_addr
92 && so->so_fport == fport)
93 return so;
94 }
95
96 return (struct socket *)NULL;
97}
98
99/*
100 * Create a new socket, initialise the fields
101 * It is the responsibility of the caller to
102 * insque() it into the correct linked-list
103 */
104struct socket *
105socreate(void)
106{
107 struct socket *so;
108
109 so = (struct socket *)RTMemAllocZ(sizeof(struct socket));
110 if (so)
111 {
112 so->so_state = SS_NOFDREF;
113 so->s = -1;
114#if !defined(RT_OS_WINDOWS)
115 so->so_poll_index = -1;
116#endif
117 }
118 return so;
119}
120
121/*
122 * remque and free a socket, clobber cache
123 */
124void
125sofree(PNATState pData, struct socket *so)
126{
127 LogFlowFunc(("ENTER:%R[natsock]\n", so));
128 /*
129 * We should not remove socket when polling routine do the polling
130 * instead we mark it for deletion.
131 */
132 if (so->fUnderPolling)
133 {
134 so->fShouldBeRemoved = 1;
135 LogFlowFunc(("LEAVE:%R[natsock] postponed deletion\n", so));
136 return;
137 }
138 /**
139 * Check that we don't freeng socket with tcbcb
140 */
141 Assert(!sototcpcb(so));
142 /* udp checks */
143 Assert(!so->so_timeout);
144 Assert(!so->so_timeout_arg);
145 if (so == tcp_last_so)
146 tcp_last_so = &tcb;
147 else if (so == udp_last_so)
148 udp_last_so = &udb;
149
150 /* check if mbuf haven't been already freed */
151 if (so->so_m != NULL)
152 {
153 m_freem(pData, so->so_m);
154 so->so_m = NULL;
155 }
156
157 if (so->so_ohdr != NULL)
158 {
159 RTMemFree(so->so_ohdr);
160 so->so_ohdr = NULL;
161 }
162
163 if (so->so_next && so->so_prev)
164 {
165 remque(pData, so); /* crashes if so is not in a queue */
166 NSOCK_DEC();
167 }
168
169 RTMemFree(so);
170 LogFlowFuncLeave();
171}
172
173/*
174 * Read from so's socket into sb_snd, updating all relevant sbuf fields
175 * NOTE: This will only be called if it is select()ed for reading, so
176 * a read() of 0 (or less) means it's disconnected
177 */
178int
179soread(PNATState pData, struct socket *so)
180{
181 int n, nn, lss, total;
182 struct sbuf *sb = &so->so_snd;
183 u_int len = sb->sb_datalen - sb->sb_cc;
184 struct iovec iov[2];
185 int mss = so->so_tcpcb->t_maxseg;
186 int sockerr;
187
188 STAM_PROFILE_START(&pData->StatIOread, a);
189 STAM_COUNTER_RESET(&pData->StatIORead_in_1);
190 STAM_COUNTER_RESET(&pData->StatIORead_in_2);
191
192 QSOCKET_LOCK(tcb);
193 SOCKET_LOCK(so);
194 QSOCKET_UNLOCK(tcb);
195
196 LogFlow(("soread: so = %R[natsock]\n", so));
197 Log2(("%s: so = %R[natsock] so->so_snd = %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, so, sb));
198
199 /*
200 * No need to check if there's enough room to read.
201 * soread wouldn't have been called if there weren't
202 */
203
204 len = sb->sb_datalen - sb->sb_cc;
205
206 iov[0].iov_base = sb->sb_wptr;
207 iov[1].iov_base = 0;
208 iov[1].iov_len = 0;
209 if (sb->sb_wptr < sb->sb_rptr)
210 {
211 iov[0].iov_len = sb->sb_rptr - sb->sb_wptr;
212 /* Should never succeed, but... */
213 if (iov[0].iov_len > len)
214 iov[0].iov_len = len;
215 if (iov[0].iov_len > mss)
216 iov[0].iov_len -= iov[0].iov_len%mss;
217 n = 1;
218 }
219 else
220 {
221 iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr;
222 /* Should never succeed, but... */
223 if (iov[0].iov_len > len)
224 iov[0].iov_len = len;
225 len -= iov[0].iov_len;
226 if (len)
227 {
228 iov[1].iov_base = sb->sb_data;
229 iov[1].iov_len = sb->sb_rptr - sb->sb_data;
230 if (iov[1].iov_len > len)
231 iov[1].iov_len = len;
232 total = iov[0].iov_len + iov[1].iov_len;
233 if (total > mss)
234 {
235 lss = total % mss;
236 if (iov[1].iov_len > lss)
237 {
238 iov[1].iov_len -= lss;
239 n = 2;
240 }
241 else
242 {
243 lss -= iov[1].iov_len;
244 iov[0].iov_len -= lss;
245 n = 1;
246 }
247 }
248 else
249 n = 2;
250 }
251 else
252 {
253 if (iov[0].iov_len > mss)
254 iov[0].iov_len -= iov[0].iov_len%mss;
255 n = 1;
256 }
257 }
258
259#ifdef HAVE_READV
260 nn = readv(so->s, (struct iovec *)iov, n);
261#else
262 nn = recv(so->s, iov[0].iov_base, iov[0].iov_len, (so->so_tcpcb->t_force? MSG_OOB:0));
263#endif
264 if (nn < 0)
265 sockerr = errno; /* save it, as it may be clobbered by logging */
266 else
267 sockerr = 0;
268
269 Log2(("%s: read(1) nn = %d bytes\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn));
270 Log2(("%s: so = %R[natsock] so->so_snd = %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, so, sb));
271 if (nn <= 0)
272 {
273#ifdef RT_OS_WINDOWS
274 /*
275 * Windows reports ESHUTDOWN after SHUT_RD (SD_RECEIVE)
276 * instead of just returning EOF indication.
277 */
278 if (nn < 0 && sockerr == ESHUTDOWN)
279 {
280 nn = 0;
281 sockerr = 0;
282 }
283#endif
284
285 if (nn == 0) /* XXX: should this be inside #if defined(RT_OS_WINDOWS)? */
286 {
287 /*
288 * Special case for WSAEnumNetworkEvents: If we receive 0 bytes that
289 * _could_ mean that the connection is closed. But we will receive an
290 * FD_CLOSE event later if the connection was _really_ closed. With
291 * www.youtube.com I see this very often. Closing the socket too early
292 * would be dangerous.
293 */
294 int status;
295 unsigned long pending = 0;
296 status = ioctlsocket(so->s, FIONREAD, &pending);
297 if (status < 0)
298 Log(("NAT:%s: error in WSAIoctl: %d\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, errno));
299 if (pending != 0)
300 {
301 SOCKET_UNLOCK(so);
302 STAM_PROFILE_STOP(&pData->StatIOread, a);
303 return 0;
304 }
305 }
306
307 if ( nn < 0
308 && soIgnorableErrorCode(sockerr))
309 {
310 SOCKET_UNLOCK(so);
311 STAM_PROFILE_STOP(&pData->StatIOread, a);
312 return 0;
313 }
314 else
315 {
316 int fUninitializedTemplate = 0;
317 int shuterr;
318
319 fUninitializedTemplate = RT_BOOL(( sototcpcb(so)
320 && ( sototcpcb(so)->t_template.ti_src.s_addr == INADDR_ANY
321 || sototcpcb(so)->t_template.ti_dst.s_addr == INADDR_ANY)));
322 /* nn == 0 means peer has performed an orderly shutdown */
323 Log2(("%s: disconnected, nn = %d, errno = %d (%s)\n",
324 RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn, sockerr, strerror(sockerr)));
325
326 shuterr = sofcantrcvmore(so);
327 if (!sockerr && !shuterr && !fUninitializedTemplate)
328 tcp_sockclosed(pData, sototcpcb(so));
329 else
330 {
331 LogRel2(("NAT: sockerr %d, shuterr %d - %R[natsock]\n", sockerr, shuterr, so));
332 tcp_drop(pData, sototcpcb(so), sockerr);
333 }
334 SOCKET_UNLOCK(so);
335 STAM_PROFILE_STOP(&pData->StatIOread, a);
336 return -1;
337 }
338 }
339 STAM_STATS(
340 if (n == 1)
341 {
342 STAM_COUNTER_INC(&pData->StatIORead_in_1);
343 STAM_COUNTER_ADD(&pData->StatIORead_in_1_bytes, nn);
344 }
345 else
346 {
347 STAM_COUNTER_INC(&pData->StatIORead_in_2);
348 STAM_COUNTER_ADD(&pData->StatIORead_in_2_1st_bytes, nn);
349 }
350 );
351
352#ifndef HAVE_READV
353 /*
354 * If there was no error, try and read the second time round
355 * We read again if n = 2 (ie, there's another part of the buffer)
356 * and we read as much as we could in the first read
357 * We don't test for <= 0 this time, because there legitimately
358 * might not be any more data (since the socket is non-blocking),
359 * a close will be detected on next iteration.
360 * A return of -1 wont (shouldn't) happen, since it didn't happen above
361 */
362 if (n == 2 && (unsigned)nn == iov[0].iov_len)
363 {
364 int ret;
365 ret = recv(so->s, iov[1].iov_base, iov[1].iov_len, 0);
366 if (ret > 0)
367 nn += ret;
368 STAM_STATS(
369 if (ret > 0)
370 {
371 STAM_COUNTER_INC(&pData->StatIORead_in_2);
372 STAM_COUNTER_ADD(&pData->StatIORead_in_2_2nd_bytes, ret);
373 }
374 );
375 }
376
377 Log2(("%s: read(2) nn = %d bytes\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn));
378#endif
379
380 /* Update fields */
381 sb->sb_cc += nn;
382 sb->sb_wptr += nn;
383 Log2(("%s: update so_snd (readed nn = %d) %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn, sb));
384 if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen))
385 {
386 sb->sb_wptr -= sb->sb_datalen;
387 Log2(("%s: alter sb_wptr so_snd = %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, sb));
388 }
389 STAM_PROFILE_STOP(&pData->StatIOread, a);
390 SOCKET_UNLOCK(so);
391 return nn;
392}
393
394/*
395 * Get urgent data
396 *
397 * When the socket is created, we set it SO_OOBINLINE,
398 * so when OOB data arrives, we soread() it and everything
399 * in the send buffer is sent as urgent data
400 */
401void
402sorecvoob(PNATState pData, struct socket *so)
403{
404 struct tcpcb *tp = sototcpcb(so);
405 ssize_t ret;
406
407 LogFlowFunc(("sorecvoob: so = %R[natsock]\n", so));
408
409 /*
410 * We take a guess at how much urgent data has arrived.
411 * In most situations, when urgent data arrives, the next
412 * read() should get all the urgent data. This guess will
413 * be wrong however if more data arrives just after the
414 * urgent data, or the read() doesn't return all the
415 * urgent data.
416 */
417 ret = soread(pData, so);
418 if (RT_LIKELY(ret > 0))
419 {
420 tp->snd_up = tp->snd_una + SBUF_LEN(&so->so_snd);
421 tp->t_force = 1;
422 tcp_output(pData, tp);
423 tp->t_force = 0;
424 }
425}
426
427/*
428 * Send urgent data
429 * There's a lot duplicated code here, but...
430 */
431int
432sosendoob(struct socket *so)
433{
434 struct sbuf *sb = &so->so_rcv;
435 char buff[2048]; /* XXX Shouldn't be sending more oob data than this */
436
437 int n, len;
438
439 LogFlowFunc(("sosendoob so = %R[natsock]\n", so));
440
441 if (so->so_urgc > sizeof(buff))
442 so->so_urgc = sizeof(buff); /* XXX */
443
444 if (sb->sb_rptr < sb->sb_wptr)
445 {
446 /* We can send it directly */
447 n = send(so->s, sb->sb_rptr, so->so_urgc, (MSG_OOB)); /* |MSG_DONTWAIT)); */
448 so->so_urgc -= n;
449
450 Log2((" --- sent %d bytes urgent data, %d urgent bytes left\n",
451 n, so->so_urgc));
452 }
453 else
454 {
455 /*
456 * Since there's no sendv or sendtov like writev,
457 * we must copy all data to a linear buffer then
458 * send it all
459 */
460 len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
461 if (len > so->so_urgc)
462 len = so->so_urgc;
463 memcpy(buff, sb->sb_rptr, len);
464 so->so_urgc -= len;
465 if (so->so_urgc)
466 {
467 n = sb->sb_wptr - sb->sb_data;
468 if (n > so->so_urgc)
469 n = so->so_urgc;
470 memcpy(buff + len, sb->sb_data, n);
471 so->so_urgc -= n;
472 len += n;
473 }
474 n = send(so->s, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */
475#ifdef DEBUG
476 if (n != len)
477 Log(("Didn't send all data urgently XXXXX\n"));
478#endif
479 Log2((" ---2 sent %d bytes urgent data, %d urgent bytes left\n",
480 n, so->so_urgc));
481 }
482
483 sb->sb_cc -= n;
484 sb->sb_rptr += n;
485 if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
486 sb->sb_rptr -= sb->sb_datalen;
487
488 return n;
489}
490
491/*
492 * Write data from so_rcv to so's socket,
493 * updating all sbuf field as necessary
494 */
495int
496sowrite(PNATState pData, struct socket *so)
497{
498 int n, nn;
499 struct sbuf *sb = &so->so_rcv;
500 u_int len = sb->sb_cc;
501 struct iovec iov[2];
502
503 STAM_PROFILE_START(&pData->StatIOwrite, a);
504 STAM_COUNTER_RESET(&pData->StatIOWrite_in_1);
505 STAM_COUNTER_RESET(&pData->StatIOWrite_in_1_bytes);
506 STAM_COUNTER_RESET(&pData->StatIOWrite_in_2);
507 STAM_COUNTER_RESET(&pData->StatIOWrite_in_2_1st_bytes);
508 STAM_COUNTER_RESET(&pData->StatIOWrite_in_2_2nd_bytes);
509 STAM_COUNTER_RESET(&pData->StatIOWrite_no_w);
510 STAM_COUNTER_RESET(&pData->StatIOWrite_rest);
511 STAM_COUNTER_RESET(&pData->StatIOWrite_rest_bytes);
512 LogFlowFunc(("so = %R[natsock]\n", so));
513 Log2(("%s: so = %R[natsock] so->so_rcv = %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, so, sb));
514 QSOCKET_LOCK(tcb);
515 SOCKET_LOCK(so);
516 QSOCKET_UNLOCK(tcb);
517 if (so->so_urgc)
518 {
519 sosendoob(so);
520 if (sb->sb_cc == 0)
521 {
522 SOCKET_UNLOCK(so);
523 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
524 return 0;
525 }
526 }
527
528 /*
529 * No need to check if there's something to write,
530 * sowrite wouldn't have been called otherwise
531 */
532
533 len = sb->sb_cc;
534
535 iov[0].iov_base = sb->sb_rptr;
536 iov[1].iov_base = 0;
537 iov[1].iov_len = 0;
538 if (sb->sb_rptr < sb->sb_wptr)
539 {
540 iov[0].iov_len = sb->sb_wptr - sb->sb_rptr;
541 /* Should never succeed, but... */
542 if (iov[0].iov_len > len)
543 iov[0].iov_len = len;
544 n = 1;
545 }
546 else
547 {
548 iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
549 if (iov[0].iov_len > len)
550 iov[0].iov_len = len;
551 len -= iov[0].iov_len;
552 if (len)
553 {
554 iov[1].iov_base = sb->sb_data;
555 iov[1].iov_len = sb->sb_wptr - sb->sb_data;
556 if (iov[1].iov_len > len)
557 iov[1].iov_len = len;
558 n = 2;
559 }
560 else
561 n = 1;
562 }
563 STAM_STATS({
564 if (n == 1)
565 {
566 STAM_COUNTER_INC(&pData->StatIOWrite_in_1);
567 STAM_COUNTER_ADD(&pData->StatIOWrite_in_1_bytes, iov[0].iov_len);
568 }
569 else
570 {
571 STAM_COUNTER_INC(&pData->StatIOWrite_in_2);
572 STAM_COUNTER_ADD(&pData->StatIOWrite_in_2_1st_bytes, iov[0].iov_len);
573 STAM_COUNTER_ADD(&pData->StatIOWrite_in_2_2nd_bytes, iov[1].iov_len);
574 }
575 });
576 /* Check if there's urgent data to send, and if so, send it */
577#ifdef HAVE_READV
578 nn = writev(so->s, (const struct iovec *)iov, n);
579#else
580 nn = send(so->s, iov[0].iov_base, iov[0].iov_len, 0);
581#endif
582 Log2(("%s: wrote(1) nn = %d bytes\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn));
583 /* This should never happen, but people tell me it does *shrug* */
584 if ( nn < 0
585 && soIgnorableErrorCode(errno))
586 {
587 SOCKET_UNLOCK(so);
588 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
589 return 0;
590 }
591
592 if (nn < 0 || (nn == 0 && iov[0].iov_len > 0))
593 {
594 Log2(("%s: disconnected, so->so_state = %x, errno = %d\n",
595 RT_GCC_EXTENSION __PRETTY_FUNCTION__, so->so_state, errno));
596 sofcantsendmore(so);
597 tcp_sockclosed(pData, sototcpcb(so));
598 SOCKET_UNLOCK(so);
599 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
600 return -1;
601 }
602
603#ifndef HAVE_READV
604 if (n == 2 && (unsigned)nn == iov[0].iov_len)
605 {
606 int ret;
607 ret = send(so->s, iov[1].iov_base, iov[1].iov_len, 0);
608 if (ret > 0)
609 nn += ret;
610# ifdef VBOX_WITH_STATISTICS
611 if (ret > 0 && ret != (ssize_t)iov[1].iov_len)
612 {
613 STAM_COUNTER_INC(&pData->StatIOWrite_rest);
614 STAM_COUNTER_ADD(&pData->StatIOWrite_rest_bytes, (iov[1].iov_len - ret));
615 }
616#endif
617 }
618 Log2(("%s: wrote(2) nn = %d bytes\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn));
619#endif
620
621 /* Update sbuf */
622 sb->sb_cc -= nn;
623 sb->sb_rptr += nn;
624 Log2(("%s: update so_rcv (written nn = %d) %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, nn, sb));
625 if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
626 {
627 sb->sb_rptr -= sb->sb_datalen;
628 Log2(("%s: alter sb_rptr of so_rcv %R[sbuf]\n", RT_GCC_EXTENSION __PRETTY_FUNCTION__, sb));
629 }
630
631 /*
632 * If in DRAIN mode, and there's no more data, set
633 * it CANTSENDMORE
634 */
635 if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0)
636 sofcantsendmore(so);
637
638 SOCKET_UNLOCK(so);
639 STAM_PROFILE_STOP(&pData->StatIOwrite, a);
640 return nn;
641}
642
643/*
644 * recvfrom() a UDP socket
645 */
646void
647sorecvfrom(PNATState pData, struct socket *so)
648{
649 LogFlowFunc(("sorecvfrom: so = %p\n", so));
650
651#ifdef RT_OS_WINDOWS
652 /* ping is handled with ICMP API in ip_icmpwin.c */
653 Assert(so->so_type == IPPROTO_UDP);
654#else
655 if (so->so_type == IPPROTO_ICMP)
656 {
657 /* This is a "ping" reply */
658 sorecvfrom_icmp_unix(pData, so);
659 udp_detach(pData, so);
660 }
661 else
662#endif /* !RT_OS_WINDOWS */
663 {
664 static char achBuf[64 * 1024];
665
666 /* A "normal" UDP packet */
667 struct sockaddr_in addr;
668 socklen_t addrlen = sizeof(struct sockaddr_in);
669 struct iovec iov[2];
670 ssize_t nread;
671 struct mbuf *m;
672
673 QSOCKET_LOCK(udb);
674 SOCKET_LOCK(so);
675 QSOCKET_UNLOCK(udb);
676
677 m = m_getjcl(pData, M_NOWAIT, MT_HEADER, M_PKTHDR, slirp_size(pData));
678 if (m == NULL)
679 {
680 SOCKET_UNLOCK(so);
681 return;
682 }
683
684 m->m_data += ETH_HLEN;
685 m->m_pkthdr.header = mtod(m, void *);
686
687 m->m_data += sizeof(struct udpiphdr);
688
689 /* small packets will fit without copying */
690 iov[0].iov_base = mtod(m, char *);
691 iov[0].iov_len = M_TRAILINGSPACE(m);
692
693 /* large packets will spill into a temp buffer */
694 iov[1].iov_base = achBuf;
695 iov[1].iov_len = sizeof(achBuf);
696
697#if !defined(RT_OS_WINDOWS)
698 {
699 struct msghdr mh;
700 memset(&mh, 0, sizeof(mh));
701
702 mh.msg_iov = iov;
703 mh.msg_iovlen = 2;
704 mh.msg_name = &addr;
705 mh.msg_namelen = addrlen;
706
707 nread = recvmsg(so->s, &mh, 0);
708 }
709#else /* RT_OS_WINDOWS */
710 {
711 DWORD nbytes; /* NB: can't use nread b/c of different size */
712 DWORD flags = 0;
713 int status;
714 AssertCompile(sizeof(WSABUF) == sizeof(struct iovec));
715 AssertCompileMembersSameSizeAndOffset(WSABUF, len, struct iovec, iov_len);
716 AssertCompileMembersSameSizeAndOffset(WSABUF, buf, struct iovec, iov_base);
717 status = WSARecvFrom(so->s, (WSABUF *)&iov[0], 2, &nbytes, &flags,
718 (struct sockaddr *)&addr, &addrlen,
719 NULL, NULL);
720 if (status != SOCKET_ERROR)
721 nread = nbytes;
722 else
723 nread = -1;
724 }
725#endif
726 if (nread >= 0)
727 {
728 if (nread <= iov[0].iov_len)
729 m->m_len = nread;
730 else
731 {
732 m->m_len = iov[0].iov_len;
733 m_append(pData, m, nread - iov[0].iov_len, iov[1].iov_base);
734 }
735 Assert(m_length(m, NULL) == (size_t)nread);
736
737 /*
738 * Hack: domain name lookup will be used the most for UDP,
739 * and since they'll only be used once there's no need
740 * for the 4 minute (or whatever) timeout... So we time them
741 * out much quicker (10 seconds for now...)
742 */
743 if (so->so_expire)
744 {
745 if (so->so_fport != RT_H2N_U16_C(53))
746 so->so_expire = curtime + SO_EXPIRE;
747 }
748
749 /*
750 * DNS proxy requests are forwarded to the real resolver,
751 * but its socket's so_faddr is that of the DNS proxy
752 * itself.
753 *
754 * last argument should be changed if Slirp will inject IP attributes
755 */
756 if ( pData->fUseDnsProxy
757 && so->so_fport == RT_H2N_U16_C(53)
758 && CTL_CHECK(so->so_faddr.s_addr, CTL_DNS))
759 dnsproxy_answer(pData, so, m);
760
761 /* packets definetly will be fragmented, could confuse receiver peer. */
762 if (nread > if_mtu)
763 m->m_flags |= M_SKIP_FIREWALL;
764
765 /*
766 * If this packet was destined for CTL_ADDR,
767 * make it look like that's where it came from, done by udp_output
768 */
769 udp_output(pData, so, m, &addr);
770 }
771 else
772 {
773 m_freem(pData, m);
774
775 if (!soIgnorableErrorCode(errno))
776 {
777 u_char code;
778 if (errno == EHOSTUNREACH)
779 code = ICMP_UNREACH_HOST;
780 else if (errno == ENETUNREACH)
781 code = ICMP_UNREACH_NET;
782 else
783 code = ICMP_UNREACH_PORT;
784
785 Log2((" rx error, tx icmp ICMP_UNREACH:%i\n", code));
786 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
787 so->so_m = NULL;
788 }
789 }
790
791 SOCKET_UNLOCK(so);
792 }
793}
794
795/*
796 * sendto() a socket
797 */
798int
799sosendto(PNATState pData, struct socket *so, struct mbuf *m)
800{
801 int ret;
802 struct sockaddr_in *paddr;
803 struct sockaddr addr;
804#if 0
805 struct sockaddr_in host_addr;
806#endif
807 caddr_t buf = 0;
808 int mlen;
809
810 LogFlowFunc(("sosendto: so = %R[natsock], m = %p\n", so, m));
811
812 memset(&addr, 0, sizeof(struct sockaddr));
813#ifdef RT_OS_DARWIN
814 addr.sa_len = sizeof(struct sockaddr_in);
815#endif
816 paddr = (struct sockaddr_in *)&addr;
817 paddr->sin_family = AF_INET;
818 if ((so->so_faddr.s_addr & RT_H2N_U32(pData->netmask)) == pData->special_addr.s_addr)
819 {
820 /* It's an alias */
821 uint32_t last_byte = RT_N2H_U32(so->so_faddr.s_addr) & ~pData->netmask;
822 switch(last_byte)
823 {
824#if 0
825 /* handle this case at 'default:' */
826 case CTL_BROADCAST:
827 addr.sin_addr.s_addr = INADDR_BROADCAST;
828 /* Send the packet to host to fully emulate broadcast */
829 /** @todo r=klaus: on Linux host this causes the host to receive
830 * the packet twice for some reason. And I cannot find any place
831 * in the man pages which states that sending a broadcast does not
832 * reach the host itself. */
833 host_addr.sin_family = AF_INET;
834 host_addr.sin_port = so->so_fport;
835 host_addr.sin_addr = our_addr;
836 sendto(so->s, m->m_data, m->m_len, 0,
837 (struct sockaddr *)&host_addr, sizeof (struct sockaddr));
838 break;
839#endif
840 case CTL_DNS:
841 case CTL_ALIAS:
842 default:
843 if (last_byte == ~pData->netmask)
844 paddr->sin_addr.s_addr = INADDR_BROADCAST;
845 else
846 paddr->sin_addr = loopback_addr;
847 break;
848 }
849 }
850 else
851 paddr->sin_addr = so->so_faddr;
852 paddr->sin_port = so->so_fport;
853
854 Log2((" sendto()ing, addr.sin_port=%d, addr.sin_addr.s_addr=%.16s\n",
855 RT_N2H_U16(paddr->sin_port), inet_ntoa(paddr->sin_addr)));
856
857 /* Don't care what port we get */
858 /*
859 * > nmap -sV -T4 -O -A -v -PU3483 255.255.255.255
860 * generates bodyless messages, annoying memmory management system.
861 */
862 mlen = m_length(m, NULL);
863 if (mlen > 0)
864 {
865 buf = RTMemAlloc(mlen);
866 if (buf == NULL)
867 {
868 return -1;
869 }
870 m_copydata(m, 0, mlen, buf);
871 }
872 ret = sendto(so->s, buf, mlen, 0,
873 (struct sockaddr *)&addr, sizeof (struct sockaddr));
874#ifdef VBOX_WITH_NAT_SEND2HOME
875 if (slirpIsWideCasting(pData, so->so_faddr.s_addr))
876 {
877 slirpSend2Home(pData, so, buf, mlen, 0);
878 }
879#endif
880 if (buf)
881 RTMemFree(buf);
882 if (ret < 0)
883 {
884 Log2(("UDP: sendto fails (%s)\n", strerror(errno)));
885 return -1;
886 }
887
888 /*
889 * Kill the socket if there's no reply in 4 minutes,
890 * but only if it's an expirable socket
891 */
892 if (so->so_expire)
893 so->so_expire = curtime + SO_EXPIRE;
894 so->so_state = SS_ISFCONNECTED; /* So that it gets select()ed */
895 return 0;
896}
897
898/*
899 * XXX This should really be tcp_listen
900 */
901struct socket *
902solisten(PNATState pData, u_int32_t bind_addr, u_int port, u_int32_t laddr, u_int lport, int flags)
903{
904 struct sockaddr_in addr;
905 struct socket *so;
906 socklen_t addrlen = sizeof(addr);
907 int s, opt = 1;
908 int status;
909
910 LogFlowFunc(("solisten: port = %d, laddr = %x, lport = %d, flags = %x\n", port, laddr, lport, flags));
911
912 if ((so = socreate()) == NULL)
913 {
914 /* RTMemFree(so); Not sofree() ??? free(NULL) == NOP */
915 return NULL;
916 }
917
918 /* Don't tcp_attach... we don't need so_snd nor so_rcv */
919 if ((so->so_tcpcb = tcp_newtcpcb(pData, so)) == NULL)
920 {
921 RTMemFree(so);
922 return NULL;
923 }
924
925 SOCKET_LOCK_CREATE(so);
926 SOCKET_LOCK(so);
927 QSOCKET_LOCK(tcb);
928 insque(pData, so,&tcb);
929 NSOCK_INC();
930 QSOCKET_UNLOCK(tcb);
931
932 /*
933 * SS_FACCEPTONCE sockets must time out.
934 */
935 if (flags & SS_FACCEPTONCE)
936 so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT*2;
937
938 so->so_state = (SS_FACCEPTCONN|flags);
939 so->so_lport = lport; /* Kept in network format */
940 so->so_laddr.s_addr = laddr; /* Ditto */
941
942 memset(&addr, 0, sizeof(addr));
943#ifdef RT_OS_DARWIN
944 addr.sin_len = sizeof(addr);
945#endif
946 addr.sin_family = AF_INET;
947 addr.sin_addr.s_addr = bind_addr;
948 addr.sin_port = port;
949
950 /**
951 * changing listen(,1->SOMAXCONN) shouldn't be harmful for NAT's TCP/IP stack,
952 * kernel will choose the optimal value for requests queue length.
953 * @note: MSDN recommends low (2-4) values for bluetooth networking devices.
954 */
955 if ( ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0)
956 || (setsockopt(s, SOL_SOCKET, SO_REUSEADDR,(char *)&opt, sizeof(int)) < 0)
957 || (bind(s,(struct sockaddr *)&addr, sizeof(addr)) < 0)
958 || (listen(s, pData->soMaxConn) < 0))
959 {
960#ifdef RT_OS_WINDOWS
961 int tmperrno = WSAGetLastError(); /* Don't clobber the real reason we failed */
962 closesocket(s);
963 QSOCKET_LOCK(tcb);
964 sofree(pData, so);
965 QSOCKET_UNLOCK(tcb);
966 /* Restore the real errno */
967 WSASetLastError(tmperrno);
968#else
969 int tmperrno = errno; /* Don't clobber the real reason we failed */
970 close(s);
971 if (sototcpcb(so))
972 tcp_close(pData, sototcpcb(so));
973 else
974 sofree(pData, so);
975 /* Restore the real errno */
976 errno = tmperrno;
977#endif
978 return NULL;
979 }
980 fd_nonblock(s);
981 setsockopt(s, SOL_SOCKET, SO_OOBINLINE,(char *)&opt, sizeof(int));
982
983 getsockname(s,(struct sockaddr *)&addr,&addrlen);
984 so->so_fport = addr.sin_port;
985 /* set socket buffers */
986 opt = pData->socket_rcv;
987 status = setsockopt(s, SOL_SOCKET, SO_RCVBUF, (char *)&opt, sizeof(int));
988 if (status < 0)
989 {
990 LogRel(("NAT: Error(%d) while setting RCV capacity to (%d)\n", errno, opt));
991 goto no_sockopt;
992 }
993 opt = pData->socket_snd;
994 status = setsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *)&opt, sizeof(int));
995 if (status < 0)
996 {
997 LogRel(("NAT: Error(%d) while setting SND capacity to (%d)\n", errno, opt));
998 goto no_sockopt;
999 }
1000no_sockopt:
1001 if (addr.sin_addr.s_addr == 0 || addr.sin_addr.s_addr == loopback_addr.s_addr)
1002 so->so_faddr = alias_addr;
1003 else
1004 so->so_faddr = addr.sin_addr;
1005
1006 so->s = s;
1007 SOCKET_UNLOCK(so);
1008 return so;
1009}
1010
/**
 * Notification that data is available in so_rcv.
 *
 * Currently does nothing: writing the data to the host socket from here
 * (sowrite() followed by clearing the descriptor in the write set) is not
 * done yet.
 *
 * @todo do we really need this function, what it's intended to do?
 * @param   so  Socket with new data in so_rcv (unused).
 */
void
sorwakeup(struct socket *so)
{
    NOREF(so);
}
1026
/**
 * Notification that data has been freed in so_snd.
 *
 * There is room for a read() now, but nothing is read here; that is left
 * to the main loop.
 *
 * @param   so  Socket with new room in so_snd (unused).
 */
void
sowwakeup(struct socket *so)
{
    NOREF(so);
}
1037
1038/*
1039 * Various session state calls
1040 * XXX Should be #define's
1041 * The socket state stuff needs work, these often get called 2 or 3
1042 * times each when only 1 was needed
1043 */
1044void
1045soisfconnecting(struct socket *so)
1046{
1047 so->so_state &= ~(SS_NOFDREF|SS_ISFCONNECTED|SS_FCANTRCVMORE|
1048 SS_FCANTSENDMORE|SS_FWDRAIN);
1049 so->so_state |= SS_ISFCONNECTING; /* Clobber other states */
1050}
1051
1052void
1053soisfconnected(struct socket *so)
1054{
1055 LogFlowFunc(("ENTER: so:%R[natsock]\n", so));
1056 so->so_state &= ~(SS_ISFCONNECTING|SS_FWDRAIN|SS_NOFDREF);
1057 so->so_state |= SS_ISFCONNECTED; /* Clobber other states */
1058 LogFlowFunc(("LEAVE: so:%R[natsock]\n", so));
1059}
1060
1061int
1062sofcantrcvmore(struct socket *so)
1063{
1064 int err = 0;
1065
1066 LogFlowFunc(("ENTER: so:%R[natsock]\n", so));
1067 if ((so->so_state & SS_NOFDREF) == 0)
1068 {
1069 /*
1070 * If remote closes first and then sends an RST, the recv() in
1071 * soread() will keep reporting EOF without any error
1072 * indication. As far as I can tell the only way to detect
1073 * this on Linux is to check if shutdown() succeeds here (but
1074 * see below).
1075 *
1076 * OTOH on OS X shutdown() "helpfully" checks if remote has
1077 * already closed and then always returns ENOTCONN
1078 * immediately.
1079 */
1080 int status = shutdown(so->s, SHUT_RD);
1081#if defined(RT_OS_LINUX)
1082 if (status < 0)
1083 err = errno;
1084#else
1085 RT_NOREF(status);
1086#endif
1087 }
1088 so->so_state &= ~(SS_ISFCONNECTING);
1089 if (so->so_state & SS_FCANTSENDMORE)
1090 {
1091#if defined(RT_OS_LINUX)
1092 /*
1093 * If we have closed first, and remote closes, shutdown will
1094 * return ENOTCONN, but this is expected. Don't tell the
1095 * caller there was an error.
1096 */
1097 if (err == ENOTCONN)
1098 err = 0;
1099#endif
1100 so->so_state = SS_NOFDREF; /* Don't select it */
1101 /* XXX close() here as well? */
1102 }
1103 else
1104 so->so_state |= SS_FCANTRCVMORE;
1105
1106 LogFlowFunc(("LEAVE: %d\n", err));
1107 return err;
1108}
1109
1110void
1111sofcantsendmore(struct socket *so)
1112{
1113 LogFlowFunc(("ENTER: so:%R[natsock]\n", so));
1114 if ((so->so_state & SS_NOFDREF) == 0)
1115 shutdown(so->s, 1); /* send FIN to fhost */
1116
1117 so->so_state &= ~(SS_ISFCONNECTING);
1118 if (so->so_state & SS_FCANTRCVMORE)
1119 so->so_state = SS_NOFDREF; /* as above */
1120 else
1121 so->so_state |= SS_FCANTSENDMORE;
1122 LogFlowFuncLeave();
1123}
1124
/**
 * Notification that the foreign host has disconnected.
 *
 * Deliberately does nothing (XXX Do nothing ... ?). Clearing the
 * connecting/connected flags, closing the host socket and switching to
 * SS_ISFDISCONNECTED is not performed here.
 *
 * @param   so  Socket that got disconnected (unused).
 */
void
soisfdisconnected(struct socket *so)
{
    NOREF(so);
}
1138
1139/*
1140 * Set write drain mode
1141 * Set CANTSENDMORE once all data has been write()n
1142 */
1143void
1144sofwdrain(struct socket *so)
1145{
1146 if (SBUF_LEN(&so->so_rcv))
1147 so->so_state |= SS_FWDRAIN;
1148 else
1149 sofcantsendmore(so);
1150}
1151
1152#if !defined(RT_OS_WINDOWS)
1153static void
1154send_icmp_to_guest(PNATState pData, char *buff, size_t len, const struct sockaddr_in *addr)
1155{
1156 struct ip *ip;
1157 uint32_t dst, src;
1158 char ip_copy[256];
1159 struct icmp *icp;
1160 int old_ip_len = 0;
1161 int hlen, original_hlen = 0;
1162 struct mbuf *m;
1163 struct icmp_msg *icm;
1164 uint8_t proto;
1165 int type = 0;
1166
1167 ip = (struct ip *)buff;
1168 /* Fix ip->ip_len to contain the total packet length including the header
1169 * in _host_ byte order for all OSes. On Darwin, that value already is in
1170 * host byte order. Solaris and Darwin report only the payload. */
1171#ifndef RT_OS_DARWIN
1172 ip->ip_len = RT_N2H_U16(ip->ip_len);
1173#endif
1174 hlen = (ip->ip_hl << 2);
1175#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
1176 ip->ip_len += hlen;
1177#endif
1178 if (ip->ip_len < hlen + ICMP_MINLEN)
1179 {
1180 Log(("send_icmp_to_guest: ICMP header is too small to understand which type/subtype of the datagram\n"));
1181 return;
1182 }
1183 icp = (struct icmp *)((char *)ip + hlen);
1184
1185 Log(("ICMP:received msg(t:%d, c:%d)\n", icp->icmp_type, icp->icmp_code));
1186 if ( icp->icmp_type != ICMP_ECHOREPLY
1187 && icp->icmp_type != ICMP_TIMXCEED
1188 && icp->icmp_type != ICMP_UNREACH)
1189 {
1190 return;
1191 }
1192
1193 /*
1194 * ICMP_ECHOREPLY, ICMP_TIMXCEED, ICMP_UNREACH minimal header size is
1195 * ICMP_ECHOREPLY assuming data 0
1196 * icmp_{type(8), code(8), cksum(16),identifier(16),seqnum(16)}
1197 */
1198 if (ip->ip_len < hlen + 8)
1199 {
1200 Log(("send_icmp_to_guest: NAT accept ICMP_{ECHOREPLY, TIMXCEED, UNREACH} the minimum size is 64 (see rfc792)\n"));
1201 return;
1202 }
1203
1204 type = icp->icmp_type;
1205 if ( type == ICMP_TIMXCEED
1206 || type == ICMP_UNREACH)
1207 {
1208 /*
1209 * ICMP_TIMXCEED, ICMP_UNREACH minimal header size is
1210 * icmp_{type(8), code(8), cksum(16),unused(32)} + IP header + 64 bit of original datagram
1211 */
1212 if (ip->ip_len < hlen + 2*8 + sizeof(struct ip))
1213 {
1214 Log(("send_icmp_to_guest: NAT accept ICMP_{TIMXCEED, UNREACH} the minimum size of ipheader + 64 bit of data (see rfc792)\n"));
1215 return;
1216 }
1217 ip = &icp->icmp_ip;
1218 }
1219
1220 icm = icmp_find_original_mbuf(pData, ip);
1221 if (icm == NULL)
1222 {
1223 Log(("NAT: Can't find the corresponding packet for the received ICMP\n"));
1224 return;
1225 }
1226
1227 m = icm->im_m;
1228 if (!m)
1229 {
1230 LogFunc(("%R[natsock] hasn't stored it's mbuf on sent\n", icm->im_so));
1231 goto done;
1232 }
1233
1234 src = addr->sin_addr.s_addr;
1235 if (type == ICMP_ECHOREPLY)
1236 {
1237 struct ip *ip0 = mtod(m, struct ip *);
1238 struct icmp *icp0 = (struct icmp *)((char *)ip0 + (ip0->ip_hl << 2));
1239 if (icp0->icmp_type != ICMP_ECHO)
1240 {
1241 Log(("NAT: we haven't found echo for this reply\n"));
1242 goto done;
1243 }
1244 /*
1245 * while combining buffer to send (see ip_icmp.c) we control ICMP header only,
1246 * IP header combined by OS network stack, our local copy of IP header contians values
1247 * in host byte order so no byte order conversion is required. IP headers fields are converting
1248 * in ip_output0 routine only.
1249 */
1250 if ( (ip->ip_len - hlen)
1251 != (ip0->ip_len - (ip0->ip_hl << 2)))
1252 {
1253 Log(("NAT: ECHO(%d) lenght doesn't match ECHOREPLY(%d)\n",
1254 (ip->ip_len - hlen), (ip0->ip_len - (ip0->ip_hl << 2))));
1255 goto done;
1256 }
1257 }
1258
1259 /* ip points on origianal ip header */
1260 ip = mtod(m, struct ip *);
1261 proto = ip->ip_p;
1262 /* Now ip is pointing on header we've sent from guest */
1263 if ( icp->icmp_type == ICMP_TIMXCEED
1264 || icp->icmp_type == ICMP_UNREACH)
1265 {
1266 old_ip_len = (ip->ip_hl << 2) + 64;
1267 if (old_ip_len > sizeof(ip_copy))
1268 old_ip_len = sizeof(ip_copy);
1269 memcpy(ip_copy, ip, old_ip_len);
1270 }
1271
1272 /* source address from original IP packet*/
1273 dst = ip->ip_src.s_addr;
1274
1275 /* overide ther tail of old packet */
1276 ip = mtod(m, struct ip *); /* ip is from mbuf we've overrided */
1277 original_hlen = ip->ip_hl << 2;
1278 /* saves original ip header and options */
1279 m_copyback(pData, m, original_hlen, len - hlen, buff + hlen);
1280 ip->ip_len = m_length(m, NULL);
1281 ip->ip_p = IPPROTO_ICMP; /* the original package could be whatever, but we're response via ICMP*/
1282
1283 icp = (struct icmp *)((char *)ip + (ip->ip_hl << 2));
1284 type = icp->icmp_type;
1285 if ( type == ICMP_TIMXCEED
1286 || type == ICMP_UNREACH)
1287 {
1288 /* according RFC 793 error messages required copy of initial IP header + 64 bit */
1289 memcpy(&icp->icmp_ip, ip_copy, old_ip_len);
1290
1291 /* undo byte order conversions done in ip_input() */
1292 HTONS(icp->icmp_ip.ip_len);
1293 HTONS(icp->icmp_ip.ip_id);
1294 HTONS(icp->icmp_ip.ip_off);
1295
1296 ip->ip_tos = ((ip->ip_tos & 0x1E) | 0xC0); /* high priority for errors */
1297 }
1298
1299 ip->ip_src.s_addr = src;
1300 ip->ip_dst.s_addr = dst;
1301 icmp_reflect(pData, m);
1302 /* m was freed */
1303 icm->im_m = NULL;
1304
1305 done:
1306 icmp_msg_delete(pData, icm);
1307}
1308
1309static void sorecvfrom_icmp_unix(PNATState pData, struct socket *so)
1310{
1311 struct sockaddr_in addr;
1312 socklen_t addrlen = sizeof(struct sockaddr_in);
1313 struct ip ip;
1314 char *buff;
1315 int len = 0;
1316
1317 /* 1- step: read the ip header */
1318 len = recvfrom(so->s, &ip, sizeof(struct ip), MSG_PEEK,
1319 (struct sockaddr *)&addr, &addrlen);
1320 if ( len < 0
1321 && ( soIgnorableErrorCode(errno)
1322 || errno == ENOTCONN))
1323 {
1324 Log(("sorecvfrom_icmp_unix: 1 - step can't read IP datagramm (would block)\n"));
1325 return;
1326 }
1327
1328 if ( len < sizeof(struct ip)
1329 || len < 0
1330 || len == 0)
1331 {
1332 u_char code;
1333 code = ICMP_UNREACH_PORT;
1334
1335 if (errno == EHOSTUNREACH)
1336 code = ICMP_UNREACH_HOST;
1337 else if (errno == ENETUNREACH)
1338 code = ICMP_UNREACH_NET;
1339
1340 LogRel(("NAT: UDP ICMP rx errno=%d (%s)\n", errno, strerror(errno)));
1341 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
1342 so->so_m = NULL;
1343 Log(("sorecvfrom_icmp_unix: 1 - step can't read IP datagramm\n"));
1344 return;
1345 }
1346 /* basic check of IP header */
1347 if ( ip.ip_v != IPVERSION
1348# ifndef RT_OS_DARWIN
1349 || ip.ip_p != IPPROTO_ICMP
1350# endif
1351 )
1352 {
1353 Log(("sorecvfrom_icmp_unix: 1 - step IP isn't IPv4\n"));
1354 return;
1355 }
1356# ifndef RT_OS_DARWIN
1357 /* Darwin reports the IP length already in host byte order. */
1358 ip.ip_len = RT_N2H_U16(ip.ip_len);
1359# endif
1360# if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
1361 /* Solaris and Darwin report the payload only */
1362 ip.ip_len += (ip.ip_hl << 2);
1363# endif
1364 /* Note: ip->ip_len in host byte order (all OS) */
1365 len = ip.ip_len;
1366 buff = RTMemAlloc(len);
1367 if (buff == NULL)
1368 {
1369 Log(("sorecvfrom_icmp_unix: 1 - step can't allocate enought room for datagram\n"));
1370 return;
1371 }
1372 /* 2 - step: we're reading rest of the datagramm to the buffer */
1373 addrlen = sizeof(struct sockaddr_in);
1374 memset(&addr, 0, addrlen);
1375 len = recvfrom(so->s, buff, len, 0,
1376 (struct sockaddr *)&addr, &addrlen);
1377 if ( len < 0
1378 && ( soIgnorableErrorCode(errno)
1379 || errno == ENOTCONN))
1380 {
1381 Log(("sorecvfrom_icmp_unix: 2 - step can't read IP body (would block expected:%d)\n",
1382 ip.ip_len));
1383 RTMemFree(buff);
1384 return;
1385 }
1386 if ( len < 0
1387 || len == 0)
1388 {
1389 Log(("sorecvfrom_icmp_unix: 2 - step read of the rest of datagramm is fallen (errno:%d, len:%d expected: %d)\n",
1390 errno, len, (ip.ip_len - sizeof(struct ip))));
1391 RTMemFree(buff);
1392 return;
1393 }
1394 /* len is modified in 2nd read, when the rest of the datagramm was read */
1395 send_icmp_to_guest(pData, buff, len, &addr);
1396 RTMemFree(buff);
1397}
1398#endif /* !RT_OS_WINDOWS */
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette