VirtualBox

source: vbox/trunk/src/VBox/Devices/Network/slirp/socket.c@ 15919

Last change on this file since 15919 was 15890, checked in by vboxsync, 16 years ago

NAT: 1. The code path without the sync enhancement ("w/o sync enhancement" branch) is still functional (it had been broken by the use of the ICMP file handle in select(1)).

  1. After sending, the send queue no longer needs to synchronize with the NAT thread to free the mbuf; instead, the NAT queue is used to call the slirp freeing routine.
  2. No more copying when slirp sends data to the guest.


  • Property svn:eol-style set to native
File size: 28.7 KB
Line 
1/*
2 * Copyright (c) 1995 Danny Gasparovski.
3 *
4 * Please read the file COPYRIGHT for the
5 * terms and conditions of the copyright.
6 */
7
8#define WANT_SYS_IOCTL_H
9#include <slirp.h>
10#include "ip_icmp.h"
11#include "main.h"
12#ifdef __sun__
13#include <sys/filio.h>
14#endif
15#if defined (RT_OS_WINDOWS)
16#include <iphlpapi.h>
17#include <icmpapi.h>
18#endif
19
20
21static void send_icmp_to_guest(PNATState, char *, size_t, struct socket *, const struct sockaddr_in *);
22#ifdef RT_OS_WINDOWS
23static void sorecvfrom_icmp_win(PNATState, struct socket *);
24#else /* RT_OS_WINDOWS */
25static void sorecvfrom_icmp_unix(PNATState, struct socket *);
26#endif /* !RT_OS_WINDOWS */
27
/*
 * Socket module initialisation hook.
 * Currently a no-op.
 */
void
so_init()
{
    /* nothing to do */
}
32
33
34struct socket *
35solookup(struct socket *head, struct in_addr laddr,
36 u_int lport, struct in_addr faddr, u_int fport)
37{
38 struct socket *so;
39
40 for (so = head->so_next; so != head; so = so->so_next)
41 {
42 if ( so->so_lport == lport
43 && so->so_laddr.s_addr == laddr.s_addr
44 && so->so_faddr.s_addr == faddr.s_addr
45 && so->so_fport == fport)
46 return so;
47 }
48
49 return (struct socket *)NULL;
50}
51
52/*
53 * Create a new socket, initialise the fields
54 * It is the responsibility of the caller to
55 * insque() it into the correct linked-list
56 */
57struct socket *
58socreate()
59{
60 struct socket *so;
61
62 so = (struct socket *)RTMemAlloc(sizeof(struct socket));
63 if(so)
64 {
65 memset(so, 0, sizeof(struct socket));
66 so->so_state = SS_NOFDREF;
67 so->s = -1;
68 }
69 return so;
70}
71
72/*
73 * remque and free a socket, clobber cache
74 */
75void
76sofree(PNATState pData, struct socket *so)
77{
78 if (so == tcp_last_so)
79 tcp_last_so = &tcb;
80 else if (so == udp_last_so)
81 udp_last_so = &udb;
82
83 /* check if mbuf haven't been already freed */
84 if (so->so_m != NULL)
85 m_free(pData, so->so_m);
86
87 if(so->so_next && so->so_prev)
88 remque(pData, so); /* crashes if so is not in a queue */
89
90 RTMemFree(so);
91}
92
93/*
94 * Read from so's socket into sb_snd, updating all relevant sbuf fields
95 * NOTE: This will only be called if it is select()ed for reading, so
96 * a read() of 0 (or less) means it's disconnected
97 */
98int
99soread(PNATState pData, struct socket *so, int fCloseIfNothingRead)
100{
101 int n, nn, lss, total;
102 struct sbuf *sb = &so->so_snd;
103 size_t len = sb->sb_datalen - sb->sb_cc;
104 struct iovec iov[2];
105 int mss = so->so_tcpcb->t_maxseg;
106
107 DEBUG_CALL("soread");
108 DEBUG_ARG("so = %lx", (long )so);
109
110 /*
111 * No need to check if there's enough room to read.
112 * soread wouldn't have been called if there weren't
113 */
114
115 len = sb->sb_datalen - sb->sb_cc;
116
117 iov[0].iov_base = sb->sb_wptr;
118 iov[1].iov_base = 0;
119 iov[1].iov_len = 0;
120 if (sb->sb_wptr < sb->sb_rptr)
121 {
122 iov[0].iov_len = sb->sb_rptr - sb->sb_wptr;
123 /* Should never succeed, but... */
124 if (iov[0].iov_len > len)
125 iov[0].iov_len = len;
126 if (iov[0].iov_len > mss)
127 iov[0].iov_len -= iov[0].iov_len%mss;
128 n = 1;
129 }
130 else
131 {
132 iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr;
133 /* Should never succeed, but... */
134 if (iov[0].iov_len > len)
135 iov[0].iov_len = len;
136 len -= iov[0].iov_len;
137 if (len)
138 {
139 iov[1].iov_base = sb->sb_data;
140 iov[1].iov_len = sb->sb_rptr - sb->sb_data;
141 if(iov[1].iov_len > len)
142 iov[1].iov_len = len;
143 total = iov[0].iov_len + iov[1].iov_len;
144 if (total > mss)
145 {
146 lss = total % mss;
147 if (iov[1].iov_len > lss)
148 {
149 iov[1].iov_len -= lss;
150 n = 2;
151 }
152 else
153 {
154 lss -= iov[1].iov_len;
155 iov[0].iov_len -= lss;
156 n = 1;
157 }
158 }
159 else
160 n = 2;
161 }
162 else
163 {
164 if (iov[0].iov_len > mss)
165 iov[0].iov_len -= iov[0].iov_len%mss;
166 n = 1;
167 }
168 }
169
170#ifdef HAVE_READV
171 nn = readv(so->s, (struct iovec *)iov, n);
172 DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn));
173#else
174 nn = recv(so->s, iov[0].iov_base, iov[0].iov_len,0);
175#endif
176 if (nn <= 0)
177 {
178#if defined(VBOX_WITH_SIMPLIFIED_SLIRP_SYNC) && defined(RT_OS_WINDOWS)
179 /*
180 * Special case for WSAEnumNetworkEvents: If we receive 0 bytes that
181 * _could_ mean that the connection is closed. But we will receive an
182 * FD_CLOSE event later if the connection was _really_ closed. With
183 * www.youtube.com I see this very often. Closing the socket too early
184 * would be dangerous.
185 */
186 if (nn == 0 && !fCloseIfNothingRead)
187 return 0;
188#endif
189 if (nn < 0 && (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK))
190 return 0;
191 else
192 {
193 /* nn == 0 means peer has performed an orderly shutdown */
194 DEBUG_MISC((dfd, " --- soread() disconnected, nn = %d, errno = %d-%s\n",
195 nn, errno,strerror(errno)));
196 sofcantrcvmore(so);
197 tcp_sockclosed(pData, sototcpcb(so));
198 return -1;
199 }
200 }
201
202#ifndef HAVE_READV
203 /*
204 * If there was no error, try and read the second time round
205 * We read again if n = 2 (ie, there's another part of the buffer)
206 * and we read as much as we could in the first read
207 * We don't test for <= 0 this time, because there legitimately
208 * might not be any more data (since the socket is non-blocking),
209 * a close will be detected on next iteration.
210 * A return of -1 wont (shouldn't) happen, since it didn't happen above
211 */
212 if (n == 2 && nn == iov[0].iov_len)
213 {
214 int ret;
215 ret = recv(so->s, iov[1].iov_base, iov[1].iov_len,0);
216 if (ret > 0)
217 nn += ret;
218 }
219
220 DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn));
221#endif
222
223 /* Update fields */
224 sb->sb_cc += nn;
225 sb->sb_wptr += nn;
226 if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen))
227 sb->sb_wptr -= sb->sb_datalen;
228 return nn;
229}
230
231/*
232 * Get urgent data
233 *
234 * When the socket is created, we set it SO_OOBINLINE,
235 * so when OOB data arrives, we soread() it and everything
236 * in the send buffer is sent as urgent data
237 */
238void
239sorecvoob(PNATState pData, struct socket *so)
240{
241 struct tcpcb *tp = sototcpcb(so);
242
243 DEBUG_CALL("sorecvoob");
244 DEBUG_ARG("so = %lx", (long)so);
245
246 /*
247 * We take a guess at how much urgent data has arrived.
248 * In most situations, when urgent data arrives, the next
249 * read() should get all the urgent data. This guess will
250 * be wrong however if more data arrives just after the
251 * urgent data, or the read() doesn't return all the
252 * urgent data.
253 */
254 soread(pData, so, /*fCloseIfNothingRead=*/false);
255 tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
256 tp->t_force = 1;
257 tcp_output(pData, tp);
258 tp->t_force = 0;
259}
260
261/*
262 * Send urgent data
263 * There's a lot duplicated code here, but...
264 */
265int
266sosendoob(struct socket *so)
267{
268 struct sbuf *sb = &so->so_rcv;
269 char buff[2048]; /* XXX Shouldn't be sending more oob data than this */
270
271 int n, len;
272
273 DEBUG_CALL("sosendoob");
274 DEBUG_ARG("so = %lx", (long)so);
275 DEBUG_ARG("sb->sb_cc = %d", sb->sb_cc);
276
277 if (so->so_urgc > sizeof(buff))
278 so->so_urgc = sizeof(buff); /* XXX */
279
280 if (sb->sb_rptr < sb->sb_wptr)
281 {
282 /* We can send it directly */
283 n = send(so->s, sb->sb_rptr, so->so_urgc, (MSG_OOB)); /* |MSG_DONTWAIT)); */
284 so->so_urgc -= n;
285
286 DEBUG_MISC((dfd, " --- sent %d bytes urgent data, %d urgent bytes left\n",
287 n, so->so_urgc));
288 }
289 else
290 {
291 /*
292 * Since there's no sendv or sendtov like writev,
293 * we must copy all data to a linear buffer then
294 * send it all
295 */
296 len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
297 if (len > so->so_urgc)
298 len = so->so_urgc;
299 memcpy(buff, sb->sb_rptr, len);
300 so->so_urgc -= len;
301 if (so->so_urgc)
302 {
303 n = sb->sb_wptr - sb->sb_data;
304 if (n > so->so_urgc)
305 n = so->so_urgc;
306 memcpy(buff + len, sb->sb_data, n);
307 so->so_urgc -= n;
308 len += n;
309 }
310 n = send(so->s, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */
311#ifdef DEBUG
312 if (n != len)
313 DEBUG_ERROR((dfd, "Didn't send all data urgently XXXXX\n"));
314#endif
315 DEBUG_MISC((dfd, " ---2 sent %d bytes urgent data, %d urgent bytes left\n",
316 n, so->so_urgc));
317 }
318
319 sb->sb_cc -= n;
320 sb->sb_rptr += n;
321 if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
322 sb->sb_rptr -= sb->sb_datalen;
323
324 return n;
325}
326
327/*
328 * Write data from so_rcv to so's socket,
329 * updating all sbuf field as necessary
330 */
331int
332sowrite(PNATState pData, struct socket *so)
333{
334 int n,nn;
335 struct sbuf *sb = &so->so_rcv;
336 size_t len = sb->sb_cc;
337 struct iovec iov[2];
338
339 DEBUG_CALL("sowrite");
340 DEBUG_ARG("so = %lx", (long)so);
341
342 if (so->so_urgc)
343 {
344 sosendoob(so);
345 if (sb->sb_cc == 0)
346 return 0;
347 }
348
349 /*
350 * No need to check if there's something to write,
351 * sowrite wouldn't have been called otherwise
352 */
353
354 len = sb->sb_cc;
355
356 iov[0].iov_base = sb->sb_rptr;
357 iov[1].iov_base = 0;
358 iov[1].iov_len = 0;
359 if (sb->sb_rptr < sb->sb_wptr)
360 {
361 iov[0].iov_len = sb->sb_wptr - sb->sb_rptr;
362 /* Should never succeed, but... */
363 if (iov[0].iov_len > len)
364 iov[0].iov_len = len;
365 n = 1;
366 }
367 else
368 {
369 iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
370 if (iov[0].iov_len > len)
371 iov[0].iov_len = len;
372 len -= iov[0].iov_len;
373 if (len)
374 {
375 iov[1].iov_base = sb->sb_data;
376 iov[1].iov_len = sb->sb_wptr - sb->sb_data;
377 if (iov[1].iov_len > len)
378 iov[1].iov_len = len;
379 n = 2;
380 }
381 else
382 n = 1;
383 }
384 /* Check if there's urgent data to send, and if so, send it */
385#ifdef HAVE_READV
386 nn = writev(so->s, (const struct iovec *)iov, n);
387 DEBUG_MISC((dfd, " ... wrote nn = %d bytes\n", nn));
388#else
389 nn = send(so->s, iov[0].iov_base, iov[0].iov_len, 0);
390#endif
391 /* This should never happen, but people tell me it does *shrug* */
392 if (nn < 0 && (errno == EAGAIN || errno == EINTR || errno == EWOULDBLOCK))
393 return 0;
394
395 if (nn < 0 || (nn == 0 && iov[0].iov_len > 0))
396 {
397 DEBUG_MISC((dfd, " --- sowrite disconnected, so->so_state = %x, errno = %d\n",
398 so->so_state, errno));
399 sofcantsendmore(so);
400 tcp_sockclosed(pData, sototcpcb(so));
401 return -1;
402 }
403
404#ifndef HAVE_READV
405 if (n == 2 && nn == iov[0].iov_len)
406 {
407 int ret;
408 ret = send(so->s, iov[1].iov_base, iov[1].iov_len,0);
409 if (ret > 0)
410 nn += ret;
411 }
412 DEBUG_MISC((dfd, " ... wrote nn = %d bytes\n", nn));
413#endif
414
415 /* Update sbuf */
416 sb->sb_cc -= nn;
417 sb->sb_rptr += nn;
418 if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
419 sb->sb_rptr -= sb->sb_datalen;
420
421 /*
422 * If in DRAIN mode, and there's no more data, set
423 * it CANTSENDMORE
424 */
425 if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0)
426 sofcantsendmore(so);
427
428 return nn;
429}
430
431/*
432 * recvfrom() a UDP socket
433 */
434void
435sorecvfrom(PNATState pData, struct socket *so)
436{
437 struct sockaddr_in addr;
438 socklen_t addrlen = sizeof(struct sockaddr_in);
439
440 DEBUG_CALL("sorecvfrom");
441 DEBUG_ARG("so = %lx", (long)so);
442
443 if (so->so_type == IPPROTO_ICMP)
444 {
445 /* This is a "ping" reply */
446#ifdef RT_OS_WINDOWS
447 sorecvfrom_icmp_win(pData, so);
448#else /* RT_OS_WINDOWS */
449 sorecvfrom_icmp_unix(pData, so);
450#endif /* !RT_OS_WINDOWS */
451 udp_detach(pData, so);
452 }
453 else
454 {
455 /* A "normal" UDP packet */
456 struct mbuf *m;
457 size_t len;
458 u_long n;
459
460 if (!(m = m_get(pData)))
461 return;
462 m->m_data += if_maxlinkhdr;
463#ifdef VBOX_WITH_SIMPLIFIED_SLIRP_SYNC
464 m->m_data += sizeof(struct udphdr)
465 + sizeof(struct ip); /*XXX: no options atm*/
466#endif
467
468 /*
469 * XXX Shouldn't FIONREAD packets destined for port 53,
470 * but I don't know the max packet size for DNS lookups
471 */
472 len = M_FREEROOM(m);
473 /* if (so->so_fport != htons(53)) */
474 {
475 ioctlsocket(so->s, FIONREAD, &n);
476
477 if (n > len)
478 {
479 n = (m->m_data - m->m_dat) + m->m_len + n + 1;
480 m_inc(m, n);
481 len = M_FREEROOM(m);
482 }
483 }
484
485 m->m_len = recvfrom(so->s, m->m_data, len, 0,
486 (struct sockaddr *)&addr, &addrlen);
487 DEBUG_MISC((dfd, " did recvfrom %d, errno = %d-%s\n",
488 m->m_len, errno,strerror(errno)));
489 if(m->m_len < 0)
490 {
491 u_char code = ICMP_UNREACH_PORT;
492
493 if (errno == EHOSTUNREACH)
494 code = ICMP_UNREACH_HOST;
495 else if(errno == ENETUNREACH)
496 code = ICMP_UNREACH_NET;
497
498 DEBUG_MISC((dfd," rx error, tx icmp ICMP_UNREACH:%i\n", code));
499 icmp_error(pData, so->so_m, ICMP_UNREACH,code, 0,strerror(errno));
500 m_free(pData, m);
501 }
502 else
503 {
504 /*
505 * Hack: domain name lookup will be used the most for UDP,
506 * and since they'll only be used once there's no need
507 * for the 4 minute (or whatever) timeout... So we time them
508 * out much quicker (10 seconds for now...)
509 */
510 if (so->so_expire)
511 {
512 if (so->so_fport == htons(53))
513 so->so_expire = curtime + SO_EXPIREFAST;
514 else
515 so->so_expire = curtime + SO_EXPIRE;
516 }
517
518#if 0
519 if (m->m_len == len)
520 {
521 m_inc(m, MINCSIZE);
522 m->m_len = 0;
523 }
524#endif
525
526 /*
527 * If this packet was destined for CTL_ADDR,
528 * make it look like that's where it came from, done by udp_output
529 */
530 udp_output(pData, so, m, &addr);
531 } /* rx error */
532 } /* if ping packet */
533}
534
535/*
536 * sendto() a socket
537 */
538int
539sosendto(PNATState pData, struct socket *so, struct mbuf *m)
540{
541 int ret;
542 struct sockaddr_in addr;
543#if 0
544 struct sockaddr_in host_addr;
545#endif
546
547 DEBUG_CALL("sosendto");
548 DEBUG_ARG("so = %lx", (long)so);
549 DEBUG_ARG("m = %lx", (long)m);
550
551 addr.sin_family = AF_INET;
552 if ((so->so_faddr.s_addr & htonl(pData->netmask)) == special_addr.s_addr)
553 {
554 /* It's an alias */
555 uint32_t last_byte = ntohl(so->so_faddr.s_addr) & ~pData->netmask;
556 switch(last_byte)
557 {
558#if 0
559 /* handle this case at 'default:' */
560 case CTL_BROADCAST:
561 addr.sin_addr.s_addr = INADDR_BROADCAST;
562 /* Send the packet to host to fully emulate broadcast */
563 /** @todo r=klaus: on Linux host this causes the host to receive
564 * the packet twice for some reason. And I cannot find any place
565 * in the man pages which states that sending a broadcast does not
566 * reach the host itself. */
567 host_addr.sin_family = AF_INET;
568 host_addr.sin_port = so->so_fport;
569 host_addr.sin_addr = our_addr;
570 sendto(so->s, m->m_data, m->m_len, 0,
571 (struct sockaddr *)&host_addr, sizeof (struct sockaddr));
572 break;
573#endif
574 case CTL_DNS:
575 if (!get_dns_addr(pData, &dns_addr))
576 addr.sin_addr = dns_addr;
577 else
578 addr.sin_addr = loopback_addr;
579 break;
580 case CTL_ALIAS:
581 default:
582 if (last_byte == ~pData->netmask)
583 addr.sin_addr.s_addr = INADDR_BROADCAST;
584 else
585 addr.sin_addr = loopback_addr;
586 break;
587 }
588 }
589 else
590 addr.sin_addr = so->so_faddr;
591 addr.sin_port = so->so_fport;
592
593 DEBUG_MISC((dfd, " sendto()ing, addr.sin_port=%d, addr.sin_addr.s_addr=%.16s\n",
594 ntohs(addr.sin_port), inet_ntoa(addr.sin_addr)));
595
596 /* Don't care what port we get */
597 ret = sendto(so->s, m->m_data, m->m_len, 0,
598 (struct sockaddr *)&addr, sizeof (struct sockaddr));
599 if (ret < 0)
600 {
601 LogRel(("UDP: sendto fails (%s)\n", strerror(errno)));
602 return -1;
603 }
604
605 /*
606 * Kill the socket if there's no reply in 4 minutes,
607 * but only if it's an expirable socket
608 */
609 if (so->so_expire)
610 so->so_expire = curtime + SO_EXPIRE;
611 so->so_state = SS_ISFCONNECTED; /* So that it gets select()ed */
612 return 0;
613}
614
615/*
616 * XXX This should really be tcp_listen
617 */
618struct socket *
619solisten(PNATState pData, u_int port, u_int32_t laddr, u_int lport, int flags)
620{
621 struct sockaddr_in addr;
622 struct socket *so;
623 socklen_t addrlen = sizeof(addr);
624 int s, opt = 1;
625
626 DEBUG_CALL("solisten");
627 DEBUG_ARG("port = %d", port);
628 DEBUG_ARG("laddr = %x", laddr);
629 DEBUG_ARG("lport = %d", lport);
630 DEBUG_ARG("flags = %x", flags);
631
632 if ((so = socreate()) == NULL)
633 {
634 /* RTMemFree(so); Not sofree() ??? free(NULL) == NOP */
635 return NULL;
636 }
637
638 /* Don't tcp_attach... we don't need so_snd nor so_rcv */
639 if ((so->so_tcpcb = tcp_newtcpcb(pData, so)) == NULL)
640 {
641 RTMemFree(so);
642 return NULL;
643 }
644 insque(pData, so,&tcb);
645
646 /*
647 * SS_FACCEPTONCE sockets must time out.
648 */
649 if (flags & SS_FACCEPTONCE)
650 so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT*2;
651
652 so->so_state = (SS_FACCEPTCONN|flags);
653 so->so_lport = lport; /* Kept in network format */
654 so->so_laddr.s_addr = laddr; /* Ditto */
655
656 addr.sin_family = AF_INET;
657 addr.sin_addr.s_addr = INADDR_ANY;
658 addr.sin_port = port;
659
660 if ( ((s = socket(AF_INET,SOCK_STREAM,0)) < 0)
661 || (setsockopt(s,SOL_SOCKET,SO_REUSEADDR,(char *)&opt,sizeof(int)) < 0)
662 || (bind(s,(struct sockaddr *)&addr, sizeof(addr)) < 0)
663 || (listen(s,1) < 0))
664 {
665#ifdef RT_OS_WINDOWS
666 int tmperrno = WSAGetLastError(); /* Don't clobber the real reason we failed */
667 closesocket(s);
668 sofree(pData, so);
669 /* Restore the real errno */
670 WSASetLastError(tmperrno);
671#else
672 int tmperrno = errno; /* Don't clobber the real reason we failed */
673 close(s);
674 sofree(pData, so);
675 /* Restore the real errno */
676 errno = tmperrno;
677#endif
678 return NULL;
679 }
680 setsockopt(s,SOL_SOCKET,SO_OOBINLINE,(char *)&opt,sizeof(int));
681
682 getsockname(s,(struct sockaddr *)&addr,&addrlen);
683 so->so_fport = addr.sin_port;
684 if (addr.sin_addr.s_addr == 0 || addr.sin_addr.s_addr == loopback_addr.s_addr)
685 so->so_faddr = alias_addr;
686 else
687 so->so_faddr = addr.sin_addr;
688
689 so->s = s;
690 return so;
691}
692
/*
 * Notification that data is available in so_rcv.
 * Writing it to the socket right away is disabled (see the #if 0 code),
 * so this is currently a no-op.
 */
void
sorwakeup(struct socket *so)
{
#if 0
    sowrite(so);
    FD_CLR(so->s,&writefds);
#endif
}
706
/*
 * Notification that data has been freed in so_snd, i.e. there is room
 * for a read().  Nothing is read here; per the original note the read is
 * done in the main loop, so this is a no-op.
 */
void
sowwakeup(struct socket *so)
{
    /* intentionally empty */
}
716
717/*
718 * Various session state calls
719 * XXX Should be #define's
720 * The socket state stuff needs work, these often get call 2 or 3
721 * times each when only 1 was needed
722 */
723void
724soisfconnecting(struct socket *so)
725{
726 so->so_state &= ~(SS_NOFDREF|SS_ISFCONNECTED|SS_FCANTRCVMORE|
727 SS_FCANTSENDMORE|SS_FWDRAIN);
728 so->so_state |= SS_ISFCONNECTING; /* Clobber other states */
729}
730
731void
732soisfconnected(struct socket *so)
733{
734 so->so_state &= ~(SS_ISFCONNECTING|SS_FWDRAIN|SS_NOFDREF);
735 so->so_state |= SS_ISFCONNECTED; /* Clobber other states */
736}
737
738void
739sofcantrcvmore(struct socket *so)
740{
741 if ((so->so_state & SS_NOFDREF) == 0)
742 {
743 shutdown(so->s,0);
744 }
745 so->so_state &= ~(SS_ISFCONNECTING);
746 if (so->so_state & SS_FCANTSENDMORE)
747 so->so_state = SS_NOFDREF; /* Don't select it */
748 /* XXX close() here as well? */
749 else
750 so->so_state |= SS_FCANTRCVMORE;
751}
752
753void
754sofcantsendmore(struct socket *so)
755{
756 if ((so->so_state & SS_NOFDREF) == 0)
757 shutdown(so->s, 1); /* send FIN to fhost */
758
759 so->so_state &= ~(SS_ISFCONNECTING);
760 if (so->so_state & SS_FCANTRCVMORE)
761 so->so_state = SS_NOFDREF; /* as above */
762 else
763 so->so_state |= SS_FCANTSENDMORE;
764}
765
/*
 * Notification that the foreign side is disconnected.
 * The state handling is compiled out (see the #if 0 code), so this is
 * currently a no-op.
 */
void
soisfdisconnected(struct socket *so)
{
#if 0
    so->so_state &= ~(SS_ISFCONNECTING|SS_ISFCONNECTED);
    close(so->s);
    so->so_state = SS_ISFDISCONNECTED;
    /*
     * XXX Do nothing ... ?
     */
#endif
}
778
779/*
780 * Set write drain mode
781 * Set CANTSENDMORE once all data has been write()n
782 */
783void
784sofwdrain(struct socket *so)
785{
786 if (so->so_rcv.sb_cc)
787 so->so_state |= SS_FWDRAIN;
788 else
789 sofcantsendmore(so);
790}
791
792static void
793send_icmp_to_guest(PNATState pData, char *buff, size_t len, struct socket *so, const struct sockaddr_in *addr)
794{
795 struct ip *ip;
796 uint32_t dst,src;
797 char ip_copy[256];
798 struct icmp *icp;
799 int old_ip_len;
800 int hlen, original_hlen = 0;
801 struct mbuf *m;
802 struct icmp_msg *icm;
803 uint8_t proto;
804
805 ip = (struct ip *)buff;
806 hlen = (ip->ip_hl << 2);
807 icp = (struct icmp *)((char *)ip + hlen);
808
809 Log(("ICMP:received msg(t:%d, c:%d)\n", icp->icmp_type, icp->icmp_code));
810 if ( icp->icmp_type != ICMP_ECHOREPLY
811 && icp->icmp_type != ICMP_TIMXCEED
812 && icp->icmp_type != ICMP_UNREACH)
813 {
814 return;
815 }
816
817 if ( icp->icmp_type == ICMP_TIMXCEED
818 || icp->icmp_type == ICMP_UNREACH)
819 {
820 ip = &icp->icmp_ip;
821 DO_ALIAS(&ip->ip_dst);
822 }
823 else
824 {
825 DO_ALIAS(&ip->ip_src);
826 }
827
828 icm = icmp_find_original_mbuf(pData, ip);
829
830 if (icm == NULL)
831 {
832 Log(("NAT: Can't find the corresponding packet for the received ICMP\n"));
833 return;
834 }
835
836 m = icm->im_m;
837 Assert(m != NULL);
838
839 src = addr->sin_addr.s_addr;
840
841 ip = mtod(m, struct ip *);
842 proto = ip->ip_p;
843 /* Now ip is pointing on header we've sent from guest */
844 if ( icp->icmp_type == ICMP_TIMXCEED
845 || icp->icmp_type == ICMP_UNREACH)
846 {
847 old_ip_len = (ip->ip_hl << 2) + 64;
848 if (old_ip_len > sizeof(ip_copy))
849 old_ip_len = sizeof(ip_copy);
850 memcpy(ip_copy, ip, old_ip_len);
851 }
852
853 /* source address from original IP packet*/
854 dst = ip->ip_src.s_addr;
855
856 /* overide ther tail of old packet */
857 ip = mtod(m, struct ip *); /* ip is from mbuf we've overrided */
858 original_hlen = ip->ip_hl << 2;
859 /* saves original ip header and options */
860 memcpy(m->m_data + original_hlen, buff + hlen, len - hlen);
861 m->m_len = len - hlen + original_hlen;
862 ip->ip_len = m->m_len;
863 ip->ip_p = IPPROTO_ICMP; /* the original package could be whatever, but we're response via ICMP*/
864
865 icp = (struct icmp *)((char *)ip + (ip->ip_hl << 2));
866 if ( icp->icmp_type == ICMP_TIMXCEED
867 || icp->icmp_type == ICMP_UNREACH)
868 {
869 /* according RFC 793 error messages required copy of initial IP header + 64 bit */
870 memcpy(&icp->icmp_ip, ip_copy, old_ip_len);
871 ip->ip_tos = ((ip->ip_tos & 0x1E) | 0xC0); /* high priority for errors */
872 }
873
874 ip->ip_src.s_addr = src;
875 ip->ip_dst.s_addr = dst;
876 icmp_reflect(pData, m);
877 LIST_REMOVE(icm, im_list);
878 /* Don't call m_free here*/
879
880 if ( icp->icmp_type == ICMP_TIMXCEED
881 || icp->icmp_type == ICMP_UNREACH)
882 {
883 icm->im_so->so_m = NULL;
884 switch (proto)
885 {
886 case IPPROTO_UDP:
887 /*XXX: so->so_m already freed so we shouldn't call sofree */
888 udp_detach(pData, icm->im_so);
889 break;
890 case IPPROTO_TCP:
891 /*close tcp should be here */
892 break;
893 default:
894 /* do nothing */
895 break;
896 }
897 }
898 RTMemFree(icm);
899}
900
901#ifdef RT_OS_WINDOWS
902static void
903sorecvfrom_icmp_win(PNATState pData, struct socket *so)
904{
905 int len;
906 int i;
907 struct ip *ip;
908 struct mbuf *m;
909 struct icmp *icp;
910 struct icmp_msg *icm;
911 struct ip *ip_broken; /* ICMP returns header + 64 bit of packet */
912 uint32_t src;
913 ICMP_ECHO_REPLY *icr;
914 int hlen = 0;
915 int data_len = 0;
916 int nbytes = 0;
917 u_char code = ~0;
918
919 len = pData->pfIcmpParseReplies(pData->pvIcmpBuffer, pData->szIcmpBuffer);
920#ifndef VBOX_WITH_SIMPLIFIED_SLIRP_SYNC
921 fIcmp = 0; /* reply processed */
922#endif
923 if (len < 0)
924 {
925 LogRel(("NAT: Error (%d) occurred on ICMP receiving\n", GetLastError()));
926 return;
927 }
928 if (len == 0)
929 return; /* no error */
930
931 icr = (ICMP_ECHO_REPLY *)pData->pvIcmpBuffer;
932 for (i = 0; i < len; ++i)
933 {
934 switch(icr[i].Status)
935 {
936 case IP_DEST_HOST_UNREACHABLE:
937 code = (code != ~0 ? code : ICMP_UNREACH_HOST);
938 case IP_DEST_NET_UNREACHABLE:
939 code = (code != ~0 ? code : ICMP_UNREACH_NET);
940 case IP_DEST_PROT_UNREACHABLE:
941 code = (code != ~0 ? code : ICMP_UNREACH_PROTOCOL);
942 /* UNREACH error inject here */
943 case IP_DEST_PORT_UNREACHABLE:
944 code = (code != ~0 ? code : ICMP_UNREACH_PORT);
945 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, "Error occurred!!!");
946 break;
947 case IP_SUCCESS: /* echo replied */
948 m = m_get(pData);
949 ip = mtod(m, struct ip *);
950 ip->ip_src.s_addr = icr[i].Address;
951 DO_ALIAS(&ip->ip_src);
952 ip->ip_p = IPPROTO_ICMP;
953 ip->ip_dst.s_addr = so->so_laddr.s_addr; /*XXX: still the hack*/
954 data_len = sizeof(struct ip);
955 ip->ip_hl = data_len >> 2; /* requiered for icmp_reflect, no IP options */
956 ip->ip_ttl = icr[i].Options.Ttl;
957
958 icp = (struct icmp *)&ip[1]; /* no options */
959 icp->icmp_type = ICMP_ECHOREPLY;
960 icp->icmp_code = 0;
961 icp->icmp_id = so->so_icmp_id;
962 icp->icmp_seq = so->so_icmp_seq;
963
964 data_len += ICMP_MINLEN;
965
966 nbytes = (data_len + icr[i].DataSize > m->m_size? m->m_size - data_len: icr[i].DataSize);
967 memcpy(icp->icmp_data, icr[i].Data, nbytes);
968
969 data_len += icr[i].DataSize;
970
971 ip->ip_len = data_len;
972 m->m_len = ip->ip_len;
973
974 icmp_reflect(pData, m);
975 break;
976 case IP_TTL_EXPIRED_TRANSIT: /* TTL expired */
977
978 ip_broken = icr[i].Data;
979 icm = icmp_find_original_mbuf(pData, ip_broken);
980 if (icm == NULL) {
981 Log(("ICMP: can't find original package (first double word %x)\n", *(uint32_t *)ip_broken));
982 return;
983 }
984 m = icm->im_m;
985 ip = mtod(m, struct ip *);
986 ip->ip_ttl = icr[i].Options.Ttl;
987 src = ip->ip_src.s_addr;
988 ip->ip_dst.s_addr = src;
989 ip->ip_dst.s_addr = icr[i].Address;
990
991 hlen = (ip->ip_hl << 2);
992 icp = (struct icmp *)((char *)ip + hlen);
993 ip_broken->ip_src.s_addr = src; /*it packet sent from host not from guest*/
994 data_len = (ip_broken->ip_hl << 2) + 64;
995
996 nbytes =(hlen + ICMP_MINLEN + data_len > m->m_size? m->m_size - (hlen + ICMP_MINLEN): data_len);
997 memcpy(icp->icmp_data, ip_broken, nbytes);
998 icmp_reflect(pData, m);
999 break;
1000 default:
1001 Log(("ICMP(default): message with Status: %x was received from %x\n", icr[i].Status, icr[i].Address));
1002 break;
1003 }
1004 }
1005}
1006#else /* RT_OS_WINDOWS */
1007static void sorecvfrom_icmp_unix(PNATState pData, struct socket *so)
1008{
1009 struct sockaddr_in addr;
1010 socklen_t addrlen = sizeof(struct sockaddr_in);
1011 char buff[1500];
1012 int len;
1013 len = recvfrom(so->s, buff, 1500, 0,
1014 (struct sockaddr *)&addr, &addrlen);
1015 /* XXX Check if reply is "correct"? */
1016
1017 if (len == -1 || len == 0)
1018 {
1019 u_char code = ICMP_UNREACH_PORT;
1020
1021 if (errno == EHOSTUNREACH)
1022 code = ICMP_UNREACH_HOST;
1023 else if(errno == ENETUNREACH)
1024 code = ICMP_UNREACH_NET;
1025
1026 DEBUG_MISC((dfd," udp icmp rx errno = %d-%s\n",
1027 errno,strerror(errno)));
1028 icmp_error(pData, so->so_m, ICMP_UNREACH,code, 0,strerror(errno));
1029 }
1030 else
1031 {
1032 send_icmp_to_guest(pData, buff, len, so, &addr);
1033 }
1034}
1035#endif /* !RT_OS_WINDOWS */
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette