VirtualBox

source: vbox/trunk/src/VBox/Devices/Network/slirp/socket.c@ 16630

Last change on this file since 16630 was 16571, checked in by vboxsync, 16 years ago

NAT:warnings

  • Property svn:eol-style set to native
File size: 30.3 KB
Line 
1/*
2 * Copyright (c) 1995 Danny Gasparovski.
3 *
4 * Please read the file COPYRIGHT for the
5 * terms and conditions of the copyright.
6 */
7
8#define WANT_SYS_IOCTL_H
9#include <slirp.h>
10#include "ip_icmp.h"
11#include "main.h"
12#ifdef __sun__
13#include <sys/filio.h>
14#endif
15#if defined (RT_OS_WINDOWS)
16#include <iphlpapi.h>
17#include <icmpapi.h>
18#endif
19
20
21static void send_icmp_to_guest(PNATState, char *, size_t, struct socket *, const struct sockaddr_in *);
22#ifdef RT_OS_WINDOWS
23static void sorecvfrom_icmp_win(PNATState, struct socket *);
24#else /* RT_OS_WINDOWS */
25static void sorecvfrom_icmp_unix(PNATState, struct socket *);
26#endif /* !RT_OS_WINDOWS */
27
/*
 * Socket layer initialisation hook.
 * There is currently nothing to set up, so this is a no-op.
 */
void
so_init(void)
{
}
32
33
34struct socket *
35solookup(struct socket *head, struct in_addr laddr,
36 u_int lport, struct in_addr faddr, u_int fport)
37{
38 struct socket *so;
39
40 for (so = head->so_next; so != head; so = so->so_next)
41 {
42 if ( so->so_lport == lport
43 && so->so_laddr.s_addr == laddr.s_addr
44 && so->so_faddr.s_addr == faddr.s_addr
45 && so->so_fport == fport)
46 return so;
47 }
48
49 return (struct socket *)NULL;
50}
51
52/*
53 * Create a new socket, initialise the fields
54 * It is the responsibility of the caller to
55 * insque() it into the correct linked-list
56 */
57struct socket *
58socreate()
59{
60 struct socket *so;
61
62 so = (struct socket *)RTMemAllocZ(sizeof(struct socket));
63 if(so)
64 {
65 memset(so, 0, sizeof(struct socket));
66 so->so_state = SS_NOFDREF;
67 so->s = -1;
68 }
69 return so;
70}
71
72/*
73 * remque and free a socket, clobber cache
74 * VBOX_WITH_SLIRP_MT: before sofree queue should be locked, because
75 * in sofree we don't know from which queue item beeing removed.
76 */
77void
78sofree(PNATState pData, struct socket *so)
79{
80 struct socket *so_prev = NULL;
81 if (so == tcp_last_so)
82 tcp_last_so = &tcb;
83 else if (so == udp_last_so)
84 udp_last_so = &udb;
85
86 /* check if mbuf haven't been already freed */
87 if (so->so_m != NULL)
88 m_free(pData, so->so_m);
89#ifndef VBOX_WITH_SLIRP_MT
90 if(so->so_next && so->so_prev)
91 remque(pData, so); /* crashes if so is not in a queue */
92 NSOCK_DEC();
93 so->so_state = SS_NOFDREF; /* for debugging purposes */
94
95 RTMemFree(so);
96#else
97 so->so_deleted = 1;
98#endif
99}
100
#ifdef VBOX_WITH_SLIRP_MT
/* Adapter around soread() that passes the result back through *ret. */
void
soread_queue(PNATState pData, struct socket *so, int *ret)
{
    int cbRead = soread(pData, so);
    *ret = cbRead;
}
#endif
108
109/*
110 * Read from so's socket into sb_snd, updating all relevant sbuf fields
111 * NOTE: This will only be called if it is select()ed for reading, so
112 * a read() of 0 (or less) means it's disconnected
113 */
114int
115soread(PNATState pData, struct socket *so)
116{
117 int n, nn, lss, total;
118 struct sbuf *sb = &so->so_snd;
119 size_t len = sb->sb_datalen - sb->sb_cc;
120 struct iovec iov[2];
121 int mss = so->so_tcpcb->t_maxseg;
122 QSOCKET_LOCK(tcb);
123 SOCKET_LOCK(so);
124 QSOCKET_UNLOCK(tcb);
125
126 DEBUG_CALL("soread");
127 DEBUG_ARG("so = %lx", (long )so);
128
129 /*
130 * No need to check if there's enough room to read.
131 * soread wouldn't have been called if there weren't
132 */
133
134 len = sb->sb_datalen - sb->sb_cc;
135
136 iov[0].iov_base = sb->sb_wptr;
137 iov[1].iov_base = 0;
138 iov[1].iov_len = 0;
139 if (sb->sb_wptr < sb->sb_rptr)
140 {
141 iov[0].iov_len = sb->sb_rptr - sb->sb_wptr;
142 /* Should never succeed, but... */
143 if (iov[0].iov_len > len)
144 iov[0].iov_len = len;
145 if (iov[0].iov_len > mss)
146 iov[0].iov_len -= iov[0].iov_len%mss;
147 n = 1;
148 }
149 else
150 {
151 iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr;
152 /* Should never succeed, but... */
153 if (iov[0].iov_len > len)
154 iov[0].iov_len = len;
155 len -= iov[0].iov_len;
156 if (len)
157 {
158 iov[1].iov_base = sb->sb_data;
159 iov[1].iov_len = sb->sb_rptr - sb->sb_data;
160 if(iov[1].iov_len > len)
161 iov[1].iov_len = len;
162 total = iov[0].iov_len + iov[1].iov_len;
163 if (total > mss)
164 {
165 lss = total % mss;
166 if (iov[1].iov_len > lss)
167 {
168 iov[1].iov_len -= lss;
169 n = 2;
170 }
171 else
172 {
173 lss -= iov[1].iov_len;
174 iov[0].iov_len -= lss;
175 n = 1;
176 }
177 }
178 else
179 n = 2;
180 }
181 else
182 {
183 if (iov[0].iov_len > mss)
184 iov[0].iov_len -= iov[0].iov_len%mss;
185 n = 1;
186 }
187 }
188
189#ifdef HAVE_READV
190 nn = readv(so->s, (struct iovec *)iov, n);
191 DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn));
192#else
193 nn = recv(so->s, iov[0].iov_base, iov[0].iov_len,0);
194#endif
195 if (nn <= 0)
196 {
197#if defined(VBOX_WITH_SIMPLIFIED_SLIRP_SYNC) && defined(RT_OS_WINDOWS)
198 /*
199 * Special case for WSAEnumNetworkEvents: If we receive 0 bytes that
200 * _could_ mean that the connection is closed. But we will receive an
201 * FD_CLOSE event later if the connection was _really_ closed. With
202 * www.youtube.com I see this very often. Closing the socket too early
203 * would be dangerous.
204 */
205 int status, ignored;
206 unsigned long pending = 0;
207 status = WSAIoctl(so->s, FIONREAD, NULL, 0, &pending, sizeof(unsigned long), &ignored, NULL, NULL);
208 if (status < 0)
209 Log2(("error in WSAIoctl: %d\n", WSAGetLastError()));
210 if (nn == 0 && (pending != 0))
211 {
212 SOCKET_UNLOCK(so);
213 return 0;
214 }
215#endif
216 if (nn < 0 && (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK))
217 {
218 SOCKET_UNLOCK(so);
219 return 0;
220 }
221 else
222 {
223 /* nn == 0 means peer has performed an orderly shutdown */
224 DEBUG_MISC((dfd, " --- soread() disconnected, nn = %d, errno = %d-%s\n",
225 nn, errno,strerror(errno)));
226 sofcantrcvmore(so);
227 tcp_sockclosed(pData, sototcpcb(so));
228 SOCKET_UNLOCK(so);
229 return -1;
230 }
231 }
232
233#ifndef HAVE_READV
234 /*
235 * If there was no error, try and read the second time round
236 * We read again if n = 2 (ie, there's another part of the buffer)
237 * and we read as much as we could in the first read
238 * We don't test for <= 0 this time, because there legitimately
239 * might not be any more data (since the socket is non-blocking),
240 * a close will be detected on next iteration.
241 * A return of -1 wont (shouldn't) happen, since it didn't happen above
242 */
243 if (n == 2 && nn == iov[0].iov_len)
244 {
245 int ret;
246 ret = recv(so->s, iov[1].iov_base, iov[1].iov_len,0);
247 if (ret > 0)
248 nn += ret;
249 }
250
251 DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn));
252#endif
253
254 /* Update fields */
255 sb->sb_cc += nn;
256 sb->sb_wptr += nn;
257 if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen))
258 sb->sb_wptr -= sb->sb_datalen;
259 SOCKET_UNLOCK(so);
260 return nn;
261}
262
263/*
264 * Get urgent data
265 *
266 * When the socket is created, we set it SO_OOBINLINE,
267 * so when OOB data arrives, we soread() it and everything
268 * in the send buffer is sent as urgent data
269 */
270void
271sorecvoob(PNATState pData, struct socket *so)
272{
273 struct tcpcb *tp = sototcpcb(so);
274
275 DEBUG_CALL("sorecvoob");
276 DEBUG_ARG("so = %lx", (long)so);
277
278 /*
279 * We take a guess at how much urgent data has arrived.
280 * In most situations, when urgent data arrives, the next
281 * read() should get all the urgent data. This guess will
282 * be wrong however if more data arrives just after the
283 * urgent data, or the read() doesn't return all the
284 * urgent data.
285 */
286 soread(pData, so);
287 tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
288 tp->t_force = 1;
289 tcp_output(pData, tp);
290 tp->t_force = 0;
291}
292
293/*
294 * Send urgent data
295 * There's a lot duplicated code here, but...
296 */
297int
298sosendoob(struct socket *so)
299{
300 struct sbuf *sb = &so->so_rcv;
301 char buff[2048]; /* XXX Shouldn't be sending more oob data than this */
302
303 int n, len;
304
305 DEBUG_CALL("sosendoob");
306 DEBUG_ARG("so = %lx", (long)so);
307 DEBUG_ARG("sb->sb_cc = %d", sb->sb_cc);
308
309 if (so->so_urgc > sizeof(buff))
310 so->so_urgc = sizeof(buff); /* XXX */
311
312 if (sb->sb_rptr < sb->sb_wptr)
313 {
314 /* We can send it directly */
315 n = send(so->s, sb->sb_rptr, so->so_urgc, (MSG_OOB)); /* |MSG_DONTWAIT)); */
316 so->so_urgc -= n;
317
318 DEBUG_MISC((dfd, " --- sent %d bytes urgent data, %d urgent bytes left\n",
319 n, so->so_urgc));
320 }
321 else
322 {
323 /*
324 * Since there's no sendv or sendtov like writev,
325 * we must copy all data to a linear buffer then
326 * send it all
327 */
328 len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
329 if (len > so->so_urgc)
330 len = so->so_urgc;
331 memcpy(buff, sb->sb_rptr, len);
332 so->so_urgc -= len;
333 if (so->so_urgc)
334 {
335 n = sb->sb_wptr - sb->sb_data;
336 if (n > so->so_urgc)
337 n = so->so_urgc;
338 memcpy(buff + len, sb->sb_data, n);
339 so->so_urgc -= n;
340 len += n;
341 }
342 n = send(so->s, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */
343#ifdef DEBUG
344 if (n != len)
345 DEBUG_ERROR((dfd, "Didn't send all data urgently XXXXX\n"));
346#endif
347 DEBUG_MISC((dfd, " ---2 sent %d bytes urgent data, %d urgent bytes left\n",
348 n, so->so_urgc));
349 }
350
351 sb->sb_cc -= n;
352 sb->sb_rptr += n;
353 if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
354 sb->sb_rptr -= sb->sb_datalen;
355
356 return n;
357}
358
359/*
360 * Write data from so_rcv to so's socket,
361 * updating all sbuf field as necessary
362 */
363int
364sowrite(PNATState pData, struct socket *so)
365{
366 int n,nn;
367 struct sbuf *sb = &so->so_rcv;
368 size_t len = sb->sb_cc;
369 struct iovec iov[2];
370
371 DEBUG_CALL("sowrite");
372 DEBUG_ARG("so = %lx", (long)so);
373 QSOCKET_LOCK(tcb);
374 SOCKET_LOCK(so);
375 QSOCKET_UNLOCK(tcb);
376 if (so->so_urgc)
377 {
378 sosendoob(so);
379 if (sb->sb_cc == 0)
380 {
381 SOCKET_UNLOCK(so);
382 return 0;
383 }
384 }
385
386 /*
387 * No need to check if there's something to write,
388 * sowrite wouldn't have been called otherwise
389 */
390
391 len = sb->sb_cc;
392
393 iov[0].iov_base = sb->sb_rptr;
394 iov[1].iov_base = 0;
395 iov[1].iov_len = 0;
396 if (sb->sb_rptr < sb->sb_wptr)
397 {
398 iov[0].iov_len = sb->sb_wptr - sb->sb_rptr;
399 /* Should never succeed, but... */
400 if (iov[0].iov_len > len)
401 iov[0].iov_len = len;
402 n = 1;
403 }
404 else
405 {
406 iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
407 if (iov[0].iov_len > len)
408 iov[0].iov_len = len;
409 len -= iov[0].iov_len;
410 if (len)
411 {
412 iov[1].iov_base = sb->sb_data;
413 iov[1].iov_len = sb->sb_wptr - sb->sb_data;
414 if (iov[1].iov_len > len)
415 iov[1].iov_len = len;
416 n = 2;
417 }
418 else
419 n = 1;
420 }
421 /* Check if there's urgent data to send, and if so, send it */
422#ifdef HAVE_READV
423 nn = writev(so->s, (const struct iovec *)iov, n);
424 DEBUG_MISC((dfd, " ... wrote nn = %d bytes\n", nn));
425#else
426 nn = send(so->s, iov[0].iov_base, iov[0].iov_len, 0);
427#endif
428 /* This should never happen, but people tell me it does *shrug* */
429 if (nn < 0 && (errno == EAGAIN || errno == EINTR || errno == EWOULDBLOCK))
430 {
431 SOCKET_UNLOCK(so);
432 return 0;
433 }
434
435 if (nn < 0 || (nn == 0 && iov[0].iov_len > 0))
436 {
437 DEBUG_MISC((dfd, " --- sowrite disconnected, so->so_state = %x, errno = %d\n",
438 so->so_state, errno));
439 sofcantsendmore(so);
440 tcp_sockclosed(pData, sototcpcb(so));
441 SOCKET_UNLOCK(so);
442 return -1;
443 }
444
445#ifndef HAVE_READV
446 if (n == 2 && nn == iov[0].iov_len)
447 {
448 int ret;
449 ret = send(so->s, iov[1].iov_base, iov[1].iov_len,0);
450 if (ret > 0)
451 nn += ret;
452 }
453 DEBUG_MISC((dfd, " ... wrote nn = %d bytes\n", nn));
454#endif
455
456 /* Update sbuf */
457 sb->sb_cc -= nn;
458 sb->sb_rptr += nn;
459 if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
460 sb->sb_rptr -= sb->sb_datalen;
461
462 /*
463 * If in DRAIN mode, and there's no more data, set
464 * it CANTSENDMORE
465 */
466 if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0)
467 sofcantsendmore(so);
468
469 SOCKET_UNLOCK(so);
470 return nn;
471}
472
473/*
474 * recvfrom() a UDP socket
475 */
476void
477sorecvfrom(PNATState pData, struct socket *so)
478{
479 struct sockaddr_in addr;
480 socklen_t addrlen = sizeof(struct sockaddr_in);
481
482 DEBUG_CALL("sorecvfrom");
483 DEBUG_ARG("so = %lx", (long)so);
484
485 if (so->so_type == IPPROTO_ICMP)
486 {
487 /* This is a "ping" reply */
488#ifdef RT_OS_WINDOWS
489 sorecvfrom_icmp_win(pData, so);
490#else /* RT_OS_WINDOWS */
491 sorecvfrom_icmp_unix(pData, so);
492#endif /* !RT_OS_WINDOWS */
493 udp_detach(pData, so);
494 }
495 else
496 {
497 /* A "normal" UDP packet */
498 struct mbuf *m;
499 size_t len;
500 u_long n;
501
502 QSOCKET_LOCK(udb);
503 SOCKET_LOCK(so);
504 QSOCKET_UNLOCK(udb);
505
506 if (!(m = m_get(pData)))
507 {
508 SOCKET_UNLOCK(so);
509 return;
510 }
511 m->m_data += if_maxlinkhdr;
512#ifdef VBOX_WITH_SIMPLIFIED_SLIRP_SYNC
513 m->m_data += sizeof(struct udphdr)
514 + sizeof(struct ip); /*XXX: no options atm*/
515#endif
516
517 /*
518 * XXX Shouldn't FIONREAD packets destined for port 53,
519 * but I don't know the max packet size for DNS lookups
520 */
521 len = M_FREEROOM(m);
522 /* if (so->so_fport != htons(53)) */
523 {
524 ioctlsocket(so->s, FIONREAD, &n);
525
526 if (n > len)
527 {
528 n = (m->m_data - m->m_dat) + m->m_len + n + 1;
529 m_inc(m, n);
530 len = M_FREEROOM(m);
531 }
532 }
533
534 m->m_len = recvfrom(so->s, m->m_data, len, 0,
535 (struct sockaddr *)&addr, &addrlen);
536 Log2((" did recvfrom %d, errno = %d-%s\n",
537 m->m_len, errno,strerror(errno)));
538 if(m->m_len < 0)
539 {
540 u_char code = ICMP_UNREACH_PORT;
541
542 if (errno == EHOSTUNREACH)
543 code = ICMP_UNREACH_HOST;
544 else if(errno == ENETUNREACH)
545 code = ICMP_UNREACH_NET;
546
547 Log2((dfd," rx error, tx icmp ICMP_UNREACH:%i\n", code));
548 icmp_error(pData, so->so_m, ICMP_UNREACH,code, 0,strerror(errno));
549 m_free(pData, m);
550 }
551 else
552 {
553 /*
554 * Hack: domain name lookup will be used the most for UDP,
555 * and since they'll only be used once there's no need
556 * for the 4 minute (or whatever) timeout... So we time them
557 * out much quicker (10 seconds for now...)
558 */
559 if (so->so_expire)
560 {
561 if (so->so_fport == htons(53))
562 so->so_expire = curtime + SO_EXPIREFAST;
563 else
564 so->so_expire = curtime + SO_EXPIRE;
565 }
566
567#if 0
568 if (m->m_len == len)
569 {
570 m_inc(m, MINCSIZE);
571 m->m_len = 0;
572 }
573#endif
574
575 /*
576 * If this packet was destined for CTL_ADDR,
577 * make it look like that's where it came from, done by udp_output
578 */
579 udp_output(pData, so, m, &addr);
580 SOCKET_UNLOCK(so);
581 } /* rx error */
582 } /* if ping packet */
583}
584
585/*
586 * sendto() a socket
587 */
588int
589sosendto(PNATState pData, struct socket *so, struct mbuf *m)
590{
591 int ret;
592 struct sockaddr_in addr;
593#if 0
594 struct sockaddr_in host_addr;
595#endif
596
597 DEBUG_CALL("sosendto");
598 DEBUG_ARG("so = %lx", (long)so);
599 DEBUG_ARG("m = %lx", (long)m);
600
601 addr.sin_family = AF_INET;
602 if ((so->so_faddr.s_addr & htonl(pData->netmask)) == special_addr.s_addr)
603 {
604 /* It's an alias */
605 uint32_t last_byte = ntohl(so->so_faddr.s_addr) & ~pData->netmask;
606 switch(last_byte)
607 {
608#if 0
609 /* handle this case at 'default:' */
610 case CTL_BROADCAST:
611 addr.sin_addr.s_addr = INADDR_BROADCAST;
612 /* Send the packet to host to fully emulate broadcast */
613 /** @todo r=klaus: on Linux host this causes the host to receive
614 * the packet twice for some reason. And I cannot find any place
615 * in the man pages which states that sending a broadcast does not
616 * reach the host itself. */
617 host_addr.sin_family = AF_INET;
618 host_addr.sin_port = so->so_fport;
619 host_addr.sin_addr = our_addr;
620 sendto(so->s, m->m_data, m->m_len, 0,
621 (struct sockaddr *)&host_addr, sizeof (struct sockaddr));
622 break;
623#endif
624 case CTL_DNS:
625#ifndef VBOX_WITH_MULTI_DNS
626 if (!get_dns_addr(pData, &dns_addr))
627 addr.sin_addr = dns_addr;
628 else
629 addr.sin_addr = loopback_addr;
630 break;
631#endif
632 case CTL_ALIAS:
633 default:
634 if (last_byte == ~pData->netmask)
635 addr.sin_addr.s_addr = INADDR_BROADCAST;
636 else
637 addr.sin_addr = loopback_addr;
638 break;
639 }
640 }
641 else
642 addr.sin_addr = so->so_faddr;
643 addr.sin_port = so->so_fport;
644
645 DEBUG_MISC((dfd, " sendto()ing, addr.sin_port=%d, addr.sin_addr.s_addr=%.16s\n",
646 ntohs(addr.sin_port), inet_ntoa(addr.sin_addr)));
647
648 /* Don't care what port we get */
649 ret = sendto(so->s, m->m_data, m->m_len, 0,
650 (struct sockaddr *)&addr, sizeof (struct sockaddr));
651 if (ret < 0)
652 {
653 LogRel(("UDP: sendto fails (%s)\n", strerror(errno)));
654 return -1;
655 }
656
657 /*
658 * Kill the socket if there's no reply in 4 minutes,
659 * but only if it's an expirable socket
660 */
661 if (so->so_expire)
662 so->so_expire = curtime + SO_EXPIRE;
663 so->so_state = SS_ISFCONNECTED; /* So that it gets select()ed */
664 return 0;
665}
666
667/*
668 * XXX This should really be tcp_listen
669 */
670struct socket *
671solisten(PNATState pData, u_int port, u_int32_t laddr, u_int lport, int flags)
672{
673 struct sockaddr_in addr;
674 struct socket *so;
675 socklen_t addrlen = sizeof(addr);
676 int s, opt = 1;
677
678 DEBUG_CALL("solisten");
679 DEBUG_ARG("port = %d", port);
680 DEBUG_ARG("laddr = %x", laddr);
681 DEBUG_ARG("lport = %d", lport);
682 DEBUG_ARG("flags = %x", flags);
683
684 if ((so = socreate()) == NULL)
685 {
686 /* RTMemFree(so); Not sofree() ??? free(NULL) == NOP */
687 return NULL;
688 }
689
690 /* Don't tcp_attach... we don't need so_snd nor so_rcv */
691 if ((so->so_tcpcb = tcp_newtcpcb(pData, so)) == NULL)
692 {
693 RTMemFree(so);
694 return NULL;
695 }
696
697 SOCKET_LOCK_CREATE(so);
698 SOCKET_LOCK(so);
699 QSOCKET_LOCK(tcb);
700 insque(pData, so,&tcb);
701 NSOCK_INC();
702 QSOCKET_UNLOCK(tcb);
703
704 /*
705 * SS_FACCEPTONCE sockets must time out.
706 */
707 if (flags & SS_FACCEPTONCE)
708 so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT*2;
709
710 so->so_state = (SS_FACCEPTCONN|flags);
711 so->so_lport = lport; /* Kept in network format */
712 so->so_laddr.s_addr = laddr; /* Ditto */
713
714 addr.sin_family = AF_INET;
715 addr.sin_addr.s_addr = INADDR_ANY;
716 addr.sin_port = port;
717
718 if ( ((s = socket(AF_INET,SOCK_STREAM,0)) < 0)
719 || (setsockopt(s,SOL_SOCKET,SO_REUSEADDR,(char *)&opt,sizeof(int)) < 0)
720 || (bind(s,(struct sockaddr *)&addr, sizeof(addr)) < 0)
721 || (listen(s,1) < 0))
722 {
723#ifdef RT_OS_WINDOWS
724 int tmperrno = WSAGetLastError(); /* Don't clobber the real reason we failed */
725 closesocket(s);
726 QSOCKET_LOCK(tcb);
727 sofree(pData, so);
728 QSOCKET_UNLOCK(tcb);
729 /* Restore the real errno */
730 WSASetLastError(tmperrno);
731#else
732 int tmperrno = errno; /* Don't clobber the real reason we failed */
733 close(s);
734 QSOCKET_LOCK(tcb);
735 sofree(pData, so);
736 QSOCKET_UNLOCK(tcb);
737 /* Restore the real errno */
738 errno = tmperrno;
739#endif
740 return NULL;
741 }
742 setsockopt(s,SOL_SOCKET,SO_OOBINLINE,(char *)&opt,sizeof(int));
743
744 getsockname(s,(struct sockaddr *)&addr,&addrlen);
745 so->so_fport = addr.sin_port;
746 if (addr.sin_addr.s_addr == 0 || addr.sin_addr.s_addr == loopback_addr.s_addr)
747 so->so_faddr = alias_addr;
748 else
749 so->so_faddr = addr.sin_addr;
750
751 so->s = s;
752 SOCKET_UNLOCK(so);
753 return so;
754}
755
/*
 * Data is available in so_rcv.
 * The idea is to write() the data straight to the socket from here,
 * but that is not done yet: this function is currently a no-op.
 */
void
sorwakeup(struct socket *so)
{
    (void)so; /* intentionally unused */
}
769
/*
 * Data has been freed in so_snd, so there is room for a read() now.
 * Nothing is read here; that is left to the main loop, which makes
 * this function a no-op.
 */
void
sowwakeup(struct socket *so)
{
    (void)so; /* intentionally unused */
}
779
780/*
781 * Various session state calls
782 * XXX Should be #define's
783 * The socket state stuff needs work, these often get call 2 or 3
784 * times each when only 1 was needed
785 */
786void
787soisfconnecting(struct socket *so)
788{
789 so->so_state &= ~(SS_NOFDREF|SS_ISFCONNECTED|SS_FCANTRCVMORE|
790 SS_FCANTSENDMORE|SS_FWDRAIN);
791 so->so_state |= SS_ISFCONNECTING; /* Clobber other states */
792}
793
794void
795soisfconnected(struct socket *so)
796{
797 so->so_state &= ~(SS_ISFCONNECTING|SS_FWDRAIN|SS_NOFDREF);
798 so->so_state |= SS_ISFCONNECTED; /* Clobber other states */
799}
800
801void
802sofcantrcvmore(struct socket *so)
803{
804 if ((so->so_state & SS_NOFDREF) == 0)
805 {
806 shutdown(so->s,0);
807 }
808 so->so_state &= ~(SS_ISFCONNECTING);
809 if (so->so_state & SS_FCANTSENDMORE)
810 so->so_state = SS_NOFDREF; /* Don't select it */
811 /* XXX close() here as well? */
812 else
813 so->so_state |= SS_FCANTRCVMORE;
814}
815
816void
817sofcantsendmore(struct socket *so)
818{
819 if ((so->so_state & SS_NOFDREF) == 0)
820 shutdown(so->s, 1); /* send FIN to fhost */
821
822 so->so_state &= ~(SS_ISFCONNECTING);
823 if (so->so_state & SS_FCANTRCVMORE)
824 so->so_state = SS_NOFDREF; /* as above */
825 else
826 so->so_state |= SS_FCANTSENDMORE;
827}
828
/*
 * Notification that the foreign side is disconnected.
 * XXX Do nothing ... ?  Closing so->s and switching the state to
 * SS_ISFDISCONNECTED is compiled out, so this is currently a no-op.
 */
void
soisfdisconnected(struct socket *so)
{
    (void)so; /* intentionally unused */
}
841
842/*
843 * Set write drain mode
844 * Set CANTSENDMORE once all data has been write()n
845 */
846void
847sofwdrain(struct socket *so)
848{
849 if (so->so_rcv.sb_cc)
850 so->so_state |= SS_FWDRAIN;
851 else
852 sofcantsendmore(so);
853}
854
855static void
856send_icmp_to_guest(PNATState pData, char *buff, size_t len, struct socket *so, const struct sockaddr_in *addr)
857{
858 struct ip *ip;
859 uint32_t dst,src;
860 char ip_copy[256];
861 struct icmp *icp;
862 int old_ip_len = 0;
863 int hlen, original_hlen = 0;
864 struct mbuf *m;
865 struct icmp_msg *icm;
866 uint8_t proto;
867 int type = 0;
868
869 ip = (struct ip *)buff;
870 hlen = (ip->ip_hl << 2);
871 icp = (struct icmp *)((char *)ip + hlen);
872
873 Log(("ICMP:received msg(t:%d, c:%d)\n", icp->icmp_type, icp->icmp_code));
874 if ( icp->icmp_type != ICMP_ECHOREPLY
875 && icp->icmp_type != ICMP_TIMXCEED
876 && icp->icmp_type != ICMP_UNREACH)
877 {
878 return;
879 }
880
881 type = icp->icmp_type;
882 if ( type == ICMP_TIMXCEED
883 || type == ICMP_UNREACH)
884 {
885 ip = &icp->icmp_ip;
886 DO_ALIAS(&ip->ip_dst);
887 }
888 else
889 {
890 DO_ALIAS(&ip->ip_src);
891 }
892
893 icm = icmp_find_original_mbuf(pData, ip);
894
895 if (icm == NULL)
896 {
897 Log(("NAT: Can't find the corresponding packet for the received ICMP\n"));
898 return;
899 }
900
901 m = icm->im_m;
902 Assert(m != NULL);
903
904 src = addr->sin_addr.s_addr;
905
906 ip = mtod(m, struct ip *);
907 proto = ip->ip_p;
908 /* Now ip is pointing on header we've sent from guest */
909 if ( icp->icmp_type == ICMP_TIMXCEED
910 || icp->icmp_type == ICMP_UNREACH)
911 {
912 old_ip_len = (ip->ip_hl << 2) + 64;
913 if (old_ip_len > sizeof(ip_copy))
914 old_ip_len = sizeof(ip_copy);
915 memcpy(ip_copy, ip, old_ip_len);
916 }
917
918 /* source address from original IP packet*/
919 dst = ip->ip_src.s_addr;
920
921 /* overide ther tail of old packet */
922 ip = mtod(m, struct ip *); /* ip is from mbuf we've overrided */
923 original_hlen = ip->ip_hl << 2;
924 /* saves original ip header and options */
925 memcpy(m->m_data + original_hlen, buff + hlen, len - hlen);
926 m->m_len = len - hlen + original_hlen;
927 ip->ip_len = m->m_len;
928 ip->ip_p = IPPROTO_ICMP; /* the original package could be whatever, but we're response via ICMP*/
929
930 icp = (struct icmp *)((char *)ip + (ip->ip_hl << 2));
931 type = icp->icmp_type;
932 if ( type == ICMP_TIMXCEED
933 || type == ICMP_UNREACH)
934 {
935 /* according RFC 793 error messages required copy of initial IP header + 64 bit */
936 memcpy(&icp->icmp_ip, ip_copy, old_ip_len);
937 ip->ip_tos = ((ip->ip_tos & 0x1E) | 0xC0); /* high priority for errors */
938 }
939
940 ip->ip_src.s_addr = src;
941 ip->ip_dst.s_addr = dst;
942 icmp_reflect(pData, m);
943 LIST_REMOVE(icm, im_list);
944 /* Don't call m_free here*/
945
946 if ( type == ICMP_TIMXCEED
947 || type == ICMP_UNREACH)
948 {
949 icm->im_so->so_m = NULL;
950 switch (proto)
951 {
952 case IPPROTO_UDP:
953 /*XXX: so->so_m already freed so we shouldn't call sofree */
954 udp_detach(pData, icm->im_so);
955 break;
956 case IPPROTO_TCP:
957 /*close tcp should be here */
958 break;
959 default:
960 /* do nothing */
961 break;
962 }
963 }
964 RTMemFree(icm);
965}
966
967#ifdef RT_OS_WINDOWS
968static void
969sorecvfrom_icmp_win(PNATState pData, struct socket *so)
970{
971 int len;
972 int i;
973 struct ip *ip;
974 struct mbuf *m;
975 struct icmp *icp;
976 struct icmp_msg *icm;
977 struct ip *ip_broken; /* ICMP returns header + 64 bit of packet */
978 uint32_t src;
979 ICMP_ECHO_REPLY *icr;
980 int hlen = 0;
981 int data_len = 0;
982 int nbytes = 0;
983 u_char code = ~0;
984
985 len = pData->pfIcmpParseReplies(pData->pvIcmpBuffer, pData->szIcmpBuffer);
986#ifndef VBOX_WITH_SIMPLIFIED_SLIRP_SYNC
987 fIcmp = 0; /* reply processed */
988#endif
989 if (len < 0)
990 {
991 LogRel(("NAT: Error (%d) occurred on ICMP receiving\n", GetLastError()));
992 return;
993 }
994 if (len == 0)
995 return; /* no error */
996
997 icr = (ICMP_ECHO_REPLY *)pData->pvIcmpBuffer;
998 for (i = 0; i < len; ++i)
999 {
1000 switch(icr[i].Status)
1001 {
1002 case IP_DEST_HOST_UNREACHABLE:
1003 code = (code != ~0 ? code : ICMP_UNREACH_HOST);
1004 case IP_DEST_NET_UNREACHABLE:
1005 code = (code != ~0 ? code : ICMP_UNREACH_NET);
1006 case IP_DEST_PROT_UNREACHABLE:
1007 code = (code != ~0 ? code : ICMP_UNREACH_PROTOCOL);
1008 /* UNREACH error inject here */
1009 case IP_DEST_PORT_UNREACHABLE:
1010 code = (code != ~0 ? code : ICMP_UNREACH_PORT);
1011 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, "Error occurred!!!");
1012 break;
1013 case IP_SUCCESS: /* echo replied */
1014 m = m_get(pData);
1015 m->m_data += if_maxlinkhdr;
1016 ip = mtod(m, struct ip *);
1017 ip->ip_src.s_addr = icr[i].Address;
1018 DO_ALIAS(&ip->ip_src);
1019 ip->ip_p = IPPROTO_ICMP;
1020 ip->ip_dst.s_addr = so->so_laddr.s_addr; /*XXX: still the hack*/
1021 data_len = sizeof(struct ip);
1022 ip->ip_hl = data_len >> 2; /* requiered for icmp_reflect, no IP options */
1023 ip->ip_ttl = icr[i].Options.Ttl;
1024
1025 icp = (struct icmp *)&ip[1]; /* no options */
1026 icp->icmp_type = ICMP_ECHOREPLY;
1027 icp->icmp_code = 0;
1028 icp->icmp_id = so->so_icmp_id;
1029 icp->icmp_seq = so->so_icmp_seq;
1030
1031 data_len += ICMP_MINLEN;
1032
1033 nbytes = (data_len + icr[i].DataSize > m->m_size? m->m_size - data_len: icr[i].DataSize);
1034 memcpy(icp->icmp_data, icr[i].Data, nbytes);
1035
1036 data_len += icr[i].DataSize;
1037
1038 ip->ip_len = data_len;
1039 m->m_len = ip->ip_len;
1040
1041 icmp_reflect(pData, m);
1042 break;
1043 case IP_TTL_EXPIRED_TRANSIT: /* TTL expired */
1044
1045 ip_broken = icr[i].Data;
1046 icm = icmp_find_original_mbuf(pData, ip_broken);
1047 if (icm == NULL) {
1048 Log(("ICMP: can't find original package (first double word %x)\n", *(uint32_t *)ip_broken));
1049 return;
1050 }
1051 m = icm->im_m;
1052 ip = mtod(m, struct ip *);
1053 ip->ip_ttl = icr[i].Options.Ttl;
1054 src = ip->ip_src.s_addr;
1055 ip->ip_dst.s_addr = src;
1056 ip->ip_dst.s_addr = icr[i].Address;
1057
1058 hlen = (ip->ip_hl << 2);
1059 icp = (struct icmp *)((char *)ip + hlen);
1060 ip_broken->ip_src.s_addr = src; /*it packet sent from host not from guest*/
1061 data_len = (ip_broken->ip_hl << 2) + 64;
1062
1063 nbytes =(hlen + ICMP_MINLEN + data_len > m->m_size? m->m_size - (hlen + ICMP_MINLEN): data_len);
1064 memcpy(icp->icmp_data, ip_broken, nbytes);
1065 icmp_reflect(pData, m);
1066 break;
1067 default:
1068 Log(("ICMP(default): message with Status: %x was received from %x\n", icr[i].Status, icr[i].Address));
1069 break;
1070 }
1071 }
1072}
1073#else /* RT_OS_WINDOWS */
1074static void sorecvfrom_icmp_unix(PNATState pData, struct socket *so)
1075{
1076 struct sockaddr_in addr;
1077 socklen_t addrlen = sizeof(struct sockaddr_in);
1078 char buff[1500];
1079 int len;
1080 len = recvfrom(so->s, buff, 1500, 0,
1081 (struct sockaddr *)&addr, &addrlen);
1082 /* XXX Check if reply is "correct"? */
1083
1084 if (len == -1 || len == 0)
1085 {
1086 u_char code = ICMP_UNREACH_PORT;
1087
1088 if (errno == EHOSTUNREACH)
1089 code = ICMP_UNREACH_HOST;
1090 else if(errno == ENETUNREACH)
1091 code = ICMP_UNREACH_NET;
1092
1093 DEBUG_MISC((dfd," udp icmp rx errno = %d-%s\n",
1094 errno,strerror(errno)));
1095 icmp_error(pData, so->so_m, ICMP_UNREACH,code, 0,strerror(errno));
1096 }
1097 else
1098 {
1099 send_icmp_to_guest(pData, buff, len, so, &addr);
1100 }
1101}
1102#endif /* !RT_OS_WINDOWS */
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette