VirtualBox

source: vbox/trunk/src/VBox/Devices/Network/slirp/socket.c@ 16291

Last change on this file since 16291 was 16291, checked in by vboxsync, 16 years ago

NAT: multi threading.
Introduces a set of macros for locking/unlocking/creating/destroying mutexes.
Also, every socket enqueueing is accompanied by lock creation
(it doesn't work yet; compiles on Windows, don't know about Linux)

  • Property svn:eol-style set to native
File size: 29.1 KB
Line 
1/*
2 * Copyright (c) 1995 Danny Gasparovski.
3 *
4 * Please read the file COPYRIGHT for the
5 * terms and conditions of the copyright.
6 */
7
8#define WANT_SYS_IOCTL_H
9#include <slirp.h>
10#include "ip_icmp.h"
11#include "main.h"
12#ifdef __sun__
13#include <sys/filio.h>
14#endif
15#if defined (RT_OS_WINDOWS)
16#include <iphlpapi.h>
17#include <icmpapi.h>
18#endif
19
20
21static void send_icmp_to_guest(PNATState, char *, size_t, struct socket *, const struct sockaddr_in *);
22#ifdef RT_OS_WINDOWS
23static void sorecvfrom_icmp_win(PNATState, struct socket *);
24#else /* RT_OS_WINDOWS */
25static void sorecvfrom_icmp_unix(PNATState, struct socket *);
26#endif /* !RT_OS_WINDOWS */
27
/*
 * Socket module initialisation hook.
 * There is currently no state to set up, so the body is empty.
 */
void
so_init()
{
    /* intentionally empty */
}
32
33
34struct socket *
35solookup(struct socket *head, struct in_addr laddr,
36 u_int lport, struct in_addr faddr, u_int fport)
37{
38 struct socket *so;
39
40 for (so = head->so_next; so != head; so = so->so_next)
41 {
42 if ( so->so_lport == lport
43 && so->so_laddr.s_addr == laddr.s_addr
44 && so->so_faddr.s_addr == faddr.s_addr
45 && so->so_fport == fport)
46 return so;
47 }
48
49 return (struct socket *)NULL;
50}
51
52/*
53 * Create a new socket, initialise the fields
54 * It is the responsibility of the caller to
55 * insque() it into the correct linked-list
56 */
57struct socket *
58socreate()
59{
60 struct socket *so;
61
62 so = (struct socket *)RTMemAlloc(sizeof(struct socket));
63 if(so)
64 {
65 memset(so, 0, sizeof(struct socket));
66 so->so_state = SS_NOFDREF;
67 so->s = -1;
68 }
69 return so;
70}
71
72/*
73 * remque and free a socket, clobber cache
74 * VBOX_WITH_SLIRP_MT: before sofree queue should be locked, because
75 * in sofree we don't know from which queue item beeing removed.
76 */
77void
78sofree(PNATState pData, struct socket *so)
79{
80 if (so == tcp_last_so)
81 tcp_last_so = &tcb;
82 else if (so == udp_last_so)
83 udp_last_so = &udb;
84
85 /* check if mbuf haven't been already freed */
86 if (so->so_m != NULL)
87 m_free(pData, so->so_m);
88
89 if(so->so_next && so->so_prev)
90 remque(pData, so); /* crashes if so is not in a queue */
91
92 SOCKET_UNLOCK(so);
93 SOCKET_LOCK_DESTROY(so);
94
95 RTMemFree(so);
96}
97
98/*
99 * Read from so's socket into sb_snd, updating all relevant sbuf fields
100 * NOTE: This will only be called if it is select()ed for reading, so
101 * a read() of 0 (or less) means it's disconnected
102 */
103int
104soread(PNATState pData, struct socket *so, int fCloseIfNothingRead)
105{
106 int n, nn, lss, total;
107 struct sbuf *sb = &so->so_snd;
108 size_t len = sb->sb_datalen - sb->sb_cc;
109 struct iovec iov[2];
110 int mss = so->so_tcpcb->t_maxseg;
111
112 DEBUG_CALL("soread");
113 DEBUG_ARG("so = %lx", (long )so);
114
115 /*
116 * No need to check if there's enough room to read.
117 * soread wouldn't have been called if there weren't
118 */
119
120 len = sb->sb_datalen - sb->sb_cc;
121
122 iov[0].iov_base = sb->sb_wptr;
123 iov[1].iov_base = 0;
124 iov[1].iov_len = 0;
125 if (sb->sb_wptr < sb->sb_rptr)
126 {
127 iov[0].iov_len = sb->sb_rptr - sb->sb_wptr;
128 /* Should never succeed, but... */
129 if (iov[0].iov_len > len)
130 iov[0].iov_len = len;
131 if (iov[0].iov_len > mss)
132 iov[0].iov_len -= iov[0].iov_len%mss;
133 n = 1;
134 }
135 else
136 {
137 iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr;
138 /* Should never succeed, but... */
139 if (iov[0].iov_len > len)
140 iov[0].iov_len = len;
141 len -= iov[0].iov_len;
142 if (len)
143 {
144 iov[1].iov_base = sb->sb_data;
145 iov[1].iov_len = sb->sb_rptr - sb->sb_data;
146 if(iov[1].iov_len > len)
147 iov[1].iov_len = len;
148 total = iov[0].iov_len + iov[1].iov_len;
149 if (total > mss)
150 {
151 lss = total % mss;
152 if (iov[1].iov_len > lss)
153 {
154 iov[1].iov_len -= lss;
155 n = 2;
156 }
157 else
158 {
159 lss -= iov[1].iov_len;
160 iov[0].iov_len -= lss;
161 n = 1;
162 }
163 }
164 else
165 n = 2;
166 }
167 else
168 {
169 if (iov[0].iov_len > mss)
170 iov[0].iov_len -= iov[0].iov_len%mss;
171 n = 1;
172 }
173 }
174
175#ifdef HAVE_READV
176 nn = readv(so->s, (struct iovec *)iov, n);
177 DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn));
178#else
179 nn = recv(so->s, iov[0].iov_base, iov[0].iov_len,0);
180#endif
181 if (nn <= 0)
182 {
183#if defined(VBOX_WITH_SIMPLIFIED_SLIRP_SYNC) && defined(RT_OS_WINDOWS)
184 /*
185 * Special case for WSAEnumNetworkEvents: If we receive 0 bytes that
186 * _could_ mean that the connection is closed. But we will receive an
187 * FD_CLOSE event later if the connection was _really_ closed. With
188 * www.youtube.com I see this very often. Closing the socket too early
189 * would be dangerous.
190 */
191 if (nn == 0 && !fCloseIfNothingRead)
192 return 0;
193#endif
194 if (nn < 0 && (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK))
195 return 0;
196 else
197 {
198 /* nn == 0 means peer has performed an orderly shutdown */
199 DEBUG_MISC((dfd, " --- soread() disconnected, nn = %d, errno = %d-%s\n",
200 nn, errno,strerror(errno)));
201 sofcantrcvmore(so);
202 tcp_sockclosed(pData, sototcpcb(so));
203 return -1;
204 }
205 }
206
207#ifndef HAVE_READV
208 /*
209 * If there was no error, try and read the second time round
210 * We read again if n = 2 (ie, there's another part of the buffer)
211 * and we read as much as we could in the first read
212 * We don't test for <= 0 this time, because there legitimately
213 * might not be any more data (since the socket is non-blocking),
214 * a close will be detected on next iteration.
215 * A return of -1 wont (shouldn't) happen, since it didn't happen above
216 */
217 if (n == 2 && nn == iov[0].iov_len)
218 {
219 int ret;
220 ret = recv(so->s, iov[1].iov_base, iov[1].iov_len,0);
221 if (ret > 0)
222 nn += ret;
223 }
224
225 DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn));
226#endif
227
228 /* Update fields */
229 sb->sb_cc += nn;
230 sb->sb_wptr += nn;
231 if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen))
232 sb->sb_wptr -= sb->sb_datalen;
233 return nn;
234}
235
236/*
237 * Get urgent data
238 *
239 * When the socket is created, we set it SO_OOBINLINE,
240 * so when OOB data arrives, we soread() it and everything
241 * in the send buffer is sent as urgent data
242 */
243void
244sorecvoob(PNATState pData, struct socket *so)
245{
246 struct tcpcb *tp = sototcpcb(so);
247
248 DEBUG_CALL("sorecvoob");
249 DEBUG_ARG("so = %lx", (long)so);
250
251 /*
252 * We take a guess at how much urgent data has arrived.
253 * In most situations, when urgent data arrives, the next
254 * read() should get all the urgent data. This guess will
255 * be wrong however if more data arrives just after the
256 * urgent data, or the read() doesn't return all the
257 * urgent data.
258 */
259 soread(pData, so, /*fCloseIfNothingRead=*/false);
260 tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
261 tp->t_force = 1;
262 tcp_output(pData, tp);
263 tp->t_force = 0;
264}
265
266/*
267 * Send urgent data
268 * There's a lot duplicated code here, but...
269 */
270int
271sosendoob(struct socket *so)
272{
273 struct sbuf *sb = &so->so_rcv;
274 char buff[2048]; /* XXX Shouldn't be sending more oob data than this */
275
276 int n, len;
277
278 DEBUG_CALL("sosendoob");
279 DEBUG_ARG("so = %lx", (long)so);
280 DEBUG_ARG("sb->sb_cc = %d", sb->sb_cc);
281
282 if (so->so_urgc > sizeof(buff))
283 so->so_urgc = sizeof(buff); /* XXX */
284
285 if (sb->sb_rptr < sb->sb_wptr)
286 {
287 /* We can send it directly */
288 n = send(so->s, sb->sb_rptr, so->so_urgc, (MSG_OOB)); /* |MSG_DONTWAIT)); */
289 so->so_urgc -= n;
290
291 DEBUG_MISC((dfd, " --- sent %d bytes urgent data, %d urgent bytes left\n",
292 n, so->so_urgc));
293 }
294 else
295 {
296 /*
297 * Since there's no sendv or sendtov like writev,
298 * we must copy all data to a linear buffer then
299 * send it all
300 */
301 len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
302 if (len > so->so_urgc)
303 len = so->so_urgc;
304 memcpy(buff, sb->sb_rptr, len);
305 so->so_urgc -= len;
306 if (so->so_urgc)
307 {
308 n = sb->sb_wptr - sb->sb_data;
309 if (n > so->so_urgc)
310 n = so->so_urgc;
311 memcpy(buff + len, sb->sb_data, n);
312 so->so_urgc -= n;
313 len += n;
314 }
315 n = send(so->s, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */
316#ifdef DEBUG
317 if (n != len)
318 DEBUG_ERROR((dfd, "Didn't send all data urgently XXXXX\n"));
319#endif
320 DEBUG_MISC((dfd, " ---2 sent %d bytes urgent data, %d urgent bytes left\n",
321 n, so->so_urgc));
322 }
323
324 sb->sb_cc -= n;
325 sb->sb_rptr += n;
326 if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
327 sb->sb_rptr -= sb->sb_datalen;
328
329 return n;
330}
331
332/*
333 * Write data from so_rcv to so's socket,
334 * updating all sbuf field as necessary
335 */
336int
337sowrite(PNATState pData, struct socket *so)
338{
339 int n,nn;
340 struct sbuf *sb = &so->so_rcv;
341 size_t len = sb->sb_cc;
342 struct iovec iov[2];
343
344 DEBUG_CALL("sowrite");
345 DEBUG_ARG("so = %lx", (long)so);
346
347 if (so->so_urgc)
348 {
349 sosendoob(so);
350 if (sb->sb_cc == 0)
351 return 0;
352 }
353
354 /*
355 * No need to check if there's something to write,
356 * sowrite wouldn't have been called otherwise
357 */
358
359 len = sb->sb_cc;
360
361 iov[0].iov_base = sb->sb_rptr;
362 iov[1].iov_base = 0;
363 iov[1].iov_len = 0;
364 if (sb->sb_rptr < sb->sb_wptr)
365 {
366 iov[0].iov_len = sb->sb_wptr - sb->sb_rptr;
367 /* Should never succeed, but... */
368 if (iov[0].iov_len > len)
369 iov[0].iov_len = len;
370 n = 1;
371 }
372 else
373 {
374 iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
375 if (iov[0].iov_len > len)
376 iov[0].iov_len = len;
377 len -= iov[0].iov_len;
378 if (len)
379 {
380 iov[1].iov_base = sb->sb_data;
381 iov[1].iov_len = sb->sb_wptr - sb->sb_data;
382 if (iov[1].iov_len > len)
383 iov[1].iov_len = len;
384 n = 2;
385 }
386 else
387 n = 1;
388 }
389 /* Check if there's urgent data to send, and if so, send it */
390#ifdef HAVE_READV
391 nn = writev(so->s, (const struct iovec *)iov, n);
392 DEBUG_MISC((dfd, " ... wrote nn = %d bytes\n", nn));
393#else
394 nn = send(so->s, iov[0].iov_base, iov[0].iov_len, 0);
395#endif
396 /* This should never happen, but people tell me it does *shrug* */
397 if (nn < 0 && (errno == EAGAIN || errno == EINTR || errno == EWOULDBLOCK))
398 return 0;
399
400 if (nn < 0 || (nn == 0 && iov[0].iov_len > 0))
401 {
402 DEBUG_MISC((dfd, " --- sowrite disconnected, so->so_state = %x, errno = %d\n",
403 so->so_state, errno));
404 sofcantsendmore(so);
405 tcp_sockclosed(pData, sototcpcb(so));
406 return -1;
407 }
408
409#ifndef HAVE_READV
410 if (n == 2 && nn == iov[0].iov_len)
411 {
412 int ret;
413 ret = send(so->s, iov[1].iov_base, iov[1].iov_len,0);
414 if (ret > 0)
415 nn += ret;
416 }
417 DEBUG_MISC((dfd, " ... wrote nn = %d bytes\n", nn));
418#endif
419
420 /* Update sbuf */
421 sb->sb_cc -= nn;
422 sb->sb_rptr += nn;
423 if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
424 sb->sb_rptr -= sb->sb_datalen;
425
426 /*
427 * If in DRAIN mode, and there's no more data, set
428 * it CANTSENDMORE
429 */
430 if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0)
431 sofcantsendmore(so);
432
433 return nn;
434}
435
436/*
437 * recvfrom() a UDP socket
438 */
439void
440sorecvfrom(PNATState pData, struct socket *so)
441{
442 struct sockaddr_in addr;
443 socklen_t addrlen = sizeof(struct sockaddr_in);
444
445 DEBUG_CALL("sorecvfrom");
446 DEBUG_ARG("so = %lx", (long)so);
447
448 if (so->so_type == IPPROTO_ICMP)
449 {
450 /* This is a "ping" reply */
451#ifdef RT_OS_WINDOWS
452 sorecvfrom_icmp_win(pData, so);
453#else /* RT_OS_WINDOWS */
454 sorecvfrom_icmp_unix(pData, so);
455#endif /* !RT_OS_WINDOWS */
456 udp_detach(pData, so);
457 }
458 else
459 {
460 /* A "normal" UDP packet */
461 struct mbuf *m;
462 size_t len;
463 u_long n;
464
465 if (!(m = m_get(pData)))
466 return;
467 m->m_data += if_maxlinkhdr;
468#ifdef VBOX_WITH_SIMPLIFIED_SLIRP_SYNC
469 m->m_data += sizeof(struct udphdr)
470 + sizeof(struct ip); /*XXX: no options atm*/
471#endif
472
473 /*
474 * XXX Shouldn't FIONREAD packets destined for port 53,
475 * but I don't know the max packet size for DNS lookups
476 */
477 len = M_FREEROOM(m);
478 /* if (so->so_fport != htons(53)) */
479 {
480 ioctlsocket(so->s, FIONREAD, &n);
481
482 if (n > len)
483 {
484 n = (m->m_data - m->m_dat) + m->m_len + n + 1;
485 m_inc(m, n);
486 len = M_FREEROOM(m);
487 }
488 }
489
490 m->m_len = recvfrom(so->s, m->m_data, len, 0,
491 (struct sockaddr *)&addr, &addrlen);
492 DEBUG_MISC((dfd, " did recvfrom %d, errno = %d-%s\n",
493 m->m_len, errno,strerror(errno)));
494 if(m->m_len < 0)
495 {
496 u_char code = ICMP_UNREACH_PORT;
497
498 if (errno == EHOSTUNREACH)
499 code = ICMP_UNREACH_HOST;
500 else if(errno == ENETUNREACH)
501 code = ICMP_UNREACH_NET;
502
503 DEBUG_MISC((dfd," rx error, tx icmp ICMP_UNREACH:%i\n", code));
504 icmp_error(pData, so->so_m, ICMP_UNREACH,code, 0,strerror(errno));
505 m_free(pData, m);
506 }
507 else
508 {
509 /*
510 * Hack: domain name lookup will be used the most for UDP,
511 * and since they'll only be used once there's no need
512 * for the 4 minute (or whatever) timeout... So we time them
513 * out much quicker (10 seconds for now...)
514 */
515 if (so->so_expire)
516 {
517 if (so->so_fport == htons(53))
518 so->so_expire = curtime + SO_EXPIREFAST;
519 else
520 so->so_expire = curtime + SO_EXPIRE;
521 }
522
523#if 0
524 if (m->m_len == len)
525 {
526 m_inc(m, MINCSIZE);
527 m->m_len = 0;
528 }
529#endif
530
531 /*
532 * If this packet was destined for CTL_ADDR,
533 * make it look like that's where it came from, done by udp_output
534 */
535 udp_output(pData, so, m, &addr);
536 } /* rx error */
537 } /* if ping packet */
538}
539
540/*
541 * sendto() a socket
542 */
543int
544sosendto(PNATState pData, struct socket *so, struct mbuf *m)
545{
546 int ret;
547 struct sockaddr_in addr;
548#if 0
549 struct sockaddr_in host_addr;
550#endif
551
552 DEBUG_CALL("sosendto");
553 DEBUG_ARG("so = %lx", (long)so);
554 DEBUG_ARG("m = %lx", (long)m);
555
556 addr.sin_family = AF_INET;
557 if ((so->so_faddr.s_addr & htonl(pData->netmask)) == special_addr.s_addr)
558 {
559 /* It's an alias */
560 uint32_t last_byte = ntohl(so->so_faddr.s_addr) & ~pData->netmask;
561 switch(last_byte)
562 {
563#if 0
564 /* handle this case at 'default:' */
565 case CTL_BROADCAST:
566 addr.sin_addr.s_addr = INADDR_BROADCAST;
567 /* Send the packet to host to fully emulate broadcast */
568 /** @todo r=klaus: on Linux host this causes the host to receive
569 * the packet twice for some reason. And I cannot find any place
570 * in the man pages which states that sending a broadcast does not
571 * reach the host itself. */
572 host_addr.sin_family = AF_INET;
573 host_addr.sin_port = so->so_fport;
574 host_addr.sin_addr = our_addr;
575 sendto(so->s, m->m_data, m->m_len, 0,
576 (struct sockaddr *)&host_addr, sizeof (struct sockaddr));
577 break;
578#endif
579 case CTL_DNS:
580#ifndef VBOX_WITH_MULTI_DNS
581 if (!get_dns_addr(pData, &dns_addr))
582 addr.sin_addr = dns_addr;
583 else
584 addr.sin_addr = loopback_addr;
585 break;
586#endif
587 case CTL_ALIAS:
588 default:
589 if (last_byte == ~pData->netmask)
590 addr.sin_addr.s_addr = INADDR_BROADCAST;
591 else
592 addr.sin_addr = loopback_addr;
593 break;
594 }
595 }
596 else
597 addr.sin_addr = so->so_faddr;
598 addr.sin_port = so->so_fport;
599
600 DEBUG_MISC((dfd, " sendto()ing, addr.sin_port=%d, addr.sin_addr.s_addr=%.16s\n",
601 ntohs(addr.sin_port), inet_ntoa(addr.sin_addr)));
602
603 /* Don't care what port we get */
604 ret = sendto(so->s, m->m_data, m->m_len, 0,
605 (struct sockaddr *)&addr, sizeof (struct sockaddr));
606 if (ret < 0)
607 {
608 LogRel(("UDP: sendto fails (%s)\n", strerror(errno)));
609 return -1;
610 }
611
612 /*
613 * Kill the socket if there's no reply in 4 minutes,
614 * but only if it's an expirable socket
615 */
616 if (so->so_expire)
617 so->so_expire = curtime + SO_EXPIRE;
618 so->so_state = SS_ISFCONNECTED; /* So that it gets select()ed */
619 return 0;
620}
621
622/*
623 * XXX This should really be tcp_listen
624 */
625struct socket *
626solisten(PNATState pData, u_int port, u_int32_t laddr, u_int lport, int flags)
627{
628 struct sockaddr_in addr;
629 struct socket *so;
630 socklen_t addrlen = sizeof(addr);
631 int s, opt = 1;
632
633 DEBUG_CALL("solisten");
634 DEBUG_ARG("port = %d", port);
635 DEBUG_ARG("laddr = %x", laddr);
636 DEBUG_ARG("lport = %d", lport);
637 DEBUG_ARG("flags = %x", flags);
638
639 if ((so = socreate()) == NULL)
640 {
641 /* RTMemFree(so); Not sofree() ??? free(NULL) == NOP */
642 return NULL;
643 }
644
645 /* Don't tcp_attach... we don't need so_snd nor so_rcv */
646 if ((so->so_tcpcb = tcp_newtcpcb(pData, so)) == NULL)
647 {
648 RTMemFree(so);
649 return NULL;
650 }
651
652 SOCKET_LOCK_CREATE(so);
653 SOCKET_LOCK(so);
654 QSOCKET_LOCK(tcb);
655 insque(pData, so,&tcb);
656 QSOCKET_UNLOCK(tcb);
657
658 /*
659 * SS_FACCEPTONCE sockets must time out.
660 */
661 if (flags & SS_FACCEPTONCE)
662 so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT*2;
663
664 so->so_state = (SS_FACCEPTCONN|flags);
665 so->so_lport = lport; /* Kept in network format */
666 so->so_laddr.s_addr = laddr; /* Ditto */
667
668 addr.sin_family = AF_INET;
669 addr.sin_addr.s_addr = INADDR_ANY;
670 addr.sin_port = port;
671
672 if ( ((s = socket(AF_INET,SOCK_STREAM,0)) < 0)
673 || (setsockopt(s,SOL_SOCKET,SO_REUSEADDR,(char *)&opt,sizeof(int)) < 0)
674 || (bind(s,(struct sockaddr *)&addr, sizeof(addr)) < 0)
675 || (listen(s,1) < 0))
676 {
677#ifdef RT_OS_WINDOWS
678 int tmperrno = WSAGetLastError(); /* Don't clobber the real reason we failed */
679 closesocket(s);
680 sofree(pData, so);
681 /* Restore the real errno */
682 WSASetLastError(tmperrno);
683#else
684 int tmperrno = errno; /* Don't clobber the real reason we failed */
685 close(s);
686 QSOCKET_LOCK(tcb);
687 sofree(pData, so);
688 QSOCKET_UNLOCK(tcb);
689 /* Restore the real errno */
690 errno = tmperrno;
691#endif
692 return NULL;
693 }
694 setsockopt(s,SOL_SOCKET,SO_OOBINLINE,(char *)&opt,sizeof(int));
695
696 getsockname(s,(struct sockaddr *)&addr,&addrlen);
697 so->so_fport = addr.sin_port;
698 if (addr.sin_addr.s_addr == 0 || addr.sin_addr.s_addr == loopback_addr.s_addr)
699 so->so_faddr = alias_addr;
700 else
701 so->so_faddr = addr.sin_addr;
702
703 so->s = s;
704 return so;
705}
706
/*
 * Notification that data is available in so_rcv.
 * The idea is to simply write() the data to the socket,
 * but that is not done yet (XXX) - the body is disabled.
 */
void
sorwakeup(struct socket *so)
{
#if 0
    sowrite(so);
    FD_CLR(so->s,&writefds);
#endif
}
720
/*
 * Notification that data has been freed in so_snd, i.e. there is
 * room for a read() if we want one.
 * Nothing is read here for now; that is left to the main loop.
 */
void
sowwakeup(struct socket *so)
{
    /* intentionally empty */
}
730
731/*
732 * Various session state calls
733 * XXX Should be #define's
734 * The socket state stuff needs work, these often get call 2 or 3
735 * times each when only 1 was needed
736 */
737void
738soisfconnecting(struct socket *so)
739{
740 so->so_state &= ~(SS_NOFDREF|SS_ISFCONNECTED|SS_FCANTRCVMORE|
741 SS_FCANTSENDMORE|SS_FWDRAIN);
742 so->so_state |= SS_ISFCONNECTING; /* Clobber other states */
743}
744
745void
746soisfconnected(struct socket *so)
747{
748 so->so_state &= ~(SS_ISFCONNECTING|SS_FWDRAIN|SS_NOFDREF);
749 so->so_state |= SS_ISFCONNECTED; /* Clobber other states */
750}
751
752void
753sofcantrcvmore(struct socket *so)
754{
755 if ((so->so_state & SS_NOFDREF) == 0)
756 {
757 shutdown(so->s,0);
758 }
759 so->so_state &= ~(SS_ISFCONNECTING);
760 if (so->so_state & SS_FCANTSENDMORE)
761 so->so_state = SS_NOFDREF; /* Don't select it */
762 /* XXX close() here as well? */
763 else
764 so->so_state |= SS_FCANTRCVMORE;
765}
766
767void
768sofcantsendmore(struct socket *so)
769{
770 if ((so->so_state & SS_NOFDREF) == 0)
771 shutdown(so->s, 1); /* send FIN to fhost */
772
773 so->so_state &= ~(SS_ISFCONNECTING);
774 if (so->so_state & SS_FCANTRCVMORE)
775 so->so_state = SS_NOFDREF; /* as above */
776 else
777 so->so_state |= SS_FCANTSENDMORE;
778}
779
/*
 * Notification that the foreign side is disconnected.
 * The whole body is compiled out, so this currently does nothing.
 */
void
soisfdisconnected(struct socket *so)
{
#if 0
    so->so_state &= ~(SS_ISFCONNECTING|SS_ISFCONNECTED);
    close(so->s);
    so->so_state = SS_ISFDISCONNECTED;
    /*
     * XXX Do nothing ... ?
     */
#endif
}
792
793/*
794 * Set write drain mode
795 * Set CANTSENDMORE once all data has been write()n
796 */
797void
798sofwdrain(struct socket *so)
799{
800 if (so->so_rcv.sb_cc)
801 so->so_state |= SS_FWDRAIN;
802 else
803 sofcantsendmore(so);
804}
805
806static void
807send_icmp_to_guest(PNATState pData, char *buff, size_t len, struct socket *so, const struct sockaddr_in *addr)
808{
809 struct ip *ip;
810 uint32_t dst,src;
811 char ip_copy[256];
812 struct icmp *icp;
813 int old_ip_len;
814 int hlen, original_hlen = 0;
815 struct mbuf *m;
816 struct icmp_msg *icm;
817 uint8_t proto;
818
819 ip = (struct ip *)buff;
820 hlen = (ip->ip_hl << 2);
821 icp = (struct icmp *)((char *)ip + hlen);
822
823 Log(("ICMP:received msg(t:%d, c:%d)\n", icp->icmp_type, icp->icmp_code));
824 if ( icp->icmp_type != ICMP_ECHOREPLY
825 && icp->icmp_type != ICMP_TIMXCEED
826 && icp->icmp_type != ICMP_UNREACH)
827 {
828 return;
829 }
830
831 if ( icp->icmp_type == ICMP_TIMXCEED
832 || icp->icmp_type == ICMP_UNREACH)
833 {
834 ip = &icp->icmp_ip;
835 DO_ALIAS(&ip->ip_dst);
836 }
837 else
838 {
839 DO_ALIAS(&ip->ip_src);
840 }
841
842 icm = icmp_find_original_mbuf(pData, ip);
843
844 if (icm == NULL)
845 {
846 Log(("NAT: Can't find the corresponding packet for the received ICMP\n"));
847 return;
848 }
849
850 m = icm->im_m;
851 Assert(m != NULL);
852
853 src = addr->sin_addr.s_addr;
854
855 ip = mtod(m, struct ip *);
856 proto = ip->ip_p;
857 /* Now ip is pointing on header we've sent from guest */
858 if ( icp->icmp_type == ICMP_TIMXCEED
859 || icp->icmp_type == ICMP_UNREACH)
860 {
861 old_ip_len = (ip->ip_hl << 2) + 64;
862 if (old_ip_len > sizeof(ip_copy))
863 old_ip_len = sizeof(ip_copy);
864 memcpy(ip_copy, ip, old_ip_len);
865 }
866
867 /* source address from original IP packet*/
868 dst = ip->ip_src.s_addr;
869
870 /* overide ther tail of old packet */
871 ip = mtod(m, struct ip *); /* ip is from mbuf we've overrided */
872 original_hlen = ip->ip_hl << 2;
873 /* saves original ip header and options */
874 memcpy(m->m_data + original_hlen, buff + hlen, len - hlen);
875 m->m_len = len - hlen + original_hlen;
876 ip->ip_len = m->m_len;
877 ip->ip_p = IPPROTO_ICMP; /* the original package could be whatever, but we're response via ICMP*/
878
879 icp = (struct icmp *)((char *)ip + (ip->ip_hl << 2));
880 if ( icp->icmp_type == ICMP_TIMXCEED
881 || icp->icmp_type == ICMP_UNREACH)
882 {
883 /* according RFC 793 error messages required copy of initial IP header + 64 bit */
884 memcpy(&icp->icmp_ip, ip_copy, old_ip_len);
885 ip->ip_tos = ((ip->ip_tos & 0x1E) | 0xC0); /* high priority for errors */
886 }
887
888 ip->ip_src.s_addr = src;
889 ip->ip_dst.s_addr = dst;
890 icmp_reflect(pData, m);
891 LIST_REMOVE(icm, im_list);
892 /* Don't call m_free here*/
893
894 if ( icp->icmp_type == ICMP_TIMXCEED
895 || icp->icmp_type == ICMP_UNREACH)
896 {
897 icm->im_so->so_m = NULL;
898 switch (proto)
899 {
900 case IPPROTO_UDP:
901 /*XXX: so->so_m already freed so we shouldn't call sofree */
902 udp_detach(pData, icm->im_so);
903 break;
904 case IPPROTO_TCP:
905 /*close tcp should be here */
906 break;
907 default:
908 /* do nothing */
909 break;
910 }
911 }
912 RTMemFree(icm);
913}
914
915#ifdef RT_OS_WINDOWS
916static void
917sorecvfrom_icmp_win(PNATState pData, struct socket *so)
918{
919 int len;
920 int i;
921 struct ip *ip;
922 struct mbuf *m;
923 struct icmp *icp;
924 struct icmp_msg *icm;
925 struct ip *ip_broken; /* ICMP returns header + 64 bit of packet */
926 uint32_t src;
927 ICMP_ECHO_REPLY *icr;
928 int hlen = 0;
929 int data_len = 0;
930 int nbytes = 0;
931 u_char code = ~0;
932
933 len = pData->pfIcmpParseReplies(pData->pvIcmpBuffer, pData->szIcmpBuffer);
934#ifndef VBOX_WITH_SIMPLIFIED_SLIRP_SYNC
935 fIcmp = 0; /* reply processed */
936#endif
937 if (len < 0)
938 {
939 LogRel(("NAT: Error (%d) occurred on ICMP receiving\n", GetLastError()));
940 return;
941 }
942 if (len == 0)
943 return; /* no error */
944
945 icr = (ICMP_ECHO_REPLY *)pData->pvIcmpBuffer;
946 for (i = 0; i < len; ++i)
947 {
948 switch(icr[i].Status)
949 {
950 case IP_DEST_HOST_UNREACHABLE:
951 code = (code != ~0 ? code : ICMP_UNREACH_HOST);
952 case IP_DEST_NET_UNREACHABLE:
953 code = (code != ~0 ? code : ICMP_UNREACH_NET);
954 case IP_DEST_PROT_UNREACHABLE:
955 code = (code != ~0 ? code : ICMP_UNREACH_PROTOCOL);
956 /* UNREACH error inject here */
957 case IP_DEST_PORT_UNREACHABLE:
958 code = (code != ~0 ? code : ICMP_UNREACH_PORT);
959 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, "Error occurred!!!");
960 break;
961 case IP_SUCCESS: /* echo replied */
962 m = m_get(pData);
963 m->m_data += if_maxlinkhdr;
964 ip = mtod(m, struct ip *);
965 ip->ip_src.s_addr = icr[i].Address;
966 DO_ALIAS(&ip->ip_src);
967 ip->ip_p = IPPROTO_ICMP;
968 ip->ip_dst.s_addr = so->so_laddr.s_addr; /*XXX: still the hack*/
969 data_len = sizeof(struct ip);
970 ip->ip_hl = data_len >> 2; /* requiered for icmp_reflect, no IP options */
971 ip->ip_ttl = icr[i].Options.Ttl;
972
973 icp = (struct icmp *)&ip[1]; /* no options */
974 icp->icmp_type = ICMP_ECHOREPLY;
975 icp->icmp_code = 0;
976 icp->icmp_id = so->so_icmp_id;
977 icp->icmp_seq = so->so_icmp_seq;
978
979 data_len += ICMP_MINLEN;
980
981 nbytes = (data_len + icr[i].DataSize > m->m_size? m->m_size - data_len: icr[i].DataSize);
982 memcpy(icp->icmp_data, icr[i].Data, nbytes);
983
984 data_len += icr[i].DataSize;
985
986 ip->ip_len = data_len;
987 m->m_len = ip->ip_len;
988
989 icmp_reflect(pData, m);
990 break;
991 case IP_TTL_EXPIRED_TRANSIT: /* TTL expired */
992
993 ip_broken = icr[i].Data;
994 icm = icmp_find_original_mbuf(pData, ip_broken);
995 if (icm == NULL) {
996 Log(("ICMP: can't find original package (first double word %x)\n", *(uint32_t *)ip_broken));
997 return;
998 }
999 m = icm->im_m;
1000 ip = mtod(m, struct ip *);
1001 ip->ip_ttl = icr[i].Options.Ttl;
1002 src = ip->ip_src.s_addr;
1003 ip->ip_dst.s_addr = src;
1004 ip->ip_dst.s_addr = icr[i].Address;
1005
1006 hlen = (ip->ip_hl << 2);
1007 icp = (struct icmp *)((char *)ip + hlen);
1008 ip_broken->ip_src.s_addr = src; /*it packet sent from host not from guest*/
1009 data_len = (ip_broken->ip_hl << 2) + 64;
1010
1011 nbytes =(hlen + ICMP_MINLEN + data_len > m->m_size? m->m_size - (hlen + ICMP_MINLEN): data_len);
1012 memcpy(icp->icmp_data, ip_broken, nbytes);
1013 icmp_reflect(pData, m);
1014 break;
1015 default:
1016 Log(("ICMP(default): message with Status: %x was received from %x\n", icr[i].Status, icr[i].Address));
1017 break;
1018 }
1019 }
1020}
1021#else /* RT_OS_WINDOWS */
1022static void sorecvfrom_icmp_unix(PNATState pData, struct socket *so)
1023{
1024 struct sockaddr_in addr;
1025 socklen_t addrlen = sizeof(struct sockaddr_in);
1026 char buff[1500];
1027 int len;
1028 len = recvfrom(so->s, buff, 1500, 0,
1029 (struct sockaddr *)&addr, &addrlen);
1030 /* XXX Check if reply is "correct"? */
1031
1032 if (len == -1 || len == 0)
1033 {
1034 u_char code = ICMP_UNREACH_PORT;
1035
1036 if (errno == EHOSTUNREACH)
1037 code = ICMP_UNREACH_HOST;
1038 else if(errno == ENETUNREACH)
1039 code = ICMP_UNREACH_NET;
1040
1041 DEBUG_MISC((dfd," udp icmp rx errno = %d-%s\n",
1042 errno,strerror(errno)));
1043 icmp_error(pData, so->so_m, ICMP_UNREACH,code, 0,strerror(errno));
1044 }
1045 else
1046 {
1047 send_icmp_to_guest(pData, buff, len, so, &addr);
1048 }
1049}
1050#endif /* !RT_OS_WINDOWS */
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette