VirtualBox

source: vbox/trunk/src/VBox/Devices/Network/slirp/socket.c@ 19839

Last change on this file since 19839 was 19839, checked in by vboxsync, 16 years ago

NAT: Slirp no longer uses the guest's Ethernet address;
instead it determines the destination's Ethernet address
with a lookup operation. The lookup is currently very simple: it searches
the addresses handed out via DHCP, or otherwise assumes the destination is in the outer
network and uses Slirp's own Ethernet address.

  • Property svn:eol-style set to native
File size: 30.7 KB
Line 
1/*
2 * Copyright (c) 1995 Danny Gasparovski.
3 *
4 * Please read the file COPYRIGHT for the
5 * terms and conditions of the copyright.
6 */
7
8#define WANT_SYS_IOCTL_H
9#include <slirp.h>
10#include "ip_icmp.h"
11#include "main.h"
12#ifdef __sun__
13#include <sys/filio.h>
14#endif
15#if defined (RT_OS_WINDOWS)
16#include <iphlpapi.h>
17#include <icmpapi.h>
18#endif
19
20
21static void send_icmp_to_guest(PNATState, char *, size_t, struct socket *, const struct sockaddr_in *);
22#ifdef RT_OS_WINDOWS
23static void sorecvfrom_icmp_win(PNATState, struct socket *);
24#else /* RT_OS_WINDOWS */
25static void sorecvfrom_icmp_unix(PNATState, struct socket *);
26#endif /* !RT_OS_WINDOWS */
27
/*
 * Socket module initialisation hook.
 * There is currently nothing to set up, so the body is empty.
 */
void
so_init()
{
    /* nothing to initialise */
}
32
33
34struct socket *
35solookup(struct socket *head, struct in_addr laddr,
36 u_int lport, struct in_addr faddr, u_int fport)
37{
38 struct socket *so;
39
40 for (so = head->so_next; so != head; so = so->so_next)
41 {
42 if ( so->so_lport == lport
43 && so->so_laddr.s_addr == laddr.s_addr
44 && so->so_faddr.s_addr == faddr.s_addr
45 && so->so_fport == fport)
46 return so;
47 }
48
49 return (struct socket *)NULL;
50}
51
52/*
53 * Create a new socket, initialise the fields
54 * It is the responsibility of the caller to
55 * insque() it into the correct linked-list
56 */
57struct socket *
58socreate()
59{
60 struct socket *so;
61
62 so = (struct socket *)RTMemAllocZ(sizeof(struct socket));
63 if(so)
64 {
65 so->so_state = SS_NOFDREF;
66 so->s = -1;
67#if !defined(RT_OS_WINDOWS)
68 so->so_poll_index = -1;
69#endif
70 }
71 return so;
72}
73
74/*
75 * remque and free a socket, clobber cache
76 * VBOX_WITH_SLIRP_MT: before sofree queue should be locked, because
77 * in sofree we don't know from which queue item beeing removed.
78 */
79void
80sofree(PNATState pData, struct socket *so)
81{
82 struct socket *so_prev = NULL;
83 if (so == tcp_last_so)
84 tcp_last_so = &tcb;
85 else if (so == udp_last_so)
86 udp_last_so = &udb;
87
88 /* check if mbuf haven't been already freed */
89 if (so->so_m != NULL)
90 m_free(pData, so->so_m);
91#ifndef VBOX_WITH_SLIRP_MT
92 if(so->so_next && so->so_prev)
93 {
94 remque(pData, so); /* crashes if so is not in a queue */
95 NSOCK_DEC();
96 }
97
98 RTMemFree(so);
99#else
100 so->so_deleted = 1;
101#endif
102}
103
#ifdef VBOX_WITH_SLIRP_MT
/*
 * Wrapper around soread() that delivers the result through *ret
 * instead of a return value.
 */
void
soread_queue(PNATState pData, struct socket *so, int *ret)
{
    int rc = soread(pData, so);

    *ret = rc;
}
#endif
111
112/*
113 * Read from so's socket into sb_snd, updating all relevant sbuf fields
114 * NOTE: This will only be called if it is select()ed for reading, so
115 * a read() of 0 (or less) means it's disconnected
116 */
117int
118soread(PNATState pData, struct socket *so)
119{
120 int n, nn, lss, total;
121 struct sbuf *sb = &so->so_snd;
122 size_t len = sb->sb_datalen - sb->sb_cc;
123 struct iovec iov[2];
124 int mss = so->so_tcpcb->t_maxseg;
125 QSOCKET_LOCK(tcb);
126 SOCKET_LOCK(so);
127 QSOCKET_UNLOCK(tcb);
128
129 DEBUG_CALL("soread");
130 DEBUG_ARG("so = %lx", (long )so);
131
132 /*
133 * No need to check if there's enough room to read.
134 * soread wouldn't have been called if there weren't
135 */
136
137 len = sb->sb_datalen - sb->sb_cc;
138
139 iov[0].iov_base = sb->sb_wptr;
140 iov[1].iov_base = 0;
141 iov[1].iov_len = 0;
142 if (sb->sb_wptr < sb->sb_rptr)
143 {
144 iov[0].iov_len = sb->sb_rptr - sb->sb_wptr;
145 /* Should never succeed, but... */
146 if (iov[0].iov_len > len)
147 iov[0].iov_len = len;
148 if (iov[0].iov_len > mss)
149 iov[0].iov_len -= iov[0].iov_len%mss;
150 n = 1;
151 }
152 else
153 {
154 iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr;
155 /* Should never succeed, but... */
156 if (iov[0].iov_len > len)
157 iov[0].iov_len = len;
158 len -= iov[0].iov_len;
159 if (len)
160 {
161 iov[1].iov_base = sb->sb_data;
162 iov[1].iov_len = sb->sb_rptr - sb->sb_data;
163 if(iov[1].iov_len > len)
164 iov[1].iov_len = len;
165 total = iov[0].iov_len + iov[1].iov_len;
166 if (total > mss)
167 {
168 lss = total % mss;
169 if (iov[1].iov_len > lss)
170 {
171 iov[1].iov_len -= lss;
172 n = 2;
173 }
174 else
175 {
176 lss -= iov[1].iov_len;
177 iov[0].iov_len -= lss;
178 n = 1;
179 }
180 }
181 else
182 n = 2;
183 }
184 else
185 {
186 if (iov[0].iov_len > mss)
187 iov[0].iov_len -= iov[0].iov_len%mss;
188 n = 1;
189 }
190 }
191
192#ifdef HAVE_READV
193 nn = readv(so->s, (struct iovec *)iov, n);
194 DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn));
195#else
196 nn = recv(so->s, iov[0].iov_base, iov[0].iov_len, 0);
197#endif
198 if (nn <= 0)
199 {
200#if defined(RT_OS_WINDOWS)
201 /*
202 * Special case for WSAEnumNetworkEvents: If we receive 0 bytes that
203 * _could_ mean that the connection is closed. But we will receive an
204 * FD_CLOSE event later if the connection was _really_ closed. With
205 * www.youtube.com I see this very often. Closing the socket too early
206 * would be dangerous.
207 */
208 int status, ignored;
209 unsigned long pending = 0;
210 status = WSAIoctl(so->s, FIONREAD, NULL, 0, &pending, sizeof(unsigned long), &ignored, NULL, NULL);
211 if (status < 0)
212 LogRel(("NAT:error in WSAIoctl: %d\n", WSAGetLastError()));
213 if (nn == 0 && (pending != 0))
214 {
215 SOCKET_UNLOCK(so);
216 return 0;
217 }
218#endif
219 if (nn < 0 && (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK))
220 {
221 SOCKET_UNLOCK(so);
222 return 0;
223 }
224 else
225 {
226 /* nn == 0 means peer has performed an orderly shutdown */
227 DEBUG_MISC((dfd, " --- soread() disconnected, nn = %d, errno = %d-%s\n",
228 nn, errno, strerror(errno)));
229 sofcantrcvmore(so);
230 tcp_sockclosed(pData, sototcpcb(so));
231 SOCKET_UNLOCK(so);
232 return -1;
233 }
234 }
235
236#ifndef HAVE_READV
237 /*
238 * If there was no error, try and read the second time round
239 * We read again if n = 2 (ie, there's another part of the buffer)
240 * and we read as much as we could in the first read
241 * We don't test for <= 0 this time, because there legitimately
242 * might not be any more data (since the socket is non-blocking),
243 * a close will be detected on next iteration.
244 * A return of -1 wont (shouldn't) happen, since it didn't happen above
245 */
246 if (n == 2 && nn == iov[0].iov_len)
247 {
248 int ret;
249 ret = recv(so->s, iov[1].iov_base, iov[1].iov_len, 0);
250 if (ret > 0)
251 nn += ret;
252 }
253
254 DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn));
255#endif
256
257 /* Update fields */
258 sb->sb_cc += nn;
259 sb->sb_wptr += nn;
260 if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen))
261 sb->sb_wptr -= sb->sb_datalen;
262 SOCKET_UNLOCK(so);
263 return nn;
264}
265
266/*
267 * Get urgent data
268 *
269 * When the socket is created, we set it SO_OOBINLINE,
270 * so when OOB data arrives, we soread() it and everything
271 * in the send buffer is sent as urgent data
272 */
273void
274sorecvoob(PNATState pData, struct socket *so)
275{
276 struct tcpcb *tp = sototcpcb(so);
277
278 DEBUG_CALL("sorecvoob");
279 DEBUG_ARG("so = %lx", (long)so);
280
281 /*
282 * We take a guess at how much urgent data has arrived.
283 * In most situations, when urgent data arrives, the next
284 * read() should get all the urgent data. This guess will
285 * be wrong however if more data arrives just after the
286 * urgent data, or the read() doesn't return all the
287 * urgent data.
288 */
289 soread(pData, so);
290 tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
291 tp->t_force = 1;
292 tcp_output(pData, tp);
293 tp->t_force = 0;
294}
295
296/*
297 * Send urgent data
298 * There's a lot duplicated code here, but...
299 */
300int
301sosendoob(struct socket *so)
302{
303 struct sbuf *sb = &so->so_rcv;
304 char buff[2048]; /* XXX Shouldn't be sending more oob data than this */
305
306 int n, len;
307
308 DEBUG_CALL("sosendoob");
309 DEBUG_ARG("so = %lx", (long)so);
310 DEBUG_ARG("sb->sb_cc = %d", sb->sb_cc);
311
312 if (so->so_urgc > sizeof(buff))
313 so->so_urgc = sizeof(buff); /* XXX */
314
315 if (sb->sb_rptr < sb->sb_wptr)
316 {
317 /* We can send it directly */
318 n = send(so->s, sb->sb_rptr, so->so_urgc, (MSG_OOB)); /* |MSG_DONTWAIT)); */
319 so->so_urgc -= n;
320
321 DEBUG_MISC((dfd, " --- sent %d bytes urgent data, %d urgent bytes left\n",
322 n, so->so_urgc));
323 }
324 else
325 {
326 /*
327 * Since there's no sendv or sendtov like writev,
328 * we must copy all data to a linear buffer then
329 * send it all
330 */
331 len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
332 if (len > so->so_urgc)
333 len = so->so_urgc;
334 memcpy(buff, sb->sb_rptr, len);
335 so->so_urgc -= len;
336 if (so->so_urgc)
337 {
338 n = sb->sb_wptr - sb->sb_data;
339 if (n > so->so_urgc)
340 n = so->so_urgc;
341 memcpy(buff + len, sb->sb_data, n);
342 so->so_urgc -= n;
343 len += n;
344 }
345 n = send(so->s, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */
346#ifdef DEBUG
347 if (n != len)
348 DEBUG_ERROR((dfd, "Didn't send all data urgently XXXXX\n"));
349#endif
350 DEBUG_MISC((dfd, " ---2 sent %d bytes urgent data, %d urgent bytes left\n",
351 n, so->so_urgc));
352 }
353
354 sb->sb_cc -= n;
355 sb->sb_rptr += n;
356 if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
357 sb->sb_rptr -= sb->sb_datalen;
358
359 return n;
360}
361
362/*
363 * Write data from so_rcv to so's socket,
364 * updating all sbuf field as necessary
365 */
366int
367sowrite(PNATState pData, struct socket *so)
368{
369 int n, nn;
370 struct sbuf *sb = &so->so_rcv;
371 size_t len = sb->sb_cc;
372 struct iovec iov[2];
373
374 DEBUG_CALL("sowrite");
375 DEBUG_ARG("so = %lx", (long)so);
376 QSOCKET_LOCK(tcb);
377 SOCKET_LOCK(so);
378 QSOCKET_UNLOCK(tcb);
379 if (so->so_urgc)
380 {
381 sosendoob(so);
382 if (sb->sb_cc == 0)
383 {
384 SOCKET_UNLOCK(so);
385 return 0;
386 }
387 }
388
389 /*
390 * No need to check if there's something to write,
391 * sowrite wouldn't have been called otherwise
392 */
393
394 len = sb->sb_cc;
395
396 iov[0].iov_base = sb->sb_rptr;
397 iov[1].iov_base = 0;
398 iov[1].iov_len = 0;
399 if (sb->sb_rptr < sb->sb_wptr)
400 {
401 iov[0].iov_len = sb->sb_wptr - sb->sb_rptr;
402 /* Should never succeed, but... */
403 if (iov[0].iov_len > len)
404 iov[0].iov_len = len;
405 n = 1;
406 }
407 else
408 {
409 iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
410 if (iov[0].iov_len > len)
411 iov[0].iov_len = len;
412 len -= iov[0].iov_len;
413 if (len)
414 {
415 iov[1].iov_base = sb->sb_data;
416 iov[1].iov_len = sb->sb_wptr - sb->sb_data;
417 if (iov[1].iov_len > len)
418 iov[1].iov_len = len;
419 n = 2;
420 }
421 else
422 n = 1;
423 }
424 /* Check if there's urgent data to send, and if so, send it */
425#ifdef HAVE_READV
426 nn = writev(so->s, (const struct iovec *)iov, n);
427 DEBUG_MISC((dfd, " ... wrote nn = %d bytes\n", nn));
428#else
429 nn = send(so->s, iov[0].iov_base, iov[0].iov_len, 0);
430#endif
431 /* This should never happen, but people tell me it does *shrug* */
432 if (nn < 0 && (errno == EAGAIN || errno == EINTR || errno == EWOULDBLOCK))
433 {
434 SOCKET_UNLOCK(so);
435 return 0;
436 }
437
438 if (nn < 0 || (nn == 0 && iov[0].iov_len > 0))
439 {
440 DEBUG_MISC((dfd, " --- sowrite disconnected, so->so_state = %x, errno = %d\n",
441 so->so_state, errno));
442 sofcantsendmore(so);
443 tcp_sockclosed(pData, sototcpcb(so));
444 SOCKET_UNLOCK(so);
445 return -1;
446 }
447
448#ifndef HAVE_READV
449 if (n == 2 && nn == iov[0].iov_len)
450 {
451 int ret;
452 ret = send(so->s, iov[1].iov_base, iov[1].iov_len, 0);
453 if (ret > 0)
454 nn += ret;
455 }
456 DEBUG_MISC((dfd, " ... wrote nn = %d bytes\n", nn));
457#endif
458
459 /* Update sbuf */
460 sb->sb_cc -= nn;
461 sb->sb_rptr += nn;
462 if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
463 sb->sb_rptr -= sb->sb_datalen;
464
465 /*
466 * If in DRAIN mode, and there's no more data, set
467 * it CANTSENDMORE
468 */
469 if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0)
470 sofcantsendmore(so);
471
472 SOCKET_UNLOCK(so);
473 return nn;
474}
475
476/*
477 * recvfrom() a UDP socket
478 */
479void
480sorecvfrom(PNATState pData, struct socket *so)
481{
482 struct sockaddr_in addr;
483 socklen_t addrlen = sizeof(struct sockaddr_in);
484
485 DEBUG_CALL("sorecvfrom");
486 DEBUG_ARG("so = %lx", (long)so);
487
488 if (so->so_type == IPPROTO_ICMP)
489 {
490 /* This is a "ping" reply */
491#ifdef RT_OS_WINDOWS
492 sorecvfrom_icmp_win(pData, so);
493#else /* RT_OS_WINDOWS */
494 sorecvfrom_icmp_unix(pData, so);
495#endif /* !RT_OS_WINDOWS */
496 udp_detach(pData, so);
497 }
498 else
499 {
500 /* A "normal" UDP packet */
501 struct mbuf *m;
502 struct ethhdr *eh;
503 size_t len;
504 u_long n;
505
506 QSOCKET_LOCK(udb);
507 SOCKET_LOCK(so);
508 QSOCKET_UNLOCK(udb);
509
510 if (!(m = m_get(pData)))
511 {
512 SOCKET_UNLOCK(so);
513 return;
514 }
515 /* adjust both parameters to maks M_FREEROOM calculate correct */
516 m_adj(m, if_maxlinkhdr + sizeof(struct udphdr) + sizeof(struct ip));
517
518 /*
519 * XXX Shouldn't FIONREAD packets destined for port 53,
520 * but I don't know the max packet size for DNS lookups
521 */
522 len = M_FREEROOM(m);
523 /* if (so->so_fport != htons(53)) */
524 {
525 ioctlsocket(so->s, FIONREAD, &n);
526
527 if (n > len)
528 {
529 n = (m->m_data - m->m_dat) + m->m_len + n + 1;
530 m_inc(m, n);
531 len = M_FREEROOM(m);
532 }
533 }
534
535 m->m_len = recvfrom(so->s, m->m_data, len, 0,
536 (struct sockaddr *)&addr, &addrlen);
537 Log2((" did recvfrom %d, errno = %d-%s\n",
538 m->m_len, errno, strerror(errno)));
539 if(m->m_len < 0)
540 {
541 u_char code = ICMP_UNREACH_PORT;
542
543 if (errno == EHOSTUNREACH)
544 code = ICMP_UNREACH_HOST;
545 else if(errno == ENETUNREACH)
546 code = ICMP_UNREACH_NET;
547
548 Log2((dfd," rx error, tx icmp ICMP_UNREACH:%i\n", code));
549 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
550 so->so_m = NULL;
551 m_free(pData, m);
552 }
553 else
554 {
555 /*
556 * Hack: domain name lookup will be used the most for UDP,
557 * and since they'll only be used once there's no need
558 * for the 4 minute (or whatever) timeout... So we time them
559 * out much quicker (10 seconds for now...)
560 */
561#ifndef VBOX_WITH_SLIRP_DNS_PROXY
562 if (so->so_expire)
563 {
564 if (so->so_fport == htons(53))
565 so->so_expire = curtime + SO_EXPIREFAST;
566 else
567 so->so_expire = curtime + SO_EXPIRE;
568 }
569#else
570 if (so->so_expire)
571 {
572 if (so->so_fport != htons(53))
573 so->so_expire = curtime + SO_EXPIRE;
574 }
575 /*
576 * last argument should be changed if Slirp will inject IP attributes
577 * Note: Here we can't check if dnsproxy's sent initial request
578 */
579 if (so->so_fport == htons(53))
580 dnsproxy_answer(pData, so, m);
581#endif
582
583#if 0
584 if (m->m_len == len)
585 {
586 m_inc(m, MINCSIZE);
587 m->m_len = 0;
588 }
589#endif
590
591 /*
592 * If this packet was destined for CTL_ADDR,
593 * make it look like that's where it came from, done by udp_output
594 */
595 udp_output(pData, so, m, &addr);
596 SOCKET_UNLOCK(so);
597 } /* rx error */
598 } /* if ping packet */
599}
600
601/*
602 * sendto() a socket
603 */
604int
605sosendto(PNATState pData, struct socket *so, struct mbuf *m)
606{
607 int ret;
608 struct sockaddr_in addr;
609#if 0
610 struct sockaddr_in host_addr;
611#endif
612
613 DEBUG_CALL("sosendto");
614 DEBUG_ARG("so = %lx", (long)so);
615 DEBUG_ARG("m = %lx", (long)m);
616
617 addr.sin_family = AF_INET;
618 if ((so->so_faddr.s_addr & htonl(pData->netmask)) == special_addr.s_addr)
619 {
620 /* It's an alias */
621 uint32_t last_byte = ntohl(so->so_faddr.s_addr) & ~pData->netmask;
622 switch(last_byte)
623 {
624#if 0
625 /* handle this case at 'default:' */
626 case CTL_BROADCAST:
627 addr.sin_addr.s_addr = INADDR_BROADCAST;
628 /* Send the packet to host to fully emulate broadcast */
629 /** @todo r=klaus: on Linux host this causes the host to receive
630 * the packet twice for some reason. And I cannot find any place
631 * in the man pages which states that sending a broadcast does not
632 * reach the host itself. */
633 host_addr.sin_family = AF_INET;
634 host_addr.sin_port = so->so_fport;
635 host_addr.sin_addr = our_addr;
636 sendto(so->s, m->m_data, m->m_len, 0,
637 (struct sockaddr *)&host_addr, sizeof (struct sockaddr));
638 break;
639#endif
640 case CTL_DNS:
641#ifndef VBOX_WITH_MULTI_DNS
642 if (!get_dns_addr(pData, &dns_addr))
643 addr.sin_addr = dns_addr;
644 else
645 addr.sin_addr = loopback_addr;
646 break;
647#endif
648 case CTL_ALIAS:
649 default:
650 if (last_byte == ~pData->netmask)
651 addr.sin_addr.s_addr = INADDR_BROADCAST;
652 else
653 addr.sin_addr = loopback_addr;
654 break;
655 }
656 }
657 else
658 addr.sin_addr = so->so_faddr;
659 addr.sin_port = so->so_fport;
660
661 DEBUG_MISC((dfd, " sendto()ing, addr.sin_port=%d, addr.sin_addr.s_addr=%.16s\n",
662 ntohs(addr.sin_port), inet_ntoa(addr.sin_addr)));
663
664 /* Don't care what port we get */
665 ret = sendto(so->s, m->m_data, m->m_len, 0,
666 (struct sockaddr *)&addr, sizeof (struct sockaddr));
667 if (ret < 0)
668 {
669 LogRel(("UDP: sendto fails (%s)\n", strerror(errno)));
670 return -1;
671 }
672
673 /*
674 * Kill the socket if there's no reply in 4 minutes,
675 * but only if it's an expirable socket
676 */
677 if (so->so_expire)
678 so->so_expire = curtime + SO_EXPIRE;
679 so->so_state = SS_ISFCONNECTED; /* So that it gets select()ed */
680 return 0;
681}
682
683/*
684 * XXX This should really be tcp_listen
685 */
686struct socket *
687solisten(PNATState pData, u_int port, u_int32_t laddr, u_int lport, int flags)
688{
689 struct sockaddr_in addr;
690 struct socket *so;
691 socklen_t addrlen = sizeof(addr);
692 int s, opt = 1;
693
694 DEBUG_CALL("solisten");
695 DEBUG_ARG("port = %d", port);
696 DEBUG_ARG("laddr = %x", laddr);
697 DEBUG_ARG("lport = %d", lport);
698 DEBUG_ARG("flags = %x", flags);
699
700 if ((so = socreate()) == NULL)
701 {
702 /* RTMemFree(so); Not sofree() ??? free(NULL) == NOP */
703 return NULL;
704 }
705
706 /* Don't tcp_attach... we don't need so_snd nor so_rcv */
707 if ((so->so_tcpcb = tcp_newtcpcb(pData, so)) == NULL)
708 {
709 RTMemFree(so);
710 return NULL;
711 }
712
713 SOCKET_LOCK_CREATE(so);
714 SOCKET_LOCK(so);
715 QSOCKET_LOCK(tcb);
716 insque(pData, so,&tcb);
717 NSOCK_INC();
718 QSOCKET_UNLOCK(tcb);
719
720 /*
721 * SS_FACCEPTONCE sockets must time out.
722 */
723 if (flags & SS_FACCEPTONCE)
724 so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT*2;
725
726 so->so_state = (SS_FACCEPTCONN|flags);
727 so->so_lport = lport; /* Kept in network format */
728 so->so_laddr.s_addr = laddr; /* Ditto */
729
730 addr.sin_family = AF_INET;
731 addr.sin_addr.s_addr = INADDR_ANY;
732 addr.sin_port = port;
733
734 if ( ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0)
735 || (setsockopt(s, SOL_SOCKET, SO_REUSEADDR,(char *)&opt, sizeof(int)) < 0)
736 || (bind(s,(struct sockaddr *)&addr, sizeof(addr)) < 0)
737 || (listen(s, 1) < 0))
738 {
739#ifdef RT_OS_WINDOWS
740 int tmperrno = WSAGetLastError(); /* Don't clobber the real reason we failed */
741 closesocket(s);
742 QSOCKET_LOCK(tcb);
743 sofree(pData, so);
744 QSOCKET_UNLOCK(tcb);
745 /* Restore the real errno */
746 WSASetLastError(tmperrno);
747#else
748 int tmperrno = errno; /* Don't clobber the real reason we failed */
749 close(s);
750 QSOCKET_LOCK(tcb);
751 sofree(pData, so);
752 QSOCKET_UNLOCK(tcb);
753 /* Restore the real errno */
754 errno = tmperrno;
755#endif
756 return NULL;
757 }
758 setsockopt(s, SOL_SOCKET, SO_OOBINLINE,(char *)&opt, sizeof(int));
759
760 getsockname(s,(struct sockaddr *)&addr,&addrlen);
761 so->so_fport = addr.sin_port;
762 if (addr.sin_addr.s_addr == 0 || addr.sin_addr.s_addr == loopback_addr.s_addr)
763 so->so_faddr = alias_addr;
764 else
765 so->so_faddr = addr.sin_addr;
766
767 so->s = s;
768 SOCKET_UNLOCK(so);
769 return so;
770}
771
/*
 * Called when data is available in so_rcv.
 * The idea is to write() the data to the socket straight away,
 * but that is not done yet: the code below is compiled out.
 */
void
sorwakeup(struct socket *so)
{
#if 0
    sowrite(so);
    FD_CLR(so->s,&writefds);
#endif
}
785
/*
 * Called when data has been freed in so_snd, i.e. there is
 * room for a read() if we want one.
 * For now nothing is read here; that is done in the main loop.
 */
void
sowwakeup(struct socket *so)
{
    /* deliberately empty, see above */
}
795
796/*
797 * Various session state calls
798 * XXX Should be #define's
799 * The socket state stuff needs work, these often get call 2 or 3
800 * times each when only 1 was needed
801 */
802void
803soisfconnecting(struct socket *so)
804{
805 so->so_state &= ~(SS_NOFDREF|SS_ISFCONNECTED|SS_FCANTRCVMORE|
806 SS_FCANTSENDMORE|SS_FWDRAIN);
807 so->so_state |= SS_ISFCONNECTING; /* Clobber other states */
808}
809
810void
811soisfconnected(struct socket *so)
812{
813 so->so_state &= ~(SS_ISFCONNECTING|SS_FWDRAIN|SS_NOFDREF);
814 so->so_state |= SS_ISFCONNECTED; /* Clobber other states */
815}
816
817void
818sofcantrcvmore(struct socket *so)
819{
820 if ((so->so_state & SS_NOFDREF) == 0)
821 {
822 shutdown(so->s, 0);
823 }
824 so->so_state &= ~(SS_ISFCONNECTING);
825 if (so->so_state & SS_FCANTSENDMORE)
826 so->so_state = SS_NOFDREF; /* Don't select it */
827 /* XXX close() here as well? */
828 else
829 so->so_state |= SS_FCANTRCVMORE;
830}
831
832void
833sofcantsendmore(struct socket *so)
834{
835 if ((so->so_state & SS_NOFDREF) == 0)
836 shutdown(so->s, 1); /* send FIN to fhost */
837
838 so->so_state &= ~(SS_ISFCONNECTING);
839 if (so->so_state & SS_FCANTRCVMORE)
840 so->so_state = SS_NOFDREF; /* as above */
841 else
842 so->so_state |= SS_FCANTSENDMORE;
843}
844
/*
 * Foreign side disconnected.
 * Currently does nothing: the whole body is compiled out.
 */
void
soisfdisconnected(struct socket *so)
{
#if 0
    so->so_state &= ~(SS_ISFCONNECTING|SS_ISFCONNECTED);
    close(so->s);
    so->so_state = SS_ISFDISCONNECTED;
    /*
     * XXX Do nothing ... ?
     */
#endif
}
857
858/*
859 * Set write drain mode
860 * Set CANTSENDMORE once all data has been write()n
861 */
862void
863sofwdrain(struct socket *so)
864{
865 if (so->so_rcv.sb_cc)
866 so->so_state |= SS_FWDRAIN;
867 else
868 sofcantsendmore(so);
869}
870
871static void
872send_icmp_to_guest(PNATState pData, char *buff, size_t len, struct socket *so, const struct sockaddr_in *addr)
873{
874 struct ip *ip;
875 uint32_t dst, src;
876 char ip_copy[256];
877 struct icmp *icp;
878 int old_ip_len = 0;
879 int hlen, original_hlen = 0;
880 struct mbuf *m;
881 struct icmp_msg *icm;
882 uint8_t proto;
883 int type = 0;
884
885 ip = (struct ip *)buff;
886 hlen = (ip->ip_hl << 2);
887 icp = (struct icmp *)((char *)ip + hlen);
888
889 Log(("ICMP:received msg(t:%d, c:%d)\n", icp->icmp_type, icp->icmp_code));
890 if ( icp->icmp_type != ICMP_ECHOREPLY
891 && icp->icmp_type != ICMP_TIMXCEED
892 && icp->icmp_type != ICMP_UNREACH)
893 {
894 return;
895 }
896
897 type = icp->icmp_type;
898 if ( type == ICMP_TIMXCEED
899 || type == ICMP_UNREACH)
900 {
901 ip = &icp->icmp_ip;
902 DO_ALIAS(&ip->ip_dst);
903 }
904 else
905 {
906 DO_ALIAS(&ip->ip_src);
907 }
908
909 icm = icmp_find_original_mbuf(pData, ip);
910
911 if (icm == NULL)
912 {
913 Log(("NAT: Can't find the corresponding packet for the received ICMP\n"));
914 return;
915 }
916
917 m = icm->im_m;
918 Assert(m != NULL);
919
920 src = addr->sin_addr.s_addr;
921
922 ip = mtod(m, struct ip *);
923 proto = ip->ip_p;
924 /* Now ip is pointing on header we've sent from guest */
925 if ( icp->icmp_type == ICMP_TIMXCEED
926 || icp->icmp_type == ICMP_UNREACH)
927 {
928 old_ip_len = (ip->ip_hl << 2) + 64;
929 if (old_ip_len > sizeof(ip_copy))
930 old_ip_len = sizeof(ip_copy);
931 memcpy(ip_copy, ip, old_ip_len);
932 }
933
934 /* source address from original IP packet*/
935 dst = ip->ip_src.s_addr;
936
937 /* overide ther tail of old packet */
938 ip = mtod(m, struct ip *); /* ip is from mbuf we've overrided */
939 original_hlen = ip->ip_hl << 2;
940 /* saves original ip header and options */
941 memcpy(m->m_data + original_hlen, buff + hlen, len - hlen);
942 m->m_len = len - hlen + original_hlen;
943 ip->ip_len = m->m_len;
944 ip->ip_p = IPPROTO_ICMP; /* the original package could be whatever, but we're response via ICMP*/
945
946 icp = (struct icmp *)((char *)ip + (ip->ip_hl << 2));
947 type = icp->icmp_type;
948 if ( type == ICMP_TIMXCEED
949 || type == ICMP_UNREACH)
950 {
951 /* according RFC 793 error messages required copy of initial IP header + 64 bit */
952 memcpy(&icp->icmp_ip, ip_copy, old_ip_len);
953 ip->ip_tos = ((ip->ip_tos & 0x1E) | 0xC0); /* high priority for errors */
954 }
955
956 ip->ip_src.s_addr = src;
957 ip->ip_dst.s_addr = dst;
958 icmp_reflect(pData, m);
959 LIST_REMOVE(icm, im_list);
960 /* Don't call m_free here*/
961
962 if ( type == ICMP_TIMXCEED
963 || type == ICMP_UNREACH)
964 {
965 icm->im_so->so_m = NULL;
966 switch (proto)
967 {
968 case IPPROTO_UDP:
969 /*XXX: so->so_m already freed so we shouldn't call sofree */
970 udp_detach(pData, icm->im_so);
971 break;
972 case IPPROTO_TCP:
973 /*close tcp should be here */
974 break;
975 default:
976 /* do nothing */
977 break;
978 }
979 }
980 RTMemFree(icm);
981}
982
983#ifdef RT_OS_WINDOWS
984static void
985sorecvfrom_icmp_win(PNATState pData, struct socket *so)
986{
987 int len;
988 int i;
989 struct ip *ip;
990 struct mbuf *m;
991 struct icmp *icp;
992 struct icmp_msg *icm;
993 struct ip *ip_broken; /* ICMP returns header + 64 bit of packet */
994 uint32_t src;
995 ICMP_ECHO_REPLY *icr;
996 int hlen = 0;
997 int data_len = 0;
998 int nbytes = 0;
999 u_char code = ~0;
1000
1001 len = pData->pfIcmpParseReplies(pData->pvIcmpBuffer, pData->szIcmpBuffer);
1002 if (len < 0)
1003 {
1004 LogRel(("NAT: Error (%d) occurred on ICMP receiving\n", GetLastError()));
1005 return;
1006 }
1007 if (len == 0)
1008 return; /* no error */
1009
1010 icr = (ICMP_ECHO_REPLY *)pData->pvIcmpBuffer;
1011 for (i = 0; i < len; ++i)
1012 {
1013 switch(icr[i].Status)
1014 {
1015 case IP_DEST_HOST_UNREACHABLE:
1016 code = (code != ~0 ? code : ICMP_UNREACH_HOST);
1017 case IP_DEST_NET_UNREACHABLE:
1018 code = (code != ~0 ? code : ICMP_UNREACH_NET);
1019 case IP_DEST_PROT_UNREACHABLE:
1020 code = (code != ~0 ? code : ICMP_UNREACH_PROTOCOL);
1021 /* UNREACH error inject here */
1022 case IP_DEST_PORT_UNREACHABLE:
1023 code = (code != ~0 ? code : ICMP_UNREACH_PORT);
1024 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, "Error occurred!!!");
1025 so->so_m = NULL;
1026 break;
1027 case IP_SUCCESS: /* echo replied */
1028 m = m_get(pData);
1029 m->m_data += if_maxlinkhdr;
1030 ip = mtod(m, struct ip *);
1031 ip->ip_src.s_addr = icr[i].Address;
1032 DO_ALIAS(&ip->ip_src);
1033 ip->ip_p = IPPROTO_ICMP;
1034 ip->ip_dst.s_addr = so->so_laddr.s_addr; /*XXX: still the hack*/
1035 data_len = sizeof(struct ip);
1036 ip->ip_hl = data_len >> 2; /* requiered for icmp_reflect, no IP options */
1037 ip->ip_ttl = icr[i].Options.Ttl;
1038
1039 icp = (struct icmp *)&ip[1]; /* no options */
1040 icp->icmp_type = ICMP_ECHOREPLY;
1041 icp->icmp_code = 0;
1042 icp->icmp_id = so->so_icmp_id;
1043 icp->icmp_seq = so->so_icmp_seq;
1044
1045 data_len += ICMP_MINLEN;
1046
1047 nbytes = (data_len + icr[i].DataSize > m->m_size? m->m_size - data_len: icr[i].DataSize);
1048 memcpy(icp->icmp_data, icr[i].Data, nbytes);
1049
1050 data_len += icr[i].DataSize;
1051
1052 ip->ip_len = data_len;
1053 m->m_len = ip->ip_len;
1054
1055 icmp_reflect(pData, m);
1056 break;
1057 case IP_TTL_EXPIRED_TRANSIT: /* TTL expired */
1058
1059 ip_broken = icr[i].Data;
1060 icm = icmp_find_original_mbuf(pData, ip_broken);
1061 if (icm == NULL) {
1062 Log(("ICMP: can't find original package (first double word %x)\n", *(uint32_t *)ip_broken));
1063 return;
1064 }
1065 m = icm->im_m;
1066 ip = mtod(m, struct ip *);
1067 ip->ip_ttl = icr[i].Options.Ttl;
1068 src = ip->ip_src.s_addr;
1069 ip->ip_dst.s_addr = src;
1070 ip->ip_dst.s_addr = icr[i].Address;
1071
1072 hlen = (ip->ip_hl << 2);
1073 icp = (struct icmp *)((char *)ip + hlen);
1074 ip_broken->ip_src.s_addr = src; /*it packet sent from host not from guest*/
1075 data_len = (ip_broken->ip_hl << 2) + 64;
1076
1077 nbytes =(hlen + ICMP_MINLEN + data_len > m->m_size? m->m_size - (hlen + ICMP_MINLEN): data_len);
1078 memcpy(icp->icmp_data, ip_broken, nbytes);
1079 icmp_reflect(pData, m);
1080 break;
1081 default:
1082 Log(("ICMP(default): message with Status: %x was received from %x\n", icr[i].Status, icr[i].Address));
1083 break;
1084 }
1085 }
1086}
1087#else /* RT_OS_WINDOWS */
1088static void sorecvfrom_icmp_unix(PNATState pData, struct socket *so)
1089{
1090 struct sockaddr_in addr;
1091 socklen_t addrlen = sizeof(struct sockaddr_in);
1092 char buff[1500];
1093 int len;
1094 len = recvfrom(so->s, buff, 1500, 0,
1095 (struct sockaddr *)&addr, &addrlen);
1096 /* XXX Check if reply is "correct"? */
1097
1098 if (len == -1 || len == 0)
1099 {
1100 u_char code = ICMP_UNREACH_PORT;
1101
1102 if (errno == EHOSTUNREACH)
1103 code = ICMP_UNREACH_HOST;
1104 else if(errno == ENETUNREACH)
1105 code = ICMP_UNREACH_NET;
1106
1107 DEBUG_MISC((dfd," udp icmp rx errno = %d-%s\n",
1108 errno, strerror(errno)));
1109 icmp_error(pData, so->so_m, ICMP_UNREACH, code, 0, strerror(errno));
1110 so->so_m = NULL;
1111 }
1112 else
1113 {
1114 send_icmp_to_guest(pData, buff, len, so, &addr);
1115 }
1116}
1117#endif /* !RT_OS_WINDOWS */
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette