VirtualBox

source: vbox/trunk/src/VBox/Devices/Network/slirp/tcp_subr.c@ 28587

Last change on this file since 28587 was 28510, checked in by vboxsync, 15 years ago

refined r60215

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 18.1 KB
Line 
1/* $Id: tcp_subr.c 28510 2010-04-20 10:25:22Z vboxsync $ */
2/** @file
3 * NAT - TCP support.
4 */
5
6/*
7 * Copyright (C) 2006-2010 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22/*
23 * This code is based on:
24 *
25 * Copyright (c) 1982, 1986, 1988, 1990, 1993
26 * The Regents of the University of California. All rights reserved.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 * 1. Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in the
35 * documentation and/or other materials provided with the distribution.
36 * 3. All advertising materials mentioning features or use of this software
37 * must display the following acknowledgement:
38 * This product includes software developed by the University of
39 * California, Berkeley and its contributors.
40 * 4. Neither the name of the University nor the names of its contributors
41 * may be used to endorse or promote products derived from this software
42 * without specific prior written permission.
43 *
44 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
45 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
46 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
47 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
48 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
49 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
50 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
51 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
52 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
53 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54 * SUCH DAMAGE.
55 *
56 * @(#)tcp_subr.c 8.1 (Berkeley) 6/10/93
57 * tcp_subr.c,v 1.5 1994/10/08 22:39:58 phk Exp
58 */
59
60/*
61 * Changes and additions relating to SLiRP
62 * Copyright (c) 1995 Danny Gasparovski.
63 *
64 * Please read the file COPYRIGHT for the
65 * terms and conditions of the copyright.
66 */
67
68#define WANT_SYS_IOCTL_H
69#include <slirp.h>
70
71
72/*
73 * Tcp initialization
74 */
75void
76tcp_init(PNATState pData)
77{
78 tcp_iss = 1; /* wrong */
79 tcb.so_next = tcb.so_prev = &tcb;
80 tcp_last_so = &tcb;
81 tcp_reass_maxqlen = 48;
82 tcp_reass_maxseg = 256;
83}
84
85/*
86 * Create template to be used to send tcp packets on a connection.
87 * Call after host entry created, fills
88 * in a skeletal tcp/ip header, minimizing the amount of work
89 * necessary when the connection is used.
90 */
91/* struct tcpiphdr * */
92void
93tcp_template(struct tcpcb *tp)
94{
95 struct socket *so = tp->t_socket;
96 register struct tcpiphdr *n = &tp->t_template;
97
98 memset(n->ti_x1, 0, 9);
99 n->ti_pr = IPPROTO_TCP;
100 n->ti_len = RT_H2N_U16(sizeof (struct tcpiphdr) - sizeof (struct ip));
101 n->ti_src = so->so_faddr;
102 n->ti_dst = so->so_laddr;
103 n->ti_sport = so->so_fport;
104 n->ti_dport = so->so_lport;
105
106 n->ti_seq = 0;
107 n->ti_ack = 0;
108 n->ti_x2 = 0;
109 n->ti_off = 5;
110 n->ti_flags = 0;
111 n->ti_win = 0;
112 n->ti_sum = 0;
113 n->ti_urp = 0;
114}
115
116/*
117 * Send a single message to the TCP at address specified by
118 * the given TCP/IP header. If m == 0, then we make a copy
119 * of the tcpiphdr at ti and send directly to the addressed host.
120 * This is used to force keep alive messages out using the TCP
121 * template for a connection tp->t_template. If flags are given
122 * then we send a message back to the TCP which originated the
123 * segment ti, and discard the mbuf containing it and any other
124 * attached mbufs.
125 *
126 * In any case the ack and sequence number of the transmitted
127 * segment are as specified by the parameters.
128 */
129void
130tcp_respond(PNATState pData, struct tcpcb *tp, struct tcpiphdr *ti, struct mbuf *m, tcp_seq ack, tcp_seq seq, int flags)
131{
132 register int tlen;
133 int win = 0;
134
135 DEBUG_CALL("tcp_respond");
136 DEBUG_ARG("tp = %lx", (long)tp);
137 DEBUG_ARG("ti = %lx", (long)ti);
138 DEBUG_ARG("m = %lx", (long)m);
139 DEBUG_ARG("ack = %u", ack);
140 DEBUG_ARG("seq = %u", seq);
141 DEBUG_ARG("flags = %x", flags);
142
143 if (tp)
144 win = sbspace(&tp->t_socket->so_rcv);
145 if (m == 0)
146 {
147#ifndef VBOX_WITH_SLIRP_BSD_MBUF
148 if ((m = m_get(pData)) == NULL)
149#else
150 if ((m = m_gethdr(pData, M_DONTWAIT, MT_HEADER)) == NULL)
151#endif
152 return;
153#ifdef TCP_COMPAT_42
154 tlen = 1;
155#else
156 tlen = 0;
157#endif
158 m->m_data += if_maxlinkhdr;
159#ifdef VBOX_WITH_SLIRP_BSD_MBUF
160 m->m_pkthdr.header = mtod(m, void *);
161#endif
162 *mtod(m, struct tcpiphdr *) = *ti;
163 ti = mtod(m, struct tcpiphdr *);
164 flags = TH_ACK;
165 }
166 else
167 {
168 /*
169 * ti points into m so the next line is just making
170 * the mbuf point to ti
171 */
172 m->m_data = (caddr_t)ti;
173
174 m->m_len = sizeof (struct tcpiphdr);
175 tlen = 0;
176#define xchg(a,b,type) { type t; t = a; a = b; b = t; }
177 xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, u_int32_t);
178 xchg(ti->ti_dport, ti->ti_sport, u_int16_t);
179#undef xchg
180 }
181 ti->ti_len = RT_H2N_U16((u_short)(sizeof (struct tcphdr) + tlen));
182 tlen += sizeof (struct tcpiphdr);
183 m->m_len = tlen;
184
185 memset(ti->ti_x1, 0, 9);
186 ti->ti_seq = RT_H2N_U32(seq);
187 ti->ti_ack = RT_H2N_U32(ack);
188 ti->ti_x2 = 0;
189 ti->ti_off = sizeof (struct tcphdr) >> 2;
190 ti->ti_flags = flags;
191 if (tp)
192 ti->ti_win = RT_H2N_U16((u_int16_t) (win >> tp->rcv_scale));
193 else
194 ti->ti_win = RT_H2N_U16((u_int16_t)win);
195 ti->ti_urp = 0;
196 ti->ti_sum = 0;
197 ti->ti_sum = cksum(m, tlen);
198 ((struct ip *)ti)->ip_len = tlen;
199
200 if(flags & TH_RST)
201 ((struct ip *)ti)->ip_ttl = MAXTTL;
202 else
203 ((struct ip *)ti)->ip_ttl = ip_defttl;
204
205 (void) ip_output(pData, (struct socket *)0, m);
206}
207
208/*
209 * Create a new TCP control block, making an
210 * empty reassembly queue and hooking it to the argument
211 * protocol control block.
212 */
213struct tcpcb *
214tcp_newtcpcb(PNATState pData, struct socket *so)
215{
216 register struct tcpcb *tp;
217
218 tp = (struct tcpcb *)RTMemAllocZ(sizeof(*tp));
219 if (tp == NULL)
220 return ((struct tcpcb *)0);
221
222 tp->t_maxseg = tcp_mssdflt;
223
224 tp->t_flags = tcp_do_rfc1323 ? (TF_REQ_SCALE|TF_REQ_TSTMP) : 0;
225 tp->t_socket = so;
226
227 /*
228 * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
229 * rtt estimate. Set rttvar so that srtt + 2 * rttvar gives
230 * reasonable initial retransmit time.
231 */
232 tp->t_srtt = TCPTV_SRTTBASE;
233 tp->t_rttvar = tcp_rttdflt * PR_SLOWHZ << 2;
234 tp->t_rttmin = TCPTV_MIN;
235
236 TCPT_RANGESET(tp->t_rxtcur,
237 ((TCPTV_SRTTBASE >> 2) + (TCPTV_SRTTDFLT << 2)) >> 1,
238 TCPTV_MIN, TCPTV_REXMTMAX);
239
240 tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
241 tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
242 tp->t_state = TCPS_CLOSED;
243
244 so->so_tcpcb = tp;
245
246 return (tp);
247}
248
249/*
250 * Drop a TCP connection, reporting
251 * the specified error. If connection is synchronized,
252 * then send a RST to peer.
253 */
254struct tcpcb *tcp_drop(PNATState pData, struct tcpcb *tp, int err)
255{
256/* tcp_drop(tp, errno)
257 register struct tcpcb *tp;
258 int errno;
259{
260*/
261 DEBUG_CALL("tcp_drop");
262 DEBUG_ARG("tp = %lx", (long)tp);
263 DEBUG_ARG("errno = %d", errno);
264
265 if (TCPS_HAVERCVDSYN(tp->t_state))
266 {
267 tp->t_state = TCPS_CLOSED;
268 (void) tcp_output(pData, tp);
269 tcpstat.tcps_drops++;
270 }
271 else
272 tcpstat.tcps_conndrops++;
273#if 0
274 if (errno == ETIMEDOUT && tp->t_softerror)
275 errno = tp->t_softerror;
276
277 so->so_error = errno;
278#endif
279 return (tcp_close(pData, tp));
280}
281
282/*
283 * Close a TCP control block:
284 * discard all space held by the tcp
285 * discard internet protocol block
286 * wake up any sleepers
287 */
288struct tcpcb *
289tcp_close(PNATState pData, register struct tcpcb *tp)
290{
291 struct socket *so = tp->t_socket;
292 struct socket *so_next, *so_prev;
293
294 struct tseg_qent *te = NULL;
295 DEBUG_CALL("tcp_close");
296 DEBUG_ARG("tp = %lx", (long )tp);
297 so_next = so_prev = NULL;
298 /*XXX: freeing the reassembly queue */
299 while (!LIST_EMPTY(&tp->t_segq))
300 {
301 te = LIST_FIRST(&tp->t_segq);
302 LIST_REMOVE(te, tqe_q);
303 m_freem(pData, te->tqe_m);
304 RTMemFree(te);
305 tcp_reass_qsize--;
306 }
307 RTMemFree(tp);
308 so->so_tcpcb = 0;
309 soisfdisconnected(so);
310 /* clobber input socket cache if we're closing the cached connection */
311 if (so == tcp_last_so)
312 tcp_last_so = &tcb;
313 closesocket(so->s);
314 /* Avoid double free if the socket is listening and therefore doesn't have
315 * any sbufs reserved. */
316 if (!(so->so_state & SS_FACCEPTCONN))
317 {
318 sbfree(&so->so_rcv);
319 sbfree(&so->so_snd);
320 }
321 sofree(pData, so);
322 SOCKET_UNLOCK(so);
323 tcpstat.tcps_closed++;
324 return ((struct tcpcb *)0);
325}
326
/*
 * Placeholder for the BSD protocol "drain" hook: does nothing.
 *
 * Declared with an explicit (void) parameter list so the definition
 * provides a prototype instead of an old-style empty declarator.
 */
void
tcp_drain(void)
{
    /* XXX */
}
332
/*
 * When a source quench is received, close the congestion window
 * to one segment.  We will gradually open it again as we proceed.
 *
 * This handler is compiled out.  The signature below has been made
 * well-formed C (the previous one declared an untyped parameter, used
 * 'inp' without declaring it and named a parameter 'errno', which the
 * C library may define as a macro).
 * NOTE(review): 'struct inpcb' and intotcpcb() are taken from the BSD
 * code this file is based on and are not visible in this file -
 * they have to be provided before the block can be enabled.
 */
#if 0

void
tcp_quench(struct inpcb *inp, int err)
{
    struct tcpcb *tp = intotcpcb(inp);

    if (tp)
        tp->snd_cwnd = tp->t_maxseg;
}

#endif
350
351/*
352 * TCP protocol interface to socket abstraction.
353 */
354
355/*
356 * User issued close, and wish to trail through shutdown states:
357 * if never received SYN, just forget it. If got a SYN from peer,
358 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
359 * If already got a FIN from peer, then almost done; go to LAST_ACK
360 * state. In all other cases, have already sent FIN to peer (e.g.
361 * after PRU_SHUTDOWN), and just have to play tedious game waiting
362 * for peer to send FIN or not respond to keep-alives, etc.
363 * We can let the user exit from the close as soon as the FIN is acked.
364 */
365void
366tcp_sockclosed(PNATState pData, struct tcpcb *tp)
367{
368 DEBUG_CALL("tcp_sockclosed");
369 DEBUG_ARG("tp = %lx", (long)tp);
370
371 switch (tp->t_state)
372 {
373 case TCPS_CLOSED:
374 case TCPS_LISTEN:
375 case TCPS_SYN_SENT:
376 tp->t_state = TCPS_CLOSED;
377 tp = tcp_close(pData, tp);
378 break;
379
380 case TCPS_SYN_RECEIVED:
381 case TCPS_ESTABLISHED:
382 tp->t_state = TCPS_FIN_WAIT_1;
383 break;
384
385 case TCPS_CLOSE_WAIT:
386 tp->t_state = TCPS_LAST_ACK;
387 break;
388 }
389/* soisfdisconnecting(tp->t_socket); */
390 if ( tp
391 && tp->t_state >= TCPS_FIN_WAIT_2)
392 soisfdisconnected(tp->t_socket);
393 /*
394 * (vasily) there're situations when the FIN or FIN,ACK are lost (Windows host)
395 * and retransmitting keeps VBox busy on sending closing sequences *very* frequent,
396 * easting a lot of CPU. To avoid this we don't sent on sockets marked as closed
397 * (see slirp.c for details about setting so_close member).
398 */
399 if ( tp
400 && tp->t_socket
401 && !tp->t_socket->so_close)
402 tcp_output(pData, tp);
403}
404
405/*
406 * Connect to a host on the Internet
407 * Called by tcp_input
408 * Only do a connect, the tcp fields will be set in tcp_input
409 * return 0 if there's a result of the connect,
410 * else return -1 means we're still connecting
411 * The return value is almost always -1 since the socket is
412 * nonblocking. Connect returns after the SYN is sent, and does
413 * not wait for ACK+SYN.
414 */
415int tcp_fconnect(PNATState pData, struct socket *so)
416{
417 int ret = 0;
418
419 DEBUG_CALL("tcp_fconnect");
420 DEBUG_ARG("so = %lx", (long )so);
421
422 if ((ret = so->s = socket(AF_INET, SOCK_STREAM, 0)) >= 0)
423 {
424 int opt, s = so->s;
425 struct sockaddr_in addr;
426
427 fd_nonblock(s);
428 opt = 1;
429 setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&opt, sizeof(opt));
430 opt = 1;
431 setsockopt(s, SOL_SOCKET, SO_OOBINLINE, (char *)&opt, sizeof(opt));
432
433 addr.sin_family = AF_INET;
434 if ((so->so_faddr.s_addr & RT_H2N_U32(pData->netmask)) == pData->special_addr.s_addr)
435 {
436 /* It's an alias */
437 switch(RT_N2H_U32(so->so_faddr.s_addr) & ~pData->netmask)
438 {
439 case CTL_DNS:
440 case CTL_ALIAS:
441 default:
442 addr.sin_addr = loopback_addr;
443 break;
444 }
445 }
446 else
447 addr.sin_addr = so->so_faddr;
448 addr.sin_port = so->so_fport;
449
450 DEBUG_MISC((dfd, " connect()ing, addr.sin_port=%d, "
451 "addr.sin_addr.s_addr=%.16s\n",
452 RT_N2H_U16(addr.sin_port), inet_ntoa(addr.sin_addr)));
453 /* We don't care what port we get */
454 ret = connect(s,(struct sockaddr *)&addr,sizeof (addr));
455
456 /*
457 * If it's not in progress, it failed, so we just return 0,
458 * without clearing SS_NOFDREF
459 */
460 soisfconnecting(so);
461 }
462
463 return(ret);
464}
465
466/*
467 * Accept the socket and connect to the local-host
468 *
469 * We have a problem. The correct thing to do would be
470 * to first connect to the local-host, and only if the
471 * connection is accepted, then do an accept() here.
472 * But, a) we need to know who's trying to connect
473 * to the socket to be able to SYN the local-host, and
474 * b) we are already connected to the foreign host by
475 * the time it gets to accept(), so... We simply accept
476 * here and SYN the local-host.
477 */
478void
479tcp_connect(PNATState pData, struct socket *inso)
480{
481 struct socket *so;
482 struct sockaddr_in addr;
483 socklen_t addrlen = sizeof(struct sockaddr_in);
484 struct tcpcb *tp;
485 int s, opt;
486 int status;
487 socklen_t optlen;
488 static int cVerbose = 1;
489
490 DEBUG_CALL("tcp_connect");
491 DEBUG_ARG("inso = %lx", (long)inso);
492
493 /*
494 * If it's an SS_ACCEPTONCE socket, no need to socreate()
495 * another socket, just use the accept() socket.
496 */
497 if (inso->so_state & SS_FACCEPTONCE)
498 {
499 /* FACCEPTONCE already have a tcpcb */
500 so = inso;
501 }
502 else
503 {
504 if ((so = socreate()) == NULL)
505 {
506 /* If it failed, get rid of the pending connection */
507 closesocket(accept(inso->s,(struct sockaddr *)&addr,&addrlen));
508 return;
509 }
510 if (tcp_attach(pData, so) < 0)
511 {
512 RTMemFree(so); /* NOT sofree */
513 return;
514 }
515 so->so_laddr = inso->so_laddr;
516 so->so_lport = inso->so_lport;
517 so->so_la = inso->so_la;
518 }
519
520 (void) tcp_mss(pData, sototcpcb(so), 0);
521
522 fd_nonblock(inso->s);
523 if ((s = accept(inso->s,(struct sockaddr *)&addr,&addrlen)) < 0)
524 {
525 tcp_close(pData, sototcpcb(so)); /* This will sofree() as well */
526 return;
527 }
528 fd_nonblock(s);
529 opt = 1;
530 setsockopt(s, SOL_SOCKET, SO_REUSEADDR,(char *)&opt, sizeof(int));
531 opt = 1;
532 setsockopt(s, SOL_SOCKET, SO_OOBINLINE,(char *)&opt, sizeof(int));
533#if 0
534 opt = 1;
535 setsockopt(s, IPPROTO_TCP, TCP_NODELAY,(char *)&opt, sizeof(int));
536#endif
537
538 optlen = sizeof(int);
539 status = getsockopt(s, SOL_SOCKET, SO_RCVBUF, (char *)&opt, &optlen);
540 if (status < 0)
541 {
542 LogRel(("NAT: Error(%d) while getting RCV capacity\n", errno));
543 goto no_sockopt;
544 }
545 if (cVerbose > 0)
546 LogRel(("NAT: old socket rcv size: %dKB\n", opt / 1024));
547 /* @todo (r-vvl) make it configurable (via extra data) */
548 opt = pData->socket_rcv;
549 status = setsockopt(s, SOL_SOCKET, SO_RCVBUF, (char *)&opt, sizeof(int));
550 if (status < 0)
551 {
552 LogRel(("NAT: Error(%d) while setting RCV capacity to (%d)\n", errno, opt));
553 goto no_sockopt;
554 }
555 optlen = sizeof(int);
556 status = getsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *)&opt, &optlen);
557 if (status < 0)
558 {
559 LogRel(("NAT: Error(%d) while getting SND capacity\n", errno));
560 goto no_sockopt;
561 }
562 if (cVerbose > 0)
563 LogRel(("NAT: old socket snd size: %dKB\n", opt / 1024));
564 opt = pData->socket_rcv;
565 status = setsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *)&opt, sizeof(int));
566 if (status < 0)
567 {
568 LogRel(("NAT: Error(%d) while setting SND capacity to (%d)\n", errno, opt));
569 goto no_sockopt;
570 }
571 if (cVerbose > 0)
572 cVerbose--;
573
574 no_sockopt:
575 so->so_fport = addr.sin_port;
576 so->so_faddr = addr.sin_addr;
577 /* Translate connections from localhost to the real hostname */
578 if (so->so_faddr.s_addr == 0 || so->so_faddr.s_addr == loopback_addr.s_addr)
579 so->so_faddr = alias_addr;
580
581 /* Close the accept() socket, set right state */
582 if (inso->so_state & SS_FACCEPTONCE)
583 {
584 closesocket(so->s); /* If we only accept once, close the accept() socket */
585 so->so_state = SS_NOFDREF; /* Don't select it yet, even though we have an FD */
586 /* if it's not FACCEPTONCE, it's already NOFDREF */
587 }
588 so->s = s;
589
590 tp = sototcpcb(so);
591
592 tcp_template(tp);
593
594 /* Compute window scaling to request. */
595/* while (tp->request_r_scale < TCP_MAX_WINSHIFT
596 * && (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
597 * tp->request_r_scale++;
598 */
599
600/* soisconnecting(so); */ /* NOFDREF used instead */
601 tcpstat.tcps_connattempt++;
602
603 tp->t_state = TCPS_SYN_SENT;
604 tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
605 tp->iss = tcp_iss;
606 tcp_iss += TCP_ISSINCR/2;
607 tcp_sendseqinit(tp);
608 tcp_output(pData, tp);
609}
610
611/*
612 * Attach a TCPCB to a socket.
613 */
614int
615tcp_attach(PNATState pData, struct socket *so)
616{
617 if ((so->so_tcpcb = tcp_newtcpcb(pData, so)) == NULL)
618 return -1;
619
620 SOCKET_LOCK_CREATE(so);
621 QSOCKET_LOCK(tcb);
622 insque(pData, so, &tcb);
623 NSOCK_INC();
624 QSOCKET_UNLOCK(tcb);
625 return 0;
626}
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette