VirtualBox

source: vbox/trunk/src/VBox/Devices/Network/slirp/tcp_subr.c@ 28800

Last change on this file since 28800 was 28800, checked in by vboxsync, 15 years ago

Automated rebranding to Oracle copyright/license strings via filemuncher

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 17.9 KB
Line 
1/* $Id: tcp_subr.c 28800 2010-04-27 08:22:32Z vboxsync $ */
2/** @file
3 * NAT - TCP support.
4 */
5
6/*
7 * Copyright (C) 2006-2010 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/*
19 * This code is based on:
20 *
21 * Copyright (c) 1982, 1986, 1988, 1990, 1993
22 * The Regents of the University of California. All rights reserved.
23 *
24 * Redistribution and use in source and binary forms, with or without
25 * modification, are permitted provided that the following conditions
26 * are met:
27 * 1. Redistributions of source code must retain the above copyright
28 * notice, this list of conditions and the following disclaimer.
29 * 2. Redistributions in binary form must reproduce the above copyright
30 * notice, this list of conditions and the following disclaimer in the
31 * documentation and/or other materials provided with the distribution.
32 * 3. All advertising materials mentioning features or use of this software
33 * must display the following acknowledgement:
34 * This product includes software developed by the University of
35 * California, Berkeley and its contributors.
36 * 4. Neither the name of the University nor the names of its contributors
37 * may be used to endorse or promote products derived from this software
38 * without specific prior written permission.
39 *
40 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50 * SUCH DAMAGE.
51 *
52 * @(#)tcp_subr.c 8.1 (Berkeley) 6/10/93
53 * tcp_subr.c,v 1.5 1994/10/08 22:39:58 phk Exp
54 */
55
56/*
57 * Changes and additions relating to SLiRP
58 * Copyright (c) 1995 Danny Gasparovski.
59 *
60 * Please read the file COPYRIGHT for the
61 * terms and conditions of the copyright.
62 */
63
64#define WANT_SYS_IOCTL_H
65#include <slirp.h>
66
67
68/*
69 * Tcp initialization
70 */
71void
72tcp_init(PNATState pData)
73{
74 tcp_iss = 1; /* wrong */
75 tcb.so_next = tcb.so_prev = &tcb;
76 tcp_last_so = &tcb;
77 tcp_reass_maxqlen = 48;
78 tcp_reass_maxseg = 256;
79}
80
81/*
82 * Create template to be used to send tcp packets on a connection.
83 * Call after host entry created, fills
84 * in a skeletal tcp/ip header, minimizing the amount of work
85 * necessary when the connection is used.
86 */
87/* struct tcpiphdr * */
88void
89tcp_template(struct tcpcb *tp)
90{
91 struct socket *so = tp->t_socket;
92 register struct tcpiphdr *n = &tp->t_template;
93
94 memset(n->ti_x1, 0, 9);
95 n->ti_pr = IPPROTO_TCP;
96 n->ti_len = RT_H2N_U16(sizeof (struct tcpiphdr) - sizeof (struct ip));
97 n->ti_src = so->so_faddr;
98 n->ti_dst = so->so_laddr;
99 n->ti_sport = so->so_fport;
100 n->ti_dport = so->so_lport;
101
102 n->ti_seq = 0;
103 n->ti_ack = 0;
104 n->ti_x2 = 0;
105 n->ti_off = 5;
106 n->ti_flags = 0;
107 n->ti_win = 0;
108 n->ti_sum = 0;
109 n->ti_urp = 0;
110}
111
112/*
113 * Send a single message to the TCP at address specified by
114 * the given TCP/IP header. If m == 0, then we make a copy
115 * of the tcpiphdr at ti and send directly to the addressed host.
116 * This is used to force keep alive messages out using the TCP
117 * template for a connection tp->t_template. If flags are given
118 * then we send a message back to the TCP which originated the
119 * segment ti, and discard the mbuf containing it and any other
120 * attached mbufs.
121 *
122 * In any case the ack and sequence number of the transmitted
123 * segment are as specified by the parameters.
124 */
125void
126tcp_respond(PNATState pData, struct tcpcb *tp, struct tcpiphdr *ti, struct mbuf *m, tcp_seq ack, tcp_seq seq, int flags)
127{
128 register int tlen;
129 int win = 0;
130
131 DEBUG_CALL("tcp_respond");
132 DEBUG_ARG("tp = %lx", (long)tp);
133 DEBUG_ARG("ti = %lx", (long)ti);
134 DEBUG_ARG("m = %lx", (long)m);
135 DEBUG_ARG("ack = %u", ack);
136 DEBUG_ARG("seq = %u", seq);
137 DEBUG_ARG("flags = %x", flags);
138
139 if (tp)
140 win = sbspace(&tp->t_socket->so_rcv);
141 if (m == 0)
142 {
143#ifndef VBOX_WITH_SLIRP_BSD_MBUF
144 if ((m = m_get(pData)) == NULL)
145#else
146 if ((m = m_gethdr(pData, M_DONTWAIT, MT_HEADER)) == NULL)
147#endif
148 return;
149#ifdef TCP_COMPAT_42
150 tlen = 1;
151#else
152 tlen = 0;
153#endif
154 m->m_data += if_maxlinkhdr;
155#ifdef VBOX_WITH_SLIRP_BSD_MBUF
156 m->m_pkthdr.header = mtod(m, void *);
157#endif
158 *mtod(m, struct tcpiphdr *) = *ti;
159 ti = mtod(m, struct tcpiphdr *);
160 flags = TH_ACK;
161 }
162 else
163 {
164 /*
165 * ti points into m so the next line is just making
166 * the mbuf point to ti
167 */
168 m->m_data = (caddr_t)ti;
169
170 m->m_len = sizeof (struct tcpiphdr);
171 tlen = 0;
172#define xchg(a,b,type) { type t; t = a; a = b; b = t; }
173 xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, u_int32_t);
174 xchg(ti->ti_dport, ti->ti_sport, u_int16_t);
175#undef xchg
176 }
177 ti->ti_len = RT_H2N_U16((u_short)(sizeof (struct tcphdr) + tlen));
178 tlen += sizeof (struct tcpiphdr);
179 m->m_len = tlen;
180
181 memset(ti->ti_x1, 0, 9);
182 ti->ti_seq = RT_H2N_U32(seq);
183 ti->ti_ack = RT_H2N_U32(ack);
184 ti->ti_x2 = 0;
185 ti->ti_off = sizeof (struct tcphdr) >> 2;
186 ti->ti_flags = flags;
187 if (tp)
188 ti->ti_win = RT_H2N_U16((u_int16_t) (win >> tp->rcv_scale));
189 else
190 ti->ti_win = RT_H2N_U16((u_int16_t)win);
191 ti->ti_urp = 0;
192 ti->ti_sum = 0;
193 ti->ti_sum = cksum(m, tlen);
194 ((struct ip *)ti)->ip_len = tlen;
195
196 if(flags & TH_RST)
197 ((struct ip *)ti)->ip_ttl = MAXTTL;
198 else
199 ((struct ip *)ti)->ip_ttl = ip_defttl;
200
201 (void) ip_output(pData, (struct socket *)0, m);
202}
203
204/*
205 * Create a new TCP control block, making an
206 * empty reassembly queue and hooking it to the argument
207 * protocol control block.
208 */
209struct tcpcb *
210tcp_newtcpcb(PNATState pData, struct socket *so)
211{
212 register struct tcpcb *tp;
213
214 tp = (struct tcpcb *)RTMemAllocZ(sizeof(*tp));
215 if (tp == NULL)
216 return ((struct tcpcb *)0);
217
218 tp->t_maxseg = tcp_mssdflt;
219
220 tp->t_flags = tcp_do_rfc1323 ? (TF_REQ_SCALE|TF_REQ_TSTMP) : 0;
221 tp->t_socket = so;
222
223 /*
224 * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
225 * rtt estimate. Set rttvar so that srtt + 2 * rttvar gives
226 * reasonable initial retransmit time.
227 */
228 tp->t_srtt = TCPTV_SRTTBASE;
229 tp->t_rttvar = tcp_rttdflt * PR_SLOWHZ << 2;
230 tp->t_rttmin = TCPTV_MIN;
231
232 TCPT_RANGESET(tp->t_rxtcur,
233 ((TCPTV_SRTTBASE >> 2) + (TCPTV_SRTTDFLT << 2)) >> 1,
234 TCPTV_MIN, TCPTV_REXMTMAX);
235
236 tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
237 tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
238 tp->t_state = TCPS_CLOSED;
239
240 so->so_tcpcb = tp;
241
242 return (tp);
243}
244
245/*
246 * Drop a TCP connection, reporting
247 * the specified error. If connection is synchronized,
248 * then send a RST to peer.
249 */
250struct tcpcb *tcp_drop(PNATState pData, struct tcpcb *tp, int err)
251{
252/* tcp_drop(tp, errno)
253 register struct tcpcb *tp;
254 int errno;
255{
256*/
257 DEBUG_CALL("tcp_drop");
258 DEBUG_ARG("tp = %lx", (long)tp);
259 DEBUG_ARG("errno = %d", errno);
260
261 if (TCPS_HAVERCVDSYN(tp->t_state))
262 {
263 tp->t_state = TCPS_CLOSED;
264 (void) tcp_output(pData, tp);
265 tcpstat.tcps_drops++;
266 }
267 else
268 tcpstat.tcps_conndrops++;
269#if 0
270 if (errno == ETIMEDOUT && tp->t_softerror)
271 errno = tp->t_softerror;
272
273 so->so_error = errno;
274#endif
275 return (tcp_close(pData, tp));
276}
277
278/*
279 * Close a TCP control block:
280 * discard all space held by the tcp
281 * discard internet protocol block
282 * wake up any sleepers
283 */
284struct tcpcb *
285tcp_close(PNATState pData, register struct tcpcb *tp)
286{
287 struct socket *so = tp->t_socket;
288 struct socket *so_next, *so_prev;
289
290 struct tseg_qent *te = NULL;
291 DEBUG_CALL("tcp_close");
292 DEBUG_ARG("tp = %lx", (long )tp);
293 so_next = so_prev = NULL;
294 /*XXX: freeing the reassembly queue */
295 while (!LIST_EMPTY(&tp->t_segq))
296 {
297 te = LIST_FIRST(&tp->t_segq);
298 LIST_REMOVE(te, tqe_q);
299 m_freem(pData, te->tqe_m);
300 RTMemFree(te);
301 tcp_reass_qsize--;
302 }
303 RTMemFree(tp);
304 so->so_tcpcb = 0;
305 soisfdisconnected(so);
306 /* clobber input socket cache if we're closing the cached connection */
307 if (so == tcp_last_so)
308 tcp_last_so = &tcb;
309 closesocket(so->s);
310 /* Avoid double free if the socket is listening and therefore doesn't have
311 * any sbufs reserved. */
312 if (!(so->so_state & SS_FACCEPTCONN))
313 {
314 sbfree(&so->so_rcv);
315 sbfree(&so->so_snd);
316 }
317 sofree(pData, so);
318 SOCKET_UNLOCK(so);
319 tcpstat.tcps_closed++;
320 return ((struct tcpcb *)0);
321}
322
/*
 * Placeholder: currently does nothing.
 *
 * Declared with an explicit (void) parameter list so the definition also
 * serves as a prototype.
 */
void
tcp_drain(void)
{
    /* XXX */
}
328
/*
 * When a source quench is received, close congestion window
 * to one segment. We will gradually open it again as we proceed.
 *
 * NOTE: the implementation below is compiled out with "#if 0".  As written
 * it would not build: the first parameter "i" has no type and the body
 * reads "inp", which is not declared in the function.
 */

#if 0

void
tcp_quench(i, int errno)
{
    struct tcpcb *tp = intotcpcb(inp);

    /* Shrink the congestion window to a single segment. */
    if (tp)
        tp->snd_cwnd = tp->t_maxseg;
}

#endif
346
347/*
348 * TCP protocol interface to socket abstraction.
349 */
350
351/*
352 * User issued close, and wish to trail through shutdown states:
353 * if never received SYN, just forget it. If got a SYN from peer,
354 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
355 * If already got a FIN from peer, then almost done; go to LAST_ACK
356 * state. In all other cases, have already sent FIN to peer (e.g.
357 * after PRU_SHUTDOWN), and just have to play tedious game waiting
358 * for peer to send FIN or not respond to keep-alives, etc.
359 * We can let the user exit from the close as soon as the FIN is acked.
360 */
361void
362tcp_sockclosed(PNATState pData, struct tcpcb *tp)
363{
364 DEBUG_CALL("tcp_sockclosed");
365 DEBUG_ARG("tp = %lx", (long)tp);
366
367 switch (tp->t_state)
368 {
369 case TCPS_CLOSED:
370 case TCPS_LISTEN:
371 case TCPS_SYN_SENT:
372 tp->t_state = TCPS_CLOSED;
373 tp = tcp_close(pData, tp);
374 break;
375
376 case TCPS_SYN_RECEIVED:
377 case TCPS_ESTABLISHED:
378 tp->t_state = TCPS_FIN_WAIT_1;
379 break;
380
381 case TCPS_CLOSE_WAIT:
382 tp->t_state = TCPS_LAST_ACK;
383 break;
384 }
385/* soisfdisconnecting(tp->t_socket); */
386 if ( tp
387 && tp->t_state >= TCPS_FIN_WAIT_2)
388 soisfdisconnected(tp->t_socket);
389 /*
390 * (vasily) there're situations when the FIN or FIN,ACK are lost (Windows host)
391 * and retransmitting keeps VBox busy on sending closing sequences *very* frequent,
392 * easting a lot of CPU. To avoid this we don't sent on sockets marked as closed
393 * (see slirp.c for details about setting so_close member).
394 */
395 if ( tp
396 && tp->t_socket
397 && !tp->t_socket->so_close)
398 tcp_output(pData, tp);
399}
400
401/*
402 * Connect to a host on the Internet
403 * Called by tcp_input
404 * Only do a connect, the tcp fields will be set in tcp_input
405 * return 0 if there's a result of the connect,
406 * else return -1 means we're still connecting
407 * The return value is almost always -1 since the socket is
408 * nonblocking. Connect returns after the SYN is sent, and does
409 * not wait for ACK+SYN.
410 */
411int tcp_fconnect(PNATState pData, struct socket *so)
412{
413 int ret = 0;
414
415 DEBUG_CALL("tcp_fconnect");
416 DEBUG_ARG("so = %lx", (long )so);
417
418 if ((ret = so->s = socket(AF_INET, SOCK_STREAM, 0)) >= 0)
419 {
420 int opt, s = so->s;
421 struct sockaddr_in addr;
422
423 fd_nonblock(s);
424 opt = 1;
425 setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&opt, sizeof(opt));
426 opt = 1;
427 setsockopt(s, SOL_SOCKET, SO_OOBINLINE, (char *)&opt, sizeof(opt));
428
429 addr.sin_family = AF_INET;
430 if ((so->so_faddr.s_addr & RT_H2N_U32(pData->netmask)) == pData->special_addr.s_addr)
431 {
432 /* It's an alias */
433 switch(RT_N2H_U32(so->so_faddr.s_addr) & ~pData->netmask)
434 {
435 case CTL_DNS:
436 case CTL_ALIAS:
437 default:
438 addr.sin_addr = loopback_addr;
439 break;
440 }
441 }
442 else
443 addr.sin_addr = so->so_faddr;
444 addr.sin_port = so->so_fport;
445
446 DEBUG_MISC((dfd, " connect()ing, addr.sin_port=%d, "
447 "addr.sin_addr.s_addr=%.16s\n",
448 RT_N2H_U16(addr.sin_port), inet_ntoa(addr.sin_addr)));
449 /* We don't care what port we get */
450 ret = connect(s,(struct sockaddr *)&addr,sizeof (addr));
451
452 /*
453 * If it's not in progress, it failed, so we just return 0,
454 * without clearing SS_NOFDREF
455 */
456 soisfconnecting(so);
457 }
458
459 return(ret);
460}
461
462/*
463 * Accept the socket and connect to the local-host
464 *
465 * We have a problem. The correct thing to do would be
466 * to first connect to the local-host, and only if the
467 * connection is accepted, then do an accept() here.
468 * But, a) we need to know who's trying to connect
469 * to the socket to be able to SYN the local-host, and
470 * b) we are already connected to the foreign host by
471 * the time it gets to accept(), so... We simply accept
472 * here and SYN the local-host.
473 */
474void
475tcp_connect(PNATState pData, struct socket *inso)
476{
477 struct socket *so;
478 struct sockaddr_in addr;
479 socklen_t addrlen = sizeof(struct sockaddr_in);
480 struct tcpcb *tp;
481 int s, opt;
482 int status;
483 socklen_t optlen;
484 static int cVerbose = 1;
485
486 DEBUG_CALL("tcp_connect");
487 DEBUG_ARG("inso = %lx", (long)inso);
488
489 /*
490 * If it's an SS_ACCEPTONCE socket, no need to socreate()
491 * another socket, just use the accept() socket.
492 */
493 if (inso->so_state & SS_FACCEPTONCE)
494 {
495 /* FACCEPTONCE already have a tcpcb */
496 so = inso;
497 }
498 else
499 {
500 if ((so = socreate()) == NULL)
501 {
502 /* If it failed, get rid of the pending connection */
503 closesocket(accept(inso->s,(struct sockaddr *)&addr,&addrlen));
504 return;
505 }
506 if (tcp_attach(pData, so) < 0)
507 {
508 RTMemFree(so); /* NOT sofree */
509 return;
510 }
511 so->so_laddr = inso->so_laddr;
512 so->so_lport = inso->so_lport;
513 so->so_la = inso->so_la;
514 }
515
516 (void) tcp_mss(pData, sototcpcb(so), 0);
517
518 fd_nonblock(inso->s);
519 if ((s = accept(inso->s,(struct sockaddr *)&addr,&addrlen)) < 0)
520 {
521 tcp_close(pData, sototcpcb(so)); /* This will sofree() as well */
522 return;
523 }
524 fd_nonblock(s);
525 opt = 1;
526 setsockopt(s, SOL_SOCKET, SO_REUSEADDR,(char *)&opt, sizeof(int));
527 opt = 1;
528 setsockopt(s, SOL_SOCKET, SO_OOBINLINE,(char *)&opt, sizeof(int));
529#if 0
530 opt = 1;
531 setsockopt(s, IPPROTO_TCP, TCP_NODELAY,(char *)&opt, sizeof(int));
532#endif
533
534 optlen = sizeof(int);
535 status = getsockopt(s, SOL_SOCKET, SO_RCVBUF, (char *)&opt, &optlen);
536 if (status < 0)
537 {
538 LogRel(("NAT: Error(%d) while getting RCV capacity\n", errno));
539 goto no_sockopt;
540 }
541 if (cVerbose > 0)
542 LogRel(("NAT: old socket rcv size: %dKB\n", opt / 1024));
543 /* @todo (r-vvl) make it configurable (via extra data) */
544 opt = pData->socket_rcv;
545 status = setsockopt(s, SOL_SOCKET, SO_RCVBUF, (char *)&opt, sizeof(int));
546 if (status < 0)
547 {
548 LogRel(("NAT: Error(%d) while setting RCV capacity to (%d)\n", errno, opt));
549 goto no_sockopt;
550 }
551 optlen = sizeof(int);
552 status = getsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *)&opt, &optlen);
553 if (status < 0)
554 {
555 LogRel(("NAT: Error(%d) while getting SND capacity\n", errno));
556 goto no_sockopt;
557 }
558 if (cVerbose > 0)
559 LogRel(("NAT: old socket snd size: %dKB\n", opt / 1024));
560 opt = pData->socket_rcv;
561 status = setsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *)&opt, sizeof(int));
562 if (status < 0)
563 {
564 LogRel(("NAT: Error(%d) while setting SND capacity to (%d)\n", errno, opt));
565 goto no_sockopt;
566 }
567 if (cVerbose > 0)
568 cVerbose--;
569
570 no_sockopt:
571 so->so_fport = addr.sin_port;
572 so->so_faddr = addr.sin_addr;
573 /* Translate connections from localhost to the real hostname */
574 if (so->so_faddr.s_addr == 0 || so->so_faddr.s_addr == loopback_addr.s_addr)
575 so->so_faddr = alias_addr;
576
577 /* Close the accept() socket, set right state */
578 if (inso->so_state & SS_FACCEPTONCE)
579 {
580 closesocket(so->s); /* If we only accept once, close the accept() socket */
581 so->so_state = SS_NOFDREF; /* Don't select it yet, even though we have an FD */
582 /* if it's not FACCEPTONCE, it's already NOFDREF */
583 }
584 so->s = s;
585
586 tp = sototcpcb(so);
587
588 tcp_template(tp);
589
590 /* Compute window scaling to request. */
591/* while (tp->request_r_scale < TCP_MAX_WINSHIFT
592 * && (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
593 * tp->request_r_scale++;
594 */
595
596/* soisconnecting(so); */ /* NOFDREF used instead */
597 tcpstat.tcps_connattempt++;
598
599 tp->t_state = TCPS_SYN_SENT;
600 tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
601 tp->iss = tcp_iss;
602 tcp_iss += TCP_ISSINCR/2;
603 tcp_sendseqinit(tp);
604 tcp_output(pData, tp);
605}
606
607/*
608 * Attach a TCPCB to a socket.
609 */
610int
611tcp_attach(PNATState pData, struct socket *so)
612{
613 if ((so->so_tcpcb = tcp_newtcpcb(pData, so)) == NULL)
614 return -1;
615
616 SOCKET_LOCK_CREATE(so);
617 QSOCKET_LOCK(tcb);
618 insque(pData, so, &tcb);
619 NSOCK_INC();
620 QSOCKET_UNLOCK(tcb);
621 return 0;
622}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette