VirtualBox

source: vbox/trunk/src/VBox/Devices/Network/slirp/tcp_input.c@ 1033

Last change on this file since 1033 was 1033, checked in by vboxsync, 18 years ago

Big change to make slirp fully instantiatable (replace all global
variables with local ones, passing a reference to the state/config
structure to all places which are interested). You can now have as many
cards in the guest configured for NAT networking as you want.

  • Property svn:eol-style set to native
File size: 53.8 KB
Line 
1/*
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * @(#)tcp_input.c 8.5 (Berkeley) 4/10/94
34 * tcp_input.c,v 1.10 1994/10/13 18:36:32 wollman Exp
35 */
36
37/*
38 * Changes and additions relating to SLiRP
39 * Copyright (c) 1995 Danny Gasparovski.
40 *
41 * Please read the file COPYRIGHT for the
42 * terms and conditions of the copyright.
43 */
44
45#include <slirp.h>
46#include "ip_icmp.h"
47
48#ifndef VBOX
49struct socket tcb;
50
51int tcprexmtthresh = 3;
52struct socket *tcp_last_so = &tcb;
53
54tcp_seq tcp_iss; /* tcp initial send seq # */
55#endif /* !VBOX */
56
57#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * PR_SLOWHZ)
58
59/* for modulo comparisons of timestamps */
60#define TSTMP_LT(a,b) ((int)((a)-(b)) < 0)
61#define TSTMP_GEQ(a,b) ((int)((a)-(b)) >= 0)
62
63/*
64 * Insert segment ti into reassembly queue of tcp with
65 * control block tp. Return TH_FIN if reassembly now includes
66 * a segment with FIN. The macro form does the common case inline
67 * (segment is the next to be received on an established connection,
68 * and the queue is empty), avoiding linkage into and removal
69 * from the queue and repetition of various conversions.
70 * Set DELACK for segments received in order, but ack immediately
71 * when segments are out of order (so fast retransmit can work).
72 */
/*
 * TCP_REASS comes in four variants, selected by VBOX (adds the pData
 * state argument that is forwarded to tcp_emu/sbappend/tcp_reass) and by
 * TCP_ACK_HACK (ACK immediately when the segment carries TH_PUSH instead
 * of always delaying the ACK on the in-order fast path).
 *
 * Every macro argument is evaluated more than once, so callers must pass
 * side-effect-free expressions.  All parameter uses are parenthesized so
 * that arbitrary expressions can be passed safely.
 *
 * The expansion is a bare { ... } block, not do { } while (0); keep that
 * in mind when using it as the body of an if/else.
 */
#ifdef VBOX
#ifdef TCP_ACK_HACK
#define TCP_REASS(pData, tp, ti, m, so, flags) {\
    if ((ti)->ti_seq == (tp)->rcv_nxt && \
        u32_to_ptr((tp)->seg_next, struct tcpcb *) == (tp) && \
        (tp)->t_state == TCPS_ESTABLISHED) {\
        if ((ti)->ti_flags & TH_PUSH) \
            (tp)->t_flags |= TF_ACKNOW; \
        else \
            (tp)->t_flags |= TF_DELACK; \
        (tp)->rcv_nxt += (ti)->ti_len; \
        (flags) = (ti)->ti_flags & TH_FIN; \
        tcpstat.tcps_rcvpack++;\
        tcpstat.tcps_rcvbyte += (ti)->ti_len;\
        if ((so)->so_emu) { \
            if (tcp_emu((pData), (so), (m))) sbappend((pData), (so), (m)); \
        } else \
            sbappend((pData), (so), (m)); \
/*      sorwakeup(so); */ \
    } else {\
        (flags) = tcp_reass((pData), (tp), (ti), (m)); \
        (tp)->t_flags |= TF_ACKNOW; \
    } \
}
#else
#define TCP_REASS(pData, tp, ti, m, so, flags) { \
    if ((ti)->ti_seq == (tp)->rcv_nxt && \
        u32_to_ptr((tp)->seg_next, struct tcpcb *) == (tp) && \
        (tp)->t_state == TCPS_ESTABLISHED) { \
        (tp)->t_flags |= TF_DELACK; \
        (tp)->rcv_nxt += (ti)->ti_len; \
        (flags) = (ti)->ti_flags & TH_FIN; \
        tcpstat.tcps_rcvpack++;\
        tcpstat.tcps_rcvbyte += (ti)->ti_len;\
        if ((so)->so_emu) { \
            if (tcp_emu((pData), (so), (m))) sbappend((pData), (so), (m)); \
        } else \
            sbappend((pData), (so), (m)); \
/*      sorwakeup(so); */ \
    } else { \
        (flags) = tcp_reass((pData), (tp), (ti), (m)); \
        (tp)->t_flags |= TF_ACKNOW; \
    } \
}
#endif
#else /* !VBOX */
#ifdef TCP_ACK_HACK
#define TCP_REASS(tp, ti, m, so, flags) {\
    if ((ti)->ti_seq == (tp)->rcv_nxt && \
        u32_to_ptr((tp)->seg_next, struct tcpcb *) == (tp) && \
        (tp)->t_state == TCPS_ESTABLISHED) {\
        if ((ti)->ti_flags & TH_PUSH) \
            (tp)->t_flags |= TF_ACKNOW; \
        else \
            (tp)->t_flags |= TF_DELACK; \
        (tp)->rcv_nxt += (ti)->ti_len; \
        (flags) = (ti)->ti_flags & TH_FIN; \
        tcpstat.tcps_rcvpack++;\
        tcpstat.tcps_rcvbyte += (ti)->ti_len;\
        if ((so)->so_emu) { \
            if (tcp_emu((so), (m))) sbappend((so), (m)); \
        } else \
            sbappend((so), (m)); \
/*      sorwakeup(so); */ \
    } else {\
        (flags) = tcp_reass((tp), (ti), (m)); \
        (tp)->t_flags |= TF_ACKNOW; \
    } \
}
#else
#define TCP_REASS(tp, ti, m, so, flags) { \
    if ((ti)->ti_seq == (tp)->rcv_nxt && \
        u32_to_ptr((tp)->seg_next, struct tcpcb *) == (tp) && \
        (tp)->t_state == TCPS_ESTABLISHED) { \
        (tp)->t_flags |= TF_DELACK; \
        (tp)->rcv_nxt += (ti)->ti_len; \
        (flags) = (ti)->ti_flags & TH_FIN; \
        tcpstat.tcps_rcvpack++;\
        tcpstat.tcps_rcvbyte += (ti)->ti_len;\
        if ((so)->so_emu) { \
            if (tcp_emu((so), (m))) sbappend((so), (m)); \
        } else \
            sbappend((so), (m)); \
/*      sorwakeup(so); */ \
    } else { \
        (flags) = tcp_reass((tp), (ti), (m)); \
        (tp)->t_flags |= TF_ACKNOW; \
    } \
}
#endif
#endif /* !VBOX */
164
165int
166#ifdef VBOX
167tcp_reass(PNATState pData, register struct tcpcb *tp, register struct tcpiphdr *ti, struct mbuf *m)
168#else /* !VBOX */
169tcp_reass(tp, ti, m)
170 register struct tcpcb *tp;
171 register struct tcpiphdr *ti;
172 struct mbuf *m;
173#endif /* !VBOX */
174{
175 register struct tcpiphdr *q;
176 struct socket *so = tp->t_socket;
177 int flags;
178
179 /*
180 * Call with ti==0 after become established to
181 * force pre-ESTABLISHED data up to user socket.
182 */
183 if (ti == 0)
184 goto present;
185
186 /*
187 * Find a segment which begins after this one does.
188 */
189 for (q = u32_to_ptr(tp->seg_next, struct tcpiphdr *); q != (struct tcpiphdr *)tp;
190 q = u32_to_ptr(q->ti_next, struct tcpiphdr *))
191 if (SEQ_GT(q->ti_seq, ti->ti_seq))
192 break;
193
194 /*
195 * If there is a preceding segment, it may provide some of
196 * our data already. If so, drop the data from the incoming
197 * segment. If it provides all of our data, drop us.
198 */
199 if (u32_to_ptr(q->ti_prev, struct tcpiphdr *) != (struct tcpiphdr *)tp) {
200 register int i;
201 q = u32_to_ptr(q->ti_prev, struct tcpiphdr *);
202 /* conversion to int (in i) handles seq wraparound */
203 i = q->ti_seq + q->ti_len - ti->ti_seq;
204 if (i > 0) {
205 if (i >= ti->ti_len) {
206 tcpstat.tcps_rcvduppack++;
207 tcpstat.tcps_rcvdupbyte += ti->ti_len;
208#ifdef VBOX
209 m_freem(pData, m);
210#else /* !VBOX */
211 m_freem(m);
212#endif /* !VBOX */
213 /*
214 * Try to present any queued data
215 * at the left window edge to the user.
216 * This is needed after the 3-WHS
217 * completes.
218 */
219 goto present; /* ??? */
220 }
221 m_adj(m, i);
222 ti->ti_len -= i;
223 ti->ti_seq += i;
224 }
225 q = u32_to_ptr(q->ti_next, struct tcpiphdr *);
226 }
227 tcpstat.tcps_rcvoopack++;
228 tcpstat.tcps_rcvoobyte += ti->ti_len;
229 REASS_MBUF_SET(ti, m); /* XXX */
230
231 /*
232 * While we overlap succeeding segments trim them or,
233 * if they are completely covered, dequeue them.
234 */
235 while (q != (struct tcpiphdr *)tp) {
236 register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq;
237 if (i <= 0)
238 break;
239 if (i < q->ti_len) {
240 q->ti_seq += i;
241 q->ti_len -= i;
242 m_adj(REASS_MBUF_GET(q), i);
243 break;
244 }
245 q = u32_to_ptr(q->ti_next, struct tcpiphdr *);
246 m = REASS_MBUF_GET(u32_to_ptr(q->ti_prev, struct tcpiphdr *));
247 remque_32(u32_to_ptr(q->ti_prev, struct tcpiphdr *));
248#ifdef VBOX
249 m_freem(pData, m);
250#else /* !VBOX */
251 m_freem(m);
252#endif /* !VBOX */
253 }
254
255 /*
256 * Stick new segment in its place.
257 */
258 insque_32(ti, u32_to_ptr(q->ti_prev, struct tcpiphdr *));
259
260present:
261 /*
262 * Present data to user, advancing rcv_nxt through
263 * completed sequence space.
264 */
265 if (!TCPS_HAVEESTABLISHED(tp->t_state))
266 return (0);
267 ti = u32_to_ptr(tp->seg_next, struct tcpiphdr *);
268 if (ti == (struct tcpiphdr *)tp || ti->ti_seq != tp->rcv_nxt)
269 return (0);
270 if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len)
271 return (0);
272 do {
273 tp->rcv_nxt += ti->ti_len;
274 flags = ti->ti_flags & TH_FIN;
275 remque_32(ti);
276 m = REASS_MBUF_GET(ti); /* XXX */
277 ti = u32_to_ptr(ti->ti_next, struct tcpiphdr *);
278/* if (so->so_state & SS_FCANTRCVMORE) */
279 if (so->so_state & SS_FCANTSENDMORE)
280#ifdef VBOX
281 m_freem(pData, m);
282#else /* !VBOX */
283 m_freem(m);
284#endif /* !VBOX */
285 else {
286 if (so->so_emu) {
287#ifdef VBOX
288 if (tcp_emu(pData, so,m)) sbappend(pData, so, m);
289#else /* !VBOX */
290 if (tcp_emu(so,m)) sbappend(so, m);
291#endif /* !VBOX */
292 } else
293#ifdef VBOX
294 sbappend(pData, so, m);
295#else /* !VBOX */
296 sbappend(so, m);
297#endif /* !VBOX */
298 }
299 } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt);
300/* sorwakeup(so); */
301 return (flags);
302}
303
304/*
305 * TCP input routine, follows pages 65-76 of the
306 * protocol specification dated September, 1981 very closely.
307 */
308void
309#ifdef VBOX
310tcp_input(PNATState pData, register struct mbuf *m, int iphlen, struct socket *inso)
311#else /* !VBOX */
312tcp_input(m, iphlen, inso)
313 register struct mbuf *m;
314 int iphlen;
315 struct socket *inso;
316#endif /* !VBOX */
317{
318 struct ip save_ip, *ip;
319 register struct tcpiphdr *ti;
320 caddr_t optp = NULL;
321 int optlen = 0;
322 int len, tlen, off;
323 register struct tcpcb *tp = 0;
324 register int tiflags;
325 struct socket *so = 0;
326 int todrop, acked, ourfinisacked, needoutput = 0;
327/* int dropsocket = 0; */
328 int iss = 0;
329 u_long tiwin;
330 int ret;
331/* int ts_present = 0; */
332
333 DEBUG_CALL("tcp_input");
334 DEBUG_ARGS((dfd," m = %8lx iphlen = %2d inso = %lx\n",
335 (long )m, iphlen, (long )inso ));
336
337 /*
338 * If called with m == 0, then we're continuing the connect
339 */
340 if (m == NULL) {
341 so = inso;
342
343 /* Re-set a few variables */
344 tp = sototcpcb(so);
345 m = so->so_m;
346 so->so_m = 0;
347 ti = so->so_ti;
348 tiwin = ti->ti_win;
349 tiflags = ti->ti_flags;
350
351 goto cont_conn;
352 }
353
354
355 tcpstat.tcps_rcvtotal++;
356 /*
357 * Get IP and TCP header together in first mbuf.
358 * Note: IP leaves IP header in first mbuf.
359 */
360 ti = mtod(m, struct tcpiphdr *);
361 if (iphlen > sizeof(struct ip )) {
362 ip_stripoptions(m, (struct mbuf *)0);
363 iphlen=sizeof(struct ip );
364 }
365 /* XXX Check if too short */
366
367
368 /*
369 * Save a copy of the IP header in case we want restore it
370 * for sending an ICMP error message in response.
371 */
372 ip=mtod(m, struct ip *);
373 save_ip = *ip;
374 save_ip.ip_len+= iphlen;
375
376 /*
377 * Checksum extended TCP header and data.
378 */
379 tlen = ((struct ip *)ti)->ip_len;
380 ti->ti_next = ti->ti_prev = 0;
381 ti->ti_x1 = 0;
382 ti->ti_len = htons((u_int16_t)tlen);
383 len = sizeof(struct ip ) + tlen;
384 /* keep checksum for ICMP reply
385 * ti->ti_sum = cksum(m, len);
386 * if (ti->ti_sum) { */
387 if(cksum(m, len)) {
388 tcpstat.tcps_rcvbadsum++;
389 goto drop;
390 }
391
392 /*
393 * Check that TCP offset makes sense,
394 * pull out TCP options and adjust length. XXX
395 */
396 off = ti->ti_off << 2;
397 if (off < sizeof (struct tcphdr) || off > tlen) {
398 tcpstat.tcps_rcvbadoff++;
399 goto drop;
400 }
401 tlen -= off;
402 ti->ti_len = tlen;
403 if (off > sizeof (struct tcphdr)) {
404 optlen = off - sizeof (struct tcphdr);
405 optp = mtod(m, caddr_t) + sizeof (struct tcpiphdr);
406
407 /*
408 * Do quick retrieval of timestamp options ("options
409 * prediction?"). If timestamp is the only option and it's
410 * formatted as recommended in RFC 1323 appendix A, we
411 * quickly get the values now and not bother calling
412 * tcp_dooptions(), etc.
413 */
414/* if ((optlen == TCPOLEN_TSTAMP_APPA ||
415 * (optlen > TCPOLEN_TSTAMP_APPA &&
416 * optp[TCPOLEN_TSTAMP_APPA] == TCPOPT_EOL)) &&
417 * *(u_int32_t *)optp == htonl(TCPOPT_TSTAMP_HDR) &&
418 * (ti->ti_flags & TH_SYN) == 0) {
419 * ts_present = 1;
420 * ts_val = ntohl(*(u_int32_t *)(optp + 4));
421 * ts_ecr = ntohl(*(u_int32_t *)(optp + 8));
422 * optp = NULL; / * we've parsed the options * /
423 * }
424 */
425 }
426 tiflags = ti->ti_flags;
427
428 /*
429 * Convert TCP protocol specific fields to host format.
430 */
431 NTOHL(ti->ti_seq);
432 NTOHL(ti->ti_ack);
433 NTOHS(ti->ti_win);
434 NTOHS(ti->ti_urp);
435
436 /*
437 * Drop TCP, IP headers and TCP options.
438 */
439 m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
440 m->m_len -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
441
442 /*
443 * Locate pcb for segment.
444 */
445findso:
446 so = tcp_last_so;
447 if (so->so_fport != ti->ti_dport ||
448 so->so_lport != ti->ti_sport ||
449 so->so_laddr.s_addr != ti->ti_src.s_addr ||
450 so->so_faddr.s_addr != ti->ti_dst.s_addr) {
451 so = solookup(&tcb, ti->ti_src, ti->ti_sport,
452 ti->ti_dst, ti->ti_dport);
453 if (so)
454 tcp_last_so = so;
455 ++tcpstat.tcps_socachemiss;
456 }
457
458 /*
459 * If the state is CLOSED (i.e., TCB does not exist) then
460 * all data in the incoming segment is discarded.
461 * If the TCB exists but is in CLOSED state, it is embryonic,
462 * but should either do a listen or a connect soon.
463 *
464 * state == CLOSED means we've done socreate() but haven't
465 * attached it to a protocol yet...
466 *
467 * XXX If a TCB does not exist, and the TH_SYN flag is
468 * the only flag set, then create a session, mark it
469 * as if it was LISTENING, and continue...
470 */
471 if (so == 0) {
472 if ((tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) != TH_SYN)
473 goto dropwithreset;
474
475 if ((so = socreate()) == NULL)
476 goto dropwithreset;
477#ifdef VBOX
478 if (tcp_attach(pData, so) < 0) {
479#else /* !VBOX */
480 if (tcp_attach(so) < 0) {
481#endif /* !VBOX */
482 free(so); /* Not sofree (if it failed, it's not insqued) */
483 goto dropwithreset;
484 }
485
486 sbreserve(&so->so_snd, tcp_sndspace);
487 sbreserve(&so->so_rcv, tcp_rcvspace);
488
489 /* tcp_last_so = so; */ /* XXX ? */
490 /* tp = sototcpcb(so); */
491
492 so->so_laddr = ti->ti_src;
493 so->so_lport = ti->ti_sport;
494 so->so_faddr = ti->ti_dst;
495 so->so_fport = ti->ti_dport;
496
497 if ((so->so_iptos = tcp_tos(so)) == 0)
498 so->so_iptos = ((struct ip *)ti)->ip_tos;
499
500 tp = sototcpcb(so);
501 tp->t_state = TCPS_LISTEN;
502 }
503
504 /*
505 * If this is a still-connecting socket, this probably
506 * a retransmit of the SYN. Whether it's a retransmit SYN
507 * or something else, we nuke it.
508 */
509 if (so->so_state & SS_ISFCONNECTING)
510 goto drop;
511
512 tp = sototcpcb(so);
513
514 /* XXX Should never fail */
515 if (tp == 0)
516 goto dropwithreset;
517 if (tp->t_state == TCPS_CLOSED)
518 goto drop;
519
520 /* Unscale the window into a 32-bit value. */
521/* if ((tiflags & TH_SYN) == 0)
522 * tiwin = ti->ti_win << tp->snd_scale;
523 * else
524 */
525 tiwin = ti->ti_win;
526
527 /*
528 * Segment received on connection.
529 * Reset idle time and keep-alive timer.
530 */
531 tp->t_idle = 0;
532 if (so_options)
533 tp->t_timer[TCPT_KEEP] = tcp_keepintvl;
534 else
535 tp->t_timer[TCPT_KEEP] = tcp_keepidle;
536
537 /*
538 * Process options if not in LISTEN state,
539 * else do it below (after getting remote address).
540 */
541 if (optp && tp->t_state != TCPS_LISTEN)
542#ifdef VBOX
543 tcp_dooptions(pData, tp, (u_char *)optp, optlen, ti);
544#else /* !VBOX */
545 tcp_dooptions(tp, (u_char *)optp, optlen, ti);
546#endif /* !VBOX */
547/* , */
548/* &ts_present, &ts_val, &ts_ecr); */
549
550 /*
551 * Header prediction: check for the two common cases
552 * of a uni-directional data xfer. If the packet has
553 * no control flags, is in-sequence, the window didn't
554 * change and we're not retransmitting, it's a
555 * candidate. If the length is zero and the ack moved
556 * forward, we're the sender side of the xfer. Just
557 * free the data acked & wake any higher level process
558 * that was blocked waiting for space. If the length
559 * is non-zero and the ack didn't move, we're the
560 * receiver side. If we're getting packets in-order
561 * (the reassembly queue is empty), add the data to
562 * the socket buffer and note that we need a delayed ack.
563 *
564 * XXX Some of these tests are not needed
565 * eg: the tiwin == tp->snd_wnd prevents many more
566 * predictions.. with no *real* advantage..
567 */
568 if (tp->t_state == TCPS_ESTABLISHED &&
569 (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
570/* (!ts_present || TSTMP_GEQ(ts_val, tp->ts_recent)) && */
571 ti->ti_seq == tp->rcv_nxt &&
572 tiwin && tiwin == tp->snd_wnd &&
573 tp->snd_nxt == tp->snd_max) {
574 /*
575 * If last ACK falls within this segment's sequence numbers,
576 * record the timestamp.
577 */
578/* if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) &&
579 * SEQ_LT(tp->last_ack_sent, ti->ti_seq + ti->ti_len)) {
580 * tp->ts_recent_age = tcp_now;
581 * tp->ts_recent = ts_val;
582 * }
583 */
584 if (ti->ti_len == 0) {
585 if (SEQ_GT(ti->ti_ack, tp->snd_una) &&
586 SEQ_LEQ(ti->ti_ack, tp->snd_max) &&
587 tp->snd_cwnd >= tp->snd_wnd) {
588 /*
589 * this is a pure ack for outstanding data.
590 */
591 ++tcpstat.tcps_predack;
592/* if (ts_present)
593 * tcp_xmit_timer(tp, tcp_now-ts_ecr+1);
594 * else
595 */ if (tp->t_rtt &&
596 SEQ_GT(ti->ti_ack, tp->t_rtseq))
597#ifdef VBOX
598 tcp_xmit_timer(pData, tp, tp->t_rtt);
599#else /* !VBOX */
600 tcp_xmit_timer(tp, tp->t_rtt);
601#endif /* !VBOX */
602 acked = ti->ti_ack - tp->snd_una;
603 tcpstat.tcps_rcvackpack++;
604 tcpstat.tcps_rcvackbyte += acked;
605 sbdrop(&so->so_snd, acked);
606 tp->snd_una = ti->ti_ack;
607#ifdef VBOX
608 m_freem(pData, m);
609#else /* !VBOX */
610 m_freem(m);
611#endif /* !VBOX */
612
613 /*
614 * If all outstanding data are acked, stop
615 * retransmit timer, otherwise restart timer
616 * using current (possibly backed-off) value.
617 * If process is waiting for space,
618 * wakeup/selwakeup/signal. If data
619 * are ready to send, let tcp_output
620 * decide between more output or persist.
621 */
622 if (tp->snd_una == tp->snd_max)
623 tp->t_timer[TCPT_REXMT] = 0;
624 else if (tp->t_timer[TCPT_PERSIST] == 0)
625 tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
626
627 /*
628 * There's room in so_snd, sowwakup will read()
629 * from the socket if we can
630 */
631/* if (so->so_snd.sb_flags & SB_NOTIFY)
632 * sowwakeup(so);
633 */
634 /*
635 * This is called because sowwakeup might have
636 * put data into so_snd. Since we don't so sowwakeup,
637 * we don't need this.. XXX???
638 */
639 if (so->so_snd.sb_cc)
640#ifdef VBOX
641 (void) tcp_output(pData, tp);
642#else /* !VBOX */
643 (void) tcp_output(tp);
644#endif /* !VBOX */
645
646 return;
647 }
648 } else if (ti->ti_ack == tp->snd_una &&
649 u32_to_ptr(tp->seg_next, struct tcpcb *) == tp &&
650 ti->ti_len <= sbspace(&so->so_rcv)) {
651 /*
652 * this is a pure, in-sequence data packet
653 * with nothing on the reassembly queue and
654 * we have enough buffer space to take it.
655 */
656 ++tcpstat.tcps_preddat;
657 tp->rcv_nxt += ti->ti_len;
658 tcpstat.tcps_rcvpack++;
659 tcpstat.tcps_rcvbyte += ti->ti_len;
660 /*
661 * Add data to socket buffer.
662 */
663 if (so->so_emu) {
664#ifdef VBOX
665 if (tcp_emu(pData, so,m)) sbappend(pData, so, m);
666#else /* !VBOX */
667 if (tcp_emu(so,m)) sbappend(so, m);
668#endif /* !VBOX */
669 } else
670#ifdef VBOX
671 sbappend(pData, so, m);
672#else /* !VBOX */
673 sbappend(so, m);
674#endif /* !VBOX */
675
676 /*
677 * XXX This is called when data arrives. Later, check
678 * if we can actually write() to the socket
679 * XXX Need to check? It's be NON_BLOCKING
680 */
681/* sorwakeup(so); */
682
683 /*
684 * If this is a short packet, then ACK now - with Nagel
685 * congestion avoidance sender won't send more until
686 * he gets an ACK.
687 *
688 * It is better to not delay acks at all to maximize
689 * TCP throughput. See RFC 2581.
690 */
691 tp->t_flags |= TF_ACKNOW;
692#ifdef VBOX
693 tcp_output(pData, tp);
694#else /* !VBOX */
695 tcp_output(tp);
696#endif /* !VBOX */
697 return;
698 }
699 } /* header prediction */
700 /*
701 * Calculate amount of space in receive window,
702 * and then do TCP input processing.
703 * Receive window is amount of space in rcv queue,
704 * but not less than advertised window.
705 */
706 { int win;
707 win = sbspace(&so->so_rcv);
708 if (win < 0)
709 win = 0;
710 tp->rcv_wnd = max(win, (int)(tp->rcv_adv - tp->rcv_nxt));
711 }
712
713 switch (tp->t_state) {
714
715 /*
716 * If the state is LISTEN then ignore segment if it contains an RST.
717 * If the segment contains an ACK then it is bad and send a RST.
718 * If it does not contain a SYN then it is not interesting; drop it.
719 * Don't bother responding if the destination was a broadcast.
720 * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial
721 * tp->iss, and send a segment:
722 * <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
723 * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss.
724 * Fill in remote peer address fields if not previously specified.
725 * Enter SYN_RECEIVED state, and process any other fields of this
726 * segment in this state.
727 */
728 case TCPS_LISTEN: {
729
730 if (tiflags & TH_RST)
731 goto drop;
732 if (tiflags & TH_ACK)
733 goto dropwithreset;
734 if ((tiflags & TH_SYN) == 0)
735 goto drop;
736
737 /*
738 * This has way too many gotos...
739 * But a bit of spaghetti code never hurt anybody :)
740 */
741
742 /*
743 * If this is destined for the control address, then flag to
744 * tcp_ctl once connected, otherwise connect
745 */
746 if ((so->so_faddr.s_addr&htonl(0xffffff00)) == special_addr.s_addr) {
747 int lastbyte=ntohl(so->so_faddr.s_addr) & 0xff;
748 if (lastbyte!=CTL_ALIAS && lastbyte!=CTL_DNS) {
749#if 0
750 if(lastbyte==CTL_CMD || lastbyte==CTL_EXEC) {
751 /* Command or exec adress */
752 so->so_state |= SS_CTL;
753 } else
754#endif
755 {
756 /* May be an add exec */
757 struct ex_list *ex_ptr;
758 for(ex_ptr = exec_list; ex_ptr; ex_ptr = ex_ptr->ex_next) {
759 if(ex_ptr->ex_fport == so->so_fport &&
760 lastbyte == ex_ptr->ex_addr) {
761 so->so_state |= SS_CTL;
762 break;
763 }
764 }
765 }
766 if(so->so_state & SS_CTL) goto cont_input;
767 }
768 /* CTL_ALIAS: Do nothing, tcp_fconnect will be called on it */
769 }
770
771 if (so->so_emu & EMU_NOCONNECT) {
772 so->so_emu &= ~EMU_NOCONNECT;
773 goto cont_input;
774 }
775
776#ifdef VBOX
777 if((tcp_fconnect(pData, so) == -1) && (errno != EINPROGRESS) && (errno != EWOULDBLOCK)) {
778#else /* !VBOX */
779 if((tcp_fconnect(so) == -1) && (errno != EINPROGRESS) && (errno != EWOULDBLOCK)) {
780#endif /* !VBOX */
781 u_char code=ICMP_UNREACH_NET;
782 DEBUG_MISC((dfd," tcp fconnect errno = %d-%s\n",
783 errno,strerror(errno)));
784 if(errno == ECONNREFUSED) {
785 /* ACK the SYN, send RST to refuse the connection */
786#ifdef VBOX
787 tcp_respond(pData, tp, ti, m, ti->ti_seq+1, (tcp_seq)0,
788 TH_RST|TH_ACK);
789#else /* !VBOX */
790 tcp_respond(tp, ti, m, ti->ti_seq+1, (tcp_seq)0,
791 TH_RST|TH_ACK);
792#endif /* !VBOX */
793 } else {
794 if(errno == EHOSTUNREACH) code=ICMP_UNREACH_HOST;
795 HTONL(ti->ti_seq); /* restore tcp header */
796 HTONL(ti->ti_ack);
797 HTONS(ti->ti_win);
798 HTONS(ti->ti_urp);
799 m->m_data -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
800 m->m_len += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
801 *ip=save_ip;
802#ifdef VBOX
803 icmp_error(pData, m, ICMP_UNREACH,code, 0,strerror(errno));
804#else /* !VBOX */
805 icmp_error(m, ICMP_UNREACH,code, 0,strerror(errno));
806#endif /* !VBOX */
807 }
808#ifdef VBOX
809 tp = tcp_close(pData, tp);
810 m_free(pData, m);
811#else /* !VBOX */
812 tp = tcp_close(tp);
813 m_free(m);
814#endif /* !VBOX */
815 } else {
816 /*
817 * Haven't connected yet, save the current mbuf
818 * and ti, and return
819 * XXX Some OS's don't tell us whether the connect()
820 * succeeded or not. So we must time it out.
821 */
822 so->so_m = m;
823 so->so_ti = ti;
824 tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
825 tp->t_state = TCPS_SYN_RECEIVED;
826 }
827 return;
828
829 cont_conn:
830 /* m==NULL
831 * Check if the connect succeeded
832 */
833 if (so->so_state & SS_NOFDREF) {
834#ifdef VBOX
835 tp = tcp_close(pData, tp);
836#else /* !VBOX */
837 tp = tcp_close(tp);
838#endif /* !VBOX */
839 goto dropwithreset;
840 }
841 cont_input:
842 tcp_template(tp);
843
844 if (optp)
845#ifdef VBOX
846 tcp_dooptions(pData, tp, (u_char *)optp, optlen, ti);
847#else /* !VBOX */
848 tcp_dooptions(tp, (u_char *)optp, optlen, ti);
849#endif /* !VBOX */
850 /* , */
851 /* &ts_present, &ts_val, &ts_ecr); */
852
853 if (iss)
854 tp->iss = iss;
855 else
856 tp->iss = tcp_iss;
857 tcp_iss += TCP_ISSINCR/2;
858 tp->irs = ti->ti_seq;
859 tcp_sendseqinit(tp);
860 tcp_rcvseqinit(tp);
861 tp->t_flags |= TF_ACKNOW;
862 tp->t_state = TCPS_SYN_RECEIVED;
863 tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
864 tcpstat.tcps_accepts++;
865 goto trimthenstep6;
866 } /* case TCPS_LISTEN */
867
868 /*
869 * If the state is SYN_SENT:
870 * if seg contains an ACK, but not for our SYN, drop the input.
871 * if seg contains a RST, then drop the connection.
872 * if seg does not contain SYN, then drop it.
873 * Otherwise this is an acceptable SYN segment
874 * initialize tp->rcv_nxt and tp->irs
875 * if seg contains ack then advance tp->snd_una
876 * if SYN has been acked change to ESTABLISHED else SYN_RCVD state
877 * arrange for segment to be acked (eventually)
878 * continue processing rest of data/controls, beginning with URG
879 */
880 case TCPS_SYN_SENT:
881 if ((tiflags & TH_ACK) &&
882 (SEQ_LEQ(ti->ti_ack, tp->iss) ||
883 SEQ_GT(ti->ti_ack, tp->snd_max)))
884 goto dropwithreset;
885
886 if (tiflags & TH_RST) {
887 if (tiflags & TH_ACK)
888#ifdef VBOX
889 tp = tcp_drop(pData, tp,0); /* XXX Check t_softerror! */
890#else /* !VBOX */
891 tp = tcp_drop(tp,0); /* XXX Check t_softerror! */
892#endif /* !VBOX */
893 goto drop;
894 }
895
896 if ((tiflags & TH_SYN) == 0)
897 goto drop;
898 if (tiflags & TH_ACK) {
899 tp->snd_una = ti->ti_ack;
900 if (SEQ_LT(tp->snd_nxt, tp->snd_una))
901 tp->snd_nxt = tp->snd_una;
902 }
903
904 tp->t_timer[TCPT_REXMT] = 0;
905 tp->irs = ti->ti_seq;
906 tcp_rcvseqinit(tp);
907 tp->t_flags |= TF_ACKNOW;
908 if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) {
909 tcpstat.tcps_connects++;
910 soisfconnected(so);
911 tp->t_state = TCPS_ESTABLISHED;
912
913 /* Do window scaling on this connection? */
914/* if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
915 * (TF_RCVD_SCALE|TF_REQ_SCALE)) {
916 * tp->snd_scale = tp->requested_s_scale;
917 * tp->rcv_scale = tp->request_r_scale;
918 * }
919 */
920#ifdef VBOX
921 (void) tcp_reass(pData, tp, (struct tcpiphdr *)0,
922 (struct mbuf *)0);
923#else /* !VBOX */
924 (void) tcp_reass(tp, (struct tcpiphdr *)0,
925 (struct mbuf *)0);
926#endif /* !VBOX */
927 /*
928 * if we didn't have to retransmit the SYN,
929 * use its rtt as our initial srtt & rtt var.
930 */
931 if (tp->t_rtt)
932#ifdef VBOX
933 tcp_xmit_timer(pData, tp, tp->t_rtt);
934#else /* !VBOX */
935 tcp_xmit_timer(tp, tp->t_rtt);
936#endif /* !VBOX */
937 } else
938 tp->t_state = TCPS_SYN_RECEIVED;
939
940trimthenstep6:
941 /*
942 * Advance ti->ti_seq to correspond to first data byte.
943 * If data, trim to stay within window,
944 * dropping FIN if necessary.
945 */
946 ti->ti_seq++;
947 if (ti->ti_len > tp->rcv_wnd) {
948 todrop = ti->ti_len - tp->rcv_wnd;
949 m_adj(m, -todrop);
950 ti->ti_len = tp->rcv_wnd;
951 tiflags &= ~TH_FIN;
952 tcpstat.tcps_rcvpackafterwin++;
953 tcpstat.tcps_rcvbyteafterwin += todrop;
954 }
955 tp->snd_wl1 = ti->ti_seq - 1;
956 tp->rcv_up = ti->ti_seq;
957 goto step6;
958 } /* switch tp->t_state */
959 /*
960 * States other than LISTEN or SYN_SENT.
961 * First check timestamp, if present.
962 * Then check that at least some bytes of segment are within
963 * receive window. If segment begins before rcv_nxt,
964 * drop leading data (and SYN); if nothing left, just ack.
965 *
966 * RFC 1323 PAWS: If we have a timestamp reply on this segment
967 * and it's less than ts_recent, drop it.
968 */
969/* if (ts_present && (tiflags & TH_RST) == 0 && tp->ts_recent &&
970 * TSTMP_LT(ts_val, tp->ts_recent)) {
971 *
972 */ /* Check to see if ts_recent is over 24 days old. */
973/* if ((int)(tcp_now - tp->ts_recent_age) > TCP_PAWS_IDLE) {
974 */ /*
975 * * Invalidate ts_recent. If this segment updates
976 * * ts_recent, the age will be reset later and ts_recent
977 * * will get a valid value. If it does not, setting
978 * * ts_recent to zero will at least satisfy the
979 * * requirement that zero be placed in the timestamp
980 * * echo reply when ts_recent isn't valid. The
981 * * age isn't reset until we get a valid ts_recent
982 * * because we don't want out-of-order segments to be
983 * * dropped when ts_recent is old.
984 * */
985/* tp->ts_recent = 0;
986 * } else {
987 * tcpstat.tcps_rcvduppack++;
988 * tcpstat.tcps_rcvdupbyte += ti->ti_len;
989 * tcpstat.tcps_pawsdrop++;
990 * goto dropafterack;
991 * }
992 * }
993 */
994
995 todrop = tp->rcv_nxt - ti->ti_seq;
996 if (todrop > 0) {
997 if (tiflags & TH_SYN) {
998 tiflags &= ~TH_SYN;
999 ti->ti_seq++;
1000 if (ti->ti_urp > 1)
1001 ti->ti_urp--;
1002 else
1003 tiflags &= ~TH_URG;
1004 todrop--;
1005 }
1006 /*
1007 * Following if statement from Stevens, vol. 2, p. 960.
1008 */
1009 if (todrop > ti->ti_len
1010 || (todrop == ti->ti_len && (tiflags & TH_FIN) == 0)) {
1011 /*
1012 * Any valid FIN must be to the left of the window.
1013 * At this point the FIN must be a duplicate or out
1014 * of sequence; drop it.
1015 */
1016 tiflags &= ~TH_FIN;
1017
1018 /*
1019 * Send an ACK to resynchronize and drop any data.
1020 * But keep on processing for RST or ACK.
1021 */
1022 tp->t_flags |= TF_ACKNOW;
1023 todrop = ti->ti_len;
1024 tcpstat.tcps_rcvduppack++;
1025 tcpstat.tcps_rcvdupbyte += todrop;
1026 } else {
1027 tcpstat.tcps_rcvpartduppack++;
1028 tcpstat.tcps_rcvpartdupbyte += todrop;
1029 }
1030 m_adj(m, todrop);
1031 ti->ti_seq += todrop;
1032 ti->ti_len -= todrop;
1033 if (ti->ti_urp > todrop)
1034 ti->ti_urp -= todrop;
1035 else {
1036 tiflags &= ~TH_URG;
1037 ti->ti_urp = 0;
1038 }
1039 }
1040 /*
1041 * If new data are received on a connection after the
1042 * user processes are gone, then RST the other end.
1043 */
1044 if ((so->so_state & SS_NOFDREF) &&
1045 tp->t_state > TCPS_CLOSE_WAIT && ti->ti_len) {
1046#ifdef VBOX
1047 tp = tcp_close(pData, tp);
1048#else /* !VBOX */
1049 tp = tcp_close(tp);
1050#endif /* !VBOX */
1051 tcpstat.tcps_rcvafterclose++;
1052 goto dropwithreset;
1053 }
1054
1055 /*
1056 * If segment ends after window, drop trailing data
1057 * (and PUSH and FIN); if nothing left, just ACK.
1058 */
1059 todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd);
1060 if (todrop > 0) {
1061 tcpstat.tcps_rcvpackafterwin++;
1062 if (todrop >= ti->ti_len) {
1063 tcpstat.tcps_rcvbyteafterwin += ti->ti_len;
1064 /*
1065 * If a new connection request is received
1066 * while in TIME_WAIT, drop the old connection
1067 * and start over if the sequence numbers
1068 * are above the previous ones.
1069 */
1070 if (tiflags & TH_SYN &&
1071 tp->t_state == TCPS_TIME_WAIT &&
1072 SEQ_GT(ti->ti_seq, tp->rcv_nxt)) {
1073 iss = tp->rcv_nxt + TCP_ISSINCR;
1074#ifdef VBOX
1075 tp = tcp_close(pData, tp);
1076#else /* !VBOX */
1077 tp = tcp_close(tp);
1078#endif /* !VBOX */
1079 goto findso;
1080 }
1081 /*
1082 * If window is closed can only take segments at
1083 * window edge, and have to drop data and PUSH from
1084 * incoming segments. Continue processing, but
1085 * remember to ack. Otherwise, drop segment
1086 * and ack.
1087 */
1088 if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) {
1089 tp->t_flags |= TF_ACKNOW;
1090 tcpstat.tcps_rcvwinprobe++;
1091 } else
1092 goto dropafterack;
1093 } else
1094 tcpstat.tcps_rcvbyteafterwin += todrop;
1095 m_adj(m, -todrop);
1096 ti->ti_len -= todrop;
1097 tiflags &= ~(TH_PUSH|TH_FIN);
1098 }
1099
1100 /*
1101 * If last ACK falls within this segment's sequence numbers,
1102 * record its timestamp.
1103 */
1104/* if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) &&
1105 * SEQ_LT(tp->last_ack_sent, ti->ti_seq + ti->ti_len +
1106 * ((tiflags & (TH_SYN|TH_FIN)) != 0))) {
1107 * tp->ts_recent_age = tcp_now;
1108 * tp->ts_recent = ts_val;
1109 * }
1110 */
1111
1112 /*
1113 * If the RST bit is set examine the state:
1114 * SYN_RECEIVED STATE:
1115 * If passive open, return to LISTEN state.
1116 * If active open, inform user that connection was refused.
1117 * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES:
1118 * Inform user that connection was reset, and close tcb.
1119 * CLOSING, LAST_ACK, TIME_WAIT STATES
1120 * Close the tcb.
1121 */
1122 if (tiflags&TH_RST) switch (tp->t_state) {
1123
1124 case TCPS_SYN_RECEIVED:
1125/* so->so_error = ECONNREFUSED; */
1126 goto close;
1127
1128 case TCPS_ESTABLISHED:
1129 case TCPS_FIN_WAIT_1:
1130 case TCPS_FIN_WAIT_2:
1131 case TCPS_CLOSE_WAIT:
1132/* so->so_error = ECONNRESET; */
1133 close:
1134 tp->t_state = TCPS_CLOSED;
1135 tcpstat.tcps_drops++;
1136#ifdef VBOX
1137 tp = tcp_close(pData, tp);
1138#else /* !VBOX */
1139 tp = tcp_close(tp);
1140#endif /* !VBOX */
1141 goto drop;
1142
1143 case TCPS_CLOSING:
1144 case TCPS_LAST_ACK:
1145 case TCPS_TIME_WAIT:
1146#ifdef VBOX
1147 tp = tcp_close(pData, tp);
1148#else /* !VBOX */
1149 tp = tcp_close(tp);
1150#endif /* !VBOX */
1151 goto drop;
1152 }
1153
1154 /*
1155 * If a SYN is in the window, then this is an
1156 * error and we send an RST and drop the connection.
1157 */
1158 if (tiflags & TH_SYN) {
1159#ifdef VBOX
1160 tp = tcp_drop(pData, tp,0);
1161#else /* !VBOX */
1162 tp = tcp_drop(tp,0);
1163#endif /* !VBOX */
1164 goto dropwithreset;
1165 }
1166
1167 /*
1168 * If the ACK bit is off we drop the segment and return.
1169 */
1170 if ((tiflags & TH_ACK) == 0) goto drop;
1171
1172 /*
1173 * Ack processing.
1174 */
1175 switch (tp->t_state) {
1176 /*
1177 * In SYN_RECEIVED state if the ack ACKs our SYN then enter
1178 * ESTABLISHED state and continue processing, otherwise
1179 * send an RST. una<=ack<=max
1180 */
1181 case TCPS_SYN_RECEIVED:
1182
1183 if (SEQ_GT(tp->snd_una, ti->ti_ack) ||
1184 SEQ_GT(ti->ti_ack, tp->snd_max))
1185 goto dropwithreset;
1186 tcpstat.tcps_connects++;
1187 tp->t_state = TCPS_ESTABLISHED;
1188 /*
1189 * The sent SYN is ack'ed with our sequence number +1
1190 * The first data byte already in the buffer will get
1191 * lost if no correction is made. This is only needed for
1192 * SS_CTL since the buffer is empty otherwise.
1193 * tp->snd_una++; or:
1194 */
1195 tp->snd_una=ti->ti_ack;
1196 if (so->so_state & SS_CTL) {
1197 /* So tcp_ctl reports the right state */
1198#ifdef VBOX
1199 ret = tcp_ctl(pData, so);
1200#else /* !VBOX */
1201 ret = tcp_ctl(so);
1202#endif /* !VBOX */
1203 if (ret == 1) {
1204 soisfconnected(so);
1205 so->so_state &= ~SS_CTL; /* success XXX */
1206 } else if (ret == 2) {
1207 so->so_state = SS_NOFDREF; /* CTL_CMD */
1208 } else {
1209 needoutput = 1;
1210 tp->t_state = TCPS_FIN_WAIT_1;
1211 }
1212 } else {
1213 soisfconnected(so);
1214 }
1215
1216 /* Do window scaling? */
1217/* if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
1218 * (TF_RCVD_SCALE|TF_REQ_SCALE)) {
1219 * tp->snd_scale = tp->requested_s_scale;
1220 * tp->rcv_scale = tp->request_r_scale;
1221 * }
1222 */
1223#ifdef VBOX
1224 (void) tcp_reass(pData, tp, (struct tcpiphdr *)0, (struct mbuf *)0);
1225#else /* !VBOX */
1226 (void) tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0);
1227#endif /* !VBOX */
1228 tp->snd_wl1 = ti->ti_seq - 1;
1229 /* Avoid ack processing; snd_una==ti_ack => dup ack */
1230 goto synrx_to_est;
1231 /* fall into ... */
1232
1233 /*
1234 * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
1235 * ACKs. If the ack is in the range
1236 * tp->snd_una < ti->ti_ack <= tp->snd_max
1237 * then advance tp->snd_una to ti->ti_ack and drop
1238 * data from the retransmission queue. If this ACK reflects
1239 * more up to date window information we update our window information.
1240 */
1241 case TCPS_ESTABLISHED:
1242 case TCPS_FIN_WAIT_1:
1243 case TCPS_FIN_WAIT_2:
1244 case TCPS_CLOSE_WAIT:
1245 case TCPS_CLOSING:
1246 case TCPS_LAST_ACK:
1247 case TCPS_TIME_WAIT:
1248
1249 if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) {
1250 if (ti->ti_len == 0 && tiwin == tp->snd_wnd) {
1251 tcpstat.tcps_rcvdupack++;
1252 DEBUG_MISC((dfd," dup ack m = %lx so = %lx \n",
1253 (long )m, (long )so));
1254 /*
1255 * If we have outstanding data (other than
1256 * a window probe), this is a completely
1257 * duplicate ack (ie, window info didn't
1258 * change), the ack is the biggest we've
1259 * seen and we've seen exactly our rexmt
1260 * threshold of them, assume a packet
1261 * has been dropped and retransmit it.
1262 * Kludge snd_nxt & the congestion
1263 * window so we send only this one
1264 * packet.
1265 *
1266 * We know we're losing at the current
1267 * window size so do congestion avoidance
1268 * (set ssthresh to half the current window
1269 * and pull our congestion window back to
1270 * the new ssthresh).
1271 *
1272 * Dup acks mean that packets have left the
1273 * network (they're now cached at the receiver)
1274 * so bump cwnd by the amount in the receiver
1275 * to keep a constant cwnd packets in the
1276 * network.
1277 */
1278 if (tp->t_timer[TCPT_REXMT] == 0 ||
1279 ti->ti_ack != tp->snd_una)
1280 tp->t_dupacks = 0;
1281 else if (++tp->t_dupacks == tcprexmtthresh) {
1282 tcp_seq onxt = tp->snd_nxt;
1283 u_int win =
1284 min(tp->snd_wnd, tp->snd_cwnd) / 2 /
1285 tp->t_maxseg;
1286
1287 if (win < 2)
1288 win = 2;
1289 tp->snd_ssthresh = win * tp->t_maxseg;
1290 tp->t_timer[TCPT_REXMT] = 0;
1291 tp->t_rtt = 0;
1292 tp->snd_nxt = ti->ti_ack;
1293 tp->snd_cwnd = tp->t_maxseg;
1294#ifdef VBOX
1295 (void) tcp_output(pData, tp);
1296#else /* !VBOX */
1297 (void) tcp_output(tp);
1298#endif /* !VBOX */
1299 tp->snd_cwnd = tp->snd_ssthresh +
1300 tp->t_maxseg * tp->t_dupacks;
1301 if (SEQ_GT(onxt, tp->snd_nxt))
1302 tp->snd_nxt = onxt;
1303 goto drop;
1304 } else if (tp->t_dupacks > tcprexmtthresh) {
1305 tp->snd_cwnd += tp->t_maxseg;
1306#ifdef VBOX
1307 (void) tcp_output(pData, tp);
1308#else /* !VBOX */
1309 (void) tcp_output(tp);
1310#endif /* !VBOX */
1311 goto drop;
1312 }
1313 } else
1314 tp->t_dupacks = 0;
1315 break;
1316 }
1317 synrx_to_est:
1318 /*
1319 * If the congestion window was inflated to account
1320 * for the other side's cached packets, retract it.
1321 */
1322 if (tp->t_dupacks > tcprexmtthresh &&
1323 tp->snd_cwnd > tp->snd_ssthresh)
1324 tp->snd_cwnd = tp->snd_ssthresh;
1325 tp->t_dupacks = 0;
1326 if (SEQ_GT(ti->ti_ack, tp->snd_max)) {
1327 tcpstat.tcps_rcvacktoomuch++;
1328 goto dropafterack;
1329 }
1330 acked = ti->ti_ack - tp->snd_una;
1331 tcpstat.tcps_rcvackpack++;
1332 tcpstat.tcps_rcvackbyte += acked;
1333
1334 /*
1335 * If we have a timestamp reply, update smoothed
1336 * round trip time. If no timestamp is present but
1337 * transmit timer is running and timed sequence
1338 * number was acked, update smoothed round trip time.
1339 * Since we now have an rtt measurement, cancel the
1340 * timer backoff (cf., Phil Karn's retransmit alg.).
1341 * Recompute the initial retransmit timer.
1342 */
1343/* if (ts_present)
1344 * tcp_xmit_timer(tp, tcp_now-ts_ecr+1);
1345 * else
1346 */
1347 if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq))
1348#ifdef VBOX
1349 tcp_xmit_timer(pData, tp,tp->t_rtt);
1350#else /* !VBOX */
1351 tcp_xmit_timer(tp,tp->t_rtt);
1352#endif /* !VBOX */
1353
1354 /*
1355 * If all outstanding data is acked, stop retransmit
1356 * timer and remember to restart (more output or persist).
1357 * If there is more data to be acked, restart retransmit
1358 * timer, using current (possibly backed-off) value.
1359 */
1360 if (ti->ti_ack == tp->snd_max) {
1361 tp->t_timer[TCPT_REXMT] = 0;
1362 needoutput = 1;
1363 } else if (tp->t_timer[TCPT_PERSIST] == 0)
1364 tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
1365 /*
1366 * When new data is acked, open the congestion window.
1367 * If the window gives us less than ssthresh packets
1368 * in flight, open exponentially (maxseg per packet).
1369 * Otherwise open linearly: maxseg per window
1370 * (maxseg^2 / cwnd per packet).
1371 */
1372 {
1373 register u_int cw = tp->snd_cwnd;
1374 register u_int incr = tp->t_maxseg;
1375
1376 if (cw > tp->snd_ssthresh)
1377 incr = incr * incr / cw;
1378 tp->snd_cwnd = min(cw + incr, TCP_MAXWIN<<tp->snd_scale);
1379 }
1380 if (acked > so->so_snd.sb_cc) {
1381 tp->snd_wnd -= so->so_snd.sb_cc;
1382 sbdrop(&so->so_snd, (int )so->so_snd.sb_cc);
1383 ourfinisacked = 1;
1384 } else {
1385 sbdrop(&so->so_snd, acked);
1386 tp->snd_wnd -= acked;
1387 ourfinisacked = 0;
1388 }
1389 /*
1390 * XXX sowwakeup is called when data is acked and there's room for
1391 * more data... it should read() the socket
1392 */
1393/* if (so->so_snd.sb_flags & SB_NOTIFY)
1394 * sowwakeup(so);
1395 */
1396 tp->snd_una = ti->ti_ack;
1397 if (SEQ_LT(tp->snd_nxt, tp->snd_una))
1398 tp->snd_nxt = tp->snd_una;
1399
1400 switch (tp->t_state) {
1401
1402 /*
1403 * In FIN_WAIT_1 STATE in addition to the processing
1404 * for the ESTABLISHED state if our FIN is now acknowledged
1405 * then enter FIN_WAIT_2.
1406 */
1407 case TCPS_FIN_WAIT_1:
1408 if (ourfinisacked) {
1409 /*
1410 * If we can't receive any more
1411 * data, then closing user can proceed.
1412 * Starting the timer is contrary to the
1413 * specification, but if we don't get a FIN
1414 * we'll hang forever.
1415 */
1416 if (so->so_state & SS_FCANTRCVMORE) {
1417 soisfdisconnected(so);
1418 tp->t_timer[TCPT_2MSL] = tcp_maxidle;
1419 }
1420 tp->t_state = TCPS_FIN_WAIT_2;
1421 }
1422 break;
1423
1424 /*
1425 * In CLOSING STATE in addition to the processing for
1426 * the ESTABLISHED state if the ACK acknowledges our FIN
1427 * then enter the TIME-WAIT state, otherwise ignore
1428 * the segment.
1429 */
1430 case TCPS_CLOSING:
1431 if (ourfinisacked) {
1432 tp->t_state = TCPS_TIME_WAIT;
1433 tcp_canceltimers(tp);
1434 tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1435 soisfdisconnected(so);
1436 }
1437 break;
1438
1439 /*
1440 * In LAST_ACK, we may still be waiting for data to drain
1441 * and/or to be acked, as well as for the ack of our FIN.
1442 * If our FIN is now acknowledged, delete the TCB,
1443 * enter the closed state and return.
1444 */
1445 case TCPS_LAST_ACK:
1446 if (ourfinisacked) {
1447#ifdef VBOX
1448 tp = tcp_close(pData, tp);
1449#else /* !VBOX */
1450 tp = tcp_close(tp);
1451#endif /* !VBOX */
1452 goto drop;
1453 }
1454 break;
1455
1456 /*
1457 * In TIME_WAIT state the only thing that should arrive
1458 * is a retransmission of the remote FIN. Acknowledge
1459 * it and restart the finack timer.
1460 */
1461 case TCPS_TIME_WAIT:
1462 tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1463 goto dropafterack;
1464 }
1465 } /* switch(tp->t_state) */
1466
1467step6:
1468 /*
1469 * Update window information.
1470 * Don't look at window if no ACK: TAC's send garbage on first SYN.
1471 */
1472 if ((tiflags & TH_ACK) &&
1473 (SEQ_LT(tp->snd_wl1, ti->ti_seq) ||
1474 (tp->snd_wl1 == ti->ti_seq && (SEQ_LT(tp->snd_wl2, ti->ti_ack) ||
1475 (tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))))) {
1476 /* keep track of pure window updates */
1477 if (ti->ti_len == 0 &&
1478 tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd)
1479 tcpstat.tcps_rcvwinupd++;
1480 tp->snd_wnd = tiwin;
1481 tp->snd_wl1 = ti->ti_seq;
1482 tp->snd_wl2 = ti->ti_ack;
1483 if (tp->snd_wnd > tp->max_sndwnd)
1484 tp->max_sndwnd = tp->snd_wnd;
1485 needoutput = 1;
1486 }
1487
1488 /*
1489 * Process segments with URG.
1490 */
1491 if ((tiflags & TH_URG) && ti->ti_urp &&
1492 TCPS_HAVERCVDFIN(tp->t_state) == 0) {
1493 /*
1494 * This is a kludge, but if we receive and accept
1495 * random urgent pointers, we'll crash in
1496 * soreceive. It's hard to imagine someone
1497 * actually wanting to send this much urgent data.
1498 */
1499 if (ti->ti_urp + so->so_rcv.sb_cc > so->so_rcv.sb_datalen) {
1500 ti->ti_urp = 0;
1501 tiflags &= ~TH_URG;
1502 goto dodata;
1503 }
1504 /*
1505 * If this segment advances the known urgent pointer,
1506 * then mark the data stream. This should not happen
1507 * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
1508 * a FIN has been received from the remote side.
1509 * In these states we ignore the URG.
1510 *
1511 * According to RFC961 (Assigned Protocols),
1512 * the urgent pointer points to the last octet
1513 * of urgent data. We continue, however,
1514 * to consider it to indicate the first octet
1515 * of data past the urgent section as the original
1516 * spec states (in one of two places).
1517 */
1518 if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) {
1519 tp->rcv_up = ti->ti_seq + ti->ti_urp;
1520 so->so_urgc = so->so_rcv.sb_cc +
1521 (tp->rcv_up - tp->rcv_nxt); /* -1; */
1522 tp->rcv_up = ti->ti_seq + ti->ti_urp;
1523
1524 }
1525 } else
1526 /*
1527 * If no out of band data is expected,
1528 * pull receive urgent pointer along
1529 * with the receive window.
1530 */
1531 if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
1532 tp->rcv_up = tp->rcv_nxt;
1533dodata:
1534
1535 /*
1536 * Process the segment text, merging it into the TCP sequencing queue,
1537 * and arranging for acknowledgment of receipt if necessary.
1538 * This process logically involves adjusting tp->rcv_wnd as data
1539 * is presented to the user (this happens in tcp_usrreq.c,
1540 * case PRU_RCVD). If a FIN has already been received on this
1541 * connection then we just ignore the text.
1542 */
1543 if ((ti->ti_len || (tiflags&TH_FIN)) &&
1544 TCPS_HAVERCVDFIN(tp->t_state) == 0) {
1545#ifdef VBOX
1546 TCP_REASS(pData, tp, ti, m, so, tiflags);
1547#else /* !VBOX */
1548 TCP_REASS(tp, ti, m, so, tiflags);
1549#endif /* !VBOX */
1550 /*
1551 * Note the amount of data that peer has sent into
1552 * our window, in order to estimate the sender's
1553 * buffer size.
1554 */
1555 len = so->so_rcv.sb_datalen - (tp->rcv_adv - tp->rcv_nxt);
1556 } else {
1557#ifdef VBOX
1558 m_free(pData, m);
1559#else /* !VBOX */
1560 m_free(m);
1561#endif /* !VBOX */
1562 tiflags &= ~TH_FIN;
1563 }
1564
1565 /*
1566 * If FIN is received ACK the FIN and let the user know
1567 * that the connection is closing.
1568 */
1569 if (tiflags & TH_FIN) {
1570 if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
1571 /*
1572 * If we receive a FIN we can't send more data,
1573 * set it SS_FDRAIN
1574 * Shutdown the socket if there is no rx data in the
1575 * buffer.
1576 * soread() is called on completion of shutdown() and
1577 * will go to TCPS_LAST_ACK, and use tcp_output()
1578 * to send the FIN.
1579 */
1580/* sofcantrcvmore(so); */
1581 sofwdrain(so);
1582
1583 tp->t_flags |= TF_ACKNOW;
1584 tp->rcv_nxt++;
1585 }
1586 switch (tp->t_state) {
1587
1588 /*
1589 * In SYN_RECEIVED and ESTABLISHED STATES
1590 * enter the CLOSE_WAIT state.
1591 */
1592 case TCPS_SYN_RECEIVED:
1593 case TCPS_ESTABLISHED:
1594 if(so->so_emu == EMU_CTL) /* no shutdown on socket */
1595 tp->t_state = TCPS_LAST_ACK;
1596 else
1597 tp->t_state = TCPS_CLOSE_WAIT;
1598 break;
1599
1600 /*
1601 * If still in FIN_WAIT_1 STATE FIN has not been acked so
1602 * enter the CLOSING state.
1603 */
1604 case TCPS_FIN_WAIT_1:
1605 tp->t_state = TCPS_CLOSING;
1606 break;
1607
1608 /*
1609 * In FIN_WAIT_2 state enter the TIME_WAIT state,
1610 * starting the time-wait timer, turning off the other
1611 * standard timers.
1612 */
1613 case TCPS_FIN_WAIT_2:
1614 tp->t_state = TCPS_TIME_WAIT;
1615 tcp_canceltimers(tp);
1616 tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1617 soisfdisconnected(so);
1618 break;
1619
1620 /*
1621 * In TIME_WAIT state restart the 2 MSL time_wait timer.
1622 */
1623 case TCPS_TIME_WAIT:
1624 tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1625 break;
1626 }
1627 }
1628
1629 /*
1630 * If this is a small packet, then ACK now - with Nagle
1631 * congestion avoidance sender won't send more until
1632 * he gets an ACK.
1633 *
1634 * See above.
1635 */
1636/* if (ti->ti_len && (unsigned)ti->ti_len < tp->t_maxseg) {
1637 */
1638/* if ((ti->ti_len && (unsigned)ti->ti_len < tp->t_maxseg &&
1639 * (so->so_iptos & IPTOS_LOWDELAY) == 0) ||
1640 * ((so->so_iptos & IPTOS_LOWDELAY) &&
1641 * ((struct tcpiphdr_2 *)ti)->first_char == (char)27)) {
1642 */
1643 if (ti->ti_len && (unsigned)ti->ti_len <= 5 &&
1644 ((struct tcpiphdr_2 *)ti)->first_char == (char)27) {
1645 tp->t_flags |= TF_ACKNOW;
1646 }
1647
1648 /*
1649 * Return any desired output.
1650 */
1651 if (needoutput || (tp->t_flags & TF_ACKNOW)) {
1652#ifdef VBOX
1653 (void) tcp_output(pData, tp);
1654#else /* !VBOX */
1655 (void) tcp_output(tp);
1656#endif /* !VBOX */
1657 }
1658 return;
1659
1660dropafterack:
1661 /*
1662 * Generate an ACK dropping incoming segment if it occupies
1663 * sequence space, where the ACK reflects our state.
1664 */
1665 if (tiflags & TH_RST)
1666 goto drop;
1667#ifdef VBOX
1668 m_freem(pData, m);
1669#else /* !VBOX */
1670 m_freem(m);
1671#endif /* !VBOX */
1672 tp->t_flags |= TF_ACKNOW;
1673#ifdef VBOX
1674 (void) tcp_output(pData, tp);
1675#else /* !VBOX */
1676 (void) tcp_output(tp);
1677#endif /* !VBOX */
1678 return;
1679
1680dropwithreset:
1681 /* reuses m if m!=NULL, m_free() unnecessary */
1682 if (tiflags & TH_ACK)
1683#ifdef VBOX
1684 tcp_respond(pData, tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST);
1685#else /* !VBOX */
1686 tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST);
1687#endif /* !VBOX */
1688 else {
1689 if (tiflags & TH_SYN) ti->ti_len++;
1690#ifdef VBOX
1691 tcp_respond(pData, tp, ti, m, ti->ti_seq+ti->ti_len, (tcp_seq)0,
1692 TH_RST|TH_ACK);
1693#else /* !VBOX */
1694 tcp_respond(tp, ti, m, ti->ti_seq+ti->ti_len, (tcp_seq)0,
1695 TH_RST|TH_ACK);
1696#endif /* !VBOX */
1697 }
1698
1699 return;
1700
1701drop:
1702 /*
1703 * Drop space held by incoming segment and return.
1704 */
1705#ifdef VBOX
1706 m_free(pData, m);
1707#else /* !VBOX */
1708 m_free(m);
1709#endif /* !VBOX */
1710
1711 return;
1712}
1713
1714 /* , ts_present, ts_val, ts_ecr) */
1715/* int *ts_present;
1716 * u_int32_t *ts_val, *ts_ecr;
1717 */
1718void
1719#ifdef VBOX
1720tcp_dooptions(PNATState pData, struct tcpcb *tp, u_char *cp, int cnt, struct tcpiphdr *ti)
1721#else /* !VBOX */
1722tcp_dooptions(tp, cp, cnt, ti)
1723 struct tcpcb *tp;
1724 u_char *cp;
1725 int cnt;
1726 struct tcpiphdr *ti;
1727#endif /* !VBOX */
1728{
1729 u_int16_t mss;
1730 int opt, optlen;
1731
1732 DEBUG_CALL("tcp_dooptions");
1733 DEBUG_ARGS((dfd," tp = %lx cnt=%i \n", (long )tp, cnt));
1734
1735 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1736 opt = cp[0];
1737 if (opt == TCPOPT_EOL)
1738 break;
1739 if (opt == TCPOPT_NOP)
1740 optlen = 1;
1741 else {
1742 optlen = cp[1];
1743 if (optlen <= 0)
1744 break;
1745 }
1746 switch (opt) {
1747
1748 default:
1749 continue;
1750
1751 case TCPOPT_MAXSEG:
1752 if (optlen != TCPOLEN_MAXSEG)
1753 continue;
1754 if (!(ti->ti_flags & TH_SYN))
1755 continue;
1756 memcpy((char *) &mss, (char *) cp + 2, sizeof(mss));
1757 NTOHS(mss);
1758#ifdef VBOX
1759 (void) tcp_mss(pData, tp, mss); /* sets t_maxseg */
1760#else /* !VBOX */
1761 (void) tcp_mss(tp, mss); /* sets t_maxseg */
1762#endif /* !VBOX */
1763 break;
1764
1765/* case TCPOPT_WINDOW:
1766 * if (optlen != TCPOLEN_WINDOW)
1767 * continue;
1768 * if (!(ti->ti_flags & TH_SYN))
1769 * continue;
1770 * tp->t_flags |= TF_RCVD_SCALE;
1771 * tp->requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT);
1772 * break;
1773 */
1774/* case TCPOPT_TIMESTAMP:
1775 * if (optlen != TCPOLEN_TIMESTAMP)
1776 * continue;
1777 * *ts_present = 1;
1778 * memcpy((char *) ts_val, (char *)cp + 2, sizeof(*ts_val));
1779 * NTOHL(*ts_val);
1780 * memcpy((char *) ts_ecr, (char *)cp + 6, sizeof(*ts_ecr));
1781 * NTOHL(*ts_ecr);
1782 *
1783 */ /*
1784 * * A timestamp received in a SYN makes
1785 * * it ok to send timestamp requests and replies.
1786 * */
1787/* if (ti->ti_flags & TH_SYN) {
1788 * tp->t_flags |= TF_RCVD_TSTMP;
1789 * tp->ts_recent = *ts_val;
1790 * tp->ts_recent_age = tcp_now;
1791 * }
1792 */ break;
1793 }
1794 }
1795}
1796
1797
/*
 * Pull out of band byte out of a segment so
 * it doesn't appear in the user's data queue.
 * It is still reflected in the segment length for
 * sequencing purposes.
 *
 * The byte addressed by ti->ti_urp is saved in the tcpcb (t_iobc, with
 * TCPOOB_HAVEDATA set in t_oobflags), the bytes that follow it in the
 * same mbuf are shifted down by one, and that mbuf's m_len is reduced.
 * Panics if the urgent offset is not found in the chain starting at m.
 *
 * NOTE: this function is compiled out (notdef).
 */

#ifdef notdef

void
tcp_pulloutofband(so, ti, m)
	struct socket *so;
	struct tcpiphdr *ti;
	register struct mbuf *m;
{
	int cnt = ti->ti_urp - 1;

	while (cnt >= 0) {
		if (m->m_len > cnt) {
			char *cp = mtod(m, caddr_t) + cnt;
			struct tcpcb *tp = sototcpcb(so);

			tp->t_iobc = *cp;
			tp->t_oobflags |= TCPOOB_HAVEDATA;
			/*
			 * Close the one-byte gap.  Source and destination
			 * overlap, so memmove() is required here.
			 */
			memmove(cp, cp + 1, (unsigned)(m->m_len - cnt - 1));
			m->m_len--;
			return;
		}
		cnt -= m->m_len;
		m = m->m_next; /* XXX WRONG! Fix it! */
		if (m == 0)
			break;
	}
	panic("tcp_pulloutofband");
}

#endif /* notdef */
1835
1836/*
1837 * Collect new round-trip time estimate
1838 * and update averages and current timeout.
1839 */
1840
1841void
1842#ifdef VBOX
1843tcp_xmit_timer(PNATState pData, register struct tcpcb *tp, int rtt)
1844#else /* !VBOX */
1845tcp_xmit_timer(tp, rtt)
1846 register struct tcpcb *tp;
1847 int rtt;
1848#endif /* !VBOX */
1849{
1850 register short delta;
1851
1852 DEBUG_CALL("tcp_xmit_timer");
1853 DEBUG_ARG("tp = %lx", (long)tp);
1854 DEBUG_ARG("rtt = %d", rtt);
1855
1856 tcpstat.tcps_rttupdated++;
1857 if (tp->t_srtt != 0) {
1858 /*
1859 * srtt is stored as fixed point with 3 bits after the
1860 * binary point (i.e., scaled by 8). The following magic
1861 * is equivalent to the smoothing algorithm in rfc793 with
1862 * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
1863 * point). Adjust rtt to origin 0.
1864 */
1865 delta = rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT);
1866 if ((tp->t_srtt += delta) <= 0)
1867 tp->t_srtt = 1;
1868 /*
1869 * We accumulate a smoothed rtt variance (actually, a
1870 * smoothed mean difference), then set the retransmit
1871 * timer to smoothed rtt + 4 times the smoothed variance.
1872 * rttvar is stored as fixed point with 2 bits after the
1873 * binary point (scaled by 4). The following is
1874 * equivalent to rfc793 smoothing with an alpha of .75
1875 * (rttvar = rttvar*3/4 + |delta| / 4). This replaces
1876 * rfc793's wired-in beta.
1877 */
1878 if (delta < 0)
1879 delta = -delta;
1880 delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT);
1881 if ((tp->t_rttvar += delta) <= 0)
1882 tp->t_rttvar = 1;
1883 } else {
1884 /*
1885 * No rtt measurement yet - use the unsmoothed rtt.
1886 * Set the variance to half the rtt (so our first
1887 * retransmit happens at 3*rtt).
1888 */
1889 tp->t_srtt = rtt << TCP_RTT_SHIFT;
1890 tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
1891 }
1892 tp->t_rtt = 0;
1893 tp->t_rxtshift = 0;
1894
1895 /*
1896 * the retransmit should happen at rtt + 4 * rttvar.
1897 * Because of the way we do the smoothing, srtt and rttvar
1898 * will each average +1/2 tick of bias. When we compute
1899 * the retransmit timer, we want 1/2 tick of rounding and
1900 * 1 extra tick because of +-1/2 tick uncertainty in the
1901 * firing of the timer. The bias will give us exactly the
1902 * 1.5 tick we need. But, because the bias is
1903 * statistical, we have to test that we don't drop below
1904 * the minimum feasible timer (which is 2 ticks).
1905 */
1906 TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
1907 (short)tp->t_rttmin, TCPTV_REXMTMAX); /* XXX */
1908
1909 /*
1910 * We received an ack for a packet that wasn't retransmitted;
1911 * it is probably safe to discard any error indications we've
1912 * received recently. This isn't quite right, but close enough
1913 * for now (a route might have failed after we sent a segment,
1914 * and the return path might not be symmetrical).
1915 */
1916 tp->t_softerror = 0;
1917}
1918
1919/*
1920 * Determine a reasonable value for maxseg size.
1921 * If the route is known, check route for mtu.
1922 * If none, use an mss that can be handled on the outgoing
1923 * interface without forcing IP to fragment; if bigger than
1924 * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES
1925 * to utilize large mbufs. If no route is found, route has no mtu,
1926 * or the destination isn't local, use a default, hopefully conservative
1927 * size (usually 512 or the default IP max size, but no more than the mtu
1928 * of the interface), as we can't discover anything about intervening
1929 * gateways or networks. We also initialize the congestion/slow start
1930 * window to be a single segment if the destination isn't local.
1931 * While looking at the routing entry, we also initialize other path-dependent
1932 * parameters from pre-set or cached values in the routing entry.
1933 */
1934
1935int
1936#ifdef VBOX
1937tcp_mss(PNATState pData, register struct tcpcb *tp, u_int offer)
1938#else /* !VBOX */
1939tcp_mss(tp, offer)
1940 register struct tcpcb *tp;
1941 u_int offer;
1942#endif /* !VBOX */
1943{
1944 struct socket *so = tp->t_socket;
1945 int mss;
1946
1947 DEBUG_CALL("tcp_mss");
1948 DEBUG_ARG("tp = %lx", (long)tp);
1949 DEBUG_ARG("offer = %d", offer);
1950
1951 mss = min(if_mtu, if_mru) - sizeof(struct tcpiphdr);
1952 if (offer)
1953 mss = min(mss, offer);
1954 mss = max(mss, 32);
1955 if (mss < tp->t_maxseg || offer != 0)
1956 tp->t_maxseg = mss;
1957
1958 tp->snd_cwnd = mss;
1959
1960 sbreserve(&so->so_snd, tcp_sndspace+((tcp_sndspace%mss)?(mss-(tcp_sndspace%mss)):0));
1961 sbreserve(&so->so_rcv, tcp_rcvspace+((tcp_rcvspace%mss)?(mss-(tcp_rcvspace%mss)):0));
1962
1963 DEBUG_MISC((dfd, " returning mss = %d\n", mss));
1964
1965 return mss;
1966}
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette