VirtualBox

source: vbox/trunk/src/VBox/NetworkServices/NAT/pxtcp.c@ 48328

Last change on this file since 48328 was 48238, checked in by vboxsync, 11 years ago

pxtcp_pmgr_connect: delete forgotten old code for handling POLLNVAL,
the new, right one is just below.

  • Property svn:eol-style set to native
File size: 66.1 KB
Line 
1/* -*- indent-tabs-mode: nil; -*- */
2#include "winutils.h"
3
4#include "pxtcp.h"
5
6#include "proxytest.h"
7#include "proxy_pollmgr.h"
8#include "pxremap.h"
9#include "portfwd.h" /* fwspec */
10
11#ifndef RT_OS_WINDOWS
12#include <sys/types.h>
13#include <sys/socket.h>
14#include <sys/ioctl.h>
15#ifdef RT_OS_SOLARIS
16#include <sys/filio.h> /* FIONREAD is BSD'ism */
17#endif
18#include <stdlib.h>
19#include <stdint.h>
20#include <stdio.h>
21#include <string.h>
22#include <poll.h>
23
24#include <err.h> /* BSD'ism */
25#else
26#include <stdlib.h>
27#include <stdio.h>
28#include <string.h>
29
30#include <iprt/stdint.h>
31#include "winpoll.h"
32#endif
33
34#include "lwip/opt.h"
35
36#include "lwip/sys.h"
37#include "lwip/tcpip.h"
38#include "lwip/netif.h"
39#include "lwip/tcp_impl.h" /* XXX: to access tcp_abandon() */
40#include "lwip/icmp.h"
41#include "lwip/icmp6.h"
42
43/* NetBSD doesn't report POLLHUP for TCP sockets */
44#ifdef __NetBSD__
45# define HAVE_TCP_POLLHUP 0
46#else
47# define HAVE_TCP_POLLHUP 1
48#endif
49
50
/**
 * Inbound ring buffer.
 *
 * The poll manager thread is the producer: it fills the buffer with
 * data read from the host socket.  The lwIP thread is the consumer:
 * it schedules tcp_write() of that data to the pcb.
 *
 * NB: The lwIP stack itself is a third party here.  The buffer is
 * therefore not split two ways (free / data) but three ways:
 * free / sent-but-unACKed / unsent.
 */
struct ringbuf {
    char *buf;
    size_t bufsize;

    /* Start of the free space.  The producer writes here, up to "unacked". */
    volatile size_t vacant;

    /*
     * Start of the data that has been sent but not yet acknowledged.
     * The stack "owns" this data since it may have to retransmit it,
     * so this index is the limit of the free space for the producer.
     */
    volatile size_t unacked;

    /*
     * Start of the data not yet sent.  The consumer reads/sends from
     * here, up to "vacant".  Touched only by the consumer thread,
     * hence not volatile.
     */
    size_t unsent;
};
83
84
85/**
86 */
87struct pxtcp {
88 /**
89 * Our poll manager handler. Must be first, strong/weak
90 * references depend on this "inheritance".
91 */
92 struct pollmgr_handler pmhdl;
93
94 /**
95 * lwIP (internal/guest) side of the proxied connection.
96 */
97 struct tcp_pcb *pcb;
98
99 /**
100 * Host (external) side of the proxied connection.
101 */
102 SOCKET sock;
103
104 /**
105 * Socket events we are currently polling for.
106 */
107 int events;
108
109 /**
110 * Socket error. Currently used to save connect(2) errors so that
111 * we can decide if we need to send ICMP error.
112 */
113 int sockerr;
114
115 /**
116 * Interface that we have got the SYN from. Needed to send ICMP
117 * with correct source address.
118 */
119 struct netif *netif;
120
121 /**
122 * For tentatively accepted connections for which we are in
123 * process of connecting to the real destination this is the
124 * initial pbuf that we might need to build ICMP error.
125 *
126 * When connection is established this is used to hold outbound
127 * pbuf chain received by pxtcp_pcb_recv() but not yet completely
128 * forwarded over the socket. We cannot "return" it to lwIP since
129 * the head of the chain is already sent and freed.
130 */
131 struct pbuf *unsent;
132
133 /**
134 * Guest has closed its side. Reported to pxtcp_pcb_recv() only
135 * once and we might not be able to forward it immediately if we
136 * have unsent pbuf.
137 */
138 int outbound_close;
139
140 /**
141 * Outbound half-close has been done on the socket.
142 */
143 int outbound_close_done;
144
145 /**
146 * External has closed its side. We might not be able to forward
147 * it immediately if we have unforwarded data.
148 */
149 int inbound_close;
150
151 /**
152 * Inbound half-close has been done on the pcb.
153 */
154 int inbound_close_done;
155
156 /**
157 * On systems that report POLLHUP as soon as the final FIN is
158 * received on a socket we cannot continue polling for the rest of
159 * input, so we have to read (pull) last data from the socket on
160 * the lwIP thread instead of polling/pushing it from the poll
161 * manager thread. See comment in pxtcp_pmgr_pump() POLLHUP case.
162 */
163 int inbound_pull;
164
165
166 /**
167 * When poll manager schedules delete we may not be able to delete
168 * a pxtcp immediately if not all inbound data has been acked by
169 * the guest: lwIP may need to resend and the data are in pxtcp's
170 * inbuf::buf. We defer delete until all data are acked to
171 * pxtcp_pcb_sent().
172 *
173 * It's also implied by inbound_pull. It probably means that
174 * "deferred" is not a very fortunate name.
175 */
176 int deferred_delete;
177
178 /**
179 * Ring-buffer for inbound data.
180 */
181 struct ringbuf inbuf;
182
183 /**
184 * lwIP thread's strong reference to us.
185 */
186 struct pollmgr_refptr *rp;
187
188
189 /*
190 * We use static messages to call functions on the lwIP thread to
191 * void malloc/free overhead.
192 */
193 struct tcpip_msg msg_delete; /* delete pxtcp */
194 struct tcpip_msg msg_reset; /* reset connection and delete pxtcp */
195 struct tcpip_msg msg_accept; /* confirm accept of proxied connection */
196 struct tcpip_msg msg_outbound; /* trigger send of outbound data */
197 struct tcpip_msg msg_inbound; /* trigger send of inbound data */
198 struct tcpip_msg msg_inpull; /* trigger pull of last inbound data */
199};
200
201
202
203static struct pxtcp *pxtcp_allocate(void);
204static void pxtcp_free(struct pxtcp *);
205
206static void pxtcp_pcb_associate(struct pxtcp *, struct tcp_pcb *);
207static void pxtcp_pcb_dissociate(struct pxtcp *);
208
209/* poll manager callbacks for pxtcp related channels */
210static int pxtcp_pmgr_chan_add(struct pollmgr_handler *, SOCKET, int);
211static int pxtcp_pmgr_chan_pollout(struct pollmgr_handler *, SOCKET, int);
212static int pxtcp_pmgr_chan_pollin(struct pollmgr_handler *, SOCKET, int);
213#if !HAVE_TCP_POLLHUP
214static int pxtcp_pmgr_chan_del(struct pollmgr_handler *, SOCKET, int);
215#endif
216static int pxtcp_pmgr_chan_reset(struct pollmgr_handler *, SOCKET, int);
217
218/* helper functions for sending/receiving pxtcp over poll manager channels */
219static ssize_t pxtcp_chan_send(enum pollmgr_slot_t, struct pxtcp *);
220static ssize_t pxtcp_chan_send_weak(enum pollmgr_slot_t, struct pxtcp *);
221static struct pxtcp *pxtcp_chan_recv(struct pollmgr_handler *, SOCKET, int);
222static struct pxtcp *pxtcp_chan_recv_strong(struct pollmgr_handler *, SOCKET, int);
223
224/* poll manager callbacks for individual sockets */
225static int pxtcp_pmgr_connect(struct pollmgr_handler *, SOCKET, int);
226static int pxtcp_pmgr_pump(struct pollmgr_handler *, SOCKET, int);
227
228static ssize_t pxtcp_sock_read(struct pxtcp *, int *);
229
230/* convenience functions for poll manager callbacks */
231static int pxtcp_schedule_delete(struct pxtcp *);
232static int pxtcp_schedule_reset(struct pxtcp *);
233static int pxtcp_schedule_reject(struct pxtcp *);
234
235/* lwip thread callbacks called via proxy_lwip_post() */
236static void pxtcp_pcb_delete_pxtcp(void *);
237static void pxtcp_pcb_reset_pxtcp(void *);
238static void pxtcp_pcb_accept_refuse(void *);
239static void pxtcp_pcb_accept_confirm(void *);
240static void pxtcp_pcb_write_outbound(void *);
241static void pxtcp_pcb_write_inbound(void *);
242static void pxtcp_pcb_pull_inbound(void *);
243
244/* tcp pcb callbacks */
245static err_t pxtcp_pcb_heard(void *, struct tcp_pcb *, err_t); /* global */
246static err_t pxtcp_pcb_accept(void *, struct tcp_pcb *, err_t);
247static err_t pxtcp_pcb_connected(void *, struct tcp_pcb *, err_t);
248static err_t pxtcp_pcb_recv(void *, struct tcp_pcb *, struct pbuf *, err_t);
249static err_t pxtcp_pcb_sent(void *, struct tcp_pcb *, u16_t);
250static err_t pxtcp_pcb_poll(void *, struct tcp_pcb *);
251static void pxtcp_pcb_err(void *, err_t);
252
253static err_t pxtcp_pcb_forward_outbound(struct pxtcp *, struct pbuf *);
254static void pxtcp_pcb_forward_outbound_close(struct pxtcp *);
255
256static void pxtcp_pcb_forward_inbound(struct pxtcp *);
257static void pxtcp_pcb_forward_inbound_close(struct pxtcp *);
258DECLINLINE(int) pxtcp_pcb_forward_inbound_done(const struct pxtcp *);
259static void pxtcp_pcb_schedule_poll(struct pxtcp *);
260static void pxtcp_pcb_cancel_poll(struct pxtcp *);
261
262static void pxtcp_pcb_reject(struct netif *, struct tcp_pcb *, struct pbuf *, int);
263DECLINLINE(void) pxtcp_pcb_maybe_deferred_delete(struct pxtcp *);
264
265/* poll manager handlers for pxtcp channels */
266static struct pollmgr_handler pxtcp_pmgr_chan_add_hdl;
267static struct pollmgr_handler pxtcp_pmgr_chan_pollout_hdl;
268static struct pollmgr_handler pxtcp_pmgr_chan_pollin_hdl;
269#if !HAVE_TCP_POLLHUP
270static struct pollmgr_handler pxtcp_pmgr_chan_del_hdl;
271#endif
272static struct pollmgr_handler pxtcp_pmgr_chan_reset_hdl;
273
274
275/**
276 * Init PXTCP - must be run when neither lwIP tcpip thread, nor poll
277 * manager threads haven't been created yet.
278 */
279void
280pxtcp_init(void)
281{
282 /*
283 * Create channels.
284 */
285#define CHANNEL(SLOT, NAME) do { \
286 NAME##_hdl.callback = NAME; \
287 NAME##_hdl.data = NULL; \
288 NAME##_hdl.slot = -1; \
289 pollmgr_add_chan(SLOT, &NAME##_hdl); \
290 } while (0)
291
292 CHANNEL(POLLMGR_CHAN_PXTCP_ADD, pxtcp_pmgr_chan_add);
293 CHANNEL(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp_pmgr_chan_pollin);
294 CHANNEL(POLLMGR_CHAN_PXTCP_POLLOUT, pxtcp_pmgr_chan_pollout);
295#if !HAVE_TCP_POLLHUP
296 CHANNEL(POLLMGR_CHAN_PXTCP_DEL, pxtcp_pmgr_chan_del);
297#endif
298 CHANNEL(POLLMGR_CHAN_PXTCP_RESET, pxtcp_pmgr_chan_reset);
299
300#undef CHANNEL
301
302 /*
303 * Listen to outgoing connection from guest(s).
304 */
305 tcp_proxy_accept(pxtcp_pcb_heard);
306}
307
308
309/**
310 * Syntactic sugar for sending pxtcp pointer over poll manager
311 * channel. Used by lwip thread functions.
312 */
313static ssize_t
314pxtcp_chan_send(enum pollmgr_slot_t slot, struct pxtcp *pxtcp)
315{
316 return pollmgr_chan_send(slot, &pxtcp, sizeof(pxtcp));
317}
318
319
320/**
321 * Syntactic sugar for sending weak reference to pxtcp over poll
322 * manager channel. Used by lwip thread functions.
323 */
324static ssize_t
325pxtcp_chan_send_weak(enum pollmgr_slot_t slot, struct pxtcp *pxtcp)
326{
327 pollmgr_refptr_weak_ref(pxtcp->rp);
328 return pollmgr_chan_send(slot, &pxtcp->rp, sizeof(pxtcp->rp));
329}
330
331
332/**
333 * Counterpart of pxtcp_chan_send().
334 */
335static struct pxtcp *
336pxtcp_chan_recv(struct pollmgr_handler *handler, SOCKET fd, int revents)
337{
338 struct pxtcp *pxtcp;
339
340 pxtcp = (struct pxtcp *)pollmgr_chan_recv_ptr(handler, fd, revents);
341 return pxtcp;
342}
343
344
345/**
346 * Counterpart of pxtcp_chan_send_weak().
347 */
348static struct pxtcp *
349pxtcp_chan_recv_strong(struct pollmgr_handler *handler, SOCKET fd, int revents)
350{
351 struct pollmgr_refptr *rp;
352 struct pollmgr_handler *base;
353 struct pxtcp *pxtcp;
354
355 rp = (struct pollmgr_refptr *)pollmgr_chan_recv_ptr(handler, fd, revents);
356 base = (struct pollmgr_handler *)pollmgr_refptr_get(rp);
357 pxtcp = (struct pxtcp *)base;
358
359 return pxtcp;
360}
361
362
363/**
364 * Register pxtcp with poll manager.
365 *
366 * Used for POLLMGR_CHAN_PXTCP_ADD and by port-forwarding. Since
367 * error handling is different in these two cases, we leave it up to
368 * the caller.
369 */
370int
371pxtcp_pmgr_add(struct pxtcp *pxtcp)
372{
373 int status;
374
375 LWIP_ASSERT1(pxtcp != NULL);
376 LWIP_ASSERT1(pxtcp->sock >= 0);
377 LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
378 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
379 LWIP_ASSERT1(pxtcp->pmhdl.slot < 0);
380
381 status = pollmgr_add(&pxtcp->pmhdl, pxtcp->sock, pxtcp->events);
382 return status;
383}
384
385
386/**
387 * Unregister pxtcp with poll manager.
388 *
389 * Used for POLLMGR_CHAN_PXTCP_RESET and by port-forwarding (on error
390 * leg).
391 */
392void
393pxtcp_pmgr_del(struct pxtcp *pxtcp)
394{
395 LWIP_ASSERT1(pxtcp != NULL);
396
397 pollmgr_del_slot(pxtcp->pmhdl.slot);
398}
399
400
401/**
402 * POLLMGR_CHAN_PXTCP_ADD handler.
403 *
404 * Get new pxtcp from lwip thread and start polling its socket.
405 */
406static int
407pxtcp_pmgr_chan_add(struct pollmgr_handler *handler, SOCKET fd, int revents)
408{
409 struct pxtcp *pxtcp;
410 int status;
411
412 pxtcp = pxtcp_chan_recv(handler, fd, revents);
413 DPRINTF0(("pxtcp_add: new pxtcp %p; pcb %p; sock %d\n",
414 (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
415
416 status = pxtcp_pmgr_add(pxtcp);
417 if (status < 0) {
418 (void) pxtcp_schedule_reset(pxtcp);
419 }
420
421 return POLLIN;
422}
423
424
425/**
426 * POLLMGR_CHAN_PXTCP_POLLOUT handler.
427 *
428 * pxtcp_pcb_forward_outbound() on the lwIP thread tried to send data
429 * and failed, it now requests us to poll the socket for POLLOUT and
430 * schedule pxtcp_pcb_forward_outbound() when sock is writable again.
431 */
432static int
433pxtcp_pmgr_chan_pollout(struct pollmgr_handler *handler, SOCKET fd, int revents)
434{
435 struct pxtcp *pxtcp;
436
437 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
438 DPRINTF0(("pxtcp_pollout: pxtcp %p\n", (void *)pxtcp));
439
440 if (pxtcp == NULL) {
441 return POLLIN;
442 }
443
444 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
445 LWIP_ASSERT1(pxtcp->pmhdl.slot > 0);
446
447 pxtcp->events |= POLLOUT;
448 pollmgr_update_events(pxtcp->pmhdl.slot, pxtcp->events);
449
450 return POLLIN;
451}
452
453
454/**
455 * POLLMGR_CHAN_PXTCP_POLLIN handler.
456 */
457static int
458pxtcp_pmgr_chan_pollin(struct pollmgr_handler *handler, SOCKET fd, int revents)
459{
460 struct pxtcp *pxtcp;
461
462 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
463 DPRINTF2(("pxtcp_pollin: pxtcp %p\n", (void *)pxtcp));
464
465 if (pxtcp == NULL) {
466 return POLLIN;
467 }
468
469 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
470 LWIP_ASSERT1(pxtcp->pmhdl.slot > 0);
471
472 pxtcp->events |= POLLIN;
473 pollmgr_update_events(pxtcp->pmhdl.slot, pxtcp->events);
474
475 return POLLIN;
476}
477
478
479#if !HAVE_TCP_POLLHUP
480/**
481 * POLLMGR_CHAN_PXTCP_DEL handler.
482 *
483 * Schedule pxtcp deletion. We only need this if host system doesn't
484 * report POLLHUP for fully closed tcp sockets.
485 */
486static int
487pxtcp_pmgr_chan_del(struct pollmgr_handler *handler, SOCKET fd, int revents)
488{
489 struct pxtcp *pxtcp;
490
491 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
492 if (pxtcp == NULL) {
493 return POLLIN;
494 }
495
496 DPRINTF(("PXTCP_DEL: pxtcp %p; pcb %p; sock %d\n",
497 (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
498
499 LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
500 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
501
502 LWIP_ASSERT1(pxtcp->inbound_close); /* EOF read */
503 LWIP_ASSERT1(pxtcp->outbound_close_done); /* EOF sent */
504
505 pxtcp_pmgr_del(pxtcp);
506 (void) pxtcp_schedule_delete(pxtcp);
507
508 return POLLIN;
509}
510#endif /* !HAVE_TCP_POLLHUP */
511
512
513/**
514 * POLLMGR_CHAN_PXTCP_RESET handler.
515 *
516 * Close the socket with RST and delete pxtcp.
517 */
518static int
519pxtcp_pmgr_chan_reset(struct pollmgr_handler *handler, SOCKET fd, int revents)
520{
521 struct pxtcp *pxtcp;
522
523 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
524 if (pxtcp == NULL) {
525 return POLLIN;
526 }
527
528 DPRINTF0(("PXTCP_RESET: pxtcp %p; pcb %p; sock %d\n",
529 (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
530
531 LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
532 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
533
534 pxtcp_pmgr_del(pxtcp);
535
536 proxy_reset_socket(pxtcp->sock);
537 pxtcp->sock = INVALID_SOCKET;
538
539 (void) pxtcp_schedule_reset(pxtcp);
540
541 return POLLIN;
542}
543
544
545static struct pxtcp *
546pxtcp_allocate(void)
547{
548 struct pxtcp *pxtcp;
549
550 pxtcp = (struct pxtcp *)malloc(sizeof(*pxtcp));
551 if (pxtcp == NULL) {
552 return NULL;
553 }
554
555 pxtcp->pmhdl.callback = NULL;
556 pxtcp->pmhdl.data = (void *)pxtcp;
557 pxtcp->pmhdl.slot = -1;
558
559 pxtcp->pcb = NULL;
560 pxtcp->sock = INVALID_SOCKET;
561 pxtcp->events = 0;
562 pxtcp->sockerr = 0;
563 pxtcp->netif = NULL;
564 pxtcp->unsent = NULL;
565 pxtcp->outbound_close = 0;
566 pxtcp->outbound_close_done = 0;
567 pxtcp->inbound_close = 0;
568 pxtcp->inbound_close_done = 0;
569 pxtcp->inbound_pull = 0;
570 pxtcp->deferred_delete = 0;
571
572 pxtcp->inbuf.bufsize = 64 * 1024;
573 pxtcp->inbuf.buf = (char *)malloc(pxtcp->inbuf.bufsize);
574 if (pxtcp->inbuf.buf == NULL) {
575 free(pxtcp);
576 return NULL;
577 }
578 pxtcp->inbuf.vacant = 0;
579 pxtcp->inbuf.unacked = 0;
580 pxtcp->inbuf.unsent = 0;
581
582 pxtcp->rp = pollmgr_refptr_create(&pxtcp->pmhdl);
583 if (pxtcp->rp == NULL) {
584 free(pxtcp->inbuf.buf);
585 free(pxtcp);
586 return NULL;
587 }
588
589#define CALLBACK_MSG(MSG, FUNC) \
590 do { \
591 pxtcp->MSG.type = TCPIP_MSG_CALLBACK_STATIC; \
592 pxtcp->MSG.sem = NULL; \
593 pxtcp->MSG.msg.cb.function = FUNC; \
594 pxtcp->MSG.msg.cb.ctx = (void *)pxtcp; \
595 } while (0)
596
597 CALLBACK_MSG(msg_delete, pxtcp_pcb_delete_pxtcp);
598 CALLBACK_MSG(msg_reset, pxtcp_pcb_reset_pxtcp);
599 CALLBACK_MSG(msg_accept, pxtcp_pcb_accept_confirm);
600 CALLBACK_MSG(msg_outbound, pxtcp_pcb_write_outbound);
601 CALLBACK_MSG(msg_inbound, pxtcp_pcb_write_inbound);
602 CALLBACK_MSG(msg_inpull, pxtcp_pcb_pull_inbound);
603
604#undef CALLBACK_MSG
605
606 return pxtcp;
607}
608
609
610/**
611 * Exported to fwtcp to create pxtcp for incoming port-forwarded
612 * connections. Completed with pcb in pxtcp_pcb_connect().
613 */
614struct pxtcp *
615pxtcp_create_forwarded(SOCKET sock)
616{
617 struct pxtcp *pxtcp;
618
619 pxtcp = pxtcp_allocate();
620 if (pxtcp == NULL) {
621 return NULL;
622 }
623
624 pxtcp->sock = sock;
625 pxtcp->pmhdl.callback = pxtcp_pmgr_pump;
626 pxtcp->events = 0;
627
628 return pxtcp;
629}
630
631
632static void
633pxtcp_pcb_associate(struct pxtcp *pxtcp, struct tcp_pcb *pcb)
634{
635 LWIP_ASSERT1(pxtcp != NULL);
636 LWIP_ASSERT1(pcb != NULL);
637
638 pxtcp->pcb = pcb;
639
640 tcp_arg(pcb, pxtcp);
641
642 tcp_recv(pcb, pxtcp_pcb_recv);
643 tcp_sent(pcb, pxtcp_pcb_sent);
644 tcp_poll(pcb, NULL, 255);
645 tcp_err(pcb, pxtcp_pcb_err);
646}
647
648
649static void
650pxtcp_free(struct pxtcp *pxtcp)
651{
652 if (pxtcp->unsent != NULL) {
653 pbuf_free(pxtcp->unsent);
654 }
655 if (pxtcp->inbuf.buf != NULL) {
656 free(pxtcp->inbuf.buf);
657 }
658 free(pxtcp);
659}
660
661
662/**
663 * Counterpart to pxtcp_create_forwarded() to destruct pxtcp that
664 * fwtcp failed to register with poll manager to post to lwip thread
665 * for doing connect.
666 */
667void
668pxtcp_cancel_forwarded(struct pxtcp *pxtcp)
669{
670 LWIP_ASSERT1(pxtcp->pcb == NULL);
671 pxtcp_pcb_reset_pxtcp(pxtcp);
672}
673
674
675static void
676pxtcp_pcb_dissociate(struct pxtcp *pxtcp)
677{
678 if (pxtcp == NULL || pxtcp->pcb == NULL) {
679 return;
680 }
681
682 DPRINTF(("%s: pxtcp %p <-> pcb %p\n",
683 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
684
685 /*
686 * We must have dissociated from a fully closed pcb immediately
687 * since lwip recycles them and we don't wan't to mess with what
688 * would be someone else's pcb that we happen to have a stale
689 * pointer to.
690 */
691 LWIP_ASSERT1(pxtcp->pcb->callback_arg == pxtcp);
692
693 tcp_recv(pxtcp->pcb, NULL);
694 tcp_sent(pxtcp->pcb, NULL);
695 tcp_poll(pxtcp->pcb, NULL, 255);
696 tcp_err(pxtcp->pcb, NULL);
697 tcp_arg(pxtcp->pcb, NULL);
698 pxtcp->pcb = NULL;
699}
700
701
702/**
703 * Lwip thread callback invoked via pxtcp::msg_delete
704 *
705 * Since we use static messages to communicate to the lwip thread, we
706 * cannot delete pxtcp without making sure there are no unprocessed
707 * messages in the lwip thread mailbox.
708 *
709 * The easiest way to ensure that is to send this "delete" message as
710 * the last one and when it's processed we know there are no more and
711 * it's safe to delete pxtcp.
712 *
713 * Poll manager handlers should use pxtcp_schedule_delete()
714 * convenience function.
715 */
716static void
717pxtcp_pcb_delete_pxtcp(void *ctx)
718{
719 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
720
721 DPRINTF(("%s: pxtcp %p, pcb %p, sock %d%s\n",
722 __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock,
723 (pxtcp->deferred_delete && !pxtcp->inbound_pull
724 ? " (was deferred)" : "")));
725
726 LWIP_ASSERT1(pxtcp != NULL);
727 LWIP_ASSERT1(pxtcp->pmhdl.slot < 0);
728 LWIP_ASSERT1(pxtcp->outbound_close_done);
729 LWIP_ASSERT1(pxtcp->inbound_close); /* not necessarily done */
730
731
732 /*
733 * pxtcp is no longer registered with poll manager, so it's safe
734 * to close the socket.
735 */
736 if (pxtcp->sock != INVALID_SOCKET) {
737 int status = closesocket(pxtcp->sock);
738 DPRINTF(("%s:%d=closesocket(%d)\n", __func__, status, pxtcp->sock));
739
740 pxtcp->sock = INVALID_SOCKET;
741 }
742
743 /*
744 * We might have already dissociated from a fully closed pcb, or
745 * guest might have sent us a reset while msg_delete was in
746 * transit. If there's no pcb, we are done.
747 */
748 if (pxtcp->pcb == NULL) {
749 pollmgr_refptr_unref(pxtcp->rp);
750 pxtcp_free(pxtcp);
751 return;
752 }
753
754 /*
755 * Have we completely forwarded all inbound traffic to the guest?
756 *
757 * We may still be waiting for ACKs. We may have failed to send
758 * some of the data (tcp_write() failed with ERR_MEM). We may
759 * have failed to send the FIN (tcp_shutdown() failed with
760 * ERR_MEM).
761 */
762 if (pxtcp_pcb_forward_inbound_done(pxtcp)) {
763 pxtcp_pcb_dissociate(pxtcp);
764 pollmgr_refptr_unref(pxtcp->rp);
765 pxtcp_free(pxtcp);
766 }
767 else {
768 DPRINTF2(("delete: pxtcp %p; pcb %p:"
769 " unacked %d, unsent %d, vacant %d, %s - DEFER!\n",
770 (void *)pxtcp, (void *)pxtcp->pcb,
771 (int)pxtcp->inbuf.unacked,
772 (int)pxtcp->inbuf.unsent,
773 (int)pxtcp->inbuf.vacant,
774 pxtcp->inbound_close_done ? "FIN sent" : "FIN is NOT sent"));
775
776 LWIP_ASSERT1(!pxtcp->deferred_delete);
777 pxtcp->deferred_delete = 1;
778 }
779}
780
781
782/**
783 * If we couldn't delete pxtcp right away in the msg_delete callback
784 * from the poll manager thread, we repeat the check at the end of
785 * relevant pcb callbacks.
786 */
787DECLINLINE(void)
788pxtcp_pcb_maybe_deferred_delete(struct pxtcp *pxtcp)
789{
790 if (pxtcp->deferred_delete && pxtcp_pcb_forward_inbound_done(pxtcp)) {
791 pxtcp_pcb_delete_pxtcp(pxtcp);
792 }
793}
794
795
796/**
797 * Poll manager callbacks should use this convenience wrapper to
798 * schedule pxtcp deletion on the lwip thread and to deregister from
799 * the poll manager.
800 */
801static int
802pxtcp_schedule_delete(struct pxtcp *pxtcp)
803{
804 /*
805 * If pollmgr_refptr_get() is called by any channel before
806 * scheduled deletion happens, let them know we are gone.
807 */
808 pxtcp->pmhdl.slot = -1;
809
810 /*
811 * Schedule deletion. Since poll manager thread may be pre-empted
812 * right after we send the message, the deletion may actually
813 * happen on the lwip thread before we return from this function,
814 * so it's not safe to refer to pxtcp after this call.
815 */
816 proxy_lwip_post(&pxtcp->msg_delete);
817
818 /* tell poll manager to deregister us */
819 return -1;
820}
821
822
823/**
824 * Lwip thread callback invoked via pxtcp::msg_reset
825 *
826 * Like pxtcp_pcb_delete(), but sends RST to the guest before
827 * deleting this pxtcp.
828 */
829static void
830pxtcp_pcb_reset_pxtcp(void *ctx)
831{
832 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
833 LWIP_ASSERT1(pxtcp != NULL);
834
835 DPRINTF0(("%s: pxtcp %p, pcb %p, sock %d\n",
836 __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
837
838 if (pxtcp->sock != INVALID_SOCKET) {
839 proxy_reset_socket(pxtcp->sock);
840 pxtcp->sock = INVALID_SOCKET;
841 }
842
843 if (pxtcp->pcb != NULL) {
844 struct tcp_pcb *pcb = pxtcp->pcb;
845 pxtcp_pcb_dissociate(pxtcp);
846 tcp_abort(pcb);
847 }
848
849 pollmgr_refptr_unref(pxtcp->rp);
850 pxtcp_free(pxtcp);
851}
852
853
854
855/**
856 * Poll manager callbacks should use this convenience wrapper to
857 * schedule pxtcp reset and deletion on the lwip thread and to
858 * deregister from the poll manager.
859 *
860 * See pxtcp_schedule_delete() for additional comments.
861 */
862static int
863pxtcp_schedule_reset(struct pxtcp *pxtcp)
864{
865 pxtcp->pmhdl.slot = -1;
866 proxy_lwip_post(&pxtcp->msg_reset);
867 return -1;
868}
869
870
871/**
872 * Reject proxy connection attempt. Depending on the cause (sockerr)
873 * we may just drop the pcb silently, generate an ICMP datagram or
874 * send TCP reset.
875 */
876static void
877pxtcp_pcb_reject(struct netif *netif, struct tcp_pcb *pcb,
878 struct pbuf *p, int sockerr)
879{
880 struct netif *oif;
881 int reset = 0;
882
883 oif = ip_current_netif();
884 ip_current_netif() = netif;
885
886 if (sockerr == ECONNREFUSED) {
887 reset = 1;
888 }
889 else if (PCB_ISIPV6(pcb)) {
890 if (sockerr == EHOSTDOWN) {
891 icmp6_dest_unreach(p, ICMP6_DUR_ADDRESS); /* XXX: ??? */
892 }
893 else if (sockerr == EHOSTUNREACH
894 || sockerr == ENETDOWN
895 || sockerr == ENETUNREACH)
896 {
897 icmp6_dest_unreach(p, ICMP6_DUR_NO_ROUTE);
898 }
899 }
900 else {
901 if (sockerr == EHOSTDOWN
902 || sockerr == EHOSTUNREACH
903 || sockerr == ENETDOWN
904 || sockerr == ENETUNREACH)
905 {
906 icmp_dest_unreach(p, ICMP_DUR_HOST);
907 }
908 }
909
910 ip_current_netif() = oif;
911
912 tcp_abandon(pcb, reset);
913}
914
915
916/**
917 * Called from poll manager thread via pxtcp::msg_accept when proxy
918 * failed to connect to the destination. Also called when we failed
919 * to register pxtcp with poll manager.
920 *
921 * This is like pxtcp_pcb_reset_pxtcp() but is more discriminate in
922 * how this unestablished connection is terminated.
923 */
924static void
925pxtcp_pcb_accept_refuse(void *ctx)
926{
927 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
928
929 DPRINTF0(("%s: pxtcp %p, pcb %p, sock %d: errno %d\n",
930 __func__, (void *)pxtcp, (void *)pxtcp->pcb,
931 pxtcp->sock, pxtcp->sockerr));
932
933 LWIP_ASSERT1(pxtcp != NULL);
934 LWIP_ASSERT1(pxtcp->sock == INVALID_SOCKET);
935
936 if (pxtcp->pcb != NULL) {
937 struct tcp_pcb *pcb = pxtcp->pcb;
938 pxtcp_pcb_dissociate(pxtcp);
939 pxtcp_pcb_reject(pxtcp->netif, pcb, pxtcp->unsent, pxtcp->sockerr);
940 }
941
942 pollmgr_refptr_unref(pxtcp->rp);
943 pxtcp_free(pxtcp);
944}
945
946
947/**
948 * Convenience wrapper for poll manager connect callback to reject
949 * connection attempt.
950 *
951 * Like pxtcp_schedule_reset(), but the callback is more discriminate
952 * in how this unestablished connection is terminated.
953 */
954static int
955pxtcp_schedule_reject(struct pxtcp *pxtcp)
956{
957 pxtcp->msg_accept.msg.cb.function = pxtcp_pcb_accept_refuse;
958 pxtcp->pmhdl.slot = -1;
959 proxy_lwip_post(&pxtcp->msg_accept);
960 return -1;
961}
962
963
964/**
965 * Global tcp_proxy_accept() callback for proxied outgoing TCP
966 * connections from guest(s).
967 */
968static err_t
969pxtcp_pcb_heard(void *arg, struct tcp_pcb *newpcb, err_t error)
970{
971 struct pbuf *p = (struct pbuf *)arg;
972 struct pxtcp *pxtcp;
973 ipX_addr_t dst_addr;
974 int sdom;
975 SOCKET sock;
976 ssize_t nsent;
977 int sockerr = 0;
978
979 LWIP_UNUSED_ARG(error); /* always ERR_OK */
980
981 /*
982 * TCP first calls accept callback when it receives the first SYN
983 * and "tentatively accepts" new proxied connection attempt. When
984 * proxy "confirms" the SYN and sends SYN|ACK and the guest
985 * replies with ACK the accept callback is called again, this time
986 * with the established connection.
987 */
988 LWIP_ASSERT1(newpcb->state == SYN_RCVD_0);
989 tcp_accept(newpcb, pxtcp_pcb_accept);
990 tcp_arg(newpcb, NULL);
991
992 tcp_setprio(newpcb, TCP_PRIO_MAX);
993
994 pxremap_outbound_ipX(PCB_ISIPV6(newpcb), &dst_addr, &newpcb->local_ip);
995
996 sdom = PCB_ISIPV6(newpcb) ? PF_INET6 : PF_INET;
997 sock = proxy_connected_socket(sdom, SOCK_STREAM,
998 &dst_addr, newpcb->local_port);
999 if (sock == INVALID_SOCKET) {
1000 sockerr = errno;
1001 goto abort;
1002 }
1003
1004 pxtcp = pxtcp_allocate();
1005 if (pxtcp == NULL) {
1006 proxy_reset_socket(sock);
1007 goto abort;
1008 }
1009
1010 /* save initial datagram in case we need to reply with ICMP */
1011 pbuf_ref(p);
1012 pxtcp->unsent = p;
1013 pxtcp->netif = ip_current_netif();
1014
1015 pxtcp_pcb_associate(pxtcp, newpcb);
1016 pxtcp->sock = sock;
1017
1018 pxtcp->pmhdl.callback = pxtcp_pmgr_connect;
1019 pxtcp->events = POLLOUT;
1020
1021 nsent = pxtcp_chan_send(POLLMGR_CHAN_PXTCP_ADD, pxtcp);
1022 if (nsent < 0) {
1023 pxtcp->sock = INVALID_SOCKET;
1024 proxy_reset_socket(sock);
1025 pxtcp_pcb_accept_refuse(pxtcp);
1026 return ERR_ABRT;
1027 }
1028
1029 return ERR_OK;
1030
1031 abort:
1032 DPRINTF0(("%s: pcb %p, sock %d: errno %d\n",
1033 __func__, (void *)newpcb, sock, sockerr));
1034 pxtcp_pcb_reject(ip_current_netif(), newpcb, p, sockerr);
1035 return ERR_ABRT;
1036}
1037
1038
1039/**
1040 * tcp_proxy_accept() callback for accepted proxied outgoing TCP
1041 * connections from guest(s). This is "real" accept with three-way
1042 * handshake completed.
1043 */
1044static err_t
1045pxtcp_pcb_accept(void *arg, struct tcp_pcb *pcb, err_t error)
1046{
1047 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1048
1049 LWIP_UNUSED_ARG(pcb); /* used only in asserts */
1050 LWIP_UNUSED_ARG(error); /* always ERR_OK */
1051
1052 LWIP_ASSERT1(pxtcp != NULL);
1053 LWIP_ASSERT1(pxtcp->pcb = pcb);
1054 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
1055
1056 /* send any inbound data that are already queued */
1057 pxtcp_pcb_forward_inbound(pxtcp);
1058 return ERR_OK;
1059}
1060
1061
1062/**
1063 * Initial poll manager callback for proxied outgoing TCP connections.
1064 * pxtcp_pcb_accept() sets pxtcp::pmhdl::callback to this.
1065 *
1066 * Waits for connect(2) to the destination to complete. On success
1067 * replaces itself with pxtcp_pmgr_pump() callback common to all
1068 * established TCP connections.
1069 */
1070static int
1071pxtcp_pmgr_connect(struct pollmgr_handler *handler, SOCKET fd, int revents)
1072{
1073 struct pxtcp *pxtcp;
1074 int sockerr;
1075
1076 pxtcp = (struct pxtcp *)handler->data;
1077 LWIP_ASSERT1(handler == &pxtcp->pmhdl);
1078 LWIP_ASSERT1(fd == pxtcp->sock);
1079
1080 if (revents & (POLLNVAL | POLLHUP | POLLERR)) {
1081 if (revents & POLLNVAL) {
1082 pxtcp->sock = INVALID_SOCKET;
1083 pxtcp->sockerr = ETIMEDOUT;
1084 }
1085 else {
1086 socklen_t optlen = (socklen_t)sizeof(sockerr);
1087 int status;
1088 SOCKET s;
1089
1090 status = getsockopt(pxtcp->sock, SOL_SOCKET, SO_ERROR,
1091 (char *)&pxtcp->sockerr, &optlen);
1092 if (status < 0) { /* should not happen */
1093 sockerr = errno; /* ??? */
1094 perror("connect: getsockopt");
1095 }
1096 else {
1097#ifndef RT_OS_WINDOWS
1098 errno = pxtcp->sockerr; /* to avoid strerror_r */
1099#else
1100 /* see winutils.h */
1101 WSASetLastError(pxtcp->sockerr);
1102#endif
1103 perror("connect");
1104 }
1105 s = pxtcp->sock;
1106 pxtcp->sock = INVALID_SOCKET;
1107 status = closesocket(s);
1108 DPRINTF(("%s: %d closesocket: %ld\n", status, s));
1109 }
1110 return pxtcp_schedule_reject(pxtcp);
1111 }
1112
1113 if (revents & POLLOUT) { /* connect is successful */
1114 /* confirm accept to the guest */
1115 proxy_lwip_post(&pxtcp->msg_accept);
1116
1117 /*
1118 * Switch to common callback used for all established proxied
1119 * connections.
1120 */
1121 pxtcp->pmhdl.callback = pxtcp_pmgr_pump;
1122
1123 /*
1124 * Initially we poll for incoming traffic only. Outgoing
1125 * traffic is fast-forwarded by pxtcp_pcb_recv(); if it fails
1126 * it will ask us to poll for POLLOUT too.
1127 */
1128 pxtcp->events = POLLIN;
1129 return pxtcp->events;
1130 }
1131
1132 /* should never get here */
1133 DPRINTF0(("%s: pxtcp %p, sock %d: unexpected revents 0x%x\n",
1134 __func__, (void *)pxtcp, fd, revents));
1135 return pxtcp_schedule_reset(pxtcp);
1136}
1137
1138
1139/**
1140 * Called from poll manager thread via pxtcp::msg_accept when proxy
1141 * connected to the destination. Finalize accept by sending SYN|ACK
1142 * to the guest.
1143 */
1144static void
1145pxtcp_pcb_accept_confirm(void *ctx)
1146{
1147 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
1148 err_t error;
1149
1150 LWIP_ASSERT1(pxtcp != NULL);
1151 if (pxtcp->pcb == NULL) {
1152 return;
1153 }
1154
1155 /* we are not going to reply with ICMP, so we can drop initial pbuf */
1156 LWIP_ASSERT1(pxtcp->unsent != NULL);
1157 pbuf_free(pxtcp->unsent);
1158 pxtcp->unsent = NULL;
1159
1160 error = tcp_proxy_accept_confirm(pxtcp->pcb);
1161
1162 /*
1163 * If lwIP failed to enqueue SYN|ACK because it's out of pbufs it
1164 * abandons the pcb. Retrying that is not very easy, since it
1165 * would require keeping "fractional state". From guest's point
1166 * of view there is no reply to its SYN so it will either resend
1167 * the SYN (effetively triggering full connection retry for us),
1168 * or it will eventually time out.
1169 */
1170 if (error == ERR_ABRT) {
1171 pxtcp->pcb = NULL; /* pcb is gone */
1172 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
1173 }
1174
1175 /*
1176 * else if (error != ERR_OK): even if tcp_output() failed with
1177 * ERR_MEM - don't give up, that SYN|ACK is enqueued and will be
1178 * retransmitted eventually.
1179 */
1180}
1181
1182
1183/**
1184 * Entry point for port-forwarding.
1185 *
1186 * fwtcp accepts new incoming connection, creates pxtcp for the socket
1187 * (with no pcb yet) and adds it to the poll manager (polling for
1188 * errors only). Then it calls this function to construct the pcb and
1189 * perform connection to the guest.
1190 */
1191void
1192pxtcp_pcb_connect(struct pxtcp *pxtcp, const struct fwspec *fwspec)
1193{
1194 struct sockaddr_storage ss;
1195 socklen_t sslen;
1196 struct tcp_pcb *pcb;
1197 ipX_addr_t src_addr, dst_addr;
1198 u16_t src_port, dst_port;
1199 int status;
1200 err_t error;
1201
1202 LWIP_ASSERT1(pxtcp != NULL);
1203 LWIP_ASSERT1(pxtcp->pcb == NULL);
1204 LWIP_ASSERT1(fwspec->stype == SOCK_STREAM);
1205
1206 pcb = tcp_new();
1207 if (pcb == NULL) {
1208 goto reset;
1209 }
1210
1211 tcp_setprio(pcb, TCP_PRIO_MAX);
1212 pxtcp_pcb_associate(pxtcp, pcb);
1213
1214 sslen = sizeof(ss);
1215 status = getpeername(pxtcp->sock, (struct sockaddr *)&ss, &sslen);
1216 if (status == SOCKET_ERROR) {
1217 goto reset;
1218 }
1219
1220 /* nit: comapres PF and AF, but they are the same everywhere */
1221 LWIP_ASSERT1(ss.ss_family == fwspec->sdom);
1222
1223 fwany_ipX_addr_set_src(&src_addr, (const struct sockaddr *)&ss);
1224 if (ss.ss_family == PF_INET) {
1225 const struct sockaddr_in *peer4 = (const struct sockaddr_in *)&ss;
1226
1227 src_port = peer4->sin_port;
1228
1229 memcpy(&dst_addr.ip4, &fwspec->dst.sin.sin_addr, sizeof(ip_addr_t));
1230 dst_port = fwspec->dst.sin.sin_port;
1231 }
1232 else { /* PF_INET6 */
1233 const struct sockaddr_in6 *peer6 = (const struct sockaddr_in6 *)&ss;
1234 ip_set_v6(pcb, 1);
1235
1236 src_port = peer6->sin6_port;
1237
1238 memcpy(&dst_addr.ip6, &fwspec->dst.sin6.sin6_addr, sizeof(ip6_addr_t));
1239 dst_port = fwspec->dst.sin6.sin6_port;
1240 }
1241
1242 /* lwip port arguments are in host order */
1243 src_port = ntohs(src_port);
1244 dst_port = ntohs(dst_port);
1245
1246 error = tcp_proxy_bind(pcb, ipX_2_ip(&src_addr), src_port);
1247 if (error != ERR_OK) {
1248 goto reset;
1249 }
1250
1251 error = tcp_connect(pcb, ipX_2_ip(&dst_addr), dst_port,
1252 /* callback: */ pxtcp_pcb_connected);
1253 if (error != ERR_OK) {
1254 goto reset;
1255 }
1256
1257 return;
1258
1259 reset:
1260 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
1261}
1262
1263
1264/**
1265 * Port-forwarded connection to guest is successful, pump data.
1266 */
1267static err_t
1268pxtcp_pcb_connected(void *arg, struct tcp_pcb *pcb, err_t error)
1269{
1270 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1271
1272 LWIP_ASSERT1(error == ERR_OK); /* always called with ERR_OK */
1273 LWIP_UNUSED_ARG(error);
1274
1275 LWIP_ASSERT1(pxtcp != NULL);
1276 LWIP_ASSERT1(pxtcp->pcb == pcb);
1277 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
1278 LWIP_UNUSED_ARG(pcb);
1279
1280 DPRINTF0(("%s: new pxtcp %p; pcb %p; sock %d\n",
1281 __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
1282
1283 /* ACK on connection is like ACK on data in pxtcp_pcb_sent() */
1284 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp);
1285
1286 return ERR_OK;
1287}
1288
1289
1290/**
1291 * tcp_recv() callback.
1292 */
1293static err_t
1294pxtcp_pcb_recv(void *arg, struct tcp_pcb *pcb, struct pbuf *p, err_t error)
1295{
1296 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1297
1298 LWIP_ASSERT1(error == ERR_OK); /* always called with ERR_OK */
1299 LWIP_UNUSED_ARG(error);
1300
1301 LWIP_ASSERT1(pxtcp != NULL);
1302 LWIP_ASSERT1(pxtcp->pcb == pcb);
1303 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
1304 LWIP_UNUSED_ARG(pcb);
1305
1306
1307 /*
1308 * Have we done sending previous batch?
1309 */
1310 if (pxtcp->unsent != NULL) {
1311 if (p != NULL) {
1312 /*
1313 * Return an error to tell TCP to hold onto that pbuf.
1314 * It will be presented to us later from tcp_fasttmr().
1315 */
1316 return ERR_WOULDBLOCK;
1317 }
1318 else {
1319 /*
1320 * Unlike data, p == NULL indicating orderly shutdown is
1321 * NOT presented to us again
1322 */
1323 pxtcp->outbound_close = 1;
1324 return ERR_OK;
1325 }
1326 }
1327
1328
1329 /*
1330 * Guest closed?
1331 */
1332 if (p == NULL) {
1333 pxtcp->outbound_close = 1;
1334 pxtcp_pcb_forward_outbound_close(pxtcp);
1335 return ERR_OK;
1336 }
1337
1338
1339 /*
1340 * Got data, send what we can without blocking.
1341 */
1342 return pxtcp_pcb_forward_outbound(pxtcp, p);
1343}
1344
1345
1346/**
1347 * Guest half-closed its TX side of the connection.
1348 *
1349 * Called either immediately from pxtcp_pcb_recv() when it gets NULL,
1350 * or from pxtcp_pcb_forward_outbound() when it finishes forwarding
1351 * previously unsent data and sees pxtcp::outbound_close flag saved by
1352 * pxtcp_pcb_recv().
1353 */
1354static void
1355pxtcp_pcb_forward_outbound_close(struct pxtcp *pxtcp)
1356{
1357 struct tcp_pcb *pcb;
1358
1359 LWIP_ASSERT1(pxtcp != NULL);
1360 LWIP_ASSERT1(pxtcp->outbound_close);
1361 LWIP_ASSERT1(!pxtcp->outbound_close_done);
1362
1363 pcb = pxtcp->pcb;
1364 LWIP_ASSERT1(pcb != NULL);
1365
1366 DPRINTF(("outbound_close: pxtcp %p; pcb %p %s\n",
1367 (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
1368
1369
1370 /*
1371 * NB: set the flag first, since shutdown() will trigger POLLHUP
1372 * if inbound is already closed, and poll manager asserts
1373 * outbound_close_done (may be it should not?).
1374 */
1375 pxtcp->outbound_close_done = 1;
1376 shutdown(pxtcp->sock, SHUT_WR); /* half-close the socket */
1377
1378#if !HAVE_TCP_POLLHUP
1379 /*
1380 * On NetBSD POLLHUP is not reported for TCP sockets, so we need
1381 * to nudge poll manager manually.
1382 */
1383 if (pxtcp->inbound_close) {
1384 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_DEL, pxtcp);
1385 }
1386#endif
1387
1388
1389 /* no more outbound data coming to us */
1390 tcp_recv(pcb, NULL);
1391
1392 /*
1393 * If we have already done inbound close previously (active close
1394 * on the pcb), then we must not hold onto a pcb in TIME_WAIT
1395 * state since those will be recycled by lwip when it runs out of
1396 * free pcbs in the pool.
1397 *
1398 * The test is true also for a pcb in CLOSING state that waits
1399 * just for the ACK of its FIN (to transition to TIME_WAIT).
1400 */
1401 if (pxtcp_pcb_forward_inbound_done(pxtcp)) {
1402 pxtcp_pcb_dissociate(pxtcp);
1403 }
1404}
1405
1406
1407/**
1408 * Forward outbound data from pcb to socket.
1409 *
1410 * Called by pxtcp_pcb_recv() to forward new data and by callout
1411 * triggered by POLLOUT on the socket to send previously unsent data.
1412 *
1413 * (Re)scehdules one-time callout if not all data are sent.
1414 */
1415static err_t
1416pxtcp_pcb_forward_outbound(struct pxtcp *pxtcp, struct pbuf *p)
1417{
1418 struct pbuf *qs, *q;
1419 size_t qoff;
1420 size_t forwarded;
1421 int sockerr;
1422
1423#if defined(MSG_NOSIGNAL)
1424 const int send_flags = MSG_NOSIGNAL;
1425#else
1426 const int send_flags = 0;
1427#endif
1428
1429
1430 LWIP_ASSERT1(pxtcp->unsent == NULL || pxtcp->unsent == p);
1431
1432 forwarded = 0;
1433 sockerr = 0;
1434
1435 q = NULL;
1436 qoff = 0;
1437
1438 qs = p;
1439 while (qs != NULL) {
1440#ifndef RT_OS_WINDOWS
1441 struct msghdr mh;
1442#else
1443 int rc;
1444#endif
1445 IOVEC iov[8];
1446 const size_t iovsize = sizeof(iov)/sizeof(iov[0]);
1447 size_t fwd1;
1448 ssize_t nsent;
1449 size_t i;
1450
1451 fwd1 = 0;
1452 for (i = 0, q = qs; i < iovsize && q != NULL; ++i, q = q->next) {
1453 LWIP_ASSERT1(q->len > 0);
1454 IOVEC_SET_BASE(iov[i], q->payload);
1455 IOVEC_SET_LEN(iov[i], q->len);
1456 fwd1 += q->len;
1457 }
1458
1459#ifndef RT_OS_WINDOWS
1460 memset(&mh, 0, sizeof(mh));
1461 mh.msg_iov = iov;
1462 mh.msg_iovlen = i;
1463
1464 nsent = sendmsg(pxtcp->sock, &mh, send_flags);
1465#else
1466 /**
1467 * WSASend(,,,DWORD *,,,) - takes SSIZE_T (64bit value) ... so all nsent's
1468 * bits should be zeroed before passing to WSASent.
1469 */
1470 nsent = 0;
1471 rc = WSASend(pxtcp->sock, iov, (DWORD)i, (DWORD *)&nsent, 0, NULL, NULL);
1472 if (rc == SOCKET_ERROR) {
1473 /* WSASent reports SOCKET_ERROR and updates error accessible with
1474 * WSAGetLastError(). We assign nsent to -1, enforcing code below
1475 * to access error in BSD style.
1476 */
1477 warn("pxtcp_pcb_forward_outbound:WSASend error:%d nsent:%d\n",
1478 WSAGetLastError(),
1479 nsent);
1480 nsent = -1;
1481 }
1482#endif
1483
1484 if (nsent == (ssize_t)fwd1) {
1485 /* successfully sent this chain fragment completely */
1486 forwarded += nsent;
1487 qs = q;
1488 }
1489 else if (nsent >= 0) {
1490 /* successfully sent only some data */
1491 forwarded += nsent;
1492
1493 /* find the first pbuf that was not completely forwarded */
1494 qoff = nsent;
1495 for (i = 0, q = qs; i < iovsize && q != NULL; ++i, q = q->next) {
1496 if (qoff < q->len) {
1497 break;
1498 }
1499 qoff -= q->len;
1500 }
1501 LWIP_ASSERT1(q != NULL);
1502 LWIP_ASSERT1(qoff < q->len);
1503 break;
1504 }
1505 else {
1506 /*
1507 * Some errors are really not errors - if we get them,
1508 * it's not different from getting nsent == 0, so filter
1509 * them out here.
1510 */
1511 if (errno != EWOULDBLOCK
1512 && errno != EAGAIN
1513 && errno != ENOBUFS
1514 && errno != ENOMEM
1515 && errno != EINTR)
1516 {
1517 sockerr = errno;
1518 }
1519 q = qs;
1520 qoff = 0;
1521 break;
1522 }
1523 }
1524
1525 if (forwarded > 0) {
1526 tcp_recved(pxtcp->pcb, (u16_t)forwarded);
1527 }
1528
1529 if (q == NULL) { /* everything is forwarded? */
1530 LWIP_ASSERT1(sockerr == 0);
1531 LWIP_ASSERT1(forwarded == p->tot_len);
1532
1533 pxtcp->unsent = NULL;
1534 pbuf_free(p);
1535 if (pxtcp->outbound_close) {
1536 pxtcp_pcb_forward_outbound_close(pxtcp);
1537 }
1538 }
1539 else {
1540 if (q != p) {
1541 /* free forwarded pbufs at the beginning of the chain */
1542 pbuf_ref(q);
1543 pbuf_free(p);
1544 }
1545 if (qoff > 0) {
1546 /* advance payload pointer past the forwarded part */
1547 pbuf_header(q, -(s16_t)qoff);
1548 }
1549 pxtcp->unsent = q;
1550
1551 /*
1552 * Have sendmsg() failed?
1553 *
1554 * Connection reset will be detected by poll and
1555 * pxtcp_schedule_reset() will be called.
1556 *
1557 * Otherwise something *really* unexpected must have happened,
1558 * so we'd better abort.
1559 */
1560 if (sockerr != 0 && sockerr != ECONNRESET) {
1561 struct tcp_pcb *pcb = pxtcp->pcb;
1562 pxtcp_pcb_dissociate(pxtcp);
1563
1564 tcp_abort(pcb);
1565
1566 /* call error callback manually since we've already dissociated */
1567 pxtcp_pcb_err((void *)pxtcp, ERR_ABRT);
1568 return ERR_ABRT;
1569 }
1570
1571 /* schedule one-shot POLLOUT on the socket */
1572 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLOUT, pxtcp);
1573 }
1574 return ERR_OK;
1575}
1576
1577
1578/**
1579 * Callback from poll manager (on POLLOUT) to send data from
1580 * pxtcp::unsent pbuf to socket.
1581 */
1582static void
1583pxtcp_pcb_write_outbound(void *ctx)
1584{
1585 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
1586 LWIP_ASSERT1(pxtcp != NULL);
1587
1588 if (pxtcp->pcb == NULL) {
1589 return;
1590 }
1591
1592 pxtcp_pcb_forward_outbound(pxtcp, pxtcp->unsent);
1593}
1594
1595
1596/**
1597 * Common poll manager callback used by both outgoing and incoming
1598 * (port-forwarded) connections that has connected socket.
1599 */
1600static int
1601pxtcp_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents)
1602{
1603 struct pxtcp *pxtcp;
1604 int status;
1605 int sockerr;
1606
1607 pxtcp = (struct pxtcp *)handler->data;
1608 LWIP_ASSERT1(handler == &pxtcp->pmhdl);
1609 LWIP_ASSERT1(fd == pxtcp->sock);
1610
1611 if (revents & POLLNVAL) {
1612 pxtcp->sock = INVALID_SOCKET;
1613 return pxtcp_schedule_reset(pxtcp);
1614 }
1615
1616 if (revents & POLLERR) {
1617 socklen_t optlen = (socklen_t)sizeof(sockerr);
1618
1619 status = getsockopt(pxtcp->sock, SOL_SOCKET, SO_ERROR,
1620 (char *)&sockerr, &optlen);
1621 if (status < 0) { /* should not happen */
1622 perror("getsockopt");
1623 sockerr = ECONNRESET;
1624 }
1625
1626 DPRINTF0(("sock %d: errno %d\n", fd, sockerr));
1627 return pxtcp_schedule_reset(pxtcp);
1628 }
1629
1630 if (revents & POLLOUT) {
1631 pxtcp->events &= ~POLLOUT;
1632 proxy_lwip_post(&pxtcp->msg_outbound);
1633 }
1634
1635 if (revents & POLLIN) {
1636 ssize_t nread;
1637 int stop_pollin;
1638
1639 nread = pxtcp_sock_read(pxtcp, &stop_pollin);
1640 if (nread < 0) {
1641 sockerr = -(int)nread;
1642 DPRINTF0(("sock %d: errno %d\n", fd, sockerr));
1643 return pxtcp_schedule_reset(pxtcp);
1644 }
1645
1646 if (stop_pollin) {
1647 pxtcp->events &= ~POLLIN;
1648 }
1649
1650 if (nread > 0) {
1651 proxy_lwip_post(&pxtcp->msg_inbound);
1652#if !HAVE_TCP_POLLHUP
1653 /*
1654 * If host does not report POLLHUP for closed sockets
1655 * (e.g. NetBSD) we should check for full close manually.
1656 */
1657 if (pxtcp->inbound_close && pxtcp->outbound_close_done) {
1658 LWIP_ASSERT1((revents & POLLHUP) == 0);
1659 return pxtcp_schedule_delete(pxtcp);
1660 }
1661#endif
1662 }
1663 }
1664
1665#if !HAVE_TCP_POLLHUP
1666 LWIP_ASSERT1((revents & POLLHUP) == 0);
1667#else
1668 if (revents & POLLHUP) {
1669 /*
1670 * Linux and Darwin seems to report POLLHUP when both
1671 * directions are shut down. And they do report POLLHUP even
1672 * when there's unread data (which they aslo report as POLLIN
1673 * along with that POLLHUP).
1674 *
1675 * FreeBSD (from source inspection) seems to follow Linux,
1676 * reporting POLLHUP when both directions are shut down, but
1677 * POLLHUP is always accompanied with POLLIN.
1678 *
1679 * NetBSD never reports POLLHUP for sockets.
1680 *
1681 * ---
1682 *
1683 * If external half-closes first, we don't get POLLHUP, we
1684 * recv 0 bytes from the socket as EOF indicator, stop polling
1685 * for POLLIN and poll with events == 0 (with occasional
1686 * one-shot POLLOUT). When guest eventually closes, we get
1687 * POLLHUP.
1688 *
1689 * If guest half-closes first things are more tricky. As soon
1690 * as host sees the FIN from external it will spam POLLHUP,
1691 * even when there's unread data. The problem is that we
1692 * might have stopped polling for POLLIN because the ring
1693 * buffer is full or we were polling POLLIN but can't read all
1694 * of the data becuase buffer doesn't have enough space.
1695 * Either way, there's unread data but we can't keep polling
1696 * the socket.
1697 */
1698 DPRINTF(("sock %d: HUP\n", fd));
1699 LWIP_ASSERT1(pxtcp->outbound_close_done);
1700
1701 if (pxtcp->inbound_close) {
1702 /* there's no unread data, we are done */
1703 return pxtcp_schedule_delete(pxtcp);
1704 }
1705 else {
1706 /* DPRINTF */ {
1707#ifndef RT_OS_WINDOWS
1708 int unread;
1709#else
1710 u_long unread;
1711#endif
1712 status = ioctlsocket(fd, FIONREAD, &unread);
1713 if (status == SOCKET_ERROR) {
1714 perror("FIONREAD");
1715 }
1716 else {
1717 DPRINTF2(("sock %d: %d UNREAD bytes\n", fd, unread));
1718 }
1719 }
1720
1721 /*
1722 * We cannot just set a flag here and let pxtcp_pcb_sent()
1723 * notice and start pulling, because if we are preempted
1724 * before setting the flag and all data in inbuf is ACKed
1725 * there will be no more calls to pxtcp_pcb_sent() to
1726 * notice the flag.
1727 *
1728 * We cannot set a flag and then send a message to make
1729 * sure it noticed, because if it has and it has read all
1730 * data while the message is in transit it will delete
1731 * pxtcp.
1732 *
1733 * In a sense this message is like msg_delete (except we
1734 * ask to pull some data first).
1735 */
1736 proxy_lwip_post(&pxtcp->msg_inpull);
1737 pxtcp->pmhdl.slot = -1;
1738 return -1;
1739 }
1740 /* NOTREACHED */
1741 } /* POLLHUP */
1742#endif /* HAVE_TCP_POLLHUP */
1743
1744 return pxtcp->events;
1745}
1746
1747
1748/**
1749 * Read data from socket to ringbuf. This may be used both on lwip
1750 * and poll manager threads.
1751 *
1752 * Flag pointed to by pstop is set when further reading is impossible,
1753 * either temporary when buffer is full, or permanently when EOF is
1754 * received.
1755 *
1756 * Returns number of bytes read. NB: EOF is reported as 1!
1757 *
1758 * Returns zero if nothing was read, either because buffer is full, or
1759 * if no data is available (EAGAIN, EINTR &c).
1760 *
1761 * Returns -errno on real socket errors.
1762 */
1763static ssize_t
1764pxtcp_sock_read(struct pxtcp *pxtcp, int *pstop)
1765{
1766 IOVEC iov[2];
1767#ifndef RT_OS_WINDOWS
1768 struct msghdr mh;
1769#else
1770 DWORD dwFlags;
1771 int rc;
1772#endif
1773 int iovlen;
1774 ssize_t nread;
1775
1776 const size_t sz = pxtcp->inbuf.bufsize;
1777 size_t beg, lim, wrnew;
1778
1779 *pstop = 0;
1780
1781#ifndef RT_OS_WINDOWS
1782 memset(&mh, 0, sizeof(mh));
1783 mh.msg_iov = iov;
1784#endif
1785
1786 beg = pxtcp->inbuf.vacant;
1787 IOVEC_SET_BASE(iov[0], &pxtcp->inbuf.buf[beg]);
1788
1789 /* lim is the index we can NOT write to */
1790 lim = pxtcp->inbuf.unacked;
1791 if (lim == 0) {
1792 lim = sz - 1; /* empty slot at the end */
1793 }
1794 else if (lim == 1) {
1795 lim = sz; /* empty slot at the beginning */
1796 }
1797 else {
1798 --lim;
1799 }
1800
1801 if (beg == lim) {
1802 /*
1803 * Buffer is full, stop polling for POLLIN.
1804 *
1805 * pxtcp_pcb_sent() will re-enable POLLIN when guest ACKs
1806 * data, freeing space in the ring buffer.
1807 */
1808 *pstop = 1;
1809 return 0;
1810 }
1811
1812 if (beg < lim) {
1813 /* free space in one chunk */
1814 iovlen = 1;
1815 IOVEC_SET_LEN(iov[0], lim - beg);
1816 }
1817 else {
1818 /* free space in two chunks */
1819 iovlen = 2;
1820 IOVEC_SET_LEN(iov[0], sz - beg);
1821 IOVEC_SET_BASE(iov[1], &pxtcp->inbuf.buf[0]);
1822 IOVEC_SET_LEN(iov[1], lim);
1823 }
1824
1825#ifndef RT_OS_WINDOWS
1826 mh.msg_iovlen = iovlen;
1827 nread = recvmsg(pxtcp->sock, &mh, 0);
1828#else
1829 dwFlags = 0;
1830 /* We can't assign nread to -1 expecting, that we'll got it back in case of error,
1831 * instead, WSARecv(,,,DWORD *,,,) will rewrite only half of the 64bit value.
1832 */
1833 nread = 0;
1834 rc = WSARecv(pxtcp->sock, iov, iovlen, (DWORD *)&nread, &dwFlags, NULL, NULL);
1835 if (rc == SOCKET_ERROR) {
1836 warn("pxtcp_sock_read:WSARecv(%d) error:%d nread:%d\n",
1837 pxtcp->sock,
1838 WSAGetLastError(),
1839 nread);
1840 nread = -1;
1841 }
1842
1843 if (dwFlags) {
1844 warn("pxtcp_sock_read:WSARecv(%d) dwFlags:%x nread:%d\n",
1845 pxtcp->sock,
1846 WSAGetLastError(),
1847 nread);
1848 }
1849#endif
1850
1851 if (nread > 0) {
1852 wrnew = beg + nread;
1853 if (wrnew >= sz) {
1854 wrnew -= sz;
1855 }
1856 pxtcp->inbuf.vacant = wrnew;
1857 DPRINTF2(("pxtcp %p: sock %d read %d bytes\n",
1858 (void *)pxtcp, pxtcp->sock, (int)nread));
1859 return nread;
1860 }
1861 else if (nread == 0) {
1862 *pstop = 1;
1863 pxtcp->inbound_close = 1;
1864 DPRINTF2(("pxtcp %p: sock %d read EOF\n",
1865 (void *)pxtcp, pxtcp->sock));
1866 return 1;
1867 }
1868 else if (errno == EWOULDBLOCK || errno == EAGAIN || errno == EINTR) {
1869 /* haven't read anything, just return */
1870 DPRINTF2(("pxtcp %p: sock %d read cancelled\n",
1871 (void *)pxtcp, pxtcp->sock));
1872 return 0;
1873 }
1874 else {
1875 /* socket error! */
1876 DPRINTF0(("pxtcp %p: sock %d read errno %d\n",
1877 (void *)pxtcp, pxtcp->sock, errno));
1878 return -errno;
1879 }
1880}
1881
1882
1883/**
1884 * Callback from poll manager (pxtcp::msg_inbound) to trigger output
1885 * from ringbuf to guest.
1886 */
1887static void
1888pxtcp_pcb_write_inbound(void *ctx)
1889{
1890 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
1891 LWIP_ASSERT1(pxtcp != NULL);
1892
1893 if (pxtcp->pcb == NULL) {
1894 return;
1895 }
1896
1897 pxtcp_pcb_forward_inbound(pxtcp);
1898}
1899
1900
1901/**
1902 * tcp_poll() callback
1903 *
1904 * We swtich it on when tcp_write() or tcp_shutdown() fail with
1905 * ERR_MEM to prevent connection from stalling. If there are ACKs or
1906 * more inbound data then pxtcp_pcb_forward_inbound() will be
1907 * triggered again, but if neither happens, tcp_poll() comes to the
1908 * rescue.
1909 */
1910static err_t
1911pxtcp_pcb_poll(void *arg, struct tcp_pcb *pcb)
1912{
1913 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1914 LWIP_UNUSED_ARG(pcb);
1915
1916 DPRINTF2(("%s: pxtcp %p; pcb %p\n",
1917 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
1918
1919 pxtcp_pcb_forward_inbound(pxtcp);
1920
1921 /*
1922 * If the last thing holding up deletion of the pxtcp was failed
1923 * tcp_shutdown() and it succeeded, we may be the last callback.
1924 */
1925 pxtcp_pcb_maybe_deferred_delete(pxtcp);
1926
1927 return ERR_OK;
1928}
1929
1930
1931static void
1932pxtcp_pcb_schedule_poll(struct pxtcp *pxtcp)
1933{
1934 tcp_poll(pxtcp->pcb, pxtcp_pcb_poll, 0);
1935}
1936
1937
1938static void
1939pxtcp_pcb_cancel_poll(struct pxtcp *pxtcp)
1940{
1941 tcp_poll(pxtcp->pcb, NULL, 255);
1942}
1943
1944
1945/**
1946 * Forward inbound data from ring buffer to the guest.
1947 *
1948 * Scheduled by poll manager thread after it receives more data into
1949 * the ring buffer (we have more data to send).
1950
1951 * Also called from tcp_sent() callback when guest ACKs some data,
1952 * increasing pcb->snd_buf (we are permitted to send more data).
1953 *
1954 * Also called from tcp_poll() callback if previous attempt to forward
1955 * inbound data failed with ERR_MEM (we need to try again).
1956 */
1957static void
1958pxtcp_pcb_forward_inbound(struct pxtcp *pxtcp)
1959{
1960 struct tcp_pcb *pcb;
1961 size_t sndbuf;
1962 size_t beg, lim, sndlim;
1963 size_t toeob, tolim;
1964 size_t nsent;
1965 err_t error;
1966
1967 LWIP_ASSERT1(pxtcp != NULL);
1968 pcb = pxtcp->pcb;
1969 if (pcb == NULL) {
1970 return;
1971 }
1972
1973 if (/* __predict_false */ pcb->state < ESTABLISHED) {
1974 /*
1975 * If we have just confirmed accept of this connection, the
1976 * pcb is in SYN_RCVD state and we still haven't received the
1977 * ACK of our SYN. It's only in SYN_RCVD -> ESTABLISHED
1978 * transition that lwip decrements pcb->acked so that that ACK
1979 * is not reported to pxtcp_pcb_sent(). If we send something
1980 * now and immediately close (think "daytime", e.g.) while
1981 * still in SYN_RCVD state, we will move directly to
1982 * FIN_WAIT_1 and when our confirming SYN is ACK'ed lwip will
1983 * report it to pxtcp_pcb_sent().
1984 */
1985 DPRINTF2(("forward_inbound: pxtcp %p; pcb %p %s - later...\n",
1986 (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
1987 return;
1988 }
1989
1990
1991 beg = pxtcp->inbuf.unsent; /* private to lwip thread */
1992 lim = pxtcp->inbuf.vacant;
1993
1994 if (beg == lim) {
1995 if (pxtcp->inbound_close && !pxtcp->inbound_close_done) {
1996 pxtcp_pcb_forward_inbound_close(pxtcp);
1997 tcp_output(pcb);
1998 return;
1999 }
2000
2001 /*
2002 * Else, there's no data to send.
2003 *
2004 * If there is free space in the buffer, producer will
2005 * reschedule us as it receives more data and vacant (lim)
2006 * advances.
2007 *
2008 * If buffer is full when all data have been passed to
2009 * tcp_write() but not yet acknowledged, we will advance
2010 * unacked on ACK, freeing some space for producer to write to
2011 * (then see above).
2012 */
2013 return;
2014 }
2015
2016 sndbuf = tcp_sndbuf(pcb);
2017 if (sndbuf == 0) {
2018 /*
2019 * Can't send anything now. As guest ACKs some data, TCP will
2020 * call pxtcp_pcb_sent() callback and we will come here again.
2021 */
2022 return;
2023 }
2024
2025 nsent = 0;
2026
2027 /*
2028 * We have three limits to consider:
2029 * - how much data we have in the ringbuf
2030 * - how much data we are allowed to send
2031 * - ringbuf size
2032 */
2033 toeob = pxtcp->inbuf.bufsize - beg;
2034 if (lim < beg) { /* lim wrapped */
2035 if (sndbuf < toeob) { /* but we are limited by sndbuf */
2036 /* so beg is not going to wrap, treat sndbuf as lim */
2037 lim = beg + sndbuf; /* ... and proceed to the simple case */
2038 }
2039 else { /* we are limited by the end of the buffer, beg will wrap */
2040 u8_t maybemore;
2041 if (toeob == sndbuf || lim == 0) {
2042 maybemore = 0;
2043 }
2044 else {
2045 maybemore = TCP_WRITE_FLAG_MORE;
2046 }
2047
2048 error = tcp_write(pcb, &pxtcp->inbuf.buf[beg], toeob, maybemore);
2049 if (error != ERR_OK) {
2050 goto writeerr;
2051 }
2052 nsent += toeob;
2053 pxtcp->inbuf.unsent = 0; /* wrap */
2054
2055 if (maybemore) {
2056 beg = 0;
2057 sndbuf -= toeob;
2058 }
2059 else {
2060 /* we are done sending, but ... */
2061 goto check_inbound_close;
2062 }
2063 }
2064 }
2065
2066 LWIP_ASSERT1(beg < lim);
2067 sndlim = beg + sndbuf;
2068 if (lim > sndlim) {
2069 lim = sndlim;
2070 }
2071 tolim = lim - beg;
2072 if (tolim > 0) {
2073 error = tcp_write(pcb, &pxtcp->inbuf.buf[beg], (u16_t)tolim, 0);
2074 if (error != ERR_OK) {
2075 goto writeerr;
2076 }
2077 nsent += tolim;
2078 pxtcp->inbuf.unsent = lim;
2079 }
2080
2081 check_inbound_close:
2082 if (pxtcp->inbound_close && pxtcp->inbuf.unsent == pxtcp->inbuf.vacant) {
2083 pxtcp_pcb_forward_inbound_close(pxtcp);
2084 }
2085
2086 DPRINTF2(("forward_inbound: pxtcp %p, pcb %p: sent %d bytes\n",
2087 (void *)pxtcp, (void *)pcb, (int)nsent));
2088 tcp_output(pcb);
2089 pxtcp_pcb_cancel_poll(pxtcp);
2090 return;
2091
2092 writeerr:
2093 if (error == ERR_MEM) {
2094 if (nsent > 0) { /* first write succeeded, second failed */
2095 DPRINTF2(("forward_inbound: pxtcp %p, pcb %p: sent %d bytes only\n",
2096 (void *)pxtcp, (void *)pcb, (int)nsent));
2097 tcp_output(pcb);
2098 }
2099 DPRINTF(("forward_inbound: pxtcp %p, pcb %p: ERR_MEM\n",
2100 (void *)pxtcp, (void *)pcb));
2101 pxtcp_pcb_schedule_poll(pxtcp);
2102 }
2103 else {
2104 DPRINTF(("forward_inbound: pxtcp %p, pcb %p: %s\n",
2105 (void *)pxtcp, (void *)pcb, proxy_lwip_strerr(error)));
2106
2107 /* XXX: We shouldn't get ERR_ARG. Check ERR_CONN conditions early? */
2108 LWIP_ASSERT1(error == ERR_MEM);
2109 }
2110}
2111
2112
2113static void
2114pxtcp_pcb_forward_inbound_close(struct pxtcp *pxtcp)
2115{
2116 struct tcp_pcb *pcb;
2117 err_t error;
2118
2119 LWIP_ASSERT1(pxtcp != NULL);
2120 LWIP_ASSERT1(pxtcp->inbound_close);
2121 LWIP_ASSERT1(!pxtcp->inbound_close_done);
2122 LWIP_ASSERT1(pxtcp->inbuf.unsent == pxtcp->inbuf.vacant);
2123
2124 pcb = pxtcp->pcb;
2125 LWIP_ASSERT1(pcb != NULL);
2126
2127 DPRINTF(("inbound_close: pxtcp %p; pcb %p: %s\n",
2128 (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
2129
2130 error = tcp_shutdown(pcb, /*RX*/ 0, /*TX*/ 1);
2131 if (error != ERR_OK) {
2132 DPRINTF(("inbound_close: pxtcp %p; pcb %p:"
2133 " tcp_shutdown: error=%s\n",
2134 (void *)pxtcp, (void *)pcb, proxy_lwip_strerr(error)));
2135 pxtcp_pcb_schedule_poll(pxtcp);
2136 return;
2137 }
2138
2139 pxtcp_pcb_cancel_poll(pxtcp);
2140 pxtcp->inbound_close_done = 1;
2141
2142
2143 /*
2144 * If we have already done outbound close previously (passive
2145 * close on the pcb), then we must not hold onto a pcb in LAST_ACK
2146 * state since those will be deleted by lwip when that last ack
2147 * comes from the guest.
2148 *
2149 * NB: We do NOT check for deferred delete here, even though we
2150 * have just set one of its conditions, inbound_close_done. We
2151 * let pcb callbacks that called us do that. It's simpler and
2152 * cleaner that way.
2153 */
2154 if (pxtcp->outbound_close_done && pxtcp_pcb_forward_inbound_done(pxtcp)) {
2155 pxtcp_pcb_dissociate(pxtcp);
2156 }
2157}
2158
2159
2160/**
2161 * Check that all forwarded inbound data is sent and acked, and that
2162 * inbound close is scheduled (we aren't called back when it's acked).
2163 */
2164DECLINLINE(int)
2165pxtcp_pcb_forward_inbound_done(const struct pxtcp *pxtcp)
2166{
2167 return (pxtcp->inbound_close_done /* also implies that all data forwarded */
2168 && pxtcp->inbuf.unacked == pxtcp->inbuf.unsent);
2169}
2170
2171
2172/**
2173 * tcp_sent() callback - guest acknowledged len bytes.
2174 *
2175 * We can advance inbuf::unacked index, making more free space in the
2176 * ringbuf and wake up producer on poll manager thread.
2177 *
2178 * We can also try to send more data if we have any since pcb->snd_buf
2179 * was increased and we are now permitted to send more.
2180 */
2181static err_t
2182pxtcp_pcb_sent(void *arg, struct tcp_pcb *pcb, u16_t len)
2183{
2184 struct pxtcp *pxtcp = (struct pxtcp *)arg;
2185 size_t unacked;
2186
2187 LWIP_ASSERT1(pxtcp != NULL);
2188 LWIP_ASSERT1(pxtcp->pcb == pcb);
2189 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
2190 LWIP_UNUSED_ARG(pcb); /* only in assert */
2191
2192 DPRINTF2(("%s: pxtcp %p; pcb %p: +%d ACKed:"
2193 " unacked %d, unsent %d, vacant %d\n",
2194 __func__, (void *)pxtcp, (void *)pcb, (int)len,
2195 (int)pxtcp->inbuf.unacked,
2196 (int)pxtcp->inbuf.unsent,
2197 (int)pxtcp->inbuf.vacant));
2198
2199 if (/* __predict_false */ len == 0) {
2200 /* we are notified to start pulling */
2201 LWIP_ASSERT1(pxtcp->outbound_close_done);
2202 LWIP_ASSERT1(!pxtcp->inbound_close);
2203 LWIP_ASSERT1(pxtcp->inbound_pull);
2204
2205 unacked = pxtcp->inbuf.unacked;
2206 }
2207 else {
2208 /*
2209 * Advance unacked index. Guest acknowledged the data, so it
2210 * won't be needed again for potential retransmits.
2211 */
2212 unacked = pxtcp->inbuf.unacked + len;
2213 if (unacked > pxtcp->inbuf.bufsize) {
2214 unacked -= pxtcp->inbuf.bufsize;
2215 }
2216 pxtcp->inbuf.unacked = unacked;
2217 }
2218
2219 /* arrange for more inbound data */
2220 if (!pxtcp->inbound_close) {
2221 if (!pxtcp->inbound_pull) {
2222 /* wake up producer, in case it has stopped polling for POLLIN */
2223 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp);
2224#ifdef RT_OS_WINDOWS
2225 /**
2226 * We have't got enought room in ring buffer to read atm,
2227 * but we don't want to lose notification from WSAW4ME when
2228 * space would be available, so we reset event with empty recv
2229 */
2230 recv(pxtcp->sock, NULL, 0, 0);
2231#endif
2232 }
2233 else {
2234 ssize_t nread;
2235 int stop_pollin; /* ignored */
2236
2237 nread = pxtcp_sock_read(pxtcp, &stop_pollin);
2238
2239 if (nread < 0) {
2240 int sockerr = -(int)nread;
2241 LWIP_UNUSED_ARG(sockerr);
2242 DPRINTF0(("%s: sock %d: errno %d\n",
2243 __func__, pxtcp->sock, sockerr));
2244
2245 /*
2246 * Since we are pulling, pxtcp is no longer registered
2247 * with poll manager so we can kill it directly.
2248 */
2249 pxtcp_pcb_reset_pxtcp(pxtcp);
2250 return ERR_ABRT;
2251 }
2252 }
2253 }
2254
2255 /* forward more data if we can */
2256 if (!pxtcp->inbound_close_done) {
2257 pxtcp_pcb_forward_inbound(pxtcp);
2258
2259 /*
2260 * NB: we might have dissociated from a pcb that transitioned
2261 * to LAST_ACK state, so don't refer to pcb below.
2262 */
2263 }
2264
2265
2266 /* have we got all the acks? */
2267 if (pxtcp->inbound_close /* no more new data */
2268 && pxtcp->inbuf.unsent == pxtcp->inbuf.vacant /* all data is sent */
2269 && unacked == pxtcp->inbuf.unsent) /* ... and is acked */
2270 {
2271 char *buf;
2272
2273 DPRINTF(("%s: pxtcp %p; pcb %p; all data ACKed\n",
2274 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
2275
2276 /* no more retransmits, so buf is not needed */
2277 buf = pxtcp->inbuf.buf;
2278 pxtcp->inbuf.buf = NULL;
2279 free(buf);
2280
2281 /* no more acks, so no more callbacks */
2282 if (pxtcp->pcb != NULL) {
2283 tcp_sent(pxtcp->pcb, NULL);
2284 }
2285
2286 /*
2287 * We may be the last callback for this pcb if we have also
2288 * successfully forwarded inbound_close.
2289 */
2290 pxtcp_pcb_maybe_deferred_delete(pxtcp);
2291 }
2292
2293 return ERR_OK;
2294}
2295
2296
2297/**
2298 * Callback from poll manager (pxtcp::msg_inpull) to switch
2299 * pxtcp_pcb_sent() to actively pull the last bits of input. See
2300 * POLLHUP comment in pxtcp_pmgr_pump().
2301 *
2302 * pxtcp::sock is deregistered from poll manager after this callback
2303 * is scheduled.
2304 */
2305static void
2306pxtcp_pcb_pull_inbound(void *ctx)
2307{
2308 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
2309 LWIP_ASSERT1(pxtcp != NULL);
2310
2311 if (pxtcp->pcb == NULL) {
2312 DPRINTF(("%s: pxtcp %p: PCB IS GONE\n", __func__, (void *)pxtcp));
2313 pxtcp_pcb_reset_pxtcp(pxtcp);
2314 return;
2315 }
2316
2317 DPRINTF(("%s: pxtcp %p: pcb %p\n",
2318 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
2319 pxtcp->inbound_pull = 1;
2320 pxtcp->deferred_delete = 1;
2321 pxtcp_pcb_sent(pxtcp, pxtcp->pcb, 0);
2322}
2323
2324
2325/**
2326 * tcp_err() callback.
2327 *
2328 * pcb is not passed to this callback since it may be already
2329 * deallocated by the stack, but we can't do anything useful with it
2330 * anyway since connection is gone.
2331 */
2332static void
2333pxtcp_pcb_err(void *arg, err_t error)
2334{
2335 struct pxtcp *pxtcp = (struct pxtcp *)arg;
2336 LWIP_ASSERT1(pxtcp != NULL);
2337
2338 /*
2339 * ERR_CLSD is special - it is reported here when:
2340 *
2341 * . guest has already half-closed
2342 * . we send FIN to guest when external half-closes
2343 * . guest acks that FIN
2344 *
2345 * Since connection is closed but receive has been already closed
2346 * lwip can only report this via tcp_err. At this point the pcb
2347 * is still alive, so we can peek at it if need be.
2348 *
2349 * The interesting twist is when the ACK from guest that akcs our
2350 * FIN also acks some data. In this scenario lwip will NOT call
2351 * tcp_sent() callback with the ACK for that last bit of data but
2352 * instead will call tcp_err with ERR_CLSD right away. Since that
2353 * ACK also acknowledges all the data, we should run some of
2354 * pxtcp_pcb_sent() logic here.
2355 */
2356 if (error == ERR_CLSD) {
2357 struct tcp_pcb *pcb = pxtcp->pcb; /* still alive */
2358
2359 DPRINTF2(("ERR_CLSD: pxtcp %p; pcb %p:"
2360 " pcb->acked %d;"
2361 " unacked %d, unsent %d, vacant %d\n",
2362 (void *)pxtcp, (void *)pcb,
2363 pcb->acked,
2364 (int)pxtcp->inbuf.unacked,
2365 (int)pxtcp->inbuf.unsent,
2366 (int)pxtcp->inbuf.vacant));
2367
2368 LWIP_ASSERT1(pxtcp->pcb == pcb);
2369 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
2370
2371 if (pcb->acked > 0) {
2372 pxtcp_pcb_sent(pxtcp, pcb, pcb->acked);
2373 }
2374 return;
2375 }
2376
2377 DPRINTF0(("tcp_err: pxtcp=%p, error=%s\n",
2378 (void *)pxtcp, proxy_lwip_strerr(error)));
2379
2380 pxtcp->pcb = NULL; /* pcb is gone */
2381 if (pxtcp->deferred_delete) {
2382 pxtcp_pcb_reset_pxtcp(pxtcp);
2383 }
2384 else {
2385 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
2386 }
2387}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette