VirtualBox

source: vbox/trunk/src/VBox/NetworkServices/NAT/pxtcp.c@ 48873

Last change on this file since 48873 was 48438, checked in by vboxsync, 11 years ago

Check for PXREMAP_FAILED now that it can actually happen.

1/* -*- indent-tabs-mode: nil; -*- */
2#include "winutils.h"
3
4#include "pxtcp.h"
5
6#include "proxytest.h"
7#include "proxy_pollmgr.h"
8#include "pxremap.h"
9#include "portfwd.h" /* fwspec */
10
11#ifndef RT_OS_WINDOWS
12#include <sys/types.h>
13#include <sys/socket.h>
14#include <sys/ioctl.h>
15#ifdef RT_OS_SOLARIS
16#include <sys/filio.h> /* FIONREAD is BSD'ism */
17#endif
18#include <stdlib.h>
19#include <stdint.h>
20#include <stdio.h>
21#include <string.h>
22#include <poll.h>
23
24#include <err.h> /* BSD'ism */
25#else
26#include <stdlib.h>
27#include <stdio.h>
28#include <string.h>
29
30#include <iprt/stdint.h>
31#include "winpoll.h"
32#endif
33
34#include "lwip/opt.h"
35
36#include "lwip/sys.h"
37#include "lwip/tcpip.h"
38#include "lwip/netif.h"
39#include "lwip/tcp_impl.h" /* XXX: to access tcp_abandon() */
40#include "lwip/icmp.h"
41#include "lwip/icmp6.h"
42
43/* NetBSD doesn't report POLLHUP for TCP sockets */
44#ifdef __NetBSD__
45# define HAVE_TCP_POLLHUP 0
46#else
47# define HAVE_TCP_POLLHUP 1
48#endif
49
50
51/**
52 * Ring buffer for inbound data. Filled with data from the host
53 * socket on poll manager thread. Data consumed by scheduling
54 * tcp_write() to the pcb on the lwip thread.
55 *
56 * NB: There is actually a third party present, the lwip stack itself.
57 * Thus the buffer doesn't have the usual two-way free vs. data split, but
58 * rather a three-way split: free / sent-and-unACKed data / unsent data.
59 */
60struct ringbuf {
61 char *buf;
62 size_t bufsize;
63
64 /*
65 * Start of free space, producer writes here (up till "unacked").
66 */
67 volatile size_t vacant;
68
69 /*
70 * Start of sent but unacknowledged data. The data are "owned" by
71 * the stack as it may need to retransmit. This is the free space
72 * limit for producer.
73 */
74 volatile size_t unacked;
75
76 /*
77 * Start of unsent data, consumer reads/sends from here (up till
78 * "vacant"). Not declared volatile since it's only accessed from
79 * the consumer thread.
80 */
81 size_t unsent;
82};
83
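/*
 * Illustrative sketch (added, not part of the original file): with
 * bufsize == 8 the three-way split described above might look like this,
 * where 'A' marks sent but unACKed data, 'U' unsent data and '.' free
 * space:
 *
 *     index:   0   1   2   3   4   5   6   7
 *     state:   A   A   A   U   U   .   .   .
 *
 * i.e. unacked == 0, unsent == 3, vacant == 5.  The producer (poll manager
 * thread) fills in at "vacant" and may advance it up to one slot short of
 * "unacked"; the consumer (lwIP thread) sends from "unsent" up to "vacant";
 * "unacked" advances as the guest ACKs data.
 */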
84
85/** A single proxied TCP connection: the lwIP (guest) side pcb, the host
86 * (external) side socket, and the state needed to pump data between them. */
87struct pxtcp {
88 /**
89 * Our poll manager handler. Must be first, strong/weak
90 * references depend on this "inheritance".
91 */
92 struct pollmgr_handler pmhdl;
93
94 /**
95 * lwIP (internal/guest) side of the proxied connection.
96 */
97 struct tcp_pcb *pcb;
98
99 /**
100 * Host (external) side of the proxied connection.
101 */
102 SOCKET sock;
103
104 /**
105 * Socket events we are currently polling for.
106 */
107 int events;
108
109 /**
110 * Socket error. Currently used to save connect(2) errors so that
111 * we can decide if we need to send ICMP error.
112 */
113 int sockerr;
114
115 /**
116 * Interface that we have got the SYN from. Needed to send ICMP
117 * with correct source address.
118 */
119 struct netif *netif;
120
121 /**
122 * For tentatively accepted connections for which we are in
123 * process of connecting to the real destination this is the
124 * initial pbuf that we might need to build ICMP error.
125 *
126 * When connection is established this is used to hold outbound
127 * pbuf chain received by pxtcp_pcb_recv() but not yet completely
128 * forwarded over the socket. We cannot "return" it to lwIP since
129 * the head of the chain is already sent and freed.
130 */
131 struct pbuf *unsent;
132
133 /**
134 * Guest has closed its side. Reported to pxtcp_pcb_recv() only
135 * once and we might not be able to forward it immediately if we
136 * have unsent pbuf.
137 */
138 int outbound_close;
139
140 /**
141 * Outbound half-close has been done on the socket.
142 */
143 int outbound_close_done;
144
145 /**
146 * External has closed its side. We might not be able to forward
147 * it immediately if we have unforwarded data.
148 */
149 int inbound_close;
150
151 /**
152 * Inbound half-close has been done on the pcb.
153 */
154 int inbound_close_done;
155
156 /**
157 * On systems that report POLLHUP as soon as the final FIN is
158 * received on a socket we cannot continue polling for the rest of
159 * input, so we have to read (pull) last data from the socket on
160 * the lwIP thread instead of polling/pushing it from the poll
161 * manager thread. See comment in pxtcp_pmgr_pump() POLLHUP case.
162 */
163 int inbound_pull;
164
165
166 /**
167 * When poll manager schedules delete we may not be able to delete
168 * a pxtcp immediately if not all inbound data has been acked by
169 * the guest: lwIP may need to resend and the data are in pxtcp's
170 * inbuf::buf. We defer delete until all data are acked to
171 * pxtcp_pcb_sent().
172 *
173 * It's also implied by inbound_pull. It probably means that
174 * "deferred" is not a very fortunate name.
175 */
176 int deferred_delete;
177
178 /**
179 * Ring-buffer for inbound data.
180 */
181 struct ringbuf inbuf;
182
183 /**
184 * lwIP thread's strong reference to us.
185 */
186 struct pollmgr_refptr *rp;
187
188
189 /*
190 * We use static messages to call functions on the lwIP thread to
191 * avoid malloc/free overhead.
192 */
193 struct tcpip_msg msg_delete; /* delete pxtcp */
194 struct tcpip_msg msg_reset; /* reset connection and delete pxtcp */
195 struct tcpip_msg msg_accept; /* confirm accept of proxied connection */
196 struct tcpip_msg msg_outbound; /* trigger send of outbound data */
197 struct tcpip_msg msg_inbound; /* trigger send of inbound data */
198 struct tcpip_msg msg_inpull; /* trigger pull of last inbound data */
199};
200
201
202
203static struct pxtcp *pxtcp_allocate(void);
204static void pxtcp_free(struct pxtcp *);
205
206static void pxtcp_pcb_associate(struct pxtcp *, struct tcp_pcb *);
207static void pxtcp_pcb_dissociate(struct pxtcp *);
208
209/* poll manager callbacks for pxtcp related channels */
210static int pxtcp_pmgr_chan_add(struct pollmgr_handler *, SOCKET, int);
211static int pxtcp_pmgr_chan_pollout(struct pollmgr_handler *, SOCKET, int);
212static int pxtcp_pmgr_chan_pollin(struct pollmgr_handler *, SOCKET, int);
213#if !HAVE_TCP_POLLHUP
214static int pxtcp_pmgr_chan_del(struct pollmgr_handler *, SOCKET, int);
215#endif
216static int pxtcp_pmgr_chan_reset(struct pollmgr_handler *, SOCKET, int);
217
218/* helper functions for sending/receiving pxtcp over poll manager channels */
219static ssize_t pxtcp_chan_send(enum pollmgr_slot_t, struct pxtcp *);
220static ssize_t pxtcp_chan_send_weak(enum pollmgr_slot_t, struct pxtcp *);
221static struct pxtcp *pxtcp_chan_recv(struct pollmgr_handler *, SOCKET, int);
222static struct pxtcp *pxtcp_chan_recv_strong(struct pollmgr_handler *, SOCKET, int);
223
224/* poll manager callbacks for individual sockets */
225static int pxtcp_pmgr_connect(struct pollmgr_handler *, SOCKET, int);
226static int pxtcp_pmgr_pump(struct pollmgr_handler *, SOCKET, int);
227
228static ssize_t pxtcp_sock_read(struct pxtcp *, int *);
229
230/* convenience functions for poll manager callbacks */
231static int pxtcp_schedule_delete(struct pxtcp *);
232static int pxtcp_schedule_reset(struct pxtcp *);
233static int pxtcp_schedule_reject(struct pxtcp *);
234
235/* lwip thread callbacks called via proxy_lwip_post() */
236static void pxtcp_pcb_delete_pxtcp(void *);
237static void pxtcp_pcb_reset_pxtcp(void *);
238static void pxtcp_pcb_accept_refuse(void *);
239static void pxtcp_pcb_accept_confirm(void *);
240static void pxtcp_pcb_write_outbound(void *);
241static void pxtcp_pcb_write_inbound(void *);
242static void pxtcp_pcb_pull_inbound(void *);
243
244/* tcp pcb callbacks */
245static err_t pxtcp_pcb_heard(void *, struct tcp_pcb *, err_t); /* global */
246static err_t pxtcp_pcb_accept(void *, struct tcp_pcb *, err_t);
247static err_t pxtcp_pcb_connected(void *, struct tcp_pcb *, err_t);
248static err_t pxtcp_pcb_recv(void *, struct tcp_pcb *, struct pbuf *, err_t);
249static err_t pxtcp_pcb_sent(void *, struct tcp_pcb *, u16_t);
250static err_t pxtcp_pcb_poll(void *, struct tcp_pcb *);
251static void pxtcp_pcb_err(void *, err_t);
252
253static err_t pxtcp_pcb_forward_outbound(struct pxtcp *, struct pbuf *);
254static void pxtcp_pcb_forward_outbound_close(struct pxtcp *);
255
256static void pxtcp_pcb_forward_inbound(struct pxtcp *);
257static void pxtcp_pcb_forward_inbound_close(struct pxtcp *);
258DECLINLINE(int) pxtcp_pcb_forward_inbound_done(const struct pxtcp *);
259static void pxtcp_pcb_schedule_poll(struct pxtcp *);
260static void pxtcp_pcb_cancel_poll(struct pxtcp *);
261
262static void pxtcp_pcb_reject(struct netif *, struct tcp_pcb *, struct pbuf *, int);
263DECLINLINE(void) pxtcp_pcb_maybe_deferred_delete(struct pxtcp *);
264
265/* poll manager handlers for pxtcp channels */
266static struct pollmgr_handler pxtcp_pmgr_chan_add_hdl;
267static struct pollmgr_handler pxtcp_pmgr_chan_pollout_hdl;
268static struct pollmgr_handler pxtcp_pmgr_chan_pollin_hdl;
269#if !HAVE_TCP_POLLHUP
270static struct pollmgr_handler pxtcp_pmgr_chan_del_hdl;
271#endif
272static struct pollmgr_handler pxtcp_pmgr_chan_reset_hdl;
273
274
275/**
276 * Init PXTCP - must be run before either the lwIP tcpip thread or the
277 * poll manager threads have been created.
278 */
279void
280pxtcp_init(void)
281{
282 /*
283 * Create channels.
284 */
285#define CHANNEL(SLOT, NAME) do { \
286 NAME##_hdl.callback = NAME; \
287 NAME##_hdl.data = NULL; \
288 NAME##_hdl.slot = -1; \
289 pollmgr_add_chan(SLOT, &NAME##_hdl); \
290 } while (0)
291
292 CHANNEL(POLLMGR_CHAN_PXTCP_ADD, pxtcp_pmgr_chan_add);
293 CHANNEL(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp_pmgr_chan_pollin);
294 CHANNEL(POLLMGR_CHAN_PXTCP_POLLOUT, pxtcp_pmgr_chan_pollout);
295#if !HAVE_TCP_POLLHUP
296 CHANNEL(POLLMGR_CHAN_PXTCP_DEL, pxtcp_pmgr_chan_del);
297#endif
298 CHANNEL(POLLMGR_CHAN_PXTCP_RESET, pxtcp_pmgr_chan_reset);
299
300#undef CHANNEL
301
302 /*
303 * Listen to outgoing connections from guest(s).
304 */
305 tcp_proxy_accept(pxtcp_pcb_heard);
306}
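
/*
 * Illustrative note (not in the original source): for the first channel the
 * CHANNEL() macro above expands, modulo the do { } while (0) wrapper, to:
 *
 *     pxtcp_pmgr_chan_add_hdl.callback = pxtcp_pmgr_chan_add;
 *     pxtcp_pmgr_chan_add_hdl.data = NULL;
 *     pxtcp_pmgr_chan_add_hdl.slot = -1;
 *     pollmgr_add_chan(POLLMGR_CHAN_PXTCP_ADD, &pxtcp_pmgr_chan_add_hdl);
 *
 * i.e. it wires a statically allocated pollmgr_handler to its callback and
 * registers it on a dedicated channel slot.
 */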
307
308
309/**
310 * Syntactic sugar for sending pxtcp pointer over poll manager
311 * channel. Used by lwip thread functions.
312 */
313static ssize_t
314pxtcp_chan_send(enum pollmgr_slot_t slot, struct pxtcp *pxtcp)
315{
316 return pollmgr_chan_send(slot, &pxtcp, sizeof(pxtcp));
317}
318
319
320/**
321 * Syntactic sugar for sending weak reference to pxtcp over poll
322 * manager channel. Used by lwip thread functions.
323 */
324static ssize_t
325pxtcp_chan_send_weak(enum pollmgr_slot_t slot, struct pxtcp *pxtcp)
326{
327 pollmgr_refptr_weak_ref(pxtcp->rp);
328 return pollmgr_chan_send(slot, &pxtcp->rp, sizeof(pxtcp->rp));
329}
330
331
332/**
333 * Counterpart of pxtcp_chan_send().
334 */
335static struct pxtcp *
336pxtcp_chan_recv(struct pollmgr_handler *handler, SOCKET fd, int revents)
337{
338 struct pxtcp *pxtcp;
339
340 pxtcp = (struct pxtcp *)pollmgr_chan_recv_ptr(handler, fd, revents);
341 return pxtcp;
342}
343
344
345/**
346 * Counterpart of pxtcp_chan_send_weak().
347 */
348static struct pxtcp *
349pxtcp_chan_recv_strong(struct pollmgr_handler *handler, SOCKET fd, int revents)
350{
351 struct pollmgr_refptr *rp;
352 struct pollmgr_handler *base;
353 struct pxtcp *pxtcp;
354
355 rp = (struct pollmgr_refptr *)pollmgr_chan_recv_ptr(handler, fd, revents);
356 base = (struct pollmgr_handler *)pollmgr_refptr_get(rp);
357 pxtcp = (struct pxtcp *)base;
358
359 return pxtcp;
360}
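
/*
 * Note on the two channel flavours above (added for clarity, not in the
 * original source): the ADD channel carries a plain pxtcp pointer
 * (pxtcp_chan_send() / pxtcp_chan_recv()) because the poll manager does not
 * know about that pxtcp yet.  The POLLIN, POLLOUT, DEL and RESET channels
 * carry a weak reference to the refptr instead (pxtcp_chan_send_weak() /
 * pxtcp_chan_recv_strong()), so that if the pxtcp is deregistered and
 * deleted while a message is still in transit, pollmgr_refptr_get() yields
 * NULL and the channel handler simply ignores the stale message.
 */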
361
362
363/**
364 * Register pxtcp with poll manager.
365 *
366 * Used for POLLMGR_CHAN_PXTCP_ADD and by port-forwarding. Since
367 * error handling is different in these two cases, we leave it up to
368 * the caller.
369 */
370int
371pxtcp_pmgr_add(struct pxtcp *pxtcp)
372{
373 int status;
374
375 LWIP_ASSERT1(pxtcp != NULL);
376 LWIP_ASSERT1(pxtcp->sock >= 0);
377 LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
378 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
379 LWIP_ASSERT1(pxtcp->pmhdl.slot < 0);
380
381 status = pollmgr_add(&pxtcp->pmhdl, pxtcp->sock, pxtcp->events);
382 return status;
383}
384
385
386/**
387 * Unregister pxtcp with poll manager.
388 *
389 * Used for POLLMGR_CHAN_PXTCP_RESET and by port-forwarding (on error
390 * leg).
391 */
392void
393pxtcp_pmgr_del(struct pxtcp *pxtcp)
394{
395 LWIP_ASSERT1(pxtcp != NULL);
396
397 pollmgr_del_slot(pxtcp->pmhdl.slot);
398}
399
400
401/**
402 * POLLMGR_CHAN_PXTCP_ADD handler.
403 *
404 * Get new pxtcp from lwip thread and start polling its socket.
405 */
406static int
407pxtcp_pmgr_chan_add(struct pollmgr_handler *handler, SOCKET fd, int revents)
408{
409 struct pxtcp *pxtcp;
410 int status;
411
412 pxtcp = pxtcp_chan_recv(handler, fd, revents);
413 DPRINTF0(("pxtcp_add: new pxtcp %p; pcb %p; sock %d\n",
414 (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
415
416 status = pxtcp_pmgr_add(pxtcp);
417 if (status < 0) {
418 (void) pxtcp_schedule_reset(pxtcp);
419 }
420
421 return POLLIN;
422}
423
424
425/**
426 * POLLMGR_CHAN_PXTCP_POLLOUT handler.
427 *
428 * pxtcp_pcb_forward_outbound() on the lwIP thread tried to send data
429 * and failed; it now requests us to poll the socket for POLLOUT and to
430 * schedule pxtcp_pcb_forward_outbound() when the sock is writable again.
431 */
432static int
433pxtcp_pmgr_chan_pollout(struct pollmgr_handler *handler, SOCKET fd, int revents)
434{
435 struct pxtcp *pxtcp;
436
437 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
438 DPRINTF0(("pxtcp_pollout: pxtcp %p\n", (void *)pxtcp));
439
440 if (pxtcp == NULL) {
441 return POLLIN;
442 }
443
444 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
445 LWIP_ASSERT1(pxtcp->pmhdl.slot > 0);
446
447 pxtcp->events |= POLLOUT;
448 pollmgr_update_events(pxtcp->pmhdl.slot, pxtcp->events);
449
450 return POLLIN;
451}
452
453
454/**
455 * POLLMGR_CHAN_PXTCP_POLLIN handler.
456 */
457static int
458pxtcp_pmgr_chan_pollin(struct pollmgr_handler *handler, SOCKET fd, int revents)
459{
460 struct pxtcp *pxtcp;
461
462 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
463 DPRINTF2(("pxtcp_pollin: pxtcp %p\n", (void *)pxtcp));
464
465 if (pxtcp == NULL) {
466 return POLLIN;
467 }
468
469 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
470 LWIP_ASSERT1(pxtcp->pmhdl.slot > 0);
471
472 pxtcp->events |= POLLIN;
473 pollmgr_update_events(pxtcp->pmhdl.slot, pxtcp->events);
474
475 return POLLIN;
476}
477
478
479#if !HAVE_TCP_POLLHUP
480/**
481 * POLLMGR_CHAN_PXTCP_DEL handler.
482 *
483 * Schedule pxtcp deletion. We only need this if the host system doesn't
484 * report POLLHUP for fully closed tcp sockets.
485 */
486static int
487pxtcp_pmgr_chan_del(struct pollmgr_handler *handler, SOCKET fd, int revents)
488{
489 struct pxtcp *pxtcp;
490
491 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
492 if (pxtcp == NULL) {
493 return POLLIN;
494 }
495
496 DPRINTF(("PXTCP_DEL: pxtcp %p; pcb %p; sock %d\n",
497 (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
498
499 LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
500 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
501
502 LWIP_ASSERT1(pxtcp->inbound_close); /* EOF read */
503 LWIP_ASSERT1(pxtcp->outbound_close_done); /* EOF sent */
504
505 pxtcp_pmgr_del(pxtcp);
506 (void) pxtcp_schedule_delete(pxtcp);
507
508 return POLLIN;
509}
510#endif /* !HAVE_TCP_POLLHUP */
511
512
513/**
514 * POLLMGR_CHAN_PXTCP_RESET handler.
515 *
516 * Close the socket with RST and delete pxtcp.
517 */
518static int
519pxtcp_pmgr_chan_reset(struct pollmgr_handler *handler, SOCKET fd, int revents)
520{
521 struct pxtcp *pxtcp;
522
523 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
524 if (pxtcp == NULL) {
525 return POLLIN;
526 }
527
528 DPRINTF0(("PXTCP_RESET: pxtcp %p; pcb %p; sock %d\n",
529 (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
530
531 LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
532 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
533
534 pxtcp_pmgr_del(pxtcp);
535
536 proxy_reset_socket(pxtcp->sock);
537 pxtcp->sock = INVALID_SOCKET;
538
539 (void) pxtcp_schedule_reset(pxtcp);
540
541 return POLLIN;
542}
543
544
545static struct pxtcp *
546pxtcp_allocate(void)
547{
548 struct pxtcp *pxtcp;
549
550 pxtcp = (struct pxtcp *)malloc(sizeof(*pxtcp));
551 if (pxtcp == NULL) {
552 return NULL;
553 }
554
555 pxtcp->pmhdl.callback = NULL;
556 pxtcp->pmhdl.data = (void *)pxtcp;
557 pxtcp->pmhdl.slot = -1;
558
559 pxtcp->pcb = NULL;
560 pxtcp->sock = INVALID_SOCKET;
561 pxtcp->events = 0;
562 pxtcp->sockerr = 0;
563 pxtcp->netif = NULL;
564 pxtcp->unsent = NULL;
565 pxtcp->outbound_close = 0;
566 pxtcp->outbound_close_done = 0;
567 pxtcp->inbound_close = 0;
568 pxtcp->inbound_close_done = 0;
569 pxtcp->inbound_pull = 0;
570 pxtcp->deferred_delete = 0;
571
572 pxtcp->inbuf.bufsize = 64 * 1024;
573 pxtcp->inbuf.buf = (char *)malloc(pxtcp->inbuf.bufsize);
574 if (pxtcp->inbuf.buf == NULL) {
575 free(pxtcp);
576 return NULL;
577 }
578 pxtcp->inbuf.vacant = 0;
579 pxtcp->inbuf.unacked = 0;
580 pxtcp->inbuf.unsent = 0;
581
582 pxtcp->rp = pollmgr_refptr_create(&pxtcp->pmhdl);
583 if (pxtcp->rp == NULL) {
584 free(pxtcp->inbuf.buf);
585 free(pxtcp);
586 return NULL;
587 }
588
589#define CALLBACK_MSG(MSG, FUNC) \
590 do { \
591 pxtcp->MSG.type = TCPIP_MSG_CALLBACK_STATIC; \
592 pxtcp->MSG.sem = NULL; \
593 pxtcp->MSG.msg.cb.function = FUNC; \
594 pxtcp->MSG.msg.cb.ctx = (void *)pxtcp; \
595 } while (0)
596
597 CALLBACK_MSG(msg_delete, pxtcp_pcb_delete_pxtcp);
598 CALLBACK_MSG(msg_reset, pxtcp_pcb_reset_pxtcp);
599 CALLBACK_MSG(msg_accept, pxtcp_pcb_accept_confirm);
600 CALLBACK_MSG(msg_outbound, pxtcp_pcb_write_outbound);
601 CALLBACK_MSG(msg_inbound, pxtcp_pcb_write_inbound);
602 CALLBACK_MSG(msg_inpull, pxtcp_pcb_pull_inbound);
603
604#undef CALLBACK_MSG
605
606 return pxtcp;
607}
608
609
610/**
611 * Exported to fwtcp to create pxtcp for incoming port-forwarded
612 * connections. Completed with pcb in pxtcp_pcb_connect().
613 */
614struct pxtcp *
615pxtcp_create_forwarded(SOCKET sock)
616{
617 struct pxtcp *pxtcp;
618
619 pxtcp = pxtcp_allocate();
620 if (pxtcp == NULL) {
621 return NULL;
622 }
623
624 pxtcp->sock = sock;
625 pxtcp->pmhdl.callback = pxtcp_pmgr_pump;
626 pxtcp->events = 0;
627
628 return pxtcp;
629}
630
631
632static void
633pxtcp_pcb_associate(struct pxtcp *pxtcp, struct tcp_pcb *pcb)
634{
635 LWIP_ASSERT1(pxtcp != NULL);
636 LWIP_ASSERT1(pcb != NULL);
637
638 pxtcp->pcb = pcb;
639
640 tcp_arg(pcb, pxtcp);
641
642 tcp_recv(pcb, pxtcp_pcb_recv);
643 tcp_sent(pcb, pxtcp_pcb_sent);
644 tcp_poll(pcb, NULL, 255);
645 tcp_err(pcb, pxtcp_pcb_err);
646}
647
648
649static void
650pxtcp_free(struct pxtcp *pxtcp)
651{
652 if (pxtcp->unsent != NULL) {
653 pbuf_free(pxtcp->unsent);
654 }
655 if (pxtcp->inbuf.buf != NULL) {
656 free(pxtcp->inbuf.buf);
657 }
658 free(pxtcp);
659}
660
661
662/**
663 * Counterpart to pxtcp_create_forwarded(), to destroy a pxtcp that
664 * fwtcp failed to register with the poll manager and hence never posted
665 * to the lwip thread for doing the connect.
666 */
667void
668pxtcp_cancel_forwarded(struct pxtcp *pxtcp)
669{
670 LWIP_ASSERT1(pxtcp->pcb == NULL);
671 pxtcp_pcb_reset_pxtcp(pxtcp);
672}
673
674
675static void
676pxtcp_pcb_dissociate(struct pxtcp *pxtcp)
677{
678 if (pxtcp == NULL || pxtcp->pcb == NULL) {
679 return;
680 }
681
682 DPRINTF(("%s: pxtcp %p <-> pcb %p\n",
683 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
684
685 /*
686 * We must have dissociated from a fully closed pcb immediately
687 * since lwip recycles them and we don't want to mess with what
688 * would be someone else's pcb that we happen to have a stale
689 * pointer to.
690 */
691 LWIP_ASSERT1(pxtcp->pcb->callback_arg == pxtcp);
692
693 tcp_recv(pxtcp->pcb, NULL);
694 tcp_sent(pxtcp->pcb, NULL);
695 tcp_poll(pxtcp->pcb, NULL, 255);
696 tcp_err(pxtcp->pcb, NULL);
697 tcp_arg(pxtcp->pcb, NULL);
698 pxtcp->pcb = NULL;
699}
700
701
702/**
703 * Lwip thread callback invoked via pxtcp::msg_delete
704 *
705 * Since we use static messages to communicate to the lwip thread, we
706 * cannot delete pxtcp without making sure there are no unprocessed
707 * messages in the lwip thread mailbox.
708 *
709 * The easiest way to ensure that is to send this "delete" message as
710 * the last one and when it's processed we know there are no more and
711 * it's safe to delete pxtcp.
712 *
713 * Poll manager handlers should use pxtcp_schedule_delete()
714 * convenience function.
715 */
716static void
717pxtcp_pcb_delete_pxtcp(void *ctx)
718{
719 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
720
721 DPRINTF(("%s: pxtcp %p, pcb %p, sock %d%s\n",
722 __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock,
723 (pxtcp->deferred_delete && !pxtcp->inbound_pull
724 ? " (was deferred)" : "")));
725
726 LWIP_ASSERT1(pxtcp != NULL);
727 LWIP_ASSERT1(pxtcp->pmhdl.slot < 0);
728 LWIP_ASSERT1(pxtcp->outbound_close_done);
729 LWIP_ASSERT1(pxtcp->inbound_close); /* not necessarily done */
730
731
732 /*
733 * pxtcp is no longer registered with poll manager, so it's safe
734 * to close the socket.
735 */
736 if (pxtcp->sock != INVALID_SOCKET) {
737 int status = closesocket(pxtcp->sock);
738 DPRINTF(("%s:%d=closesocket(%d)\n", __func__, status, pxtcp->sock));
739
740 pxtcp->sock = INVALID_SOCKET;
741 }
742
743 /*
744 * We might have already dissociated from a fully closed pcb, or
745 * guest might have sent us a reset while msg_delete was in
746 * transit. If there's no pcb, we are done.
747 */
748 if (pxtcp->pcb == NULL) {
749 pollmgr_refptr_unref(pxtcp->rp);
750 pxtcp_free(pxtcp);
751 return;
752 }
753
754 /*
755 * Have we completely forwarded all inbound traffic to the guest?
756 *
757 * We may still be waiting for ACKs. We may have failed to send
758 * some of the data (tcp_write() failed with ERR_MEM). We may
759 * have failed to send the FIN (tcp_shutdown() failed with
760 * ERR_MEM).
761 */
762 if (pxtcp_pcb_forward_inbound_done(pxtcp)) {
763 pxtcp_pcb_dissociate(pxtcp);
764 pollmgr_refptr_unref(pxtcp->rp);
765 pxtcp_free(pxtcp);
766 }
767 else {
768 DPRINTF2(("delete: pxtcp %p; pcb %p:"
769 " unacked %d, unsent %d, vacant %d, %s - DEFER!\n",
770 (void *)pxtcp, (void *)pxtcp->pcb,
771 (int)pxtcp->inbuf.unacked,
772 (int)pxtcp->inbuf.unsent,
773 (int)pxtcp->inbuf.vacant,
774 pxtcp->inbound_close_done ? "FIN sent" : "FIN is NOT sent"));
775
776 LWIP_ASSERT1(!pxtcp->deferred_delete);
777 pxtcp->deferred_delete = 1;
778 }
779}
780
781
782/**
783 * If we couldn't delete pxtcp right away in the msg_delete callback
784 * from the poll manager thread, we repeat the check at the end of
785 * relevant pcb callbacks.
786 */
787DECLINLINE(void)
788pxtcp_pcb_maybe_deferred_delete(struct pxtcp *pxtcp)
789{
790 if (pxtcp->deferred_delete && pxtcp_pcb_forward_inbound_done(pxtcp)) {
791 pxtcp_pcb_delete_pxtcp(pxtcp);
792 }
793}
794
795
796/**
797 * Poll manager callbacks should use this convenience wrapper to
798 * schedule pxtcp deletion on the lwip thread and to deregister from
799 * the poll manager.
800 */
801static int
802pxtcp_schedule_delete(struct pxtcp *pxtcp)
803{
804 /*
805 * If pollmgr_refptr_get() is called by any channel before
806 * scheduled deletion happens, let them know we are gone.
807 */
808 pxtcp->pmhdl.slot = -1;
809
810 /*
811 * Schedule deletion. Since poll manager thread may be pre-empted
812 * right after we send the message, the deletion may actually
813 * happen on the lwip thread before we return from this function,
814 * so it's not safe to refer to pxtcp after this call.
815 */
816 proxy_lwip_post(&pxtcp->msg_delete);
817
818 /* tell poll manager to deregister us */
819 return -1;
820}
821
822
823/**
824 * Lwip thread callback invoked via pxtcp::msg_reset
825 *
826 * Like pxtcp_pcb_delete_pxtcp(), but sends RST to the guest before
827 * deleting this pxtcp.
828 */
829static void
830pxtcp_pcb_reset_pxtcp(void *ctx)
831{
832 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
833 LWIP_ASSERT1(pxtcp != NULL);
834
835 DPRINTF0(("%s: pxtcp %p, pcb %p, sock %d\n",
836 __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
837
838 if (pxtcp->sock != INVALID_SOCKET) {
839 proxy_reset_socket(pxtcp->sock);
840 pxtcp->sock = INVALID_SOCKET;
841 }
842
843 if (pxtcp->pcb != NULL) {
844 struct tcp_pcb *pcb = pxtcp->pcb;
845 pxtcp_pcb_dissociate(pxtcp);
846 tcp_abort(pcb);
847 }
848
849 pollmgr_refptr_unref(pxtcp->rp);
850 pxtcp_free(pxtcp);
851}
852
853
854
855/**
856 * Poll manager callbacks should use this convenience wrapper to
857 * schedule pxtcp reset and deletion on the lwip thread and to
858 * deregister from the poll manager.
859 *
860 * See pxtcp_schedule_delete() for additional comments.
861 */
862static int
863pxtcp_schedule_reset(struct pxtcp *pxtcp)
864{
865 pxtcp->pmhdl.slot = -1;
866 proxy_lwip_post(&pxtcp->msg_reset);
867 return -1;
868}
869
870
871/**
872 * Reject proxy connection attempt. Depending on the cause (sockerr)
873 * we may just drop the pcb silently, generate an ICMP datagram or
874 * send TCP reset.
875 */
876static void
877pxtcp_pcb_reject(struct netif *netif, struct tcp_pcb *pcb,
878 struct pbuf *p, int sockerr)
879{
880 struct netif *oif;
881 int reset = 0;
882
883 oif = ip_current_netif();
884 ip_current_netif() = netif;
885
886 if (sockerr == ECONNREFUSED) {
887 reset = 1;
888 }
889 else if (PCB_ISIPV6(pcb)) {
890 if (sockerr == EHOSTDOWN) {
891 icmp6_dest_unreach(p, ICMP6_DUR_ADDRESS); /* XXX: ??? */
892 }
893 else if (sockerr == EHOSTUNREACH
894 || sockerr == ENETDOWN
895 || sockerr == ENETUNREACH)
896 {
897 icmp6_dest_unreach(p, ICMP6_DUR_NO_ROUTE);
898 }
899 }
900 else {
901 if (sockerr == EHOSTDOWN
902 || sockerr == EHOSTUNREACH
903 || sockerr == ENETDOWN
904 || sockerr == ENETUNREACH)
905 {
906 icmp_dest_unreach(p, ICMP_DUR_HOST);
907 }
908 }
909
910 ip_current_netif() = oif;
911
912 tcp_abandon(pcb, reset);
913}
914
915
916/**
917 * Called from poll manager thread via pxtcp::msg_accept when proxy
918 * failed to connect to the destination. Also called when we failed
919 * to register pxtcp with poll manager.
920 *
921 * This is like pxtcp_pcb_reset_pxtcp() but is more discriminate in
922 * how this unestablished connection is terminated.
923 */
924static void
925pxtcp_pcb_accept_refuse(void *ctx)
926{
927 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
928
929 DPRINTF0(("%s: pxtcp %p, pcb %p, sock %d: errno %d\n",
930 __func__, (void *)pxtcp, (void *)pxtcp->pcb,
931 pxtcp->sock, pxtcp->sockerr));
932
933 LWIP_ASSERT1(pxtcp != NULL);
934 LWIP_ASSERT1(pxtcp->sock == INVALID_SOCKET);
935
936 if (pxtcp->pcb != NULL) {
937 struct tcp_pcb *pcb = pxtcp->pcb;
938 pxtcp_pcb_dissociate(pxtcp);
939 pxtcp_pcb_reject(pxtcp->netif, pcb, pxtcp->unsent, pxtcp->sockerr);
940 }
941
942 pollmgr_refptr_unref(pxtcp->rp);
943 pxtcp_free(pxtcp);
944}
945
946
947/**
948 * Convenience wrapper for poll manager connect callback to reject
949 * connection attempt.
950 *
951 * Like pxtcp_schedule_reset(), but the callback is more discriminate
952 * in how this unestablished connection is terminated.
953 */
954static int
955pxtcp_schedule_reject(struct pxtcp *pxtcp)
956{
957 pxtcp->msg_accept.msg.cb.function = pxtcp_pcb_accept_refuse;
958 pxtcp->pmhdl.slot = -1;
959 proxy_lwip_post(&pxtcp->msg_accept);
960 return -1;
961}
962
963
964/**
965 * Global tcp_proxy_accept() callback for proxied outgoing TCP
966 * connections from guest(s).
967 */
968static err_t
969pxtcp_pcb_heard(void *arg, struct tcp_pcb *newpcb, err_t error)
970{
971 struct pbuf *p = (struct pbuf *)arg;
972 struct pxtcp *pxtcp;
973 ipX_addr_t dst_addr;
974 int sdom;
975 SOCKET sock;
976 ssize_t nsent;
977 int sockerr = 0;
978
979 LWIP_UNUSED_ARG(error); /* always ERR_OK */
980
981 /*
982 * TCP first calls the accept callback when it receives the first SYN
983 * and "tentatively accepts" the new proxied connection attempt. When
984 * the proxy "confirms" the SYN and sends SYN|ACK and the guest
985 * replies with ACK, the accept callback is called again, this time
986 * with the established connection.
987 */
988 LWIP_ASSERT1(newpcb->state == SYN_RCVD_0);
989 tcp_accept(newpcb, pxtcp_pcb_accept);
990 tcp_arg(newpcb, NULL);
991
992 tcp_setprio(newpcb, TCP_PRIO_MAX);
993
994 pxremap_outbound_ipX(PCB_ISIPV6(newpcb), &dst_addr, &newpcb->local_ip);
995
996 sdom = PCB_ISIPV6(newpcb) ? PF_INET6 : PF_INET;
997 sock = proxy_connected_socket(sdom, SOCK_STREAM,
998 &dst_addr, newpcb->local_port);
999 if (sock == INVALID_SOCKET) {
1000 sockerr = errno;
1001 goto abort;
1002 }
1003
1004 pxtcp = pxtcp_allocate();
1005 if (pxtcp == NULL) {
1006 proxy_reset_socket(sock);
1007 goto abort;
1008 }
1009
1010 /* save initial datagram in case we need to reply with ICMP */
1011 pbuf_ref(p);
1012 pxtcp->unsent = p;
1013 pxtcp->netif = ip_current_netif();
1014
1015 pxtcp_pcb_associate(pxtcp, newpcb);
1016 pxtcp->sock = sock;
1017
1018 pxtcp->pmhdl.callback = pxtcp_pmgr_connect;
1019 pxtcp->events = POLLOUT;
1020
1021 nsent = pxtcp_chan_send(POLLMGR_CHAN_PXTCP_ADD, pxtcp);
1022 if (nsent < 0) {
1023 pxtcp->sock = INVALID_SOCKET;
1024 proxy_reset_socket(sock);
1025 pxtcp_pcb_accept_refuse(pxtcp);
1026 return ERR_ABRT;
1027 }
1028
1029 return ERR_OK;
1030
1031 abort:
1032 DPRINTF0(("%s: pcb %p, sock %d: errno %d\n",
1033 __func__, (void *)newpcb, sock, sockerr));
1034 pxtcp_pcb_reject(ip_current_netif(), newpcb, p, sockerr);
1035 return ERR_ABRT;
1036}
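
/*
 * Overview of the outgoing-connection sequence (added for clarity, not in
 * the original source): pxtcp_pcb_heard() above runs on the guest's first
 * SYN, creates the host socket and hands the new pxtcp to the poll manager
 * over the ADD channel.  pxtcp_pmgr_connect() then waits for connect(2) to
 * complete and posts msg_accept, so pxtcp_pcb_accept_confirm() on the lwIP
 * thread sends SYN|ACK to the guest.  When the guest's ACK arrives, lwIP
 * calls pxtcp_pcb_accept() with the established connection, and from then
 * on data is pumped by pxtcp_pmgr_pump() and the pcb callbacks.
 */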
1037
1038
1039/**
1040 * tcp_proxy_accept() callback for accepted proxied outgoing TCP
1041 * connections from guest(s). This is the "real" accept, with the
1042 * three-way handshake completed.
1043 */
1044static err_t
1045pxtcp_pcb_accept(void *arg, struct tcp_pcb *pcb, err_t error)
1046{
1047 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1048
1049 LWIP_UNUSED_ARG(pcb); /* used only in asserts */
1050 LWIP_UNUSED_ARG(error); /* always ERR_OK */
1051
1052 LWIP_ASSERT1(pxtcp != NULL);
1053 LWIP_ASSERT1(pxtcp->pcb == pcb);
1054 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
1055
1056 /* send any inbound data that are already queued */
1057 pxtcp_pcb_forward_inbound(pxtcp);
1058 return ERR_OK;
1059}
1060
1061
1062/**
1063 * Initial poll manager callback for proxied outgoing TCP connections.
1064 * pxtcp_pcb_heard() sets pxtcp::pmhdl::callback to this.
1065 *
1066 * Waits for connect(2) to the destination to complete. On success
1067 * replaces itself with pxtcp_pmgr_pump() callback common to all
1068 * established TCP connections.
1069 */
1070static int
1071pxtcp_pmgr_connect(struct pollmgr_handler *handler, SOCKET fd, int revents)
1072{
1073 struct pxtcp *pxtcp;
1074 int sockerr;
1075
1076 pxtcp = (struct pxtcp *)handler->data;
1077 LWIP_ASSERT1(handler == &pxtcp->pmhdl);
1078 LWIP_ASSERT1(fd == pxtcp->sock);
1079
1080 if (revents & (POLLNVAL | POLLHUP | POLLERR)) {
1081 if (revents & POLLNVAL) {
1082 pxtcp->sock = INVALID_SOCKET;
1083 pxtcp->sockerr = ETIMEDOUT;
1084 }
1085 else {
1086 socklen_t optlen = (socklen_t)sizeof(sockerr);
1087 int status;
1088 SOCKET s;
1089
1090 status = getsockopt(pxtcp->sock, SOL_SOCKET, SO_ERROR,
1091 (char *)&pxtcp->sockerr, &optlen);
1092 if (status < 0) { /* should not happen */
1093 sockerr = errno; /* ??? */
1094 perror("connect: getsockopt");
1095 }
1096 else {
1097#ifndef RT_OS_WINDOWS
1098 errno = pxtcp->sockerr; /* to avoid strerror_r */
1099#else
1100 /* see winutils.h */
1101 WSASetLastError(pxtcp->sockerr);
1102#endif
1103 perror("connect");
1104 }
1105 s = pxtcp->sock;
1106 pxtcp->sock = INVALID_SOCKET;
1107 status = closesocket(s);
1108 DPRINTF(("%s: %d = closesocket(%d)\n", __func__, status, s));
1109 }
1110 return pxtcp_schedule_reject(pxtcp);
1111 }
1112
1113 if (revents & POLLOUT) { /* connect is successful */
1114 /* confirm accept to the guest */
1115 proxy_lwip_post(&pxtcp->msg_accept);
1116
1117 /*
1118 * Switch to common callback used for all established proxied
1119 * connections.
1120 */
1121 pxtcp->pmhdl.callback = pxtcp_pmgr_pump;
1122
1123 /*
1124 * Initially we poll for incoming traffic only. Outgoing
1125 * traffic is fast-forwarded by pxtcp_pcb_recv(); if it fails
1126 * it will ask us to poll for POLLOUT too.
1127 */
1128 pxtcp->events = POLLIN;
1129 return pxtcp->events;
1130 }
1131
1132 /* should never get here */
1133 DPRINTF0(("%s: pxtcp %p, sock %d: unexpected revents 0x%x\n",
1134 __func__, (void *)pxtcp, fd, revents));
1135 return pxtcp_schedule_reset(pxtcp);
1136}
1137
1138
1139/**
1140 * Called from poll manager thread via pxtcp::msg_accept when proxy
1141 * connected to the destination. Finalize accept by sending SYN|ACK
1142 * to the guest.
1143 */
1144static void
1145pxtcp_pcb_accept_confirm(void *ctx)
1146{
1147 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
1148 err_t error;
1149
1150 LWIP_ASSERT1(pxtcp != NULL);
1151 if (pxtcp->pcb == NULL) {
1152 return;
1153 }
1154
1155 /* we are not going to reply with ICMP, so we can drop initial pbuf */
1156 LWIP_ASSERT1(pxtcp->unsent != NULL);
1157 pbuf_free(pxtcp->unsent);
1158 pxtcp->unsent = NULL;
1159
1160 error = tcp_proxy_accept_confirm(pxtcp->pcb);
1161
1162 /*
1163 * If lwIP failed to enqueue SYN|ACK because it's out of pbufs it
1164 * abandons the pcb. Retrying that is not very easy, since it
1165 * would require keeping "fractional state". From guest's point
1166 * of view there is no reply to its SYN so it will either resend
1167 * the SYN (effectively triggering full connection retry for us),
1168 * or it will eventually time out.
1169 */
1170 if (error == ERR_ABRT) {
1171 pxtcp->pcb = NULL; /* pcb is gone */
1172 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
1173 }
1174
1175 /*
1176 * else if (error != ERR_OK): even if tcp_output() failed with
1177 * ERR_MEM - don't give up, that SYN|ACK is enqueued and will be
1178 * retransmitted eventually.
1179 */
1180}
1181
1182
1183/**
1184 * Entry point for port-forwarding.
1185 *
1186 * fwtcp accepts new incoming connection, creates pxtcp for the socket
1187 * (with no pcb yet) and adds it to the poll manager (polling for
1188 * errors only). Then it calls this function to construct the pcb and
1189 * perform connection to the guest.
1190 */
1191void
1192pxtcp_pcb_connect(struct pxtcp *pxtcp, const struct fwspec *fwspec)
1193{
1194 struct sockaddr_storage ss;
1195 socklen_t sslen;
1196 struct tcp_pcb *pcb;
1197 ipX_addr_t src_addr, dst_addr;
1198 u16_t src_port, dst_port;
1199 int status;
1200 err_t error;
1201
1202 LWIP_ASSERT1(pxtcp != NULL);
1203 LWIP_ASSERT1(pxtcp->pcb == NULL);
1204 LWIP_ASSERT1(fwspec->stype == SOCK_STREAM);
1205
1206 pcb = tcp_new();
1207 if (pcb == NULL) {
1208 goto reset;
1209 }
1210
1211 tcp_setprio(pcb, TCP_PRIO_MAX);
1212 pxtcp_pcb_associate(pxtcp, pcb);
1213
1214 sslen = sizeof(ss);
1215 status = getpeername(pxtcp->sock, (struct sockaddr *)&ss, &sslen);
1216 if (status == SOCKET_ERROR) {
1217 goto reset;
1218 }
1219
1220 /* nit: compares PF and AF, but they are the same everywhere */
1221 LWIP_ASSERT1(ss.ss_family == fwspec->sdom);
1222
1223 status = fwany_ipX_addr_set_src(&src_addr, (const struct sockaddr *)&ss);
1224 if (status == PXREMAP_FAILED) {
1225 goto reset;
1226 }
1227
1228 if (ss.ss_family == PF_INET) {
1229 const struct sockaddr_in *peer4 = (const struct sockaddr_in *)&ss;
1230
1231 src_port = peer4->sin_port;
1232
1233 memcpy(&dst_addr.ip4, &fwspec->dst.sin.sin_addr, sizeof(ip_addr_t));
1234 dst_port = fwspec->dst.sin.sin_port;
1235 }
1236 else { /* PF_INET6 */
1237 const struct sockaddr_in6 *peer6 = (const struct sockaddr_in6 *)&ss;
1238 ip_set_v6(pcb, 1);
1239
1240 src_port = peer6->sin6_port;
1241
1242 memcpy(&dst_addr.ip6, &fwspec->dst.sin6.sin6_addr, sizeof(ip6_addr_t));
1243 dst_port = fwspec->dst.sin6.sin6_port;
1244 }
1245
1246 /* lwip port arguments are in host order */
1247 src_port = ntohs(src_port);
1248 dst_port = ntohs(dst_port);
1249
1250 error = tcp_proxy_bind(pcb, ipX_2_ip(&src_addr), src_port);
1251 if (error != ERR_OK) {
1252 goto reset;
1253 }
1254
1255 error = tcp_connect(pcb, ipX_2_ip(&dst_addr), dst_port,
1256 /* callback: */ pxtcp_pcb_connected);
1257 if (error != ERR_OK) {
1258 goto reset;
1259 }
1260
1261 return;
1262
1263 reset:
1264 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
1265}
1266
1267
1268/**
1269 * Port-forwarded connection to guest is successful, pump data.
1270 */
1271static err_t
1272pxtcp_pcb_connected(void *arg, struct tcp_pcb *pcb, err_t error)
1273{
1274 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1275
1276 LWIP_ASSERT1(error == ERR_OK); /* always called with ERR_OK */
1277 LWIP_UNUSED_ARG(error);
1278
1279 LWIP_ASSERT1(pxtcp != NULL);
1280 LWIP_ASSERT1(pxtcp->pcb == pcb);
1281 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
1282 LWIP_UNUSED_ARG(pcb);
1283
1284 DPRINTF0(("%s: new pxtcp %p; pcb %p; sock %d\n",
1285 __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
1286
1287 /* ACK on connection is like ACK on data in pxtcp_pcb_sent() */
1288 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp);
1289
1290 return ERR_OK;
1291}
1292
1293
1294/**
1295 * tcp_recv() callback.
1296 */
1297static err_t
1298pxtcp_pcb_recv(void *arg, struct tcp_pcb *pcb, struct pbuf *p, err_t error)
1299{
1300 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1301
1302 LWIP_ASSERT1(error == ERR_OK); /* always called with ERR_OK */
1303 LWIP_UNUSED_ARG(error);
1304
1305 LWIP_ASSERT1(pxtcp != NULL);
1306 LWIP_ASSERT1(pxtcp->pcb == pcb);
1307 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
1308 LWIP_UNUSED_ARG(pcb);
1309
1310
1311 /*
1312 * Are we done sending the previous batch?
1313 */
1314 if (pxtcp->unsent != NULL) {
1315 if (p != NULL) {
1316 /*
1317 * Return an error to tell TCP to hold onto that pbuf.
1318 * It will be presented to us later from tcp_fasttmr().
1319 */
1320 return ERR_WOULDBLOCK;
1321 }
1322 else {
1323 /*
1324 * Unlike data, p == NULL indicating orderly shutdown is
1325 * NOT presented to us again
1326 */
1327 pxtcp->outbound_close = 1;
1328 return ERR_OK;
1329 }
1330 }
1331
1332
1333 /*
1334 * Guest closed?
1335 */
1336 if (p == NULL) {
1337 pxtcp->outbound_close = 1;
1338 pxtcp_pcb_forward_outbound_close(pxtcp);
1339 return ERR_OK;
1340 }
1341
1342
1343 /*
1344 * Got data, send what we can without blocking.
1345 */
1346 return pxtcp_pcb_forward_outbound(pxtcp, p);
1347}
1348
1349
1350/**
1351 * Guest half-closed its TX side of the connection.
1352 *
1353 * Called either immediately from pxtcp_pcb_recv() when it gets NULL,
1354 * or from pxtcp_pcb_forward_outbound() when it finishes forwarding
1355 * previously unsent data and sees pxtcp::outbound_close flag saved by
1356 * pxtcp_pcb_recv().
1357 */
1358static void
1359pxtcp_pcb_forward_outbound_close(struct pxtcp *pxtcp)
1360{
1361 struct tcp_pcb *pcb;
1362
1363 LWIP_ASSERT1(pxtcp != NULL);
1364 LWIP_ASSERT1(pxtcp->outbound_close);
1365 LWIP_ASSERT1(!pxtcp->outbound_close_done);
1366
1367 pcb = pxtcp->pcb;
1368 LWIP_ASSERT1(pcb != NULL);
1369
1370 DPRINTF(("outbound_close: pxtcp %p; pcb %p %s\n",
1371 (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
1372
1373
1374 /*
1375 * NB: set the flag first, since shutdown() will trigger POLLHUP
1376 * if inbound is already closed, and poll manager asserts
1377 * outbound_close_done (maybe it should not?).
1378 */
1379 pxtcp->outbound_close_done = 1;
1380 shutdown(pxtcp->sock, SHUT_WR); /* half-close the socket */
1381
1382#if !HAVE_TCP_POLLHUP
1383 /*
1384 * On NetBSD POLLHUP is not reported for TCP sockets, so we need
1385 * to nudge poll manager manually.
1386 */
1387 if (pxtcp->inbound_close) {
1388 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_DEL, pxtcp);
1389 }
1390#endif
1391
1392
1393 /* no more outbound data coming to us */
1394 tcp_recv(pcb, NULL);
1395
1396 /*
1397 * If we have already done inbound close previously (active close
1398 * on the pcb), then we must not hold onto a pcb in TIME_WAIT
1399 * state since those will be recycled by lwip when it runs out of
1400 * free pcbs in the pool.
1401 *
1402 * The test is true also for a pcb in CLOSING state that waits
1403 * just for the ACK of its FIN (to transition to TIME_WAIT).
1404 */
1405 if (pxtcp_pcb_forward_inbound_done(pxtcp)) {
1406 pxtcp_pcb_dissociate(pxtcp);
1407 }
1408}
1409
1410
1411/**
1412 * Forward outbound data from pcb to socket.
1413 *
1414 * Called by pxtcp_pcb_recv() to forward new data and by callout
1415 * triggered by POLLOUT on the socket to send previously unsent data.
1416 *
1417 * (Re)schedules a one-time callout if not all data are sent.
1418 */
1419static err_t
1420pxtcp_pcb_forward_outbound(struct pxtcp *pxtcp, struct pbuf *p)
1421{
1422 struct pbuf *qs, *q;
1423 size_t qoff;
1424 size_t forwarded;
1425 int sockerr;
1426
1427#if defined(MSG_NOSIGNAL)
1428 const int send_flags = MSG_NOSIGNAL;
1429#else
1430 const int send_flags = 0;
1431#endif
1432
1433
1434 LWIP_ASSERT1(pxtcp->unsent == NULL || pxtcp->unsent == p);
1435
1436 forwarded = 0;
1437 sockerr = 0;
1438
1439 q = NULL;
1440 qoff = 0;
1441
1442 qs = p;
1443 while (qs != NULL) {
1444#ifndef RT_OS_WINDOWS
1445 struct msghdr mh;
1446#else
1447 int rc;
1448#endif
1449 IOVEC iov[8];
1450 const size_t iovsize = sizeof(iov)/sizeof(iov[0]);
1451 size_t fwd1;
1452 ssize_t nsent;
1453 size_t i;
1454
1455 fwd1 = 0;
1456 for (i = 0, q = qs; i < iovsize && q != NULL; ++i, q = q->next) {
1457 LWIP_ASSERT1(q->len > 0);
1458 IOVEC_SET_BASE(iov[i], q->payload);
1459 IOVEC_SET_LEN(iov[i], q->len);
1460 fwd1 += q->len;
1461 }
1462
1463#ifndef RT_OS_WINDOWS
1464 memset(&mh, 0, sizeof(mh));
1465 mh.msg_iov = iov;
1466 mh.msg_iovlen = i;
1467
1468 nsent = sendmsg(pxtcp->sock, &mh, send_flags);
1469#else
1470 /**
1471 * WSASend() reports the number of bytes sent through a DWORD *, while
1472 * nsent is an SSIZE_T (64-bit), so zero all of nsent's bits before the call.
1473 */
1474 nsent = 0;
1475 rc = WSASend(pxtcp->sock, iov, (DWORD)i, (DWORD *)&nsent, 0, NULL, NULL);
1476 if (rc == SOCKET_ERROR) {
1477 /* WSASend reports SOCKET_ERROR and makes the error code available
1478 * via WSAGetLastError(). Set nsent to -1 so that the code below
1479 * handles the error in BSD style.
1480 */
1481 warn("pxtcp_pcb_forward_outbound:WSASend error:%d nsent:%d\n",
1482 WSAGetLastError(),
1483 nsent);
1484 nsent = -1;
1485 }
1486#endif
1487
1488 if (nsent == (ssize_t)fwd1) {
1489 /* successfully sent this chain fragment completely */
1490 forwarded += nsent;
1491 qs = q;
1492 }
1493 else if (nsent >= 0) {
1494 /* successfully sent only some data */
1495 forwarded += nsent;
1496
1497 /* find the first pbuf that was not completely forwarded */
1498 qoff = nsent;
1499 for (i = 0, q = qs; i < iovsize && q != NULL; ++i, q = q->next) {
1500 if (qoff < q->len) {
1501 break;
1502 }
1503 qoff -= q->len;
1504 }
1505 LWIP_ASSERT1(q != NULL);
1506 LWIP_ASSERT1(qoff < q->len);
1507 break;
1508 }
1509 else {
1510 /*
1511 * Some errors are really not errors - if we get them,
1512 * it's not different from getting nsent == 0, so filter
1513 * them out here.
1514 */
1515 if (errno != EWOULDBLOCK
1516 && errno != EAGAIN
1517 && errno != ENOBUFS
1518 && errno != ENOMEM
1519 && errno != EINTR)
1520 {
1521 sockerr = errno;
1522 }
1523 q = qs;
1524 qoff = 0;
1525 break;
1526 }
1527 }
1528
1529 if (forwarded > 0) {
1530 tcp_recved(pxtcp->pcb, (u16_t)forwarded);
1531 }
1532
1533 if (q == NULL) { /* everything is forwarded? */
1534 LWIP_ASSERT1(sockerr == 0);
1535 LWIP_ASSERT1(forwarded == p->tot_len);
1536
1537 pxtcp->unsent = NULL;
1538 pbuf_free(p);
1539 if (pxtcp->outbound_close) {
1540 pxtcp_pcb_forward_outbound_close(pxtcp);
1541 }
1542 }
1543 else {
1544 if (q != p) {
1545 /* free forwarded pbufs at the beginning of the chain */
1546 pbuf_ref(q);
1547 pbuf_free(p);
1548 }
1549 if (qoff > 0) {
1550 /* advance payload pointer past the forwarded part */
1551 pbuf_header(q, -(s16_t)qoff);
1552 }
1553 pxtcp->unsent = q;
1554
1555 /*
1556 * Have sendmsg() failed?
1557 *
1558 * Connection reset will be detected by poll and
1559 * pxtcp_schedule_reset() will be called.
1560 *
1561 * Otherwise something *really* unexpected must have happened,
1562 * so we'd better abort.
1563 */
1564 if (sockerr != 0 && sockerr != ECONNRESET) {
1565 struct tcp_pcb *pcb = pxtcp->pcb;
1566 pxtcp_pcb_dissociate(pxtcp);
1567
1568 tcp_abort(pcb);
1569
1570 /* call error callback manually since we've already dissociated */
1571 pxtcp_pcb_err((void *)pxtcp, ERR_ABRT);
1572 return ERR_ABRT;
1573 }
1574
1575 /* schedule one-shot POLLOUT on the socket */
1576 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLOUT, pxtcp);
1577 }
1578 return ERR_OK;
1579}
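
/*
 * Worked example (illustration only, not part of the original file):
 * say pxtcp_pcb_recv() hands us a chain of three pbufs of 100, 200 and
 * 50 bytes and sendmsg()/WSASend() accepts only 250 of the 350 bytes.
 * Then forwarded == 250 (acknowledged to lwIP via tcp_recved()), the
 * inner scan leaves q pointing at the second pbuf with qoff == 150, the
 * first pbuf is released (pbuf_ref(q) + pbuf_free(p)), pbuf_header(q, -150)
 * skips the already-sent half of the second pbuf, pxtcp->unsent is set to q
 * and a one-shot POLLOUT is requested so that pxtcp_pcb_write_outbound()
 * can retry the rest later.
 */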
1580
1581
1582/**
1583 * Callback from poll manager (on POLLOUT) to send data from
1584 * pxtcp::unsent pbuf to socket.
1585 */
1586static void
1587pxtcp_pcb_write_outbound(void *ctx)
1588{
1589 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
1590 LWIP_ASSERT1(pxtcp != NULL);
1591
1592 if (pxtcp->pcb == NULL) {
1593 return;
1594 }
1595
1596 pxtcp_pcb_forward_outbound(pxtcp, pxtcp->unsent);
1597}
1598
1599
1600/**
1601 * Common poll manager callback used by both outgoing and incoming
1602 * (port-forwarded) connections that have a connected socket.
1603 */
1604static int
1605pxtcp_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents)
1606{
1607 struct pxtcp *pxtcp;
1608 int status;
1609 int sockerr;
1610
1611 pxtcp = (struct pxtcp *)handler->data;
1612 LWIP_ASSERT1(handler == &pxtcp->pmhdl);
1613 LWIP_ASSERT1(fd == pxtcp->sock);
1614
1615 if (revents & POLLNVAL) {
1616 pxtcp->sock = INVALID_SOCKET;
1617 return pxtcp_schedule_reset(pxtcp);
1618 }
1619
1620 if (revents & POLLERR) {
1621 socklen_t optlen = (socklen_t)sizeof(sockerr);
1622
1623 status = getsockopt(pxtcp->sock, SOL_SOCKET, SO_ERROR,
1624 (char *)&sockerr, &optlen);
1625 if (status < 0) { /* should not happen */
1626 perror("getsockopt");
1627 sockerr = ECONNRESET;
1628 }
1629
1630 DPRINTF0(("sock %d: errno %d\n", fd, sockerr));
1631 return pxtcp_schedule_reset(pxtcp);
1632 }
1633
1634 if (revents & POLLOUT) {
1635 pxtcp->events &= ~POLLOUT;
1636 proxy_lwip_post(&pxtcp->msg_outbound);
1637 }
1638
1639 if (revents & POLLIN) {
1640 ssize_t nread;
1641 int stop_pollin;
1642
1643 nread = pxtcp_sock_read(pxtcp, &stop_pollin);
1644 if (nread < 0) {
1645 sockerr = -(int)nread;
1646 DPRINTF0(("sock %d: errno %d\n", fd, sockerr));
1647 return pxtcp_schedule_reset(pxtcp);
1648 }
1649
1650 if (stop_pollin) {
1651 pxtcp->events &= ~POLLIN;
1652 }
1653
1654 if (nread > 0) {
1655 proxy_lwip_post(&pxtcp->msg_inbound);
1656#if !HAVE_TCP_POLLHUP
1657 /*
1658 * If host does not report POLLHUP for closed sockets
1659 * (e.g. NetBSD) we should check for full close manually.
1660 */
1661 if (pxtcp->inbound_close && pxtcp->outbound_close_done) {
1662 LWIP_ASSERT1((revents & POLLHUP) == 0);
1663 return pxtcp_schedule_delete(pxtcp);
1664 }
1665#endif
1666 }
1667 }
1668
1669#if !HAVE_TCP_POLLHUP
1670 LWIP_ASSERT1((revents & POLLHUP) == 0);
1671#else
1672 if (revents & POLLHUP) {
1673 /*
1674 * Linux and Darwin seem to report POLLHUP when both
1675 * directions are shut down. And they do report POLLHUP even
1676 * when there's unread data (which they also report as POLLIN
1677 * along with that POLLHUP).
1678 *
1679 * FreeBSD (from source inspection) seems to follow Linux,
1680 * reporting POLLHUP when both directions are shut down, but
1681 * POLLHUP is always accompanied with POLLIN.
1682 *
1683 * NetBSD never reports POLLHUP for sockets.
1684 *
1685 * ---
1686 *
1687 * If external half-closes first, we don't get POLLHUP, we
1688 * recv 0 bytes from the socket as EOF indicator, stop polling
1689 * for POLLIN and poll with events == 0 (with occasional
1690 * one-shot POLLOUT). When guest eventually closes, we get
1691 * POLLHUP.
1692 *
1693 * If guest half-closes first things are more tricky. As soon
1694 * as host sees the FIN from external it will spam POLLHUP,
1695 * even when there's unread data. The problem is that we
1696 * might have stopped polling for POLLIN because the ring
1697 * buffer is full or we were polling POLLIN but can't read all
1698 * of the data because the buffer doesn't have enough space.
1699 * Either way, there's unread data but we can't keep polling
1700 * the socket.
1701 */
1702 DPRINTF(("sock %d: HUP\n", fd));
1703 LWIP_ASSERT1(pxtcp->outbound_close_done);
1704
1705 if (pxtcp->inbound_close) {
1706 /* there's no unread data, we are done */
1707 return pxtcp_schedule_delete(pxtcp);
1708 }
1709 else {
1710 /* DPRINTF */ {
1711#ifndef RT_OS_WINDOWS
1712 int unread;
1713#else
1714 u_long unread;
1715#endif
1716 status = ioctlsocket(fd, FIONREAD, &unread);
1717 if (status == SOCKET_ERROR) {
1718 perror("FIONREAD");
1719 }
1720 else {
1721 DPRINTF2(("sock %d: %d UNREAD bytes\n", fd, unread));
1722 }
1723 }
1724
1725 /*
1726 * We cannot just set a flag here and let pxtcp_pcb_sent()
1727 * notice and start pulling, because if we are preempted
1728 * before setting the flag and all data in inbuf is ACKed
1729 * there will be no more calls to pxtcp_pcb_sent() to
1730 * notice the flag.
1731 *
1732 * We cannot set a flag and then send a message to make
1733 * sure it noticed, because if it has and it has read all
1734 * data while the message is in transit it will delete
1735 * pxtcp.
1736 *
1737 * In a sense this message is like msg_delete (except we
1738 * ask to pull some data first).
1739 */
1740 proxy_lwip_post(&pxtcp->msg_inpull);
1741 pxtcp->pmhdl.slot = -1;
1742 return -1;
1743 }
1744 /* NOTREACHED */
1745 } /* POLLHUP */
1746#endif /* HAVE_TCP_POLLHUP */
1747
1748 return pxtcp->events;
1749}
1750
1751
1752/**
1753 * Read data from socket to ringbuf. This may be used both on lwip
1754 * and poll manager threads.
1755 *
1756 * Flag pointed to by pstop is set when further reading is impossible,
1757 * either temporary when buffer is full, or permanently when EOF is
1758 * received.
1759 *
1760 * Returns number of bytes read. NB: EOF is reported as 1!
1761 *
1762 * Returns zero if nothing was read, either because buffer is full, or
1763 * if no data is available (EAGAIN, EINTR &c).
1764 *
1765 * Returns -errno on real socket errors.
1766 */
1767static ssize_t
1768pxtcp_sock_read(struct pxtcp *pxtcp, int *pstop)
1769{
1770 IOVEC iov[2];
1771#ifndef RT_OS_WINDOWS
1772 struct msghdr mh;
1773#else
1774 DWORD dwFlags;
1775 int rc;
1776#endif
1777 int iovlen;
1778 ssize_t nread;
1779
1780 const size_t sz = pxtcp->inbuf.bufsize;
1781 size_t beg, lim, wrnew;
1782
1783 *pstop = 0;
1784
1785#ifndef RT_OS_WINDOWS
1786 memset(&mh, 0, sizeof(mh));
1787 mh.msg_iov = iov;
1788#endif
1789
1790 beg = pxtcp->inbuf.vacant;
1791 IOVEC_SET_BASE(iov[0], &pxtcp->inbuf.buf[beg]);
1792
1793 /* lim is the index we can NOT write to */
1794 lim = pxtcp->inbuf.unacked;
1795 if (lim == 0) {
1796 lim = sz - 1; /* empty slot at the end */
1797 }
1798 else if (lim == 1) {
1799 lim = sz; /* empty slot at the beginning */
1800 }
1801 else {
1802 --lim;
1803 }
1804
1805 if (beg == lim) {
1806 /*
1807 * Buffer is full, stop polling for POLLIN.
1808 *
1809 * pxtcp_pcb_sent() will re-enable POLLIN when guest ACKs
1810 * data, freeing space in the ring buffer.
1811 */
1812 *pstop = 1;
1813 return 0;
1814 }
1815
1816 if (beg < lim) {
1817 /* free space in one chunk */
1818 iovlen = 1;
1819 IOVEC_SET_LEN(iov[0], lim - beg);
1820 }
1821 else {
1822 /* free space in two chunks */
1823 iovlen = 2;
1824 IOVEC_SET_LEN(iov[0], sz - beg);
1825 IOVEC_SET_BASE(iov[1], &pxtcp->inbuf.buf[0]);
1826 IOVEC_SET_LEN(iov[1], lim);
1827 }
1828
1829#ifndef RT_OS_WINDOWS
1830 mh.msg_iovlen = iovlen;
1831 nread = recvmsg(pxtcp->sock, &mh, 0);
1832#else
1833 dwFlags = 0;
1834 /* We can't set nread to -1 and expect to get it back on error, since
1835 * WSARecv(,,,DWORD *,,,) would overwrite only half of the 64-bit value.
1836 */
1837 nread = 0;
1838 rc = WSARecv(pxtcp->sock, iov, iovlen, (DWORD *)&nread, &dwFlags, NULL, NULL);
1839 if (rc == SOCKET_ERROR) {
1840 warn("pxtcp_sock_read:WSARecv(%d) error:%d nread:%d\n",
1841 pxtcp->sock,
1842 WSAGetLastError(),
1843 nread);
1844 nread = -1;
1845 }
1846
1847 if (dwFlags) {
1848 warn("pxtcp_sock_read:WSARecv(%d) dwFlags:%x nread:%d\n",
1849 pxtcp->sock,
1850 dwFlags,
1851 nread);
1852 }
1853#endif
1854
1855 if (nread > 0) {
1856 wrnew = beg + nread;
1857 if (wrnew >= sz) {
1858 wrnew -= sz;
1859 }
1860 pxtcp->inbuf.vacant = wrnew;
1861 DPRINTF2(("pxtcp %p: sock %d read %d bytes\n",
1862 (void *)pxtcp, pxtcp->sock, (int)nread));
1863 return nread;
1864 }
1865 else if (nread == 0) {
1866 *pstop = 1;
1867 pxtcp->inbound_close = 1;
1868 DPRINTF2(("pxtcp %p: sock %d read EOF\n",
1869 (void *)pxtcp, pxtcp->sock));
1870 return 1;
1871 }
1872 else if (errno == EWOULDBLOCK || errno == EAGAIN || errno == EINTR) {
1873 /* haven't read anything, just return */
1874 DPRINTF2(("pxtcp %p: sock %d read cancelled\n",
1875 (void *)pxtcp, pxtcp->sock));
1876 return 0;
1877 }
1878 else {
1879 /* socket error! */
1880 DPRINTF0(("pxtcp %p: sock %d read errno %d\n",
1881 (void *)pxtcp, pxtcp->sock, errno));
1882 return -errno;
1883 }
1884}
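
/*
 * Worked example (illustration only): with bufsize == 8, vacant == 5 and
 * unacked == 2, the "lim" computed above is 1 (one slot just below
 * unacked is kept empty so that a full buffer can be told apart from an
 * empty one).  Since beg == 5 > lim == 1 the free space wraps, giving two
 * iovecs: buf[5..7] (3 bytes) and buf[0..0] (1 byte).  If recvmsg() /
 * WSARecv() returns 4, vacant becomes (5 + 4) % 8 == 1.
 */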
1885
1886
1887/**
1888 * Callback from poll manager (pxtcp::msg_inbound) to trigger output
1889 * from ringbuf to guest.
1890 */
1891static void
1892pxtcp_pcb_write_inbound(void *ctx)
1893{
1894 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
1895 LWIP_ASSERT1(pxtcp != NULL);
1896
1897 if (pxtcp->pcb == NULL) {
1898 return;
1899 }
1900
1901 pxtcp_pcb_forward_inbound(pxtcp);
1902}
1903
1904
1905/**
1906 * tcp_poll() callback
1907 *
1908 * We switch it on when tcp_write() or tcp_shutdown() fail with
1909 * ERR_MEM to prevent connection from stalling. If there are ACKs or
1910 * more inbound data then pxtcp_pcb_forward_inbound() will be
1911 * triggered again, but if neither happens, tcp_poll() comes to the
1912 * rescue.
1913 */
1914static err_t
1915pxtcp_pcb_poll(void *arg, struct tcp_pcb *pcb)
1916{
1917 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1918 LWIP_UNUSED_ARG(pcb);
1919
1920 DPRINTF2(("%s: pxtcp %p; pcb %p\n",
1921 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
1922
1923 pxtcp_pcb_forward_inbound(pxtcp);
1924
1925 /*
1926 * If the last thing holding up deletion of the pxtcp was failed
1927 * tcp_shutdown() and it succeeded, we may be the last callback.
1928 */
1929 pxtcp_pcb_maybe_deferred_delete(pxtcp);
1930
1931 return ERR_OK;
1932}
1933
1934
1935static void
1936pxtcp_pcb_schedule_poll(struct pxtcp *pxtcp)
1937{
1938 tcp_poll(pxtcp->pcb, pxtcp_pcb_poll, 0);
1939}
1940
1941
1942static void
1943pxtcp_pcb_cancel_poll(struct pxtcp *pxtcp)
1944{
1945 tcp_poll(pxtcp->pcb, NULL, 255);
1946}
1947
1948
1949/**
1950 * Forward inbound data from ring buffer to the guest.
1951 *
1952 * Scheduled by poll manager thread after it receives more data into
1953 * the ring buffer (we have more data to send).
1954 *
1955 * Also called from tcp_sent() callback when guest ACKs some data,
1956 * increasing pcb->snd_buf (we are permitted to send more data).
1957 *
1958 * Also called from tcp_poll() callback if previous attempt to forward
1959 * inbound data failed with ERR_MEM (we need to try again).
1960 */
1961static void
1962pxtcp_pcb_forward_inbound(struct pxtcp *pxtcp)
1963{
1964 struct tcp_pcb *pcb;
1965 size_t sndbuf;
1966 size_t beg, lim, sndlim;
1967 size_t toeob, tolim;
1968 size_t nsent;
1969 err_t error;
1970
1971 LWIP_ASSERT1(pxtcp != NULL);
1972 pcb = pxtcp->pcb;
1973 if (pcb == NULL) {
1974 return;
1975 }
1976
1977 if (/* __predict_false */ pcb->state < ESTABLISHED) {
1978 /*
1979 * If we have just confirmed accept of this connection, the
1980 * pcb is in SYN_RCVD state and we still haven't received the
1981 * ACK of our SYN. It's only in SYN_RCVD -> ESTABLISHED
1982 * transition that lwip decrements pcb->acked so that that ACK
1983 * is not reported to pxtcp_pcb_sent(). If we send something
1984 * now and immediately close (think "daytime", e.g.) while
1985 * still in SYN_RCVD state, we will move directly to
1986 * FIN_WAIT_1 and when our confirming SYN is ACK'ed lwip will
1987 * report it to pxtcp_pcb_sent().
1988 */
1989 DPRINTF2(("forward_inbound: pxtcp %p; pcb %p %s - later...\n",
1990 (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
1991 return;
1992 }
1993
1994
1995 beg = pxtcp->inbuf.unsent; /* private to lwip thread */
1996 lim = pxtcp->inbuf.vacant;
1997
1998 if (beg == lim) {
1999 if (pxtcp->inbound_close && !pxtcp->inbound_close_done) {
2000 pxtcp_pcb_forward_inbound_close(pxtcp);
2001 tcp_output(pcb);
2002 return;
2003 }
2004
2005 /*
2006 * Else, there's no data to send.
2007 *
2008 * If there is free space in the buffer, producer will
2009 * reschedule us as it receives more data and vacant (lim)
2010 * advances.
2011 *
2012 * If buffer is full when all data have been passed to
2013 * tcp_write() but not yet acknowledged, we will advance
2014 * unacked on ACK, freeing some space for producer to write to
2015 * (then see above).
2016 */
2017 return;
2018 }
2019
2020 sndbuf = tcp_sndbuf(pcb);
2021 if (sndbuf == 0) {
2022 /*
2023 * Can't send anything now. As guest ACKs some data, TCP will
2024 * call pxtcp_pcb_sent() callback and we will come here again.
2025 */
2026 return;
2027 }
2028
2029 nsent = 0;
2030
2031 /*
2032 * We have three limits to consider:
2033 * - how much data we have in the ringbuf
2034 * - how much data we are allowed to send
2035 * - ringbuf size
2036 */
2037 toeob = pxtcp->inbuf.bufsize - beg;
2038 if (lim < beg) { /* lim wrapped */
2039 if (sndbuf < toeob) { /* but we are limited by sndbuf */
2040 /* so beg is not going to wrap, treat sndbuf as lim */
2041 lim = beg + sndbuf; /* ... and proceed to the simple case */
2042 }
2043 else { /* we are limited by the end of the buffer, beg will wrap */
2044 u8_t maybemore;
2045 if (toeob == sndbuf || lim == 0) {
2046 maybemore = 0;
2047 }
2048 else {
2049 maybemore = TCP_WRITE_FLAG_MORE;
2050 }
2051
2052 error = tcp_write(pcb, &pxtcp->inbuf.buf[beg], toeob, maybemore);
2053 if (error != ERR_OK) {
2054 goto writeerr;
2055 }
2056 nsent += toeob;
2057 pxtcp->inbuf.unsent = 0; /* wrap */
2058
2059 if (maybemore) {
2060 beg = 0;
2061 sndbuf -= toeob;
2062 }
2063 else {
2064 /* we are done sending, but ... */
2065 goto check_inbound_close;
2066 }
2067 }
2068 }
2069
2070 LWIP_ASSERT1(beg < lim);
2071 sndlim = beg + sndbuf;
2072 if (lim > sndlim) {
2073 lim = sndlim;
2074 }
2075 tolim = lim - beg;
2076 if (tolim > 0) {
2077 error = tcp_write(pcb, &pxtcp->inbuf.buf[beg], (u16_t)tolim, 0);
2078 if (error != ERR_OK) {
2079 goto writeerr;
2080 }
2081 nsent += tolim;
2082 pxtcp->inbuf.unsent = lim;
2083 }
2084
2085 check_inbound_close:
2086 if (pxtcp->inbound_close && pxtcp->inbuf.unsent == pxtcp->inbuf.vacant) {
2087 pxtcp_pcb_forward_inbound_close(pxtcp);
2088 }
2089
2090 DPRINTF2(("forward_inbound: pxtcp %p, pcb %p: sent %d bytes\n",
2091 (void *)pxtcp, (void *)pcb, (int)nsent));
2092 tcp_output(pcb);
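    /* Editor's note: everything currently available was accepted by
     * tcp_write(), so any retry poll scheduled after an earlier
     * ERR_MEM is no longer needed. */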
2093 pxtcp_pcb_cancel_poll(pxtcp);
2094 return;
2095
2096 writeerr:
2097 if (error == ERR_MEM) {
2098 if (nsent > 0) { /* first write succeeded, second failed */
2099 DPRINTF2(("forward_inbound: pxtcp %p, pcb %p: sent %d bytes only\n",
2100 (void *)pxtcp, (void *)pcb, (int)nsent));
2101 tcp_output(pcb);
2102 }
2103 DPRINTF(("forward_inbound: pxtcp %p, pcb %p: ERR_MEM\n",
2104 (void *)pxtcp, (void *)pcb));
2105 pxtcp_pcb_schedule_poll(pxtcp);
2106 }
2107 else {
2108 DPRINTF(("forward_inbound: pxtcp %p, pcb %p: %s\n",
2109 (void *)pxtcp, (void *)pcb, proxy_lwip_strerr(error)));
2110
2111 /* XXX: We shouldn't get ERR_ARG. Check ERR_CONN conditions early? */
2112 LWIP_ASSERT1(error == ERR_MEM);
2113 }
2114}
2115
2116
2117static void
2118pxtcp_pcb_forward_inbound_close(struct pxtcp *pxtcp)
2119{
2120 struct tcp_pcb *pcb;
2121 err_t error;
2122
2123 LWIP_ASSERT1(pxtcp != NULL);
2124 LWIP_ASSERT1(pxtcp->inbound_close);
2125 LWIP_ASSERT1(!pxtcp->inbound_close_done);
2126 LWIP_ASSERT1(pxtcp->inbuf.unsent == pxtcp->inbuf.vacant);
2127
2128 pcb = pxtcp->pcb;
2129 LWIP_ASSERT1(pcb != NULL);
2130
2131 DPRINTF(("inbound_close: pxtcp %p; pcb %p: %s\n",
2132 (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
2133
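    /*
     * Editor's note: shut down only our transmit side; this queues a
     * FIN to the guest while leaving the receive side open, since the
     * guest may still have outbound data to send.
     */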
2134 error = tcp_shutdown(pcb, /*RX*/ 0, /*TX*/ 1);
2135 if (error != ERR_OK) {
2136 DPRINTF(("inbound_close: pxtcp %p; pcb %p:"
2137 " tcp_shutdown: error=%s\n",
2138 (void *)pxtcp, (void *)pcb, proxy_lwip_strerr(error)));
2139 pxtcp_pcb_schedule_poll(pxtcp);
2140 return;
2141 }
2142
2143 pxtcp_pcb_cancel_poll(pxtcp);
2144 pxtcp->inbound_close_done = 1;
2145
2146
2147 /*
2148 * If we have already done outbound close previously (passive
2149 * close on the pcb), then we must not hold onto a pcb in LAST_ACK
2150 * state, since lwip will delete it when that last ack
2151 * comes from the guest.
2152 *
2153 * NB: We do NOT check for deferred delete here, even though we
2154 * have just set one of its conditions, inbound_close_done. We
2155 * let pcb callbacks that called us do that. It's simpler and
2156 * cleaner that way.
2157 */
2158 if (pxtcp->outbound_close_done && pxtcp_pcb_forward_inbound_done(pxtcp)) {
2159 pxtcp_pcb_dissociate(pxtcp);
2160 }
2161}
2162
2163
2164/**
2165 * Check that all forwarded inbound data is sent and acked, and that
2166 * inbound close is scheduled (we aren't called back when it's acked).
2167 */
2168DECLINLINE(int)
2169pxtcp_pcb_forward_inbound_done(const struct pxtcp *pxtcp)
2170{
2171 return (pxtcp->inbound_close_done /* also implies that all data forwarded */
2172 && pxtcp->inbuf.unacked == pxtcp->inbuf.unsent);
2173}
2174
2175
2176/**
2177 * tcp_sent() callback - guest acknowledged len bytes.
2178 *
2179 * We can advance inbuf::unacked index, making more free space in the
2180 * ringbuf and wake up producer on poll manager thread.
2181 *
2182 * We can also try to send more data if we have any since pcb->snd_buf
2183 * was increased and we are now permitted to send more.
2184 */
2185static err_t
2186pxtcp_pcb_sent(void *arg, struct tcp_pcb *pcb, u16_t len)
2187{
2188 struct pxtcp *pxtcp = (struct pxtcp *)arg;
2189 size_t unacked;
2190
2191 LWIP_ASSERT1(pxtcp != NULL);
2192 LWIP_ASSERT1(pxtcp->pcb == pcb);
2193 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
2194 LWIP_UNUSED_ARG(pcb); /* only in assert */
2195
2196 DPRINTF2(("%s: pxtcp %p; pcb %p: +%d ACKed:"
2197 " unacked %d, unsent %d, vacant %d\n",
2198 __func__, (void *)pxtcp, (void *)pcb, (int)len,
2199 (int)pxtcp->inbuf.unacked,
2200 (int)pxtcp->inbuf.unsent,
2201 (int)pxtcp->inbuf.vacant));
2202
2203 if (/* __predict_false */ len == 0) {
2204 /* we are notified to start pulling */
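        /* (this is the synthetic len == 0 call from pxtcp_pcb_pull_inbound()) */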
2205 LWIP_ASSERT1(pxtcp->outbound_close_done);
2206 LWIP_ASSERT1(!pxtcp->inbound_close);
2207 LWIP_ASSERT1(pxtcp->inbound_pull);
2208
2209 unacked = pxtcp->inbuf.unacked;
2210 }
2211 else {
2212 /*
2213 * Advance unacked index. Guest acknowledged the data, so it
2214 * won't be needed again for potential retransmits.
2215 */
2216 unacked = pxtcp->inbuf.unacked + len;
2217 if (unacked > pxtcp->inbuf.bufsize) {
2218 unacked -= pxtcp->inbuf.bufsize;
2219 }
2220 pxtcp->inbuf.unacked = unacked;
2221 }
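    /*
     * Editor's illustrative arithmetic (hypothetical numbers): with
     * bufsize = 16, unacked = 14 and len = 5 the sum is 19, which is
     * reduced to 19 - 16 = 3; the acked region wrapped past the end
     * of the ring, so bytes 14..15 and 0..2 are now free for the
     * producer to overwrite.
     */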
2222
2223 /* arrange for more inbound data */
2224 if (!pxtcp->inbound_close) {
2225 if (!pxtcp->inbound_pull) {
2226 /* wake up producer, in case it has stopped polling for POLLIN */
2227 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp);
2228#ifdef RT_OS_WINDOWS
2229 /**
2230 * We haven't got enough room in the ring buffer to read at the
2231 * moment, but we don't want to lose the notification from WSAW4ME
2232 * when space becomes available, so we reset the event with an empty recv.
2233 */
2234 recv(pxtcp->sock, NULL, 0, 0);
2235#endif
2236 }
2237 else {
2238 ssize_t nread;
2239 int stop_pollin; /* ignored */
2240
2241 nread = pxtcp_sock_read(pxtcp, &stop_pollin);
2242
2243 if (nread < 0) {
2244 int sockerr = -(int)nread;
2245 LWIP_UNUSED_ARG(sockerr);
2246 DPRINTF0(("%s: sock %d: errno %d\n",
2247 __func__, pxtcp->sock, sockerr));
2248
2249 /*
2250 * Since we are pulling, pxtcp is no longer registered
2251 * with poll manager so we can kill it directly.
2252 */
2253 pxtcp_pcb_reset_pxtcp(pxtcp);
2254 return ERR_ABRT;
2255 }
2256 }
2257 }
2258
2259 /* forward more data if we can */
2260 if (!pxtcp->inbound_close_done) {
2261 pxtcp_pcb_forward_inbound(pxtcp);
2262
2263 /*
2264 * NB: we might have dissociated from a pcb that transitioned
2265 * to LAST_ACK state, so don't refer to pcb below.
2266 */
2267 }
2268
2269
2270 /* have we got all the acks? */
2271 if (pxtcp->inbound_close /* no more new data */
2272 && pxtcp->inbuf.unsent == pxtcp->inbuf.vacant /* all data is sent */
2273 && unacked == pxtcp->inbuf.unsent) /* ... and is acked */
2274 {
2275 char *buf;
2276
2277 DPRINTF(("%s: pxtcp %p; pcb %p; all data ACKed\n",
2278 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
2279
2280 /* no more retransmits, so buf is not needed */
2281 buf = pxtcp->inbuf.buf;
2282 pxtcp->inbuf.buf = NULL;
2283 free(buf);
2284
2285 /* no more acks, so no more callbacks */
2286 if (pxtcp->pcb != NULL) {
2287 tcp_sent(pxtcp->pcb, NULL);
2288 }
2289
2290 /*
2291 * We may be the last callback for this pcb if we have also
2292 * successfully forwarded inbound_close.
2293 */
2294 pxtcp_pcb_maybe_deferred_delete(pxtcp);
2295 }
2296
2297 return ERR_OK;
2298}
2299
2300
2301/**
2302 * Callback from poll manager (pxtcp::msg_inpull) to switch
2303 * pxtcp_pcb_sent() to actively pull the last bits of input. See
2304 * POLLHUP comment in pxtcp_pmgr_pump().
2305 *
2306 * pxtcp::sock is deregistered from poll manager after this callback
2307 * is scheduled.
2308 */
2309static void
2310pxtcp_pcb_pull_inbound(void *ctx)
2311{
2312 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
2313 LWIP_ASSERT1(pxtcp != NULL);
2314
2315 if (pxtcp->pcb == NULL) {
2316 DPRINTF(("%s: pxtcp %p: PCB IS GONE\n", __func__, (void *)pxtcp));
2317 pxtcp_pcb_reset_pxtcp(pxtcp);
2318 return;
2319 }
2320
2321 DPRINTF(("%s: pxtcp %p: pcb %p\n",
2322 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
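    /*
     * Editor's note: from this point on the remaining input is read
     * from the socket directly on the lwip thread, inside the
     * tcp_sent() callback, rather than via the poll manager.
     */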
2323 pxtcp->inbound_pull = 1;
2324 pxtcp->deferred_delete = 1;
2325 pxtcp_pcb_sent(pxtcp, pxtcp->pcb, 0);
2326}
2327
2328
2329/**
2330 * tcp_err() callback.
2331 *
2332 * pcb is not passed to this callback since it may be already
2333 * deallocated by the stack, but we can't do anything useful with it
2334 * anyway since connection is gone.
2335 */
2336static void
2337pxtcp_pcb_err(void *arg, err_t error)
2338{
2339 struct pxtcp *pxtcp = (struct pxtcp *)arg;
2340 LWIP_ASSERT1(pxtcp != NULL);
2341
2342 /*
2343 * ERR_CLSD is special - it is reported here when:
2344 *
2345 * . guest has already half-closed
2346 * . we send FIN to guest when external half-closes
2347 * . guest acks that FIN
2348 *
2349 * The connection is now closed, but since receive has already been
2350 * closed lwip can only report this via tcp_err(). At this point the
2351 * pcb is still alive, so we can peek at it if need be.
2352 *
2353 * The interesting twist is when the ACK from the guest that acks
2354 * our FIN also acks some data. In this scenario lwip will NOT call
2355 * the tcp_sent() callback with the ACK for that last bit of data,
2356 * but will instead call tcp_err() with ERR_CLSD right away. Since
2357 * that ACK also acknowledges all the data, we should run some of
2358 * the pxtcp_pcb_sent() logic here.
2359 */
2360 if (error == ERR_CLSD) {
2361 struct tcp_pcb *pcb = pxtcp->pcb; /* still alive */
2362
2363 DPRINTF2(("ERR_CLSD: pxtcp %p; pcb %p:"
2364 " pcb->acked %d;"
2365 " unacked %d, unsent %d, vacant %d\n",
2366 (void *)pxtcp, (void *)pcb,
2367 pcb->acked,
2368 (int)pxtcp->inbuf.unacked,
2369 (int)pxtcp->inbuf.unsent,
2370 (int)pxtcp->inbuf.vacant));
2371
2372 LWIP_ASSERT1(pxtcp->pcb == pcb);
2373 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
2374
2375 if (pcb->acked > 0) {
2376 pxtcp_pcb_sent(pxtcp, pcb, pcb->acked);
2377 }
2378 return;
2379 }
2380
2381 DPRINTF0(("tcp_err: pxtcp=%p, error=%s\n",
2382 (void *)pxtcp, proxy_lwip_strerr(error)));
2383
2384 pxtcp->pcb = NULL; /* pcb is gone */
2385 if (pxtcp->deferred_delete) {
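        /*
         * Editor's note (assumption): deferred_delete is set once the
         * socket has been handed off from the poll manager (see
         * pxtcp_pcb_pull_inbound()), so the pxtcp can be destroyed
         * directly here instead of bouncing through the
         * POLLMGR_CHAN_PXTCP_RESET channel.
         */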
2386 pxtcp_pcb_reset_pxtcp(pxtcp);
2387 }
2388 else {
2389 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
2390 }
2391}