VirtualBox

source: vbox/trunk/src/VBox/NetworkServices/NAT/pxtcp.c@ 51669

Last change on this file since 51669 was 51608, checked in by vboxsync, 11 years ago

NAT/Net: return negated errno from pxtcp_sock_recv()/pxtcp_sock_send()
on error.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 66.2 KB
Line 
1/* -*- indent-tabs-mode: nil; -*- */
2#define LOG_GROUP LOG_GROUP_NAT_SERVICE
3
4#include "winutils.h"
5
6#include "pxtcp.h"
7
8#include "proxy.h"
9#include "proxy_pollmgr.h"
10#include "pxremap.h"
11#include "portfwd.h" /* fwspec */
12
13#ifndef RT_OS_WINDOWS
14#include <sys/types.h>
15#include <sys/socket.h>
16#include <sys/ioctl.h>
17#ifdef RT_OS_SOLARIS
18#include <sys/filio.h> /* FIONREAD is BSD'ism */
19#endif
20#include <stdlib.h>
21#include <stdint.h>
22#include <stdio.h>
23#include <string.h>
24#include <poll.h>
25
26#include <err.h> /* BSD'ism */
27#else
28#include <stdlib.h>
29#include <stdio.h>
30#include <string.h>
31
32#include <iprt/stdint.h>
33#include "winpoll.h"
34#endif
35
36#include "lwip/opt.h"
37
38#include "lwip/sys.h"
39#include "lwip/tcpip.h"
40#include "lwip/netif.h"
41#include "lwip/tcp_impl.h" /* XXX: to access tcp_abandon() */
42#include "lwip/icmp.h"
43#include "lwip/icmp6.h"
44
45/* NetBSD doesn't report POLLHUP for TCP sockets */
46#ifdef __NetBSD__
47# define HAVE_TCP_POLLHUP 0
48#else
49# define HAVE_TCP_POLLHUP 1
50#endif
51
52
/**
 * Ring buffer that stages inbound data (host socket -> guest).
 *
 * The poll manager thread is the producer: it fills the buffer with
 * data read from the host socket.  The lwIP thread is the consumer:
 * it schedules tcp_write() of that data to the pcb.
 *
 * NB: The lwIP stack itself is a third party here, so the buffer is
 * split three ways rather than two: free space / data that has been
 * sent but is not yet ACKed / data that has not been sent yet.
 */
struct ringbuf {
    char *buf;
    size_t bufsize;

    /*
     * Where the free space begins.  The producer writes here and may
     * advance up to "unacked".
     */
    volatile size_t vacant;

    /*
     * Where the sent-but-unacknowledged data begins.  The stack
     * "owns" these bytes because it may have to retransmit them, so
     * this is also the limit of the free space for the producer.
     */
    volatile size_t unacked;

    /*
     * Where the unsent data begins.  The consumer reads/sends from
     * here, up to "vacant".  Only the consumer thread touches this
     * member, hence it is not declared volatile.
     */
    size_t unsent;
};
85
86
87/**
88 */
89struct pxtcp {
90 /**
91 * Our poll manager handler. Must be first, strong/weak
92 * references depend on this "inheritance".
93 */
94 struct pollmgr_handler pmhdl;
95
96 /**
97 * lwIP (internal/guest) side of the proxied connection.
98 */
99 struct tcp_pcb *pcb;
100
101 /**
102 * Host (external) side of the proxied connection.
103 */
104 SOCKET sock;
105
106 /**
107 * Socket events we are currently polling for.
108 */
109 int events;
110
111 /**
112 * Socket error. Currently used to save connect(2) errors so that
113 * we can decide if we need to send ICMP error.
114 */
115 int sockerr;
116
117 /**
118 * Interface that we have got the SYN from. Needed to send ICMP
119 * with correct source address.
120 */
121 struct netif *netif;
122
123 /**
124 * For tentatively accepted connections for which we are in
125 * process of connecting to the real destination this is the
126 * initial pbuf that we might need to build ICMP error.
127 *
128 * When connection is established this is used to hold outbound
129 * pbuf chain received by pxtcp_pcb_recv() but not yet completely
130 * forwarded over the socket. We cannot "return" it to lwIP since
131 * the head of the chain is already sent and freed.
132 */
133 struct pbuf *unsent;
134
135 /**
136 * Guest has closed its side. Reported to pxtcp_pcb_recv() only
137 * once and we might not be able to forward it immediately if we
138 * have unsent pbuf.
139 */
140 int outbound_close;
141
142 /**
143 * Outbound half-close has been done on the socket.
144 */
145 int outbound_close_done;
146
147 /**
148 * External has closed its side. We might not be able to forward
149 * it immediately if we have unforwarded data.
150 */
151 int inbound_close;
152
153 /**
154 * Inbound half-close has been done on the pcb.
155 */
156 int inbound_close_done;
157
158 /**
159 * On systems that report POLLHUP as soon as the final FIN is
160 * received on a socket we cannot continue polling for the rest of
161 * input, so we have to read (pull) last data from the socket on
162 * the lwIP thread instead of polling/pushing it from the poll
163 * manager thread. See comment in pxtcp_pmgr_pump() POLLHUP case.
164 */
165 int inbound_pull;
166
167
168 /**
169 * When poll manager schedules delete we may not be able to delete
170 * a pxtcp immediately if not all inbound data has been acked by
171 * the guest: lwIP may need to resend and the data are in pxtcp's
172 * inbuf::buf. We defer delete until all data are acked to
173 * pxtcp_pcb_sent().
174 *
175 * It's also implied by inbound_pull. It probably means that
176 * "deferred" is not a very fortunate name.
177 */
178 int deferred_delete;
179
180 /**
181 * Ring-buffer for inbound data.
182 */
183 struct ringbuf inbuf;
184
185 /**
186 * lwIP thread's strong reference to us.
187 */
188 struct pollmgr_refptr *rp;
189
190
191 /*
192 * We use static messages to call functions on the lwIP thread to
193 * void malloc/free overhead.
194 */
195 struct tcpip_msg msg_delete; /* delete pxtcp */
196 struct tcpip_msg msg_reset; /* reset connection and delete pxtcp */
197 struct tcpip_msg msg_accept; /* confirm accept of proxied connection */
198 struct tcpip_msg msg_outbound; /* trigger send of outbound data */
199 struct tcpip_msg msg_inbound; /* trigger send of inbound data */
200 struct tcpip_msg msg_inpull; /* trigger pull of last inbound data */
201};
202
203
204
205static struct pxtcp *pxtcp_allocate(void);
206static void pxtcp_free(struct pxtcp *);
207
208static void pxtcp_pcb_associate(struct pxtcp *, struct tcp_pcb *);
209static void pxtcp_pcb_dissociate(struct pxtcp *);
210
211/* poll manager callbacks for pxtcp related channels */
212static int pxtcp_pmgr_chan_add(struct pollmgr_handler *, SOCKET, int);
213static int pxtcp_pmgr_chan_pollout(struct pollmgr_handler *, SOCKET, int);
214static int pxtcp_pmgr_chan_pollin(struct pollmgr_handler *, SOCKET, int);
215#if !HAVE_TCP_POLLHUP
216static int pxtcp_pmgr_chan_del(struct pollmgr_handler *, SOCKET, int);
217#endif
218static int pxtcp_pmgr_chan_reset(struct pollmgr_handler *, SOCKET, int);
219
220/* helper functions for sending/receiving pxtcp over poll manager channels */
221static ssize_t pxtcp_chan_send(enum pollmgr_slot_t, struct pxtcp *);
222static ssize_t pxtcp_chan_send_weak(enum pollmgr_slot_t, struct pxtcp *);
223static struct pxtcp *pxtcp_chan_recv(struct pollmgr_handler *, SOCKET, int);
224static struct pxtcp *pxtcp_chan_recv_strong(struct pollmgr_handler *, SOCKET, int);
225
226/* poll manager callbacks for individual sockets */
227static int pxtcp_pmgr_connect(struct pollmgr_handler *, SOCKET, int);
228static int pxtcp_pmgr_pump(struct pollmgr_handler *, SOCKET, int);
229
230/* get incoming traffic into ring buffer */
231static ssize_t pxtcp_sock_read(struct pxtcp *, int *);
232static ssize_t pxtcp_sock_recv(struct pxtcp *, IOVEC *, size_t); /* default */
233
234/* convenience functions for poll manager callbacks */
235static int pxtcp_schedule_delete(struct pxtcp *);
236static int pxtcp_schedule_reset(struct pxtcp *);
237static int pxtcp_schedule_reject(struct pxtcp *);
238
239/* lwip thread callbacks called via proxy_lwip_post() */
240static void pxtcp_pcb_delete_pxtcp(void *);
241static void pxtcp_pcb_reset_pxtcp(void *);
242static void pxtcp_pcb_accept_refuse(void *);
243static void pxtcp_pcb_accept_confirm(void *);
244static void pxtcp_pcb_write_outbound(void *);
245static void pxtcp_pcb_write_inbound(void *);
246static void pxtcp_pcb_pull_inbound(void *);
247
248/* tcp pcb callbacks */
249static err_t pxtcp_pcb_heard(void *, struct tcp_pcb *, err_t); /* global */
250static err_t pxtcp_pcb_accept(void *, struct tcp_pcb *, err_t);
251static err_t pxtcp_pcb_connected(void *, struct tcp_pcb *, err_t);
252static err_t pxtcp_pcb_recv(void *, struct tcp_pcb *, struct pbuf *, err_t);
253static err_t pxtcp_pcb_sent(void *, struct tcp_pcb *, u16_t);
254static err_t pxtcp_pcb_poll(void *, struct tcp_pcb *);
255static void pxtcp_pcb_err(void *, err_t);
256
257static err_t pxtcp_pcb_forward_outbound(struct pxtcp *, struct pbuf *);
258static void pxtcp_pcb_forward_outbound_close(struct pxtcp *);
259
260static ssize_t pxtcp_sock_send(struct pxtcp *, IOVEC *, size_t);
261
262static void pxtcp_pcb_forward_inbound(struct pxtcp *);
263static void pxtcp_pcb_forward_inbound_close(struct pxtcp *);
264DECLINLINE(int) pxtcp_pcb_forward_inbound_done(const struct pxtcp *);
265static void pxtcp_pcb_schedule_poll(struct pxtcp *);
266static void pxtcp_pcb_cancel_poll(struct pxtcp *);
267
268static void pxtcp_pcb_reject(struct netif *, struct tcp_pcb *, struct pbuf *, int);
269DECLINLINE(void) pxtcp_pcb_maybe_deferred_delete(struct pxtcp *);
270
271/* poll manager handlers for pxtcp channels */
272static struct pollmgr_handler pxtcp_pmgr_chan_add_hdl;
273static struct pollmgr_handler pxtcp_pmgr_chan_pollout_hdl;
274static struct pollmgr_handler pxtcp_pmgr_chan_pollin_hdl;
275#if !HAVE_TCP_POLLHUP
276static struct pollmgr_handler pxtcp_pmgr_chan_del_hdl;
277#endif
278static struct pollmgr_handler pxtcp_pmgr_chan_reset_hdl;
279
280
281/**
282 * Init PXTCP - must be run when neither lwIP tcpip thread, nor poll
283 * manager threads haven't been created yet.
284 */
285void
286pxtcp_init(void)
287{
288 /*
289 * Create channels.
290 */
291#define CHANNEL(SLOT, NAME) do { \
292 NAME##_hdl.callback = NAME; \
293 NAME##_hdl.data = NULL; \
294 NAME##_hdl.slot = -1; \
295 pollmgr_add_chan(SLOT, &NAME##_hdl); \
296 } while (0)
297
298 CHANNEL(POLLMGR_CHAN_PXTCP_ADD, pxtcp_pmgr_chan_add);
299 CHANNEL(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp_pmgr_chan_pollin);
300 CHANNEL(POLLMGR_CHAN_PXTCP_POLLOUT, pxtcp_pmgr_chan_pollout);
301#if !HAVE_TCP_POLLHUP
302 CHANNEL(POLLMGR_CHAN_PXTCP_DEL, pxtcp_pmgr_chan_del);
303#endif
304 CHANNEL(POLLMGR_CHAN_PXTCP_RESET, pxtcp_pmgr_chan_reset);
305
306#undef CHANNEL
307
308 /*
309 * Listen to outgoing connection from guest(s).
310 */
311 tcp_proxy_accept(pxtcp_pcb_heard);
312}
313
314
315/**
316 * Syntactic sugar for sending pxtcp pointer over poll manager
317 * channel. Used by lwip thread functions.
318 */
319static ssize_t
320pxtcp_chan_send(enum pollmgr_slot_t slot, struct pxtcp *pxtcp)
321{
322 return pollmgr_chan_send(slot, &pxtcp, sizeof(pxtcp));
323}
324
325
326/**
327 * Syntactic sugar for sending weak reference to pxtcp over poll
328 * manager channel. Used by lwip thread functions.
329 */
330static ssize_t
331pxtcp_chan_send_weak(enum pollmgr_slot_t slot, struct pxtcp *pxtcp)
332{
333 pollmgr_refptr_weak_ref(pxtcp->rp);
334 return pollmgr_chan_send(slot, &pxtcp->rp, sizeof(pxtcp->rp));
335}
336
337
338/**
339 * Counterpart of pxtcp_chan_send().
340 */
341static struct pxtcp *
342pxtcp_chan_recv(struct pollmgr_handler *handler, SOCKET fd, int revents)
343{
344 struct pxtcp *pxtcp;
345
346 pxtcp = (struct pxtcp *)pollmgr_chan_recv_ptr(handler, fd, revents);
347 return pxtcp;
348}
349
350
351/**
352 * Counterpart of pxtcp_chan_send_weak().
353 */
354static struct pxtcp *
355pxtcp_chan_recv_strong(struct pollmgr_handler *handler, SOCKET fd, int revents)
356{
357 struct pollmgr_refptr *rp;
358 struct pollmgr_handler *base;
359 struct pxtcp *pxtcp;
360
361 rp = (struct pollmgr_refptr *)pollmgr_chan_recv_ptr(handler, fd, revents);
362 base = (struct pollmgr_handler *)pollmgr_refptr_get(rp);
363 pxtcp = (struct pxtcp *)base;
364
365 return pxtcp;
366}
367
368
369/**
370 * Register pxtcp with poll manager.
371 *
372 * Used for POLLMGR_CHAN_PXTCP_ADD and by port-forwarding. Since
373 * error handling is different in these two cases, we leave it up to
374 * the caller.
375 */
376int
377pxtcp_pmgr_add(struct pxtcp *pxtcp)
378{
379 int status;
380
381 LWIP_ASSERT1(pxtcp != NULL);
382 LWIP_ASSERT1(pxtcp->sock >= 0);
383 LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
384 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
385 LWIP_ASSERT1(pxtcp->pmhdl.slot < 0);
386
387 status = pollmgr_add(&pxtcp->pmhdl, pxtcp->sock, pxtcp->events);
388 return status;
389}
390
391
392/**
393 * Unregister pxtcp with poll manager.
394 *
395 * Used for POLLMGR_CHAN_PXTCP_RESET and by port-forwarding (on error
396 * leg).
397 */
398void
399pxtcp_pmgr_del(struct pxtcp *pxtcp)
400{
401 LWIP_ASSERT1(pxtcp != NULL);
402
403 pollmgr_del_slot(pxtcp->pmhdl.slot);
404}
405
406
407/**
408 * POLLMGR_CHAN_PXTCP_ADD handler.
409 *
410 * Get new pxtcp from lwip thread and start polling its socket.
411 */
412static int
413pxtcp_pmgr_chan_add(struct pollmgr_handler *handler, SOCKET fd, int revents)
414{
415 struct pxtcp *pxtcp;
416 int status;
417
418 pxtcp = pxtcp_chan_recv(handler, fd, revents);
419 DPRINTF0(("pxtcp_add: new pxtcp %p; pcb %p; sock %d\n",
420 (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
421
422 status = pxtcp_pmgr_add(pxtcp);
423 if (status < 0) {
424 (void) pxtcp_schedule_reset(pxtcp);
425 }
426
427 return POLLIN;
428}
429
430
431/**
432 * POLLMGR_CHAN_PXTCP_POLLOUT handler.
433 *
434 * pxtcp_pcb_forward_outbound() on the lwIP thread tried to send data
435 * and failed, it now requests us to poll the socket for POLLOUT and
436 * schedule pxtcp_pcb_forward_outbound() when sock is writable again.
437 */
438static int
439pxtcp_pmgr_chan_pollout(struct pollmgr_handler *handler, SOCKET fd, int revents)
440{
441 struct pxtcp *pxtcp;
442
443 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
444 DPRINTF0(("pxtcp_pollout: pxtcp %p\n", (void *)pxtcp));
445
446 if (pxtcp == NULL) {
447 return POLLIN;
448 }
449
450 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
451 LWIP_ASSERT1(pxtcp->pmhdl.slot > 0);
452
453 pxtcp->events |= POLLOUT;
454 pollmgr_update_events(pxtcp->pmhdl.slot, pxtcp->events);
455
456 return POLLIN;
457}
458
459
460/**
461 * POLLMGR_CHAN_PXTCP_POLLIN handler.
462 */
463static int
464pxtcp_pmgr_chan_pollin(struct pollmgr_handler *handler, SOCKET fd, int revents)
465{
466 struct pxtcp *pxtcp;
467
468 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
469 DPRINTF2(("pxtcp_pollin: pxtcp %p\n", (void *)pxtcp));
470
471 if (pxtcp == NULL) {
472 return POLLIN;
473 }
474
475 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
476 LWIP_ASSERT1(pxtcp->pmhdl.slot > 0);
477
478 if (pxtcp->inbound_close) {
479 return POLLIN;
480 }
481
482 pxtcp->events |= POLLIN;
483 pollmgr_update_events(pxtcp->pmhdl.slot, pxtcp->events);
484
485 return POLLIN;
486}
487
488
489#if !HAVE_TCP_POLLHUP
490/**
491 * POLLMGR_CHAN_PXTCP_DEL handler.
492 *
493 * Schedule pxtcp deletion. We only need this if host system doesn't
494 * report POLLHUP for fully closed tcp sockets.
495 */
496static int
497pxtcp_pmgr_chan_del(struct pollmgr_handler *handler, SOCKET fd, int revents)
498{
499 struct pxtcp *pxtcp;
500
501 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
502 if (pxtcp == NULL) {
503 return POLLIN;
504 }
505
506 DPRINTF(("PXTCP_DEL: pxtcp %p; pcb %p; sock %d\n",
507 (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
508
509 LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
510 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
511
512 LWIP_ASSERT1(pxtcp->inbound_close); /* EOF read */
513 LWIP_ASSERT1(pxtcp->outbound_close_done); /* EOF sent */
514
515 pxtcp_pmgr_del(pxtcp);
516 (void) pxtcp_schedule_delete(pxtcp);
517
518 return POLLIN;
519}
520#endif /* !HAVE_TCP_POLLHUP */
521
522
523/**
524 * POLLMGR_CHAN_PXTCP_RESET handler.
525 *
526 * Close the socket with RST and delete pxtcp.
527 */
528static int
529pxtcp_pmgr_chan_reset(struct pollmgr_handler *handler, SOCKET fd, int revents)
530{
531 struct pxtcp *pxtcp;
532
533 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
534 if (pxtcp == NULL) {
535 return POLLIN;
536 }
537
538 DPRINTF0(("PXTCP_RESET: pxtcp %p; pcb %p; sock %d\n",
539 (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
540
541 LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
542 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
543
544 pxtcp_pmgr_del(pxtcp);
545
546 proxy_reset_socket(pxtcp->sock);
547 pxtcp->sock = INVALID_SOCKET;
548
549 (void) pxtcp_schedule_reset(pxtcp);
550
551 return POLLIN;
552}
553
554
555static struct pxtcp *
556pxtcp_allocate(void)
557{
558 struct pxtcp *pxtcp;
559
560 pxtcp = (struct pxtcp *)malloc(sizeof(*pxtcp));
561 if (pxtcp == NULL) {
562 return NULL;
563 }
564
565 pxtcp->pmhdl.callback = NULL;
566 pxtcp->pmhdl.data = (void *)pxtcp;
567 pxtcp->pmhdl.slot = -1;
568
569 pxtcp->pcb = NULL;
570 pxtcp->sock = INVALID_SOCKET;
571 pxtcp->events = 0;
572 pxtcp->sockerr = 0;
573 pxtcp->netif = NULL;
574 pxtcp->unsent = NULL;
575 pxtcp->outbound_close = 0;
576 pxtcp->outbound_close_done = 0;
577 pxtcp->inbound_close = 0;
578 pxtcp->inbound_close_done = 0;
579 pxtcp->inbound_pull = 0;
580 pxtcp->deferred_delete = 0;
581
582 pxtcp->inbuf.bufsize = 64 * 1024;
583 pxtcp->inbuf.buf = (char *)malloc(pxtcp->inbuf.bufsize);
584 if (pxtcp->inbuf.buf == NULL) {
585 free(pxtcp);
586 return NULL;
587 }
588 pxtcp->inbuf.vacant = 0;
589 pxtcp->inbuf.unacked = 0;
590 pxtcp->inbuf.unsent = 0;
591
592 pxtcp->rp = pollmgr_refptr_create(&pxtcp->pmhdl);
593 if (pxtcp->rp == NULL) {
594 free(pxtcp->inbuf.buf);
595 free(pxtcp);
596 return NULL;
597 }
598
599#define CALLBACK_MSG(MSG, FUNC) \
600 do { \
601 pxtcp->MSG.type = TCPIP_MSG_CALLBACK_STATIC; \
602 pxtcp->MSG.sem = NULL; \
603 pxtcp->MSG.msg.cb.function = FUNC; \
604 pxtcp->MSG.msg.cb.ctx = (void *)pxtcp; \
605 } while (0)
606
607 CALLBACK_MSG(msg_delete, pxtcp_pcb_delete_pxtcp);
608 CALLBACK_MSG(msg_reset, pxtcp_pcb_reset_pxtcp);
609 CALLBACK_MSG(msg_accept, pxtcp_pcb_accept_confirm);
610 CALLBACK_MSG(msg_outbound, pxtcp_pcb_write_outbound);
611 CALLBACK_MSG(msg_inbound, pxtcp_pcb_write_inbound);
612 CALLBACK_MSG(msg_inpull, pxtcp_pcb_pull_inbound);
613
614#undef CALLBACK_MSG
615
616 return pxtcp;
617}
618
619
620/**
621 * Exported to fwtcp to create pxtcp for incoming port-forwarded
622 * connections. Completed with pcb in pxtcp_pcb_connect().
623 */
624struct pxtcp *
625pxtcp_create_forwarded(SOCKET sock)
626{
627 struct pxtcp *pxtcp;
628
629 pxtcp = pxtcp_allocate();
630 if (pxtcp == NULL) {
631 return NULL;
632 }
633
634 pxtcp->sock = sock;
635 pxtcp->pmhdl.callback = pxtcp_pmgr_pump;
636 pxtcp->events = 0;
637
638 return pxtcp;
639}
640
641
642static void
643pxtcp_pcb_associate(struct pxtcp *pxtcp, struct tcp_pcb *pcb)
644{
645 LWIP_ASSERT1(pxtcp != NULL);
646 LWIP_ASSERT1(pcb != NULL);
647
648 pxtcp->pcb = pcb;
649
650 tcp_arg(pcb, pxtcp);
651
652 tcp_recv(pcb, pxtcp_pcb_recv);
653 tcp_sent(pcb, pxtcp_pcb_sent);
654 tcp_poll(pcb, NULL, 255);
655 tcp_err(pcb, pxtcp_pcb_err);
656}
657
658
659static void
660pxtcp_free(struct pxtcp *pxtcp)
661{
662 if (pxtcp->unsent != NULL) {
663 pbuf_free(pxtcp->unsent);
664 }
665 if (pxtcp->inbuf.buf != NULL) {
666 free(pxtcp->inbuf.buf);
667 }
668 free(pxtcp);
669}
670
671
672/**
673 * Counterpart to pxtcp_create_forwarded() to destruct pxtcp that
674 * fwtcp failed to register with poll manager to post to lwip thread
675 * for doing connect.
676 */
677void
678pxtcp_cancel_forwarded(struct pxtcp *pxtcp)
679{
680 LWIP_ASSERT1(pxtcp->pcb == NULL);
681 pxtcp_pcb_reset_pxtcp(pxtcp);
682}
683
684
685static void
686pxtcp_pcb_dissociate(struct pxtcp *pxtcp)
687{
688 if (pxtcp == NULL || pxtcp->pcb == NULL) {
689 return;
690 }
691
692 DPRINTF(("%s: pxtcp %p <-> pcb %p\n",
693 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
694
695 /*
696 * We must have dissociated from a fully closed pcb immediately
697 * since lwip recycles them and we don't wan't to mess with what
698 * would be someone else's pcb that we happen to have a stale
699 * pointer to.
700 */
701 LWIP_ASSERT1(pxtcp->pcb->callback_arg == pxtcp);
702
703 tcp_recv(pxtcp->pcb, NULL);
704 tcp_sent(pxtcp->pcb, NULL);
705 tcp_poll(pxtcp->pcb, NULL, 255);
706 tcp_err(pxtcp->pcb, NULL);
707 tcp_arg(pxtcp->pcb, NULL);
708 pxtcp->pcb = NULL;
709}
710
711
712/**
713 * Lwip thread callback invoked via pxtcp::msg_delete
714 *
715 * Since we use static messages to communicate to the lwip thread, we
716 * cannot delete pxtcp without making sure there are no unprocessed
717 * messages in the lwip thread mailbox.
718 *
719 * The easiest way to ensure that is to send this "delete" message as
720 * the last one and when it's processed we know there are no more and
721 * it's safe to delete pxtcp.
722 *
723 * Poll manager handlers should use pxtcp_schedule_delete()
724 * convenience function.
725 */
726static void
727pxtcp_pcb_delete_pxtcp(void *ctx)
728{
729 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
730
731 DPRINTF(("%s: pxtcp %p, pcb %p, sock %d%s\n",
732 __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock,
733 (pxtcp->deferred_delete && !pxtcp->inbound_pull
734 ? " (was deferred)" : "")));
735
736 LWIP_ASSERT1(pxtcp != NULL);
737 LWIP_ASSERT1(pxtcp->pmhdl.slot < 0);
738 LWIP_ASSERT1(pxtcp->outbound_close_done);
739 LWIP_ASSERT1(pxtcp->inbound_close); /* not necessarily done */
740
741
742 /*
743 * pxtcp is no longer registered with poll manager, so it's safe
744 * to close the socket.
745 */
746 if (pxtcp->sock != INVALID_SOCKET) {
747 closesocket(pxtcp->sock);
748 pxtcp->sock = INVALID_SOCKET;
749 }
750
751 /*
752 * We might have already dissociated from a fully closed pcb, or
753 * guest might have sent us a reset while msg_delete was in
754 * transit. If there's no pcb, we are done.
755 */
756 if (pxtcp->pcb == NULL) {
757 pollmgr_refptr_unref(pxtcp->rp);
758 pxtcp_free(pxtcp);
759 return;
760 }
761
762 /*
763 * Have we completely forwarded all inbound traffic to the guest?
764 *
765 * We may still be waiting for ACKs. We may have failed to send
766 * some of the data (tcp_write() failed with ERR_MEM). We may
767 * have failed to send the FIN (tcp_shutdown() failed with
768 * ERR_MEM).
769 */
770 if (pxtcp_pcb_forward_inbound_done(pxtcp)) {
771 pxtcp_pcb_dissociate(pxtcp);
772 pollmgr_refptr_unref(pxtcp->rp);
773 pxtcp_free(pxtcp);
774 }
775 else {
776 DPRINTF2(("delete: pxtcp %p; pcb %p:"
777 " unacked %d, unsent %d, vacant %d, %s - DEFER!\n",
778 (void *)pxtcp, (void *)pxtcp->pcb,
779 (int)pxtcp->inbuf.unacked,
780 (int)pxtcp->inbuf.unsent,
781 (int)pxtcp->inbuf.vacant,
782 pxtcp->inbound_close_done ? "FIN sent" : "FIN is NOT sent"));
783
784 LWIP_ASSERT1(!pxtcp->deferred_delete);
785 pxtcp->deferred_delete = 1;
786 }
787}
788
789
790/**
791 * If we couldn't delete pxtcp right away in the msg_delete callback
792 * from the poll manager thread, we repeat the check at the end of
793 * relevant pcb callbacks.
794 */
795DECLINLINE(void)
796pxtcp_pcb_maybe_deferred_delete(struct pxtcp *pxtcp)
797{
798 if (pxtcp->deferred_delete && pxtcp_pcb_forward_inbound_done(pxtcp)) {
799 pxtcp_pcb_delete_pxtcp(pxtcp);
800 }
801}
802
803
804/**
805 * Poll manager callbacks should use this convenience wrapper to
806 * schedule pxtcp deletion on the lwip thread and to deregister from
807 * the poll manager.
808 */
809static int
810pxtcp_schedule_delete(struct pxtcp *pxtcp)
811{
812 /*
813 * If pollmgr_refptr_get() is called by any channel before
814 * scheduled deletion happens, let them know we are gone.
815 */
816 pxtcp->pmhdl.slot = -1;
817
818 /*
819 * Schedule deletion. Since poll manager thread may be pre-empted
820 * right after we send the message, the deletion may actually
821 * happen on the lwip thread before we return from this function,
822 * so it's not safe to refer to pxtcp after this call.
823 */
824 proxy_lwip_post(&pxtcp->msg_delete);
825
826 /* tell poll manager to deregister us */
827 return -1;
828}
829
830
831/**
832 * Lwip thread callback invoked via pxtcp::msg_reset
833 *
834 * Like pxtcp_pcb_delete(), but sends RST to the guest before
835 * deleting this pxtcp.
836 */
837static void
838pxtcp_pcb_reset_pxtcp(void *ctx)
839{
840 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
841 LWIP_ASSERT1(pxtcp != NULL);
842
843 DPRINTF0(("%s: pxtcp %p, pcb %p, sock %d\n",
844 __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
845
846 if (pxtcp->sock != INVALID_SOCKET) {
847 proxy_reset_socket(pxtcp->sock);
848 pxtcp->sock = INVALID_SOCKET;
849 }
850
851 if (pxtcp->pcb != NULL) {
852 struct tcp_pcb *pcb = pxtcp->pcb;
853 pxtcp_pcb_dissociate(pxtcp);
854 tcp_abort(pcb);
855 }
856
857 pollmgr_refptr_unref(pxtcp->rp);
858 pxtcp_free(pxtcp);
859}
860
861
862
863/**
864 * Poll manager callbacks should use this convenience wrapper to
865 * schedule pxtcp reset and deletion on the lwip thread and to
866 * deregister from the poll manager.
867 *
868 * See pxtcp_schedule_delete() for additional comments.
869 */
870static int
871pxtcp_schedule_reset(struct pxtcp *pxtcp)
872{
873 pxtcp->pmhdl.slot = -1;
874 proxy_lwip_post(&pxtcp->msg_reset);
875 return -1;
876}
877
878
879/**
880 * Reject proxy connection attempt. Depending on the cause (sockerr)
881 * we may just drop the pcb silently, generate an ICMP datagram or
882 * send TCP reset.
883 */
884static void
885pxtcp_pcb_reject(struct netif *netif, struct tcp_pcb *pcb,
886 struct pbuf *p, int sockerr)
887{
888 struct netif *oif;
889 int reset = 0;
890
891 oif = ip_current_netif();
892 ip_current_netif() = netif;
893
894 if (sockerr == ECONNREFUSED) {
895 reset = 1;
896 }
897 else if (PCB_ISIPV6(pcb)) {
898 if (sockerr == EHOSTDOWN) {
899 icmp6_dest_unreach(p, ICMP6_DUR_ADDRESS); /* XXX: ??? */
900 }
901 else if (sockerr == EHOSTUNREACH
902 || sockerr == ENETDOWN
903 || sockerr == ENETUNREACH)
904 {
905 icmp6_dest_unreach(p, ICMP6_DUR_NO_ROUTE);
906 }
907 }
908 else {
909 if (sockerr == EHOSTDOWN
910 || sockerr == EHOSTUNREACH
911 || sockerr == ENETDOWN
912 || sockerr == ENETUNREACH)
913 {
914 icmp_dest_unreach(p, ICMP_DUR_HOST);
915 }
916 }
917
918 ip_current_netif() = oif;
919
920 tcp_abandon(pcb, reset);
921}
922
923
924/**
925 * Called from poll manager thread via pxtcp::msg_accept when proxy
926 * failed to connect to the destination. Also called when we failed
927 * to register pxtcp with poll manager.
928 *
929 * This is like pxtcp_pcb_reset_pxtcp() but is more discriminate in
930 * how this unestablished connection is terminated.
931 */
932static void
933pxtcp_pcb_accept_refuse(void *ctx)
934{
935 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
936
937 DPRINTF0(("%s: pxtcp %p, pcb %p, sock %d: %R[sockerr]\n",
938 __func__, (void *)pxtcp, (void *)pxtcp->pcb,
939 pxtcp->sock, pxtcp->sockerr));
940
941 LWIP_ASSERT1(pxtcp != NULL);
942 LWIP_ASSERT1(pxtcp->sock == INVALID_SOCKET);
943
944 if (pxtcp->pcb != NULL) {
945 struct tcp_pcb *pcb = pxtcp->pcb;
946 pxtcp_pcb_dissociate(pxtcp);
947 pxtcp_pcb_reject(pxtcp->netif, pcb, pxtcp->unsent, pxtcp->sockerr);
948 }
949
950 pollmgr_refptr_unref(pxtcp->rp);
951 pxtcp_free(pxtcp);
952}
953
954
955/**
956 * Convenience wrapper for poll manager connect callback to reject
957 * connection attempt.
958 *
959 * Like pxtcp_schedule_reset(), but the callback is more discriminate
960 * in how this unestablished connection is terminated.
961 */
962static int
963pxtcp_schedule_reject(struct pxtcp *pxtcp)
964{
965 pxtcp->msg_accept.msg.cb.function = pxtcp_pcb_accept_refuse;
966 pxtcp->pmhdl.slot = -1;
967 proxy_lwip_post(&pxtcp->msg_accept);
968 return -1;
969}
970
971
972/**
973 * Global tcp_proxy_accept() callback for proxied outgoing TCP
974 * connections from guest(s).
975 */
976static err_t
977pxtcp_pcb_heard(void *arg, struct tcp_pcb *newpcb, err_t error)
978{
979 struct pbuf *p = (struct pbuf *)arg;
980 struct pxtcp *pxtcp;
981 ipX_addr_t dst_addr;
982 int sdom;
983 SOCKET sock;
984 ssize_t nsent;
985 int sockerr = 0;
986
987 LWIP_UNUSED_ARG(error); /* always ERR_OK */
988
989 /*
990 * TCP first calls accept callback when it receives the first SYN
991 * and "tentatively accepts" new proxied connection attempt. When
992 * proxy "confirms" the SYN and sends SYN|ACK and the guest
993 * replies with ACK the accept callback is called again, this time
994 * with the established connection.
995 */
996 LWIP_ASSERT1(newpcb->state == SYN_RCVD_0);
997 tcp_accept(newpcb, pxtcp_pcb_accept);
998 tcp_arg(newpcb, NULL);
999
1000 tcp_setprio(newpcb, TCP_PRIO_MAX);
1001
1002 pxremap_outbound_ipX(PCB_ISIPV6(newpcb), &dst_addr, &newpcb->local_ip);
1003
1004 sdom = PCB_ISIPV6(newpcb) ? PF_INET6 : PF_INET;
1005 sock = proxy_connected_socket(sdom, SOCK_STREAM,
1006 &dst_addr, newpcb->local_port);
1007 if (sock == INVALID_SOCKET) {
1008 sockerr = SOCKERRNO();
1009 goto abort;
1010 }
1011
1012 pxtcp = pxtcp_allocate();
1013 if (pxtcp == NULL) {
1014 proxy_reset_socket(sock);
1015 goto abort;
1016 }
1017
1018 /* save initial datagram in case we need to reply with ICMP */
1019 pbuf_ref(p);
1020 pxtcp->unsent = p;
1021 pxtcp->netif = ip_current_netif();
1022
1023 pxtcp_pcb_associate(pxtcp, newpcb);
1024 pxtcp->sock = sock;
1025
1026 pxtcp->pmhdl.callback = pxtcp_pmgr_connect;
1027 pxtcp->events = POLLOUT;
1028
1029 nsent = pxtcp_chan_send(POLLMGR_CHAN_PXTCP_ADD, pxtcp);
1030 if (nsent < 0) {
1031 pxtcp->sock = INVALID_SOCKET;
1032 proxy_reset_socket(sock);
1033 pxtcp_pcb_accept_refuse(pxtcp);
1034 return ERR_ABRT;
1035 }
1036
1037 return ERR_OK;
1038
1039 abort:
1040 DPRINTF0(("%s: pcb %p, sock %d: %R[sockerr]\n",
1041 __func__, (void *)newpcb, sock, sockerr));
1042 pxtcp_pcb_reject(ip_current_netif(), newpcb, p, sockerr);
1043 return ERR_ABRT;
1044}
1045
1046
1047/**
1048 * tcp_proxy_accept() callback for accepted proxied outgoing TCP
1049 * connections from guest(s). This is "real" accept with three-way
1050 * handshake completed.
1051 */
1052static err_t
1053pxtcp_pcb_accept(void *arg, struct tcp_pcb *pcb, err_t error)
1054{
1055 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1056
1057 LWIP_UNUSED_ARG(pcb); /* used only in asserts */
1058 LWIP_UNUSED_ARG(error); /* always ERR_OK */
1059
1060 LWIP_ASSERT1(pxtcp != NULL);
1061 LWIP_ASSERT1(pxtcp->pcb = pcb);
1062 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
1063
1064 /* send any inbound data that are already queued */
1065 pxtcp_pcb_forward_inbound(pxtcp);
1066 return ERR_OK;
1067}
1068
1069
1070/**
1071 * Initial poll manager callback for proxied outgoing TCP connections.
1072 * pxtcp_pcb_accept() sets pxtcp::pmhdl::callback to this.
1073 *
1074 * Waits for connect(2) to the destination to complete. On success
1075 * replaces itself with pxtcp_pmgr_pump() callback common to all
1076 * established TCP connections.
1077 */
1078static int
1079pxtcp_pmgr_connect(struct pollmgr_handler *handler, SOCKET fd, int revents)
1080{
1081 struct pxtcp *pxtcp;
1082
1083 pxtcp = (struct pxtcp *)handler->data;
1084 LWIP_ASSERT1(handler == &pxtcp->pmhdl);
1085 LWIP_ASSERT1(fd == pxtcp->sock);
1086
1087 if (revents & (POLLNVAL | POLLHUP | POLLERR)) {
1088 if (revents & POLLNVAL) {
1089 pxtcp->sock = INVALID_SOCKET;
1090 pxtcp->sockerr = ETIMEDOUT;
1091 }
1092 else {
1093 socklen_t optlen = (socklen_t)sizeof(pxtcp->sockerr);
1094 int status;
1095 SOCKET s;
1096
1097 status = getsockopt(pxtcp->sock, SOL_SOCKET, SO_ERROR,
1098 (char *)&pxtcp->sockerr, &optlen);
1099 if (status < 0) { /* should not happen */
1100 DPRINTF(("%s: sock %d: SO_ERROR failed: %R[sockerr]\n",
1101 __func__, fd, SOCKERRNO()));
1102 }
1103 else {
1104 DPRINTF(("%s: sock %d: connect: %R[sockerr]\n",
1105 __func__, fd, pxtcp->sockerr));
1106 }
1107 s = pxtcp->sock;
1108 pxtcp->sock = INVALID_SOCKET;
1109 closesocket(s);
1110 }
1111 return pxtcp_schedule_reject(pxtcp);
1112 }
1113
1114 if (revents & POLLOUT) { /* connect is successful */
1115 /* confirm accept to the guest */
1116 proxy_lwip_post(&pxtcp->msg_accept);
1117
1118 /*
1119 * Switch to common callback used for all established proxied
1120 * connections.
1121 */
1122 pxtcp->pmhdl.callback = pxtcp_pmgr_pump;
1123
1124 /*
1125 * Initially we poll for incoming traffic only. Outgoing
1126 * traffic is fast-forwarded by pxtcp_pcb_recv(); if it fails
1127 * it will ask us to poll for POLLOUT too.
1128 */
1129 pxtcp->events = POLLIN;
1130 return pxtcp->events;
1131 }
1132
1133 /* should never get here */
1134 DPRINTF0(("%s: pxtcp %p, sock %d: unexpected revents 0x%x\n",
1135 __func__, (void *)pxtcp, fd, revents));
1136 return pxtcp_schedule_reset(pxtcp);
1137}
1138
1139
1140/**
1141 * Called from poll manager thread via pxtcp::msg_accept when proxy
1142 * connected to the destination. Finalize accept by sending SYN|ACK
1143 * to the guest.
1144 */
1145static void
1146pxtcp_pcb_accept_confirm(void *ctx)
1147{
1148 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
1149 err_t error;
1150
1151 LWIP_ASSERT1(pxtcp != NULL);
1152 if (pxtcp->pcb == NULL) {
1153 return;
1154 }
1155
1156 /* we are not going to reply with ICMP, so we can drop initial pbuf */
1157 LWIP_ASSERT1(pxtcp->unsent != NULL);
1158 pbuf_free(pxtcp->unsent);
1159 pxtcp->unsent = NULL;
1160
1161 error = tcp_proxy_accept_confirm(pxtcp->pcb);
1162
1163 /*
1164 * If lwIP failed to enqueue SYN|ACK because it's out of pbufs it
1165 * abandons the pcb. Retrying that is not very easy, since it
1166 * would require keeping "fractional state". From guest's point
1167 * of view there is no reply to its SYN so it will either resend
1168 * the SYN (effetively triggering full connection retry for us),
1169 * or it will eventually time out.
1170 */
1171 if (error == ERR_ABRT) {
1172 pxtcp->pcb = NULL; /* pcb is gone */
1173 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
1174 }
1175
1176 /*
1177 * else if (error != ERR_OK): even if tcp_output() failed with
1178 * ERR_MEM - don't give up, that SYN|ACK is enqueued and will be
1179 * retransmitted eventually.
1180 */
1181}
1182
1183
1184/**
1185 * Entry point for port-forwarding.
1186 *
1187 * fwtcp accepts new incoming connection, creates pxtcp for the socket
1188 * (with no pcb yet) and adds it to the poll manager (polling for
1189 * errors only). Then it calls this function to construct the pcb and
1190 * perform connection to the guest.
1191 */
1192void
1193pxtcp_pcb_connect(struct pxtcp *pxtcp, const struct fwspec *fwspec)
1194{
1195 struct sockaddr_storage ss;
1196 socklen_t sslen;
1197 struct tcp_pcb *pcb;
1198 ipX_addr_t src_addr, dst_addr;
1199 u16_t src_port, dst_port;
1200 int status;
1201 err_t error;
1202
1203 LWIP_ASSERT1(pxtcp != NULL);
1204 LWIP_ASSERT1(pxtcp->pcb == NULL);
1205 LWIP_ASSERT1(fwspec->stype == SOCK_STREAM);
1206
1207 pcb = tcp_new();
1208 if (pcb == NULL) {
1209 goto reset;
1210 }
1211
1212 tcp_setprio(pcb, TCP_PRIO_MAX);
1213 pxtcp_pcb_associate(pxtcp, pcb);
1214
1215 sslen = sizeof(ss);
1216 status = getpeername(pxtcp->sock, (struct sockaddr *)&ss, &sslen);
1217 if (status == SOCKET_ERROR) {
1218 goto reset;
1219 }
1220
1221 /* nit: comapres PF and AF, but they are the same everywhere */
1222 LWIP_ASSERT1(ss.ss_family == fwspec->sdom);
1223
1224 status = fwany_ipX_addr_set_src(&src_addr, (const struct sockaddr *)&ss);
1225 if (status == PXREMAP_FAILED) {
1226 goto reset;
1227 }
1228
1229 if (ss.ss_family == PF_INET) {
1230 const struct sockaddr_in *peer4 = (const struct sockaddr_in *)&ss;
1231
1232 src_port = peer4->sin_port;
1233
1234 memcpy(&dst_addr.ip4, &fwspec->dst.sin.sin_addr, sizeof(ip_addr_t));
1235 dst_port = fwspec->dst.sin.sin_port;
1236 }
1237 else { /* PF_INET6 */
1238 const struct sockaddr_in6 *peer6 = (const struct sockaddr_in6 *)&ss;
1239 ip_set_v6(pcb, 1);
1240
1241 src_port = peer6->sin6_port;
1242
1243 memcpy(&dst_addr.ip6, &fwspec->dst.sin6.sin6_addr, sizeof(ip6_addr_t));
1244 dst_port = fwspec->dst.sin6.sin6_port;
1245 }
1246
1247 /* lwip port arguments are in host order */
1248 src_port = ntohs(src_port);
1249 dst_port = ntohs(dst_port);
1250
1251 error = tcp_proxy_bind(pcb, ipX_2_ip(&src_addr), src_port);
1252 if (error != ERR_OK) {
1253 goto reset;
1254 }
1255
1256 error = tcp_connect(pcb, ipX_2_ip(&dst_addr), dst_port,
1257 /* callback: */ pxtcp_pcb_connected);
1258 if (error != ERR_OK) {
1259 goto reset;
1260 }
1261
1262 return;
1263
1264 reset:
1265 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
1266}
1267
1268
1269/**
1270 * Port-forwarded connection to guest is successful, pump data.
1271 */
1272static err_t
1273pxtcp_pcb_connected(void *arg, struct tcp_pcb *pcb, err_t error)
1274{
1275 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1276
1277 LWIP_ASSERT1(error == ERR_OK); /* always called with ERR_OK */
1278 LWIP_UNUSED_ARG(error);
1279
1280 LWIP_ASSERT1(pxtcp != NULL);
1281 LWIP_ASSERT1(pxtcp->pcb == pcb);
1282 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
1283 LWIP_UNUSED_ARG(pcb);
1284
1285 DPRINTF0(("%s: new pxtcp %p; pcb %p; sock %d\n",
1286 __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
1287
1288 /* ACK on connection is like ACK on data in pxtcp_pcb_sent() */
1289 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp);
1290
1291 return ERR_OK;
1292}
1293
1294
1295/**
1296 * tcp_recv() callback.
1297 */
1298static err_t
1299pxtcp_pcb_recv(void *arg, struct tcp_pcb *pcb, struct pbuf *p, err_t error)
1300{
1301 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1302
1303 LWIP_ASSERT1(error == ERR_OK); /* always called with ERR_OK */
1304 LWIP_UNUSED_ARG(error);
1305
1306 LWIP_ASSERT1(pxtcp != NULL);
1307 LWIP_ASSERT1(pxtcp->pcb == pcb);
1308 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
1309 LWIP_UNUSED_ARG(pcb);
1310
1311
1312 /*
1313 * Have we done sending previous batch?
1314 */
1315 if (pxtcp->unsent != NULL) {
1316 if (p != NULL) {
1317 /*
1318 * Return an error to tell TCP to hold onto that pbuf.
1319 * It will be presented to us later from tcp_fasttmr().
1320 */
1321 return ERR_WOULDBLOCK;
1322 }
1323 else {
1324 /*
1325 * Unlike data, p == NULL indicating orderly shutdown is
1326 * NOT presented to us again
1327 */
1328 pxtcp->outbound_close = 1;
1329 return ERR_OK;
1330 }
1331 }
1332
1333
1334 /*
1335 * Guest closed?
1336 */
1337 if (p == NULL) {
1338 pxtcp->outbound_close = 1;
1339 pxtcp_pcb_forward_outbound_close(pxtcp);
1340 return ERR_OK;
1341 }
1342
1343
1344 /*
1345 * Got data, send what we can without blocking.
1346 */
1347 return pxtcp_pcb_forward_outbound(pxtcp, p);
1348}
1349
1350
1351/**
1352 * Guest half-closed its TX side of the connection.
1353 *
1354 * Called either immediately from pxtcp_pcb_recv() when it gets NULL,
1355 * or from pxtcp_pcb_forward_outbound() when it finishes forwarding
1356 * previously unsent data and sees pxtcp::outbound_close flag saved by
1357 * pxtcp_pcb_recv().
1358 */
1359static void
1360pxtcp_pcb_forward_outbound_close(struct pxtcp *pxtcp)
1361{
1362 struct tcp_pcb *pcb;
1363
1364 LWIP_ASSERT1(pxtcp != NULL);
1365 LWIP_ASSERT1(pxtcp->outbound_close);
1366 LWIP_ASSERT1(!pxtcp->outbound_close_done);
1367
1368 pcb = pxtcp->pcb;
1369 LWIP_ASSERT1(pcb != NULL);
1370
1371 DPRINTF(("outbound_close: pxtcp %p; pcb %p %s\n",
1372 (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
1373
1374
1375 /*
1376 * NB: set the flag first, since shutdown() will trigger POLLHUP
1377 * if inbound is already closed, and poll manager asserts
1378 * outbound_close_done (may be it should not?).
1379 */
1380 pxtcp->outbound_close_done = 1;
1381 shutdown(pxtcp->sock, SHUT_WR); /* half-close the socket */
1382
1383#if !HAVE_TCP_POLLHUP
1384 /*
1385 * On NetBSD POLLHUP is not reported for TCP sockets, so we need
1386 * to nudge poll manager manually.
1387 */
1388 if (pxtcp->inbound_close) {
1389 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_DEL, pxtcp);
1390 }
1391#endif
1392
1393
1394 /* no more outbound data coming to us */
1395 tcp_recv(pcb, NULL);
1396
1397 /*
1398 * If we have already done inbound close previously (active close
1399 * on the pcb), then we must not hold onto a pcb in TIME_WAIT
1400 * state since those will be recycled by lwip when it runs out of
1401 * free pcbs in the pool.
1402 *
1403 * The test is true also for a pcb in CLOSING state that waits
1404 * just for the ACK of its FIN (to transition to TIME_WAIT).
1405 */
1406 if (pxtcp_pcb_forward_inbound_done(pxtcp)) {
1407 pxtcp_pcb_dissociate(pxtcp);
1408 }
1409}
1410
1411
1412/**
1413 * Forward outbound data from pcb to socket.
1414 *
1415 * Called by pxtcp_pcb_recv() to forward new data and by callout
1416 * triggered by POLLOUT on the socket to send previously unsent data.
1417 *
1418 * (Re)scehdules one-time callout if not all data are sent.
1419 */
1420static err_t
1421pxtcp_pcb_forward_outbound(struct pxtcp *pxtcp, struct pbuf *p)
1422{
1423 struct pbuf *qs, *q;
1424 size_t qoff;
1425 size_t forwarded;
1426 int sockerr;
1427
1428 LWIP_ASSERT1(pxtcp->unsent == NULL || pxtcp->unsent == p);
1429
1430 forwarded = 0;
1431 sockerr = 0;
1432
1433 q = NULL;
1434 qoff = 0;
1435
1436 qs = p;
1437 while (qs != NULL) {
1438 IOVEC iov[8];
1439 const size_t iovsize = sizeof(iov)/sizeof(iov[0]);
1440 size_t fwd1;
1441 ssize_t nsent;
1442 size_t i;
1443
1444 fwd1 = 0;
1445 for (i = 0, q = qs; i < iovsize && q != NULL; ++i, q = q->next) {
1446 LWIP_ASSERT1(q->len > 0);
1447 IOVEC_SET_BASE(iov[i], q->payload);
1448 IOVEC_SET_LEN(iov[i], q->len);
1449 fwd1 += q->len;
1450 }
1451
1452 /*
1453 * TODO: This is where application-level proxy can hook into
1454 * to process outbound traffic.
1455 */
1456 nsent = pxtcp_sock_send(pxtcp, iov, i);
1457
1458 if (nsent == (ssize_t)fwd1) {
1459 /* successfully sent this chain fragment completely */
1460 forwarded += nsent;
1461 qs = q;
1462 }
1463 else if (nsent >= 0) {
1464 /* successfully sent only some data */
1465 forwarded += nsent;
1466
1467 /* find the first pbuf that was not completely forwarded */
1468 qoff = nsent;
1469 for (i = 0, q = qs; i < iovsize && q != NULL; ++i, q = q->next) {
1470 if (qoff < q->len) {
1471 break;
1472 }
1473 qoff -= q->len;
1474 }
1475 LWIP_ASSERT1(q != NULL);
1476 LWIP_ASSERT1(qoff < q->len);
1477 break;
1478 }
1479 else {
1480 sockerr = -nsent;
1481
1482 /*
1483 * Some errors are really not errors - if we get them,
1484 * it's not different from getting nsent == 0, so filter
1485 * them out here.
1486 */
1487 if (proxy_error_is_transient(sockerr)) {
1488 sockerr = 0;
1489 }
1490 q = qs;
1491 qoff = 0;
1492 break;
1493 }
1494 }
1495
1496 if (forwarded > 0) {
1497 tcp_recved(pxtcp->pcb, (u16_t)forwarded);
1498 }
1499
1500 if (q == NULL) { /* everything is forwarded? */
1501 LWIP_ASSERT1(sockerr == 0);
1502 LWIP_ASSERT1(forwarded == p->tot_len);
1503
1504 pxtcp->unsent = NULL;
1505 pbuf_free(p);
1506 if (pxtcp->outbound_close) {
1507 pxtcp_pcb_forward_outbound_close(pxtcp);
1508 }
1509 }
1510 else {
1511 if (q != p) {
1512 /* free forwarded pbufs at the beginning of the chain */
1513 pbuf_ref(q);
1514 pbuf_free(p);
1515 }
1516 if (qoff > 0) {
1517 /* advance payload pointer past the forwarded part */
1518 pbuf_header(q, -(s16_t)qoff);
1519 }
1520 pxtcp->unsent = q;
1521
1522 /*
1523 * Have sendmsg() failed?
1524 *
1525 * Connection reset will be detected by poll and
1526 * pxtcp_schedule_reset() will be called.
1527 *
1528 * Otherwise something *really* unexpected must have happened,
1529 * so we'd better abort.
1530 */
1531 if (sockerr != 0 && sockerr != ECONNRESET) {
1532 struct tcp_pcb *pcb = pxtcp->pcb;
1533 pxtcp_pcb_dissociate(pxtcp);
1534
1535 tcp_abort(pcb);
1536
1537 /* call error callback manually since we've already dissociated */
1538 pxtcp_pcb_err((void *)pxtcp, ERR_ABRT);
1539 return ERR_ABRT;
1540 }
1541
1542 /* schedule one-shot POLLOUT on the socket */
1543 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLOUT, pxtcp);
1544 }
1545 return ERR_OK;
1546}
1547
1548
1549#if !defined(RT_OS_WINDOWS)
1550static ssize_t
1551pxtcp_sock_send(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen)
1552{
1553 struct msghdr mh;
1554 ssize_t nsent;
1555
1556#ifdef MSG_NOSIGNAL
1557 const int send_flags = MSG_NOSIGNAL;
1558#else
1559 const int send_flags = 0;
1560#endif
1561
1562 memset(&mh, 0, sizeof(mh));
1563
1564 mh.msg_iov = iov;
1565 mh.msg_iovlen = iovlen;
1566
1567 nsent = sendmsg(pxtcp->sock, &mh, send_flags);
1568 if (nsent < 0) {
1569 nsent = -SOCKERRNO();
1570 }
1571
1572 return nsent;
1573}
1574#else /* RT_OS_WINDOWS */
1575static ssize_t
1576pxtcp_sock_send(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen)
1577{
1578 DWORD nsent;
1579 int status;
1580
1581 status = WSASend(pxtcp->sock, iov, (DWORD)iovlen, &nsent,
1582 0, NULL, NULL);
1583 if (status == SOCKET_ERROR) {
1584 nsent = -SOCKERRNO();
1585 }
1586
1587 return nsent;
1588}
1589#endif /* RT_OS_WINDOWS */
1590
1591
1592/**
1593 * Callback from poll manager (on POLLOUT) to send data from
1594 * pxtcp::unsent pbuf to socket.
1595 */
1596static void
1597pxtcp_pcb_write_outbound(void *ctx)
1598{
1599 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
1600 LWIP_ASSERT1(pxtcp != NULL);
1601
1602 if (pxtcp->pcb == NULL) {
1603 return;
1604 }
1605
1606 pxtcp_pcb_forward_outbound(pxtcp, pxtcp->unsent);
1607}
1608
1609
1610/**
1611 * Common poll manager callback used by both outgoing and incoming
1612 * (port-forwarded) connections that has connected socket.
1613 */
1614static int
1615pxtcp_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents)
1616{
1617 struct pxtcp *pxtcp;
1618 int status;
1619 int sockerr;
1620
1621 pxtcp = (struct pxtcp *)handler->data;
1622 LWIP_ASSERT1(handler == &pxtcp->pmhdl);
1623 LWIP_ASSERT1(fd == pxtcp->sock);
1624
1625 if (revents & POLLNVAL) {
1626 pxtcp->sock = INVALID_SOCKET;
1627 return pxtcp_schedule_reset(pxtcp);
1628 }
1629
1630 if (revents & POLLERR) {
1631 socklen_t optlen = (socklen_t)sizeof(sockerr);
1632
1633 status = getsockopt(pxtcp->sock, SOL_SOCKET, SO_ERROR,
1634 (char *)&sockerr, &optlen);
1635 if (status < 0) { /* should not happen */
1636 DPRINTF(("sock %d: SO_ERROR failed: %R[sockerr]\n",
1637 fd, SOCKERRNO()));
1638 }
1639 else {
1640 DPRINTF0(("sock %d: %R[sockerr]\n", fd, sockerr));
1641 }
1642 return pxtcp_schedule_reset(pxtcp);
1643 }
1644
1645 if (revents & POLLOUT) {
1646 pxtcp->events &= ~POLLOUT;
1647 proxy_lwip_post(&pxtcp->msg_outbound);
1648 }
1649
1650 if (revents & POLLIN) {
1651 ssize_t nread;
1652 int stop_pollin;
1653
1654 nread = pxtcp_sock_read(pxtcp, &stop_pollin);
1655 if (nread < 0) {
1656 sockerr = -(int)nread;
1657 DPRINTF0(("sock %d: %R[sockerr]\n", fd, sockerr));
1658 return pxtcp_schedule_reset(pxtcp);
1659 }
1660
1661 if (stop_pollin) {
1662 pxtcp->events &= ~POLLIN;
1663 }
1664
1665 if (nread > 0) {
1666 proxy_lwip_post(&pxtcp->msg_inbound);
1667#if !HAVE_TCP_POLLHUP
1668 /*
1669 * If host does not report POLLHUP for closed sockets
1670 * (e.g. NetBSD) we should check for full close manually.
1671 */
1672 if (pxtcp->inbound_close && pxtcp->outbound_close_done) {
1673 LWIP_ASSERT1((revents & POLLHUP) == 0);
1674 return pxtcp_schedule_delete(pxtcp);
1675 }
1676#endif
1677 }
1678 }
1679
1680#if !HAVE_TCP_POLLHUP
1681 LWIP_ASSERT1((revents & POLLHUP) == 0);
1682#else
1683 if (revents & POLLHUP) {
1684 /*
1685 * Linux and Darwin seems to report POLLHUP when both
1686 * directions are shut down. And they do report POLLHUP even
1687 * when there's unread data (which they aslo report as POLLIN
1688 * along with that POLLHUP).
1689 *
1690 * FreeBSD (from source inspection) seems to follow Linux,
1691 * reporting POLLHUP when both directions are shut down, but
1692 * POLLHUP is always accompanied with POLLIN.
1693 *
1694 * NetBSD never reports POLLHUP for sockets.
1695 *
1696 * ---
1697 *
1698 * If external half-closes first, we don't get POLLHUP, we
1699 * recv 0 bytes from the socket as EOF indicator, stop polling
1700 * for POLLIN and poll with events == 0 (with occasional
1701 * one-shot POLLOUT). When guest eventually closes, we get
1702 * POLLHUP.
1703 *
1704 * If guest half-closes first things are more tricky. As soon
1705 * as host sees the FIN from external it will spam POLLHUP,
1706 * even when there's unread data. The problem is that we
1707 * might have stopped polling for POLLIN because the ring
1708 * buffer is full or we were polling POLLIN but can't read all
1709 * of the data becuase buffer doesn't have enough space.
1710 * Either way, there's unread data but we can't keep polling
1711 * the socket.
1712 */
1713 DPRINTF(("sock %d: HUP\n", fd));
1714 LWIP_ASSERT1(pxtcp->outbound_close_done);
1715
1716 if (pxtcp->inbound_close) {
1717 /* there's no unread data, we are done */
1718 return pxtcp_schedule_delete(pxtcp);
1719 }
1720 else {
1721 /* DPRINTF */ {
1722#ifndef RT_OS_WINDOWS
1723 int unread;
1724#else
1725 u_long unread;
1726#endif
1727 status = ioctlsocket(fd, FIONREAD, &unread);
1728 if (status == SOCKET_ERROR) {
1729 DPRINTF2(("sock %d: FIONREAD: %R[sockerr]\n",
1730 fd, SOCKERRNO()));
1731 }
1732 else {
1733 DPRINTF2(("sock %d: %d UNREAD bytes\n", fd, unread));
1734 }
1735 }
1736
1737 /*
1738 * We cannot just set a flag here and let pxtcp_pcb_sent()
1739 * notice and start pulling, because if we are preempted
1740 * before setting the flag and all data in inbuf is ACKed
1741 * there will be no more calls to pxtcp_pcb_sent() to
1742 * notice the flag.
1743 *
1744 * We cannot set a flag and then send a message to make
1745 * sure it noticed, because if it has and it has read all
1746 * data while the message is in transit it will delete
1747 * pxtcp.
1748 *
1749 * In a sense this message is like msg_delete (except we
1750 * ask to pull some data first).
1751 */
1752 proxy_lwip_post(&pxtcp->msg_inpull);
1753 pxtcp->pmhdl.slot = -1;
1754 return -1;
1755 }
1756 /* NOTREACHED */
1757 } /* POLLHUP */
1758#endif /* HAVE_TCP_POLLHUP */
1759
1760 return pxtcp->events;
1761}
1762
1763
1764/**
1765 * Read data from socket to ringbuf. This may be used both on lwip
1766 * and poll manager threads.
1767 *
1768 * Flag pointed to by pstop is set when further reading is impossible,
1769 * either temporary when buffer is full, or permanently when EOF is
1770 * received.
1771 *
1772 * Returns number of bytes read. NB: EOF is reported as 1!
1773 *
1774 * Returns zero if nothing was read, either because buffer is full, or
1775 * if no data is available (EWOULDBLOCK, EINTR &c).
1776 *
1777 * Returns -errno on real socket errors.
1778 */
1779static ssize_t
1780pxtcp_sock_read(struct pxtcp *pxtcp, int *pstop)
1781{
1782 IOVEC iov[2];
1783 size_t iovlen;
1784 ssize_t nread;
1785
1786 const size_t sz = pxtcp->inbuf.bufsize;
1787 size_t beg, lim, wrnew;
1788
1789 *pstop = 0;
1790
1791 beg = pxtcp->inbuf.vacant;
1792 IOVEC_SET_BASE(iov[0], &pxtcp->inbuf.buf[beg]);
1793
1794 /* lim is the index we can NOT write to */
1795 lim = pxtcp->inbuf.unacked;
1796 if (lim == 0) {
1797 lim = sz - 1; /* empty slot at the end */
1798 }
1799 else if (lim == 1) {
1800 lim = sz; /* empty slot at the beginning */
1801 }
1802 else {
1803 --lim;
1804 }
1805
1806 if (beg == lim) {
1807 /*
1808 * Buffer is full, stop polling for POLLIN.
1809 *
1810 * pxtcp_pcb_sent() will re-enable POLLIN when guest ACKs
1811 * data, freeing space in the ring buffer.
1812 */
1813 *pstop = 1;
1814 return 0;
1815 }
1816
1817 if (beg < lim) {
1818 /* free space in one chunk */
1819 iovlen = 1;
1820 IOVEC_SET_LEN(iov[0], lim - beg);
1821 }
1822 else {
1823 /* free space in two chunks */
1824 iovlen = 2;
1825 IOVEC_SET_LEN(iov[0], sz - beg);
1826 IOVEC_SET_BASE(iov[1], &pxtcp->inbuf.buf[0]);
1827 IOVEC_SET_LEN(iov[1], lim);
1828 }
1829
1830 /*
1831 * TODO: This is where application-level proxy can hook into to
1832 * process inbound traffic.
1833 */
1834 nread = pxtcp_sock_recv(pxtcp, iov, iovlen);
1835
1836 if (nread > 0) {
1837 wrnew = beg + nread;
1838 if (wrnew >= sz) {
1839 wrnew -= sz;
1840 }
1841 pxtcp->inbuf.vacant = wrnew;
1842 DPRINTF2(("pxtcp %p: sock %d read %d bytes\n",
1843 (void *)pxtcp, pxtcp->sock, (int)nread));
1844 return nread;
1845 }
1846 else if (nread == 0) {
1847 *pstop = 1;
1848 pxtcp->inbound_close = 1;
1849 DPRINTF2(("pxtcp %p: sock %d read EOF\n",
1850 (void *)pxtcp, pxtcp->sock));
1851 return 1;
1852 }
1853 else {
1854 int sockerr = -nread;
1855
1856 if (proxy_error_is_transient(sockerr)) {
1857 /* haven't read anything, just return */
1858 DPRINTF2(("pxtcp %p: sock %d read cancelled\n",
1859 (void *)pxtcp, pxtcp->sock));
1860 return 0;
1861 }
1862 else {
1863 /* socket error! */
1864 DPRINTF0(("pxtcp %p: sock %d read: %R[sockerr]\n",
1865 (void *)pxtcp, pxtcp->sock, sockerr));
1866 return -sockerr;
1867 }
1868 }
1869}
1870
1871
1872#if !defined(RT_OS_WINDOWS)
1873static ssize_t
1874pxtcp_sock_recv(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen)
1875{
1876 struct msghdr mh;
1877 ssize_t nread;
1878
1879 memset(&mh, 0, sizeof(mh));
1880
1881 mh.msg_iov = iov;
1882 mh.msg_iovlen = iovlen;
1883
1884 nread = recvmsg(pxtcp->sock, &mh, 0);
1885 if (nread < 0) {
1886 nread = -SOCKERRNO();
1887 }
1888
1889 return nread;
1890}
1891#else /* RT_OS_WINDOWS */
1892static ssize_t
1893pxtcp_sock_recv(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen)
1894{
1895 DWORD flags;
1896 DWORD nread;
1897 int status;
1898
1899 flags = 0;
1900 status = WSARecv(pxtcp->sock, iov, (DWORD)iovlen, &nread,
1901 &flags, NULL, NULL);
1902 if (status == SOCKET_ERROR) {
1903 nread = -SOCKERRNO();
1904 }
1905
1906 return (ssize_t)nread;
1907}
1908#endif /* RT_OS_WINDOWS */
1909
1910
1911/**
1912 * Callback from poll manager (pxtcp::msg_inbound) to trigger output
1913 * from ringbuf to guest.
1914 */
1915static void
1916pxtcp_pcb_write_inbound(void *ctx)
1917{
1918 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
1919 LWIP_ASSERT1(pxtcp != NULL);
1920
1921 if (pxtcp->pcb == NULL) {
1922 return;
1923 }
1924
1925 pxtcp_pcb_forward_inbound(pxtcp);
1926}
1927
1928
1929/**
1930 * tcp_poll() callback
1931 *
1932 * We swtich it on when tcp_write() or tcp_shutdown() fail with
1933 * ERR_MEM to prevent connection from stalling. If there are ACKs or
1934 * more inbound data then pxtcp_pcb_forward_inbound() will be
1935 * triggered again, but if neither happens, tcp_poll() comes to the
1936 * rescue.
1937 */
1938static err_t
1939pxtcp_pcb_poll(void *arg, struct tcp_pcb *pcb)
1940{
1941 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1942 LWIP_UNUSED_ARG(pcb);
1943
1944 DPRINTF2(("%s: pxtcp %p; pcb %p\n",
1945 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
1946
1947 pxtcp_pcb_forward_inbound(pxtcp);
1948
1949 /*
1950 * If the last thing holding up deletion of the pxtcp was failed
1951 * tcp_shutdown() and it succeeded, we may be the last callback.
1952 */
1953 pxtcp_pcb_maybe_deferred_delete(pxtcp);
1954
1955 return ERR_OK;
1956}
1957
1958
1959static void
1960pxtcp_pcb_schedule_poll(struct pxtcp *pxtcp)
1961{
1962 tcp_poll(pxtcp->pcb, pxtcp_pcb_poll, 0);
1963}
1964
1965
1966static void
1967pxtcp_pcb_cancel_poll(struct pxtcp *pxtcp)
1968{
1969 tcp_poll(pxtcp->pcb, NULL, 255);
1970}
1971
1972
1973/**
1974 * Forward inbound data from ring buffer to the guest.
1975 *
1976 * Scheduled by poll manager thread after it receives more data into
1977 * the ring buffer (we have more data to send).
1978
1979 * Also called from tcp_sent() callback when guest ACKs some data,
1980 * increasing pcb->snd_buf (we are permitted to send more data).
1981 *
1982 * Also called from tcp_poll() callback if previous attempt to forward
1983 * inbound data failed with ERR_MEM (we need to try again).
1984 */
1985static void
1986pxtcp_pcb_forward_inbound(struct pxtcp *pxtcp)
1987{
1988 struct tcp_pcb *pcb;
1989 size_t sndbuf;
1990 size_t beg, lim, sndlim;
1991 size_t toeob, tolim;
1992 size_t nsent;
1993 err_t error;
1994
1995 LWIP_ASSERT1(pxtcp != NULL);
1996 pcb = pxtcp->pcb;
1997 if (pcb == NULL) {
1998 return;
1999 }
2000
2001 if (/* __predict_false */ pcb->state < ESTABLISHED) {
2002 /*
2003 * If we have just confirmed accept of this connection, the
2004 * pcb is in SYN_RCVD state and we still haven't received the
2005 * ACK of our SYN. It's only in SYN_RCVD -> ESTABLISHED
2006 * transition that lwip decrements pcb->acked so that that ACK
2007 * is not reported to pxtcp_pcb_sent(). If we send something
2008 * now and immediately close (think "daytime", e.g.) while
2009 * still in SYN_RCVD state, we will move directly to
2010 * FIN_WAIT_1 and when our confirming SYN is ACK'ed lwip will
2011 * report it to pxtcp_pcb_sent().
2012 */
2013 DPRINTF2(("forward_inbound: pxtcp %p; pcb %p %s - later...\n",
2014 (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
2015 return;
2016 }
2017
2018
2019 beg = pxtcp->inbuf.unsent; /* private to lwip thread */
2020 lim = pxtcp->inbuf.vacant;
2021
2022 if (beg == lim) {
2023 if (pxtcp->inbound_close && !pxtcp->inbound_close_done) {
2024 pxtcp_pcb_forward_inbound_close(pxtcp);
2025 tcp_output(pcb);
2026 return;
2027 }
2028
2029 /*
2030 * Else, there's no data to send.
2031 *
2032 * If there is free space in the buffer, producer will
2033 * reschedule us as it receives more data and vacant (lim)
2034 * advances.
2035 *
2036 * If buffer is full when all data have been passed to
2037 * tcp_write() but not yet acknowledged, we will advance
2038 * unacked on ACK, freeing some space for producer to write to
2039 * (then see above).
2040 */
2041 return;
2042 }
2043
2044 sndbuf = tcp_sndbuf(pcb);
2045 if (sndbuf == 0) {
2046 /*
2047 * Can't send anything now. As guest ACKs some data, TCP will
2048 * call pxtcp_pcb_sent() callback and we will come here again.
2049 */
2050 return;
2051 }
2052
2053 nsent = 0;
2054
2055 /*
2056 * We have three limits to consider:
2057 * - how much data we have in the ringbuf
2058 * - how much data we are allowed to send
2059 * - ringbuf size
2060 */
2061 toeob = pxtcp->inbuf.bufsize - beg;
2062 if (lim < beg) { /* lim wrapped */
2063 if (sndbuf < toeob) { /* but we are limited by sndbuf */
2064 /* so beg is not going to wrap, treat sndbuf as lim */
2065 lim = beg + sndbuf; /* ... and proceed to the simple case */
2066 }
2067 else { /* we are limited by the end of the buffer, beg will wrap */
2068 u8_t maybemore;
2069 if (toeob == sndbuf || lim == 0) {
2070 maybemore = 0;
2071 }
2072 else {
2073 maybemore = TCP_WRITE_FLAG_MORE;
2074 }
2075
2076 error = tcp_write(pcb, &pxtcp->inbuf.buf[beg], toeob, maybemore);
2077 if (error != ERR_OK) {
2078 goto writeerr;
2079 }
2080 nsent += toeob;
2081 pxtcp->inbuf.unsent = 0; /* wrap */
2082
2083 if (maybemore) {
2084 beg = 0;
2085 sndbuf -= toeob;
2086 }
2087 else {
2088 /* we are done sending, but ... */
2089 goto check_inbound_close;
2090 }
2091 }
2092 }
2093
2094 LWIP_ASSERT1(beg < lim);
2095 sndlim = beg + sndbuf;
2096 if (lim > sndlim) {
2097 lim = sndlim;
2098 }
2099 tolim = lim - beg;
2100 if (tolim > 0) {
2101 error = tcp_write(pcb, &pxtcp->inbuf.buf[beg], (u16_t)tolim, 0);
2102 if (error != ERR_OK) {
2103 goto writeerr;
2104 }
2105 nsent += tolim;
2106 pxtcp->inbuf.unsent = lim;
2107 }
2108
2109 check_inbound_close:
2110 if (pxtcp->inbound_close && pxtcp->inbuf.unsent == pxtcp->inbuf.vacant) {
2111 pxtcp_pcb_forward_inbound_close(pxtcp);
2112 }
2113
2114 DPRINTF2(("forward_inbound: pxtcp %p, pcb %p: sent %d bytes\n",
2115 (void *)pxtcp, (void *)pcb, (int)nsent));
2116 tcp_output(pcb);
2117 pxtcp_pcb_cancel_poll(pxtcp);
2118 return;
2119
2120 writeerr:
2121 if (error == ERR_MEM) {
2122 if (nsent > 0) { /* first write succeeded, second failed */
2123 DPRINTF2(("forward_inbound: pxtcp %p, pcb %p: sent %d bytes only\n",
2124 (void *)pxtcp, (void *)pcb, (int)nsent));
2125 tcp_output(pcb);
2126 }
2127 DPRINTF(("forward_inbound: pxtcp %p, pcb %p: ERR_MEM\n",
2128 (void *)pxtcp, (void *)pcb));
2129 pxtcp_pcb_schedule_poll(pxtcp);
2130 }
2131 else {
2132 DPRINTF(("forward_inbound: pxtcp %p, pcb %p: %s\n",
2133 (void *)pxtcp, (void *)pcb, proxy_lwip_strerr(error)));
2134
2135 /* XXX: We shouldn't get ERR_ARG. Check ERR_CONN conditions early? */
2136 LWIP_ASSERT1(error == ERR_MEM);
2137 }
2138}
2139
2140
2141static void
2142pxtcp_pcb_forward_inbound_close(struct pxtcp *pxtcp)
2143{
2144 struct tcp_pcb *pcb;
2145 err_t error;
2146
2147 LWIP_ASSERT1(pxtcp != NULL);
2148 LWIP_ASSERT1(pxtcp->inbound_close);
2149 LWIP_ASSERT1(!pxtcp->inbound_close_done);
2150 LWIP_ASSERT1(pxtcp->inbuf.unsent == pxtcp->inbuf.vacant);
2151
2152 pcb = pxtcp->pcb;
2153 LWIP_ASSERT1(pcb != NULL);
2154
2155 DPRINTF(("inbound_close: pxtcp %p; pcb %p: %s\n",
2156 (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
2157
2158 error = tcp_shutdown(pcb, /*RX*/ 0, /*TX*/ 1);
2159 if (error != ERR_OK) {
2160 DPRINTF(("inbound_close: pxtcp %p; pcb %p:"
2161 " tcp_shutdown: error=%s\n",
2162 (void *)pxtcp, (void *)pcb, proxy_lwip_strerr(error)));
2163 pxtcp_pcb_schedule_poll(pxtcp);
2164 return;
2165 }
2166
2167 pxtcp_pcb_cancel_poll(pxtcp);
2168 pxtcp->inbound_close_done = 1;
2169
2170
2171 /*
2172 * If we have already done outbound close previously (passive
2173 * close on the pcb), then we must not hold onto a pcb in LAST_ACK
2174 * state since those will be deleted by lwip when that last ack
2175 * comes from the guest.
2176 *
2177 * NB: We do NOT check for deferred delete here, even though we
2178 * have just set one of its conditions, inbound_close_done. We
2179 * let pcb callbacks that called us do that. It's simpler and
2180 * cleaner that way.
2181 */
2182 if (pxtcp->outbound_close_done && pxtcp_pcb_forward_inbound_done(pxtcp)) {
2183 pxtcp_pcb_dissociate(pxtcp);
2184 }
2185}
2186
2187
2188/**
2189 * Check that all forwarded inbound data is sent and acked, and that
2190 * inbound close is scheduled (we aren't called back when it's acked).
2191 */
2192DECLINLINE(int)
2193pxtcp_pcb_forward_inbound_done(const struct pxtcp *pxtcp)
2194{
2195 return (pxtcp->inbound_close_done /* also implies that all data forwarded */
2196 && pxtcp->inbuf.unacked == pxtcp->inbuf.unsent);
2197}
2198
2199
2200/**
2201 * tcp_sent() callback - guest acknowledged len bytes.
2202 *
2203 * We can advance inbuf::unacked index, making more free space in the
2204 * ringbuf and wake up producer on poll manager thread.
2205 *
2206 * We can also try to send more data if we have any since pcb->snd_buf
2207 * was increased and we are now permitted to send more.
2208 */
2209static err_t
2210pxtcp_pcb_sent(void *arg, struct tcp_pcb *pcb, u16_t len)
2211{
2212 struct pxtcp *pxtcp = (struct pxtcp *)arg;
2213 size_t unacked;
2214
2215 LWIP_ASSERT1(pxtcp != NULL);
2216 LWIP_ASSERT1(pxtcp->pcb == pcb);
2217 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
2218 LWIP_UNUSED_ARG(pcb); /* only in assert */
2219
2220 DPRINTF2(("%s: pxtcp %p; pcb %p: +%d ACKed:"
2221 " unacked %d, unsent %d, vacant %d\n",
2222 __func__, (void *)pxtcp, (void *)pcb, (int)len,
2223 (int)pxtcp->inbuf.unacked,
2224 (int)pxtcp->inbuf.unsent,
2225 (int)pxtcp->inbuf.vacant));
2226
2227 if (/* __predict_false */ len == 0) {
2228 /* we are notified to start pulling */
2229 LWIP_ASSERT1(pxtcp->outbound_close_done);
2230 LWIP_ASSERT1(!pxtcp->inbound_close);
2231 LWIP_ASSERT1(pxtcp->inbound_pull);
2232
2233 unacked = pxtcp->inbuf.unacked;
2234 }
2235 else {
2236 /*
2237 * Advance unacked index. Guest acknowledged the data, so it
2238 * won't be needed again for potential retransmits.
2239 */
2240 unacked = pxtcp->inbuf.unacked + len;
2241 if (unacked > pxtcp->inbuf.bufsize) {
2242 unacked -= pxtcp->inbuf.bufsize;
2243 }
2244 pxtcp->inbuf.unacked = unacked;
2245 }
2246
2247 /* arrange for more inbound data */
2248 if (!pxtcp->inbound_close) {
2249 if (!pxtcp->inbound_pull) {
2250 /* wake up producer, in case it has stopped polling for POLLIN */
2251 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp);
2252#ifdef RT_OS_WINDOWS
2253 /**
2254 * We have't got enought room in ring buffer to read atm,
2255 * but we don't want to lose notification from WSAW4ME when
2256 * space would be available, so we reset event with empty recv
2257 */
2258 recv(pxtcp->sock, NULL, 0, 0);
2259#endif
2260 }
2261 else {
2262 ssize_t nread;
2263 int stop_pollin; /* ignored */
2264
2265 nread = pxtcp_sock_read(pxtcp, &stop_pollin);
2266
2267 if (nread < 0) {
2268 int sockerr = -(int)nread;
2269 LWIP_UNUSED_ARG(sockerr);
2270 DPRINTF0(("%s: sock %d: %R[sockerr]\n",
2271 __func__, pxtcp->sock, sockerr));
2272
2273 /*
2274 * Since we are pulling, pxtcp is no longer registered
2275 * with poll manager so we can kill it directly.
2276 */
2277 pxtcp_pcb_reset_pxtcp(pxtcp);
2278 return ERR_ABRT;
2279 }
2280 }
2281 }
2282
2283 /* forward more data if we can */
2284 if (!pxtcp->inbound_close_done) {
2285 pxtcp_pcb_forward_inbound(pxtcp);
2286
2287 /*
2288 * NB: we might have dissociated from a pcb that transitioned
2289 * to LAST_ACK state, so don't refer to pcb below.
2290 */
2291 }
2292
2293
2294 /* have we got all the acks? */
2295 if (pxtcp->inbound_close /* no more new data */
2296 && pxtcp->inbuf.unsent == pxtcp->inbuf.vacant /* all data is sent */
2297 && unacked == pxtcp->inbuf.unsent) /* ... and is acked */
2298 {
2299 char *buf;
2300
2301 DPRINTF(("%s: pxtcp %p; pcb %p; all data ACKed\n",
2302 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
2303
2304 /* no more retransmits, so buf is not needed */
2305 buf = pxtcp->inbuf.buf;
2306 pxtcp->inbuf.buf = NULL;
2307 free(buf);
2308
2309 /* no more acks, so no more callbacks */
2310 if (pxtcp->pcb != NULL) {
2311 tcp_sent(pxtcp->pcb, NULL);
2312 }
2313
2314 /*
2315 * We may be the last callback for this pcb if we have also
2316 * successfully forwarded inbound_close.
2317 */
2318 pxtcp_pcb_maybe_deferred_delete(pxtcp);
2319 }
2320
2321 return ERR_OK;
2322}
2323
2324
2325/**
2326 * Callback from poll manager (pxtcp::msg_inpull) to switch
2327 * pxtcp_pcb_sent() to actively pull the last bits of input. See
2328 * POLLHUP comment in pxtcp_pmgr_pump().
2329 *
2330 * pxtcp::sock is deregistered from poll manager after this callback
2331 * is scheduled.
2332 */
2333static void
2334pxtcp_pcb_pull_inbound(void *ctx)
2335{
2336 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
2337 LWIP_ASSERT1(pxtcp != NULL);
2338
2339 if (pxtcp->pcb == NULL) {
2340 DPRINTF(("%s: pxtcp %p: PCB IS GONE\n", __func__, (void *)pxtcp));
2341 pxtcp_pcb_reset_pxtcp(pxtcp);
2342 return;
2343 }
2344
2345 DPRINTF(("%s: pxtcp %p: pcb %p\n",
2346 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
2347 pxtcp->inbound_pull = 1;
2348 pxtcp->deferred_delete = 1;
2349 pxtcp_pcb_sent(pxtcp, pxtcp->pcb, 0);
2350}
2351
2352
2353/**
2354 * tcp_err() callback.
2355 *
2356 * pcb is not passed to this callback since it may be already
2357 * deallocated by the stack, but we can't do anything useful with it
2358 * anyway since connection is gone.
2359 */
2360static void
2361pxtcp_pcb_err(void *arg, err_t error)
2362{
2363 struct pxtcp *pxtcp = (struct pxtcp *)arg;
2364 LWIP_ASSERT1(pxtcp != NULL);
2365
2366 /*
2367 * ERR_CLSD is special - it is reported here when:
2368 *
2369 * . guest has already half-closed
2370 * . we send FIN to guest when external half-closes
2371 * . guest acks that FIN
2372 *
2373 * Since connection is closed but receive has been already closed
2374 * lwip can only report this via tcp_err. At this point the pcb
2375 * is still alive, so we can peek at it if need be.
2376 *
2377 * The interesting twist is when the ACK from guest that akcs our
2378 * FIN also acks some data. In this scenario lwip will NOT call
2379 * tcp_sent() callback with the ACK for that last bit of data but
2380 * instead will call tcp_err with ERR_CLSD right away. Since that
2381 * ACK also acknowledges all the data, we should run some of
2382 * pxtcp_pcb_sent() logic here.
2383 */
2384 if (error == ERR_CLSD) {
2385 struct tcp_pcb *pcb = pxtcp->pcb; /* still alive */
2386
2387 DPRINTF2(("ERR_CLSD: pxtcp %p; pcb %p:"
2388 " pcb->acked %d;"
2389 " unacked %d, unsent %d, vacant %d\n",
2390 (void *)pxtcp, (void *)pcb,
2391 pcb->acked,
2392 (int)pxtcp->inbuf.unacked,
2393 (int)pxtcp->inbuf.unsent,
2394 (int)pxtcp->inbuf.vacant));
2395
2396 LWIP_ASSERT1(pxtcp->pcb == pcb);
2397 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
2398
2399 if (pcb->acked > 0) {
2400 pxtcp_pcb_sent(pxtcp, pcb, pcb->acked);
2401 }
2402 return;
2403 }
2404
2405 DPRINTF0(("tcp_err: pxtcp=%p, error=%s\n",
2406 (void *)pxtcp, proxy_lwip_strerr(error)));
2407
2408 pxtcp->pcb = NULL; /* pcb is gone */
2409 if (pxtcp->deferred_delete) {
2410 pxtcp_pcb_reset_pxtcp(pxtcp);
2411 }
2412 else {
2413 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
2414 }
2415}
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette