VirtualBox

source: vbox/trunk/src/VBox/NetworkServices/NAT/pxtcp.c@ 51300

Last change on this file since 51300 was 51300, checked in by vboxsync, 11 years ago

pxtcp_sock_read: move code to do recvmsg/WSARecv to a separate function.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 66.1 KB
Line 
1/* -*- indent-tabs-mode: nil; -*- */
2#include "winutils.h"
3
4#include "pxtcp.h"
5
6#include "proxy.h"
7#include "proxy_pollmgr.h"
8#include "pxremap.h"
9#include "portfwd.h" /* fwspec */
10
11#ifndef RT_OS_WINDOWS
12#include <sys/types.h>
13#include <sys/socket.h>
14#include <sys/ioctl.h>
15#ifdef RT_OS_SOLARIS
16#include <sys/filio.h> /* FIONREAD is BSD'ism */
17#endif
18#include <stdlib.h>
19#include <stdint.h>
20#include <stdio.h>
21#include <string.h>
22#include <poll.h>
23
24#include <err.h> /* BSD'ism */
25#else
26#include <stdlib.h>
27#include <stdio.h>
28#include <string.h>
29
30#include <iprt/stdint.h>
31#include "winpoll.h"
32#endif
33
34#include "lwip/opt.h"
35
36#include "lwip/sys.h"
37#include "lwip/tcpip.h"
38#include "lwip/netif.h"
39#include "lwip/tcp_impl.h" /* XXX: to access tcp_abandon() */
40#include "lwip/icmp.h"
41#include "lwip/icmp6.h"
42
43/* NetBSD doesn't report POLLHUP for TCP sockets */
44#ifdef __NetBSD__
45# define HAVE_TCP_POLLHUP 0
46#else
47# define HAVE_TCP_POLLHUP 1
48#endif
49
50
/**
 * Ring buffer for inbound data.  The poll manager thread fills it
 * with data read from the host socket; the data are consumed by
 * scheduling tcp_write() to the pcb on the lwIP thread.
 *
 * NB: The lwIP stack itself is a third party here.  Because of that
 * the buffer is not split two ways (free vs. data) but three ways:
 * free space / sent but unACKed data / unsent data.
 */
struct ringbuf {
    char *buf;
    size_t bufsize;

    /*
     * Beginning of the free space.  The producer writes here, up to
     * (but not past) "unacked".
     */
    volatile size_t vacant;

    /*
     * Beginning of the data that have been sent but not yet
     * acknowledged.  The stack "owns" these data because it may have
     * to retransmit them, so this index is also the limit of the free
     * space available to the producer.
     */
    volatile size_t unacked;

    /*
     * Beginning of the data not yet sent.  The consumer reads/sends
     * from here up to "vacant".  Only the consumer thread touches
     * this index, hence it is not declared volatile.
     */
    size_t unsent;
};
83
84
85/**
86 */
87struct pxtcp {
88 /**
89 * Our poll manager handler. Must be first, strong/weak
90 * references depend on this "inheritance".
91 */
92 struct pollmgr_handler pmhdl;
93
94 /**
95 * lwIP (internal/guest) side of the proxied connection.
96 */
97 struct tcp_pcb *pcb;
98
99 /**
100 * Host (external) side of the proxied connection.
101 */
102 SOCKET sock;
103
104 /**
105 * Socket events we are currently polling for.
106 */
107 int events;
108
109 /**
110 * Socket error. Currently used to save connect(2) errors so that
111 * we can decide if we need to send ICMP error.
112 */
113 int sockerr;
114
115 /**
116 * Interface that we have got the SYN from. Needed to send ICMP
117 * with correct source address.
118 */
119 struct netif *netif;
120
121 /**
122 * For tentatively accepted connections for which we are in
123 * process of connecting to the real destination this is the
124 * initial pbuf that we might need to build ICMP error.
125 *
126 * When connection is established this is used to hold outbound
127 * pbuf chain received by pxtcp_pcb_recv() but not yet completely
128 * forwarded over the socket. We cannot "return" it to lwIP since
129 * the head of the chain is already sent and freed.
130 */
131 struct pbuf *unsent;
132
133 /**
134 * Guest has closed its side. Reported to pxtcp_pcb_recv() only
135 * once and we might not be able to forward it immediately if we
136 * have unsent pbuf.
137 */
138 int outbound_close;
139
140 /**
141 * Outbound half-close has been done on the socket.
142 */
143 int outbound_close_done;
144
145 /**
146 * External has closed its side. We might not be able to forward
147 * it immediately if we have unforwarded data.
148 */
149 int inbound_close;
150
151 /**
152 * Inbound half-close has been done on the pcb.
153 */
154 int inbound_close_done;
155
156 /**
157 * On systems that report POLLHUP as soon as the final FIN is
158 * received on a socket we cannot continue polling for the rest of
159 * input, so we have to read (pull) last data from the socket on
160 * the lwIP thread instead of polling/pushing it from the poll
161 * manager thread. See comment in pxtcp_pmgr_pump() POLLHUP case.
162 */
163 int inbound_pull;
164
165
166 /**
167 * When poll manager schedules delete we may not be able to delete
168 * a pxtcp immediately if not all inbound data has been acked by
169 * the guest: lwIP may need to resend and the data are in pxtcp's
170 * inbuf::buf. We defer delete until all data are acked to
171 * pxtcp_pcb_sent().
172 *
173 * It's also implied by inbound_pull. It probably means that
174 * "deferred" is not a very fortunate name.
175 */
176 int deferred_delete;
177
178 /**
179 * Ring-buffer for inbound data.
180 */
181 struct ringbuf inbuf;
182
183 /**
184 * lwIP thread's strong reference to us.
185 */
186 struct pollmgr_refptr *rp;
187
188
189 /*
190 * We use static messages to call functions on the lwIP thread to
191 * void malloc/free overhead.
192 */
193 struct tcpip_msg msg_delete; /* delete pxtcp */
194 struct tcpip_msg msg_reset; /* reset connection and delete pxtcp */
195 struct tcpip_msg msg_accept; /* confirm accept of proxied connection */
196 struct tcpip_msg msg_outbound; /* trigger send of outbound data */
197 struct tcpip_msg msg_inbound; /* trigger send of inbound data */
198 struct tcpip_msg msg_inpull; /* trigger pull of last inbound data */
199};
200
201
202
203static struct pxtcp *pxtcp_allocate(void);
204static void pxtcp_free(struct pxtcp *);
205
206static void pxtcp_pcb_associate(struct pxtcp *, struct tcp_pcb *);
207static void pxtcp_pcb_dissociate(struct pxtcp *);
208
209/* poll manager callbacks for pxtcp related channels */
210static int pxtcp_pmgr_chan_add(struct pollmgr_handler *, SOCKET, int);
211static int pxtcp_pmgr_chan_pollout(struct pollmgr_handler *, SOCKET, int);
212static int pxtcp_pmgr_chan_pollin(struct pollmgr_handler *, SOCKET, int);
213#if !HAVE_TCP_POLLHUP
214static int pxtcp_pmgr_chan_del(struct pollmgr_handler *, SOCKET, int);
215#endif
216static int pxtcp_pmgr_chan_reset(struct pollmgr_handler *, SOCKET, int);
217
218/* helper functions for sending/receiving pxtcp over poll manager channels */
219static ssize_t pxtcp_chan_send(enum pollmgr_slot_t, struct pxtcp *);
220static ssize_t pxtcp_chan_send_weak(enum pollmgr_slot_t, struct pxtcp *);
221static struct pxtcp *pxtcp_chan_recv(struct pollmgr_handler *, SOCKET, int);
222static struct pxtcp *pxtcp_chan_recv_strong(struct pollmgr_handler *, SOCKET, int);
223
224/* poll manager callbacks for individual sockets */
225static int pxtcp_pmgr_connect(struct pollmgr_handler *, SOCKET, int);
226static int pxtcp_pmgr_pump(struct pollmgr_handler *, SOCKET, int);
227
228/* get incoming traffic into ring buffer */
229static ssize_t pxtcp_sock_read(struct pxtcp *, int *);
230static ssize_t pxtcp_sock_recv(struct pxtcp *, IOVEC *, size_t); /* default */
231
232/* convenience functions for poll manager callbacks */
233static int pxtcp_schedule_delete(struct pxtcp *);
234static int pxtcp_schedule_reset(struct pxtcp *);
235static int pxtcp_schedule_reject(struct pxtcp *);
236
237/* lwip thread callbacks called via proxy_lwip_post() */
238static void pxtcp_pcb_delete_pxtcp(void *);
239static void pxtcp_pcb_reset_pxtcp(void *);
240static void pxtcp_pcb_accept_refuse(void *);
241static void pxtcp_pcb_accept_confirm(void *);
242static void pxtcp_pcb_write_outbound(void *);
243static void pxtcp_pcb_write_inbound(void *);
244static void pxtcp_pcb_pull_inbound(void *);
245
246/* tcp pcb callbacks */
247static err_t pxtcp_pcb_heard(void *, struct tcp_pcb *, err_t); /* global */
248static err_t pxtcp_pcb_accept(void *, struct tcp_pcb *, err_t);
249static err_t pxtcp_pcb_connected(void *, struct tcp_pcb *, err_t);
250static err_t pxtcp_pcb_recv(void *, struct tcp_pcb *, struct pbuf *, err_t);
251static err_t pxtcp_pcb_sent(void *, struct tcp_pcb *, u16_t);
252static err_t pxtcp_pcb_poll(void *, struct tcp_pcb *);
253static void pxtcp_pcb_err(void *, err_t);
254
255static err_t pxtcp_pcb_forward_outbound(struct pxtcp *, struct pbuf *);
256static void pxtcp_pcb_forward_outbound_close(struct pxtcp *);
257
258static void pxtcp_pcb_forward_inbound(struct pxtcp *);
259static void pxtcp_pcb_forward_inbound_close(struct pxtcp *);
260DECLINLINE(int) pxtcp_pcb_forward_inbound_done(const struct pxtcp *);
261static void pxtcp_pcb_schedule_poll(struct pxtcp *);
262static void pxtcp_pcb_cancel_poll(struct pxtcp *);
263
264static void pxtcp_pcb_reject(struct netif *, struct tcp_pcb *, struct pbuf *, int);
265DECLINLINE(void) pxtcp_pcb_maybe_deferred_delete(struct pxtcp *);
266
267/* poll manager handlers for pxtcp channels */
268static struct pollmgr_handler pxtcp_pmgr_chan_add_hdl;
269static struct pollmgr_handler pxtcp_pmgr_chan_pollout_hdl;
270static struct pollmgr_handler pxtcp_pmgr_chan_pollin_hdl;
271#if !HAVE_TCP_POLLHUP
272static struct pollmgr_handler pxtcp_pmgr_chan_del_hdl;
273#endif
274static struct pollmgr_handler pxtcp_pmgr_chan_reset_hdl;
275
276
277/**
278 * Init PXTCP - must be run when neither lwIP tcpip thread, nor poll
279 * manager threads haven't been created yet.
280 */
281void
282pxtcp_init(void)
283{
284 /*
285 * Create channels.
286 */
287#define CHANNEL(SLOT, NAME) do { \
288 NAME##_hdl.callback = NAME; \
289 NAME##_hdl.data = NULL; \
290 NAME##_hdl.slot = -1; \
291 pollmgr_add_chan(SLOT, &NAME##_hdl); \
292 } while (0)
293
294 CHANNEL(POLLMGR_CHAN_PXTCP_ADD, pxtcp_pmgr_chan_add);
295 CHANNEL(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp_pmgr_chan_pollin);
296 CHANNEL(POLLMGR_CHAN_PXTCP_POLLOUT, pxtcp_pmgr_chan_pollout);
297#if !HAVE_TCP_POLLHUP
298 CHANNEL(POLLMGR_CHAN_PXTCP_DEL, pxtcp_pmgr_chan_del);
299#endif
300 CHANNEL(POLLMGR_CHAN_PXTCP_RESET, pxtcp_pmgr_chan_reset);
301
302#undef CHANNEL
303
304 /*
305 * Listen to outgoing connection from guest(s).
306 */
307 tcp_proxy_accept(pxtcp_pcb_heard);
308}
309
310
311/**
312 * Syntactic sugar for sending pxtcp pointer over poll manager
313 * channel. Used by lwip thread functions.
314 */
315static ssize_t
316pxtcp_chan_send(enum pollmgr_slot_t slot, struct pxtcp *pxtcp)
317{
318 return pollmgr_chan_send(slot, &pxtcp, sizeof(pxtcp));
319}
320
321
322/**
323 * Syntactic sugar for sending weak reference to pxtcp over poll
324 * manager channel. Used by lwip thread functions.
325 */
326static ssize_t
327pxtcp_chan_send_weak(enum pollmgr_slot_t slot, struct pxtcp *pxtcp)
328{
329 pollmgr_refptr_weak_ref(pxtcp->rp);
330 return pollmgr_chan_send(slot, &pxtcp->rp, sizeof(pxtcp->rp));
331}
332
333
334/**
335 * Counterpart of pxtcp_chan_send().
336 */
337static struct pxtcp *
338pxtcp_chan_recv(struct pollmgr_handler *handler, SOCKET fd, int revents)
339{
340 struct pxtcp *pxtcp;
341
342 pxtcp = (struct pxtcp *)pollmgr_chan_recv_ptr(handler, fd, revents);
343 return pxtcp;
344}
345
346
347/**
348 * Counterpart of pxtcp_chan_send_weak().
349 */
350static struct pxtcp *
351pxtcp_chan_recv_strong(struct pollmgr_handler *handler, SOCKET fd, int revents)
352{
353 struct pollmgr_refptr *rp;
354 struct pollmgr_handler *base;
355 struct pxtcp *pxtcp;
356
357 rp = (struct pollmgr_refptr *)pollmgr_chan_recv_ptr(handler, fd, revents);
358 base = (struct pollmgr_handler *)pollmgr_refptr_get(rp);
359 pxtcp = (struct pxtcp *)base;
360
361 return pxtcp;
362}
363
364
365/**
366 * Register pxtcp with poll manager.
367 *
368 * Used for POLLMGR_CHAN_PXTCP_ADD and by port-forwarding. Since
369 * error handling is different in these two cases, we leave it up to
370 * the caller.
371 */
372int
373pxtcp_pmgr_add(struct pxtcp *pxtcp)
374{
375 int status;
376
377 LWIP_ASSERT1(pxtcp != NULL);
378 LWIP_ASSERT1(pxtcp->sock >= 0);
379 LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
380 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
381 LWIP_ASSERT1(pxtcp->pmhdl.slot < 0);
382
383 status = pollmgr_add(&pxtcp->pmhdl, pxtcp->sock, pxtcp->events);
384 return status;
385}
386
387
388/**
389 * Unregister pxtcp with poll manager.
390 *
391 * Used for POLLMGR_CHAN_PXTCP_RESET and by port-forwarding (on error
392 * leg).
393 */
394void
395pxtcp_pmgr_del(struct pxtcp *pxtcp)
396{
397 LWIP_ASSERT1(pxtcp != NULL);
398
399 pollmgr_del_slot(pxtcp->pmhdl.slot);
400}
401
402
403/**
404 * POLLMGR_CHAN_PXTCP_ADD handler.
405 *
406 * Get new pxtcp from lwip thread and start polling its socket.
407 */
408static int
409pxtcp_pmgr_chan_add(struct pollmgr_handler *handler, SOCKET fd, int revents)
410{
411 struct pxtcp *pxtcp;
412 int status;
413
414 pxtcp = pxtcp_chan_recv(handler, fd, revents);
415 DPRINTF0(("pxtcp_add: new pxtcp %p; pcb %p; sock %d\n",
416 (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
417
418 status = pxtcp_pmgr_add(pxtcp);
419 if (status < 0) {
420 (void) pxtcp_schedule_reset(pxtcp);
421 }
422
423 return POLLIN;
424}
425
426
427/**
428 * POLLMGR_CHAN_PXTCP_POLLOUT handler.
429 *
430 * pxtcp_pcb_forward_outbound() on the lwIP thread tried to send data
431 * and failed, it now requests us to poll the socket for POLLOUT and
432 * schedule pxtcp_pcb_forward_outbound() when sock is writable again.
433 */
434static int
435pxtcp_pmgr_chan_pollout(struct pollmgr_handler *handler, SOCKET fd, int revents)
436{
437 struct pxtcp *pxtcp;
438
439 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
440 DPRINTF0(("pxtcp_pollout: pxtcp %p\n", (void *)pxtcp));
441
442 if (pxtcp == NULL) {
443 return POLLIN;
444 }
445
446 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
447 LWIP_ASSERT1(pxtcp->pmhdl.slot > 0);
448
449 pxtcp->events |= POLLOUT;
450 pollmgr_update_events(pxtcp->pmhdl.slot, pxtcp->events);
451
452 return POLLIN;
453}
454
455
456/**
457 * POLLMGR_CHAN_PXTCP_POLLIN handler.
458 */
459static int
460pxtcp_pmgr_chan_pollin(struct pollmgr_handler *handler, SOCKET fd, int revents)
461{
462 struct pxtcp *pxtcp;
463
464 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
465 DPRINTF2(("pxtcp_pollin: pxtcp %p\n", (void *)pxtcp));
466
467 if (pxtcp == NULL) {
468 return POLLIN;
469 }
470
471 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
472 LWIP_ASSERT1(pxtcp->pmhdl.slot > 0);
473
474 if (pxtcp->inbound_close) {
475 return POLLIN;
476 }
477
478 pxtcp->events |= POLLIN;
479 pollmgr_update_events(pxtcp->pmhdl.slot, pxtcp->events);
480
481 return POLLIN;
482}
483
484
485#if !HAVE_TCP_POLLHUP
486/**
487 * POLLMGR_CHAN_PXTCP_DEL handler.
488 *
489 * Schedule pxtcp deletion. We only need this if host system doesn't
490 * report POLLHUP for fully closed tcp sockets.
491 */
492static int
493pxtcp_pmgr_chan_del(struct pollmgr_handler *handler, SOCKET fd, int revents)
494{
495 struct pxtcp *pxtcp;
496
497 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
498 if (pxtcp == NULL) {
499 return POLLIN;
500 }
501
502 DPRINTF(("PXTCP_DEL: pxtcp %p; pcb %p; sock %d\n",
503 (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
504
505 LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
506 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
507
508 LWIP_ASSERT1(pxtcp->inbound_close); /* EOF read */
509 LWIP_ASSERT1(pxtcp->outbound_close_done); /* EOF sent */
510
511 pxtcp_pmgr_del(pxtcp);
512 (void) pxtcp_schedule_delete(pxtcp);
513
514 return POLLIN;
515}
516#endif /* !HAVE_TCP_POLLHUP */
517
518
519/**
520 * POLLMGR_CHAN_PXTCP_RESET handler.
521 *
522 * Close the socket with RST and delete pxtcp.
523 */
524static int
525pxtcp_pmgr_chan_reset(struct pollmgr_handler *handler, SOCKET fd, int revents)
526{
527 struct pxtcp *pxtcp;
528
529 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
530 if (pxtcp == NULL) {
531 return POLLIN;
532 }
533
534 DPRINTF0(("PXTCP_RESET: pxtcp %p; pcb %p; sock %d\n",
535 (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
536
537 LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
538 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
539
540 pxtcp_pmgr_del(pxtcp);
541
542 proxy_reset_socket(pxtcp->sock);
543 pxtcp->sock = INVALID_SOCKET;
544
545 (void) pxtcp_schedule_reset(pxtcp);
546
547 return POLLIN;
548}
549
550
551static struct pxtcp *
552pxtcp_allocate(void)
553{
554 struct pxtcp *pxtcp;
555
556 pxtcp = (struct pxtcp *)malloc(sizeof(*pxtcp));
557 if (pxtcp == NULL) {
558 return NULL;
559 }
560
561 pxtcp->pmhdl.callback = NULL;
562 pxtcp->pmhdl.data = (void *)pxtcp;
563 pxtcp->pmhdl.slot = -1;
564
565 pxtcp->pcb = NULL;
566 pxtcp->sock = INVALID_SOCKET;
567 pxtcp->events = 0;
568 pxtcp->sockerr = 0;
569 pxtcp->netif = NULL;
570 pxtcp->unsent = NULL;
571 pxtcp->outbound_close = 0;
572 pxtcp->outbound_close_done = 0;
573 pxtcp->inbound_close = 0;
574 pxtcp->inbound_close_done = 0;
575 pxtcp->inbound_pull = 0;
576 pxtcp->deferred_delete = 0;
577
578 pxtcp->inbuf.bufsize = 64 * 1024;
579 pxtcp->inbuf.buf = (char *)malloc(pxtcp->inbuf.bufsize);
580 if (pxtcp->inbuf.buf == NULL) {
581 free(pxtcp);
582 return NULL;
583 }
584 pxtcp->inbuf.vacant = 0;
585 pxtcp->inbuf.unacked = 0;
586 pxtcp->inbuf.unsent = 0;
587
588 pxtcp->rp = pollmgr_refptr_create(&pxtcp->pmhdl);
589 if (pxtcp->rp == NULL) {
590 free(pxtcp->inbuf.buf);
591 free(pxtcp);
592 return NULL;
593 }
594
595#define CALLBACK_MSG(MSG, FUNC) \
596 do { \
597 pxtcp->MSG.type = TCPIP_MSG_CALLBACK_STATIC; \
598 pxtcp->MSG.sem = NULL; \
599 pxtcp->MSG.msg.cb.function = FUNC; \
600 pxtcp->MSG.msg.cb.ctx = (void *)pxtcp; \
601 } while (0)
602
603 CALLBACK_MSG(msg_delete, pxtcp_pcb_delete_pxtcp);
604 CALLBACK_MSG(msg_reset, pxtcp_pcb_reset_pxtcp);
605 CALLBACK_MSG(msg_accept, pxtcp_pcb_accept_confirm);
606 CALLBACK_MSG(msg_outbound, pxtcp_pcb_write_outbound);
607 CALLBACK_MSG(msg_inbound, pxtcp_pcb_write_inbound);
608 CALLBACK_MSG(msg_inpull, pxtcp_pcb_pull_inbound);
609
610#undef CALLBACK_MSG
611
612 return pxtcp;
613}
614
615
616/**
617 * Exported to fwtcp to create pxtcp for incoming port-forwarded
618 * connections. Completed with pcb in pxtcp_pcb_connect().
619 */
620struct pxtcp *
621pxtcp_create_forwarded(SOCKET sock)
622{
623 struct pxtcp *pxtcp;
624
625 pxtcp = pxtcp_allocate();
626 if (pxtcp == NULL) {
627 return NULL;
628 }
629
630 pxtcp->sock = sock;
631 pxtcp->pmhdl.callback = pxtcp_pmgr_pump;
632 pxtcp->events = 0;
633
634 return pxtcp;
635}
636
637
638static void
639pxtcp_pcb_associate(struct pxtcp *pxtcp, struct tcp_pcb *pcb)
640{
641 LWIP_ASSERT1(pxtcp != NULL);
642 LWIP_ASSERT1(pcb != NULL);
643
644 pxtcp->pcb = pcb;
645
646 tcp_arg(pcb, pxtcp);
647
648 tcp_recv(pcb, pxtcp_pcb_recv);
649 tcp_sent(pcb, pxtcp_pcb_sent);
650 tcp_poll(pcb, NULL, 255);
651 tcp_err(pcb, pxtcp_pcb_err);
652}
653
654
655static void
656pxtcp_free(struct pxtcp *pxtcp)
657{
658 if (pxtcp->unsent != NULL) {
659 pbuf_free(pxtcp->unsent);
660 }
661 if (pxtcp->inbuf.buf != NULL) {
662 free(pxtcp->inbuf.buf);
663 }
664 free(pxtcp);
665}
666
667
668/**
669 * Counterpart to pxtcp_create_forwarded() to destruct pxtcp that
670 * fwtcp failed to register with poll manager to post to lwip thread
671 * for doing connect.
672 */
673void
674pxtcp_cancel_forwarded(struct pxtcp *pxtcp)
675{
676 LWIP_ASSERT1(pxtcp->pcb == NULL);
677 pxtcp_pcb_reset_pxtcp(pxtcp);
678}
679
680
681static void
682pxtcp_pcb_dissociate(struct pxtcp *pxtcp)
683{
684 if (pxtcp == NULL || pxtcp->pcb == NULL) {
685 return;
686 }
687
688 DPRINTF(("%s: pxtcp %p <-> pcb %p\n",
689 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
690
691 /*
692 * We must have dissociated from a fully closed pcb immediately
693 * since lwip recycles them and we don't wan't to mess with what
694 * would be someone else's pcb that we happen to have a stale
695 * pointer to.
696 */
697 LWIP_ASSERT1(pxtcp->pcb->callback_arg == pxtcp);
698
699 tcp_recv(pxtcp->pcb, NULL);
700 tcp_sent(pxtcp->pcb, NULL);
701 tcp_poll(pxtcp->pcb, NULL, 255);
702 tcp_err(pxtcp->pcb, NULL);
703 tcp_arg(pxtcp->pcb, NULL);
704 pxtcp->pcb = NULL;
705}
706
707
708/**
709 * Lwip thread callback invoked via pxtcp::msg_delete
710 *
711 * Since we use static messages to communicate to the lwip thread, we
712 * cannot delete pxtcp without making sure there are no unprocessed
713 * messages in the lwip thread mailbox.
714 *
715 * The easiest way to ensure that is to send this "delete" message as
716 * the last one and when it's processed we know there are no more and
717 * it's safe to delete pxtcp.
718 *
719 * Poll manager handlers should use pxtcp_schedule_delete()
720 * convenience function.
721 */
722static void
723pxtcp_pcb_delete_pxtcp(void *ctx)
724{
725 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
726
727 DPRINTF(("%s: pxtcp %p, pcb %p, sock %d%s\n",
728 __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock,
729 (pxtcp->deferred_delete && !pxtcp->inbound_pull
730 ? " (was deferred)" : "")));
731
732 LWIP_ASSERT1(pxtcp != NULL);
733 LWIP_ASSERT1(pxtcp->pmhdl.slot < 0);
734 LWIP_ASSERT1(pxtcp->outbound_close_done);
735 LWIP_ASSERT1(pxtcp->inbound_close); /* not necessarily done */
736
737
738 /*
739 * pxtcp is no longer registered with poll manager, so it's safe
740 * to close the socket.
741 */
742 if (pxtcp->sock != INVALID_SOCKET) {
743 closesocket(pxtcp->sock);
744 pxtcp->sock = INVALID_SOCKET;
745 }
746
747 /*
748 * We might have already dissociated from a fully closed pcb, or
749 * guest might have sent us a reset while msg_delete was in
750 * transit. If there's no pcb, we are done.
751 */
752 if (pxtcp->pcb == NULL) {
753 pollmgr_refptr_unref(pxtcp->rp);
754 pxtcp_free(pxtcp);
755 return;
756 }
757
758 /*
759 * Have we completely forwarded all inbound traffic to the guest?
760 *
761 * We may still be waiting for ACKs. We may have failed to send
762 * some of the data (tcp_write() failed with ERR_MEM). We may
763 * have failed to send the FIN (tcp_shutdown() failed with
764 * ERR_MEM).
765 */
766 if (pxtcp_pcb_forward_inbound_done(pxtcp)) {
767 pxtcp_pcb_dissociate(pxtcp);
768 pollmgr_refptr_unref(pxtcp->rp);
769 pxtcp_free(pxtcp);
770 }
771 else {
772 DPRINTF2(("delete: pxtcp %p; pcb %p:"
773 " unacked %d, unsent %d, vacant %d, %s - DEFER!\n",
774 (void *)pxtcp, (void *)pxtcp->pcb,
775 (int)pxtcp->inbuf.unacked,
776 (int)pxtcp->inbuf.unsent,
777 (int)pxtcp->inbuf.vacant,
778 pxtcp->inbound_close_done ? "FIN sent" : "FIN is NOT sent"));
779
780 LWIP_ASSERT1(!pxtcp->deferred_delete);
781 pxtcp->deferred_delete = 1;
782 }
783}
784
785
786/**
787 * If we couldn't delete pxtcp right away in the msg_delete callback
788 * from the poll manager thread, we repeat the check at the end of
789 * relevant pcb callbacks.
790 */
791DECLINLINE(void)
792pxtcp_pcb_maybe_deferred_delete(struct pxtcp *pxtcp)
793{
794 if (pxtcp->deferred_delete && pxtcp_pcb_forward_inbound_done(pxtcp)) {
795 pxtcp_pcb_delete_pxtcp(pxtcp);
796 }
797}
798
799
800/**
801 * Poll manager callbacks should use this convenience wrapper to
802 * schedule pxtcp deletion on the lwip thread and to deregister from
803 * the poll manager.
804 */
805static int
806pxtcp_schedule_delete(struct pxtcp *pxtcp)
807{
808 /*
809 * If pollmgr_refptr_get() is called by any channel before
810 * scheduled deletion happens, let them know we are gone.
811 */
812 pxtcp->pmhdl.slot = -1;
813
814 /*
815 * Schedule deletion. Since poll manager thread may be pre-empted
816 * right after we send the message, the deletion may actually
817 * happen on the lwip thread before we return from this function,
818 * so it's not safe to refer to pxtcp after this call.
819 */
820 proxy_lwip_post(&pxtcp->msg_delete);
821
822 /* tell poll manager to deregister us */
823 return -1;
824}
825
826
827/**
828 * Lwip thread callback invoked via pxtcp::msg_reset
829 *
830 * Like pxtcp_pcb_delete(), but sends RST to the guest before
831 * deleting this pxtcp.
832 */
833static void
834pxtcp_pcb_reset_pxtcp(void *ctx)
835{
836 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
837 LWIP_ASSERT1(pxtcp != NULL);
838
839 DPRINTF0(("%s: pxtcp %p, pcb %p, sock %d\n",
840 __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
841
842 if (pxtcp->sock != INVALID_SOCKET) {
843 proxy_reset_socket(pxtcp->sock);
844 pxtcp->sock = INVALID_SOCKET;
845 }
846
847 if (pxtcp->pcb != NULL) {
848 struct tcp_pcb *pcb = pxtcp->pcb;
849 pxtcp_pcb_dissociate(pxtcp);
850 tcp_abort(pcb);
851 }
852
853 pollmgr_refptr_unref(pxtcp->rp);
854 pxtcp_free(pxtcp);
855}
856
857
858
859/**
860 * Poll manager callbacks should use this convenience wrapper to
861 * schedule pxtcp reset and deletion on the lwip thread and to
862 * deregister from the poll manager.
863 *
864 * See pxtcp_schedule_delete() for additional comments.
865 */
866static int
867pxtcp_schedule_reset(struct pxtcp *pxtcp)
868{
869 pxtcp->pmhdl.slot = -1;
870 proxy_lwip_post(&pxtcp->msg_reset);
871 return -1;
872}
873
874
875/**
876 * Reject proxy connection attempt. Depending on the cause (sockerr)
877 * we may just drop the pcb silently, generate an ICMP datagram or
878 * send TCP reset.
879 */
880static void
881pxtcp_pcb_reject(struct netif *netif, struct tcp_pcb *pcb,
882 struct pbuf *p, int sockerr)
883{
884 struct netif *oif;
885 int reset = 0;
886
887 oif = ip_current_netif();
888 ip_current_netif() = netif;
889
890 if (sockerr == ECONNREFUSED) {
891 reset = 1;
892 }
893 else if (PCB_ISIPV6(pcb)) {
894 if (sockerr == EHOSTDOWN) {
895 icmp6_dest_unreach(p, ICMP6_DUR_ADDRESS); /* XXX: ??? */
896 }
897 else if (sockerr == EHOSTUNREACH
898 || sockerr == ENETDOWN
899 || sockerr == ENETUNREACH)
900 {
901 icmp6_dest_unreach(p, ICMP6_DUR_NO_ROUTE);
902 }
903 }
904 else {
905 if (sockerr == EHOSTDOWN
906 || sockerr == EHOSTUNREACH
907 || sockerr == ENETDOWN
908 || sockerr == ENETUNREACH)
909 {
910 icmp_dest_unreach(p, ICMP_DUR_HOST);
911 }
912 }
913
914 ip_current_netif() = oif;
915
916 tcp_abandon(pcb, reset);
917}
918
919
920/**
921 * Called from poll manager thread via pxtcp::msg_accept when proxy
922 * failed to connect to the destination. Also called when we failed
923 * to register pxtcp with poll manager.
924 *
925 * This is like pxtcp_pcb_reset_pxtcp() but is more discriminate in
926 * how this unestablished connection is terminated.
927 */
928static void
929pxtcp_pcb_accept_refuse(void *ctx)
930{
931 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
932
933 DPRINTF0(("%s: pxtcp %p, pcb %p, sock %d: errno %d\n",
934 __func__, (void *)pxtcp, (void *)pxtcp->pcb,
935 pxtcp->sock, pxtcp->sockerr));
936
937 LWIP_ASSERT1(pxtcp != NULL);
938 LWIP_ASSERT1(pxtcp->sock == INVALID_SOCKET);
939
940 if (pxtcp->pcb != NULL) {
941 struct tcp_pcb *pcb = pxtcp->pcb;
942 pxtcp_pcb_dissociate(pxtcp);
943 pxtcp_pcb_reject(pxtcp->netif, pcb, pxtcp->unsent, pxtcp->sockerr);
944 }
945
946 pollmgr_refptr_unref(pxtcp->rp);
947 pxtcp_free(pxtcp);
948}
949
950
951/**
952 * Convenience wrapper for poll manager connect callback to reject
953 * connection attempt.
954 *
955 * Like pxtcp_schedule_reset(), but the callback is more discriminate
956 * in how this unestablished connection is terminated.
957 */
958static int
959pxtcp_schedule_reject(struct pxtcp *pxtcp)
960{
961 pxtcp->msg_accept.msg.cb.function = pxtcp_pcb_accept_refuse;
962 pxtcp->pmhdl.slot = -1;
963 proxy_lwip_post(&pxtcp->msg_accept);
964 return -1;
965}
966
967
968/**
969 * Global tcp_proxy_accept() callback for proxied outgoing TCP
970 * connections from guest(s).
971 */
972static err_t
973pxtcp_pcb_heard(void *arg, struct tcp_pcb *newpcb, err_t error)
974{
975 struct pbuf *p = (struct pbuf *)arg;
976 struct pxtcp *pxtcp;
977 ipX_addr_t dst_addr;
978 int sdom;
979 SOCKET sock;
980 ssize_t nsent;
981 int sockerr = 0;
982
983 LWIP_UNUSED_ARG(error); /* always ERR_OK */
984
985 /*
986 * TCP first calls accept callback when it receives the first SYN
987 * and "tentatively accepts" new proxied connection attempt. When
988 * proxy "confirms" the SYN and sends SYN|ACK and the guest
989 * replies with ACK the accept callback is called again, this time
990 * with the established connection.
991 */
992 LWIP_ASSERT1(newpcb->state == SYN_RCVD_0);
993 tcp_accept(newpcb, pxtcp_pcb_accept);
994 tcp_arg(newpcb, NULL);
995
996 tcp_setprio(newpcb, TCP_PRIO_MAX);
997
998 pxremap_outbound_ipX(PCB_ISIPV6(newpcb), &dst_addr, &newpcb->local_ip);
999
1000 sdom = PCB_ISIPV6(newpcb) ? PF_INET6 : PF_INET;
1001 sock = proxy_connected_socket(sdom, SOCK_STREAM,
1002 &dst_addr, newpcb->local_port);
1003 if (sock == INVALID_SOCKET) {
1004 sockerr = errno;
1005 goto abort;
1006 }
1007
1008 pxtcp = pxtcp_allocate();
1009 if (pxtcp == NULL) {
1010 proxy_reset_socket(sock);
1011 goto abort;
1012 }
1013
1014 /* save initial datagram in case we need to reply with ICMP */
1015 pbuf_ref(p);
1016 pxtcp->unsent = p;
1017 pxtcp->netif = ip_current_netif();
1018
1019 pxtcp_pcb_associate(pxtcp, newpcb);
1020 pxtcp->sock = sock;
1021
1022 pxtcp->pmhdl.callback = pxtcp_pmgr_connect;
1023 pxtcp->events = POLLOUT;
1024
1025 nsent = pxtcp_chan_send(POLLMGR_CHAN_PXTCP_ADD, pxtcp);
1026 if (nsent < 0) {
1027 pxtcp->sock = INVALID_SOCKET;
1028 proxy_reset_socket(sock);
1029 pxtcp_pcb_accept_refuse(pxtcp);
1030 return ERR_ABRT;
1031 }
1032
1033 return ERR_OK;
1034
1035 abort:
1036 DPRINTF0(("%s: pcb %p, sock %d: errno %d\n",
1037 __func__, (void *)newpcb, sock, sockerr));
1038 pxtcp_pcb_reject(ip_current_netif(), newpcb, p, sockerr);
1039 return ERR_ABRT;
1040}
1041
1042
1043/**
1044 * tcp_proxy_accept() callback for accepted proxied outgoing TCP
1045 * connections from guest(s). This is "real" accept with three-way
1046 * handshake completed.
1047 */
1048static err_t
1049pxtcp_pcb_accept(void *arg, struct tcp_pcb *pcb, err_t error)
1050{
1051 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1052
1053 LWIP_UNUSED_ARG(pcb); /* used only in asserts */
1054 LWIP_UNUSED_ARG(error); /* always ERR_OK */
1055
1056 LWIP_ASSERT1(pxtcp != NULL);
1057 LWIP_ASSERT1(pxtcp->pcb = pcb);
1058 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
1059
1060 /* send any inbound data that are already queued */
1061 pxtcp_pcb_forward_inbound(pxtcp);
1062 return ERR_OK;
1063}
1064
1065
1066/**
1067 * Initial poll manager callback for proxied outgoing TCP connections.
1068 * pxtcp_pcb_accept() sets pxtcp::pmhdl::callback to this.
1069 *
1070 * Waits for connect(2) to the destination to complete. On success
1071 * replaces itself with pxtcp_pmgr_pump() callback common to all
1072 * established TCP connections.
1073 */
1074static int
1075pxtcp_pmgr_connect(struct pollmgr_handler *handler, SOCKET fd, int revents)
1076{
1077 struct pxtcp *pxtcp;
1078 int sockerr;
1079
1080 pxtcp = (struct pxtcp *)handler->data;
1081 LWIP_ASSERT1(handler == &pxtcp->pmhdl);
1082 LWIP_ASSERT1(fd == pxtcp->sock);
1083
1084 if (revents & (POLLNVAL | POLLHUP | POLLERR)) {
1085 if (revents & POLLNVAL) {
1086 pxtcp->sock = INVALID_SOCKET;
1087 pxtcp->sockerr = ETIMEDOUT;
1088 }
1089 else {
1090 socklen_t optlen = (socklen_t)sizeof(sockerr);
1091 int status;
1092 SOCKET s;
1093
1094 status = getsockopt(pxtcp->sock, SOL_SOCKET, SO_ERROR,
1095 (char *)&pxtcp->sockerr, &optlen);
1096 if (status < 0) { /* should not happen */
1097 sockerr = errno; /* ??? */
1098 perror("connect: getsockopt");
1099 }
1100 else {
1101#ifndef RT_OS_WINDOWS
1102 errno = pxtcp->sockerr; /* to avoid strerror_r */
1103#else
1104 /* see winutils.h */
1105 WSASetLastError(pxtcp->sockerr);
1106#endif
1107 perror("connect");
1108 }
1109 s = pxtcp->sock;
1110 pxtcp->sock = INVALID_SOCKET;
1111 closesocket(s);
1112 }
1113 return pxtcp_schedule_reject(pxtcp);
1114 }
1115
1116 if (revents & POLLOUT) { /* connect is successful */
1117 /* confirm accept to the guest */
1118 proxy_lwip_post(&pxtcp->msg_accept);
1119
1120 /*
1121 * Switch to common callback used for all established proxied
1122 * connections.
1123 */
1124 pxtcp->pmhdl.callback = pxtcp_pmgr_pump;
1125
1126 /*
1127 * Initially we poll for incoming traffic only. Outgoing
1128 * traffic is fast-forwarded by pxtcp_pcb_recv(); if it fails
1129 * it will ask us to poll for POLLOUT too.
1130 */
1131 pxtcp->events = POLLIN;
1132 return pxtcp->events;
1133 }
1134
1135 /* should never get here */
1136 DPRINTF0(("%s: pxtcp %p, sock %d: unexpected revents 0x%x\n",
1137 __func__, (void *)pxtcp, fd, revents));
1138 return pxtcp_schedule_reset(pxtcp);
1139}
1140
1141
1142/**
1143 * Called from poll manager thread via pxtcp::msg_accept when proxy
1144 * connected to the destination. Finalize accept by sending SYN|ACK
1145 * to the guest.
1146 */
1147static void
1148pxtcp_pcb_accept_confirm(void *ctx)
1149{
1150 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
1151 err_t error;
1152
1153 LWIP_ASSERT1(pxtcp != NULL);
1154 if (pxtcp->pcb == NULL) {
1155 return;
1156 }
1157
1158 /* we are not going to reply with ICMP, so we can drop initial pbuf */
1159 LWIP_ASSERT1(pxtcp->unsent != NULL);
1160 pbuf_free(pxtcp->unsent);
1161 pxtcp->unsent = NULL;
1162
1163 error = tcp_proxy_accept_confirm(pxtcp->pcb);
1164
1165 /*
1166 * If lwIP failed to enqueue SYN|ACK because it's out of pbufs it
1167 * abandons the pcb. Retrying that is not very easy, since it
1168 * would require keeping "fractional state". From guest's point
1169 * of view there is no reply to its SYN so it will either resend
1170 * the SYN (effetively triggering full connection retry for us),
1171 * or it will eventually time out.
1172 */
1173 if (error == ERR_ABRT) {
1174 pxtcp->pcb = NULL; /* pcb is gone */
1175 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
1176 }
1177
1178 /*
1179 * else if (error != ERR_OK): even if tcp_output() failed with
1180 * ERR_MEM - don't give up, that SYN|ACK is enqueued and will be
1181 * retransmitted eventually.
1182 */
1183}
1184
1185
1186/**
1187 * Entry point for port-forwarding.
1188 *
1189 * fwtcp accepts new incoming connection, creates pxtcp for the socket
1190 * (with no pcb yet) and adds it to the poll manager (polling for
1191 * errors only). Then it calls this function to construct the pcb and
1192 * perform connection to the guest.
1193 */
1194void
1195pxtcp_pcb_connect(struct pxtcp *pxtcp, const struct fwspec *fwspec)
1196{
1197 struct sockaddr_storage ss;
1198 socklen_t sslen;
1199 struct tcp_pcb *pcb;
1200 ipX_addr_t src_addr, dst_addr;
1201 u16_t src_port, dst_port;
1202 int status;
1203 err_t error;
1204
1205 LWIP_ASSERT1(pxtcp != NULL);
1206 LWIP_ASSERT1(pxtcp->pcb == NULL);
1207 LWIP_ASSERT1(fwspec->stype == SOCK_STREAM);
1208
1209 pcb = tcp_new();
1210 if (pcb == NULL) {
1211 goto reset;
1212 }
1213
1214 tcp_setprio(pcb, TCP_PRIO_MAX);
1215 pxtcp_pcb_associate(pxtcp, pcb);
1216
1217 sslen = sizeof(ss);
1218 status = getpeername(pxtcp->sock, (struct sockaddr *)&ss, &sslen);
1219 if (status == SOCKET_ERROR) {
1220 goto reset;
1221 }
1222
1223 /* nit: comapres PF and AF, but they are the same everywhere */
1224 LWIP_ASSERT1(ss.ss_family == fwspec->sdom);
1225
1226 status = fwany_ipX_addr_set_src(&src_addr, (const struct sockaddr *)&ss);
1227 if (status == PXREMAP_FAILED) {
1228 goto reset;
1229 }
1230
1231 if (ss.ss_family == PF_INET) {
1232 const struct sockaddr_in *peer4 = (const struct sockaddr_in *)&ss;
1233
1234 src_port = peer4->sin_port;
1235
1236 memcpy(&dst_addr.ip4, &fwspec->dst.sin.sin_addr, sizeof(ip_addr_t));
1237 dst_port = fwspec->dst.sin.sin_port;
1238 }
1239 else { /* PF_INET6 */
1240 const struct sockaddr_in6 *peer6 = (const struct sockaddr_in6 *)&ss;
1241 ip_set_v6(pcb, 1);
1242
1243 src_port = peer6->sin6_port;
1244
1245 memcpy(&dst_addr.ip6, &fwspec->dst.sin6.sin6_addr, sizeof(ip6_addr_t));
1246 dst_port = fwspec->dst.sin6.sin6_port;
1247 }
1248
1249 /* lwip port arguments are in host order */
1250 src_port = ntohs(src_port);
1251 dst_port = ntohs(dst_port);
1252
1253 error = tcp_proxy_bind(pcb, ipX_2_ip(&src_addr), src_port);
1254 if (error != ERR_OK) {
1255 goto reset;
1256 }
1257
1258 error = tcp_connect(pcb, ipX_2_ip(&dst_addr), dst_port,
1259 /* callback: */ pxtcp_pcb_connected);
1260 if (error != ERR_OK) {
1261 goto reset;
1262 }
1263
1264 return;
1265
1266 reset:
1267 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
1268}
1269
1270
1271/**
1272 * Port-forwarded connection to guest is successful, pump data.
1273 */
1274static err_t
1275pxtcp_pcb_connected(void *arg, struct tcp_pcb *pcb, err_t error)
1276{
1277 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1278
1279 LWIP_ASSERT1(error == ERR_OK); /* always called with ERR_OK */
1280 LWIP_UNUSED_ARG(error);
1281
1282 LWIP_ASSERT1(pxtcp != NULL);
1283 LWIP_ASSERT1(pxtcp->pcb == pcb);
1284 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
1285 LWIP_UNUSED_ARG(pcb);
1286
1287 DPRINTF0(("%s: new pxtcp %p; pcb %p; sock %d\n",
1288 __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
1289
1290 /* ACK on connection is like ACK on data in pxtcp_pcb_sent() */
1291 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp);
1292
1293 return ERR_OK;
1294}
1295
1296
1297/**
1298 * tcp_recv() callback.
1299 */
1300static err_t
1301pxtcp_pcb_recv(void *arg, struct tcp_pcb *pcb, struct pbuf *p, err_t error)
1302{
1303 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1304
1305 LWIP_ASSERT1(error == ERR_OK); /* always called with ERR_OK */
1306 LWIP_UNUSED_ARG(error);
1307
1308 LWIP_ASSERT1(pxtcp != NULL);
1309 LWIP_ASSERT1(pxtcp->pcb == pcb);
1310 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
1311 LWIP_UNUSED_ARG(pcb);
1312
1313
1314 /*
1315 * Have we done sending previous batch?
1316 */
1317 if (pxtcp->unsent != NULL) {
1318 if (p != NULL) {
1319 /*
1320 * Return an error to tell TCP to hold onto that pbuf.
1321 * It will be presented to us later from tcp_fasttmr().
1322 */
1323 return ERR_WOULDBLOCK;
1324 }
1325 else {
1326 /*
1327 * Unlike data, p == NULL indicating orderly shutdown is
1328 * NOT presented to us again
1329 */
1330 pxtcp->outbound_close = 1;
1331 return ERR_OK;
1332 }
1333 }
1334
1335
1336 /*
1337 * Guest closed?
1338 */
1339 if (p == NULL) {
1340 pxtcp->outbound_close = 1;
1341 pxtcp_pcb_forward_outbound_close(pxtcp);
1342 return ERR_OK;
1343 }
1344
1345
1346 /*
1347 * Got data, send what we can without blocking.
1348 */
1349 return pxtcp_pcb_forward_outbound(pxtcp, p);
1350}
1351
1352
1353/**
1354 * Guest half-closed its TX side of the connection.
1355 *
1356 * Called either immediately from pxtcp_pcb_recv() when it gets NULL,
1357 * or from pxtcp_pcb_forward_outbound() when it finishes forwarding
1358 * previously unsent data and sees pxtcp::outbound_close flag saved by
1359 * pxtcp_pcb_recv().
1360 */
1361static void
1362pxtcp_pcb_forward_outbound_close(struct pxtcp *pxtcp)
1363{
1364 struct tcp_pcb *pcb;
1365
1366 LWIP_ASSERT1(pxtcp != NULL);
1367 LWIP_ASSERT1(pxtcp->outbound_close);
1368 LWIP_ASSERT1(!pxtcp->outbound_close_done);
1369
1370 pcb = pxtcp->pcb;
1371 LWIP_ASSERT1(pcb != NULL);
1372
1373 DPRINTF(("outbound_close: pxtcp %p; pcb %p %s\n",
1374 (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
1375
1376
1377 /*
1378 * NB: set the flag first, since shutdown() will trigger POLLHUP
1379 * if inbound is already closed, and poll manager asserts
1380 * outbound_close_done (may be it should not?).
1381 */
1382 pxtcp->outbound_close_done = 1;
1383 shutdown(pxtcp->sock, SHUT_WR); /* half-close the socket */
1384
1385#if !HAVE_TCP_POLLHUP
1386 /*
1387 * On NetBSD POLLHUP is not reported for TCP sockets, so we need
1388 * to nudge poll manager manually.
1389 */
1390 if (pxtcp->inbound_close) {
1391 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_DEL, pxtcp);
1392 }
1393#endif
1394
1395
1396 /* no more outbound data coming to us */
1397 tcp_recv(pcb, NULL);
1398
1399 /*
1400 * If we have already done inbound close previously (active close
1401 * on the pcb), then we must not hold onto a pcb in TIME_WAIT
1402 * state since those will be recycled by lwip when it runs out of
1403 * free pcbs in the pool.
1404 *
1405 * The test is true also for a pcb in CLOSING state that waits
1406 * just for the ACK of its FIN (to transition to TIME_WAIT).
1407 */
1408 if (pxtcp_pcb_forward_inbound_done(pxtcp)) {
1409 pxtcp_pcb_dissociate(pxtcp);
1410 }
1411}
1412
1413
1414/**
1415 * Forward outbound data from pcb to socket.
1416 *
1417 * Called by pxtcp_pcb_recv() to forward new data and by callout
1418 * triggered by POLLOUT on the socket to send previously unsent data.
1419 *
1420 * (Re)scehdules one-time callout if not all data are sent.
1421 */
1422static err_t
1423pxtcp_pcb_forward_outbound(struct pxtcp *pxtcp, struct pbuf *p)
1424{
1425 struct pbuf *qs, *q;
1426 size_t qoff;
1427 size_t forwarded;
1428 int sockerr;
1429
1430#if defined(MSG_NOSIGNAL)
1431 const int send_flags = MSG_NOSIGNAL;
1432#else
1433 const int send_flags = 0;
1434#endif
1435
1436
1437 LWIP_ASSERT1(pxtcp->unsent == NULL || pxtcp->unsent == p);
1438
1439 forwarded = 0;
1440 sockerr = 0;
1441
1442 q = NULL;
1443 qoff = 0;
1444
1445 qs = p;
1446 while (qs != NULL) {
1447#ifndef RT_OS_WINDOWS
1448 struct msghdr mh;
1449#else
1450 int rc;
1451#endif
1452 IOVEC iov[8];
1453 const size_t iovsize = sizeof(iov)/sizeof(iov[0]);
1454 size_t fwd1;
1455 ssize_t nsent;
1456 size_t i;
1457
1458 fwd1 = 0;
1459 for (i = 0, q = qs; i < iovsize && q != NULL; ++i, q = q->next) {
1460 LWIP_ASSERT1(q->len > 0);
1461 IOVEC_SET_BASE(iov[i], q->payload);
1462 IOVEC_SET_LEN(iov[i], q->len);
1463 fwd1 += q->len;
1464 }
1465
1466#ifndef RT_OS_WINDOWS
1467 memset(&mh, 0, sizeof(mh));
1468 mh.msg_iov = iov;
1469 mh.msg_iovlen = i;
1470
1471 nsent = sendmsg(pxtcp->sock, &mh, send_flags);
1472#else
1473 /**
1474 * WSASend(,,,DWORD *,,,) - takes SSIZE_T (64bit value) ... so all nsent's
1475 * bits should be zeroed before passing to WSASent.
1476 */
1477 nsent = 0;
1478 rc = WSASend(pxtcp->sock, iov, (DWORD)i, (DWORD *)&nsent, 0, NULL, NULL);
1479 if (rc == SOCKET_ERROR) {
1480 /* WSASent reports SOCKET_ERROR and updates error accessible with
1481 * WSAGetLastError(). We assign nsent to -1, enforcing code below
1482 * to access error in BSD style.
1483 */
1484 warn("pxtcp_pcb_forward_outbound:WSASend error:%d nsent:%d\n",
1485 WSAGetLastError(),
1486 nsent);
1487 nsent = -1;
1488 }
1489#endif
1490
1491 if (nsent == (ssize_t)fwd1) {
1492 /* successfully sent this chain fragment completely */
1493 forwarded += nsent;
1494 qs = q;
1495 }
1496 else if (nsent >= 0) {
1497 /* successfully sent only some data */
1498 forwarded += nsent;
1499
1500 /* find the first pbuf that was not completely forwarded */
1501 qoff = nsent;
1502 for (i = 0, q = qs; i < iovsize && q != NULL; ++i, q = q->next) {
1503 if (qoff < q->len) {
1504 break;
1505 }
1506 qoff -= q->len;
1507 }
1508 LWIP_ASSERT1(q != NULL);
1509 LWIP_ASSERT1(qoff < q->len);
1510 break;
1511 }
1512 else {
1513 /*
1514 * Some errors are really not errors - if we get them,
1515 * it's not different from getting nsent == 0, so filter
1516 * them out here.
1517 */
1518 if (errno != EWOULDBLOCK
1519 && errno != EAGAIN
1520 && errno != ENOBUFS
1521 && errno != ENOMEM
1522 && errno != EINTR)
1523 {
1524 sockerr = errno;
1525 }
1526 q = qs;
1527 qoff = 0;
1528 break;
1529 }
1530 }
1531
1532 if (forwarded > 0) {
1533 tcp_recved(pxtcp->pcb, (u16_t)forwarded);
1534 }
1535
1536 if (q == NULL) { /* everything is forwarded? */
1537 LWIP_ASSERT1(sockerr == 0);
1538 LWIP_ASSERT1(forwarded == p->tot_len);
1539
1540 pxtcp->unsent = NULL;
1541 pbuf_free(p);
1542 if (pxtcp->outbound_close) {
1543 pxtcp_pcb_forward_outbound_close(pxtcp);
1544 }
1545 }
1546 else {
1547 if (q != p) {
1548 /* free forwarded pbufs at the beginning of the chain */
1549 pbuf_ref(q);
1550 pbuf_free(p);
1551 }
1552 if (qoff > 0) {
1553 /* advance payload pointer past the forwarded part */
1554 pbuf_header(q, -(s16_t)qoff);
1555 }
1556 pxtcp->unsent = q;
1557
1558 /*
1559 * Have sendmsg() failed?
1560 *
1561 * Connection reset will be detected by poll and
1562 * pxtcp_schedule_reset() will be called.
1563 *
1564 * Otherwise something *really* unexpected must have happened,
1565 * so we'd better abort.
1566 */
1567 if (sockerr != 0 && sockerr != ECONNRESET) {
1568 struct tcp_pcb *pcb = pxtcp->pcb;
1569 pxtcp_pcb_dissociate(pxtcp);
1570
1571 tcp_abort(pcb);
1572
1573 /* call error callback manually since we've already dissociated */
1574 pxtcp_pcb_err((void *)pxtcp, ERR_ABRT);
1575 return ERR_ABRT;
1576 }
1577
1578 /* schedule one-shot POLLOUT on the socket */
1579 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLOUT, pxtcp);
1580 }
1581 return ERR_OK;
1582}
1583
1584
1585/**
1586 * Callback from poll manager (on POLLOUT) to send data from
1587 * pxtcp::unsent pbuf to socket.
1588 */
1589static void
1590pxtcp_pcb_write_outbound(void *ctx)
1591{
1592 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
1593 LWIP_ASSERT1(pxtcp != NULL);
1594
1595 if (pxtcp->pcb == NULL) {
1596 return;
1597 }
1598
1599 pxtcp_pcb_forward_outbound(pxtcp, pxtcp->unsent);
1600}
1601
1602
1603/**
1604 * Common poll manager callback used by both outgoing and incoming
1605 * (port-forwarded) connections that has connected socket.
1606 */
1607static int
1608pxtcp_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents)
1609{
1610 struct pxtcp *pxtcp;
1611 int status;
1612 int sockerr;
1613
1614 pxtcp = (struct pxtcp *)handler->data;
1615 LWIP_ASSERT1(handler == &pxtcp->pmhdl);
1616 LWIP_ASSERT1(fd == pxtcp->sock);
1617
1618 if (revents & POLLNVAL) {
1619 pxtcp->sock = INVALID_SOCKET;
1620 return pxtcp_schedule_reset(pxtcp);
1621 }
1622
1623 if (revents & POLLERR) {
1624 socklen_t optlen = (socklen_t)sizeof(sockerr);
1625
1626 status = getsockopt(pxtcp->sock, SOL_SOCKET, SO_ERROR,
1627 (char *)&sockerr, &optlen);
1628 if (status < 0) { /* should not happen */
1629 perror("getsockopt");
1630 sockerr = ECONNRESET;
1631 }
1632
1633 DPRINTF0(("sock %d: errno %d\n", fd, sockerr));
1634 return pxtcp_schedule_reset(pxtcp);
1635 }
1636
1637 if (revents & POLLOUT) {
1638 pxtcp->events &= ~POLLOUT;
1639 proxy_lwip_post(&pxtcp->msg_outbound);
1640 }
1641
1642 if (revents & POLLIN) {
1643 ssize_t nread;
1644 int stop_pollin;
1645
1646 nread = pxtcp_sock_read(pxtcp, &stop_pollin);
1647 if (nread < 0) {
1648 sockerr = -(int)nread;
1649 DPRINTF0(("sock %d: errno %d\n", fd, sockerr));
1650 return pxtcp_schedule_reset(pxtcp);
1651 }
1652
1653 if (stop_pollin) {
1654 pxtcp->events &= ~POLLIN;
1655 }
1656
1657 if (nread > 0) {
1658 proxy_lwip_post(&pxtcp->msg_inbound);
1659#if !HAVE_TCP_POLLHUP
1660 /*
1661 * If host does not report POLLHUP for closed sockets
1662 * (e.g. NetBSD) we should check for full close manually.
1663 */
1664 if (pxtcp->inbound_close && pxtcp->outbound_close_done) {
1665 LWIP_ASSERT1((revents & POLLHUP) == 0);
1666 return pxtcp_schedule_delete(pxtcp);
1667 }
1668#endif
1669 }
1670 }
1671
1672#if !HAVE_TCP_POLLHUP
1673 LWIP_ASSERT1((revents & POLLHUP) == 0);
1674#else
1675 if (revents & POLLHUP) {
1676 /*
1677 * Linux and Darwin seems to report POLLHUP when both
1678 * directions are shut down. And they do report POLLHUP even
1679 * when there's unread data (which they aslo report as POLLIN
1680 * along with that POLLHUP).
1681 *
1682 * FreeBSD (from source inspection) seems to follow Linux,
1683 * reporting POLLHUP when both directions are shut down, but
1684 * POLLHUP is always accompanied with POLLIN.
1685 *
1686 * NetBSD never reports POLLHUP for sockets.
1687 *
1688 * ---
1689 *
1690 * If external half-closes first, we don't get POLLHUP, we
1691 * recv 0 bytes from the socket as EOF indicator, stop polling
1692 * for POLLIN and poll with events == 0 (with occasional
1693 * one-shot POLLOUT). When guest eventually closes, we get
1694 * POLLHUP.
1695 *
1696 * If guest half-closes first things are more tricky. As soon
1697 * as host sees the FIN from external it will spam POLLHUP,
1698 * even when there's unread data. The problem is that we
1699 * might have stopped polling for POLLIN because the ring
1700 * buffer is full or we were polling POLLIN but can't read all
1701 * of the data becuase buffer doesn't have enough space.
1702 * Either way, there's unread data but we can't keep polling
1703 * the socket.
1704 */
1705 DPRINTF(("sock %d: HUP\n", fd));
1706 LWIP_ASSERT1(pxtcp->outbound_close_done);
1707
1708 if (pxtcp->inbound_close) {
1709 /* there's no unread data, we are done */
1710 return pxtcp_schedule_delete(pxtcp);
1711 }
1712 else {
1713 /* DPRINTF */ {
1714#ifndef RT_OS_WINDOWS
1715 int unread;
1716#else
1717 u_long unread;
1718#endif
1719 status = ioctlsocket(fd, FIONREAD, &unread);
1720 if (status == SOCKET_ERROR) {
1721 perror("FIONREAD");
1722 }
1723 else {
1724 DPRINTF2(("sock %d: %d UNREAD bytes\n", fd, unread));
1725 }
1726 }
1727
1728 /*
1729 * We cannot just set a flag here and let pxtcp_pcb_sent()
1730 * notice and start pulling, because if we are preempted
1731 * before setting the flag and all data in inbuf is ACKed
1732 * there will be no more calls to pxtcp_pcb_sent() to
1733 * notice the flag.
1734 *
1735 * We cannot set a flag and then send a message to make
1736 * sure it noticed, because if it has and it has read all
1737 * data while the message is in transit it will delete
1738 * pxtcp.
1739 *
1740 * In a sense this message is like msg_delete (except we
1741 * ask to pull some data first).
1742 */
1743 proxy_lwip_post(&pxtcp->msg_inpull);
1744 pxtcp->pmhdl.slot = -1;
1745 return -1;
1746 }
1747 /* NOTREACHED */
1748 } /* POLLHUP */
1749#endif /* HAVE_TCP_POLLHUP */
1750
1751 return pxtcp->events;
1752}
1753
1754
1755/**
1756 * Read data from socket to ringbuf. This may be used both on lwip
1757 * and poll manager threads.
1758 *
1759 * Flag pointed to by pstop is set when further reading is impossible,
1760 * either temporary when buffer is full, or permanently when EOF is
1761 * received.
1762 *
1763 * Returns number of bytes read. NB: EOF is reported as 1!
1764 *
1765 * Returns zero if nothing was read, either because buffer is full, or
1766 * if no data is available (EAGAIN, EINTR &c).
1767 *
1768 * Returns -errno on real socket errors.
1769 */
1770static ssize_t
1771pxtcp_sock_read(struct pxtcp *pxtcp, int *pstop)
1772{
1773 IOVEC iov[2];
1774 size_t iovlen;
1775 ssize_t nread;
1776
1777 const size_t sz = pxtcp->inbuf.bufsize;
1778 size_t beg, lim, wrnew;
1779
1780 *pstop = 0;
1781
1782 beg = pxtcp->inbuf.vacant;
1783 IOVEC_SET_BASE(iov[0], &pxtcp->inbuf.buf[beg]);
1784
1785 /* lim is the index we can NOT write to */
1786 lim = pxtcp->inbuf.unacked;
1787 if (lim == 0) {
1788 lim = sz - 1; /* empty slot at the end */
1789 }
1790 else if (lim == 1) {
1791 lim = sz; /* empty slot at the beginning */
1792 }
1793 else {
1794 --lim;
1795 }
1796
1797 if (beg == lim) {
1798 /*
1799 * Buffer is full, stop polling for POLLIN.
1800 *
1801 * pxtcp_pcb_sent() will re-enable POLLIN when guest ACKs
1802 * data, freeing space in the ring buffer.
1803 */
1804 *pstop = 1;
1805 return 0;
1806 }
1807
1808 if (beg < lim) {
1809 /* free space in one chunk */
1810 iovlen = 1;
1811 IOVEC_SET_LEN(iov[0], lim - beg);
1812 }
1813 else {
1814 /* free space in two chunks */
1815 iovlen = 2;
1816 IOVEC_SET_LEN(iov[0], sz - beg);
1817 IOVEC_SET_BASE(iov[1], &pxtcp->inbuf.buf[0]);
1818 IOVEC_SET_LEN(iov[1], lim);
1819 }
1820
1821 /*
1822 * TODO: This is where application-level proxy can hook into to
1823 * process inbound traffic.
1824 */
1825 nread = pxtcp_sock_recv(pxtcp, iov, iovlen);
1826
1827 if (nread > 0) {
1828 wrnew = beg + nread;
1829 if (wrnew >= sz) {
1830 wrnew -= sz;
1831 }
1832 pxtcp->inbuf.vacant = wrnew;
1833 DPRINTF2(("pxtcp %p: sock %d read %d bytes\n",
1834 (void *)pxtcp, pxtcp->sock, (int)nread));
1835 return nread;
1836 }
1837 else if (nread == 0) {
1838 *pstop = 1;
1839 pxtcp->inbound_close = 1;
1840 DPRINTF2(("pxtcp %p: sock %d read EOF\n",
1841 (void *)pxtcp, pxtcp->sock));
1842 return 1;
1843 }
1844 else if (errno == EWOULDBLOCK || errno == EAGAIN || errno == EINTR) {
1845 /* haven't read anything, just return */
1846 DPRINTF2(("pxtcp %p: sock %d read cancelled\n",
1847 (void *)pxtcp, pxtcp->sock));
1848 return 0;
1849 }
1850 else {
1851 /* socket error! */
1852 DPRINTF0(("pxtcp %p: sock %d read errno %d\n",
1853 (void *)pxtcp, pxtcp->sock, errno));
1854 return -errno;
1855 }
1856}
1857
1858
1859#if !defined(RT_OS_WINDOWS)
1860static ssize_t
1861pxtcp_sock_recv(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen)
1862{
1863 struct msghdr mh;
1864 ssize_t nread;
1865
1866 memset(&mh, 0, sizeof(mh));
1867
1868 mh.msg_iov = iov;
1869 mh.msg_iovlen = iovlen;
1870
1871 nread = recvmsg(pxtcp->sock, &mh, 0);
1872
1873 return nread;
1874}
1875#else /* RT_OS_WINDOWS */
1876static ssize_t
1877pxtcp_sock_recv(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen)
1878{
1879 DWORD flags;
1880 DWORD nread;
1881 int status;
1882
1883 flags = 0;
1884 status = WSARecv(pxtcp->sock, iov, (DWORD)iovlen, &nread,
1885 &flags, NULL, NULL);
1886 if (status == SOCKET_ERROR) {
1887 nread = -1;
1888 }
1889
1890 return (ssize_t)nread;
1891}
1892#endif /* RT_OS_WINDOWS */
1893
1894
1895/**
1896 * Callback from poll manager (pxtcp::msg_inbound) to trigger output
1897 * from ringbuf to guest.
1898 */
1899static void
1900pxtcp_pcb_write_inbound(void *ctx)
1901{
1902 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
1903 LWIP_ASSERT1(pxtcp != NULL);
1904
1905 if (pxtcp->pcb == NULL) {
1906 return;
1907 }
1908
1909 pxtcp_pcb_forward_inbound(pxtcp);
1910}
1911
1912
1913/**
1914 * tcp_poll() callback
1915 *
1916 * We swtich it on when tcp_write() or tcp_shutdown() fail with
1917 * ERR_MEM to prevent connection from stalling. If there are ACKs or
1918 * more inbound data then pxtcp_pcb_forward_inbound() will be
1919 * triggered again, but if neither happens, tcp_poll() comes to the
1920 * rescue.
1921 */
1922static err_t
1923pxtcp_pcb_poll(void *arg, struct tcp_pcb *pcb)
1924{
1925 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1926 LWIP_UNUSED_ARG(pcb);
1927
1928 DPRINTF2(("%s: pxtcp %p; pcb %p\n",
1929 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
1930
1931 pxtcp_pcb_forward_inbound(pxtcp);
1932
1933 /*
1934 * If the last thing holding up deletion of the pxtcp was failed
1935 * tcp_shutdown() and it succeeded, we may be the last callback.
1936 */
1937 pxtcp_pcb_maybe_deferred_delete(pxtcp);
1938
1939 return ERR_OK;
1940}
1941
1942
1943static void
1944pxtcp_pcb_schedule_poll(struct pxtcp *pxtcp)
1945{
1946 tcp_poll(pxtcp->pcb, pxtcp_pcb_poll, 0);
1947}
1948
1949
1950static void
1951pxtcp_pcb_cancel_poll(struct pxtcp *pxtcp)
1952{
1953 tcp_poll(pxtcp->pcb, NULL, 255);
1954}
1955
1956
1957/**
1958 * Forward inbound data from ring buffer to the guest.
1959 *
1960 * Scheduled by poll manager thread after it receives more data into
1961 * the ring buffer (we have more data to send).
1962
1963 * Also called from tcp_sent() callback when guest ACKs some data,
1964 * increasing pcb->snd_buf (we are permitted to send more data).
1965 *
1966 * Also called from tcp_poll() callback if previous attempt to forward
1967 * inbound data failed with ERR_MEM (we need to try again).
1968 */
1969static void
1970pxtcp_pcb_forward_inbound(struct pxtcp *pxtcp)
1971{
1972 struct tcp_pcb *pcb;
1973 size_t sndbuf;
1974 size_t beg, lim, sndlim;
1975 size_t toeob, tolim;
1976 size_t nsent;
1977 err_t error;
1978
1979 LWIP_ASSERT1(pxtcp != NULL);
1980 pcb = pxtcp->pcb;
1981 if (pcb == NULL) {
1982 return;
1983 }
1984
1985 if (/* __predict_false */ pcb->state < ESTABLISHED) {
1986 /*
1987 * If we have just confirmed accept of this connection, the
1988 * pcb is in SYN_RCVD state and we still haven't received the
1989 * ACK of our SYN. It's only in SYN_RCVD -> ESTABLISHED
1990 * transition that lwip decrements pcb->acked so that that ACK
1991 * is not reported to pxtcp_pcb_sent(). If we send something
1992 * now and immediately close (think "daytime", e.g.) while
1993 * still in SYN_RCVD state, we will move directly to
1994 * FIN_WAIT_1 and when our confirming SYN is ACK'ed lwip will
1995 * report it to pxtcp_pcb_sent().
1996 */
1997 DPRINTF2(("forward_inbound: pxtcp %p; pcb %p %s - later...\n",
1998 (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
1999 return;
2000 }
2001
2002
2003 beg = pxtcp->inbuf.unsent; /* private to lwip thread */
2004 lim = pxtcp->inbuf.vacant;
2005
2006 if (beg == lim) {
2007 if (pxtcp->inbound_close && !pxtcp->inbound_close_done) {
2008 pxtcp_pcb_forward_inbound_close(pxtcp);
2009 tcp_output(pcb);
2010 return;
2011 }
2012
2013 /*
2014 * Else, there's no data to send.
2015 *
2016 * If there is free space in the buffer, producer will
2017 * reschedule us as it receives more data and vacant (lim)
2018 * advances.
2019 *
2020 * If buffer is full when all data have been passed to
2021 * tcp_write() but not yet acknowledged, we will advance
2022 * unacked on ACK, freeing some space for producer to write to
2023 * (then see above).
2024 */
2025 return;
2026 }
2027
2028 sndbuf = tcp_sndbuf(pcb);
2029 if (sndbuf == 0) {
2030 /*
2031 * Can't send anything now. As guest ACKs some data, TCP will
2032 * call pxtcp_pcb_sent() callback and we will come here again.
2033 */
2034 return;
2035 }
2036
2037 nsent = 0;
2038
2039 /*
2040 * We have three limits to consider:
2041 * - how much data we have in the ringbuf
2042 * - how much data we are allowed to send
2043 * - ringbuf size
2044 */
2045 toeob = pxtcp->inbuf.bufsize - beg;
2046 if (lim < beg) { /* lim wrapped */
2047 if (sndbuf < toeob) { /* but we are limited by sndbuf */
2048 /* so beg is not going to wrap, treat sndbuf as lim */
2049 lim = beg + sndbuf; /* ... and proceed to the simple case */
2050 }
2051 else { /* we are limited by the end of the buffer, beg will wrap */
2052 u8_t maybemore;
2053 if (toeob == sndbuf || lim == 0) {
2054 maybemore = 0;
2055 }
2056 else {
2057 maybemore = TCP_WRITE_FLAG_MORE;
2058 }
2059
2060 error = tcp_write(pcb, &pxtcp->inbuf.buf[beg], toeob, maybemore);
2061 if (error != ERR_OK) {
2062 goto writeerr;
2063 }
2064 nsent += toeob;
2065 pxtcp->inbuf.unsent = 0; /* wrap */
2066
2067 if (maybemore) {
2068 beg = 0;
2069 sndbuf -= toeob;
2070 }
2071 else {
2072 /* we are done sending, but ... */
2073 goto check_inbound_close;
2074 }
2075 }
2076 }
2077
2078 LWIP_ASSERT1(beg < lim);
2079 sndlim = beg + sndbuf;
2080 if (lim > sndlim) {
2081 lim = sndlim;
2082 }
2083 tolim = lim - beg;
2084 if (tolim > 0) {
2085 error = tcp_write(pcb, &pxtcp->inbuf.buf[beg], (u16_t)tolim, 0);
2086 if (error != ERR_OK) {
2087 goto writeerr;
2088 }
2089 nsent += tolim;
2090 pxtcp->inbuf.unsent = lim;
2091 }
2092
2093 check_inbound_close:
2094 if (pxtcp->inbound_close && pxtcp->inbuf.unsent == pxtcp->inbuf.vacant) {
2095 pxtcp_pcb_forward_inbound_close(pxtcp);
2096 }
2097
2098 DPRINTF2(("forward_inbound: pxtcp %p, pcb %p: sent %d bytes\n",
2099 (void *)pxtcp, (void *)pcb, (int)nsent));
2100 tcp_output(pcb);
2101 pxtcp_pcb_cancel_poll(pxtcp);
2102 return;
2103
2104 writeerr:
2105 if (error == ERR_MEM) {
2106 if (nsent > 0) { /* first write succeeded, second failed */
2107 DPRINTF2(("forward_inbound: pxtcp %p, pcb %p: sent %d bytes only\n",
2108 (void *)pxtcp, (void *)pcb, (int)nsent));
2109 tcp_output(pcb);
2110 }
2111 DPRINTF(("forward_inbound: pxtcp %p, pcb %p: ERR_MEM\n",
2112 (void *)pxtcp, (void *)pcb));
2113 pxtcp_pcb_schedule_poll(pxtcp);
2114 }
2115 else {
2116 DPRINTF(("forward_inbound: pxtcp %p, pcb %p: %s\n",
2117 (void *)pxtcp, (void *)pcb, proxy_lwip_strerr(error)));
2118
2119 /* XXX: We shouldn't get ERR_ARG. Check ERR_CONN conditions early? */
2120 LWIP_ASSERT1(error == ERR_MEM);
2121 }
2122}
2123
2124
2125static void
2126pxtcp_pcb_forward_inbound_close(struct pxtcp *pxtcp)
2127{
2128 struct tcp_pcb *pcb;
2129 err_t error;
2130
2131 LWIP_ASSERT1(pxtcp != NULL);
2132 LWIP_ASSERT1(pxtcp->inbound_close);
2133 LWIP_ASSERT1(!pxtcp->inbound_close_done);
2134 LWIP_ASSERT1(pxtcp->inbuf.unsent == pxtcp->inbuf.vacant);
2135
2136 pcb = pxtcp->pcb;
2137 LWIP_ASSERT1(pcb != NULL);
2138
2139 DPRINTF(("inbound_close: pxtcp %p; pcb %p: %s\n",
2140 (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
2141
2142 error = tcp_shutdown(pcb, /*RX*/ 0, /*TX*/ 1);
2143 if (error != ERR_OK) {
2144 DPRINTF(("inbound_close: pxtcp %p; pcb %p:"
2145 " tcp_shutdown: error=%s\n",
2146 (void *)pxtcp, (void *)pcb, proxy_lwip_strerr(error)));
2147 pxtcp_pcb_schedule_poll(pxtcp);
2148 return;
2149 }
2150
2151 pxtcp_pcb_cancel_poll(pxtcp);
2152 pxtcp->inbound_close_done = 1;
2153
2154
2155 /*
2156 * If we have already done outbound close previously (passive
2157 * close on the pcb), then we must not hold onto a pcb in LAST_ACK
2158 * state since those will be deleted by lwip when that last ack
2159 * comes from the guest.
2160 *
2161 * NB: We do NOT check for deferred delete here, even though we
2162 * have just set one of its conditions, inbound_close_done. We
2163 * let pcb callbacks that called us do that. It's simpler and
2164 * cleaner that way.
2165 */
2166 if (pxtcp->outbound_close_done && pxtcp_pcb_forward_inbound_done(pxtcp)) {
2167 pxtcp_pcb_dissociate(pxtcp);
2168 }
2169}
2170
2171
2172/**
2173 * Check that all forwarded inbound data is sent and acked, and that
2174 * inbound close is scheduled (we aren't called back when it's acked).
2175 */
2176DECLINLINE(int)
2177pxtcp_pcb_forward_inbound_done(const struct pxtcp *pxtcp)
2178{
2179 return (pxtcp->inbound_close_done /* also implies that all data forwarded */
2180 && pxtcp->inbuf.unacked == pxtcp->inbuf.unsent);
2181}
2182
2183
2184/**
2185 * tcp_sent() callback - guest acknowledged len bytes.
2186 *
2187 * We can advance inbuf::unacked index, making more free space in the
2188 * ringbuf and wake up producer on poll manager thread.
2189 *
2190 * We can also try to send more data if we have any since pcb->snd_buf
2191 * was increased and we are now permitted to send more.
2192 */
2193static err_t
2194pxtcp_pcb_sent(void *arg, struct tcp_pcb *pcb, u16_t len)
2195{
2196 struct pxtcp *pxtcp = (struct pxtcp *)arg;
2197 size_t unacked;
2198
2199 LWIP_ASSERT1(pxtcp != NULL);
2200 LWIP_ASSERT1(pxtcp->pcb == pcb);
2201 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
2202 LWIP_UNUSED_ARG(pcb); /* only in assert */
2203
2204 DPRINTF2(("%s: pxtcp %p; pcb %p: +%d ACKed:"
2205 " unacked %d, unsent %d, vacant %d\n",
2206 __func__, (void *)pxtcp, (void *)pcb, (int)len,
2207 (int)pxtcp->inbuf.unacked,
2208 (int)pxtcp->inbuf.unsent,
2209 (int)pxtcp->inbuf.vacant));
2210
2211 if (/* __predict_false */ len == 0) {
2212 /* we are notified to start pulling */
2213 LWIP_ASSERT1(pxtcp->outbound_close_done);
2214 LWIP_ASSERT1(!pxtcp->inbound_close);
2215 LWIP_ASSERT1(pxtcp->inbound_pull);
2216
2217 unacked = pxtcp->inbuf.unacked;
2218 }
2219 else {
2220 /*
2221 * Advance unacked index. Guest acknowledged the data, so it
2222 * won't be needed again for potential retransmits.
2223 */
2224 unacked = pxtcp->inbuf.unacked + len;
2225 if (unacked > pxtcp->inbuf.bufsize) {
2226 unacked -= pxtcp->inbuf.bufsize;
2227 }
2228 pxtcp->inbuf.unacked = unacked;
2229 }
2230
2231 /* arrange for more inbound data */
2232 if (!pxtcp->inbound_close) {
2233 if (!pxtcp->inbound_pull) {
2234 /* wake up producer, in case it has stopped polling for POLLIN */
2235 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp);
2236#ifdef RT_OS_WINDOWS
2237 /**
2238 * We have't got enought room in ring buffer to read atm,
2239 * but we don't want to lose notification from WSAW4ME when
2240 * space would be available, so we reset event with empty recv
2241 */
2242 recv(pxtcp->sock, NULL, 0, 0);
2243#endif
2244 }
2245 else {
2246 ssize_t nread;
2247 int stop_pollin; /* ignored */
2248
2249 nread = pxtcp_sock_read(pxtcp, &stop_pollin);
2250
2251 if (nread < 0) {
2252 int sockerr = -(int)nread;
2253 LWIP_UNUSED_ARG(sockerr);
2254 DPRINTF0(("%s: sock %d: errno %d\n",
2255 __func__, pxtcp->sock, sockerr));
2256
2257 /*
2258 * Since we are pulling, pxtcp is no longer registered
2259 * with poll manager so we can kill it directly.
2260 */
2261 pxtcp_pcb_reset_pxtcp(pxtcp);
2262 return ERR_ABRT;
2263 }
2264 }
2265 }
2266
2267 /* forward more data if we can */
2268 if (!pxtcp->inbound_close_done) {
2269 pxtcp_pcb_forward_inbound(pxtcp);
2270
2271 /*
2272 * NB: we might have dissociated from a pcb that transitioned
2273 * to LAST_ACK state, so don't refer to pcb below.
2274 */
2275 }
2276
2277
2278 /* have we got all the acks? */
2279 if (pxtcp->inbound_close /* no more new data */
2280 && pxtcp->inbuf.unsent == pxtcp->inbuf.vacant /* all data is sent */
2281 && unacked == pxtcp->inbuf.unsent) /* ... and is acked */
2282 {
2283 char *buf;
2284
2285 DPRINTF(("%s: pxtcp %p; pcb %p; all data ACKed\n",
2286 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
2287
2288 /* no more retransmits, so buf is not needed */
2289 buf = pxtcp->inbuf.buf;
2290 pxtcp->inbuf.buf = NULL;
2291 free(buf);
2292
2293 /* no more acks, so no more callbacks */
2294 if (pxtcp->pcb != NULL) {
2295 tcp_sent(pxtcp->pcb, NULL);
2296 }
2297
2298 /*
2299 * We may be the last callback for this pcb if we have also
2300 * successfully forwarded inbound_close.
2301 */
2302 pxtcp_pcb_maybe_deferred_delete(pxtcp);
2303 }
2304
2305 return ERR_OK;
2306}
2307
2308
2309/**
2310 * Callback from poll manager (pxtcp::msg_inpull) to switch
2311 * pxtcp_pcb_sent() to actively pull the last bits of input. See
2312 * POLLHUP comment in pxtcp_pmgr_pump().
2313 *
2314 * pxtcp::sock is deregistered from poll manager after this callback
2315 * is scheduled.
2316 */
2317static void
2318pxtcp_pcb_pull_inbound(void *ctx)
2319{
2320 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
2321 LWIP_ASSERT1(pxtcp != NULL);
2322
2323 if (pxtcp->pcb == NULL) {
2324 DPRINTF(("%s: pxtcp %p: PCB IS GONE\n", __func__, (void *)pxtcp));
2325 pxtcp_pcb_reset_pxtcp(pxtcp);
2326 return;
2327 }
2328
2329 DPRINTF(("%s: pxtcp %p: pcb %p\n",
2330 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
2331 pxtcp->inbound_pull = 1;
2332 pxtcp->deferred_delete = 1;
2333 pxtcp_pcb_sent(pxtcp, pxtcp->pcb, 0);
2334}
2335
2336
2337/**
2338 * tcp_err() callback.
2339 *
2340 * pcb is not passed to this callback since it may be already
2341 * deallocated by the stack, but we can't do anything useful with it
2342 * anyway since connection is gone.
2343 */
2344static void
2345pxtcp_pcb_err(void *arg, err_t error)
2346{
2347 struct pxtcp *pxtcp = (struct pxtcp *)arg;
2348 LWIP_ASSERT1(pxtcp != NULL);
2349
2350 /*
2351 * ERR_CLSD is special - it is reported here when:
2352 *
2353 * . guest has already half-closed
2354 * . we send FIN to guest when external half-closes
2355 * . guest acks that FIN
2356 *
2357 * Since connection is closed but receive has been already closed
2358 * lwip can only report this via tcp_err. At this point the pcb
2359 * is still alive, so we can peek at it if need be.
2360 *
2361 * The interesting twist is when the ACK from guest that akcs our
2362 * FIN also acks some data. In this scenario lwip will NOT call
2363 * tcp_sent() callback with the ACK for that last bit of data but
2364 * instead will call tcp_err with ERR_CLSD right away. Since that
2365 * ACK also acknowledges all the data, we should run some of
2366 * pxtcp_pcb_sent() logic here.
2367 */
2368 if (error == ERR_CLSD) {
2369 struct tcp_pcb *pcb = pxtcp->pcb; /* still alive */
2370
2371 DPRINTF2(("ERR_CLSD: pxtcp %p; pcb %p:"
2372 " pcb->acked %d;"
2373 " unacked %d, unsent %d, vacant %d\n",
2374 (void *)pxtcp, (void *)pcb,
2375 pcb->acked,
2376 (int)pxtcp->inbuf.unacked,
2377 (int)pxtcp->inbuf.unsent,
2378 (int)pxtcp->inbuf.vacant));
2379
2380 LWIP_ASSERT1(pxtcp->pcb == pcb);
2381 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
2382
2383 if (pcb->acked > 0) {
2384 pxtcp_pcb_sent(pxtcp, pcb, pcb->acked);
2385 }
2386 return;
2387 }
2388
2389 DPRINTF0(("tcp_err: pxtcp=%p, error=%s\n",
2390 (void *)pxtcp, proxy_lwip_strerr(error)));
2391
2392 pxtcp->pcb = NULL; /* pcb is gone */
2393 if (pxtcp->deferred_delete) {
2394 pxtcp_pcb_reset_pxtcp(pxtcp);
2395 }
2396 else {
2397 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
2398 }
2399}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette