VirtualBox

source: vbox/trunk/src/VBox/NetworkServices/NAT/pxtcp.c@ 51558

Last change on this file since 51558 was 51332, checked in by vboxsync, 11 years ago

pxtcp_pcb_forward_outbound: move code to do sendmsg/WSASend to a
separate function.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 66.0 KB
Line 
1/* -*- indent-tabs-mode: nil; -*- */
2#include "winutils.h"
3
4#include "pxtcp.h"
5
6#include "proxy.h"
7#include "proxy_pollmgr.h"
8#include "pxremap.h"
9#include "portfwd.h" /* fwspec */
10
11#ifndef RT_OS_WINDOWS
12#include <sys/types.h>
13#include <sys/socket.h>
14#include <sys/ioctl.h>
15#ifdef RT_OS_SOLARIS
16#include <sys/filio.h> /* FIONREAD is BSD'ism */
17#endif
18#include <stdlib.h>
19#include <stdint.h>
20#include <stdio.h>
21#include <string.h>
22#include <poll.h>
23
24#include <err.h> /* BSD'ism */
25#else
26#include <stdlib.h>
27#include <stdio.h>
28#include <string.h>
29
30#include <iprt/stdint.h>
31#include "winpoll.h"
32#endif
33
34#include "lwip/opt.h"
35
36#include "lwip/sys.h"
37#include "lwip/tcpip.h"
38#include "lwip/netif.h"
39#include "lwip/tcp_impl.h" /* XXX: to access tcp_abandon() */
40#include "lwip/icmp.h"
41#include "lwip/icmp6.h"
42
43/* NetBSD doesn't report POLLHUP for TCP sockets */
44#ifdef __NetBSD__
45# define HAVE_TCP_POLLHUP 0
46#else
47# define HAVE_TCP_POLLHUP 1
48#endif
49
50
/**
 * Ring buffer holding inbound data.
 *
 * The producer is the poll manager thread, which fills the buffer
 * with data read from the host socket.  The consumer is the lwIP
 * thread, which hands the data to the pcb with tcp_write().
 *
 * NB: the lwIP stack is a third party here: data that has been sent
 * but not yet ACKed still lives in the buffer.  So instead of the
 * usual two-way free/data split there are three regions:
 * free / sent-but-unACKed / unsent.
 */
struct ringbuf {
    char *buf;                  /* backing storage */
    size_t bufsize;             /* size of the backing storage */

    /*
     * Where free space begins.  The producer writes from here, up
     * to "unacked".
     */
    volatile size_t vacant;

    /*
     * Where sent but not yet acknowledged data begins.  The stack
     * "owns" that data since it may have to retransmit it, so this
     * is also the limit of free space for the producer.
     */
    volatile size_t unacked;

    /*
     * Where unsent data begins.  The consumer reads/sends from here,
     * up to "vacant".  Only the consumer thread touches it, hence no
     * volatile.
     */
    size_t unsent;
};
83
84
85/**
86 */
87struct pxtcp {
88 /**
89 * Our poll manager handler. Must be first, strong/weak
90 * references depend on this "inheritance".
91 */
92 struct pollmgr_handler pmhdl;
93
94 /**
95 * lwIP (internal/guest) side of the proxied connection.
96 */
97 struct tcp_pcb *pcb;
98
99 /**
100 * Host (external) side of the proxied connection.
101 */
102 SOCKET sock;
103
104 /**
105 * Socket events we are currently polling for.
106 */
107 int events;
108
109 /**
110 * Socket error. Currently used to save connect(2) errors so that
111 * we can decide if we need to send ICMP error.
112 */
113 int sockerr;
114
115 /**
116 * Interface that we have got the SYN from. Needed to send ICMP
117 * with correct source address.
118 */
119 struct netif *netif;
120
121 /**
122 * For tentatively accepted connections for which we are in
123 * process of connecting to the real destination this is the
124 * initial pbuf that we might need to build ICMP error.
125 *
126 * When connection is established this is used to hold outbound
127 * pbuf chain received by pxtcp_pcb_recv() but not yet completely
128 * forwarded over the socket. We cannot "return" it to lwIP since
129 * the head of the chain is already sent and freed.
130 */
131 struct pbuf *unsent;
132
133 /**
134 * Guest has closed its side. Reported to pxtcp_pcb_recv() only
135 * once and we might not be able to forward it immediately if we
136 * have unsent pbuf.
137 */
138 int outbound_close;
139
140 /**
141 * Outbound half-close has been done on the socket.
142 */
143 int outbound_close_done;
144
145 /**
146 * External has closed its side. We might not be able to forward
147 * it immediately if we have unforwarded data.
148 */
149 int inbound_close;
150
151 /**
152 * Inbound half-close has been done on the pcb.
153 */
154 int inbound_close_done;
155
156 /**
157 * On systems that report POLLHUP as soon as the final FIN is
158 * received on a socket we cannot continue polling for the rest of
159 * input, so we have to read (pull) last data from the socket on
160 * the lwIP thread instead of polling/pushing it from the poll
161 * manager thread. See comment in pxtcp_pmgr_pump() POLLHUP case.
162 */
163 int inbound_pull;
164
165
166 /**
167 * When poll manager schedules delete we may not be able to delete
168 * a pxtcp immediately if not all inbound data has been acked by
169 * the guest: lwIP may need to resend and the data are in pxtcp's
170 * inbuf::buf. We defer delete until all data are acked to
171 * pxtcp_pcb_sent().
172 *
173 * It's also implied by inbound_pull. It probably means that
174 * "deferred" is not a very fortunate name.
175 */
176 int deferred_delete;
177
178 /**
179 * Ring-buffer for inbound data.
180 */
181 struct ringbuf inbuf;
182
183 /**
184 * lwIP thread's strong reference to us.
185 */
186 struct pollmgr_refptr *rp;
187
188
189 /*
190 * We use static messages to call functions on the lwIP thread to
191 * void malloc/free overhead.
192 */
193 struct tcpip_msg msg_delete; /* delete pxtcp */
194 struct tcpip_msg msg_reset; /* reset connection and delete pxtcp */
195 struct tcpip_msg msg_accept; /* confirm accept of proxied connection */
196 struct tcpip_msg msg_outbound; /* trigger send of outbound data */
197 struct tcpip_msg msg_inbound; /* trigger send of inbound data */
198 struct tcpip_msg msg_inpull; /* trigger pull of last inbound data */
199};
200
201
202
203static struct pxtcp *pxtcp_allocate(void);
204static void pxtcp_free(struct pxtcp *);
205
206static void pxtcp_pcb_associate(struct pxtcp *, struct tcp_pcb *);
207static void pxtcp_pcb_dissociate(struct pxtcp *);
208
209/* poll manager callbacks for pxtcp related channels */
210static int pxtcp_pmgr_chan_add(struct pollmgr_handler *, SOCKET, int);
211static int pxtcp_pmgr_chan_pollout(struct pollmgr_handler *, SOCKET, int);
212static int pxtcp_pmgr_chan_pollin(struct pollmgr_handler *, SOCKET, int);
213#if !HAVE_TCP_POLLHUP
214static int pxtcp_pmgr_chan_del(struct pollmgr_handler *, SOCKET, int);
215#endif
216static int pxtcp_pmgr_chan_reset(struct pollmgr_handler *, SOCKET, int);
217
218/* helper functions for sending/receiving pxtcp over poll manager channels */
219static ssize_t pxtcp_chan_send(enum pollmgr_slot_t, struct pxtcp *);
220static ssize_t pxtcp_chan_send_weak(enum pollmgr_slot_t, struct pxtcp *);
221static struct pxtcp *pxtcp_chan_recv(struct pollmgr_handler *, SOCKET, int);
222static struct pxtcp *pxtcp_chan_recv_strong(struct pollmgr_handler *, SOCKET, int);
223
224/* poll manager callbacks for individual sockets */
225static int pxtcp_pmgr_connect(struct pollmgr_handler *, SOCKET, int);
226static int pxtcp_pmgr_pump(struct pollmgr_handler *, SOCKET, int);
227
228/* get incoming traffic into ring buffer */
229static ssize_t pxtcp_sock_read(struct pxtcp *, int *);
230static ssize_t pxtcp_sock_recv(struct pxtcp *, IOVEC *, size_t); /* default */
231
232/* convenience functions for poll manager callbacks */
233static int pxtcp_schedule_delete(struct pxtcp *);
234static int pxtcp_schedule_reset(struct pxtcp *);
235static int pxtcp_schedule_reject(struct pxtcp *);
236
237/* lwip thread callbacks called via proxy_lwip_post() */
238static void pxtcp_pcb_delete_pxtcp(void *);
239static void pxtcp_pcb_reset_pxtcp(void *);
240static void pxtcp_pcb_accept_refuse(void *);
241static void pxtcp_pcb_accept_confirm(void *);
242static void pxtcp_pcb_write_outbound(void *);
243static void pxtcp_pcb_write_inbound(void *);
244static void pxtcp_pcb_pull_inbound(void *);
245
246/* tcp pcb callbacks */
247static err_t pxtcp_pcb_heard(void *, struct tcp_pcb *, err_t); /* global */
248static err_t pxtcp_pcb_accept(void *, struct tcp_pcb *, err_t);
249static err_t pxtcp_pcb_connected(void *, struct tcp_pcb *, err_t);
250static err_t pxtcp_pcb_recv(void *, struct tcp_pcb *, struct pbuf *, err_t);
251static err_t pxtcp_pcb_sent(void *, struct tcp_pcb *, u16_t);
252static err_t pxtcp_pcb_poll(void *, struct tcp_pcb *);
253static void pxtcp_pcb_err(void *, err_t);
254
255static err_t pxtcp_pcb_forward_outbound(struct pxtcp *, struct pbuf *);
256static void pxtcp_pcb_forward_outbound_close(struct pxtcp *);
257
258static ssize_t pxtcp_sock_send(struct pxtcp *, IOVEC *, size_t);
259
260static void pxtcp_pcb_forward_inbound(struct pxtcp *);
261static void pxtcp_pcb_forward_inbound_close(struct pxtcp *);
262DECLINLINE(int) pxtcp_pcb_forward_inbound_done(const struct pxtcp *);
263static void pxtcp_pcb_schedule_poll(struct pxtcp *);
264static void pxtcp_pcb_cancel_poll(struct pxtcp *);
265
266static void pxtcp_pcb_reject(struct netif *, struct tcp_pcb *, struct pbuf *, int);
267DECLINLINE(void) pxtcp_pcb_maybe_deferred_delete(struct pxtcp *);
268
269/* poll manager handlers for pxtcp channels */
270static struct pollmgr_handler pxtcp_pmgr_chan_add_hdl;
271static struct pollmgr_handler pxtcp_pmgr_chan_pollout_hdl;
272static struct pollmgr_handler pxtcp_pmgr_chan_pollin_hdl;
273#if !HAVE_TCP_POLLHUP
274static struct pollmgr_handler pxtcp_pmgr_chan_del_hdl;
275#endif
276static struct pollmgr_handler pxtcp_pmgr_chan_reset_hdl;
277
278
279/**
280 * Init PXTCP - must be run when neither lwIP tcpip thread, nor poll
281 * manager threads haven't been created yet.
282 */
283void
284pxtcp_init(void)
285{
286 /*
287 * Create channels.
288 */
289#define CHANNEL(SLOT, NAME) do { \
290 NAME##_hdl.callback = NAME; \
291 NAME##_hdl.data = NULL; \
292 NAME##_hdl.slot = -1; \
293 pollmgr_add_chan(SLOT, &NAME##_hdl); \
294 } while (0)
295
296 CHANNEL(POLLMGR_CHAN_PXTCP_ADD, pxtcp_pmgr_chan_add);
297 CHANNEL(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp_pmgr_chan_pollin);
298 CHANNEL(POLLMGR_CHAN_PXTCP_POLLOUT, pxtcp_pmgr_chan_pollout);
299#if !HAVE_TCP_POLLHUP
300 CHANNEL(POLLMGR_CHAN_PXTCP_DEL, pxtcp_pmgr_chan_del);
301#endif
302 CHANNEL(POLLMGR_CHAN_PXTCP_RESET, pxtcp_pmgr_chan_reset);
303
304#undef CHANNEL
305
306 /*
307 * Listen to outgoing connection from guest(s).
308 */
309 tcp_proxy_accept(pxtcp_pcb_heard);
310}
311
312
313/**
314 * Syntactic sugar for sending pxtcp pointer over poll manager
315 * channel. Used by lwip thread functions.
316 */
317static ssize_t
318pxtcp_chan_send(enum pollmgr_slot_t slot, struct pxtcp *pxtcp)
319{
320 return pollmgr_chan_send(slot, &pxtcp, sizeof(pxtcp));
321}
322
323
324/**
325 * Syntactic sugar for sending weak reference to pxtcp over poll
326 * manager channel. Used by lwip thread functions.
327 */
328static ssize_t
329pxtcp_chan_send_weak(enum pollmgr_slot_t slot, struct pxtcp *pxtcp)
330{
331 pollmgr_refptr_weak_ref(pxtcp->rp);
332 return pollmgr_chan_send(slot, &pxtcp->rp, sizeof(pxtcp->rp));
333}
334
335
336/**
337 * Counterpart of pxtcp_chan_send().
338 */
339static struct pxtcp *
340pxtcp_chan_recv(struct pollmgr_handler *handler, SOCKET fd, int revents)
341{
342 struct pxtcp *pxtcp;
343
344 pxtcp = (struct pxtcp *)pollmgr_chan_recv_ptr(handler, fd, revents);
345 return pxtcp;
346}
347
348
349/**
350 * Counterpart of pxtcp_chan_send_weak().
351 */
352static struct pxtcp *
353pxtcp_chan_recv_strong(struct pollmgr_handler *handler, SOCKET fd, int revents)
354{
355 struct pollmgr_refptr *rp;
356 struct pollmgr_handler *base;
357 struct pxtcp *pxtcp;
358
359 rp = (struct pollmgr_refptr *)pollmgr_chan_recv_ptr(handler, fd, revents);
360 base = (struct pollmgr_handler *)pollmgr_refptr_get(rp);
361 pxtcp = (struct pxtcp *)base;
362
363 return pxtcp;
364}
365
366
367/**
368 * Register pxtcp with poll manager.
369 *
370 * Used for POLLMGR_CHAN_PXTCP_ADD and by port-forwarding. Since
371 * error handling is different in these two cases, we leave it up to
372 * the caller.
373 */
374int
375pxtcp_pmgr_add(struct pxtcp *pxtcp)
376{
377 int status;
378
379 LWIP_ASSERT1(pxtcp != NULL);
380 LWIP_ASSERT1(pxtcp->sock >= 0);
381 LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
382 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
383 LWIP_ASSERT1(pxtcp->pmhdl.slot < 0);
384
385 status = pollmgr_add(&pxtcp->pmhdl, pxtcp->sock, pxtcp->events);
386 return status;
387}
388
389
390/**
391 * Unregister pxtcp with poll manager.
392 *
393 * Used for POLLMGR_CHAN_PXTCP_RESET and by port-forwarding (on error
394 * leg).
395 */
396void
397pxtcp_pmgr_del(struct pxtcp *pxtcp)
398{
399 LWIP_ASSERT1(pxtcp != NULL);
400
401 pollmgr_del_slot(pxtcp->pmhdl.slot);
402}
403
404
405/**
406 * POLLMGR_CHAN_PXTCP_ADD handler.
407 *
408 * Get new pxtcp from lwip thread and start polling its socket.
409 */
410static int
411pxtcp_pmgr_chan_add(struct pollmgr_handler *handler, SOCKET fd, int revents)
412{
413 struct pxtcp *pxtcp;
414 int status;
415
416 pxtcp = pxtcp_chan_recv(handler, fd, revents);
417 DPRINTF0(("pxtcp_add: new pxtcp %p; pcb %p; sock %d\n",
418 (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
419
420 status = pxtcp_pmgr_add(pxtcp);
421 if (status < 0) {
422 (void) pxtcp_schedule_reset(pxtcp);
423 }
424
425 return POLLIN;
426}
427
428
429/**
430 * POLLMGR_CHAN_PXTCP_POLLOUT handler.
431 *
432 * pxtcp_pcb_forward_outbound() on the lwIP thread tried to send data
433 * and failed, it now requests us to poll the socket for POLLOUT and
434 * schedule pxtcp_pcb_forward_outbound() when sock is writable again.
435 */
436static int
437pxtcp_pmgr_chan_pollout(struct pollmgr_handler *handler, SOCKET fd, int revents)
438{
439 struct pxtcp *pxtcp;
440
441 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
442 DPRINTF0(("pxtcp_pollout: pxtcp %p\n", (void *)pxtcp));
443
444 if (pxtcp == NULL) {
445 return POLLIN;
446 }
447
448 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
449 LWIP_ASSERT1(pxtcp->pmhdl.slot > 0);
450
451 pxtcp->events |= POLLOUT;
452 pollmgr_update_events(pxtcp->pmhdl.slot, pxtcp->events);
453
454 return POLLIN;
455}
456
457
458/**
459 * POLLMGR_CHAN_PXTCP_POLLIN handler.
460 */
461static int
462pxtcp_pmgr_chan_pollin(struct pollmgr_handler *handler, SOCKET fd, int revents)
463{
464 struct pxtcp *pxtcp;
465
466 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
467 DPRINTF2(("pxtcp_pollin: pxtcp %p\n", (void *)pxtcp));
468
469 if (pxtcp == NULL) {
470 return POLLIN;
471 }
472
473 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
474 LWIP_ASSERT1(pxtcp->pmhdl.slot > 0);
475
476 if (pxtcp->inbound_close) {
477 return POLLIN;
478 }
479
480 pxtcp->events |= POLLIN;
481 pollmgr_update_events(pxtcp->pmhdl.slot, pxtcp->events);
482
483 return POLLIN;
484}
485
486
487#if !HAVE_TCP_POLLHUP
488/**
489 * POLLMGR_CHAN_PXTCP_DEL handler.
490 *
491 * Schedule pxtcp deletion. We only need this if host system doesn't
492 * report POLLHUP for fully closed tcp sockets.
493 */
494static int
495pxtcp_pmgr_chan_del(struct pollmgr_handler *handler, SOCKET fd, int revents)
496{
497 struct pxtcp *pxtcp;
498
499 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
500 if (pxtcp == NULL) {
501 return POLLIN;
502 }
503
504 DPRINTF(("PXTCP_DEL: pxtcp %p; pcb %p; sock %d\n",
505 (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
506
507 LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
508 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
509
510 LWIP_ASSERT1(pxtcp->inbound_close); /* EOF read */
511 LWIP_ASSERT1(pxtcp->outbound_close_done); /* EOF sent */
512
513 pxtcp_pmgr_del(pxtcp);
514 (void) pxtcp_schedule_delete(pxtcp);
515
516 return POLLIN;
517}
518#endif /* !HAVE_TCP_POLLHUP */
519
520
521/**
522 * POLLMGR_CHAN_PXTCP_RESET handler.
523 *
524 * Close the socket with RST and delete pxtcp.
525 */
526static int
527pxtcp_pmgr_chan_reset(struct pollmgr_handler *handler, SOCKET fd, int revents)
528{
529 struct pxtcp *pxtcp;
530
531 pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
532 if (pxtcp == NULL) {
533 return POLLIN;
534 }
535
536 DPRINTF0(("PXTCP_RESET: pxtcp %p; pcb %p; sock %d\n",
537 (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
538
539 LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
540 LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
541
542 pxtcp_pmgr_del(pxtcp);
543
544 proxy_reset_socket(pxtcp->sock);
545 pxtcp->sock = INVALID_SOCKET;
546
547 (void) pxtcp_schedule_reset(pxtcp);
548
549 return POLLIN;
550}
551
552
553static struct pxtcp *
554pxtcp_allocate(void)
555{
556 struct pxtcp *pxtcp;
557
558 pxtcp = (struct pxtcp *)malloc(sizeof(*pxtcp));
559 if (pxtcp == NULL) {
560 return NULL;
561 }
562
563 pxtcp->pmhdl.callback = NULL;
564 pxtcp->pmhdl.data = (void *)pxtcp;
565 pxtcp->pmhdl.slot = -1;
566
567 pxtcp->pcb = NULL;
568 pxtcp->sock = INVALID_SOCKET;
569 pxtcp->events = 0;
570 pxtcp->sockerr = 0;
571 pxtcp->netif = NULL;
572 pxtcp->unsent = NULL;
573 pxtcp->outbound_close = 0;
574 pxtcp->outbound_close_done = 0;
575 pxtcp->inbound_close = 0;
576 pxtcp->inbound_close_done = 0;
577 pxtcp->inbound_pull = 0;
578 pxtcp->deferred_delete = 0;
579
580 pxtcp->inbuf.bufsize = 64 * 1024;
581 pxtcp->inbuf.buf = (char *)malloc(pxtcp->inbuf.bufsize);
582 if (pxtcp->inbuf.buf == NULL) {
583 free(pxtcp);
584 return NULL;
585 }
586 pxtcp->inbuf.vacant = 0;
587 pxtcp->inbuf.unacked = 0;
588 pxtcp->inbuf.unsent = 0;
589
590 pxtcp->rp = pollmgr_refptr_create(&pxtcp->pmhdl);
591 if (pxtcp->rp == NULL) {
592 free(pxtcp->inbuf.buf);
593 free(pxtcp);
594 return NULL;
595 }
596
597#define CALLBACK_MSG(MSG, FUNC) \
598 do { \
599 pxtcp->MSG.type = TCPIP_MSG_CALLBACK_STATIC; \
600 pxtcp->MSG.sem = NULL; \
601 pxtcp->MSG.msg.cb.function = FUNC; \
602 pxtcp->MSG.msg.cb.ctx = (void *)pxtcp; \
603 } while (0)
604
605 CALLBACK_MSG(msg_delete, pxtcp_pcb_delete_pxtcp);
606 CALLBACK_MSG(msg_reset, pxtcp_pcb_reset_pxtcp);
607 CALLBACK_MSG(msg_accept, pxtcp_pcb_accept_confirm);
608 CALLBACK_MSG(msg_outbound, pxtcp_pcb_write_outbound);
609 CALLBACK_MSG(msg_inbound, pxtcp_pcb_write_inbound);
610 CALLBACK_MSG(msg_inpull, pxtcp_pcb_pull_inbound);
611
612#undef CALLBACK_MSG
613
614 return pxtcp;
615}
616
617
618/**
619 * Exported to fwtcp to create pxtcp for incoming port-forwarded
620 * connections. Completed with pcb in pxtcp_pcb_connect().
621 */
622struct pxtcp *
623pxtcp_create_forwarded(SOCKET sock)
624{
625 struct pxtcp *pxtcp;
626
627 pxtcp = pxtcp_allocate();
628 if (pxtcp == NULL) {
629 return NULL;
630 }
631
632 pxtcp->sock = sock;
633 pxtcp->pmhdl.callback = pxtcp_pmgr_pump;
634 pxtcp->events = 0;
635
636 return pxtcp;
637}
638
639
640static void
641pxtcp_pcb_associate(struct pxtcp *pxtcp, struct tcp_pcb *pcb)
642{
643 LWIP_ASSERT1(pxtcp != NULL);
644 LWIP_ASSERT1(pcb != NULL);
645
646 pxtcp->pcb = pcb;
647
648 tcp_arg(pcb, pxtcp);
649
650 tcp_recv(pcb, pxtcp_pcb_recv);
651 tcp_sent(pcb, pxtcp_pcb_sent);
652 tcp_poll(pcb, NULL, 255);
653 tcp_err(pcb, pxtcp_pcb_err);
654}
655
656
657static void
658pxtcp_free(struct pxtcp *pxtcp)
659{
660 if (pxtcp->unsent != NULL) {
661 pbuf_free(pxtcp->unsent);
662 }
663 if (pxtcp->inbuf.buf != NULL) {
664 free(pxtcp->inbuf.buf);
665 }
666 free(pxtcp);
667}
668
669
670/**
671 * Counterpart to pxtcp_create_forwarded() to destruct pxtcp that
672 * fwtcp failed to register with poll manager to post to lwip thread
673 * for doing connect.
674 */
675void
676pxtcp_cancel_forwarded(struct pxtcp *pxtcp)
677{
678 LWIP_ASSERT1(pxtcp->pcb == NULL);
679 pxtcp_pcb_reset_pxtcp(pxtcp);
680}
681
682
683static void
684pxtcp_pcb_dissociate(struct pxtcp *pxtcp)
685{
686 if (pxtcp == NULL || pxtcp->pcb == NULL) {
687 return;
688 }
689
690 DPRINTF(("%s: pxtcp %p <-> pcb %p\n",
691 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
692
693 /*
694 * We must have dissociated from a fully closed pcb immediately
695 * since lwip recycles them and we don't wan't to mess with what
696 * would be someone else's pcb that we happen to have a stale
697 * pointer to.
698 */
699 LWIP_ASSERT1(pxtcp->pcb->callback_arg == pxtcp);
700
701 tcp_recv(pxtcp->pcb, NULL);
702 tcp_sent(pxtcp->pcb, NULL);
703 tcp_poll(pxtcp->pcb, NULL, 255);
704 tcp_err(pxtcp->pcb, NULL);
705 tcp_arg(pxtcp->pcb, NULL);
706 pxtcp->pcb = NULL;
707}
708
709
710/**
711 * Lwip thread callback invoked via pxtcp::msg_delete
712 *
713 * Since we use static messages to communicate to the lwip thread, we
714 * cannot delete pxtcp without making sure there are no unprocessed
715 * messages in the lwip thread mailbox.
716 *
717 * The easiest way to ensure that is to send this "delete" message as
718 * the last one and when it's processed we know there are no more and
719 * it's safe to delete pxtcp.
720 *
721 * Poll manager handlers should use pxtcp_schedule_delete()
722 * convenience function.
723 */
724static void
725pxtcp_pcb_delete_pxtcp(void *ctx)
726{
727 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
728
729 DPRINTF(("%s: pxtcp %p, pcb %p, sock %d%s\n",
730 __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock,
731 (pxtcp->deferred_delete && !pxtcp->inbound_pull
732 ? " (was deferred)" : "")));
733
734 LWIP_ASSERT1(pxtcp != NULL);
735 LWIP_ASSERT1(pxtcp->pmhdl.slot < 0);
736 LWIP_ASSERT1(pxtcp->outbound_close_done);
737 LWIP_ASSERT1(pxtcp->inbound_close); /* not necessarily done */
738
739
740 /*
741 * pxtcp is no longer registered with poll manager, so it's safe
742 * to close the socket.
743 */
744 if (pxtcp->sock != INVALID_SOCKET) {
745 closesocket(pxtcp->sock);
746 pxtcp->sock = INVALID_SOCKET;
747 }
748
749 /*
750 * We might have already dissociated from a fully closed pcb, or
751 * guest might have sent us a reset while msg_delete was in
752 * transit. If there's no pcb, we are done.
753 */
754 if (pxtcp->pcb == NULL) {
755 pollmgr_refptr_unref(pxtcp->rp);
756 pxtcp_free(pxtcp);
757 return;
758 }
759
760 /*
761 * Have we completely forwarded all inbound traffic to the guest?
762 *
763 * We may still be waiting for ACKs. We may have failed to send
764 * some of the data (tcp_write() failed with ERR_MEM). We may
765 * have failed to send the FIN (tcp_shutdown() failed with
766 * ERR_MEM).
767 */
768 if (pxtcp_pcb_forward_inbound_done(pxtcp)) {
769 pxtcp_pcb_dissociate(pxtcp);
770 pollmgr_refptr_unref(pxtcp->rp);
771 pxtcp_free(pxtcp);
772 }
773 else {
774 DPRINTF2(("delete: pxtcp %p; pcb %p:"
775 " unacked %d, unsent %d, vacant %d, %s - DEFER!\n",
776 (void *)pxtcp, (void *)pxtcp->pcb,
777 (int)pxtcp->inbuf.unacked,
778 (int)pxtcp->inbuf.unsent,
779 (int)pxtcp->inbuf.vacant,
780 pxtcp->inbound_close_done ? "FIN sent" : "FIN is NOT sent"));
781
782 LWIP_ASSERT1(!pxtcp->deferred_delete);
783 pxtcp->deferred_delete = 1;
784 }
785}
786
787
788/**
789 * If we couldn't delete pxtcp right away in the msg_delete callback
790 * from the poll manager thread, we repeat the check at the end of
791 * relevant pcb callbacks.
792 */
793DECLINLINE(void)
794pxtcp_pcb_maybe_deferred_delete(struct pxtcp *pxtcp)
795{
796 if (pxtcp->deferred_delete && pxtcp_pcb_forward_inbound_done(pxtcp)) {
797 pxtcp_pcb_delete_pxtcp(pxtcp);
798 }
799}
800
801
802/**
803 * Poll manager callbacks should use this convenience wrapper to
804 * schedule pxtcp deletion on the lwip thread and to deregister from
805 * the poll manager.
806 */
807static int
808pxtcp_schedule_delete(struct pxtcp *pxtcp)
809{
810 /*
811 * If pollmgr_refptr_get() is called by any channel before
812 * scheduled deletion happens, let them know we are gone.
813 */
814 pxtcp->pmhdl.slot = -1;
815
816 /*
817 * Schedule deletion. Since poll manager thread may be pre-empted
818 * right after we send the message, the deletion may actually
819 * happen on the lwip thread before we return from this function,
820 * so it's not safe to refer to pxtcp after this call.
821 */
822 proxy_lwip_post(&pxtcp->msg_delete);
823
824 /* tell poll manager to deregister us */
825 return -1;
826}
827
828
829/**
830 * Lwip thread callback invoked via pxtcp::msg_reset
831 *
832 * Like pxtcp_pcb_delete(), but sends RST to the guest before
833 * deleting this pxtcp.
834 */
835static void
836pxtcp_pcb_reset_pxtcp(void *ctx)
837{
838 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
839 LWIP_ASSERT1(pxtcp != NULL);
840
841 DPRINTF0(("%s: pxtcp %p, pcb %p, sock %d\n",
842 __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
843
844 if (pxtcp->sock != INVALID_SOCKET) {
845 proxy_reset_socket(pxtcp->sock);
846 pxtcp->sock = INVALID_SOCKET;
847 }
848
849 if (pxtcp->pcb != NULL) {
850 struct tcp_pcb *pcb = pxtcp->pcb;
851 pxtcp_pcb_dissociate(pxtcp);
852 tcp_abort(pcb);
853 }
854
855 pollmgr_refptr_unref(pxtcp->rp);
856 pxtcp_free(pxtcp);
857}
858
859
860
861/**
862 * Poll manager callbacks should use this convenience wrapper to
863 * schedule pxtcp reset and deletion on the lwip thread and to
864 * deregister from the poll manager.
865 *
866 * See pxtcp_schedule_delete() for additional comments.
867 */
868static int
869pxtcp_schedule_reset(struct pxtcp *pxtcp)
870{
871 pxtcp->pmhdl.slot = -1;
872 proxy_lwip_post(&pxtcp->msg_reset);
873 return -1;
874}
875
876
877/**
878 * Reject proxy connection attempt. Depending on the cause (sockerr)
879 * we may just drop the pcb silently, generate an ICMP datagram or
880 * send TCP reset.
881 */
882static void
883pxtcp_pcb_reject(struct netif *netif, struct tcp_pcb *pcb,
884 struct pbuf *p, int sockerr)
885{
886 struct netif *oif;
887 int reset = 0;
888
889 oif = ip_current_netif();
890 ip_current_netif() = netif;
891
892 if (sockerr == ECONNREFUSED) {
893 reset = 1;
894 }
895 else if (PCB_ISIPV6(pcb)) {
896 if (sockerr == EHOSTDOWN) {
897 icmp6_dest_unreach(p, ICMP6_DUR_ADDRESS); /* XXX: ??? */
898 }
899 else if (sockerr == EHOSTUNREACH
900 || sockerr == ENETDOWN
901 || sockerr == ENETUNREACH)
902 {
903 icmp6_dest_unreach(p, ICMP6_DUR_NO_ROUTE);
904 }
905 }
906 else {
907 if (sockerr == EHOSTDOWN
908 || sockerr == EHOSTUNREACH
909 || sockerr == ENETDOWN
910 || sockerr == ENETUNREACH)
911 {
912 icmp_dest_unreach(p, ICMP_DUR_HOST);
913 }
914 }
915
916 ip_current_netif() = oif;
917
918 tcp_abandon(pcb, reset);
919}
920
921
922/**
923 * Called from poll manager thread via pxtcp::msg_accept when proxy
924 * failed to connect to the destination. Also called when we failed
925 * to register pxtcp with poll manager.
926 *
927 * This is like pxtcp_pcb_reset_pxtcp() but is more discriminate in
928 * how this unestablished connection is terminated.
929 */
930static void
931pxtcp_pcb_accept_refuse(void *ctx)
932{
933 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
934
935 DPRINTF0(("%s: pxtcp %p, pcb %p, sock %d: errno %d\n",
936 __func__, (void *)pxtcp, (void *)pxtcp->pcb,
937 pxtcp->sock, pxtcp->sockerr));
938
939 LWIP_ASSERT1(pxtcp != NULL);
940 LWIP_ASSERT1(pxtcp->sock == INVALID_SOCKET);
941
942 if (pxtcp->pcb != NULL) {
943 struct tcp_pcb *pcb = pxtcp->pcb;
944 pxtcp_pcb_dissociate(pxtcp);
945 pxtcp_pcb_reject(pxtcp->netif, pcb, pxtcp->unsent, pxtcp->sockerr);
946 }
947
948 pollmgr_refptr_unref(pxtcp->rp);
949 pxtcp_free(pxtcp);
950}
951
952
953/**
954 * Convenience wrapper for poll manager connect callback to reject
955 * connection attempt.
956 *
957 * Like pxtcp_schedule_reset(), but the callback is more discriminate
958 * in how this unestablished connection is terminated.
959 */
960static int
961pxtcp_schedule_reject(struct pxtcp *pxtcp)
962{
963 pxtcp->msg_accept.msg.cb.function = pxtcp_pcb_accept_refuse;
964 pxtcp->pmhdl.slot = -1;
965 proxy_lwip_post(&pxtcp->msg_accept);
966 return -1;
967}
968
969
970/**
971 * Global tcp_proxy_accept() callback for proxied outgoing TCP
972 * connections from guest(s).
973 */
974static err_t
975pxtcp_pcb_heard(void *arg, struct tcp_pcb *newpcb, err_t error)
976{
977 struct pbuf *p = (struct pbuf *)arg;
978 struct pxtcp *pxtcp;
979 ipX_addr_t dst_addr;
980 int sdom;
981 SOCKET sock;
982 ssize_t nsent;
983 int sockerr = 0;
984
985 LWIP_UNUSED_ARG(error); /* always ERR_OK */
986
987 /*
988 * TCP first calls accept callback when it receives the first SYN
989 * and "tentatively accepts" new proxied connection attempt. When
990 * proxy "confirms" the SYN and sends SYN|ACK and the guest
991 * replies with ACK the accept callback is called again, this time
992 * with the established connection.
993 */
994 LWIP_ASSERT1(newpcb->state == SYN_RCVD_0);
995 tcp_accept(newpcb, pxtcp_pcb_accept);
996 tcp_arg(newpcb, NULL);
997
998 tcp_setprio(newpcb, TCP_PRIO_MAX);
999
1000 pxremap_outbound_ipX(PCB_ISIPV6(newpcb), &dst_addr, &newpcb->local_ip);
1001
1002 sdom = PCB_ISIPV6(newpcb) ? PF_INET6 : PF_INET;
1003 sock = proxy_connected_socket(sdom, SOCK_STREAM,
1004 &dst_addr, newpcb->local_port);
1005 if (sock == INVALID_SOCKET) {
1006 sockerr = errno;
1007 goto abort;
1008 }
1009
1010 pxtcp = pxtcp_allocate();
1011 if (pxtcp == NULL) {
1012 proxy_reset_socket(sock);
1013 goto abort;
1014 }
1015
1016 /* save initial datagram in case we need to reply with ICMP */
1017 pbuf_ref(p);
1018 pxtcp->unsent = p;
1019 pxtcp->netif = ip_current_netif();
1020
1021 pxtcp_pcb_associate(pxtcp, newpcb);
1022 pxtcp->sock = sock;
1023
1024 pxtcp->pmhdl.callback = pxtcp_pmgr_connect;
1025 pxtcp->events = POLLOUT;
1026
1027 nsent = pxtcp_chan_send(POLLMGR_CHAN_PXTCP_ADD, pxtcp);
1028 if (nsent < 0) {
1029 pxtcp->sock = INVALID_SOCKET;
1030 proxy_reset_socket(sock);
1031 pxtcp_pcb_accept_refuse(pxtcp);
1032 return ERR_ABRT;
1033 }
1034
1035 return ERR_OK;
1036
1037 abort:
1038 DPRINTF0(("%s: pcb %p, sock %d: errno %d\n",
1039 __func__, (void *)newpcb, sock, sockerr));
1040 pxtcp_pcb_reject(ip_current_netif(), newpcb, p, sockerr);
1041 return ERR_ABRT;
1042}
1043
1044
1045/**
1046 * tcp_proxy_accept() callback for accepted proxied outgoing TCP
1047 * connections from guest(s). This is "real" accept with three-way
1048 * handshake completed.
1049 */
1050static err_t
1051pxtcp_pcb_accept(void *arg, struct tcp_pcb *pcb, err_t error)
1052{
1053 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1054
1055 LWIP_UNUSED_ARG(pcb); /* used only in asserts */
1056 LWIP_UNUSED_ARG(error); /* always ERR_OK */
1057
1058 LWIP_ASSERT1(pxtcp != NULL);
1059 LWIP_ASSERT1(pxtcp->pcb = pcb);
1060 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
1061
1062 /* send any inbound data that are already queued */
1063 pxtcp_pcb_forward_inbound(pxtcp);
1064 return ERR_OK;
1065}
1066
1067
1068/**
1069 * Initial poll manager callback for proxied outgoing TCP connections.
1070 * pxtcp_pcb_accept() sets pxtcp::pmhdl::callback to this.
1071 *
1072 * Waits for connect(2) to the destination to complete. On success
1073 * replaces itself with pxtcp_pmgr_pump() callback common to all
1074 * established TCP connections.
1075 */
1076static int
1077pxtcp_pmgr_connect(struct pollmgr_handler *handler, SOCKET fd, int revents)
1078{
1079 struct pxtcp *pxtcp;
1080 int sockerr;
1081
1082 pxtcp = (struct pxtcp *)handler->data;
1083 LWIP_ASSERT1(handler == &pxtcp->pmhdl);
1084 LWIP_ASSERT1(fd == pxtcp->sock);
1085
1086 if (revents & (POLLNVAL | POLLHUP | POLLERR)) {
1087 if (revents & POLLNVAL) {
1088 pxtcp->sock = INVALID_SOCKET;
1089 pxtcp->sockerr = ETIMEDOUT;
1090 }
1091 else {
1092 socklen_t optlen = (socklen_t)sizeof(sockerr);
1093 int status;
1094 SOCKET s;
1095
1096 status = getsockopt(pxtcp->sock, SOL_SOCKET, SO_ERROR,
1097 (char *)&pxtcp->sockerr, &optlen);
1098 if (status < 0) { /* should not happen */
1099 sockerr = errno; /* ??? */
1100 perror("connect: getsockopt");
1101 }
1102 else {
1103#ifndef RT_OS_WINDOWS
1104 errno = pxtcp->sockerr; /* to avoid strerror_r */
1105#else
1106 /* see winutils.h */
1107 WSASetLastError(pxtcp->sockerr);
1108#endif
1109 perror("connect");
1110 }
1111 s = pxtcp->sock;
1112 pxtcp->sock = INVALID_SOCKET;
1113 closesocket(s);
1114 }
1115 return pxtcp_schedule_reject(pxtcp);
1116 }
1117
1118 if (revents & POLLOUT) { /* connect is successful */
1119 /* confirm accept to the guest */
1120 proxy_lwip_post(&pxtcp->msg_accept);
1121
1122 /*
1123 * Switch to common callback used for all established proxied
1124 * connections.
1125 */
1126 pxtcp->pmhdl.callback = pxtcp_pmgr_pump;
1127
1128 /*
1129 * Initially we poll for incoming traffic only. Outgoing
1130 * traffic is fast-forwarded by pxtcp_pcb_recv(); if it fails
1131 * it will ask us to poll for POLLOUT too.
1132 */
1133 pxtcp->events = POLLIN;
1134 return pxtcp->events;
1135 }
1136
1137 /* should never get here */
1138 DPRINTF0(("%s: pxtcp %p, sock %d: unexpected revents 0x%x\n",
1139 __func__, (void *)pxtcp, fd, revents));
1140 return pxtcp_schedule_reset(pxtcp);
1141}
1142
1143
1144/**
1145 * Called from poll manager thread via pxtcp::msg_accept when proxy
1146 * connected to the destination. Finalize accept by sending SYN|ACK
1147 * to the guest.
1148 */
1149static void
1150pxtcp_pcb_accept_confirm(void *ctx)
1151{
1152 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
1153 err_t error;
1154
1155 LWIP_ASSERT1(pxtcp != NULL);
1156 if (pxtcp->pcb == NULL) {
1157 return;
1158 }
1159
1160 /* we are not going to reply with ICMP, so we can drop initial pbuf */
1161 LWIP_ASSERT1(pxtcp->unsent != NULL);
1162 pbuf_free(pxtcp->unsent);
1163 pxtcp->unsent = NULL;
1164
1165 error = tcp_proxy_accept_confirm(pxtcp->pcb);
1166
1167 /*
1168 * If lwIP failed to enqueue SYN|ACK because it's out of pbufs it
1169 * abandons the pcb. Retrying that is not very easy, since it
1170 * would require keeping "fractional state". From guest's point
1171 * of view there is no reply to its SYN so it will either resend
1172 * the SYN (effetively triggering full connection retry for us),
1173 * or it will eventually time out.
1174 */
1175 if (error == ERR_ABRT) {
1176 pxtcp->pcb = NULL; /* pcb is gone */
1177 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
1178 }
1179
1180 /*
1181 * else if (error != ERR_OK): even if tcp_output() failed with
1182 * ERR_MEM - don't give up, that SYN|ACK is enqueued and will be
1183 * retransmitted eventually.
1184 */
1185}
1186
1187
1188/**
1189 * Entry point for port-forwarding.
1190 *
1191 * fwtcp accepts new incoming connection, creates pxtcp for the socket
1192 * (with no pcb yet) and adds it to the poll manager (polling for
1193 * errors only). Then it calls this function to construct the pcb and
1194 * perform connection to the guest.
1195 */
1196void
1197pxtcp_pcb_connect(struct pxtcp *pxtcp, const struct fwspec *fwspec)
1198{
1199 struct sockaddr_storage ss;
1200 socklen_t sslen;
1201 struct tcp_pcb *pcb;
1202 ipX_addr_t src_addr, dst_addr;
1203 u16_t src_port, dst_port;
1204 int status;
1205 err_t error;
1206
1207 LWIP_ASSERT1(pxtcp != NULL);
1208 LWIP_ASSERT1(pxtcp->pcb == NULL);
1209 LWIP_ASSERT1(fwspec->stype == SOCK_STREAM);
1210
1211 pcb = tcp_new();
1212 if (pcb == NULL) {
1213 goto reset;
1214 }
1215
1216 tcp_setprio(pcb, TCP_PRIO_MAX);
1217 pxtcp_pcb_associate(pxtcp, pcb);
1218
1219 sslen = sizeof(ss);
1220 status = getpeername(pxtcp->sock, (struct sockaddr *)&ss, &sslen);
1221 if (status == SOCKET_ERROR) {
1222 goto reset;
1223 }
1224
1225 /* nit: comapres PF and AF, but they are the same everywhere */
1226 LWIP_ASSERT1(ss.ss_family == fwspec->sdom);
1227
1228 status = fwany_ipX_addr_set_src(&src_addr, (const struct sockaddr *)&ss);
1229 if (status == PXREMAP_FAILED) {
1230 goto reset;
1231 }
1232
1233 if (ss.ss_family == PF_INET) {
1234 const struct sockaddr_in *peer4 = (const struct sockaddr_in *)&ss;
1235
1236 src_port = peer4->sin_port;
1237
1238 memcpy(&dst_addr.ip4, &fwspec->dst.sin.sin_addr, sizeof(ip_addr_t));
1239 dst_port = fwspec->dst.sin.sin_port;
1240 }
1241 else { /* PF_INET6 */
1242 const struct sockaddr_in6 *peer6 = (const struct sockaddr_in6 *)&ss;
1243 ip_set_v6(pcb, 1);
1244
1245 src_port = peer6->sin6_port;
1246
1247 memcpy(&dst_addr.ip6, &fwspec->dst.sin6.sin6_addr, sizeof(ip6_addr_t));
1248 dst_port = fwspec->dst.sin6.sin6_port;
1249 }
1250
1251 /* lwip port arguments are in host order */
1252 src_port = ntohs(src_port);
1253 dst_port = ntohs(dst_port);
1254
1255 error = tcp_proxy_bind(pcb, ipX_2_ip(&src_addr), src_port);
1256 if (error != ERR_OK) {
1257 goto reset;
1258 }
1259
1260 error = tcp_connect(pcb, ipX_2_ip(&dst_addr), dst_port,
1261 /* callback: */ pxtcp_pcb_connected);
1262 if (error != ERR_OK) {
1263 goto reset;
1264 }
1265
1266 return;
1267
1268 reset:
1269 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
1270}
1271
1272
1273/**
1274 * Port-forwarded connection to guest is successful, pump data.
1275 */
1276static err_t
1277pxtcp_pcb_connected(void *arg, struct tcp_pcb *pcb, err_t error)
1278{
1279 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1280
1281 LWIP_ASSERT1(error == ERR_OK); /* always called with ERR_OK */
1282 LWIP_UNUSED_ARG(error);
1283
1284 LWIP_ASSERT1(pxtcp != NULL);
1285 LWIP_ASSERT1(pxtcp->pcb == pcb);
1286 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
1287 LWIP_UNUSED_ARG(pcb);
1288
1289 DPRINTF0(("%s: new pxtcp %p; pcb %p; sock %d\n",
1290 __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
1291
1292 /* ACK on connection is like ACK on data in pxtcp_pcb_sent() */
1293 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp);
1294
1295 return ERR_OK;
1296}
1297
1298
1299/**
1300 * tcp_recv() callback.
1301 */
1302static err_t
1303pxtcp_pcb_recv(void *arg, struct tcp_pcb *pcb, struct pbuf *p, err_t error)
1304{
1305 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1306
1307 LWIP_ASSERT1(error == ERR_OK); /* always called with ERR_OK */
1308 LWIP_UNUSED_ARG(error);
1309
1310 LWIP_ASSERT1(pxtcp != NULL);
1311 LWIP_ASSERT1(pxtcp->pcb == pcb);
1312 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
1313 LWIP_UNUSED_ARG(pcb);
1314
1315
1316 /*
1317 * Have we done sending previous batch?
1318 */
1319 if (pxtcp->unsent != NULL) {
1320 if (p != NULL) {
1321 /*
1322 * Return an error to tell TCP to hold onto that pbuf.
1323 * It will be presented to us later from tcp_fasttmr().
1324 */
1325 return ERR_WOULDBLOCK;
1326 }
1327 else {
1328 /*
1329 * Unlike data, p == NULL indicating orderly shutdown is
1330 * NOT presented to us again
1331 */
1332 pxtcp->outbound_close = 1;
1333 return ERR_OK;
1334 }
1335 }
1336
1337
1338 /*
1339 * Guest closed?
1340 */
1341 if (p == NULL) {
1342 pxtcp->outbound_close = 1;
1343 pxtcp_pcb_forward_outbound_close(pxtcp);
1344 return ERR_OK;
1345 }
1346
1347
1348 /*
1349 * Got data, send what we can without blocking.
1350 */
1351 return pxtcp_pcb_forward_outbound(pxtcp, p);
1352}
1353
1354
1355/**
1356 * Guest half-closed its TX side of the connection.
1357 *
1358 * Called either immediately from pxtcp_pcb_recv() when it gets NULL,
1359 * or from pxtcp_pcb_forward_outbound() when it finishes forwarding
1360 * previously unsent data and sees pxtcp::outbound_close flag saved by
1361 * pxtcp_pcb_recv().
1362 */
1363static void
1364pxtcp_pcb_forward_outbound_close(struct pxtcp *pxtcp)
1365{
1366 struct tcp_pcb *pcb;
1367
1368 LWIP_ASSERT1(pxtcp != NULL);
1369 LWIP_ASSERT1(pxtcp->outbound_close);
1370 LWIP_ASSERT1(!pxtcp->outbound_close_done);
1371
1372 pcb = pxtcp->pcb;
1373 LWIP_ASSERT1(pcb != NULL);
1374
1375 DPRINTF(("outbound_close: pxtcp %p; pcb %p %s\n",
1376 (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
1377
1378
1379 /*
1380 * NB: set the flag first, since shutdown() will trigger POLLHUP
1381 * if inbound is already closed, and poll manager asserts
1382 * outbound_close_done (may be it should not?).
1383 */
1384 pxtcp->outbound_close_done = 1;
1385 shutdown(pxtcp->sock, SHUT_WR); /* half-close the socket */
1386
1387#if !HAVE_TCP_POLLHUP
1388 /*
1389 * On NetBSD POLLHUP is not reported for TCP sockets, so we need
1390 * to nudge poll manager manually.
1391 */
1392 if (pxtcp->inbound_close) {
1393 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_DEL, pxtcp);
1394 }
1395#endif
1396
1397
1398 /* no more outbound data coming to us */
1399 tcp_recv(pcb, NULL);
1400
1401 /*
1402 * If we have already done inbound close previously (active close
1403 * on the pcb), then we must not hold onto a pcb in TIME_WAIT
1404 * state since those will be recycled by lwip when it runs out of
1405 * free pcbs in the pool.
1406 *
1407 * The test is true also for a pcb in CLOSING state that waits
1408 * just for the ACK of its FIN (to transition to TIME_WAIT).
1409 */
1410 if (pxtcp_pcb_forward_inbound_done(pxtcp)) {
1411 pxtcp_pcb_dissociate(pxtcp);
1412 }
1413}
1414
1415
1416/**
1417 * Forward outbound data from pcb to socket.
1418 *
1419 * Called by pxtcp_pcb_recv() to forward new data and by callout
1420 * triggered by POLLOUT on the socket to send previously unsent data.
1421 *
1422 * (Re)scehdules one-time callout if not all data are sent.
1423 */
1424static err_t
1425pxtcp_pcb_forward_outbound(struct pxtcp *pxtcp, struct pbuf *p)
1426{
1427 struct pbuf *qs, *q;
1428 size_t qoff;
1429 size_t forwarded;
1430 int sockerr;
1431
1432 LWIP_ASSERT1(pxtcp->unsent == NULL || pxtcp->unsent == p);
1433
1434 forwarded = 0;
1435 sockerr = 0;
1436
1437 q = NULL;
1438 qoff = 0;
1439
1440 qs = p;
1441 while (qs != NULL) {
1442 IOVEC iov[8];
1443 const size_t iovsize = sizeof(iov)/sizeof(iov[0]);
1444 size_t fwd1;
1445 ssize_t nsent;
1446 size_t i;
1447
1448 fwd1 = 0;
1449 for (i = 0, q = qs; i < iovsize && q != NULL; ++i, q = q->next) {
1450 LWIP_ASSERT1(q->len > 0);
1451 IOVEC_SET_BASE(iov[i], q->payload);
1452 IOVEC_SET_LEN(iov[i], q->len);
1453 fwd1 += q->len;
1454 }
1455
1456 /*
1457 * TODO: This is where application-level proxy can hook into
1458 * to process outbound traffic.
1459 */
1460 nsent = pxtcp_sock_send(pxtcp, iov, i);
1461
1462 if (nsent == (ssize_t)fwd1) {
1463 /* successfully sent this chain fragment completely */
1464 forwarded += nsent;
1465 qs = q;
1466 }
1467 else if (nsent >= 0) {
1468 /* successfully sent only some data */
1469 forwarded += nsent;
1470
1471 /* find the first pbuf that was not completely forwarded */
1472 qoff = nsent;
1473 for (i = 0, q = qs; i < iovsize && q != NULL; ++i, q = q->next) {
1474 if (qoff < q->len) {
1475 break;
1476 }
1477 qoff -= q->len;
1478 }
1479 LWIP_ASSERT1(q != NULL);
1480 LWIP_ASSERT1(qoff < q->len);
1481 break;
1482 }
1483 else {
1484 /*
1485 * Some errors are really not errors - if we get them,
1486 * it's not different from getting nsent == 0, so filter
1487 * them out here.
1488 */
1489 if (errno != EWOULDBLOCK
1490 && errno != EAGAIN
1491 && errno != ENOBUFS
1492 && errno != ENOMEM
1493 && errno != EINTR)
1494 {
1495 sockerr = errno;
1496 }
1497 q = qs;
1498 qoff = 0;
1499 break;
1500 }
1501 }
1502
1503 if (forwarded > 0) {
1504 tcp_recved(pxtcp->pcb, (u16_t)forwarded);
1505 }
1506
1507 if (q == NULL) { /* everything is forwarded? */
1508 LWIP_ASSERT1(sockerr == 0);
1509 LWIP_ASSERT1(forwarded == p->tot_len);
1510
1511 pxtcp->unsent = NULL;
1512 pbuf_free(p);
1513 if (pxtcp->outbound_close) {
1514 pxtcp_pcb_forward_outbound_close(pxtcp);
1515 }
1516 }
1517 else {
1518 if (q != p) {
1519 /* free forwarded pbufs at the beginning of the chain */
1520 pbuf_ref(q);
1521 pbuf_free(p);
1522 }
1523 if (qoff > 0) {
1524 /* advance payload pointer past the forwarded part */
1525 pbuf_header(q, -(s16_t)qoff);
1526 }
1527 pxtcp->unsent = q;
1528
1529 /*
1530 * Have sendmsg() failed?
1531 *
1532 * Connection reset will be detected by poll and
1533 * pxtcp_schedule_reset() will be called.
1534 *
1535 * Otherwise something *really* unexpected must have happened,
1536 * so we'd better abort.
1537 */
1538 if (sockerr != 0 && sockerr != ECONNRESET) {
1539 struct tcp_pcb *pcb = pxtcp->pcb;
1540 pxtcp_pcb_dissociate(pxtcp);
1541
1542 tcp_abort(pcb);
1543
1544 /* call error callback manually since we've already dissociated */
1545 pxtcp_pcb_err((void *)pxtcp, ERR_ABRT);
1546 return ERR_ABRT;
1547 }
1548
1549 /* schedule one-shot POLLOUT on the socket */
1550 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLOUT, pxtcp);
1551 }
1552 return ERR_OK;
1553}
1554
1555
1556#if !defined(RT_OS_WINDOWS)
1557static ssize_t
1558pxtcp_sock_send(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen)
1559{
1560 struct msghdr mh;
1561 ssize_t nsent;
1562
1563#ifdef MSG_NOSIGNAL
1564 const int send_flags = MSG_NOSIGNAL;
1565#else
1566 const int send_flags = 0;
1567#endif
1568
1569 memset(&mh, 0, sizeof(mh));
1570
1571 mh.msg_iov = iov;
1572 mh.msg_iovlen = iovlen;
1573
1574 nsent = sendmsg(pxtcp->sock, &mh, send_flags);
1575
1576 return nsent;
1577}
1578#else /* RT_OS_WINDOWS */
1579static ssize_t
1580pxtcp_sock_send(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen)
1581{
1582 DWORD nsent;
1583 int status;
1584
1585 status = WSASend(pxtcp->sock, iov, (DWORD)iovlen, &nsent,
1586 0, NULL, NULL);
1587 if (status == SOCKET_ERROR) {
1588 nsent = -1;
1589 }
1590
1591 return nsent;
1592}
1593#endif /* RT_OS_WINDOWS */
1594
1595
1596/**
1597 * Callback from poll manager (on POLLOUT) to send data from
1598 * pxtcp::unsent pbuf to socket.
1599 */
1600static void
1601pxtcp_pcb_write_outbound(void *ctx)
1602{
1603 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
1604 LWIP_ASSERT1(pxtcp != NULL);
1605
1606 if (pxtcp->pcb == NULL) {
1607 return;
1608 }
1609
1610 pxtcp_pcb_forward_outbound(pxtcp, pxtcp->unsent);
1611}
1612
1613
1614/**
1615 * Common poll manager callback used by both outgoing and incoming
1616 * (port-forwarded) connections that has connected socket.
1617 */
1618static int
1619pxtcp_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents)
1620{
1621 struct pxtcp *pxtcp;
1622 int status;
1623 int sockerr;
1624
1625 pxtcp = (struct pxtcp *)handler->data;
1626 LWIP_ASSERT1(handler == &pxtcp->pmhdl);
1627 LWIP_ASSERT1(fd == pxtcp->sock);
1628
1629 if (revents & POLLNVAL) {
1630 pxtcp->sock = INVALID_SOCKET;
1631 return pxtcp_schedule_reset(pxtcp);
1632 }
1633
1634 if (revents & POLLERR) {
1635 socklen_t optlen = (socklen_t)sizeof(sockerr);
1636
1637 status = getsockopt(pxtcp->sock, SOL_SOCKET, SO_ERROR,
1638 (char *)&sockerr, &optlen);
1639 if (status < 0) { /* should not happen */
1640 perror("getsockopt");
1641 sockerr = ECONNRESET;
1642 }
1643
1644 DPRINTF0(("sock %d: errno %d\n", fd, sockerr));
1645 return pxtcp_schedule_reset(pxtcp);
1646 }
1647
1648 if (revents & POLLOUT) {
1649 pxtcp->events &= ~POLLOUT;
1650 proxy_lwip_post(&pxtcp->msg_outbound);
1651 }
1652
1653 if (revents & POLLIN) {
1654 ssize_t nread;
1655 int stop_pollin;
1656
1657 nread = pxtcp_sock_read(pxtcp, &stop_pollin);
1658 if (nread < 0) {
1659 sockerr = -(int)nread;
1660 DPRINTF0(("sock %d: errno %d\n", fd, sockerr));
1661 return pxtcp_schedule_reset(pxtcp);
1662 }
1663
1664 if (stop_pollin) {
1665 pxtcp->events &= ~POLLIN;
1666 }
1667
1668 if (nread > 0) {
1669 proxy_lwip_post(&pxtcp->msg_inbound);
1670#if !HAVE_TCP_POLLHUP
1671 /*
1672 * If host does not report POLLHUP for closed sockets
1673 * (e.g. NetBSD) we should check for full close manually.
1674 */
1675 if (pxtcp->inbound_close && pxtcp->outbound_close_done) {
1676 LWIP_ASSERT1((revents & POLLHUP) == 0);
1677 return pxtcp_schedule_delete(pxtcp);
1678 }
1679#endif
1680 }
1681 }
1682
1683#if !HAVE_TCP_POLLHUP
1684 LWIP_ASSERT1((revents & POLLHUP) == 0);
1685#else
1686 if (revents & POLLHUP) {
1687 /*
1688 * Linux and Darwin seems to report POLLHUP when both
1689 * directions are shut down. And they do report POLLHUP even
1690 * when there's unread data (which they aslo report as POLLIN
1691 * along with that POLLHUP).
1692 *
1693 * FreeBSD (from source inspection) seems to follow Linux,
1694 * reporting POLLHUP when both directions are shut down, but
1695 * POLLHUP is always accompanied with POLLIN.
1696 *
1697 * NetBSD never reports POLLHUP for sockets.
1698 *
1699 * ---
1700 *
1701 * If external half-closes first, we don't get POLLHUP, we
1702 * recv 0 bytes from the socket as EOF indicator, stop polling
1703 * for POLLIN and poll with events == 0 (with occasional
1704 * one-shot POLLOUT). When guest eventually closes, we get
1705 * POLLHUP.
1706 *
1707 * If guest half-closes first things are more tricky. As soon
1708 * as host sees the FIN from external it will spam POLLHUP,
1709 * even when there's unread data. The problem is that we
1710 * might have stopped polling for POLLIN because the ring
1711 * buffer is full or we were polling POLLIN but can't read all
1712 * of the data becuase buffer doesn't have enough space.
1713 * Either way, there's unread data but we can't keep polling
1714 * the socket.
1715 */
1716 DPRINTF(("sock %d: HUP\n", fd));
1717 LWIP_ASSERT1(pxtcp->outbound_close_done);
1718
1719 if (pxtcp->inbound_close) {
1720 /* there's no unread data, we are done */
1721 return pxtcp_schedule_delete(pxtcp);
1722 }
1723 else {
1724 /* DPRINTF */ {
1725#ifndef RT_OS_WINDOWS
1726 int unread;
1727#else
1728 u_long unread;
1729#endif
1730 status = ioctlsocket(fd, FIONREAD, &unread);
1731 if (status == SOCKET_ERROR) {
1732 perror("FIONREAD");
1733 }
1734 else {
1735 DPRINTF2(("sock %d: %d UNREAD bytes\n", fd, unread));
1736 }
1737 }
1738
1739 /*
1740 * We cannot just set a flag here and let pxtcp_pcb_sent()
1741 * notice and start pulling, because if we are preempted
1742 * before setting the flag and all data in inbuf is ACKed
1743 * there will be no more calls to pxtcp_pcb_sent() to
1744 * notice the flag.
1745 *
1746 * We cannot set a flag and then send a message to make
1747 * sure it noticed, because if it has and it has read all
1748 * data while the message is in transit it will delete
1749 * pxtcp.
1750 *
1751 * In a sense this message is like msg_delete (except we
1752 * ask to pull some data first).
1753 */
1754 proxy_lwip_post(&pxtcp->msg_inpull);
1755 pxtcp->pmhdl.slot = -1;
1756 return -1;
1757 }
1758 /* NOTREACHED */
1759 } /* POLLHUP */
1760#endif /* HAVE_TCP_POLLHUP */
1761
1762 return pxtcp->events;
1763}
1764
1765
1766/**
1767 * Read data from socket to ringbuf. This may be used both on lwip
1768 * and poll manager threads.
1769 *
1770 * Flag pointed to by pstop is set when further reading is impossible,
1771 * either temporary when buffer is full, or permanently when EOF is
1772 * received.
1773 *
1774 * Returns number of bytes read. NB: EOF is reported as 1!
1775 *
1776 * Returns zero if nothing was read, either because buffer is full, or
1777 * if no data is available (EAGAIN, EINTR &c).
1778 *
1779 * Returns -errno on real socket errors.
1780 */
1781static ssize_t
1782pxtcp_sock_read(struct pxtcp *pxtcp, int *pstop)
1783{
1784 IOVEC iov[2];
1785 size_t iovlen;
1786 ssize_t nread;
1787
1788 const size_t sz = pxtcp->inbuf.bufsize;
1789 size_t beg, lim, wrnew;
1790
1791 *pstop = 0;
1792
1793 beg = pxtcp->inbuf.vacant;
1794 IOVEC_SET_BASE(iov[0], &pxtcp->inbuf.buf[beg]);
1795
1796 /* lim is the index we can NOT write to */
1797 lim = pxtcp->inbuf.unacked;
1798 if (lim == 0) {
1799 lim = sz - 1; /* empty slot at the end */
1800 }
1801 else if (lim == 1) {
1802 lim = sz; /* empty slot at the beginning */
1803 }
1804 else {
1805 --lim;
1806 }
1807
1808 if (beg == lim) {
1809 /*
1810 * Buffer is full, stop polling for POLLIN.
1811 *
1812 * pxtcp_pcb_sent() will re-enable POLLIN when guest ACKs
1813 * data, freeing space in the ring buffer.
1814 */
1815 *pstop = 1;
1816 return 0;
1817 }
1818
1819 if (beg < lim) {
1820 /* free space in one chunk */
1821 iovlen = 1;
1822 IOVEC_SET_LEN(iov[0], lim - beg);
1823 }
1824 else {
1825 /* free space in two chunks */
1826 iovlen = 2;
1827 IOVEC_SET_LEN(iov[0], sz - beg);
1828 IOVEC_SET_BASE(iov[1], &pxtcp->inbuf.buf[0]);
1829 IOVEC_SET_LEN(iov[1], lim);
1830 }
1831
1832 /*
1833 * TODO: This is where application-level proxy can hook into to
1834 * process inbound traffic.
1835 */
1836 nread = pxtcp_sock_recv(pxtcp, iov, iovlen);
1837
1838 if (nread > 0) {
1839 wrnew = beg + nread;
1840 if (wrnew >= sz) {
1841 wrnew -= sz;
1842 }
1843 pxtcp->inbuf.vacant = wrnew;
1844 DPRINTF2(("pxtcp %p: sock %d read %d bytes\n",
1845 (void *)pxtcp, pxtcp->sock, (int)nread));
1846 return nread;
1847 }
1848 else if (nread == 0) {
1849 *pstop = 1;
1850 pxtcp->inbound_close = 1;
1851 DPRINTF2(("pxtcp %p: sock %d read EOF\n",
1852 (void *)pxtcp, pxtcp->sock));
1853 return 1;
1854 }
1855 else if (errno == EWOULDBLOCK || errno == EAGAIN || errno == EINTR) {
1856 /* haven't read anything, just return */
1857 DPRINTF2(("pxtcp %p: sock %d read cancelled\n",
1858 (void *)pxtcp, pxtcp->sock));
1859 return 0;
1860 }
1861 else {
1862 /* socket error! */
1863 DPRINTF0(("pxtcp %p: sock %d read errno %d\n",
1864 (void *)pxtcp, pxtcp->sock, errno));
1865 return -errno;
1866 }
1867}
1868
1869
1870#if !defined(RT_OS_WINDOWS)
1871static ssize_t
1872pxtcp_sock_recv(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen)
1873{
1874 struct msghdr mh;
1875 ssize_t nread;
1876
1877 memset(&mh, 0, sizeof(mh));
1878
1879 mh.msg_iov = iov;
1880 mh.msg_iovlen = iovlen;
1881
1882 nread = recvmsg(pxtcp->sock, &mh, 0);
1883
1884 return nread;
1885}
1886#else /* RT_OS_WINDOWS */
1887static ssize_t
1888pxtcp_sock_recv(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen)
1889{
1890 DWORD flags;
1891 DWORD nread;
1892 int status;
1893
1894 flags = 0;
1895 status = WSARecv(pxtcp->sock, iov, (DWORD)iovlen, &nread,
1896 &flags, NULL, NULL);
1897 if (status == SOCKET_ERROR) {
1898 nread = -1;
1899 }
1900
1901 return (ssize_t)nread;
1902}
1903#endif /* RT_OS_WINDOWS */
1904
1905
1906/**
1907 * Callback from poll manager (pxtcp::msg_inbound) to trigger output
1908 * from ringbuf to guest.
1909 */
1910static void
1911pxtcp_pcb_write_inbound(void *ctx)
1912{
1913 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
1914 LWIP_ASSERT1(pxtcp != NULL);
1915
1916 if (pxtcp->pcb == NULL) {
1917 return;
1918 }
1919
1920 pxtcp_pcb_forward_inbound(pxtcp);
1921}
1922
1923
1924/**
1925 * tcp_poll() callback
1926 *
1927 * We swtich it on when tcp_write() or tcp_shutdown() fail with
1928 * ERR_MEM to prevent connection from stalling. If there are ACKs or
1929 * more inbound data then pxtcp_pcb_forward_inbound() will be
1930 * triggered again, but if neither happens, tcp_poll() comes to the
1931 * rescue.
1932 */
1933static err_t
1934pxtcp_pcb_poll(void *arg, struct tcp_pcb *pcb)
1935{
1936 struct pxtcp *pxtcp = (struct pxtcp *)arg;
1937 LWIP_UNUSED_ARG(pcb);
1938
1939 DPRINTF2(("%s: pxtcp %p; pcb %p\n",
1940 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
1941
1942 pxtcp_pcb_forward_inbound(pxtcp);
1943
1944 /*
1945 * If the last thing holding up deletion of the pxtcp was failed
1946 * tcp_shutdown() and it succeeded, we may be the last callback.
1947 */
1948 pxtcp_pcb_maybe_deferred_delete(pxtcp);
1949
1950 return ERR_OK;
1951}
1952
1953
1954static void
1955pxtcp_pcb_schedule_poll(struct pxtcp *pxtcp)
1956{
1957 tcp_poll(pxtcp->pcb, pxtcp_pcb_poll, 0);
1958}
1959
1960
1961static void
1962pxtcp_pcb_cancel_poll(struct pxtcp *pxtcp)
1963{
1964 tcp_poll(pxtcp->pcb, NULL, 255);
1965}
1966
1967
1968/**
1969 * Forward inbound data from ring buffer to the guest.
1970 *
1971 * Scheduled by poll manager thread after it receives more data into
1972 * the ring buffer (we have more data to send).
1973
1974 * Also called from tcp_sent() callback when guest ACKs some data,
1975 * increasing pcb->snd_buf (we are permitted to send more data).
1976 *
1977 * Also called from tcp_poll() callback if previous attempt to forward
1978 * inbound data failed with ERR_MEM (we need to try again).
1979 */
1980static void
1981pxtcp_pcb_forward_inbound(struct pxtcp *pxtcp)
1982{
1983 struct tcp_pcb *pcb;
1984 size_t sndbuf;
1985 size_t beg, lim, sndlim;
1986 size_t toeob, tolim;
1987 size_t nsent;
1988 err_t error;
1989
1990 LWIP_ASSERT1(pxtcp != NULL);
1991 pcb = pxtcp->pcb;
1992 if (pcb == NULL) {
1993 return;
1994 }
1995
1996 if (/* __predict_false */ pcb->state < ESTABLISHED) {
1997 /*
1998 * If we have just confirmed accept of this connection, the
1999 * pcb is in SYN_RCVD state and we still haven't received the
2000 * ACK of our SYN. It's only in SYN_RCVD -> ESTABLISHED
2001 * transition that lwip decrements pcb->acked so that that ACK
2002 * is not reported to pxtcp_pcb_sent(). If we send something
2003 * now and immediately close (think "daytime", e.g.) while
2004 * still in SYN_RCVD state, we will move directly to
2005 * FIN_WAIT_1 and when our confirming SYN is ACK'ed lwip will
2006 * report it to pxtcp_pcb_sent().
2007 */
2008 DPRINTF2(("forward_inbound: pxtcp %p; pcb %p %s - later...\n",
2009 (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
2010 return;
2011 }
2012
2013
2014 beg = pxtcp->inbuf.unsent; /* private to lwip thread */
2015 lim = pxtcp->inbuf.vacant;
2016
2017 if (beg == lim) {
2018 if (pxtcp->inbound_close && !pxtcp->inbound_close_done) {
2019 pxtcp_pcb_forward_inbound_close(pxtcp);
2020 tcp_output(pcb);
2021 return;
2022 }
2023
2024 /*
2025 * Else, there's no data to send.
2026 *
2027 * If there is free space in the buffer, producer will
2028 * reschedule us as it receives more data and vacant (lim)
2029 * advances.
2030 *
2031 * If buffer is full when all data have been passed to
2032 * tcp_write() but not yet acknowledged, we will advance
2033 * unacked on ACK, freeing some space for producer to write to
2034 * (then see above).
2035 */
2036 return;
2037 }
2038
2039 sndbuf = tcp_sndbuf(pcb);
2040 if (sndbuf == 0) {
2041 /*
2042 * Can't send anything now. As guest ACKs some data, TCP will
2043 * call pxtcp_pcb_sent() callback and we will come here again.
2044 */
2045 return;
2046 }
2047
2048 nsent = 0;
2049
2050 /*
2051 * We have three limits to consider:
2052 * - how much data we have in the ringbuf
2053 * - how much data we are allowed to send
2054 * - ringbuf size
2055 */
2056 toeob = pxtcp->inbuf.bufsize - beg;
2057 if (lim < beg) { /* lim wrapped */
2058 if (sndbuf < toeob) { /* but we are limited by sndbuf */
2059 /* so beg is not going to wrap, treat sndbuf as lim */
2060 lim = beg + sndbuf; /* ... and proceed to the simple case */
2061 }
2062 else { /* we are limited by the end of the buffer, beg will wrap */
2063 u8_t maybemore;
2064 if (toeob == sndbuf || lim == 0) {
2065 maybemore = 0;
2066 }
2067 else {
2068 maybemore = TCP_WRITE_FLAG_MORE;
2069 }
2070
2071 error = tcp_write(pcb, &pxtcp->inbuf.buf[beg], toeob, maybemore);
2072 if (error != ERR_OK) {
2073 goto writeerr;
2074 }
2075 nsent += toeob;
2076 pxtcp->inbuf.unsent = 0; /* wrap */
2077
2078 if (maybemore) {
2079 beg = 0;
2080 sndbuf -= toeob;
2081 }
2082 else {
2083 /* we are done sending, but ... */
2084 goto check_inbound_close;
2085 }
2086 }
2087 }
2088
2089 LWIP_ASSERT1(beg < lim);
2090 sndlim = beg + sndbuf;
2091 if (lim > sndlim) {
2092 lim = sndlim;
2093 }
2094 tolim = lim - beg;
2095 if (tolim > 0) {
2096 error = tcp_write(pcb, &pxtcp->inbuf.buf[beg], (u16_t)tolim, 0);
2097 if (error != ERR_OK) {
2098 goto writeerr;
2099 }
2100 nsent += tolim;
2101 pxtcp->inbuf.unsent = lim;
2102 }
2103
2104 check_inbound_close:
2105 if (pxtcp->inbound_close && pxtcp->inbuf.unsent == pxtcp->inbuf.vacant) {
2106 pxtcp_pcb_forward_inbound_close(pxtcp);
2107 }
2108
2109 DPRINTF2(("forward_inbound: pxtcp %p, pcb %p: sent %d bytes\n",
2110 (void *)pxtcp, (void *)pcb, (int)nsent));
2111 tcp_output(pcb);
2112 pxtcp_pcb_cancel_poll(pxtcp);
2113 return;
2114
2115 writeerr:
2116 if (error == ERR_MEM) {
2117 if (nsent > 0) { /* first write succeeded, second failed */
2118 DPRINTF2(("forward_inbound: pxtcp %p, pcb %p: sent %d bytes only\n",
2119 (void *)pxtcp, (void *)pcb, (int)nsent));
2120 tcp_output(pcb);
2121 }
2122 DPRINTF(("forward_inbound: pxtcp %p, pcb %p: ERR_MEM\n",
2123 (void *)pxtcp, (void *)pcb));
2124 pxtcp_pcb_schedule_poll(pxtcp);
2125 }
2126 else {
2127 DPRINTF(("forward_inbound: pxtcp %p, pcb %p: %s\n",
2128 (void *)pxtcp, (void *)pcb, proxy_lwip_strerr(error)));
2129
2130 /* XXX: We shouldn't get ERR_ARG. Check ERR_CONN conditions early? */
2131 LWIP_ASSERT1(error == ERR_MEM);
2132 }
2133}
2134
2135
2136static void
2137pxtcp_pcb_forward_inbound_close(struct pxtcp *pxtcp)
2138{
2139 struct tcp_pcb *pcb;
2140 err_t error;
2141
2142 LWIP_ASSERT1(pxtcp != NULL);
2143 LWIP_ASSERT1(pxtcp->inbound_close);
2144 LWIP_ASSERT1(!pxtcp->inbound_close_done);
2145 LWIP_ASSERT1(pxtcp->inbuf.unsent == pxtcp->inbuf.vacant);
2146
2147 pcb = pxtcp->pcb;
2148 LWIP_ASSERT1(pcb != NULL);
2149
2150 DPRINTF(("inbound_close: pxtcp %p; pcb %p: %s\n",
2151 (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
2152
2153 error = tcp_shutdown(pcb, /*RX*/ 0, /*TX*/ 1);
2154 if (error != ERR_OK) {
2155 DPRINTF(("inbound_close: pxtcp %p; pcb %p:"
2156 " tcp_shutdown: error=%s\n",
2157 (void *)pxtcp, (void *)pcb, proxy_lwip_strerr(error)));
2158 pxtcp_pcb_schedule_poll(pxtcp);
2159 return;
2160 }
2161
2162 pxtcp_pcb_cancel_poll(pxtcp);
2163 pxtcp->inbound_close_done = 1;
2164
2165
2166 /*
2167 * If we have already done outbound close previously (passive
2168 * close on the pcb), then we must not hold onto a pcb in LAST_ACK
2169 * state since those will be deleted by lwip when that last ack
2170 * comes from the guest.
2171 *
2172 * NB: We do NOT check for deferred delete here, even though we
2173 * have just set one of its conditions, inbound_close_done. We
2174 * let pcb callbacks that called us do that. It's simpler and
2175 * cleaner that way.
2176 */
2177 if (pxtcp->outbound_close_done && pxtcp_pcb_forward_inbound_done(pxtcp)) {
2178 pxtcp_pcb_dissociate(pxtcp);
2179 }
2180}
2181
2182
2183/**
2184 * Check that all forwarded inbound data is sent and acked, and that
2185 * inbound close is scheduled (we aren't called back when it's acked).
2186 */
2187DECLINLINE(int)
2188pxtcp_pcb_forward_inbound_done(const struct pxtcp *pxtcp)
2189{
2190 return (pxtcp->inbound_close_done /* also implies that all data forwarded */
2191 && pxtcp->inbuf.unacked == pxtcp->inbuf.unsent);
2192}
2193
2194
2195/**
2196 * tcp_sent() callback - guest acknowledged len bytes.
2197 *
2198 * We can advance inbuf::unacked index, making more free space in the
2199 * ringbuf and wake up producer on poll manager thread.
2200 *
2201 * We can also try to send more data if we have any since pcb->snd_buf
2202 * was increased and we are now permitted to send more.
2203 */
2204static err_t
2205pxtcp_pcb_sent(void *arg, struct tcp_pcb *pcb, u16_t len)
2206{
2207 struct pxtcp *pxtcp = (struct pxtcp *)arg;
2208 size_t unacked;
2209
2210 LWIP_ASSERT1(pxtcp != NULL);
2211 LWIP_ASSERT1(pxtcp->pcb == pcb);
2212 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
2213 LWIP_UNUSED_ARG(pcb); /* only in assert */
2214
2215 DPRINTF2(("%s: pxtcp %p; pcb %p: +%d ACKed:"
2216 " unacked %d, unsent %d, vacant %d\n",
2217 __func__, (void *)pxtcp, (void *)pcb, (int)len,
2218 (int)pxtcp->inbuf.unacked,
2219 (int)pxtcp->inbuf.unsent,
2220 (int)pxtcp->inbuf.vacant));
2221
2222 if (/* __predict_false */ len == 0) {
2223 /* we are notified to start pulling */
2224 LWIP_ASSERT1(pxtcp->outbound_close_done);
2225 LWIP_ASSERT1(!pxtcp->inbound_close);
2226 LWIP_ASSERT1(pxtcp->inbound_pull);
2227
2228 unacked = pxtcp->inbuf.unacked;
2229 }
2230 else {
2231 /*
2232 * Advance unacked index. Guest acknowledged the data, so it
2233 * won't be needed again for potential retransmits.
2234 */
2235 unacked = pxtcp->inbuf.unacked + len;
2236 if (unacked > pxtcp->inbuf.bufsize) {
2237 unacked -= pxtcp->inbuf.bufsize;
2238 }
2239 pxtcp->inbuf.unacked = unacked;
2240 }
2241
2242 /* arrange for more inbound data */
2243 if (!pxtcp->inbound_close) {
2244 if (!pxtcp->inbound_pull) {
2245 /* wake up producer, in case it has stopped polling for POLLIN */
2246 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp);
2247#ifdef RT_OS_WINDOWS
2248 /**
2249 * We have't got enought room in ring buffer to read atm,
2250 * but we don't want to lose notification from WSAW4ME when
2251 * space would be available, so we reset event with empty recv
2252 */
2253 recv(pxtcp->sock, NULL, 0, 0);
2254#endif
2255 }
2256 else {
2257 ssize_t nread;
2258 int stop_pollin; /* ignored */
2259
2260 nread = pxtcp_sock_read(pxtcp, &stop_pollin);
2261
2262 if (nread < 0) {
2263 int sockerr = -(int)nread;
2264 LWIP_UNUSED_ARG(sockerr);
2265 DPRINTF0(("%s: sock %d: errno %d\n",
2266 __func__, pxtcp->sock, sockerr));
2267
2268 /*
2269 * Since we are pulling, pxtcp is no longer registered
2270 * with poll manager so we can kill it directly.
2271 */
2272 pxtcp_pcb_reset_pxtcp(pxtcp);
2273 return ERR_ABRT;
2274 }
2275 }
2276 }
2277
2278 /* forward more data if we can */
2279 if (!pxtcp->inbound_close_done) {
2280 pxtcp_pcb_forward_inbound(pxtcp);
2281
2282 /*
2283 * NB: we might have dissociated from a pcb that transitioned
2284 * to LAST_ACK state, so don't refer to pcb below.
2285 */
2286 }
2287
2288
2289 /* have we got all the acks? */
2290 if (pxtcp->inbound_close /* no more new data */
2291 && pxtcp->inbuf.unsent == pxtcp->inbuf.vacant /* all data is sent */
2292 && unacked == pxtcp->inbuf.unsent) /* ... and is acked */
2293 {
2294 char *buf;
2295
2296 DPRINTF(("%s: pxtcp %p; pcb %p; all data ACKed\n",
2297 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
2298
2299 /* no more retransmits, so buf is not needed */
2300 buf = pxtcp->inbuf.buf;
2301 pxtcp->inbuf.buf = NULL;
2302 free(buf);
2303
2304 /* no more acks, so no more callbacks */
2305 if (pxtcp->pcb != NULL) {
2306 tcp_sent(pxtcp->pcb, NULL);
2307 }
2308
2309 /*
2310 * We may be the last callback for this pcb if we have also
2311 * successfully forwarded inbound_close.
2312 */
2313 pxtcp_pcb_maybe_deferred_delete(pxtcp);
2314 }
2315
2316 return ERR_OK;
2317}
2318
2319
2320/**
2321 * Callback from poll manager (pxtcp::msg_inpull) to switch
2322 * pxtcp_pcb_sent() to actively pull the last bits of input. See
2323 * POLLHUP comment in pxtcp_pmgr_pump().
2324 *
2325 * pxtcp::sock is deregistered from poll manager after this callback
2326 * is scheduled.
2327 */
2328static void
2329pxtcp_pcb_pull_inbound(void *ctx)
2330{
2331 struct pxtcp *pxtcp = (struct pxtcp *)ctx;
2332 LWIP_ASSERT1(pxtcp != NULL);
2333
2334 if (pxtcp->pcb == NULL) {
2335 DPRINTF(("%s: pxtcp %p: PCB IS GONE\n", __func__, (void *)pxtcp));
2336 pxtcp_pcb_reset_pxtcp(pxtcp);
2337 return;
2338 }
2339
2340 DPRINTF(("%s: pxtcp %p: pcb %p\n",
2341 __func__, (void *)pxtcp, (void *)pxtcp->pcb));
2342 pxtcp->inbound_pull = 1;
2343 pxtcp->deferred_delete = 1;
2344 pxtcp_pcb_sent(pxtcp, pxtcp->pcb, 0);
2345}
2346
2347
2348/**
2349 * tcp_err() callback.
2350 *
2351 * pcb is not passed to this callback since it may be already
2352 * deallocated by the stack, but we can't do anything useful with it
2353 * anyway since connection is gone.
2354 */
2355static void
2356pxtcp_pcb_err(void *arg, err_t error)
2357{
2358 struct pxtcp *pxtcp = (struct pxtcp *)arg;
2359 LWIP_ASSERT1(pxtcp != NULL);
2360
2361 /*
2362 * ERR_CLSD is special - it is reported here when:
2363 *
2364 * . guest has already half-closed
2365 * . we send FIN to guest when external half-closes
2366 * . guest acks that FIN
2367 *
2368 * Since connection is closed but receive has been already closed
2369 * lwip can only report this via tcp_err. At this point the pcb
2370 * is still alive, so we can peek at it if need be.
2371 *
2372 * The interesting twist is when the ACK from guest that akcs our
2373 * FIN also acks some data. In this scenario lwip will NOT call
2374 * tcp_sent() callback with the ACK for that last bit of data but
2375 * instead will call tcp_err with ERR_CLSD right away. Since that
2376 * ACK also acknowledges all the data, we should run some of
2377 * pxtcp_pcb_sent() logic here.
2378 */
2379 if (error == ERR_CLSD) {
2380 struct tcp_pcb *pcb = pxtcp->pcb; /* still alive */
2381
2382 DPRINTF2(("ERR_CLSD: pxtcp %p; pcb %p:"
2383 " pcb->acked %d;"
2384 " unacked %d, unsent %d, vacant %d\n",
2385 (void *)pxtcp, (void *)pcb,
2386 pcb->acked,
2387 (int)pxtcp->inbuf.unacked,
2388 (int)pxtcp->inbuf.unsent,
2389 (int)pxtcp->inbuf.vacant));
2390
2391 LWIP_ASSERT1(pxtcp->pcb == pcb);
2392 LWIP_ASSERT1(pcb->callback_arg == pxtcp);
2393
2394 if (pcb->acked > 0) {
2395 pxtcp_pcb_sent(pxtcp, pcb, pcb->acked);
2396 }
2397 return;
2398 }
2399
2400 DPRINTF0(("tcp_err: pxtcp=%p, error=%s\n",
2401 (void *)pxtcp, proxy_lwip_strerr(error)));
2402
2403 pxtcp->pcb = NULL; /* pcb is gone */
2404 if (pxtcp->deferred_delete) {
2405 pxtcp_pcb_reset_pxtcp(pxtcp);
2406 }
2407 else {
2408 pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
2409 }
2410}
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette