VirtualBox

source: vbox/trunk/src/VBox/NetworkServices/NAT/pxping.c@ 51619

Last change on this file since 51619 was 51597, checked in by vboxsync, 11 years ago

NAT/Net: convert perror/warn/warnx to DPRINTFs.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 52.9 KB
Line 
1/* -*- indent-tabs-mode: nil; -*- */
2#define LOG_GROUP LOG_GROUP_NAT_SERVICE
3
4#include "winutils.h"
5#include "proxy.h"
6#include "proxy_pollmgr.h"
7#include "pxremap.h"
8
9#include <iprt/string.h>
10
11#ifndef RT_OS_WINDOWS
12#include <sys/types.h>
13#include <sys/socket.h>
14#ifdef RT_OS_DARWIN
15# define __APPLE_USE_RFC_3542
16#endif
17#include <netinet/in.h>
18#include <poll.h>
19#include <stdint.h>
20#include <stdio.h>
21#include <stdlib.h>
22#include <string.h>
23#else
24#include <iprt/stdint.h>
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28#include "winpoll.h"
29#endif
30
31#include "lwip/opt.h"
32
33#include "lwip/sys.h"
34#include "lwip/tcpip.h"
35#include "lwip/inet_chksum.h"
36#include "lwip/ip.h"
37#include "lwip/icmp.h"
38
39#if defined(RT_OS_LINUX) && !defined(__USE_GNU)
40#if __GLIBC_PREREQ(2, 8)
41/*
42 * XXX: This is gross. in6_pktinfo is now hidden behind _GNU_SOURCE
43 * https://sourceware.org/bugzilla/show_bug.cgi?id=6775
44 *
45 * But in older glibc versions, e.g. RHEL5, it is not! I don't want
46 * to deal with _GNU_SOURCE now, so as a kludge check for glibc
47 * version. It seems the __USE_GNU guard was introduced in 2.8.
48 */
/* Local fallback definition, same members as the RFC 3542 structure. */
struct in6_pktinfo {
    struct in6_addr ipi6_addr;      /* IPv6 address */
    unsigned int    ipi6_ifindex;   /* interface index */
};
53#endif /* __GLIBC_PREREQ */
54#endif /* RT_OS_LINUX && !__USE_GNU */
55
56
57/* forward */
58struct ping_pcb;
59
60
61/**
62 * Global state for ping proxy collected in one entity to minimize
63 * globals. There's only one instance of this structure.
64 *
65 * Raw ICMP sockets are promiscuous, so it doesn't make sense to have
66 * multiple. If this code ever needs to support multiple netifs, the
67 * netif member should be exiled into "pcb".
68 */
69struct pxping {
70 SOCKET sock4;
71
72#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS)
73# define DF_WITH_IP_HDRINCL
74 int hdrincl;
75#else
76 int df;
77#endif
78 int ttl;
79 int tos;
80
81 SOCKET sock6;
82#ifdef RT_OS_WINDOWS
83 LPFN_WSARECVMSG pfWSARecvMsg6;
84#endif
85 int hopl;
86
87 struct pollmgr_handler pmhdl4;
88 struct pollmgr_handler pmhdl6;
89
90 struct netif *netif;
91
92 /**
93 * Protect lwIP and pmgr accesses to the list of pcbs.
94 */
95 sys_mutex_t lock;
96
97 /*
98 * We need to find pcbs both from the guest side and from the host
99 * side. If we need to support industrial grade ping throughput,
100 * we will need two pcb hashes. For now, a short linked list
101 * should be enough. Cf. pxping_pcb_for_request() and
102 * pxping_pcb_for_reply().
103 */
104#define PXPING_MAX_PCBS 8
105 size_t npcbs;
106 struct ping_pcb *pcbs;
107
108#define TIMEOUT 5
109 int timer_active;
110 size_t timeout_slot;
111 struct ping_pcb *timeout_list[TIMEOUT];
112};
113
114
115/**
116 * Quasi PCB for ping.
117 */
118struct ping_pcb {
119 ipX_addr_t src;
120 ipX_addr_t dst;
121
122 u8_t is_ipv6;
123 u8_t is_mapped;
124
125 u16_t guest_id;
126 u16_t host_id;
127
128 /**
129 * Desired slot in pxping::timeout_list. See pxping_timer().
130 */
131 size_t timeout_slot;
132
133 /**
134 * Chaining for pxping::timeout_list
135 */
136 struct ping_pcb **pprev_timeout;
137 struct ping_pcb *next_timeout;
138
139 /**
140 * Chaining for pxping::pcbs
141 */
142 struct ping_pcb *next;
143
144 union {
145 struct sockaddr_in sin;
146 struct sockaddr_in6 sin6;
147 } peer;
148};
149
150
151/**
152 * lwIP thread callback message for IPv4 ping.
153 *
154 * We pass raw IP datagram for ip_output_if() so we only need pbuf and
155 * netif (from pxping).
156 */
157struct ping_msg {
158 struct tcpip_msg msg;
159 struct pxping *pxping;
160 struct pbuf *p;
161};
162
163
164/**
165 * lwIP thread callback message for IPv6 ping.
166 *
167 * We cannot obtain raw IPv6 datagram from host without extra trouble,
168 * so we pass ICMPv6 payload in pbuf and also other parameters to
169 * ip6_output_if().
170 */
171struct ping6_msg {
172 struct tcpip_msg msg;
173 struct pxping *pxping;
174 struct pbuf *p;
175 ip6_addr_t src, dst;
176 int hopl, tclass;
177};
178
179
180#ifdef RT_OS_WINDOWS
181static int pxping_init_windows(struct pxping *pxping);
182#endif
183static void pxping_recv4(void *arg, struct pbuf *p);
184static void pxping_recv6(void *arg, struct pbuf *p);
185
186static void pxping_timer(void *arg);
187static void pxping_timer_needed(struct pxping *pxping);
188
189static struct ping_pcb *pxping_pcb_for_request(struct pxping *pxping,
190 int is_ipv6,
191 ipX_addr_t *src, ipX_addr_t *dst,
192 u16_t guest_id);
193static struct ping_pcb *pxping_pcb_for_reply(struct pxping *pxping, int is_ipv6,
194 ipX_addr_t *dst, u16_t host_id);
195
196static FNRTSTRFORMATTYPE pxping_pcb_rtstrfmt;
197static struct ping_pcb *pxping_pcb_allocate(struct pxping *pxping);
198static void pxping_pcb_register(struct pxping *pxping, struct ping_pcb *pcb);
199static void pxping_pcb_deregister(struct pxping *pxping, struct ping_pcb *pcb);
200static void pxping_pcb_delete(struct pxping *pxping, struct ping_pcb *pcb);
201static void pxping_timeout_add(struct pxping *pxping, struct ping_pcb *pcb);
202static void pxping_timeout_del(struct pxping *pxping, struct ping_pcb *pcb);
203
204static int pxping_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents);
205
206static void pxping_pmgr_icmp4(struct pxping *pxping);
207static void pxping_pmgr_icmp4_echo(struct pxping *pxping,
208 u16_t iplen, struct sockaddr_in *peer);
209static void pxping_pmgr_icmp4_error(struct pxping *pxping,
210 u16_t iplen, struct sockaddr_in *peer);
211static void pxping_pmgr_icmp6(struct pxping *pxping);
212static void pxping_pmgr_icmp6_echo(struct pxping *pxping,
213 ip6_addr_t *src, ip6_addr_t *dst,
214 int hopl, int tclass, u16_t icmplen);
215static void pxping_pmgr_icmp6_error(struct pxping *pxping,
216 ip6_addr_t *src, ip6_addr_t *dst,
217 int hopl, int tclass, u16_t icmplen);
218
219static void pxping_pmgr_forward_inbound(struct pxping *pxping, u16_t iplen);
220static void pxping_pcb_forward_inbound(void *arg);
221
222static void pxping_pmgr_forward_inbound6(struct pxping *pxping,
223 ip6_addr_t *src, ip6_addr_t *dst,
224 u8_t hopl, u8_t tclass,
225 u16_t icmplen);
226static void pxping_pcb_forward_inbound6(void *arg);
227
228/*
229 * NB: This is not documented except in RTFS.
230 *
231 * If ip_output_if() is passed dest == NULL then it treats p as
232 * complete IP packet with payload pointing to the IP header. It does
233 * not build IP header, ignores all header-related arguments, fetches
234 * real destination from the header in the pbuf and outputs pbuf to
235 * the specified netif.
236 */
237#define ip_raw_output_if(p, netif) \
238 (ip_output_if((p), NULL, NULL, 0, 0, 0, (netif)))
239
240
241
242static struct pxping g_pxping;
243
244
245err_t
246pxping_init(struct netif *netif, SOCKET sock4, SOCKET sock6)
247{
248 const int on = 1;
249 int status;
250
251 if (sock4 == INVALID_SOCKET && sock6 == INVALID_SOCKET) {
252 return ERR_VAL;
253 }
254
255 g_pxping.netif = netif;
256 sys_mutex_new(&g_pxping.lock);
257
258 g_pxping.sock4 = sock4;
259 if (g_pxping.sock4 != INVALID_SOCKET) {
260#ifdef DF_WITH_IP_HDRINCL
261 g_pxping.hdrincl = -1;
262#else
263 g_pxping.df = -1;
264#endif
265 g_pxping.ttl = -1;
266 g_pxping.tos = 0;
267
268#ifdef RT_OS_LINUX
269 {
270 const int dont = IP_PMTUDISC_DONT;
271 status = setsockopt(sock4, IPPROTO_IP, IP_MTU_DISCOVER,
272 &dont, sizeof(dont));
273 if (status != 0) {
274 DPRINTF(("IP_MTU_DISCOVER: %R[sockerr]\n", SOCKERRNO()));
275 }
276 }
277#endif /* RT_OS_LINUX */
278
279 g_pxping.pmhdl4.callback = pxping_pmgr_pump;
280 g_pxping.pmhdl4.data = (void *)&g_pxping;
281 g_pxping.pmhdl4.slot = -1;
282 pollmgr_add(&g_pxping.pmhdl4, g_pxping.sock4, POLLIN);
283
284 ping_proxy_accept(pxping_recv4, &g_pxping);
285 }
286
287 g_pxping.sock6 = sock6;
288#ifdef RT_OS_WINDOWS
289 /* we need recvmsg */
290 if (g_pxping.sock6 != INVALID_SOCKET) {
291 status = pxping_init_windows(&g_pxping);
292 if (status == SOCKET_ERROR) {
293 g_pxping.sock6 = INVALID_SOCKET;
294 /* close(sock6); */
295 }
296 }
297#endif
298 if (g_pxping.sock6 != INVALID_SOCKET) {
299 g_pxping.hopl = -1;
300
301#if !defined(IPV6_RECVPKTINFO)
302#define IPV6_RECVPKTINFO (IPV6_PKTINFO)
303#endif
304 status = setsockopt(sock6, IPPROTO_IPV6, IPV6_RECVPKTINFO,
305 (const char *)&on, sizeof(on));
306 if (status < 0) {
307 DPRINTF(("IPV6_RECVPKTINFO: %R[sockerr]\n", SOCKERRNO()));
308 /* XXX: for now this is fatal */
309 }
310
311#if !defined(IPV6_RECVHOPLIMIT)
312#define IPV6_RECVHOPLIMIT (IPV6_HOPLIMIT)
313#endif
314 status = setsockopt(sock6, IPPROTO_IPV6, IPV6_RECVHOPLIMIT,
315 (const char *)&on, sizeof(on));
316 if (status < 0) {
317 DPRINTF(("IPV6_RECVHOPLIMIT: %R[sockerr]\n", SOCKERRNO()));
318 }
319
320#ifdef IPV6_RECVTCLASS /* new in RFC 3542, there's no RFC 2292 counterpart */
321 /* TODO: IPV6_RECVTCLASS */
322#endif
323
324 g_pxping.pmhdl6.callback = pxping_pmgr_pump;
325 g_pxping.pmhdl6.data = (void *)&g_pxping;
326 g_pxping.pmhdl6.slot = -1;
327 pollmgr_add(&g_pxping.pmhdl6, g_pxping.sock6, POLLIN);
328
329 ping6_proxy_accept(pxping_recv6, &g_pxping);
330 }
331
332 status = RTStrFormatTypeRegister("ping_pcb", pxping_pcb_rtstrfmt, NULL);
333 AssertRC(status);
334
335 return ERR_OK;
336}
337
338
#ifdef RT_OS_WINDOWS
/**
 * Ask Winsock for the WSARecvMsg() extension function of the IPv6
 * socket and store the pointer in pxping->pfWSARecvMsg6.
 *
 * @return Whatever WSAIoctl() returned.
 */
static int
pxping_init_windows(struct pxping *pxping)
{
    GUID guid = WSAID_WSARECVMSG;
    DWORD cbReturned;

    pxping->pfWSARecvMsg6 = NULL;

    return WSAIoctl(pxping->sock6,
                    SIO_GET_EXTENSION_FUNCTION_POINTER,
                    &guid, sizeof(guid),
                    &pxping->pfWSARecvMsg6, sizeof(pxping->pfWSARecvMsg6),
                    &cbReturned,
                    NULL, NULL);
}
#endif /* RT_OS_WINDOWS */
357
358
359static u32_t
360chksum_delta_16(u16_t oval, u16_t nval)
361{
362 u32_t sum = (u16_t)~oval;
363 sum += nval;
364 return sum;
365}
366
367
368static u32_t
369chksum_update_16(u16_t *oldp, u16_t nval)
370{
371 u32_t sum = chksum_delta_16(*oldp, nval);
372 *oldp = nval;
373 return sum;
374}
375
376
377static u32_t
378chksum_delta_32(u32_t oval, u32_t nval)
379{
380 u32_t sum = ~oval;
381 sum = FOLD_U32T(sum);
382 sum += FOLD_U32T(nval);
383 return sum;
384}
385
386
387static u32_t
388chksum_update_32(u32_t *oldp, u32_t nval)
389{
390 u32_t sum = chksum_delta_32(*oldp, nval);
391 *oldp = nval;
392 return sum;
393}
394
395
396static u32_t
397chksum_delta_ipv6(const ip6_addr_t *oldp, const ip6_addr_t *newp)
398{
399 u32_t sum;
400
401 sum = chksum_delta_32(oldp->addr[0], newp->addr[0]);
402 sum += chksum_delta_32(oldp->addr[1], newp->addr[1]);
403 sum += chksum_delta_32(oldp->addr[2], newp->addr[2]);
404 sum += chksum_delta_32(oldp->addr[3], newp->addr[3]);
405
406 return sum;
407}
408
409
410static u32_t
411chksum_update_ipv6(ip6_addr_t *oldp, const ip6_addr_t *newp)
412{
413 u32_t sum;
414
415 sum = chksum_update_32(&oldp->addr[0], newp->addr[0]);
416 sum += chksum_update_32(&oldp->addr[1], newp->addr[1]);
417 sum += chksum_update_32(&oldp->addr[2], newp->addr[2]);
418 sum += chksum_update_32(&oldp->addr[3], newp->addr[3]);
419
420 return sum;
421}
422
423
424/**
425 * ICMP Echo Request in pbuf "p" is to be proxied.
426 */
427static void
428pxping_recv4(void *arg, struct pbuf *p)
429{
430 struct pxping *pxping = (struct pxping *)arg;
431 struct ping_pcb *pcb;
432#ifdef DF_WITH_IP_HDRINCL
433 struct ip_hdr iph_orig;
434#endif
435 struct icmp_echo_hdr icmph_orig;
436 struct ip_hdr *iph;
437 struct icmp_echo_hdr *icmph;
438 int df, ttl, tos;
439 u32_t sum;
440 u16_t iphlen;
441 int status;
442
443 iphlen = ip_current_header_tot_len();
444 if (iphlen != IP_HLEN) { /* we don't do options */
445 pbuf_free(p);
446 return;
447 }
448
449 iph = (/* UNCONST */ struct ip_hdr *)ip_current_header();
450 icmph = (struct icmp_echo_hdr *)p->payload;
451
452 pcb = pxping_pcb_for_request(pxping, 0,
453 ipX_current_src_addr(),
454 ipX_current_dest_addr(),
455 icmph->id);
456 if (pcb == NULL) {
457 pbuf_free(p);
458 return;
459 }
460
461 DPRINTF(("ping %p: %R[ping_pcb] seq %d len %u ttl %d\n",
462 pcb, pcb,
463 ntohs(icmph->seqno), (unsigned int)p->tot_len,
464 IPH_TTL(iph)));
465
466 ttl = IPH_TTL(iph);
467 if (!pcb->is_mapped) {
468 if (RT_UNLIKELY(ttl == 1)) {
469 status = pbuf_header(p, iphlen); /* back to IP header */
470 if (RT_LIKELY(status == 0)) {
471 icmp_time_exceeded(p, ICMP_TE_TTL);
472 }
473 pbuf_free(p);
474 return;
475 }
476 --ttl;
477 }
478
479 /*
480 * OS X doesn't provide a socket option to control fragmentation.
481 * Solaris doesn't provide IP_DONTFRAG on all releases we support.
482 * In this case we have to use IP_HDRINCL. We don't want to use
483 * it always since it doesn't handle fragmentation (but that's ok
484 * for DF) and Windows doesn't do automatic source address
485 * selection with IP_HDRINCL.
486 */
487 df = (IPH_OFFSET(iph) & PP_HTONS(IP_DF)) != 0;
488
489#ifdef DF_WITH_IP_HDRINCL
490 if (df != pxping->hdrincl) {
491 status = setsockopt(pxping->sock4, IPPROTO_IP, IP_HDRINCL,
492 &df, sizeof(df));
493 if (RT_LIKELY(status == 0)) {
494 pxping->hdrincl = df;
495 }
496 else {
497 DPRINTF(("IP_HDRINCL: %R[sockerr]\n", SOCKERRNO()));
498 }
499 }
500
501 if (pxping->hdrincl) {
502 status = pbuf_header(p, iphlen); /* back to IP header */
503 if (RT_UNLIKELY(status != 0)) {
504 pbuf_free(p);
505 return;
506 }
507
508 /* we will overwrite IP header, save original for ICMP errors */
509 memcpy(&iph_orig, iph, iphlen);
510
511 if (g_proxy_options->src4 != NULL) {
512 memcpy(&iph->src, &g_proxy_options->src4->sin_addr,
513 sizeof(g_proxy_options->src4->sin_addr));
514 }
515 else {
516 /* let the kernel select suitable source address */
517 memset(&iph->src, 0, sizeof(iph->src));
518 }
519
520 IPH_TTL_SET(iph, ttl); /* already decremented */
521 IPH_ID_SET(iph, 0); /* kernel will set one */
522#ifdef RT_OS_DARWIN
523 /* wants ip_offset and ip_len fields in host order */
524 IPH_OFFSET_SET(iph, ntohs(IPH_OFFSET(iph)));
525 IPH_LEN_SET(iph, ntohs(IPH_LEN(iph)));
526 /* wants checksum of everything (sic!), in host order */
527 sum = inet_chksum_pbuf(p);
528 IPH_CHKSUM_SET(iph, sum);
529#else /* !RT_OS_DARWIN */
530 IPH_CHKSUM_SET(iph, 0); /* kernel will recalculate */
531#endif
532 }
533 else /* !pxping->hdrincl */
534#endif /* DF_WITH_IP_HDRINCL */
535 {
536#if !defined(DF_WITH_IP_HDRINCL)
537 /* control DF flag via setsockopt(2) */
538#define USE_DF_OPTION(_Optname) \
539 const int dfopt = _Optname; \
540 const char * const dfoptname = #_Optname;
541#if defined(RT_OS_LINUX)
542 USE_DF_OPTION(IP_MTU_DISCOVER);
543 df = df ? IP_PMTUDISC_DO : IP_PMTUDISC_DONT;
544#elif defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
545 USE_DF_OPTION(IP_DONTFRAG);
546#elif defined(RT_OS_WINDOWS)
547 USE_DF_OPTION(IP_DONTFRAGMENT);
548#endif
549 if (df != pxping->df) {
550 status = setsockopt(pxping->sock4, IPPROTO_IP, dfopt,
551 (char *)&df, sizeof(df));
552 if (RT_LIKELY(status == 0)) {
553 pxping->df = df;
554 }
555 else {
556 DPRINTF(("%s: %R[sockerr]\n", dfoptname, SOCKERRNO()));
557 }
558 }
559#endif /* !DF_WITH_IP_HDRINCL */
560
561 if (ttl != pxping->ttl) {
562 status = setsockopt(pxping->sock4, IPPROTO_IP, IP_TTL,
563 (char *)&ttl, sizeof(ttl));
564 if (RT_LIKELY(status == 0)) {
565 pxping->ttl = ttl;
566 }
567 else {
568 DPRINTF(("IP_TTL: %R[sockerr]\n", SOCKERRNO()));
569 }
570 }
571
572 tos = IPH_TOS(iph);
573 if (tos != pxping->tos) {
574 status = setsockopt(pxping->sock4, IPPROTO_IP, IP_TOS,
575 (char *)&tos, sizeof(tos));
576 if (RT_LIKELY(status == 0)) {
577 pxping->tos = tos;
578 }
579 else {
580 DPRINTF(("IP_TOS: %R[sockerr]\n", SOCKERRNO()));
581 }
582 }
583 }
584
585 /* rewrite ICMP echo header */
586 memcpy(&icmph_orig, icmph, sizeof(*icmph));
587 sum = (u16_t)~icmph->chksum;
588 sum += chksum_update_16(&icmph->id, pcb->host_id);
589 sum = FOLD_U32T(sum);
590 icmph->chksum = ~sum;
591
592 status = proxy_sendto(pxping->sock4, p,
593 &pcb->peer.sin, sizeof(pcb->peer.sin));
594 if (status != 0) {
595 int error = -status;
596 DPRINTF(("%s: sendto: %R[sockerr]\n", __func__, error));
597
598#ifdef DF_WITH_IP_HDRINCL
599 if (pxping->hdrincl) {
600 /* restore original IP header */
601 memcpy(iph, &iph_orig, iphlen);
602 }
603 else
604#endif
605 {
606 status = pbuf_header(p, iphlen); /* back to IP header */
607 if (RT_UNLIKELY(status != 0)) {
608 pbuf_free(p);
609 return;
610 }
611 }
612
613 /* restore original ICMP header */
614 memcpy(icmph, &icmph_orig, sizeof(*icmph));
615
616 /*
617 * Some ICMP errors may be generated by the kernel and we read
618 * them from the socket and forward them normally, hence the
619 * ifdefs below.
620 */
621 switch (error) {
622
623#if !( defined(RT_OS_SOLARIS) \
624 || (defined(RT_OS_LINUX) && !defined(DF_WITH_IP_HDRINCL)) \
625 )
626 case EMSGSIZE:
627 icmp_dest_unreach(p, ICMP_DUR_FRAG);
628 break;
629#endif
630
631 case ENETDOWN:
632 case ENETUNREACH:
633 icmp_dest_unreach(p, ICMP_DUR_NET);
634 break;
635
636 case EHOSTDOWN:
637 case EHOSTUNREACH:
638 icmp_dest_unreach(p, ICMP_DUR_HOST);
639 break;
640 }
641 }
642
643 pbuf_free(p);
644}
645
646
647/**
648 * ICMPv6 Echo Request in pbuf "p" is to be proxied.
649 */
650static void
651pxping_recv6(void *arg, struct pbuf *p)
652{
653 struct pxping *pxping = (struct pxping *)arg;
654 struct ping_pcb *pcb;
655 struct ip6_hdr *iph;
656 struct icmp6_echo_hdr *icmph;
657 int hopl;
658 u16_t iphlen;
659 u16_t id, seq;
660 int status;
661
662 iph = (/* UNCONST */ struct ip6_hdr *)ip6_current_header();
663 iphlen = ip_current_header_tot_len();
664
665 icmph = (struct icmp6_echo_hdr *)p->payload;
666
667 id = icmph->id;
668 seq = icmph->seqno;
669
670 pcb = pxping_pcb_for_request(pxping, 1,
671 ipX_current_src_addr(),
672 ipX_current_dest_addr(),
673 id);
674 if (pcb == NULL) {
675 pbuf_free(p);
676 return;
677 }
678
679 DPRINTF(("ping %p: %R[ping_pcb] seq %d len %u hopl %d\n",
680 pcb, pcb,
681 ntohs(seq), (unsigned int)p->tot_len,
682 IP6H_HOPLIM(iph)));
683
684 hopl = IP6H_HOPLIM(iph);
685 if (!pcb->is_mapped) {
686 if (hopl == 1) {
687 status = pbuf_header(p, iphlen); /* back to IP header */
688 if (RT_LIKELY(status == 0)) {
689 icmp6_time_exceeded(p, ICMP6_TE_HL);
690 }
691 pbuf_free(p);
692 return;
693 }
694 --hopl;
695 }
696
697 /*
698 * Rewrite ICMPv6 echo header. We don't need to recompute the
699 * checksum since, unlike IPv4, checksum includes pseudo-header.
700 * OS computes checksum for us on send() since it needs to select
701 * source address.
702 */
703 icmph->id = pcb->host_id;
704
705 /* TODO: use control messages to save a syscall? */
706 if (hopl != pxping->hopl) {
707 status = setsockopt(pxping->sock6, IPPROTO_IPV6, IPV6_UNICAST_HOPS,
708 (char *)&hopl, sizeof(hopl));
709 if (status == 0) {
710 pxping->hopl = hopl;
711 }
712 else {
713 DPRINTF(("IPV6_HOPLIMIT: %R[sockerr]\n", SOCKERRNO()));
714 }
715 }
716
717 status = proxy_sendto(pxping->sock6, p,
718 &pcb->peer.sin6, sizeof(pcb->peer.sin6));
719 if (status != 0) {
720 int error = -status;
721 DPRINTF(("%s: sendto: %R[sockerr]\n", __func__, error));
722
723 status = pbuf_header(p, iphlen); /* back to IP header */
724 if (RT_UNLIKELY(status != 0)) {
725 pbuf_free(p);
726 return;
727 }
728
729 /* restore original ICMP header */
730 icmph->id = pcb->guest_id;
731
732 switch (error) {
733 case EACCES:
734 icmp6_dest_unreach(p, ICMP6_DUR_PROHIBITED);
735 break;
736
737#ifdef ENONET
738 case ENONET:
739#endif
740 case ENETDOWN:
741 case ENETUNREACH:
742 case EHOSTDOWN:
743 case EHOSTUNREACH:
744 icmp6_dest_unreach(p, ICMP6_DUR_NO_ROUTE);
745 break;
746 }
747 }
748
749 pbuf_free(p);
750}
751
752
753/**
754 * Formatter for %R[ping_pcb].
755 */
756static DECLCALLBACK(size_t)
757pxping_pcb_rtstrfmt(PFNRTSTROUTPUT pfnOutput, void *pvArgOutput,
758 const char *pszType, const void *pvValue,
759 int cchWidth, int cchPrecision, unsigned int fFlags,
760 void *pvUser)
761{
762 const struct ping_pcb *pcb = (const struct ping_pcb *)pvValue;
763 size_t cb = 0;
764
765 NOREF(cchWidth);
766 NOREF(cchPrecision);
767 NOREF(fFlags);
768 NOREF(pvUser);
769
770 AssertReturn(strcmp(pszType, "ping_pcb") == 0, 0);
771
772 if (pcb == NULL) {
773 return RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL, "(null)");
774 }
775
776 /* XXX: %RTnaipv4 takes the value, but %RTnaipv6 takes the pointer */
777 if (pcb->is_ipv6) {
778 cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL,
779 "%RTnaipv6 -> %RTnaipv6", &pcb->src, &pcb->dst);
780 }
781 else {
782 cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL,
783 "%RTnaipv4 -> %RTnaipv4",
784 ip4_addr_get_u32(ipX_2_ip(&pcb->src)),
785 ip4_addr_get_u32(ipX_2_ip(&pcb->dst)));
786 }
787
788 cb += RTStrFormat(pfnOutput, pvArgOutput, NULL, NULL,
789 " id %04x->%04x", ntohs(pcb->guest_id), ntohs(pcb->host_id));
790
791 return cb;
792}
793
794
795static struct ping_pcb *
796pxping_pcb_allocate(struct pxping *pxping)
797{
798 struct ping_pcb *pcb;
799
800 if (pxping->npcbs >= PXPING_MAX_PCBS) {
801 return NULL;
802 }
803
804 pcb = (struct ping_pcb *)malloc(sizeof(*pcb));
805 if (pcb == NULL) {
806 return NULL;
807 }
808
809 ++pxping->npcbs;
810 return pcb;
811}
812
813
814static void
815pxping_pcb_delete(struct pxping *pxping, struct ping_pcb *pcb)
816{
817 LWIP_ASSERT1(pxping->npcbs > 0);
818 LWIP_ASSERT1(pcb->next == NULL);
819 LWIP_ASSERT1(pcb->pprev_timeout == NULL);
820
821 DPRINTF(("%s: ping %p\n", __func__, (void *)pcb));
822
823 --pxping->npcbs;
824 free(pcb);
825}
826
827
828static void
829pxping_timeout_add(struct pxping *pxping, struct ping_pcb *pcb)
830{
831 struct ping_pcb **chain;
832
833 LWIP_ASSERT1(pcb->pprev_timeout == NULL);
834
835 chain = &pxping->timeout_list[pcb->timeout_slot];
836 if ((pcb->next_timeout = *chain) != NULL) {
837 (*chain)->pprev_timeout = &pcb->next_timeout;
838 }
839 *chain = pcb;
840 pcb->pprev_timeout = chain;
841}
842
843
844static void
845pxping_timeout_del(struct pxping *pxping, struct ping_pcb *pcb)
846{
847 LWIP_UNUSED_ARG(pxping);
848
849 LWIP_ASSERT1(pcb->pprev_timeout != NULL);
850 if (pcb->next_timeout != NULL) {
851 pcb->next_timeout->pprev_timeout = pcb->pprev_timeout;
852 }
853 *pcb->pprev_timeout = pcb->next_timeout;
854 pcb->pprev_timeout = NULL;
855 pcb->next_timeout = NULL;
856}
857
858
859static void
860pxping_pcb_register(struct pxping *pxping, struct ping_pcb *pcb)
861{
862 pcb->next = pxping->pcbs;
863 pxping->pcbs = pcb;
864
865 pxping_timeout_add(pxping, pcb);
866}
867
868
869static void
870pxping_pcb_deregister(struct pxping *pxping, struct ping_pcb *pcb)
871{
872 struct ping_pcb **p;
873
874 for (p = &pxping->pcbs; *p != NULL; p = &(*p)->next) {
875 if (*p == pcb) {
876 *p = pcb->next;
877 pcb->next = NULL;
878 break;
879 }
880 }
881
882 pxping_timeout_del(pxping, pcb);
883}
884
885
886static struct ping_pcb *
887pxping_pcb_for_request(struct pxping *pxping,
888 int is_ipv6, ipX_addr_t *src, ipX_addr_t *dst,
889 u16_t guest_id)
890{
891 struct ping_pcb *pcb;
892
893 /* on lwip thread, so no concurrent updates */
894 for (pcb = pxping->pcbs; pcb != NULL; pcb = pcb->next) {
895 if (pcb->guest_id == guest_id
896 && pcb->is_ipv6 == is_ipv6
897 && ipX_addr_cmp(is_ipv6, &pcb->dst, dst)
898 && ipX_addr_cmp(is_ipv6, &pcb->src, src))
899 {
900 break;
901 }
902 }
903
904 if (pcb == NULL) {
905 int mapped;
906
907 pcb = pxping_pcb_allocate(pxping);
908 if (pcb == NULL) {
909 return NULL;
910 }
911
912 pcb->is_ipv6 = is_ipv6;
913 ipX_addr_copy(is_ipv6, pcb->src, *src);
914 ipX_addr_copy(is_ipv6, pcb->dst, *dst);
915
916 pcb->guest_id = guest_id;
917#ifdef RT_OS_WINDOWS
918# define random() (rand())
919#endif
920 pcb->host_id = random() & 0xffffUL;
921
922 pcb->pprev_timeout = NULL;
923 pcb->next_timeout = NULL;
924
925 if (is_ipv6) {
926 pcb->peer.sin6.sin6_family = AF_INET6;
927#if HAVE_SA_LEN
928 pcb->peer.sin6.sin6_len = sizeof(pcb->peer.sin6);
929#endif
930 pcb->peer.sin6.sin6_port = htons(IPPROTO_ICMPV6);
931 pcb->peer.sin6.sin6_flowinfo = 0;
932 mapped = pxremap_outbound_ip6((ip6_addr_t *)&pcb->peer.sin6.sin6_addr,
933 ipX_2_ip6(&pcb->dst));
934 }
935 else {
936 pcb->peer.sin.sin_family = AF_INET;
937#if HAVE_SA_LEN
938 pcb->peer.sin.sin_len = sizeof(pcb->peer.sin);
939#endif
940 pcb->peer.sin.sin_port = htons(IPPROTO_ICMP);
941 mapped = pxremap_outbound_ip4((ip_addr_t *)&pcb->peer.sin.sin_addr,
942 ipX_2_ip(&pcb->dst));
943 }
944
945 if (mapped == PXREMAP_FAILED) {
946 free(pcb);
947 return NULL;
948 }
949 else {
950 pcb->is_mapped = (mapped == PXREMAP_MAPPED);
951 }
952
953 pcb->timeout_slot = pxping->timeout_slot;
954
955 sys_mutex_lock(&pxping->lock);
956 pxping_pcb_register(pxping, pcb);
957 sys_mutex_unlock(&pxping->lock);
958
959 DPRINTF(("ping %p: %R[ping_pcb] - created\n", pcb, pcb));
960
961 pxping_timer_needed(pxping);
962 }
963 else {
964 /* just bump up expiration timeout lazily */
965 DPRINTF(("ping %p: %R[ping_pcb] - slot %d -> %d\n",
966 pcb, pcb,
967 (unsigned int)pcb->timeout_slot,
968 (unsigned int)pxping->timeout_slot));
969 pcb->timeout_slot = pxping->timeout_slot;
970 }
971
972 return pcb;
973}
974
975
976/**
977 * Called on pollmgr thread. Caller must do the locking since caller
978 * is going to use the returned pcb, which needs to be protected from
979 * being expired by pxping_timer() on lwip thread.
980 */
981static struct ping_pcb *
982pxping_pcb_for_reply(struct pxping *pxping,
983 int is_ipv6, ipX_addr_t *dst, u16_t host_id)
984{
985 struct ping_pcb *pcb;
986
987 for (pcb = pxping->pcbs; pcb != NULL; pcb = pcb->next) {
988 if (pcb->host_id == host_id
989 && pcb->is_ipv6 == is_ipv6
990 /* XXX: allow broadcast pings? */
991 && ipX_addr_cmp(is_ipv6, &pcb->dst, dst))
992 {
993 return pcb;
994 }
995 }
996
997 return NULL;
998}
999
1000
1001static void
1002pxping_timer(void *arg)
1003{
1004 struct pxping *pxping = (struct pxping *)arg;
1005 struct ping_pcb **chain, *pcb;
1006
1007 pxping->timer_active = 0;
1008
1009 /*
1010 * New slot points to the list of pcbs to check for expiration.
1011 */
1012 LWIP_ASSERT1(pxping->timeout_slot < TIMEOUT);
1013 if (++pxping->timeout_slot == TIMEOUT) {
1014 pxping->timeout_slot = 0;
1015 }
1016
1017 chain = &pxping->timeout_list[pxping->timeout_slot];
1018 pcb = *chain;
1019
1020 /* protect from pollmgr concurrent reads */
1021 sys_mutex_lock(&pxping->lock);
1022
1023 while (pcb != NULL) {
1024 struct ping_pcb *xpcb = pcb;
1025 pcb = pcb->next_timeout;
1026
1027 if (xpcb->timeout_slot == pxping->timeout_slot) {
1028 /* expired */
1029 pxping_pcb_deregister(pxping, xpcb);
1030 pxping_pcb_delete(pxping, xpcb);
1031 }
1032 else {
1033 /*
1034 * If there was another request, we updated timeout_slot
1035 * but delayed actually moving the pcb until now.
1036 */
1037 pxping_timeout_del(pxping, xpcb); /* from current slot */
1038 pxping_timeout_add(pxping, xpcb); /* to new slot */
1039 }
1040 }
1041
1042 sys_mutex_unlock(&pxping->lock);
1043 pxping_timer_needed(pxping);
1044}
1045
1046
1047static void
1048pxping_timer_needed(struct pxping *pxping)
1049{
1050 if (!pxping->timer_active && pxping->pcbs != NULL) {
1051 pxping->timer_active = 1;
1052 sys_timeout(1 * 1000, pxping_timer, pxping);
1053 }
1054}
1055
1056
1057static int
1058pxping_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents)
1059{
1060 struct pxping *pxping;
1061
1062 pxping = (struct pxping *)handler->data;
1063 LWIP_ASSERT1(fd == pxping->sock4 || fd == pxping->sock6);
1064
1065 if (revents & ~(POLLIN|POLLERR)) {
1066 DPRINTF0(("%s: unexpected revents 0x%x\n", __func__, revents));
1067 return POLLIN;
1068 }
1069
1070 if (revents & POLLERR) {
1071 int sockerr = -1;
1072 socklen_t optlen = (socklen_t)sizeof(sockerr);
1073 int status;
1074
1075 status = getsockopt(fd, SOL_SOCKET,
1076 SO_ERROR, (char *)&sockerr, &optlen);
1077 if (status < 0) {
1078 DPRINTF(("%s: sock %d: SO_ERROR failed: %R[sockerr]\n",
1079 __func__, fd, SOCKERRNO()));
1080 }
1081 else {
1082 DPRINTF(("%s: sock %d: %R[sockerr]\n",
1083 __func__, fd, sockerr));
1084 }
1085 }
1086
1087 if ((revents & POLLIN) == 0) {
1088 return POLLIN;
1089 }
1090
1091 if (fd == pxping->sock4) {
1092 pxping_pmgr_icmp4(pxping);
1093 }
1094 else /* fd == pxping->sock6 */ {
1095 pxping_pmgr_icmp6(pxping);
1096 }
1097
1098 return POLLIN;
1099}
1100
1101
1102/**
1103 * Process incoming ICMP message for the host.
1104 * NB: we will get a lot of spam here and have to sift through it.
1105 */
1106static void
1107pxping_pmgr_icmp4(struct pxping *pxping)
1108{
1109 struct sockaddr_in sin;
1110 socklen_t salen = sizeof(sin);
1111 ssize_t nread;
1112 struct ip_hdr *iph;
1113 struct icmp_echo_hdr *icmph;
1114 u16_t iplen;
1115
1116 memset(&sin, 0, sizeof(sin));
1117
1118 /*
1119 * Reads from raw IPv4 sockets deliver complete IP datagrams with
1120 * IP header included.
1121 */
1122 nread = recvfrom(pxping->sock4, pollmgr_udpbuf, sizeof(pollmgr_udpbuf), 0,
1123 (struct sockaddr *)&sin, &salen);
1124 if (nread < 0) {
1125 DPRINTF(("%s: %R[sockerr]\n", __func__, SOCKERRNO()));
1126 return;
1127 }
1128
1129 if (nread < IP_HLEN) {
1130 DPRINTF2(("%s: read %d bytes, IP header truncated\n",
1131 __func__, (unsigned int)nread));
1132 return;
1133 }
1134
1135 iph = (struct ip_hdr *)pollmgr_udpbuf;
1136
1137 /* match version */
1138 if (IPH_V(iph) != 4) {
1139 DPRINTF2(("%s: unexpected IP version %d\n", __func__, IPH_V(iph)));
1140 return;
1141 }
1142
1143 /* no fragmentation */
1144 if ((IPH_OFFSET(iph) & PP_HTONS(IP_OFFMASK | IP_MF)) != 0) {
1145 DPRINTF2(("%s: dropping fragmented datagram\n", __func__));
1146 return;
1147 }
1148
1149 /* no options */
1150 if (IPH_HL(iph) * 4 != IP_HLEN) {
1151 DPRINTF2(("%s: dropping datagram with options (IP header length %d)\n",
1152 __func__, IPH_HL(iph) * 4));
1153 return;
1154 }
1155
1156 if (IPH_PROTO(iph) != IP_PROTO_ICMP) {
1157 DPRINTF2(("%s: unexpected protocol %d\n", __func__, IPH_PROTO(iph)));
1158 return;
1159 }
1160
1161 iplen = IPH_LEN(iph);
1162#if !defined(RT_OS_DARWIN)
1163 /* darwin reports IPH_LEN in host byte order */
1164 iplen = ntohs(iplen);
1165#endif
1166#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS)
1167 /* darwin and solaris change IPH_LEN to payload length only */
1168 iplen += IP_HLEN; /* we verified there are no options */
1169 IPH_LEN(iph) = htons(iplen);
1170#endif
1171 if (nread < iplen) {
1172 DPRINTF2(("%s: read %d bytes but total length is %d bytes\n",
1173 __func__, (unsigned int)nread, (unsigned int)iplen));
1174 return;
1175 }
1176
1177 if (iplen < IP_HLEN + ICMP_HLEN) {
1178 DPRINTF2(("%s: IP length %d bytes, ICMP header truncated\n",
1179 __func__, iplen));
1180 return;
1181 }
1182
1183 icmph = (struct icmp_echo_hdr *)(pollmgr_udpbuf + IP_HLEN);
1184 if (ICMPH_TYPE(icmph) == ICMP_ER) {
1185 pxping_pmgr_icmp4_echo(pxping, iplen, &sin);
1186 }
1187 else if (ICMPH_TYPE(icmph) == ICMP_DUR || ICMPH_TYPE(icmph) == ICMP_TE) {
1188 pxping_pmgr_icmp4_error(pxping, iplen, &sin);
1189 }
1190#if 1
1191 else {
1192 DPRINTF2(("%s: ignoring ICMP type %d\n", __func__, ICMPH_TYPE(icmph)));
1193 }
1194#endif
1195}
1196
1197
1198/**
1199 * Check if this incoming ICMP echo reply is for one of our pings and
1200 * forward it to the guest.
1201 */
1202static void
1203pxping_pmgr_icmp4_echo(struct pxping *pxping,
1204 u16_t iplen, struct sockaddr_in *peer)
1205{
1206 struct ip_hdr *iph;
1207 struct icmp_echo_hdr *icmph;
1208 u16_t id, seq;
1209 ip_addr_t guest_ip, target_ip;
1210 int mapped;
1211 struct ping_pcb *pcb;
1212 u16_t guest_id;
1213 u32_t sum;
1214
1215 iph = (struct ip_hdr *)pollmgr_udpbuf;
1216 icmph = (struct icmp_echo_hdr *)(pollmgr_udpbuf + IP_HLEN);
1217
1218 id = icmph->id;
1219 seq = icmph->seqno;
1220
1221 DPRINTF(("<--- PING %RTnaipv4 id 0x%x seq %d\n",
1222 peer->sin_addr.s_addr, ntohs(id), ntohs(seq)));
1223
1224 /*
1225 * Is this a reply to one of our pings?
1226 */
1227
1228 ip_addr_copy(target_ip, iph->src);
1229 mapped = pxremap_inbound_ip4(&target_ip, &target_ip);
1230 if (mapped == PXREMAP_FAILED) {
1231 return;
1232 }
1233 if (mapped == PXREMAP_ASIS && IPH_TTL(iph) == 1) {
1234 DPRINTF2(("%s: dropping packet with ttl 1\n", __func__));
1235 return;
1236 }
1237
1238 sys_mutex_lock(&pxping->lock);
1239 pcb = pxping_pcb_for_reply(pxping, 0, ip_2_ipX(&target_ip), id);
1240 if (pcb == NULL) {
1241 sys_mutex_unlock(&pxping->lock);
1242 DPRINTF2(("%s: no match\n", __func__));
1243 return;
1244 }
1245
1246 DPRINTF2(("%s: pcb %p\n", __func__, (void *)pcb));
1247
1248 /* save info before unlocking since pcb may expire */
1249 ip_addr_copy(guest_ip, *ipX_2_ip(&pcb->src));
1250 guest_id = pcb->guest_id;
1251
1252 sys_mutex_unlock(&pxping->lock);
1253
1254
1255 /*
1256 * Rewrite headers and forward to guest.
1257 */
1258
1259 /* rewrite ICMP echo header */
1260 sum = (u16_t)~icmph->chksum;
1261 sum += chksum_update_16(&icmph->id, guest_id);
1262 sum = FOLD_U32T(sum);
1263 icmph->chksum = ~sum;
1264
1265 /* rewrite IP header */
1266 sum = (u16_t)~IPH_CHKSUM(iph);
1267 sum += chksum_update_32((u32_t *)&iph->dest,
1268 ip4_addr_get_u32(&guest_ip));
1269 if (mapped == PXREMAP_MAPPED) {
1270 sum += chksum_update_32((u32_t *)&iph->src,
1271 ip4_addr_get_u32(&target_ip));
1272 }
1273 else {
1274 IPH_TTL_SET(iph, IPH_TTL(iph) - 1);
1275 sum += PP_NTOHS(~0x0100);
1276 }
1277 sum = FOLD_U32T(sum);
1278 IPH_CHKSUM_SET(iph, ~sum);
1279
1280 pxping_pmgr_forward_inbound(pxping, iplen);
1281}
1282
1283
1284/**
1285 * Check if this incoming ICMP error (destination unreachable or time
1286 * exceeded) is about one of our pings and forward it to the guest.
1287 */
1288static void
1289pxping_pmgr_icmp4_error(struct pxping *pxping,
1290 u16_t iplen, struct sockaddr_in *peer)
1291{
1292 struct ip_hdr *iph, *oiph;
1293 struct icmp_echo_hdr *icmph, *oicmph;
1294 u16_t oipoff, oiphlen, oiplen;
1295 u16_t id, seq;
1296 ip_addr_t guest_ip, target_ip, error_ip;
1297 int target_mapped, error_mapped;
1298 struct ping_pcb *pcb;
1299 u16_t guest_id;
1300 u32_t sum;
1301
1302 iph = (struct ip_hdr *)pollmgr_udpbuf;
1303 icmph = (struct icmp_echo_hdr *)(pollmgr_udpbuf + IP_HLEN);
1304
1305 /*
1306 * Inner IP datagram is not checked by the kernel and may be
1307 * anything, possibly malicious.
1308 */
1309
1310 oipoff = IP_HLEN + ICMP_HLEN;
1311 oiplen = iplen - oipoff; /* NB: truncated length, not IPH_LEN(oiph) */
1312 if (oiplen < IP_HLEN) {
1313 DPRINTF2(("%s: original datagram truncated to %d bytes\n",
1314 __func__, oiplen));
1315 }
1316
1317 /* IP header of the original message */
1318 oiph = (struct ip_hdr *)(pollmgr_udpbuf + oipoff);
1319
1320 /* match version */
1321 if (IPH_V(oiph) != 4) {
1322 DPRINTF2(("%s: unexpected IP version %d\n", __func__, IPH_V(oiph)));
1323 return;
1324 }
1325
1326 /* can't match fragments except the first one */
1327 if ((IPH_OFFSET(oiph) & PP_HTONS(IP_OFFMASK)) != 0) {
1328 DPRINTF2(("%s: ignoring fragment with offset %d\n",
1329 __func__, ntohs(IPH_OFFSET(oiph) & PP_HTONS(IP_OFFMASK))));
1330 return;
1331 }
1332
1333 if (IPH_PROTO(oiph) != IP_PROTO_ICMP) {
1334#if 0
1335 /* don't spam with every "destination unreachable" in the system */
1336 DPRINTF2(("%s: ignoring protocol %d\n", __func__, IPH_PROTO(oiph)));
1337#endif
1338 return;
1339 }
1340
1341 oiphlen = IPH_HL(oiph) * 4;
1342 if (oiplen < oiphlen + ICMP_HLEN) {
1343 DPRINTF2(("%s: original datagram truncated to %d bytes\n",
1344 __func__, oiplen));
1345 return;
1346 }
1347
1348 oicmph = (struct icmp_echo_hdr *)(pollmgr_udpbuf + oipoff + oiphlen);
1349 if (ICMPH_TYPE(oicmph) != ICMP_ECHO) {
1350 DPRINTF2(("%s: ignoring ICMP error for original ICMP type %d\n",
1351 __func__, ICMPH_TYPE(oicmph)));
1352 return;
1353 }
1354
1355 id = oicmph->id;
1356 seq = oicmph->seqno;
1357
1358 DPRINTF2(("%s: ping %RTnaipv4 id 0x%x seq %d",
1359 __func__, ip4_addr_get_u32(&oiph->dest), ntohs(id), ntohs(seq)));
1360 if (ICMPH_TYPE(icmph) == ICMP_DUR) {
1361 DPRINTF2((" unreachable (code %d)\n", ICMPH_CODE(icmph)));
1362 }
1363 else {
1364 DPRINTF2((" time exceeded\n"));
1365 }
1366
1367
1368 /*
1369 * Is the inner (failed) datagram one of our pings?
1370 */
1371
1372 ip_addr_copy(target_ip, oiph->dest); /* inner (failed) */
1373 target_mapped = pxremap_inbound_ip4(&target_ip, &target_ip);
1374 if (target_mapped == PXREMAP_FAILED) {
1375 return;
1376 }
1377
1378 sys_mutex_lock(&pxping->lock);
1379 pcb = pxping_pcb_for_reply(pxping, 0, ip_2_ipX(&target_ip), id);
1380 if (pcb == NULL) {
1381 sys_mutex_unlock(&pxping->lock);
1382 DPRINTF2(("%s: no match\n", __func__));
1383 return;
1384 }
1385
1386 DPRINTF2(("%s: pcb %p\n", __func__, (void *)pcb));
1387
1388 /* save info before unlocking since pcb may expire */
1389 ip_addr_copy(guest_ip, *ipX_2_ip(&pcb->src));
1390 guest_id = pcb->guest_id;
1391
1392 sys_mutex_unlock(&pxping->lock);
1393
1394
1395 /*
1396 * Rewrite both inner and outer headers and forward to guest.
1397 * Note that the checksum of the outer ICMP error message is
1398 * preserved by the changes we do to inner headers.
1399 */
1400
1401 ip_addr_copy(error_ip, iph->src); /* node that reports the error */
1402 error_mapped = pxremap_inbound_ip4(&error_ip, &error_ip);
1403 if (error_mapped == PXREMAP_FAILED) {
1404 return;
1405 }
1406 if (error_mapped == PXREMAP_ASIS && IPH_TTL(iph) == 1) {
1407 DPRINTF2(("%s: dropping packet with ttl 1\n", __func__));
1408 return;
1409 }
1410
1411 /* rewrite inner ICMP echo header */
1412 sum = (u16_t)~oicmph->chksum;
1413 sum += chksum_update_16(&oicmph->id, guest_id);
1414 sum = FOLD_U32T(sum);
1415 oicmph->chksum = ~sum;
1416
1417 /* rewrite inner IP header */
1418 sum = (u16_t)~IPH_CHKSUM(oiph);
1419 sum += chksum_update_32((u32_t *)&oiph->src, ip4_addr_get_u32(&guest_ip));
1420 if (target_mapped == PXREMAP_MAPPED) {
1421 sum += chksum_update_32((u32_t *)&oiph->dest, ip4_addr_get_u32(&target_ip));
1422 }
1423 sum = FOLD_U32T(sum);
1424 IPH_CHKSUM_SET(oiph, ~sum);
1425
1426 /* rewrite outer IP header */
1427 sum = (u16_t)~IPH_CHKSUM(iph);
1428 sum += chksum_update_32((u32_t *)&iph->dest, ip4_addr_get_u32(&guest_ip));
1429 if (error_mapped == PXREMAP_MAPPED) {
1430 sum += chksum_update_32((u32_t *)&iph->src, ip4_addr_get_u32(&error_ip));
1431 }
1432 else {
1433 IPH_TTL_SET(iph, IPH_TTL(iph) - 1);
1434 sum += PP_NTOHS(~0x0100);
1435 }
1436 sum = FOLD_U32T(sum);
1437 IPH_CHKSUM_SET(iph, ~sum);
1438
1439 pxping_pmgr_forward_inbound(pxping, iplen);
1440}
1441
1442
1443/**
1444 * Process incoming ICMPv6 message for the host.
1445 * NB: we will get a lot of spam here and have to sift through it.
1446 */
1447static void
1448pxping_pmgr_icmp6(struct pxping *pxping)
1449{
1450#ifndef RT_OS_WINDOWS
1451 struct msghdr mh;
1452 ssize_t nread;
1453#else
1454 WSAMSG mh;
1455 DWORD nread;
1456#endif
1457 IOVEC iov[1];
1458 static u8_t cmsgbuf[128];
1459 struct cmsghdr *cmh;
1460 struct sockaddr_in6 sin6;
1461 socklen_t salen = sizeof(sin6);
1462 struct icmp6_echo_hdr *icmph;
1463 struct in6_pktinfo *pktinfo;
1464 int hopl, tclass;
1465 int status;
1466
1467 /*
1468 * Reads from raw IPv6 sockets deliver only the payload. Full
1469 * headers are available via recvmsg(2)/cmsg(3).
1470 */
1471 IOVEC_SET_BASE(iov[0], pollmgr_udpbuf);
1472 IOVEC_SET_LEN(iov[0], sizeof(pollmgr_udpbuf));
1473
1474 memset(&mh, 0, sizeof(mh));
1475#ifndef RT_OS_WINDOWS
1476 mh.msg_name = &sin6;
1477 mh.msg_namelen = sizeof(sin6);
1478 mh.msg_iov = iov;
1479 mh.msg_iovlen = 1;
1480 mh.msg_control = cmsgbuf;
1481 mh.msg_controllen = sizeof(cmsgbuf);
1482 mh.msg_flags = 0;
1483
1484 nread = recvmsg(pxping->sock6, &mh, 0);
1485 if (nread < 0) {
1486 DPRINTF(("%s: %R[sockerr]\n", __func__, SOCKERRNO()));
1487 return;
1488 }
1489#else /* RT_OS_WINDOWS */
1490 mh.name = (LPSOCKADDR)&sin6;
1491 mh.namelen = sizeof(sin6);
1492 mh.lpBuffers = iov;
1493 mh.dwBufferCount = 1;
1494 mh.Control.buf = cmsgbuf;
1495 mh.Control.len = sizeof(cmsgbuf);
1496 mh.dwFlags = 0;
1497
1498 status = (*pxping->pfWSARecvMsg6)(pxping->sock6, &mh, &nread, NULL, NULL);
1499 if (status == SOCKET_ERROR) {
1500 DPRINTF2(("%s: error %d\n", __func__, WSAGetLastError()));
1501 return;
1502 }
1503#endif
1504
1505 icmph = (struct icmp6_echo_hdr *)pollmgr_udpbuf;
1506
1507 DPRINTF2(("%s: %RTnaipv6 ICMPv6: ", __func__, &sin6.sin6_addr));
1508
1509 if (icmph->type == ICMP6_TYPE_EREP) {
1510 DPRINTF2(("echo reply %04x %u\n",
1511 (unsigned int)icmph->id, (unsigned int)icmph->seqno));
1512 }
1513 else { /* XXX */
1514 if (icmph->type == ICMP6_TYPE_EREQ) {
1515 DPRINTF2(("echo request %04x %u\n",
1516 (unsigned int)icmph->id, (unsigned int)icmph->seqno));
1517 }
1518 else if (icmph->type == ICMP6_TYPE_DUR) {
1519 DPRINTF2(("destination unreachable\n"));
1520 }
1521 else if (icmph->type == ICMP6_TYPE_PTB) {
1522 DPRINTF2(("packet too big\n"));
1523 }
1524 else if (icmph->type == ICMP6_TYPE_TE) {
1525 DPRINTF2(("time exceeded\n"));
1526 }
1527 else if (icmph->type == ICMP6_TYPE_PP) {
1528 DPRINTF2(("parameter problem\n"));
1529 }
1530 else {
1531 DPRINTF2(("type %d len %u\n", icmph->type, (unsigned int)nread));
1532 }
1533
1534 if (icmph->type >= ICMP6_TYPE_EREQ) {
1535 return; /* informational message */
1536 }
1537 }
1538
1539 pktinfo = NULL;
1540 hopl = -1;
1541 tclass = -1;
1542 for (cmh = CMSG_FIRSTHDR(&mh); cmh != NULL; cmh = CMSG_NXTHDR(&mh, cmh)) {
1543 if (cmh->cmsg_len == 0)
1544 break;
1545
1546 if (cmh->cmsg_level == IPPROTO_IPV6
1547 && cmh->cmsg_type == IPV6_HOPLIMIT
1548 && cmh->cmsg_len == CMSG_LEN(sizeof(int)))
1549 {
1550 hopl = *(int *)CMSG_DATA(cmh);
1551 DPRINTF2(("hoplimit = %d\n", hopl));
1552 }
1553
1554 if (cmh->cmsg_level == IPPROTO_IPV6
1555 && cmh->cmsg_type == IPV6_PKTINFO
1556 && cmh->cmsg_len == CMSG_LEN(sizeof(struct in6_pktinfo)))
1557 {
1558 pktinfo = (struct in6_pktinfo *)CMSG_DATA(cmh);
1559 DPRINTF2(("pktinfo found\n"));
1560 }
1561 }
1562
1563 if (pktinfo == NULL) {
1564 /*
1565 * ip6_output_if() doesn't do checksum for us so we need to
1566 * manually recompute it - for this we must know the
1567 * destination address of the pseudo-header that we will
1568 * rewrite with guest's address. (TODO: yeah, yeah, we can
1569 * compute it from scratch...)
1570 */
1571 DPRINTF2(("%s: unable to get pktinfo\n", __func__));
1572 return;
1573 }
1574
1575 if (hopl < 0) {
1576 hopl = LWIP_ICMP6_HL;
1577 }
1578
1579 if (icmph->type == ICMP6_TYPE_EREP) {
1580 pxping_pmgr_icmp6_echo(pxping,
1581 (ip6_addr_t *)&sin6.sin6_addr,
1582 (ip6_addr_t *)&pktinfo->ipi6_addr,
1583 hopl, tclass, (u16_t)nread);
1584 }
1585 else if (icmph->type < ICMP6_TYPE_EREQ) {
1586 pxping_pmgr_icmp6_error(pxping,
1587 (ip6_addr_t *)&sin6.sin6_addr,
1588 (ip6_addr_t *)&pktinfo->ipi6_addr,
1589 hopl, tclass, (u16_t)nread);
1590 }
1591}
1592
1593
1594/**
1595 * Check if this incoming ICMPv6 echo reply is for one of our pings
1596 * and forward it to the guest.
1597 */
1598static void
1599pxping_pmgr_icmp6_echo(struct pxping *pxping,
1600 ip6_addr_t *src, ip6_addr_t *dst,
1601 int hopl, int tclass, u16_t icmplen)
1602{
1603 struct icmp6_echo_hdr *icmph;
1604 ip6_addr_t guest_ip, target_ip;
1605 int mapped;
1606 struct ping_pcb *pcb;
1607 u16_t id, guest_id;
1608 u32_t sum;
1609
1610 ip6_addr_copy(target_ip, *src);
1611 mapped = pxremap_inbound_ip6(&target_ip, &target_ip);
1612 if (mapped == PXREMAP_FAILED) {
1613 return;
1614 }
1615 else if (mapped == PXREMAP_ASIS) {
1616 if (hopl == 1) {
1617 DPRINTF2(("%s: dropping packet with ttl 1\n", __func__));
1618 return;
1619 }
1620 --hopl;
1621 }
1622
1623 icmph = (struct icmp6_echo_hdr *)pollmgr_udpbuf;
1624 id = icmph->id;
1625
1626 sys_mutex_lock(&pxping->lock);
1627 pcb = pxping_pcb_for_reply(pxping, 1, ip6_2_ipX(&target_ip), id);
1628 if (pcb == NULL) {
1629 sys_mutex_unlock(&pxping->lock);
1630 DPRINTF2(("%s: no match\n", __func__));
1631 return;
1632 }
1633
1634 DPRINTF2(("%s: pcb %p\n", __func__, (void *)pcb));
1635
1636 /* save info before unlocking since pcb may expire */
1637 ip6_addr_copy(guest_ip, *ipX_2_ip6(&pcb->src));
1638 guest_id = pcb->guest_id;
1639
1640 sys_mutex_unlock(&pxping->lock);
1641
1642 /* rewrite ICMPv6 echo header */
1643 sum = (u16_t)~icmph->chksum;
1644 sum += chksum_update_16(&icmph->id, guest_id);
1645 sum += chksum_delta_ipv6(dst, &guest_ip); /* pseudo */
1646 if (mapped) {
1647 sum += chksum_delta_ipv6(src, &target_ip); /* pseudo */
1648 }
1649 sum = FOLD_U32T(sum);
1650 icmph->chksum = ~sum;
1651
1652 pxping_pmgr_forward_inbound6(pxping,
1653 &target_ip, /* echo reply src */
1654 &guest_ip, /* echo reply dst */
1655 hopl, tclass, icmplen);
1656}
1657
1658
1659/**
1660 * Check if this incoming ICMPv6 error is about one of our pings and
1661 * forward it to the guest.
1662 */
1663static void
1664pxping_pmgr_icmp6_error(struct pxping *pxping,
1665 ip6_addr_t *src, ip6_addr_t *dst,
1666 int hopl, int tclass, u16_t icmplen)
1667{
1668 struct icmp6_hdr *icmph;
1669 u8_t *bufptr;
1670 size_t buflen, hlen;
1671 int proto;
1672 struct ip6_hdr *oiph;
1673 struct icmp6_echo_hdr *oicmph;
1674 struct ping_pcb *pcb;
1675 ip6_addr_t guest_ip, target_ip, error_ip;
1676 int target_mapped, error_mapped;
1677 u16_t guest_id;
1678 u32_t sum;
1679
1680 icmph = (struct icmp6_hdr *)pollmgr_udpbuf;
1681
1682 /*
1683 * Inner IP datagram is not checked by the kernel and may be
1684 * anything, possibly malicious.
1685 */
1686 oiph = NULL;
1687 oicmph = NULL;
1688
1689 bufptr = pollmgr_udpbuf;
1690 buflen = icmplen;
1691
1692 hlen = sizeof(*icmph);
1693 proto = IP6_NEXTH_ENCAPS; /* i.e. IPv6, lwIP's name is unfortuate */
1694 for (;;) {
1695 if (hlen > buflen) {
1696 DPRINTF2(("truncated datagram inside ICMPv6 error message is too short\n"));
1697 return;
1698 }
1699 buflen -= hlen;
1700 bufptr += hlen;
1701
1702 if (proto == IP6_NEXTH_ENCAPS && oiph == NULL) { /* outermost IPv6 */
1703 oiph = (struct ip6_hdr *)bufptr;
1704 if (IP6H_V(oiph) != 6) {
1705 DPRINTF2(("%s: unexpected IP version %d\n", __func__, IP6H_V(oiph)));
1706 return;
1707 }
1708
1709 proto = IP6H_NEXTH(oiph);
1710 hlen = IP6_HLEN;
1711 }
1712 else if (proto == IP6_NEXTH_ICMP6) {
1713 oicmph = (struct icmp6_echo_hdr *)bufptr;
1714 break;
1715 }
1716 else if (proto == IP6_NEXTH_ROUTING
1717 || proto == IP6_NEXTH_HOPBYHOP
1718 || proto == IP6_NEXTH_DESTOPTS)
1719 {
1720 proto = bufptr[0];
1721 hlen = (bufptr[1] + 1) * 8;
1722 }
1723 else {
1724 DPRINTF2(("%s: stopping at protocol %d\n", __func__, proto));
1725 break;
1726 }
1727 }
1728
1729 if (oiph == NULL || oicmph == NULL) {
1730 return;
1731 }
1732
1733 if (buflen < sizeof(*oicmph)) {
1734 DPRINTF2(("%s: original ICMPv6 is truncated too short\n", __func__));
1735 return;
1736 }
1737
1738 if (oicmph->type != ICMP6_TYPE_EREQ) {
1739 DPRINTF2(("%s: ignoring original ICMPv6 type %d\n", __func__, oicmph->type));
1740 return;
1741 }
1742
1743 memcpy(&target_ip, &oiph->dest, sizeof(target_ip)); /* inner (failed) */
1744 target_mapped = pxremap_inbound_ip6(&target_ip, &target_ip);
1745 if (target_mapped == PXREMAP_FAILED) {
1746 return;
1747 }
1748
1749 sys_mutex_lock(&pxping->lock);
1750 pcb = pxping_pcb_for_reply(pxping, 1, ip_2_ipX(&target_ip), oicmph->id);
1751 if (pcb == NULL) {
1752 sys_mutex_unlock(&pxping->lock);
1753 DPRINTF2(("%s: no match\n", __func__));
1754 return;
1755 }
1756
1757 DPRINTF2(("%s: pcb %p\n", __func__, (void *)pcb));
1758
1759 /* save info before unlocking since pcb may expire */
1760 ip6_addr_copy(guest_ip, *ipX_2_ip6(&pcb->src));
1761 guest_id = pcb->guest_id;
1762
1763 sys_mutex_unlock(&pxping->lock);
1764
1765
1766 /*
1767 * Rewrite inner and outer headers and forward to guest. Note
1768 * that IPv6 has no IP header checksum, but uses pseudo-header for
1769 * ICMPv6, so we update both in one go, adjusting ICMPv6 checksum
1770 * as we rewrite IP header.
1771 */
1772
1773 ip6_addr_copy(error_ip, *src); /* node that reports the error */
1774 error_mapped = pxremap_inbound_ip6(&error_ip, &error_ip);
1775 if (error_mapped == PXREMAP_FAILED) {
1776 return;
1777 }
1778 if (error_mapped == PXREMAP_ASIS && hopl == 1) {
1779 DPRINTF2(("%s: dropping packet with ttl 1\n", __func__));
1780 return;
1781 }
1782
1783 /* rewrite inner ICMPv6 echo header and inner IPv6 header */
1784 sum = (u16_t)~oicmph->chksum;
1785 sum += chksum_update_16(&oicmph->id, guest_id);
1786 sum += chksum_update_ipv6((ip6_addr_t *)&oiph->src, &guest_ip);
1787 if (target_mapped) {
1788 sum += chksum_delta_ipv6((ip6_addr_t *)&oiph->dest, &target_ip);
1789 }
1790 sum = FOLD_U32T(sum);
1791 oicmph->chksum = ~sum;
1792
1793 /* rewrite outer ICMPv6 error header */
1794 sum = (u16_t)~icmph->chksum;
1795 sum += chksum_delta_ipv6(dst, &guest_ip); /* pseudo */
1796 if (error_mapped) {
1797 sum += chksum_delta_ipv6(src, &error_ip); /* pseudo */
1798 }
1799 sum = FOLD_U32T(sum);
1800 icmph->chksum = ~sum;
1801
1802 pxping_pmgr_forward_inbound6(pxping,
1803 &error_ip, /* error src */
1804 &guest_ip, /* error dst */
1805 hopl, tclass, icmplen);
1806}
1807
1808
1809/**
1810 * Hand off ICMP datagram to the lwip thread where it will be
1811 * forwarded to the guest.
1812 *
1813 * We no longer need ping_pcb. The pcb may get expired on the lwip
1814 * thread, but we have already patched necessary information into the
1815 * datagram.
1816 */
1817static void
1818pxping_pmgr_forward_inbound(struct pxping *pxping, u16_t iplen)
1819{
1820 struct pbuf *p;
1821 struct ping_msg *msg;
1822 err_t error;
1823
1824 p = pbuf_alloc(PBUF_LINK, iplen, PBUF_RAM);
1825 if (p == NULL) {
1826 DPRINTF(("%s: pbuf_alloc(%d) failed\n",
1827 __func__, (unsigned int)iplen));
1828 return;
1829 }
1830
1831 error = pbuf_take(p, pollmgr_udpbuf, iplen);
1832 if (error != ERR_OK) {
1833 DPRINTF(("%s: pbuf_take(%d) failed\n",
1834 __func__, (unsigned int)iplen));
1835 pbuf_free(p);
1836 return;
1837 }
1838
1839 msg = (struct ping_msg *)malloc(sizeof(*msg));
1840 if (msg == NULL) {
1841 pbuf_free(p);
1842 return;
1843 }
1844
1845 msg->msg.type = TCPIP_MSG_CALLBACK_STATIC;
1846 msg->msg.sem = NULL;
1847 msg->msg.msg.cb.function = pxping_pcb_forward_inbound;
1848 msg->msg.msg.cb.ctx = (void *)msg;
1849
1850 msg->pxping = pxping;
1851 msg->p = p;
1852
1853 proxy_lwip_post(&msg->msg);
1854}
1855
1856
1857static void
1858pxping_pcb_forward_inbound(void *arg)
1859{
1860 struct ping_msg *msg = (struct ping_msg *)arg;
1861 err_t error;
1862
1863 LWIP_ASSERT1(msg != NULL);
1864 LWIP_ASSERT1(msg->pxping != NULL);
1865 LWIP_ASSERT1(msg->p != NULL);
1866
1867 error = ip_raw_output_if(msg->p, msg->pxping->netif);
1868 if (error != ERR_OK) {
1869 DPRINTF(("%s: ip_output_if: %s\n",
1870 __func__, proxy_lwip_strerr(error)));
1871 }
1872 pbuf_free(msg->p);
1873 free(msg);
1874}
1875
1876
1877static void
1878pxping_pmgr_forward_inbound6(struct pxping *pxping,
1879 ip6_addr_t *src, ip6_addr_t *dst,
1880 u8_t hopl, u8_t tclass,
1881 u16_t icmplen)
1882{
1883 struct pbuf *p;
1884 struct ping6_msg *msg;
1885
1886 err_t error;
1887
1888 p = pbuf_alloc(PBUF_IP, icmplen, PBUF_RAM);
1889 if (p == NULL) {
1890 DPRINTF(("%s: pbuf_alloc(%d) failed\n",
1891 __func__, (unsigned int)icmplen));
1892 return;
1893 }
1894
1895 error = pbuf_take(p, pollmgr_udpbuf, icmplen);
1896 if (error != ERR_OK) {
1897 DPRINTF(("%s: pbuf_take(%d) failed\n",
1898 __func__, (unsigned int)icmplen));
1899 pbuf_free(p);
1900 return;
1901 }
1902
1903 msg = (struct ping6_msg *)malloc(sizeof(*msg));
1904 if (msg == NULL) {
1905 pbuf_free(p);
1906 return;
1907 }
1908
1909 msg->msg.type = TCPIP_MSG_CALLBACK_STATIC;
1910 msg->msg.sem = NULL;
1911 msg->msg.msg.cb.function = pxping_pcb_forward_inbound6;
1912 msg->msg.msg.cb.ctx = (void *)msg;
1913
1914 msg->pxping = pxping;
1915 msg->p = p;
1916 ip6_addr_copy(msg->src, *src);
1917 ip6_addr_copy(msg->dst, *dst);
1918 msg->hopl = hopl;
1919 msg->tclass = tclass;
1920
1921 proxy_lwip_post(&msg->msg);
1922}
1923
1924
1925static void
1926pxping_pcb_forward_inbound6(void *arg)
1927{
1928 struct ping6_msg *msg = (struct ping6_msg *)arg;
1929 err_t error;
1930
1931 LWIP_ASSERT1(msg != NULL);
1932 LWIP_ASSERT1(msg->pxping != NULL);
1933 LWIP_ASSERT1(msg->p != NULL);
1934
1935 error = ip6_output_if(msg->p,
1936 &msg->src, &msg->dst, msg->hopl, msg->tclass,
1937 IP6_NEXTH_ICMP6, msg->pxping->netif);
1938 if (error != ERR_OK) {
1939 DPRINTF(("%s: ip6_output_if: %s\n",
1940 __func__, proxy_lwip_strerr(error)));
1941 }
1942 pbuf_free(msg->p);
1943 free(msg);
1944}
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette