xref: /openbsd-src/usr.sbin/traceroute/traceroute.c (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1 /*	$OpenBSD: traceroute.c,v 1.147 2016/09/19 07:08:01 florian Exp $	*/
2 /*	$NetBSD: traceroute.c,v 1.10 1995/05/21 15:50:45 mycroft Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*-
34  * Copyright (c) 1990, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * This code is derived from software contributed to Berkeley by
38  * Van Jacobson.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  */
64 
65 /*
66  * traceroute host  - trace the route ip packets follow going to "host".
67  *
68  * Attempt to trace the route an ip packet would follow to some
69  * internet host.  We find out intermediate hops by launching probe
70  * packets with a small ttl (time to live) then listening for an
71  * icmp "time exceeded" reply from a gateway.  We start our probes
72  * with a ttl of one and increase by one until we get an icmp "port
73  * unreachable" (which means we got to "host") or hit a max (which
74  * defaults to 64 hops & can be changed with the -m flag).  Three
75  * probes (change with -q flag) are sent at each ttl setting and a
76  * line is printed showing the ttl, address of the gateway and
77  * round trip time of each probe.  If the probe answers come from
78  * different gateways, the address of each responding system will
79  * be printed.  If there is no response within a 5 sec. timeout
80  * interval (changed with the -w flag), a "*" is printed for that
81  * probe.
82  *
83  * Probe packets are UDP format.  We don't want the destination
84  * host to process them so the destination port is set to an
85  * unlikely value (if some clod on the destination is using that
86  * value, it can be changed with the -p flag).
87  *
88  * A sample use might be:
89  *
90  *     [yak 71]% traceroute nis.nsf.net.
91  *     traceroute to nis.nsf.net (35.1.1.48), 64 hops max, 56 byte packet
92  *      1  helios.ee.lbl.gov (128.3.112.1)  19 ms  19 ms  0 ms
93  *      2  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  39 ms  19 ms
94  *      3  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  39 ms  19 ms
95  *      4  ccngw-ner-cc.Berkeley.EDU (128.32.136.23)  39 ms  40 ms  39 ms
96  *      5  ccn-nerif22.Berkeley.EDU (128.32.168.22)  39 ms  39 ms  39 ms
97  *      6  128.32.197.4 (128.32.197.4)  40 ms  59 ms  59 ms
98  *      7  131.119.2.5 (131.119.2.5)  59 ms  59 ms  59 ms
99  *      8  129.140.70.13 (129.140.70.13)  99 ms  99 ms  80 ms
100  *      9  129.140.71.6 (129.140.71.6)  139 ms  239 ms  319 ms
101  *     10  129.140.81.7 (129.140.81.7)  220 ms  199 ms  199 ms
102  *     11  nic.merit.edu (35.1.1.48)  239 ms  239 ms  239 ms
103  *
104  * Note that lines 2 & 3 are the same.  This is due to a buggy
105  * kernel on the 2nd hop system -- lbl-csam.arpa -- that forwards
106  * packets with a zero ttl.
107  *
108  * A more interesting example is:
109  *
110  *     [yak 72]% traceroute allspice.lcs.mit.edu.
111  *     traceroute to allspice.lcs.mit.edu (18.26.0.115), 64 hops max
112  *      1  helios.ee.lbl.gov (128.3.112.1)  0 ms  0 ms  0 ms
113  *      2  lilac-dmc.Berkeley.EDU (128.32.216.1)  19 ms  19 ms  19 ms
114  *      3  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  19 ms  19 ms
115  *      4  ccngw-ner-cc.Berkeley.EDU (128.32.136.23)  19 ms  39 ms  39 ms
116  *      5  ccn-nerif22.Berkeley.EDU (128.32.168.22)  20 ms  39 ms  39 ms
117  *      6  128.32.197.4 (128.32.197.4)  59 ms  119 ms  39 ms
118  *      7  131.119.2.5 (131.119.2.5)  59 ms  59 ms  39 ms
119  *      8  129.140.70.13 (129.140.70.13)  80 ms  79 ms  99 ms
120  *      9  129.140.71.6 (129.140.71.6)  139 ms  139 ms  159 ms
121  *     10  129.140.81.7 (129.140.81.7)  199 ms  180 ms  300 ms
122  *     11  129.140.72.17 (129.140.72.17)  300 ms  239 ms  239 ms
123  *     12  * * *
124  *     13  128.121.54.72 (128.121.54.72)  259 ms  499 ms  279 ms
125  *     14  * * *
126  *     15  * * *
127  *     16  * * *
128  *     17  * * *
129  *     18  ALLSPICE.LCS.MIT.EDU (18.26.0.115)  339 ms  279 ms  279 ms
130  *
131  * (I start to see why I'm having so much trouble with mail to
132  * MIT.)  Note that the gateways 12, 14, 15, 16 & 17 hops away
133  * either don't send ICMP "time exceeded" messages or send them
134  * with a ttl too small to reach us.  14 - 17 are running the
135  * MIT C Gateway code that doesn't send "time exceeded"s.  God
136  * only knows what's going on with 12.
137  *
138  * The silent gateway 12 in the above may be the result of a bug in
139  * the 4.[23]BSD network code (and its derivatives):  4.x (x <= 3)
140  * sends an unreachable message using whatever ttl remains in the
141  * original datagram.  Since, for gateways, the remaining ttl is
142  * zero, the icmp "time exceeded" is guaranteed to not make it back
143  * to us.  The behavior of this bug is slightly more interesting
144  * when it appears on the destination system:
145  *
146  *      1  helios.ee.lbl.gov (128.3.112.1)  0 ms  0 ms  0 ms
147  *      2  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  19 ms  39 ms
148  *      3  lilac-dmc.Berkeley.EDU (128.32.216.1)  19 ms  39 ms  19 ms
149  *      4  ccngw-ner-cc.Berkeley.EDU (128.32.136.23)  39 ms  40 ms  19 ms
150  *      5  ccn-nerif35.Berkeley.EDU (128.32.168.35)  39 ms  39 ms  39 ms
151  *      6  csgw.Berkeley.EDU (128.32.133.254)  39 ms  59 ms  39 ms
152  *      7  * * *
153  *      8  * * *
154  *      9  * * *
155  *     10  * * *
156  *     11  * * *
157  *     12  * * *
158  *     13  rip.Berkeley.EDU (128.32.131.22)  59 ms !  39 ms !  39 ms !
159  *
160  * Notice that there are 12 "gateways" (13 is the final
161  * destination) and exactly the last half of them are "missing".
162  * What's really happening is that rip (a Sun-3 running Sun OS3.5)
163  * is using the ttl from our arriving datagram as the ttl in its
164  * icmp reply.  So, the reply will time out on the return path
165  * (with no notice sent to anyone since icmp's aren't sent for
166  * icmp's) until we probe with a ttl that's at least twice the path
167  * length.  I.e., rip is really only 7 hops away.  A reply that
168  * returns with a ttl of 1 is a clue this problem exists.
169  * Traceroute prints a "!" after the time if the ttl is <= 1.
170  * Since vendors ship a lot of obsolete (DEC's Ultrix, Sun 3.x) or
171  * non-standard (HPUX) software, expect to see this problem
172  * frequently and/or take care picking the target host of your
173  * probes.
174  *
175  * Other possible annotations after the time are !H, !N, !P (got a host,
176  * network or protocol unreachable, respectively), !S or !F (source
177  * route failed or fragmentation needed -- neither of these should
178  * ever occur and the associated gateway is busted if you see one).  If
179  * almost all the probes result in some kind of unreachable, traceroute
180  * will give up and exit.
181  *
182  * Notes
183  * -----
184  * This program must be run by root or be setuid.  (I suggest that
185  * you *don't* make it setuid -- casual use could result in a lot
186  * of unnecessary traffic on our poor, congested nets.)
187  *
188  * This program requires a kernel mod that does not appear in any
189  * system available from Berkeley:  A raw ip socket using proto
190  * IPPROTO_RAW must interpret the data sent as an ip datagram (as
191  * opposed to data to be wrapped in a ip datagram).  See the README
192  * file that came with the source to this program for a description
193  * of the mods I made to /sys/netinet/raw_ip.c.  Your mileage may
194  * vary.  But, again, ANY 4.x (x < 4) BSD KERNEL WILL HAVE TO BE
195  * MODIFIED TO RUN THIS PROGRAM.
196  *
197  * The udp port usage may appear bizarre (well, ok, it is bizarre).
198  * The problem is that an icmp message only contains 8 bytes of
199  * data from the original datagram.  8 bytes is the size of a udp
200  * header so, if we want to associate replies with the original
201  * datagram, the necessary information must be encoded into the
202  * udp header (the ip id could be used but there's no way to
203  * interlock with the kernel's assignment of ip id's and, anyway,
204  * it would have taken a lot more kernel hacking to allow this
205  * code to set the ip id).  So, to allow two or more users to
206  * use traceroute simultaneously, we use this task's pid as the
207  * source port (the high bit is set to move the port number out
208  * of the "likely" range).  To keep track of which probe is being
209  * replied to (so times and/or hop counts don't get confused by a
210  * reply that was delayed in transit), we increment the destination
211  * port number before each probe.
212  *
213  * Don't use this as a coding example.  I was trying to find a
214  * routing problem and this code sort-of popped out after 48 hours
215  * without sleep.  I was amazed it ever compiled, much less ran.
216  *
217  * I stole the idea for this program from Steve Deering.  Since
218  * the first release, I've learned that had I attended the right
219  * IETF working group meetings, I also could have stolen it from Guy
220  * Almes or Matt Mathis.  I don't know (or care) who came up with
221  * the idea first.  I envy the originators' perspicacity and I'm
222  * glad they didn't keep the idea a secret.
223  *
224  * Tim Seaver, Ken Adelman and C. Philip Wood provided bug fixes and/or
225  * enhancements to the original distribution.
226  *
227  * I've hacked up a round-trip-route version of this that works by
228  * sending a loose-source-routed udp datagram through the destination
229  * back to yourself.  Unfortunately, SO many gateways botch source
230  * routing, the thing is almost worthless.  Maybe one day...
231  *
232  *  -- Van Jacobson (van@helios.ee.lbl.gov)
233  *     Tue Dec 20 03:50:13 PST 1988
234  */
235 
236 
237 #include <arpa/inet.h>
238 #include <endian.h>
239 #include <err.h>
240 #include <errno.h>
241 #include <limits.h>
242 #include <netdb.h>
243 #include <netinet/icmp6.h>
244 #include <netinet/in.h>
245 #include <netinet/ip.h>
246 #include <netinet/ip6.h>
247 #include <netinet/ip_icmp.h>
248 #include <netinet/udp.h>
249 #include <stdio.h>
250 #include <stdlib.h>
251 #include <string.h>
252 #include <sys/socket.h>
253 #include <sys/sysctl.h>
254 #include <sys/time.h>
255 #include <sys/uio.h>
256 #include <unistd.h>
257 
258 #include "traceroute.h"
259 
/*
 * Loose-source-route gateways collected from -g.  main() stores the final
 * destination in the slot after the last user-supplied gateway, hence the
 * extra element.
 */
struct in_addr	gateway[MAX_LSRR + 1];

/*
 * Size in bytes of the IP LSRR option (including the leading NOP);
 * 0 when no -g was given, otherwise 4 + 4 per gateway.
 */
int	 lsrrlen = 0;
/* Random values chosen once in main(); NOTE(review): consumers are not in this chunk. */
int32_t	 sec_perturb;
int32_t	 usec_perturb;

u_char	 packet[512];	/* receive buffer: last inbound (icmp) packet */
u_char	*outpacket;	/* outbound probe buffer, allocated in main() */

int	rcvsock;	/* receive (icmp) socket file descriptor */
int	sndsock;	/* send (udp) socket file descriptor */

/* Message header and I/O vector used for receiving replies; set up in main(). */
static struct msghdr	rcvmhdr;
static struct iovec	rcviov[2];

/*
 * Hop limit and packet info of the last IPv6 reply.
 * NOTE(review): not assigned in this chunk; presumably filled in by the
 * receive path from the ancillary data requested in main() -- confirm.
 */
int	rcvhlim;
struct in6_pktinfo *rcvpktinfo;

	int	datalen;	/* How much data */
static	int	headerlen;	/* How long packet's header is */

char	*hostname;	/* name of the destination as printed in the banner */

static int	nprobes = 3;		/* probes per hop (-q) */
static u_int8_t	max_ttl = IPDEFTTL;	/* last ttl to probe (-m) */
static u_int8_t	first_ttl = 1;		/* first ttl to probe (-f) */

static int	options;	/* socket options */
static int	xflag;		/* show ICMP extension header */
static int	tflag;		/* tos flag was set */
static int	v6flag;		/* running as traceroute6 */

u_short		ident;		/* (pid & 0xffff) | 0x8000, set in main() */
u_int16_t	srcport;	/* local port of the bound IPv6 send socket */
u_int16_t	port = 32768+666;/* start udp dest port # for probe packets */
u_char		proto = IPPROTO_UDP;	/* probe protocol (-I / -P) */

int		verbose;	/* -v */
int		waittime = 5;	/* time to wait for response (in seconds) */
int		nflag;		/* print addresses numerically */
int		dump;		/* -D */
int		Aflag;		/* lookup ASN */
int		last_tos;	/* tos value given with -t */

void	usage(void);
305 
306 int
307 main(int argc, char *argv[])
308 {
309 	int mib[4] = { CTL_NET, PF_INET, IPPROTO_IP, IPCTL_DEFTTL };
310 	int ttl_flag = 0, incflag = 1, protoset = 0, sump = 0;
311 	int ch, i, lsrr = 0, on = 1, probe, seq = 0, tos = 0, error, packetlen;
312 	int rcvcmsglen, rcvsock4, rcvsock6, sndsock4, sndsock6;
313 	int v4sock_errno, v6sock_errno;
314 	struct addrinfo hints, *res;
315 	size_t size;
316 	static u_char *rcvcmsgbuf;
317 	struct sockaddr_in from4, to4;
318 	struct sockaddr_in6 from6, to6;
319 	struct sockaddr *from, *to;
320 	struct hostent *hp;
321 	u_int32_t tmprnd;
322 	struct ip *ip = NULL;
323 	u_int8_t ttl;
324 	char *ep, hbuf[NI_MAXHOST], *dest, *source = NULL;
325 	const char *errstr;
326 	long l;
327 	uid_t uid;
328 	u_int rtableid;
329 	socklen_t len;
330 
331 	rcvsock4 = rcvsock6 = sndsock4 = sndsock6 = -1;
332 	v4sock_errno = v6sock_errno = 0;
333 
334 	if ((rcvsock6 = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6)) < 0)
335 		v6sock_errno = errno;
336 	else if ((sndsock6 = socket(AF_INET6, SOCK_DGRAM, 0)) < 0)
337 		v6sock_errno = errno;
338 
339 	if ((rcvsock4 = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP)) < 0)
340 		v4sock_errno = errno;
341 	else if ((sndsock4 = socket(AF_INET, SOCK_RAW, IPPROTO_RAW)) < 0)
342 		v4sock_errno = errno;
343 
344 	/* revoke privs */
345 	uid = getuid();
346 	if (setresuid(uid, uid, uid) == -1)
347 		err(1, "setresuid");
348 
349 	if (strcmp("traceroute6", __progname) == 0) {
350 		v6flag = 1;
351 		if (v6sock_errno != 0)
352 			errc(5, v6sock_errno, rcvsock6 < 0 ? "socket(ICMPv6)" :
353 			    "socket(SOCK_DGRAM)");
354 		rcvsock = rcvsock6;
355 		sndsock = sndsock6;
356 		if (rcvsock4 >= 0)
357 			close(rcvsock4);
358 		if (sndsock4 >= 0)
359 			close(sndsock4);
360 	} else {
361 		if (v4sock_errno != 0)
362 			errc(5, v4sock_errno, rcvsock4 < 0 ? "icmp socket" :
363 			    "raw socket");
364 		rcvsock = rcvsock4;
365 		sndsock = sndsock4;
366 		if (rcvsock6 >= 0)
367 			close(rcvsock6);
368 		if (sndsock6 >= 0)
369 			close(sndsock6);
370 	}
371 
372 	if (v6flag) {
373 		mib[1] = PF_INET6;
374 		mib[2] = IPPROTO_IPV6;
375 		mib[3] = IPV6CTL_DEFHLIM;
376 		/* specify to tell receiving interface */
377 		if (setsockopt(rcvsock, IPPROTO_IPV6, IPV6_RECVPKTINFO, &on,
378 		    sizeof(on)) < 0)
379 			err(1, "setsockopt(IPV6_RECVPKTINFO)");
380 
381 		/* specify to tell hoplimit field of received IP6 hdr */
382 		if (setsockopt(rcvsock, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, &on,
383 		    sizeof(on)) < 0)
384 			err(1, "setsockopt(IPV6_RECVHOPLIMIT)");
385 	}
386 
387 	size = sizeof(i);
388 	if (sysctl(mib, sizeof(mib)/sizeof(mib[0]), &i, &size, NULL, 0) == -1)
389 		err(1, "sysctl");
390 	max_ttl = i;
391 
392 	while ((ch = getopt(argc, argv, v6flag ? "AcDdf:Ilm:np:q:Ss:w:vV:" :
393 	    "AcDdf:g:Ilm:nP:p:q:Ss:t:V:vw:x")) != -1)
394 		switch (ch) {
395 		case 'A':
396 			Aflag = 1;
397 			break;
398 		case 'c':
399 			incflag = 0;
400 			break;
401 		case 'd':
402 			options |= SO_DEBUG;
403 			break;
404 		case 'D':
405 			dump = 1;
406 			break;
407 		case 'f':
408 			first_ttl = strtonum(optarg, 1, max_ttl, &errstr);
409 			if (errstr)
410 				errx(1, "min ttl must be 1 to %u.", max_ttl);
411 			break;
412 		case 'g':
413 			if (lsrr >= MAX_LSRR)
414 				errx(1, "too many gateways; max %d", MAX_LSRR);
415 			if (inet_aton(optarg, &gateway[lsrr]) == 0) {
416 				hp = gethostbyname(optarg);
417 				if (hp == 0)
418 					errx(1, "unknown host %s", optarg);
419 				memcpy(&gateway[lsrr], hp->h_addr,
420 				    hp->h_length);
421 			}
422 			if (++lsrr == 1)
423 				lsrrlen = 4;
424 			lsrrlen += 4;
425 			break;
426 		case 'I':
427 			if (protoset)
428 				errx(1, "protocol already set with -P");
429 			protoset = 1;
430 			proto = IPPROTO_ICMP;
431 			break;
432 		case 'l':
433 			ttl_flag = 1;
434 			break;
435 		case 'm':
436 			max_ttl = strtonum(optarg, first_ttl, MAXTTL, &errstr);
437 			if (errstr)
438 				errx(1, "max ttl must be %u to %u.", first_ttl,
439 				    MAXTTL);
440 			break;
441 		case 'n':
442 			nflag = 1;
443 			break;
444 		case 'p':
445 			port = strtonum(optarg, 1, 65535, &errstr);
446 			if (errstr)
447 				errx(1, "port must be >0, <65536.");
448 			break;
449 		case 'P':
450 			if (protoset)
451 				errx(1, "protocol already set with -I");
452 			protoset = 1;
453 			proto = strtonum(optarg, 1, IPPROTO_MAX - 1, &errstr);
454 			if (errstr) {
455 				struct protoent *pent;
456 
457 				pent = getprotobyname(optarg);
458 				if (pent)
459 					proto = pent->p_proto;
460 				else
461 					errx(1, "proto must be >=1, or a "
462 					    "name.");
463 			}
464 			break;
465 		case 'q':
466 			nprobes = strtonum(optarg, 1, INT_MAX, &errstr);
467 			if (errstr)
468 				errx(1, "nprobes must be >0.");
469 			break;
470 		case 's':
471 			/*
472 			 * set the ip source address of the outbound
473 			 * probe (e.g., on a multi-homed host).
474 			 */
475 			source = optarg;
476 			break;
477 		case 'S':
478 			sump = 1;
479 			break;
480 		case 't':
481 			if (!map_tos(optarg, &tos)) {
482 				if (strlen(optarg) > 1 && optarg[0] == '0' &&
483 				    optarg[1] == 'x') {
484 					errno = 0;
485 					ep = NULL;
486 					l = strtol(optarg, &ep, 16);
487 					if (errno || !*optarg || *ep ||
488 					    l < 0 || l > 255)
489 						errx(1, "illegal tos value %s",
490 						    optarg);
491 					tos = (int)l;
492 				} else {
493 					tos = strtonum(optarg, 0, 255, &errstr);
494 					if (errstr)
495 						errx(1, "illegal tos value %s",
496 						    optarg);
497 				}
498 			}
499 			tflag = 1;
500 			last_tos = tos;
501 			break;
502 		case 'v':
503 			verbose = 1;
504 			break;
505 		case 'V':
506 			rtableid = (unsigned int)strtonum(optarg, 0,
507 			    RT_TABLEID_MAX, &errstr);
508 			if (errstr)
509 				errx(1, "rtable value is %s: %s",
510 				    errstr, optarg);
511 			if (setsockopt(sndsock, SOL_SOCKET, SO_RTABLE,
512 			    &rtableid, sizeof(rtableid)) == -1)
513 				err(1, "setsockopt SO_RTABLE");
514 			if (setsockopt(rcvsock, SOL_SOCKET, SO_RTABLE,
515 			    &rtableid, sizeof(rtableid)) == -1)
516 				err(1, "setsockopt SO_RTABLE");
517 			break;
518 		case 'w':
519 			waittime = strtonum(optarg, 2, INT_MAX, &errstr);
520 			if (errstr)
521 				errx(1, "wait must be >1 sec.");
522 			break;
523 		case 'x':
524 			xflag = 1;
525 			break;
526 		default:
527 			usage();
528 		}
529 	argc -= optind;
530 	argv += optind;
531 
532 	if (argc < 1 || argc > 2)
533 		usage();
534 
535 	setvbuf(stdout, NULL, _IOLBF, 0);
536 
537 	ident = (getpid() & 0xffff) | 0x8000;
538 	tmprnd = arc4random();
539 	sec_perturb = (tmprnd & 0x80000000) ? -(tmprnd & 0x7ff) :
540 	    (tmprnd & 0x7ff);
541 	usec_perturb = arc4random();
542 
543 	memset(&to4, 0, sizeof(to4));
544 	memset(&to6, 0, sizeof(to6));
545 
546 	dest = *argv;
547 
548 	memset(&hints, 0, sizeof(hints));
549 	hints.ai_family = v6flag ? PF_INET6 : PF_INET;
550 	hints.ai_socktype = SOCK_RAW;
551 	hints.ai_protocol = 0;
552 	hints.ai_flags = AI_CANONNAME;
553 	if ((error = getaddrinfo(dest, NULL, &hints, &res)))
554 		errx(1, "%s", gai_strerror(error));
555 
556 	switch (res->ai_family) {
557 	case AF_INET:
558 		if (res->ai_addrlen != sizeof(to4))
559 		    errx(1, "size of sockaddr mismatch");
560 
561 		to = (struct sockaddr *)&to4;
562 		from = (struct sockaddr *)&from4;
563 		break;
564 	case AF_INET6:
565 		if (res->ai_addrlen != sizeof(to6))
566 			errx(1, "size of sockaddr mismatch");
567 
568 		to = (struct sockaddr *)&to6;
569 		from = (struct sockaddr *)&from6;
570 		break;
571 	default:
572 		errx(1, "unsupported AF: %d", res->ai_family);
573 		break;
574 	}
575 
576 	memcpy(to, res->ai_addr, res->ai_addrlen);
577 
578 	if (!hostname) {
579 		hostname = res->ai_canonname ? strdup(res->ai_canonname) : dest;
580 		if (!hostname)
581 			errx(1, "malloc");
582 	}
583 
584 	if (res->ai_next) {
585 		if (getnameinfo(res->ai_addr, res->ai_addrlen, hbuf,
586 		    sizeof(hbuf), NULL, 0, NI_NUMERICHOST) != 0)
587 			strlcpy(hbuf, "?", sizeof(hbuf));
588 		warnx("Warning: %s has multiple "
589 		    "addresses; using %s", hostname, hbuf);
590 	}
591 	freeaddrinfo(res);
592 
593 	if (*++argv) {
594 		datalen = strtonum(*argv, 0, INT_MAX, &errstr);
595 		if (errstr)
596 			errx(1, "datalen out of range");
597 	}
598 
599 	switch (to->sa_family) {
600 	case AF_INET:
601 		switch (proto) {
602 		case IPPROTO_UDP:
603 			headerlen = (sizeof(struct ip) + lsrrlen +
604 			    sizeof(struct udphdr) + sizeof(struct packetdata));
605 			break;
606 		case IPPROTO_ICMP:
607 			headerlen = (sizeof(struct ip) + lsrrlen +
608 			    sizeof(struct icmp) + sizeof(struct packetdata));
609 			break;
610 		default:
611 			headerlen = (sizeof(struct ip) + lsrrlen +
612 			    sizeof(struct packetdata));
613 		}
614 
615 		if (datalen < 0 || datalen > IP_MAXPACKET - headerlen)
616 			errx(1, "packet size must be 0 to %d.",
617 			    IP_MAXPACKET - headerlen);
618 
619 		datalen += headerlen;
620 
621 		if ((outpacket = calloc(1, datalen)) == NULL)
622 			err(1, "calloc");
623 
624 		rcviov[0].iov_base = (caddr_t)packet;
625 		rcviov[0].iov_len = sizeof(packet);
626 		rcvmhdr.msg_name = (caddr_t)&from4;
627 		rcvmhdr.msg_namelen = sizeof(from4);
628 		rcvmhdr.msg_iov = rcviov;
629 		rcvmhdr.msg_iovlen = 1;
630 		rcvmhdr.msg_control = NULL;
631 		rcvmhdr.msg_controllen = 0;
632 
633 		ip = (struct ip *)outpacket;
634 		if (lsrr != 0) {
635 			u_char *p = (u_char *)(ip + 1);
636 
637 			*p++ = IPOPT_NOP;
638 			*p++ = IPOPT_LSRR;
639 			*p++ = lsrrlen - 1;
640 			*p++ = IPOPT_MINOFF;
641 			gateway[lsrr] = to4.sin_addr;
642 			for (i = 1; i <= lsrr; i++) {
643 				memcpy(p, &gateway[i], sizeof(struct in_addr));
644 				p += sizeof(struct in_addr);
645 			}
646 			ip->ip_dst = gateway[0];
647 		} else
648 			ip->ip_dst = to4.sin_addr;
649 		ip->ip_off = htons(0);
650 		ip->ip_hl = (sizeof(struct ip) + lsrrlen) >> 2;
651 		ip->ip_p = proto;
652 		ip->ip_v = IPVERSION;
653 		ip->ip_tos = tos;
654 
655 		if (setsockopt(sndsock, IPPROTO_IP, IP_HDRINCL, (char *)&on,
656 		    sizeof(on)) < 0)
657 			err(6, "IP_HDRINCL");
658 
659 		if (source) {
660 			memset(&from4, 0, sizeof(from4));
661 			from4.sin_family = AF_INET;
662 			if (inet_aton(source, &from4.sin_addr) == 0)
663 				errx(1, "unknown host %s", source);
664 			ip->ip_src = from4.sin_addr;
665 			if (getuid() != 0 &&
666 			    (ntohl(from4.sin_addr.s_addr) & 0xff000000U) ==
667 			    0x7f000000U && (ntohl(to4.sin_addr.s_addr) &
668 			    0xff000000U) != 0x7f000000U)
669 				errx(1, "source is on 127/8, destination is"
670 				    " not");
671 			if (getuid() && bind(sndsock, (struct sockaddr *)&from4,
672 			    sizeof(from4)) < 0)
673 				err(1, "bind");
674 		}
675 		packetlen = datalen;
676 		break;
677 	case AF_INET6:
678 		/*
679 		 * packetlen is the size of the complete IP packet sent and
680 		 * reported in the first line of output.
681 		 * For IPv4 this is equal to datalen since we are constructing
682 		 * a raw packet.
683 		 * For IPv6 we need to always add the size of the IP6 header
684 		 * and for UDP packets the size of the UDP header since they
685 		 * are prepended to the packet by the kernel
686 		 */
687 		packetlen = sizeof(struct ip6_hdr);
688 		switch (proto) {
689 		case IPPROTO_UDP:
690 			headerlen = sizeof(struct packetdata);
691 			packetlen += sizeof(struct udphdr);
692 			break;
693 		case IPPROTO_ICMP:
694 			headerlen = sizeof(struct icmp6_hdr) +
695 			    sizeof(struct packetdata);
696 			break;
697 		default:
698 			errx(1, "Unsupported proto: %hhu", proto);
699 			break;
700 		}
701 
702 		if (datalen < 0 || datalen > IP_MAXPACKET - headerlen)
703 			errx(1, "packet size must be 0 to %d.",
704 			    IP_MAXPACKET - headerlen);
705 
706 		datalen += headerlen;
707 		packetlen += datalen;
708 
709 		if ((outpacket = calloc(1, datalen)) == NULL)
710 			err(1, "calloc");
711 
712 		/* initialize msghdr for receiving packets */
713 		rcviov[0].iov_base = (caddr_t)packet;
714 		rcviov[0].iov_len = sizeof(packet);
715 		rcvmhdr.msg_name = (caddr_t)&from6;
716 		rcvmhdr.msg_namelen = sizeof(from6);
717 		rcvmhdr.msg_iov = rcviov;
718 		rcvmhdr.msg_iovlen = 1;
719 		rcvcmsglen = CMSG_SPACE(sizeof(struct in6_pktinfo)) +
720 		    CMSG_SPACE(sizeof(int));
721 
722 		if ((rcvcmsgbuf = malloc(rcvcmsglen)) == NULL)
723 			errx(1, "malloc");
724 		rcvmhdr.msg_control = (caddr_t) rcvcmsgbuf;
725 		rcvmhdr.msg_controllen = rcvcmsglen;
726 
727 		/*
728 		 * Send UDP or ICMP
729 		 */
730 		if (proto == IPPROTO_ICMP) {
731 			close(sndsock);
732 			sndsock = rcvsock;
733 		}
734 
735 		/*
736 		 * Source selection
737 		 */
738 		memset(&from6, 0, sizeof(from6));
739 		if (source) {
740 			memset(&hints, 0, sizeof(hints));
741 			hints.ai_family = AF_INET6;
742 			hints.ai_socktype = SOCK_DGRAM;	/*dummy*/
743 			hints.ai_flags = AI_NUMERICHOST;
744 			if ((error = getaddrinfo(source, "0", &hints, &res)))
745 				errx(1, "%s: %s", source, gai_strerror(error));
746 			if (res->ai_addrlen != sizeof(from6))
747 				errx(1, "size of sockaddr mismatch");
748 			memcpy(&from6, res->ai_addr, res->ai_addrlen);
749 			freeaddrinfo(res);
750 		} else {
751 			struct sockaddr_in6 nxt;
752 			int dummy;
753 
754 			nxt = to6;
755 			nxt.sin6_port = htons(DUMMY_PORT);
756 			if ((dummy = socket(AF_INET6, SOCK_DGRAM, 0)) < 0)
757 				err(1, "socket");
758 			if (connect(dummy, (struct sockaddr *)&nxt,
759 			    nxt.sin6_len) < 0)
760 				err(1, "connect");
761 			len = sizeof(from6);
762 			if (getsockname(dummy, (struct sockaddr *)&from6,
763 			    &len) < 0)
764 				err(1, "getsockname");
765 			close(dummy);
766 		}
767 
768 		from6.sin6_port = htons(0);
769 		if (bind(sndsock, (struct sockaddr *)&from6, from6.sin6_len) <
770 		    0)
771 			err(1, "bind sndsock");
772 
773 		len = sizeof(from6);
774 		if (getsockname(sndsock, (struct sockaddr *)&from6, &len) < 0)
775 			err(1, "getsockname");
776 		srcport = ntohs(from6.sin6_port);
777 		break;
778 	default:
779 		errx(1, "unsupported AF: %d", to->sa_family);
780 		break;
781 	}
782 
783 	if (options & SO_DEBUG) {
784 		(void) setsockopt(rcvsock, SOL_SOCKET, SO_DEBUG,
785 		    (char *)&on, sizeof(on));
786 		(void) setsockopt(sndsock, SOL_SOCKET, SO_DEBUG,
787 		    (char *)&on, sizeof(on));
788 	}
789 
790 	if (setsockopt(sndsock, SOL_SOCKET, SO_SNDBUF, (char *)&datalen,
791 	    sizeof(datalen)) < 0)
792 		err(6, "SO_SNDBUF");
793 
794 	if (nflag && !Aflag) {
795 		if (pledge("stdio inet", NULL) == -1)
796 			err(1, "pledge");
797 	} else {
798 		if (pledge("stdio inet dns", NULL) == -1)
799 			err(1, "pledge");
800 	}
801 
802 	if (getnameinfo(to, to->sa_len, hbuf,
803 	    sizeof(hbuf), NULL, 0, NI_NUMERICHOST))
804 		strlcpy(hbuf, "(invalid)", sizeof(hbuf));
805 	fprintf(stderr, "%s to %s (%s)", __progname, hostname, hbuf);
806 	if (source)
807 		fprintf(stderr, " from %s", source);
808 	fprintf(stderr, ", %u hops max, %d byte packets\n", max_ttl, packetlen);
809 	(void) fflush(stderr);
810 
811 	if (first_ttl > 1)
812 		printf("Skipping %u intermediate hops\n", first_ttl - 1);
813 
814 	for (ttl = first_ttl; ttl && ttl <= max_ttl; ++ttl) {
815 		int got_there = 0, unreachable = 0, timeout = 0, loss;
816 		in_addr_t lastaddr = 0;
817 		struct in6_addr lastaddr6;
818 
819 		printf("%2u ", ttl);
820 		memset(&lastaddr6, 0, sizeof(lastaddr6));
821 		for (probe = 0, loss = 0; probe < nprobes; ++probe) {
822 			int cc;
823 			struct timeval t1, t2;
824 
825 			gettime(&t1);
826 			send_probe(++seq, ttl, incflag, to);
827 			while ((cc = wait_for_reply(rcvsock, &rcvmhdr))) {
828 				gettime(&t2);
829 				i = packet_ok(to->sa_family, &rcvmhdr, cc, seq,
830 				    incflag);
831 				/* Skip short packet */
832 				if (i == 0)
833 					continue;
834 				if (to->sa_family == AF_INET) {
835 					ip = (struct ip *)packet;
836 					if (from4.sin_addr.s_addr != lastaddr) {
837 						print(from,
838 						    cc - (ip->ip_hl << 2),
839 						    inet_ntop(AF_INET,
840 						    &ip->ip_dst, hbuf,
841 						    sizeof(hbuf)));
842 						lastaddr =
843 						    from4.sin_addr.s_addr;
844 					}
845 				} else if (to->sa_family == AF_INET6) {
846 					if (!IN6_ARE_ADDR_EQUAL(
847 					    &from6.sin6_addr, &lastaddr6)) {
848 						print(from, cc, rcvpktinfo ?
849 						    inet_ntop( AF_INET6,
850 						    &rcvpktinfo->ipi6_addr,
851 						    hbuf, sizeof(hbuf)) : "?");
852 						lastaddr6 = from6.sin6_addr;
853 					}
854 				} else
855 					errx(1, "unsupported AF: %d",
856 					    to->sa_family);
857 
858 				printf("  %g ms", deltaT(&t1, &t2));
859 				if (ttl_flag)
860 					printf(" (%u)", v6flag ? rcvhlim :
861 					    ip->ip_ttl);
862 				if (to->sa_family == AF_INET) {
863 					if (i == -2) {
864 						if (ip->ip_ttl <= 1)
865 							printf(" !");
866 						++got_there;
867 						break;
868 					}
869 
870 					if (tflag)
871 						check_tos(ip);
872 				}
873 
874 				/* time exceeded in transit */
875 				if (i == -1)
876 					break;
877 				icmp_code(to->sa_family, i - 1, &got_there,
878 				    &unreachable);
879 				break;
880 			}
881 			if (cc == 0) {
882 				printf(" *");
883 				timeout++;
884 				loss++;
885 			} else if (cc && probe == nprobes - 1 &&
886 			    (xflag || verbose))
887 				print_exthdr(packet, cc);
888 			(void) fflush(stdout);
889 		}
890 		if (sump)
891 			printf(" (%d%% loss)", (loss * 100) / nprobes);
892 		putchar('\n');
893 		if (got_there ||
894 		    (unreachable && (unreachable + timeout) >= nprobes))
895 			break;
896 	}
897 	exit(0);
898 }
899 
900 void
901 usage(void)
902 {
903 	if (v6flag) {
904 		fprintf(stderr, "usage: traceroute6 [-AcDdIlnSv] [-f first_hop] "
905 		    "[-m max_hop] [-p port]\n"
906 		    "\t[-q nqueries] [-s src_addr] [-V rtable] [-w waittime] "
907 		    "host\n\t[datalen]\n");
908 	} else {
909 		fprintf(stderr,
910 		    "usage: %s [-AcDdIlnSvx] [-f first_ttl] [-g gateway_addr] "
911 		    "[-m max_ttl]\n"
912 		    "\t[-P proto] [-p port] [-q nqueries] [-s src_addr]\n"
913 		    "\t[-t toskeyword] "
914 		    "[-V rtable] [-w waittime] host [datalen]\n",
915 		    __progname);
916 	}
917 	exit(1);
918 }
919