xref: /openbsd-src/usr.sbin/traceroute/traceroute.c (revision a28daedfc357b214be5c701aa8ba8adb29a7f1c2)
1 /*	$OpenBSD: traceroute.c,v 1.66 2008/10/04 02:21:49 deraadt Exp $	*/
2 /*	$NetBSD: traceroute.c,v 1.10 1995/05/21 15:50:45 mycroft Exp $	*/
3 
4 /*-
5  * Copyright (c) 1990, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Van Jacobson.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
/*
 * Identification strings embedded in the binary.  They are compiled
 * out when `lint' is defined.
 */
#ifndef lint
static char copyright[] =
"@(#) Copyright (c) 1990, 1993\n"
"\tThe Regents of the University of California.  All rights reserved.\n";
#endif /* not lint */

#ifndef lint
#if 0
/* Historical SCCS id, kept for reference only (never compiled). */
static char sccsid[] = "@(#)traceroute.c	8.1 (Berkeley) 6/6/93";
#else
static char rcsid[] = "$OpenBSD: traceroute.c,v 1.66 2008/10/04 02:21:49 deraadt Exp $";
#endif
#endif /* not lint */
49 
50 /*
51  * traceroute host  - trace the route ip packets follow going to "host".
52  *
53  * Attempt to trace the route an ip packet would follow to some
54  * internet host.  We find out intermediate hops by launching probe
55  * packets with a small ttl (time to live) then listening for an
56  * icmp "time exceeded" reply from a gateway.  We start our probes
57  * with a ttl of one and increase by one until we get an icmp "port
58  * unreachable" (which means we got to "host") or hit a max (which
59  * defaults to 64 hops & can be changed with the -m flag).  Three
60  * probes (change with -q flag) are sent at each ttl setting and a
61  * line is printed showing the ttl, address of the gateway and
62  * round trip time of each probe.  If the probe answers come from
63  * different gateways, the address of each responding system will
64  * be printed.  If there is no response within a 5 sec. timeout
65  * interval (changed with the -w flag), a "*" is printed for that
66  * probe.
67  *
68  * Probe packets are UDP format.  We don't want the destination
69  * host to process them so the destination port is set to an
70  * unlikely value (if some clod on the destination is using that
71  * value, it can be changed with the -p flag).
72  *
73  * A sample use might be:
74  *
75  *     [yak 71]% traceroute nis.nsf.net.
76  *     traceroute to nis.nsf.net (35.1.1.48), 64 hops max, 56 byte packet
77  *      1  helios.ee.lbl.gov (128.3.112.1)  19 ms  19 ms  0 ms
78  *      2  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  39 ms  19 ms
79  *      3  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  39 ms  19 ms
80  *      4  ccngw-ner-cc.Berkeley.EDU (128.32.136.23)  39 ms  40 ms  39 ms
81  *      5  ccn-nerif22.Berkeley.EDU (128.32.168.22)  39 ms  39 ms  39 ms
82  *      6  128.32.197.4 (128.32.197.4)  40 ms  59 ms  59 ms
83  *      7  131.119.2.5 (131.119.2.5)  59 ms  59 ms  59 ms
84  *      8  129.140.70.13 (129.140.70.13)  99 ms  99 ms  80 ms
85  *      9  129.140.71.6 (129.140.71.6)  139 ms  239 ms  319 ms
86  *     10  129.140.81.7 (129.140.81.7)  220 ms  199 ms  199 ms
87  *     11  nic.merit.edu (35.1.1.48)  239 ms  239 ms  239 ms
88  *
89  * Note that lines 2 & 3 are the same.  This is due to a buggy
90  * kernel on the 2nd hop system -- lbl-csam.arpa -- that forwards
91  * packets with a zero ttl.
92  *
93  * A more interesting example is:
94  *
95  *     [yak 72]% traceroute allspice.lcs.mit.edu.
96  *     traceroute to allspice.lcs.mit.edu (18.26.0.115), 64 hops max
97  *      1  helios.ee.lbl.gov (128.3.112.1)  0 ms  0 ms  0 ms
98  *      2  lilac-dmc.Berkeley.EDU (128.32.216.1)  19 ms  19 ms  19 ms
99  *      3  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  19 ms  19 ms
100  *      4  ccngw-ner-cc.Berkeley.EDU (128.32.136.23)  19 ms  39 ms  39 ms
101  *      5  ccn-nerif22.Berkeley.EDU (128.32.168.22)  20 ms  39 ms  39 ms
102  *      6  128.32.197.4 (128.32.197.4)  59 ms  119 ms  39 ms
103  *      7  131.119.2.5 (131.119.2.5)  59 ms  59 ms  39 ms
104  *      8  129.140.70.13 (129.140.70.13)  80 ms  79 ms  99 ms
105  *      9  129.140.71.6 (129.140.71.6)  139 ms  139 ms  159 ms
106  *     10  129.140.81.7 (129.140.81.7)  199 ms  180 ms  300 ms
107  *     11  129.140.72.17 (129.140.72.17)  300 ms  239 ms  239 ms
108  *     12  * * *
109  *     13  128.121.54.72 (128.121.54.72)  259 ms  499 ms  279 ms
110  *     14  * * *
111  *     15  * * *
112  *     16  * * *
113  *     17  * * *
114  *     18  ALLSPICE.LCS.MIT.EDU (18.26.0.115)  339 ms  279 ms  279 ms
115  *
116  * (I start to see why I'm having so much trouble with mail to
117  * MIT.)  Note that the gateways 12, 14, 15, 16 & 17 hops away
118  * either don't send ICMP "time exceeded" messages or send them
119  * with a ttl too small to reach us.  14 - 17 are running the
120  * MIT C Gateway code that doesn't send "time exceeded"s.  God
121  * only knows what's going on with 12.
122  *
123  * The silent gateway 12 in the above may be the result of a bug in
124  * the 4.[23]BSD network code (and its derivatives):  4.x (x <= 3)
125  * sends an unreachable message using whatever ttl remains in the
126  * original datagram.  Since, for gateways, the remaining ttl is
127  * zero, the icmp "time exceeded" is guaranteed to not make it back
128  * to us.  The behavior of this bug is slightly more interesting
129  * when it appears on the destination system:
130  *
131  *      1  helios.ee.lbl.gov (128.3.112.1)  0 ms  0 ms  0 ms
132  *      2  lilac-dmc.Berkeley.EDU (128.32.216.1)  39 ms  19 ms  39 ms
133  *      3  lilac-dmc.Berkeley.EDU (128.32.216.1)  19 ms  39 ms  19 ms
134  *      4  ccngw-ner-cc.Berkeley.EDU (128.32.136.23)  39 ms  40 ms  19 ms
135  *      5  ccn-nerif35.Berkeley.EDU (128.32.168.35)  39 ms  39 ms  39 ms
136  *      6  csgw.Berkeley.EDU (128.32.133.254)  39 ms  59 ms  39 ms
137  *      7  * * *
138  *      8  * * *
139  *      9  * * *
140  *     10  * * *
141  *     11  * * *
142  *     12  * * *
143  *     13  rip.Berkeley.EDU (128.32.131.22)  59 ms !  39 ms !  39 ms !
144  *
145  * Notice that there are 12 "gateways" (13 is the final
146  * destination) and exactly the last half of them are "missing".
147  * What's really happening is that rip (a Sun-3 running Sun OS3.5)
148  * is using the ttl from our arriving datagram as the ttl in its
149  * icmp reply.  So, the reply will time out on the return path
150  * (with no notice sent to anyone since icmp's aren't sent for
151  * icmp's) until we probe with a ttl that's at least twice the path
152  * length.  I.e., rip is really only 7 hops away.  A reply that
153  * returns with a ttl of 1 is a clue this problem exists.
154  * Traceroute prints a "!" after the time if the ttl is <= 1.
155  * Since vendors ship a lot of obsolete (DEC's Ultrix, Sun 3.x) or
156  * non-standard (HPUX) software, expect to see this problem
157  * frequently and/or take care picking the target host of your
158  * probes.
159  *
160  * Other possible annotations after the time are !H, !N, !P (got a host,
161  * network or protocol unreachable, respectively), !S or !F (source
162  * route failed or fragmentation needed -- neither of these should
163  * ever occur and the associated gateway is busted if you see one).  If
164  * almost all the probes result in some kind of unreachable, traceroute
165  * will give up and exit.
166  *
167  * Notes
168  * -----
169  * This program must be run by root or be setuid.  (I suggest that
170  * you *don't* make it setuid -- casual use could result in a lot
171  * of unnecessary traffic on our poor, congested nets.)
172  *
173  * This program requires a kernel mod that does not appear in any
174  * system available from Berkeley:  A raw ip socket using proto
175  * IPPROTO_RAW must interpret the data sent as an ip datagram (as
176  * opposed to data to be wrapped in a ip datagram).  See the README
177  * file that came with the source to this program for a description
178  * of the mods I made to /sys/netinet/raw_ip.c.  Your mileage may
179  * vary.  But, again, ANY 4.x (x < 4) BSD KERNEL WILL HAVE TO BE
180  * MODIFIED TO RUN THIS PROGRAM.
181  *
182  * The udp port usage may appear bizarre (well, ok, it is bizarre).
183  * The problem is that an icmp message only contains 8 bytes of
184  * data from the original datagram.  8 bytes is the size of a udp
185  * header so, if we want to associate replies with the original
186  * datagram, the necessary information must be encoded into the
187  * udp header (the ip id could be used but there's no way to
188  * interlock with the kernel's assignment of ip id's and, anyway,
189  * it would have taken a lot more kernel hacking to allow this
190  * code to set the ip id).  So, to allow two or more users to
191  * use traceroute simultaneously, we use this task's pid as the
192  * source port (the high bit is set to move the port number out
193  * of the "likely" range).  To keep track of which probe is being
194  * replied to (so times and/or hop counts don't get confused by a
195  * reply that was delayed in transit), we increment the destination
196  * port number before each probe.
197  *
198  * Don't use this as a coding example.  I was trying to find a
199  * routing problem and this code sort-of popped out after 48 hours
200  * without sleep.  I was amazed it ever compiled, much less ran.
201  *
202  * I stole the idea for this program from Steve Deering.  Since
203  * the first release, I've learned that had I attended the right
204  * IETF working group meetings, I also could have stolen it from Guy
205  * Almes or Matt Mathis.  I don't know (or care) who came up with
206  * the idea first.  I envy the originators' perspicacity and I'm
207  * glad they didn't keep the idea a secret.
208  *
209  * Tim Seaver, Ken Adelman and C. Philip Wood provided bug fixes and/or
210  * enhancements to the original distribution.
211  *
212  * I've hacked up a round-trip-route version of this that works by
213  * sending a loose-source-routed udp datagram through the destination
214  * back to yourself.  Unfortunately, SO many gateways botch source
215  * routing, the thing is almost worthless.  Maybe one day...
216  *
217  *  -- Van Jacobson (van@helios.ee.lbl.gov)
218  *     Tue Dec 20 03:50:13 PST 1988
219  */
220 
221 #include <sys/param.h>
222 #include <sys/time.h>
223 #include <sys/socket.h>
224 #include <sys/file.h>
225 #include <sys/ioctl.h>
226 #include <sys/sysctl.h>
227 
228 #include <netinet/in_systm.h>
229 #include <netinet/in.h>
230 #include <netinet/ip.h>
231 #include <netinet/ip_icmp.h>
232 #include <netinet/ip_var.h>
233 #include <netinet/udp.h>
234 
235 #include <arpa/inet.h>
236 
237 #include <ctype.h>
238 #include <err.h>
239 #include <errno.h>
240 #include <netdb.h>
241 #include <stdio.h>
242 #include <stdlib.h>
243 #include <string.h>
244 #include <unistd.h>
245 
/*
 * Upper bound on the number of -g gateways: how many 4-byte addresses
 * fit in the IP option space once 4 bytes of overhead are set aside.
 */
#define	MAX_LSRR	((MAX_IPOPTLEN - 4) / 4)

/*
 * Payload written at the end of every outgoing probe packet.
 * The two time fields are stored in network byte order after being
 * perturbed by send_probe().
 */
struct packetdata {
	u_int8_t seq;		/* probe sequence number (low 8 bits) */
	u_int8_t ttl;		/* ttl the probe was sent with */
	u_int8_t pad[2];	/* keeps the time fields 4-byte aligned */
	u_int32_t sec;		/* send time, seconds */
	u_int32_t usec;		/* send time, microseconds */
} __packed;
258 
259 struct in_addr gateway[MAX_LSRR + 1];
260 int lsrrlen = 0;
261 int32_t sec_perturb;
262 int32_t usec_perturb;
263 
264 u_char packet[512], *outpacket;	/* last inbound (icmp) packet */
265 
266 int wait_for_reply(int, struct sockaddr_in *, struct timeval *);
267 void send_probe(int, u_int8_t, int, struct sockaddr_in *);
268 int packet_ok(u_char *, int, struct sockaddr_in *, int, int);
269 void print(u_char *, int, struct sockaddr_in *);
270 char *inetname(struct in_addr);
271 u_short in_cksum(u_short *, int);
272 void usage(void);
273 
274 int s;				/* receive (icmp) socket file descriptor */
275 int sndsock;			/* send (udp) socket file descriptor */
276 
277 int datalen;			/* How much data */
278 int headerlen;			/* How long packet's header is */
279 
280 char *source = 0;
281 char *hostname;
282 
283 int nprobes = 3;
284 u_int8_t max_ttl = IPDEFTTL;
285 u_int8_t first_ttl = 1;
286 u_short ident;
287 u_short port = 32768+666;	/* start udp dest port # for probe packets */
288 u_char	proto = IPPROTO_UDP;
289 u_int8_t  icmp_type = ICMP_ECHO; /* default ICMP code/type */
290 u_char  icmp_code = 0;
291 int options;			/* socket options */
292 int verbose;
293 int waittime = 5;		/* time to wait for response (in seconds) */
294 int nflag;			/* print addresses numerically */
295 int dump;
296 
297 int
298 main(int argc, char *argv[])
299 {
300 	int mib[4] = { CTL_NET, PF_INET, IPPROTO_IP, IPCTL_DEFTTL };
301 	int ttl_flag = 0, incflag = 1, protoset = 0, sump = 0;
302 	int ch, i, lsrr = 0, on = 1, probe, seq = 0, tos = 0;
303 	size_t size = sizeof(max_ttl);
304 	struct sockaddr_in from, to;
305 	struct hostent *hp;
306 	u_int32_t tmprnd;
307 	struct ip *ip;
308 	u_int8_t ttl;
309 	char *ep;
310 	long l;
311 	uid_t uid;
312 
313 	if ((s = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP)) < 0)
314 		err(5, "icmp socket");
315 	if ((sndsock = socket(AF_INET, SOCK_RAW, IPPROTO_RAW)) < 0)
316 		err(5, "raw socket");
317 
318 	/* revoke privs */
319 	uid = getuid();
320 	if (setresuid(uid, uid, uid) == -1)
321 		err(1, "setresuid");
322 
323 	(void) sysctl(mib, sizeof(mib)/sizeof(mib[0]), &max_ttl, &size,
324 	    NULL, 0);
325 
326 	while ((ch = getopt(argc, argv, "SDIdg:f:m:np:q:rs:t:w:vlP:c")) != -1)
327 		switch (ch) {
328 		case 'S':
329 			sump = 1;
330 			break;
331 		case 'f':
332 			errno = 0;
333 			ep = NULL;
334 			l = strtol(optarg, &ep, 10);
335 			if (errno || !*optarg || *ep || l < 1 || l > max_ttl)
336 				errx(1, "min ttl must be 1 to %u.", max_ttl);
337 			first_ttl = (u_int8_t)l;
338 			break;
339 		case 'c':
340 			incflag = 0;
341 			break;
342 		case 'd':
343 			options |= SO_DEBUG;
344 			break;
345 		case 'D':
346 			dump = 1;
347 			break;
348 		case 'g':
349 			if (lsrr >= MAX_LSRR)
350 				errx(1, "too many gateways; max %d", MAX_LSRR);
351 			if (inet_aton(optarg, &gateway[lsrr]) == 0) {
352 				hp = gethostbyname(optarg);
353 				if (hp == 0)
354 					errx(1, "unknown host %s", optarg);
355 				memcpy(&gateway[lsrr], hp->h_addr, hp->h_length);
356 			}
357 			if (++lsrr == 1)
358 				lsrrlen = 4;
359 			lsrrlen += 4;
360 			break;
361 		case 'I':
362 			if (protoset)
363 				errx(1, "protocol already set with -P");
364 			protoset = 1;
365 			proto = IPPROTO_ICMP;
366 			break;
367 		case 'l':
368 			ttl_flag++;
369 			break;
370 		case 'm':
371 			errno = 0;
372 			ep = NULL;
373 			l = strtol(optarg, &ep, 10);
374 			if (errno || !*optarg || *ep || l < first_ttl ||
375 			    l > MAXTTL)
376 				errx(1, "max ttl must be %u to %u.", first_ttl,
377 				    MAXTTL);
378 			max_ttl = (u_int8_t)l;
379 			break;
380 		case 'n':
381 			nflag++;
382 			break;
383 		case 'p':
384 			errno = 0;
385 			ep = NULL;
386 			l = strtol(optarg, &ep, 10);
387 			if (errno || !*optarg || *ep || l <= 0 || l >= 65536)
388 				errx(1, "port must be >0, <65536.");
389 			port = (int)l;
390 			break;
391 		case 'P':
392 			if (protoset)
393 				errx(1, "protocol already set with -I");
394 			protoset = 1;
395 			errno = 0;
396 			ep = NULL;
397 			l = strtol(optarg, &ep, 10);
398 			if (errno || !*optarg || *ep || l < 1 ||
399 			    l >= IPPROTO_MAX) {
400 				struct protoent *pent;
401 
402 				pent = getprotobyname(optarg);
403 				if (pent)
404 					proto = pent->p_proto;
405 				else
406 					errx(1, "proto must be >=1, or a name.");
407 			} else
408 				proto = (int)l;
409 			break;
410 		case 'q':
411 			errno = 0;
412 			ep = NULL;
413 			l = strtol(optarg, &ep, 10);
414 			if (errno || !*optarg || *ep || l < 1 || l > INT_MAX)
415 				errx(1, "nprobes must be >0.");
416 			nprobes = (int)l;
417 			break;
418 		case 'r':
419 			options |= SO_DONTROUTE;
420 			break;
421 		case 's':
422 			/*
423 			 * set the ip source address of the outbound
424 			 * probe (e.g., on a multi-homed host).
425 			 */
426 			source = optarg;
427 			break;
428 		case 't':
429 			errno = 0;
430 			ep = NULL;
431 			l = strtol(optarg, &ep, 10);
432 			if (errno || !*optarg || *ep || l < 0 || l > 255)
433 				errx(1, "tos must be 0 to 255.");
434 			tos = (int)l;
435 			break;
436 		case 'v':
437 			verbose++;
438 			break;
439 		case 'w':
440 			errno = 0;
441 			ep = NULL;
442 			l = strtol(optarg, &ep, 10);
443 			if (errno || !*optarg || *ep || l <= 1 || l > INT_MAX)
444 				errx(1, "wait must be >1 sec.");
445 			waittime = (int)l;
446 			break;
447 		default:
448 			usage();
449 		}
450 	argc -= optind;
451 	argv += optind;
452 
453 	if (argc < 1)
454 		usage();
455 
456 	setlinebuf (stdout);
457 
458 	(void) memset(&to, 0, sizeof(struct sockaddr));
459 	to.sin_family = AF_INET;
460 	if (inet_aton(*argv, &to.sin_addr) != 0)
461 		hostname = *argv;
462 	else {
463 		hp = gethostbyname(*argv);
464 		if (hp == 0)
465 			errx(1, "unknown host %s", *argv);
466 		to.sin_family = hp->h_addrtype;
467 		memcpy(&to.sin_addr, hp->h_addr, hp->h_length);
468 		if ((hostname = strdup(hp->h_name)) == NULL)
469 			err(1, "malloc");
470 		if (hp->h_addr_list[1] != NULL)
471 			warnx("Warning: %s has multiple addresses; using %s",
472 			    hostname, inet_ntoa(to.sin_addr));
473 	}
474 	if (*++argv) {
475 		errno = 0;
476 		ep = NULL;
477 		l = strtol(*argv, &ep, 10);
478 		if (errno || !*argv || *ep || l < 0 || l > INT_MAX)
479 			errx(1, "datalen out of range");
480 		datalen = (int)l;
481 	}
482 
483 	switch (proto) {
484 	case IPPROTO_UDP:
485 		headerlen = (sizeof(struct ip) + lsrrlen +
486 		    sizeof(struct udphdr) + sizeof(struct packetdata));
487 		break;
488 	case IPPROTO_ICMP:
489 		headerlen = (sizeof(struct ip) + lsrrlen +
490 		    sizeof(struct icmp) + sizeof(struct packetdata));
491 		break;
492 	default:
493 		headerlen = (sizeof(struct ip) + lsrrlen +
494 		    sizeof(struct packetdata));
495 	}
496 
497 	if (datalen < 0 || datalen > IP_MAXPACKET - headerlen)
498 		errx(1, "packet size must be 0 to %d.",
499 		    IP_MAXPACKET - headerlen);
500 
501 	datalen += headerlen;
502 
503 	outpacket = malloc(datalen);
504 	if (outpacket == 0)
505 		err(1, "malloc");
506 	(void) memset(outpacket, 0, datalen);
507 
508 	ip = (struct ip *)outpacket;
509 	if (lsrr != 0) {
510 		u_char *p = (u_char *)(ip + 1);
511 
512 		*p++ = IPOPT_NOP;
513 		*p++ = IPOPT_LSRR;
514 		*p++ = lsrrlen - 1;
515 		*p++ = IPOPT_MINOFF;
516 		gateway[lsrr] = to.sin_addr;
517 		for (i = 1; i <= lsrr; i++) {
518 			memcpy(p, &gateway[i], sizeof(struct in_addr));
519 			p += sizeof(struct in_addr);
520 		}
521 		ip->ip_dst = gateway[0];
522 	} else
523 		ip->ip_dst = to.sin_addr;
524 	ip->ip_off = htons(0);
525 	ip->ip_hl = (sizeof(struct ip) + lsrrlen) >> 2;
526 	ip->ip_p = proto;
527 	ip->ip_v = IPVERSION;
528 	ip->ip_tos = tos;
529 
530 	ident = (getpid() & 0xffff) | 0x8000;
531 	tmprnd = arc4random();
532 	sec_perturb = (tmprnd & 0x80000000) ? -(tmprnd & 0x7ff) :
533 	    (tmprnd & 0x7ff);
534 	usec_perturb = arc4random();
535 
536 	if (options & SO_DEBUG)
537 		(void) setsockopt(s, SOL_SOCKET, SO_DEBUG,
538 		    (char *)&on, sizeof(on));
539 #ifdef SO_SNDBUF
540 	if (setsockopt(sndsock, SOL_SOCKET, SO_SNDBUF, (char *)&datalen,
541 	    sizeof(datalen)) < 0)
542 		err(6, "SO_SNDBUF");
543 #endif /* SO_SNDBUF */
544 #ifdef IP_HDRINCL
545 	if (setsockopt(sndsock, IPPROTO_IP, IP_HDRINCL, (char *)&on,
546 	    sizeof(on)) < 0)
547 		err(6, "IP_HDRINCL");
548 #endif /* IP_HDRINCL */
549 	if (options & SO_DEBUG)
550 		(void) setsockopt(sndsock, SOL_SOCKET, SO_DEBUG,
551 		    (char *)&on, sizeof(on));
552 	if (options & SO_DONTROUTE)
553 		(void) setsockopt(sndsock, SOL_SOCKET, SO_DONTROUTE,
554 		    (char *)&on, sizeof(on));
555 
556 	if (source) {
557 		(void) memset(&from, 0, sizeof(struct sockaddr));
558 		from.sin_family = AF_INET;
559 		if (inet_aton(source, &from.sin_addr) == 0)
560 			errx(1, "unknown host %s", source);
561 		ip->ip_src = from.sin_addr;
562 		if (getuid() != 0 &&
563 		    (ntohl(from.sin_addr.s_addr) & 0xff000000U) == 0x7f000000U &&
564 		    (ntohl(to.sin_addr.s_addr) & 0xff000000U) != 0x7f000000U)
565 			errx(1, "source is on 127/8, destination is not");
566 
567 		if (getuid() &&
568 		    bind(sndsock, (struct sockaddr *)&from, sizeof(from)) < 0)
569 			err(1, "bind");
570 	}
571 
572 	fprintf(stderr, "traceroute to %s (%s)", hostname,
573 		inet_ntoa(to.sin_addr));
574 	if (source)
575 		fprintf(stderr, " from %s", source);
576 	fprintf(stderr, ", %u hops max, %d byte packets\n", max_ttl, datalen);
577 	(void) fflush(stderr);
578 
579 	if (first_ttl > 1)
580 		printf("Skipping %u intermediate hops\n", first_ttl - 1);
581 
582 	for (ttl = first_ttl; ttl && ttl <= max_ttl; ++ttl) {
583 		int got_there = 0, unreachable = 0, timeout = 0, loss;
584 		in_addr_t lastaddr = 0;
585 		quad_t dt;
586 
587 		printf("%2u ", ttl);
588 		for (probe = 0, loss = 0; probe < nprobes; ++probe) {
589 			int cc;
590 			struct timeval t1, t2;
591 			int code;
592 
593 			(void) gettimeofday(&t1, NULL);
594 			send_probe(++seq, ttl, incflag, &to);
595 			while ((cc = wait_for_reply(s, &from, &t1))) {
596 				(void) gettimeofday(&t2, NULL);
597 				if (t2.tv_sec - t1.tv_sec > waittime) {
598 					cc = 0;
599 					break;
600 				}
601 				i = packet_ok(packet, cc, &from, seq, incflag);
602 				/* Skip short packet */
603 				if (i == 0)
604 					continue;
605 				if (from.sin_addr.s_addr != lastaddr) {
606 					print(packet, cc, &from);
607 					lastaddr = from.sin_addr.s_addr;
608 				}
609 				dt = (quad_t)(t2.tv_sec - t1.tv_sec) * 1000000 +
610 				    (quad_t)(t2.tv_usec - t1.tv_usec);
611 				printf("  %u", (u_int)(dt / 1000));
612 				if (dt % 1000)
613 					printf(".%u", (u_int)(dt % 1000));
614 				printf(" ms");
615 				ip = (struct ip *)packet;
616 				if (ttl_flag)
617 					printf(" (%u)", ip->ip_ttl);
618 				if (i == -2) {
619 #ifndef ARCHAIC
620 					ip = (struct ip *)packet;
621 					if (ip->ip_ttl <= 1)
622 						printf(" !");
623 #endif
624 					++got_there;
625 					break;
626 				}
627 				/* time exceeded in transit */
628 				if (i == -1)
629 					break;
630 				code = i - 1;
631 				switch (code) {
632 				case ICMP_UNREACH_PORT:
633 #ifndef ARCHAIC
634 					ip = (struct ip *)packet;
635 					if (ip->ip_ttl <= 1)
636 						printf(" !");
637 #endif /* ARCHAIC */
638 					++got_there;
639 					break;
640 				case ICMP_UNREACH_NET:
641 					++unreachable;
642 					printf(" !N");
643 					break;
644 				case ICMP_UNREACH_HOST:
645 					++unreachable;
646 					printf(" !H");
647 					break;
648 				case ICMP_UNREACH_PROTOCOL:
649 					++got_there;
650 					printf(" !P");
651 					break;
652 				case ICMP_UNREACH_NEEDFRAG:
653 					++unreachable;
654 					printf(" !F");
655 					break;
656 				case ICMP_UNREACH_SRCFAIL:
657 					++unreachable;
658 					printf(" !S");
659 					break;
660 				case ICMP_UNREACH_FILTER_PROHIB:
661 					++unreachable;
662 					printf(" !X");
663 					break;
664 				case ICMP_UNREACH_NET_PROHIB: /*misuse*/
665 					++unreachable;
666 					printf(" !A");
667 					break;
668 				case ICMP_UNREACH_HOST_PROHIB:
669 					++unreachable;
670 					printf(" !C");
671 					break;
672 				case ICMP_UNREACH_NET_UNKNOWN:
673 				case ICMP_UNREACH_HOST_UNKNOWN:
674 					++unreachable;
675 					printf(" !U");
676 					break;
677 				case ICMP_UNREACH_ISOLATED:
678 					++unreachable;
679 					printf(" !I");
680 					break;
681 				case ICMP_UNREACH_TOSNET:
682 				case ICMP_UNREACH_TOSHOST:
683 					++unreachable;
684 					printf(" !T");
685 					break;
686 				default:
687 					++unreachable;
688 					printf(" !<%d>", i - 1);
689 					break;
690 				}
691 				break;
692 			}
693 			if (cc == 0) {
694 				printf(" *");
695 				timeout++;
696 				loss++;
697 			}
698 			(void) fflush(stdout);
699 		}
700 		if (sump)
701 			printf(" (%d%% loss)", (loss * 100) / nprobes);
702 		putchar('\n');
703 		if (got_there || (unreachable && (unreachable + timeout) >= nprobes))
704 			break;
705 	}
706 	exit(0);
707 }
708 
709 int
710 wait_for_reply(int sock, struct sockaddr_in *from, struct timeval *sent)
711 {
712 	socklen_t fromlen = sizeof (*from);
713 	struct timeval now, wait;
714 	int cc = 0, fdsn;
715 	fd_set *fdsp;
716 
717 	fdsn = howmany(sock+1, NFDBITS) * sizeof(fd_mask);
718 	if ((fdsp = (fd_set *)malloc(fdsn)) == NULL)
719 		err(1, "malloc");
720 	memset(fdsp, 0, fdsn);
721 	FD_SET(sock, fdsp);
722 	gettimeofday(&now, NULL);
723 	wait.tv_sec = (sent->tv_sec + waittime) - now.tv_sec;
724 	wait.tv_usec =  sent->tv_usec - now.tv_usec;
725 	if (wait.tv_usec < 0) {
726 		wait.tv_usec += 1000000;
727 		wait.tv_sec--;
728 	}
729 	if (wait.tv_sec < 0)
730 		wait.tv_sec = wait.tv_usec = 0;
731 
732 	if (select(sock+1, fdsp, (fd_set *)0, (fd_set *)0, &wait) > 0)
733 		cc = recvfrom(s, (char *)packet, sizeof(packet), 0,
734 		    (struct sockaddr *)from, &fromlen);
735 
736 	free(fdsp);
737 	return (cc);
738 }
739 
740 void
741 dump_packet(void)
742 {
743 	u_char *p;
744 	int i;
745 
746 	fprintf(stderr, "packet data:");
747 	for (p = outpacket, i = 0; i < datalen; i++) {
748 		if ((i % 24) == 0)
749 			fprintf(stderr, "\n ");
750 		fprintf(stderr, " %02x", *p++);
751 	}
752 	fprintf(stderr, "\n");
753 }
754 
755 void
756 send_probe(int seq, u_int8_t ttl, int iflag, struct sockaddr_in *to)
757 {
758 	struct ip *ip = (struct ip *)outpacket;
759 	u_char *p = (u_char *)(ip + 1);
760 	struct udphdr *up = (struct udphdr *)(p + lsrrlen);
761 	struct icmp *icmpp = (struct icmp *)(p + lsrrlen);
762 	struct packetdata *op;
763 	struct timeval tv;
764 	int i;
765 
766 	ip->ip_len = htons(datalen);
767 	ip->ip_ttl = ttl;
768 	ip->ip_id = htons(ident+seq);
769 
770 	switch (proto) {
771 	case IPPROTO_ICMP:
772 		icmpp->icmp_type = icmp_type;
773 		icmpp->icmp_code = icmp_code;
774 		icmpp->icmp_seq = htons(seq);
775 		icmpp->icmp_id = htons(ident);
776 		op = (struct packetdata *)(icmpp + 1);
777 		break;
778 	case IPPROTO_UDP:
779 		up->uh_sport = htons(ident);
780 		if (iflag)
781 			up->uh_dport = htons(port+seq);
782 		else
783 			up->uh_dport = htons(port);
784 		up->uh_ulen = htons((u_short)(datalen - sizeof(struct ip) -
785 		    lsrrlen));
786 		up->uh_sum = 0;
787 		op = (struct packetdata *)(up + 1);
788 		break;
789 	default:
790 		op = (struct packetdata *)(ip + 1);
791 		break;
792 	}
793 	op->seq = seq;
794 	op->ttl = ttl;
795 	(void) gettimeofday(&tv, NULL);
796 
797 	/*
798 	 * We don't want hostiles snooping the net to get any useful
799 	 * information about us. Send the timestamp in network byte order,
800 	 * and perturb the timestamp enough that they won't know our
801 	 * real clock ticker. We don't want to perturb the time by too
802 	 * much: being off by a suspiciously large amount might indicate
803 	 * OpenBSD.
804 	 *
805 	 * The timestamps in the packet are currently unused. If future
806 	 * work wants to use them they will have to subtract out the
807 	 * perturbation first.
808 	 */
809 	(void) gettimeofday(&tv, NULL);
810 	op->sec = htonl(tv.tv_sec + sec_perturb);
811 	op->usec = htonl((tv.tv_usec + usec_perturb) % 1000000);
812 
813 	if (proto == IPPROTO_ICMP && icmp_type == ICMP_ECHO) {
814 		icmpp->icmp_cksum = 0;
815 		icmpp->icmp_cksum = in_cksum((u_short *)icmpp,
816 		    datalen - sizeof(struct ip) - lsrrlen);
817 		if (icmpp->icmp_cksum == 0)
818 			icmpp->icmp_cksum = 0xffff;
819 	}
820 
821 	if (dump)
822 		dump_packet();
823 
824 	i = sendto(sndsock, outpacket, datalen, 0, (struct sockaddr *)to,
825 	    sizeof(struct sockaddr_in));
826 	if (i < 0 || i != datalen)  {
827 		if (i < 0)
828 			perror("sendto");
829 		printf("traceroute: wrote %s %d chars, ret=%d\n", hostname,
830 		    datalen, i);
831 		(void) fflush(stdout);
832 	}
833 }
834 
/* Printable names of the ICMP types, indexed by type number. */
static char *ttab[] = {
	"Echo Reply",
	"ICMP 1",
	"ICMP 2",
	"Dest Unreachable",
	"Source Quench",
	"Redirect",
	"ICMP 6",
	"ICMP 7",
	"Echo",
	"Router Advert",
	"Router Solicit",
	"Time Exceeded",
	"Param Problem",
	"Timestamp",
	"Timestamp Reply",
	"Info Request",
	"Info Reply",
	"Mask Request",
	"Mask Reply"
};

/*
 * Map an ICMP "type" value to its name in ttab[]; values past the end
 * of the table yield "OUT-OF-RANGE".
 */
char *
pr_type(u_int8_t t)
{
	return (t < sizeof(ttab) / sizeof(ttab[0]) ? ttab[t] : "OUT-OF-RANGE");
}
867 
868 int
869 packet_ok(u_char *buf, int cc, struct sockaddr_in *from, int seq, int iflag)
870 {
871 	struct icmp *icp;
872 	u_char code;
873 	u_int8_t type;
874 	int hlen;
875 #ifndef ARCHAIC
876 	struct ip *ip;
877 
878 	ip = (struct ip *) buf;
879 	hlen = ip->ip_hl << 2;
880 	if (cc < hlen + ICMP_MINLEN) {
881 		if (verbose)
882 			printf("packet too short (%d bytes) from %s\n", cc,
883 			    inet_ntoa(from->sin_addr));
884 		return (0);
885 	}
886 	cc -= hlen;
887 	icp = (struct icmp *)(buf + hlen);
888 #else
889 	icp = (struct icmp *)buf;
890 #endif /* ARCHAIC */
891 	type = icp->icmp_type;
892 	code = icp->icmp_code;
893 	if ((type == ICMP_TIMXCEED && code == ICMP_TIMXCEED_INTRANS) ||
894 	    type == ICMP_UNREACH || type == ICMP_ECHOREPLY) {
895 		struct ip *hip;
896 		struct udphdr *up;
897 		struct icmp *icmpp;
898 
899 		hip = &icp->icmp_ip;
900 		hlen = hip->ip_hl << 2;
901 
902 		switch (proto) {
903 		case IPPROTO_ICMP:
904 			if (icmp_type == ICMP_ECHO &&
905 			    type == ICMP_ECHOREPLY &&
906 			    icp->icmp_id == htons(ident) &&
907 			    icp->icmp_seq == htons(seq))
908 				return (-2); /* we got there */
909 
910 			icmpp = (struct icmp *)((u_char *)hip + hlen);
911 			if (hlen + 8 <= cc && hip->ip_p == IPPROTO_ICMP &&
912 			    icmpp->icmp_id == htons(ident) &&
913 			    icmpp->icmp_seq == htons(seq))
914 				return (type == ICMP_TIMXCEED? -1 : code + 1);
915 			break;
916 
917 		case IPPROTO_UDP:
918 			up = (struct udphdr *)((u_char *)hip + hlen);
919 			if (hlen + 12 <= cc && hip->ip_p == proto &&
920 			    up->uh_sport == htons(ident) &&
921 			    ((iflag && up->uh_dport == htons(port + seq)) ||
922 			    (!iflag && up->uh_dport == htons(port))))
923 				return (type == ICMP_TIMXCEED? -1 : code + 1);
924 			break;
925 		default:
926 			/* this is some odd, user specified proto,
927 			 * how do we check it?
928 			 */
929 			if (hip->ip_p == proto)
930 				return (type == ICMP_TIMXCEED? -1 : code + 1);
931 		}
932 	}
933 #ifndef ARCHAIC
934 	if (verbose) {
935 		int i;
936 		in_addr_t *lp = (in_addr_t *)&icp->icmp_ip;
937 
938 		printf("\n%d bytes from %s", cc, inet_ntoa(from->sin_addr));
939 		printf(" to %s", inet_ntoa(ip->ip_dst));
940 		printf(": icmp type %u (%s) code %d\n", type, pr_type(type),
941 		    icp->icmp_code);
942 		for (i = 4; i < cc ; i += sizeof(in_addr_t))
943 			printf("%2d: x%8.8lx\n", i, (unsigned long)*lp++);
944 	}
945 #endif /* ARCHAIC */
946 	return (0);
947 }
948 
949 void
950 print(u_char *buf, int cc, struct sockaddr_in *from)
951 {
952 	struct ip *ip;
953 	int hlen;
954 
955 	ip = (struct ip *) buf;
956 	hlen = ip->ip_hl << 2;
957 	cc -= hlen;
958 
959 	if (nflag)
960 		printf(" %s", inet_ntoa(from->sin_addr));
961 	else
962 		printf(" %s (%s)", inetname(from->sin_addr),
963 		    inet_ntoa(from->sin_addr));
964 
965 	if (verbose)
966 		printf(" %d bytes to %s", cc, inet_ntoa (ip->ip_dst));
967 }
968 
969 
/*
 * Checksum routine for Internet Protocol family headers (C Version)
 *
 * Returns the one's complement of the one's complement sum of the
 * `len' bytes at `addr', taken as 16-bit words in memory order, so the
 * result can be stored into the packet as is.  A trailing odd byte is
 * treated as the first byte of a word whose second byte is zero.
 * A len of zero or less yields 0xffff.
 */
u_short
in_cksum(u_short *addr, int len)
{
	const u_char *cp = (const u_char *)addr;
	u_int32_t sum = 0;
	u_short word;
	int nleft;

	/*
	 *  Our algorithm is simple, using a 32 bit accumulator (sum),
	 *  we add sequential 16 bit words to it, and at the end, fold
	 *  back all the carry bits from the top 16 bits into the lower
	 *  16 bits.  The words are fetched with memcpy so the buffer
	 *  needs no particular alignment.
	 */
	for (nleft = len; nleft > 1; nleft -= 2, cp += 2) {
		memcpy(&word, cp, sizeof(word));
		sum += word;
	}

	/*
	 * mop up an odd byte, if necessary: it becomes the first byte in
	 * memory of a zero-padded word, which is correct on hosts of
	 * either byte order.
	 */
	if (nleft == 1) {
		word = 0;
		memcpy(&word, cp, 1);
		sum += word;
	}

	/*
	 * add back carry outs from top 16 bits to low 16 bits
	 */
	sum = (sum >> 16) + (sum & 0xffff);	/* add hi 16 to low 16 */
	sum += (sum >> 16);			/* add carry */
	return ((u_short)~sum);			/* truncate to 16 bits */
}
1002 
1003 /*
1004  * Construct an Internet address representation.
1005  * If the nflag has been supplied, give
1006  * numeric value, otherwise try for symbolic name.
1007  */
1008 char *
1009 inetname(struct in_addr in)
1010 {
1011 	static char domain[MAXHOSTNAMELEN], line[MAXHOSTNAMELEN];
1012 	static int first = 1;
1013 	struct hostent *hp;
1014 	char *cp;
1015 
1016 	if (first && !nflag) {
1017 		first = 0;
1018 		if (gethostname(domain, sizeof domain) == 0 &&
1019 		    (cp = strchr(domain, '.')) != NULL) {
1020 			strlcpy(domain, cp + 1, sizeof(domain));
1021 		}
1022 	}
1023 	if (!nflag && in.s_addr != INADDR_ANY) {
1024 		hp = gethostbyaddr((char *)&in, sizeof(in), AF_INET);
1025 		if (hp != NULL) {
1026 			if ((cp = strchr(hp->h_name, '.')) != NULL &&
1027 			    strcmp(cp + 1, domain) == 0)
1028 				*cp = '\0';
1029 			strlcpy(line, hp->h_name, sizeof(line));
1030 			return (line);
1031 		}
1032 	}
1033 	return (inet_ntoa(in));
1034 }
1035 
/*
 * Print the command synopsis on stderr and exit with status 1.
 */
void
usage(void)
{
	extern char *__progname;

	fprintf(stderr,
	    "usage: %s [-cDdIlnrSv] [-f first_ttl] [-g gateway_addr] [-m max_ttl]\n",
	    __progname);
	fputs("\t[-P proto] [-p port] [-q nqueries] [-s src_addr] [-t tos]\n",
	    stderr);
	fputs("\t[-w waittime] host [packetsize]\n", stderr);
	exit(1);
}
1047