xref: /netbsd-src/sbin/savecore/savecore.c (revision ce0bb6e8d2e560ecacbe865a848624f94498063b)
1 /*	$NetBSD: savecore.c,v 1.21 1995/03/18 15:01:02 cgd Exp $	*/
2 
3 /*-
4  * Copyright (c) 1986, 1992, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed by the University of
18  *	California, Berkeley and its contributors.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #ifndef lint
37 static char copyright[] =
38 "@(#) Copyright (c) 1986, 1992, 1993\n\
39 	The Regents of the University of California.  All rights reserved.\n";
40 #endif /* not lint */
41 
42 #ifndef lint
43 #if 0
44 static char sccsid[] = "@(#)savecore.c	8.3 (Berkeley) 1/2/94";
45 #else
46 static char rcsid[] = "$NetBSD: savecore.c,v 1.21 1995/03/18 15:01:02 cgd Exp $";
47 #endif
48 #endif /* not lint */
49 
50 #include <sys/param.h>
51 #include <sys/stat.h>
52 #include <sys/mount.h>
53 #include <sys/syslog.h>
54 #include <sys/time.h>
55 
56 #include <dirent.h>
57 #include <errno.h>
58 #include <fcntl.h>
59 #include <nlist.h>
60 #include <paths.h>
61 #include <stdio.h>
62 #include <stdlib.h>
63 #include <string.h>
64 #include <tzfile.h>
65 #include <unistd.h>
66 
/*
 * zopen() is not defined in this file.  save_core() calls it with mode "w"
 * and bits 0 to obtain the output stream used when -z was given.
 */
67 extern FILE *zopen __P((const char *fname, const char *mode, int bits));
68 
/*
 * Subtract KERNBASE from a kernel symbol value.  Every caller in this file
 * adds the result to dumplo to form a seek offset on the dump device.
 * KERNBASE itself is not defined here; presumably it comes from a system
 * header -- TODO confirm.
 */
69 #define ok(number) ((number) - KERNBASE)
70 
/*
 * Symbols looked up in _PATH_UNIX by kmem_setup().  Each X_* constant is the
 * array index of the entry that follows it; the same constants are used to
 * index dump_nl[], so the two tables must list the symbols in the same order.
 */
71 struct nlist current_nl[] = {	/* Namelist for currently running system. */
72 #define X_DUMPDEV	0
73 	{ "_dumpdev" },
74 #define X_DUMPLO	1
75 	{ "_dumplo" },
76 #define X_TIME		2
77 	{ "_time" },
78 #define	X_DUMPSIZE	3
79 	{ "_dumpsize" },
80 #define X_VERSION	4
81 	{ "_version" },
82 #define X_PANICSTR	5
83 	{ "_panicstr" },
84 #define	X_DUMPMAG	6
85 	{ "_dumpmag" },
86 	{ "" },
87 };
/*
 * Indices that kmem_setup() insists on finding (non-zero n_value) in
 * current_nl[] and dump_nl[] respectively.  Each list ends with -1.
 */
88 int cursyms[] = { X_DUMPDEV, X_DUMPLO, X_VERSION, X_DUMPMAG, -1 };
89 int dumpsyms[] = { X_TIME, X_DUMPSIZE, X_VERSION, X_PANICSTR, X_DUMPMAG, -1 };
90 
/*
 * Symbols looked up by kmem_setup() in the kernel image given on the command
 * line, or in _PATH_UNIX when none was given.  The resulting n_value fields
 * are turned into dump-device offsets with dumplo + ok(n_value).  Indexed
 * with the X_* constants.
 */
91 struct nlist dump_nl[] = {	/* Name list for dumped system. */
92 	{ "_dumpdev" },		/* Entries MUST be the same as */
93 	{ "_dumplo" },		/*	those in current_nl[].  */
94 	{ "_time" },
95 	{ "_dumpsize" },
96 	{ "_version" },
97 	{ "_panicstr" },
98 	{ "_dumpmag" },
99 	{ "" },
100 };
101 
102 /* Types match kernel declarations. */
/*
 * dumplo is read from /dev/kmem and then multiplied by DEV_BSIZE in
 * kmem_setup(); after that call it is used as a byte offset.
 * dumpmag is read from the running kernel; dump_exists() compares it with
 * the value stored in the dump.
 * dumpsize is only read (and multiplied by getpagesize()) inside
 * save_core(); it is zero until then.
 */
103 long	dumplo;				/* where dump starts on dumpdev */
104 int	dumpmag;			/* magic number in dump */
105 int	dumpsize;			/* amount of memory dumped */
106 
/*
 * kernel is set from -N or from the second operand; when it is NULL the code
 * falls back to _PATH_UNIX.  vers is filled in by kmem_setup() only when
 * kernel is NULL.  panicstr holds the value read from the dump at _panicstr;
 * check_kmem() treats a non-zero value as the kernel address of the message.
 */
107 char	*kernel;
108 char	*dirname;			/* directory to save dumps in */
109 char	*ddname;			/* name of dump device */
110 dev_t	dumpdev;			/* dump device */
111 int	dumpfd;				/* read/write descriptor on block dev */
112 time_t	now;				/* current date */
113 char	panic_mesg[1024];
114 int	panicstr;
115 char	vers[1024];
116 
/* Set by main() from -c, -z, -f and -v (or -d) respectively. */
117 int	clear, compress, force, verbose;	/* flags */
118 
/*
 * Forward declarations.  The capitalised names (Create, Lseek, Open, Read,
 * Write) are wrappers that log through syslog() and exit(1) on failure.
 * NOTE(review): log() is declared here but no definition or call of it
 * appears in this file as reviewed.
 */
119 void	 check_kmem __P((void));
120 int	 check_space __P((void));
121 void	 clear_dump __P((void));
122 int	 Create __P((char *, int));
123 int	 dump_exists __P((void));
124 char	*find_dev __P((dev_t, int));
125 int	 get_crashtime __P((void));
126 void	 kmem_setup __P((void));
127 void	 log __P((int, char *, ...));
128 void	 Lseek __P((int, off_t, int));
129 int	 Open __P((char *, int rw));
130 int	 Read __P((int, void *, int));
131 char	*rawname __P((char *s));
132 void	 save_core __P((void));
133 void	 usage __P((void));
134 void	 Write __P((int, void *, int));
135 
136 int
137 main(argc, argv)
138 	int argc;
139 	char *argv[];
140 {
141 	int ch;
142 
143 	openlog("savecore", LOG_PERROR, LOG_DAEMON);
144 
145 	while ((ch = getopt(argc, argv, "cdfN:vz")) != -1)
146 		switch(ch) {
147 		case 'c':
148 			clear = 1;
149 			break;
150 		case 'd':		/* Not documented. */
151 		case 'v':
152 			verbose = 1;
153 			break;
154 		case 'f':
155 			force = 1;
156 			break;
157 		case 'N':
158 			kernel = optarg;
159 			break;
160 		case 'z':
161 			compress = 1;
162 			break;
163 		case '?':
164 		default:
165 			usage();
166 		}
167 	argc -= optind;
168 	argv += optind;
169 
170 	if (!clear) {
171 		if (argc != 1 && argc != 2)
172 			usage();
173 		dirname = argv[0];
174 	}
175 	if (argc == 2)
176 		kernel = argv[1];
177 
178 	(void)time(&now);
179 	kmem_setup();
180 
181 	if (clear) {
182 		clear_dump();
183 		exit(0);
184 	}
185 
186 	if (!dump_exists() && !force)
187 		exit(1);
188 
189 	check_kmem();
190 
191 	if (panicstr)
192 		syslog(LOG_ALERT, "reboot after panic: %s", panic_mesg);
193 	else
194 		syslog(LOG_ALERT, "reboot");
195 
196 	if ((!get_crashtime() || !check_space()) && !force)
197 		exit(1);
198 
199 	save_core();
200 
201 	clear_dump();
202 	exit(0);
203 }
204 
205 void
206 kmem_setup()
207 {
208 	FILE *fp;
209 	int kmem, i;
210 	char *dump_sys;
211 
212 	/*
213 	 * Some names we need for the currently running system, others for
214 	 * the system that was running when the dump was made.  The values
215 	 * obtained from the current system are used to look for things in
216 	 * /dev/kmem that cannot be found in the dump_sys namelist, but are
217 	 * presumed to be the same (since the disk partitions are probably
218 	 * the same!)
219 	 */
220 	if ((nlist(_PATH_UNIX, current_nl)) == -1)
221 		syslog(LOG_ERR, "%s: nlist: %s", _PATH_UNIX, strerror(errno));
222 	for (i = 0; cursyms[i] != -1; i++)
223 		if (current_nl[cursyms[i]].n_value == 0) {
224 			syslog(LOG_ERR, "%s: %s not in namelist",
225 			    _PATH_UNIX, current_nl[cursyms[i]].n_name);
226 			exit(1);
227 		}
228 
229 	dump_sys = kernel ? kernel : _PATH_UNIX;
230 	if ((nlist(dump_sys, dump_nl)) == -1)
231 		syslog(LOG_ERR, "%s: nlist: %s", dump_sys, strerror(errno));
232 	for (i = 0; dumpsyms[i] != -1; i++)
233 		if (dump_nl[dumpsyms[i]].n_value == 0) {
234 			syslog(LOG_ERR, "%s: %s not in namelist",
235 			    dump_sys, dump_nl[dumpsyms[i]].n_name);
236 			exit(1);
237 		}
238 
239 	kmem = Open(_PATH_KMEM, O_RDONLY);
240 	Lseek(kmem, (off_t)current_nl[X_DUMPDEV].n_value, L_SET);
241 	(void)Read(kmem, &dumpdev, sizeof(dumpdev));
242 	if (dumpdev == NODEV) {
243 		syslog(LOG_WARNING, "no core dump (no dumpdev)");
244 		exit(1);
245 	}
246 	Lseek(kmem, (off_t)current_nl[X_DUMPLO].n_value, L_SET);
247 	(void)Read(kmem, &dumplo, sizeof(dumplo));
248 	if (verbose)
249 		(void)printf("dumplo = %d (%d * %d)\n",
250 		    dumplo, dumplo/DEV_BSIZE, DEV_BSIZE);
251 	Lseek(kmem, (off_t)current_nl[X_DUMPMAG].n_value, L_SET);
252 	(void)Read(kmem, &dumpmag, sizeof(dumpmag));
253 	dumplo *= DEV_BSIZE;
254 	ddname = find_dev(dumpdev, S_IFBLK);
255 	dumpfd = Open(ddname, O_RDWR);
256 	fp = fdopen(kmem, "r");
257 	if (fp == NULL) {
258 		syslog(LOG_ERR, "%s: fdopen: %m", _PATH_KMEM);
259 		exit(1);
260 	}
261 	if (kernel)
262 		return;
263 	(void)fseek(fp, (off_t)current_nl[X_VERSION].n_value, L_SET);
264 	(void)fgets(vers, sizeof(vers), fp);
265 
266 	/* Don't fclose(fp), we use dumpfd later. */
267 }
268 
269 void
270 check_kmem()
271 {
272 	register char *cp;
273 	FILE *fp;
274 	char core_vers[1024];
275 
276 	fp = fdopen(dumpfd, "r");
277 	if (fp == NULL) {
278 		syslog(LOG_ERR, "%s: fdopen: %m", ddname);
279 		exit(1);
280 	}
281 	fseek(fp, (off_t)(dumplo + ok(dump_nl[X_VERSION].n_value)), L_SET);
282 	fgets(core_vers, sizeof(core_vers), fp);
283 	if (strcmp(vers, core_vers) && kernel == 0)
284 		syslog(LOG_WARNING,
285 		    "warning: %s version mismatch:\n\t%s\nand\t%s\n",
286 		    _PATH_UNIX, vers, core_vers);
287 	(void)fseek(fp,
288 	    (off_t)(dumplo + ok(dump_nl[X_PANICSTR].n_value)), L_SET);
289 	(void)fread(&panicstr, sizeof(panicstr), 1, fp);
290 	if (panicstr) {
291 		(void)fseek(fp, dumplo + ok(panicstr), L_SET);
292 		cp = panic_mesg;
293 		do
294 			*cp = getc(fp);
295 		while (*cp++ && cp < &panic_mesg[sizeof(panic_mesg)]);
296 	}
297 	/* Don't fclose(fp), we use dumpfd later. */
298 }
299 
300 void
301 clear_dump()
302 {
303 	long newdumplo;
304 
305 	newdumplo = 0;
306 	Lseek(dumpfd, (off_t)(dumplo + ok(dump_nl[X_DUMPMAG].n_value)), L_SET);
307 	Write(dumpfd, &newdumplo, sizeof(newdumplo));
308 }
309 
310 int
311 dump_exists()
312 {
313 	int newdumpmag;
314 
315 	Lseek(dumpfd, (off_t)(dumplo + ok(dump_nl[X_DUMPMAG].n_value)), L_SET);
316 	(void)Read(dumpfd, &newdumpmag, sizeof(newdumpmag));
317 	if (newdumpmag != dumpmag) {
318 		if (verbose)
319 			syslog(LOG_WARNING, "magic number mismatch (%x != %x)",
320 			    newdumpmag, dumpmag);
321 		syslog(LOG_WARNING, "no core dump");
322 		return (0);
323 	}
324 	return (1);
325 }
326 
327 char buf[1024 * 1024];
328 
329 void
330 save_core()
331 {
332 	register FILE *fp;
333 	register int bounds, ifd, nr, nw, ofd;
334 	char *rawp, path[MAXPATHLEN];
335 
336 	/*
337 	 * Get the current number and update the bounds file.  Do the update
338 	 * now, because may fail later and don't want to overwrite anything.
339 	 */
340 	(void)snprintf(path, sizeof(path), "%s/bounds", dirname);
341 	if ((fp = fopen(path, "r")) == NULL)
342 		goto err1;
343 	if (fgets(buf, sizeof(buf), fp) == NULL) {
344 		if (ferror(fp))
345 err1:			syslog(LOG_WARNING, "%s: %s", path, strerror(errno));
346 		bounds = 0;
347 	} else
348 		bounds = atoi(buf);
349 	if (fp != NULL)
350 		(void)fclose(fp);
351 	if ((fp = fopen(path, "w")) == NULL)
352 		syslog(LOG_ERR, "%s: %m", path);
353 	else {
354 		(void)fprintf(fp, "%d\n", bounds + 1);
355 		(void)fclose(fp);
356 	}
357 	(void)fclose(fp);
358 
359 	/* Create the core file. */
360 	(void)snprintf(path, sizeof(path), "%s/netbsd.%d%s.core",
361 	    dirname, bounds, compress ? ".Z" : "");
362 	if (compress) {
363 		if ((fp = zopen(path, "w", 0)) == NULL) {
364 			syslog(LOG_ERR, "%s: %s", path, strerror(errno));
365 			exit(1);
366 		}
367 	} else
368 		ofd = Create(path, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
369 
370 	/* Open the raw device. */
371 	rawp = rawname(ddname);
372 	if ((ifd = open(rawp, O_RDONLY)) == -1) {
373 		syslog(LOG_WARNING, "%s: %m; using block device", rawp);
374 		ifd = dumpfd;
375 	}
376 
377 	/* Read the dump size. */
378 	Lseek(dumpfd, (off_t)(dumplo + ok(dump_nl[X_DUMPSIZE].n_value)), L_SET);
379 	(void)Read(dumpfd, &dumpsize, sizeof(dumpsize));
380 
381 	/* Seek to the start of the core. */
382 	Lseek(ifd, (off_t)dumplo, L_SET);
383 
384 	/* Copy the core file. */
385 	dumpsize *= getpagesize();
386 	syslog(LOG_NOTICE, "writing %score to %s",
387 	    compress ? "compressed " : "", path);
388 	for (; dumpsize > 0; dumpsize -= nr) {
389 		(void)printf("%6dK\r", dumpsize / 1024);
390 		(void)fflush(stdout);
391 		nr = read(ifd, buf, MIN(dumpsize, sizeof(buf)));
392 		if (nr <= 0) {
393 			if (nr == 0)
394 				syslog(LOG_WARNING,
395 				    "WARNING: EOF on dump device");
396 			else
397 				syslog(LOG_ERR, "%s: %m", rawp);
398 			goto err2;
399 		}
400 		if (compress)
401 			nw = fwrite(buf, 1, nr, fp);
402 		else
403 			nw = write(ofd, buf, nr);
404 		if (nw != nr) {
405 			syslog(LOG_ERR, "%s: %s",
406 			    path, strerror(nw == 0 ? EIO : errno));
407 err2:			syslog(LOG_WARNING,
408 			    "WARNING: core may be incomplete");
409 			(void)printf("\n");
410 			exit(1);
411 		}
412 	}
413 	(void)printf("\n");
414 	(void)close(ifd);
415 	if (compress)
416 		(void)fclose(fp);
417 	else
418 		(void)close(ofd);
419 
420 	/* Copy the kernel. */
421 	ifd = Open(kernel ? kernel : _PATH_UNIX, O_RDONLY);
422 	(void)snprintf(path, sizeof(path), "%s/netbsd.%d%s",
423 	    dirname, bounds, compress ? ".Z" : "");
424 	if (compress) {
425 		if ((fp = zopen(path, "w", 0)) == NULL) {
426 			syslog(LOG_ERR, "%s: %s", path, strerror(errno));
427 			exit(1);
428 		}
429 	} else
430 		ofd = Create(path, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
431 	syslog(LOG_NOTICE, "writing %skernel to %s",
432 	    compress ? "compressed " : "", path);
433 	while ((nr = read(ifd, buf, sizeof(buf))) > 0) {
434 		if (compress)
435 			nw = fwrite(buf, 1, nr, fp);
436 		else
437 			nw = write(ofd, buf, nr);
438 		if (nw != nr) {
439 			syslog(LOG_ERR, "%s: %s",
440 			    path, strerror(nw == 0 ? EIO : errno));
441 			syslog(LOG_WARNING,
442 			    "WARNING: kernel may be incomplete");
443 			exit(1);
444 		}
445 	}
446 	if (nr < 0) {
447 		syslog(LOG_ERR, "%s: %s",
448 		    kernel ? kernel : _PATH_UNIX, strerror(errno));
449 		syslog(LOG_WARNING,
450 		    "WARNING: kernel may be incomplete");
451 		exit(1);
452 	}
453 	if (compress)
454 		(void)fclose(fp);
455 	else
456 		(void)close(ofd);
457 }
458 
459 char *
460 find_dev(dev, type)
461 	register dev_t dev;
462 	register int type;
463 {
464 	register DIR *dfd;
465 	struct dirent *dir;
466 	struct stat sb;
467 	char *dp, devname[MAXPATHLEN + 1];
468 
469 	if ((dfd = opendir(_PATH_DEV)) == NULL) {
470 		syslog(LOG_ERR, "%s: %s", _PATH_DEV, strerror(errno));
471 		exit(1);
472 	}
473 	(void)strcpy(devname, _PATH_DEV);
474 	while ((dir = readdir(dfd))) {
475 		(void)strcpy(devname + sizeof(_PATH_DEV) - 1, dir->d_name);
476 		if (lstat(devname, &sb)) {
477 			syslog(LOG_ERR, "%s: %s", devname, strerror(errno));
478 			continue;
479 		}
480 		if ((sb.st_mode & S_IFMT) != type)
481 			continue;
482 		if (dev == sb.st_rdev) {
483 			closedir(dfd);
484 			if ((dp = strdup(devname)) == NULL) {
485 				syslog(LOG_ERR, "%s", strerror(errno));
486 				exit(1);
487 			}
488 			return (dp);
489 		}
490 	}
491 	closedir(dfd);
492 	syslog(LOG_ERR, "can't find device %d/%d", major(dev), minor(dev));
493 	exit(1);
494 }
495 
/*
 * Build the raw-device name for block device path `s' by inserting an 'r'
 * in front of the last path component ("/dev/sd0b" -> "/dev/rsd0b").
 *
 * Returns a newly allocated string.  If `s' contains no '/' or ends in '/'
 * (empty last component), an error is logged and `s' itself is returned.
 * Exits with status 1 if memory cannot be allocated.
 */
char *
rawname(s)
	char *s;
{
	char *sl, *raw;
	size_t dirlen, len;

	/* '\0', not '0': reject an empty component, not a leading zero. */
	if ((sl = strrchr(s, '/')) == NULL || sl[1] == '\0') {
		syslog(LOG_ERR,
		    "can't make raw dump device name from %s", s);
		return (s);
	}

	dirlen = (size_t)(sl - s);
	/* directory + "/r" + last component + terminating NUL */
	len = dirlen + 2 + strlen(sl + 1) + 1;
	if ((raw = malloc(len)) == NULL) {
		syslog(LOG_ERR, "%s", strerror(errno));
		exit(1);
	}
	/* The precision argument of %.*s must be an int. */
	(void)snprintf(raw, len, "%.*s/r%s", (int)dirlen, s, sl + 1);
	return (raw);
}
514 
515 int
516 get_crashtime()
517 {
518 	time_t dumptime;			/* Time the dump was taken. */
519 
520 	Lseek(dumpfd, (off_t)(dumplo + ok(dump_nl[X_TIME].n_value)), L_SET);
521 	(void)Read(dumpfd, &dumptime, sizeof(dumptime));
522 	if (dumptime == 0) {
523 		if (verbose)
524 			syslog(LOG_ERR, "dump time is zero");
525 		return (0);
526 	}
527 	(void)printf("savecore: system went down at %s", ctime(&dumptime));
528 #define	LEEWAY	(7 * SECSPERDAY)
529 	if (dumptime < now - LEEWAY || dumptime > now + LEEWAY) {
530 		(void)printf("dump time is unreasonable\n");
531 		return (0);
532 	}
533 	return (1);
534 }
535 
536 int
537 check_space()
538 {
539 	register FILE *fp;
540 	char *tkernel;
541 	off_t minfree, spacefree, kernelsize, needed;
542 	struct stat st;
543 	struct statfs fsbuf;
544 	char buf[100], path[MAXPATHLEN];
545 
546 	tkernel = kernel ? kernel : _PATH_UNIX;
547 	if (stat(tkernel, &st) < 0) {
548 		syslog(LOG_ERR, "%s: %m", tkernel);
549 		exit(1);
550 	}
551 	kernelsize = st.st_blocks * S_BLKSIZE;
552 	if (statfs(dirname, &fsbuf) < 0) {
553 		syslog(LOG_ERR, "%s: %m", dirname);
554 		exit(1);
555 	}
556  	spacefree = (fsbuf.f_bavail * fsbuf.f_bsize) / 1024;
557 
558 	(void)snprintf(path, sizeof(path), "%s/minfree", dirname);
559 	if ((fp = fopen(path, "r")) == NULL)
560 		minfree = 0;
561 	else {
562 		if (fgets(buf, sizeof(buf), fp) == NULL)
563 			minfree = 0;
564 		else
565 			minfree = atoi(buf);
566 		(void)fclose(fp);
567 	}
568 
569 	needed = (dumpsize + kernelsize) / 1024;
570  	if (minfree > 0 && spacefree - needed < minfree) {
571 		syslog(LOG_WARNING,
572 		    "no dump, not enough free space on device");
573 		return (0);
574 	}
575 	if (spacefree - needed < minfree)
576 		syslog(LOG_WARNING,
577 		    "dump performed, but free space threshold crossed");
578 	return (1);
579 }
580 
/*
 * open(2) wrapper: open `name' with flags `rw' and return the descriptor.
 * On failure the error is logged and the program exits with status 1.
 */
int
Open(name, rw)
	char *name;
	int rw;
{
	int fd;

	fd = open(name, rw, 0);
	if (fd >= 0)
		return (fd);

	syslog(LOG_ERR, "%s: %m", name);
	exit(1);
	/* NOTREACHED */
}
594 
/*
 * read(2) wrapper that insists on a complete transfer.
 *
 * Returns `size' when exactly that many bytes were read into `bp'.  Any
 * other outcome is fatal: the error is logged and the program exits with
 * status 1.  A short read or end of file does not set errno, so it is
 * reported as EIO (the same convention Write() uses) rather than with
 * whatever stale value errno happens to hold.
 */
int
Read(fd, bp, size)
	int fd, size;
	void *bp;
{
	int nr;

	nr = read(fd, bp, size);
	if (nr != size) {
		syslog(LOG_ERR, "read: %s", strerror(nr == -1 ? errno : EIO));
		exit(1);
	}
	return (nr);
}
609 
/*
 * lseek(2) wrapper: reposition `fd' to `off' relative to `flag'.
 * On failure the error is logged and the program exits with status 1.
 */
void
Lseek(fd, off, flag)
	int fd, flag;
	off_t off;
{
	if (lseek(fd, off, flag) != -1)
		return;

	syslog(LOG_ERR, "lseek: %m");
	exit(1);
}
623 
/*
 * Create (or truncate) `file' for writing with permission bits `mode' and
 * return the descriptor.  On failure the error is logged and the program
 * exits with status 1.
 */
int
Create(file, mode)
	char *file;
	int mode;
{
	register int fd;

	fd = open(file, O_WRONLY | O_CREAT | O_TRUNC, mode);
	if (fd >= 0)
		return (fd);

	syslog(LOG_ERR, "%s: %m", file);
	exit(1);
	/* NOTREACHED */
}
638 
/*
 * write(2) wrapper that insists on a complete transfer of `size' bytes from
 * `bp'.  A failed write is logged with its errno, a short write as EIO; in
 * both cases the program exits with status 1.
 */
void
Write(fd, bp, size)
	int fd, size;
	void *bp;
{
	int n;

	n = write(fd, bp, size);
	if (n >= size)
		return;

	syslog(LOG_ERR, "write: %s", strerror(n == -1 ? errno : EIO));
	exit(1);
}
651 
/*
 * Log the command synopsis and exit with status 1.  (-d is accepted by
 * main() as well but is intentionally left out of the synopsis.)
 */
void
usage()
{
	syslog(LOG_ERR, "usage: savecore [-cfvz] [-N system] directory");
	exit(EXIT_FAILURE);
}
658