xref: /netbsd-src/lib/libkvm/kvm.c (revision 03dcb730d46d34d85c9f496c1f5a3a6a43f2b7b3)
1 /*	$NetBSD: kvm.c,v 1.102 2016/03/29 06:51:40 mrg Exp $	*/
2 
3 /*-
4  * Copyright (c) 1989, 1992, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software developed by the Computer Systems
8  * Engineering group at Lawrence Berkeley Laboratory under DARPA contract
9  * BG 91-66 and contributed to Berkeley.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <sys/cdefs.h>
37 #if defined(LIBC_SCCS) && !defined(lint)
38 #if 0
39 static char sccsid[] = "@(#)kvm.c	8.2 (Berkeley) 2/13/94";
40 #else
41 __RCSID("$NetBSD: kvm.c,v 1.102 2016/03/29 06:51:40 mrg Exp $");
42 #endif
43 #endif /* LIBC_SCCS and not lint */
44 
45 #include <sys/param.h>
46 #include <sys/lwp.h>
47 #include <sys/proc.h>
48 #include <sys/ioctl.h>
49 #include <sys/stat.h>
50 #include <sys/sysctl.h>
51 
52 #include <sys/core.h>
53 #include <sys/exec.h>
54 #include <sys/kcore.h>
55 #include <sys/ksyms.h>
56 #include <sys/types.h>
57 
58 #include <uvm/uvm_extern.h>
59 
60 #include <machine/cpu.h>
61 
62 #include <ctype.h>
63 #include <errno.h>
64 #include <fcntl.h>
65 #include <limits.h>
66 #include <nlist.h>
67 #include <paths.h>
68 #include <stdarg.h>
69 #include <stdio.h>
70 #include <stdlib.h>
71 #include <string.h>
72 #include <unistd.h>
73 #include <kvm.h>
74 
75 #include "kvm_private.h"
76 
77 static int	_kvm_get_header(kvm_t *);
78 static kvm_t	*_kvm_open(kvm_t *, const char *, const char *,
79 		    const char *, int, char *);
80 static int	clear_gap(kvm_t *, bool (*)(void *, const void *, size_t),
81 		    void *, size_t);
82 static off_t	Lseek(kvm_t *, int, off_t, int);
83 static ssize_t	Pread(kvm_t *, int, void *, size_t, off_t);
84 
85 char *
86 kvm_geterr(kvm_t *kd)
87 {
88 	return (kd->errbuf);
89 }
90 
91 const char *
92 kvm_getkernelname(kvm_t *kd)
93 {
94 	return kd->kernelname;
95 }
96 
97 /*
98  * Report an error using printf style arguments.  "program" is kd->program
99  * on hard errors, and 0 on soft errors, so that under sun error emulation,
100  * only hard errors are printed out (otherwise, programs like gdb will
101  * generate tons of error messages when trying to access bogus pointers).
102  */
103 void
104 _kvm_err(kvm_t *kd, const char *program, const char *fmt, ...)
105 {
106 	va_list ap;
107 
108 	va_start(ap, fmt);
109 	if (program != NULL) {
110 		(void)fprintf(stderr, "%s: ", program);
111 		(void)vfprintf(stderr, fmt, ap);
112 		(void)fputc('\n', stderr);
113 	} else
114 		(void)vsnprintf(kd->errbuf,
115 		    sizeof(kd->errbuf), fmt, ap);
116 
117 	va_end(ap);
118 }
119 
120 void
121 _kvm_syserr(kvm_t *kd, const char *program, const char *fmt, ...)
122 {
123 	va_list ap;
124 	size_t n;
125 
126 	va_start(ap, fmt);
127 	if (program != NULL) {
128 		(void)fprintf(stderr, "%s: ", program);
129 		(void)vfprintf(stderr, fmt, ap);
130 		(void)fprintf(stderr, ": %s\n", strerror(errno));
131 	} else {
132 		char *cp = kd->errbuf;
133 
134 		(void)vsnprintf(cp, sizeof(kd->errbuf), fmt, ap);
135 		n = strlen(cp);
136 		(void)snprintf(&cp[n], sizeof(kd->errbuf) - n, ": %s",
137 		    strerror(errno));
138 	}
139 	va_end(ap);
140 }
141 
142 void *
143 _kvm_malloc(kvm_t *kd, size_t n)
144 {
145 	void *p;
146 
147 	if ((p = malloc(n)) == NULL)
148 		_kvm_err(kd, kd->program, "%s", strerror(errno));
149 	return (p);
150 }
151 
152 /*
153  * Wrapper around the lseek(2) system call; calls _kvm_syserr() for us
154  * in the event of emergency.
155  */
156 static off_t
157 Lseek(kvm_t *kd, int fd, off_t offset, int whence)
158 {
159 	off_t off;
160 
161 	errno = 0;
162 
163 	if ((off = lseek(fd, offset, whence)) == -1 && errno != 0) {
164 		_kvm_syserr(kd, kd->program, "Lseek");
165 		return ((off_t)-1);
166 	}
167 	return (off);
168 }
169 
170 ssize_t
171 _kvm_pread(kvm_t *kd, int fd, void *buf, size_t size, off_t off)
172 {
173 	ptrdiff_t moff;
174 	void *newbuf;
175 	size_t dsize;
176 	ssize_t rv;
177 	off_t doff;
178 
179 	/* If aligned nothing to do. */
180  	if (((off % kd->fdalign) | (size % kd->fdalign)) == 0) {
181 		return pread(fd, buf, size, off);
182  	}
183 
184 	/*
185 	 * Otherwise must buffer.  We can't tolerate short reads in this
186 	 * case (lazy bum).
187 	 */
188 	moff = (ptrdiff_t)off % kd->fdalign;
189 	doff = off - moff;
190 	dsize = moff + size + kd->fdalign - 1;
191 	dsize -= dsize % kd->fdalign;
192 	if (kd->iobufsz < dsize) {
193 		newbuf = realloc(kd->iobuf, dsize);
194 		if (newbuf == NULL) {
195 			_kvm_syserr(kd, 0, "cannot allocate I/O buffer");
196 			return (-1);
197 		}
198 		kd->iobuf = newbuf;
199 		kd->iobufsz = dsize;
200 	}
201 	rv = pread(fd, kd->iobuf, dsize, doff);
202 	if (rv < size + moff)
203 		return -1;
204 	memcpy(buf, kd->iobuf + moff, size);
205 	return size;
206 }
207 
208 /*
209  * Wrapper around the pread(2) system call; calls _kvm_syserr() for us
210  * in the event of emergency.
211  */
212 static ssize_t
213 Pread(kvm_t *kd, int fd, void *buf, size_t nbytes, off_t offset)
214 {
215 	ssize_t rv;
216 
217 	errno = 0;
218 
219 	if ((rv = _kvm_pread(kd, fd, buf, nbytes, offset)) != nbytes &&
220 	    errno != 0)
221 		_kvm_syserr(kd, kd->program, "Pread");
222 	return (rv);
223 }
224 
225 static kvm_t *
226 _kvm_open(kvm_t *kd, const char *uf, const char *mf, const char *sf, int flag,
227     char *errout)
228 {
229 	struct stat st;
230 	int ufgiven;
231 
232 	kd->pmfd = -1;
233 	kd->vmfd = -1;
234 	kd->swfd = -1;
235 	kd->nlfd = -1;
236 	kd->alive = KVM_ALIVE_DEAD;
237 	kd->procbase = NULL;
238 	kd->procbase_len = 0;
239 	kd->procbase2 = NULL;
240 	kd->procbase2_len = 0;
241 	kd->lwpbase = NULL;
242 	kd->lwpbase_len = 0;
243 	kd->nbpg = getpagesize();
244 	kd->swapspc = NULL;
245 	kd->argspc = NULL;
246 	kd->argspc_len = 0;
247 	kd->argbuf = NULL;
248 	kd->argv = NULL;
249 	kd->vmst = NULL;
250 	kd->vm_page_buckets = NULL;
251 	kd->kcore_hdr = NULL;
252 	kd->cpu_dsize = 0;
253 	kd->cpu_data = NULL;
254 	kd->dump_off = 0;
255 	kd->fdalign = 1;
256 	kd->iobuf = NULL;
257 	kd->iobufsz = 0;
258 
259 	if (flag & KVM_NO_FILES) {
260 		kd->alive = KVM_ALIVE_SYSCTL;
261 		return(kd);
262 	}
263 
264 	/*
265 	 * Call the MD open hook.  This sets:
266 	 *	usrstack, min_uva, max_uva
267 	 */
268 	if (_kvm_mdopen(kd)) {
269 		_kvm_err(kd, kd->program, "md init failed");
270 		goto failed;
271 	}
272 
273 	ufgiven = (uf != NULL);
274 	if (!ufgiven) {
275 #ifdef CPU_BOOTED_KERNEL
276 		/* 130 is 128 + '/' + '\0' */
277 		static char booted_kernel[130];
278 		int mib[2], rc;
279 		size_t len;
280 
281 		mib[0] = CTL_MACHDEP;
282 		mib[1] = CPU_BOOTED_KERNEL;
283 		booted_kernel[0] = '/';
284 		booted_kernel[1] = '\0';
285 		len = sizeof(booted_kernel) - 2;
286 		rc = sysctl(&mib[0], 2, &booted_kernel[1], &len, NULL, 0);
287 		booted_kernel[sizeof(booted_kernel) - 1] = '\0';
288 		uf = (booted_kernel[1] == '/') ?
289 		    &booted_kernel[1] : &booted_kernel[0];
290 		if (rc != -1)
291 			rc = stat(uf, &st);
292 		if (rc != -1 && !S_ISREG(st.st_mode))
293 			rc = -1;
294 		if (rc == -1)
295 #endif /* CPU_BOOTED_KERNEL */
296 			uf = _PATH_UNIX;
297 	}
298 	else if (strlen(uf) >= MAXPATHLEN) {
299 		_kvm_err(kd, kd->program, "exec file name too long");
300 		goto failed;
301 	}
302 	if (flag & ~O_RDWR) {
303 		_kvm_err(kd, kd->program, "bad flags arg");
304 		goto failed;
305 	}
306 	if (mf == 0)
307 		mf = _PATH_MEM;
308 	if (sf == 0)
309 		sf = _PATH_DRUM;
310 
311 	/*
312 	 * Open the kernel namelist.  If /dev/ksyms doesn't
313 	 * exist, open the current kernel.
314 	 */
315 	if (ufgiven == 0)
316 		kd->nlfd = open(_PATH_KSYMS, O_RDONLY | O_CLOEXEC, 0);
317 	if (kd->nlfd < 0) {
318 		if ((kd->nlfd = open(uf, O_RDONLY | O_CLOEXEC, 0)) < 0) {
319 			_kvm_syserr(kd, kd->program, "%s", uf);
320 			goto failed;
321 		}
322 		strlcpy(kd->kernelname, uf, sizeof(kd->kernelname));
323 	} else {
324 		strlcpy(kd->kernelname, _PATH_KSYMS, sizeof(kd->kernelname));
325 		/*
326 		 * We're here because /dev/ksyms was opened
327 		 * successfully.  However, we don't want to keep it
328 		 * open, so we close it now.  Later, we will open
329 		 * it again, since it will be the only case where
330 		 * kd->nlfd is negative.
331 		 */
332 		close(kd->nlfd);
333 		kd->nlfd = -1;
334 	}
335 
336 	if ((kd->pmfd = open(mf, flag | O_CLOEXEC, 0)) < 0) {
337 		_kvm_syserr(kd, kd->program, "%s", mf);
338 		goto failed;
339 	}
340 	if (fstat(kd->pmfd, &st) < 0) {
341 		_kvm_syserr(kd, kd->program, "%s", mf);
342 		goto failed;
343 	}
344 	if (S_ISCHR(st.st_mode) && strcmp(mf, _PATH_MEM) == 0) {
345 		/*
346 		 * If this is /dev/mem, open kmem too.  (Maybe we should
347 		 * make it work for either /dev/mem or /dev/kmem -- in either
348 		 * case you're working with a live kernel.)
349 		 */
350 		if ((kd->vmfd = open(_PATH_KMEM, flag | O_CLOEXEC, 0)) < 0) {
351 			_kvm_syserr(kd, kd->program, "%s", _PATH_KMEM);
352 			goto failed;
353 		}
354 		kd->alive = KVM_ALIVE_FILES;
355 		if ((kd->swfd = open(sf, flag | O_CLOEXEC, 0)) < 0) {
356 			if (errno != ENXIO) {
357 				_kvm_syserr(kd, kd->program, "%s", sf);
358 				goto failed;
359 			}
360 			/* swap is not configured?  not fatal */
361 		}
362 	} else {
363 		kd->fdalign = DEV_BSIZE;	/* XXX */
364 		/*
365 		 * This is a crash dump.
366 		 * Initialize the virtual address translation machinery.
367 		 *
368 		 * If there is no valid core header, fail silently here.
369 		 * The address translations however will fail without
370 		 * header. Things can be made to run by calling
371 		 * kvm_dump_mkheader() before doing any translation.
372 		 */
373 		if (_kvm_get_header(kd) == 0) {
374 			if (_kvm_initvtop(kd) < 0)
375 				goto failed;
376 		}
377 	}
378 	return (kd);
379 failed:
380 	/*
381 	 * Copy out the error if doing sane error semantics.
382 	 */
383 	if (errout != 0)
384 		(void)strlcpy(errout, kd->errbuf, _POSIX2_LINE_MAX);
385 	(void)kvm_close(kd);
386 	return (0);
387 }
388 
389 /*
390  * The kernel dump file (from savecore) contains:
391  *    kcore_hdr_t kcore_hdr;
392  *    kcore_seg_t cpu_hdr;
393  *    (opaque)    cpu_data; (size is cpu_hdr.c_size)
394  *	  kcore_seg_t mem_hdr;
395  *    (memory)    mem_data; (size is mem_hdr.c_size)
396  *
397  * Note: khdr is padded to khdr.c_hdrsize;
398  * cpu_hdr and mem_hdr are padded to khdr.c_seghdrsize
399  */
400 static int
401 _kvm_get_header(kvm_t *kd)
402 {
403 	kcore_hdr_t	kcore_hdr;
404 	kcore_seg_t	cpu_hdr;
405 	kcore_seg_t	mem_hdr;
406 	size_t		offset;
407 	ssize_t		sz;
408 
409 	/*
410 	 * Read the kcore_hdr_t
411 	 */
412 	sz = Pread(kd, kd->pmfd, &kcore_hdr, sizeof(kcore_hdr), (off_t)0);
413 	if (sz != sizeof(kcore_hdr))
414 		return (-1);
415 
416 	/*
417 	 * Currently, we only support dump-files made by the current
418 	 * architecture...
419 	 */
420 	if ((CORE_GETMAGIC(kcore_hdr) != KCORE_MAGIC) ||
421 	    (CORE_GETMID(kcore_hdr) != MID_MACHINE))
422 		return (-1);
423 
424 	/*
425 	 * Currently, we only support exactly 2 segments: cpu-segment
426 	 * and data-segment in exactly that order.
427 	 */
428 	if (kcore_hdr.c_nseg != 2)
429 		return (-1);
430 
431 	/*
432 	 * Save away the kcore_hdr.  All errors after this
433 	 * should do a to "goto fail" to deallocate things.
434 	 */
435 	kd->kcore_hdr = _kvm_malloc(kd, sizeof(kcore_hdr));
436 	memcpy(kd->kcore_hdr, &kcore_hdr, sizeof(kcore_hdr));
437 	offset = kcore_hdr.c_hdrsize;
438 
439 	/*
440 	 * Read the CPU segment header
441 	 */
442 	sz = Pread(kd, kd->pmfd, &cpu_hdr, sizeof(cpu_hdr), (off_t)offset);
443 	if (sz != sizeof(cpu_hdr))
444 		goto fail;
445 	if ((CORE_GETMAGIC(cpu_hdr) != KCORESEG_MAGIC) ||
446 	    (CORE_GETFLAG(cpu_hdr) != CORE_CPU))
447 		goto fail;
448 	offset += kcore_hdr.c_seghdrsize;
449 
450 	/*
451 	 * Read the CPU segment DATA.
452 	 */
453 	kd->cpu_dsize = cpu_hdr.c_size;
454 	kd->cpu_data = _kvm_malloc(kd, cpu_hdr.c_size);
455 	if (kd->cpu_data == NULL)
456 		goto fail;
457 	sz = Pread(kd, kd->pmfd, kd->cpu_data, cpu_hdr.c_size, (off_t)offset);
458 	if (sz != cpu_hdr.c_size)
459 		goto fail;
460 	offset += cpu_hdr.c_size;
461 
462 	/*
463 	 * Read the next segment header: data segment
464 	 */
465 	sz = Pread(kd, kd->pmfd, &mem_hdr, sizeof(mem_hdr), (off_t)offset);
466 	if (sz != sizeof(mem_hdr))
467 		goto fail;
468 	offset += kcore_hdr.c_seghdrsize;
469 
470 	if ((CORE_GETMAGIC(mem_hdr) != KCORESEG_MAGIC) ||
471 	    (CORE_GETFLAG(mem_hdr) != CORE_DATA))
472 		goto fail;
473 
474 	kd->dump_off = offset;
475 	return (0);
476 
477 fail:
478 	if (kd->kcore_hdr != NULL) {
479 		free(kd->kcore_hdr);
480 		kd->kcore_hdr = NULL;
481 	}
482 	if (kd->cpu_data != NULL) {
483 		free(kd->cpu_data);
484 		kd->cpu_data = NULL;
485 		kd->cpu_dsize = 0;
486 	}
487 	return (-1);
488 }
489 
490 /*
491  * The format while on the dump device is: (new format)
492  *	kcore_seg_t cpu_hdr;
493  *	(opaque)    cpu_data; (size is cpu_hdr.c_size)
494  *	kcore_seg_t mem_hdr;
495  *	(memory)    mem_data; (size is mem_hdr.c_size)
496  */
497 int
498 kvm_dump_mkheader(kvm_t *kd, off_t dump_off)
499 {
500 	kcore_seg_t	cpu_hdr;
501 	size_t hdr_size;
502 	ssize_t sz;
503 
504 	if (kd->kcore_hdr != NULL) {
505 	    _kvm_err(kd, kd->program, "already has a dump header");
506 	    return (-1);
507 	}
508 	if (ISALIVE(kd)) {
509 		_kvm_err(kd, kd->program, "don't use on live kernel");
510 		return (-1);
511 	}
512 
513 	/*
514 	 * Validate new format crash dump
515 	 */
516 	sz = Pread(kd, kd->pmfd, &cpu_hdr, sizeof(cpu_hdr), dump_off);
517 	if (sz != sizeof(cpu_hdr)) {
518 		if (sz == -1)
519 			_kvm_err(kd, 0, "read %zx bytes at offset %"PRIx64
520 			    " for cpu_hdr failed: %s", sizeof(cpu_hdr),
521 			    dump_off, strerror(errno));
522 		else
523 			_kvm_err(kd, 0, "read %zx bytes at offset %"PRIx64
524 			    " for cpu_hdr instead of requested %zu",
525 			    sz, dump_off, sizeof(cpu_hdr));
526 		return (-1);
527 	}
528 	if ((CORE_GETMAGIC(cpu_hdr) != KCORE_MAGIC)
529 		|| (CORE_GETMID(cpu_hdr) != MID_MACHINE)) {
530 		_kvm_err(kd, 0, "invalid magic in cpu_hdr");
531 		return (0);
532 	}
533 	hdr_size = ALIGN(sizeof(cpu_hdr));
534 
535 	/*
536 	 * Read the CPU segment.
537 	 */
538 	kd->cpu_dsize = cpu_hdr.c_size;
539 	kd->cpu_data = _kvm_malloc(kd, kd->cpu_dsize);
540 	if (kd->cpu_data == NULL) {
541 		_kvm_err(kd, kd->program, "no cpu_data");
542 		goto fail;
543 	}
544 	sz = Pread(kd, kd->pmfd, kd->cpu_data, cpu_hdr.c_size,
545 	    dump_off + hdr_size);
546 	if (sz != cpu_hdr.c_size) {
547 		_kvm_err(kd, kd->program, "size %zu != cpu_hdr.csize %"PRIu32,
548 		    sz, cpu_hdr.c_size);
549 		goto fail;
550 	}
551 	hdr_size += kd->cpu_dsize;
552 
553 	/*
554 	 * Leave phys mem pointer at beginning of memory data
555 	 */
556 	kd->dump_off = dump_off + hdr_size;
557 	if (Lseek(kd, kd->pmfd, kd->dump_off, SEEK_SET) == -1) {
558 		_kvm_err(kd, kd->program, "failed to seek to %" PRId64,
559 		    (int64_t)kd->dump_off);
560 		goto fail;
561 	}
562 
563 	/*
564 	 * Create a kcore_hdr.
565 	 */
566 	kd->kcore_hdr = _kvm_malloc(kd, sizeof(kcore_hdr_t));
567 	if (kd->kcore_hdr == NULL) {
568 		_kvm_err(kd, kd->program, "failed to allocate header");
569 		goto fail;
570 	}
571 
572 	kd->kcore_hdr->c_hdrsize    = ALIGN(sizeof(kcore_hdr_t));
573 	kd->kcore_hdr->c_seghdrsize = ALIGN(sizeof(kcore_seg_t));
574 	kd->kcore_hdr->c_nseg       = 2;
575 	CORE_SETMAGIC(*(kd->kcore_hdr), KCORE_MAGIC, MID_MACHINE,0);
576 
577 	/*
578 	 * Now that we have a valid header, enable translations.
579 	 */
580 	if (_kvm_initvtop(kd) == 0)
581 		/* Success */
582 		return (hdr_size);
583 
584 fail:
585 	if (kd->kcore_hdr != NULL) {
586 		free(kd->kcore_hdr);
587 		kd->kcore_hdr = NULL;
588 	}
589 	if (kd->cpu_data != NULL) {
590 		free(kd->cpu_data);
591 		kd->cpu_data = NULL;
592 		kd->cpu_dsize = 0;
593 	}
594 	return (-1);
595 }
596 
597 static int
598 clear_gap(kvm_t *kd, bool (*write_buf)(void *, const void *, size_t),
599     void *cookie, size_t size)
600 {
601 	char buf[1024];
602 	size_t len;
603 
604 	(void)memset(buf, 0, size > sizeof(buf) ? sizeof(buf) : size);
605 
606 	while (size > 0) {
607 		len = size > sizeof(buf) ? sizeof(buf) : size;
608 		if (!(*write_buf)(cookie, buf, len)) {
609 			_kvm_syserr(kd, kd->program, "clear_gap");
610 			return -1;
611 		}
612 		size -= len;
613 	}
614 
615 	return 0;
616 }
617 
618 /*
619  * Write the dump header by calling write_buf with cookie as first argument.
620  */
621 int
622 kvm_dump_header(kvm_t *kd, bool (*write_buf)(void *, const void *, size_t),
623     void *cookie, int dumpsize)
624 {
625 	kcore_seg_t	seghdr;
626 	long		offset;
627 	size_t		gap;
628 
629 	if (kd->kcore_hdr == NULL || kd->cpu_data == NULL) {
630 		_kvm_err(kd, kd->program, "no valid dump header(s)");
631 		return (-1);
632 	}
633 
634 	/*
635 	 * Write the generic header
636 	 */
637 	offset = 0;
638 	if (!(*write_buf)(cookie, kd->kcore_hdr, sizeof(kcore_hdr_t))) {
639 		_kvm_syserr(kd, kd->program, "kvm_dump_header");
640 		return (-1);
641 	}
642 	offset += kd->kcore_hdr->c_hdrsize;
643 	gap     = kd->kcore_hdr->c_hdrsize - sizeof(kcore_hdr_t);
644 	if (clear_gap(kd, write_buf, cookie, gap) == -1)
645 		return (-1);
646 
647 	/*
648 	 * Write the CPU header
649 	 */
650 	CORE_SETMAGIC(seghdr, KCORESEG_MAGIC, 0, CORE_CPU);
651 	seghdr.c_size = ALIGN(kd->cpu_dsize);
652 	if (!(*write_buf)(cookie, &seghdr, sizeof(seghdr))) {
653 		_kvm_syserr(kd, kd->program, "kvm_dump_header");
654 		return (-1);
655 	}
656 	offset += kd->kcore_hdr->c_seghdrsize;
657 	gap     = kd->kcore_hdr->c_seghdrsize - sizeof(seghdr);
658 	if (clear_gap(kd, write_buf, cookie, gap) == -1)
659 		return (-1);
660 
661 	if (!(*write_buf)(cookie, kd->cpu_data, kd->cpu_dsize)) {
662 		_kvm_syserr(kd, kd->program, "kvm_dump_header");
663 		return (-1);
664 	}
665 	offset += seghdr.c_size;
666 	gap     = seghdr.c_size - kd->cpu_dsize;
667 	if (clear_gap(kd, write_buf, cookie, gap) == -1)
668 		return (-1);
669 
670 	/*
671 	 * Write the actual dump data segment header
672 	 */
673 	CORE_SETMAGIC(seghdr, KCORESEG_MAGIC, 0, CORE_DATA);
674 	seghdr.c_size = dumpsize;
675 	if (!(*write_buf)(cookie, &seghdr, sizeof(seghdr))) {
676 		_kvm_syserr(kd, kd->program, "kvm_dump_header");
677 		return (-1);
678 	}
679 	offset += kd->kcore_hdr->c_seghdrsize;
680 	gap     = kd->kcore_hdr->c_seghdrsize - sizeof(seghdr);
681 	if (clear_gap(kd, write_buf, cookie, gap) == -1)
682 		return (-1);
683 
684 	return (int)offset;
685 }
686 
/*
 * write_buf callback for kvm_dump_header() that writes to a stdio stream.
 * "cookie" is the FILE * to write to.  Returns true when all len bytes
 * were accepted by fwrite().
 *
 * A zero-length request succeeds trivially: fwrite() returns 0 when the
 * element size is 0, which would otherwise be mistaken for a failure.
 */
static bool
kvm_dump_header_stdio(void *cookie, const void *buf, size_t len)
{
	FILE *fp = cookie;

	if (len == 0)
		return true;
	return fwrite(buf, len, 1, fp) == 1;
}
692 
693 int
694 kvm_dump_wrtheader(kvm_t *kd, FILE *fp, int dumpsize)
695 {
696 	return kvm_dump_header(kd, kvm_dump_header_stdio, fp, dumpsize);
697 }
698 
699 kvm_t *
700 kvm_openfiles(const char *uf, const char *mf, const char *sf,
701     int flag, char *errout)
702 {
703 	kvm_t *kd;
704 
705 	if ((kd = malloc(sizeof(*kd))) == NULL) {
706 		(void)strlcpy(errout, strerror(errno), _POSIX2_LINE_MAX);
707 		return (0);
708 	}
709 	kd->program = 0;
710 	return (_kvm_open(kd, uf, mf, sf, flag, errout));
711 }
712 
713 kvm_t *
714 kvm_open(const char *uf, const char *mf, const char *sf, int flag,
715     const char *program)
716 {
717 	kvm_t *kd;
718 
719 	if ((kd = malloc(sizeof(*kd))) == NULL) {
720 		(void)fprintf(stderr, "%s: %s\n",
721 		    program ? program : getprogname(), strerror(errno));
722 		return (0);
723 	}
724 	kd->program = program;
725 	return (_kvm_open(kd, uf, mf, sf, flag, NULL));
726 }
727 
728 int
729 kvm_close(kvm_t *kd)
730 {
731 	int error = 0;
732 
733 	if (kd->pmfd >= 0)
734 		error |= close(kd->pmfd);
735 	if (kd->vmfd >= 0)
736 		error |= close(kd->vmfd);
737 	if (kd->nlfd >= 0)
738 		error |= close(kd->nlfd);
739 	if (kd->swfd >= 0)
740 		error |= close(kd->swfd);
741 	if (kd->vmst)
742 		_kvm_freevtop(kd);
743 	kd->cpu_dsize = 0;
744 	if (kd->cpu_data != NULL)
745 		free(kd->cpu_data);
746 	if (kd->kcore_hdr != NULL)
747 		free(kd->kcore_hdr);
748 	if (kd->procbase != 0)
749 		free(kd->procbase);
750 	if (kd->procbase2 != 0)
751 		free(kd->procbase2);
752 	if (kd->lwpbase != 0)
753 		free(kd->lwpbase);
754 	if (kd->swapspc != 0)
755 		free(kd->swapspc);
756 	if (kd->argspc != 0)
757 		free(kd->argspc);
758 	if (kd->argbuf != 0)
759 		free(kd->argbuf);
760 	if (kd->argv != 0)
761 		free(kd->argv);
762 	if (kd->iobuf != 0)
763 		free(kd->iobuf);
764 	free(kd);
765 
766 	return (error);
767 }
768 
769 int
770 kvm_nlist(kvm_t *kd, struct nlist *nl)
771 {
772 	int rv, nlfd;
773 
774 	/*
775 	 * kd->nlfd might be negative when we get here, and in that
776 	 * case that means that we're using /dev/ksyms.
777 	 * So open it again, just for the time we retrieve the list.
778 	 */
779 	if (kd->nlfd < 0) {
780 		nlfd = open(_PATH_KSYMS, O_RDONLY | O_CLOEXEC, 0);
781 		if (nlfd < 0) {
782 			_kvm_err(kd, 0, "failed to open %s", _PATH_KSYMS);
783 			return (nlfd);
784 		}
785 	} else
786 		nlfd = kd->nlfd;
787 
788 	/*
789 	 * Call the nlist(3) routines to retrieve the given namelist.
790 	 */
791 	rv = __fdnlist(nlfd, nl);
792 
793 	if (rv == -1)
794 		_kvm_err(kd, 0, "bad namelist");
795 
796 	if (kd->nlfd < 0)
797 		close(nlfd);
798 
799 	return (rv);
800 }
801 
802 int
803 kvm_dump_inval(kvm_t *kd)
804 {
805 	struct nlist	nl[2];
806 	paddr_t		pa;
807 	size_t		dsize;
808 	off_t		doff;
809 	void		*newbuf;
810 
811 	if (ISALIVE(kd)) {
812 		_kvm_err(kd, kd->program, "clearing dump on live kernel");
813 		return (-1);
814 	}
815 	nl[0].n_name = "_dumpmag";
816 	nl[1].n_name = NULL;
817 
818 	if (kvm_nlist(kd, nl) == -1) {
819 		_kvm_err(kd, 0, "bad namelist");
820 		return (-1);
821 	}
822 	if (_kvm_kvatop(kd, (vaddr_t)nl[0].n_value, &pa) == 0)
823 		return (-1);
824 
825 	errno = 0;
826 	dsize = MAX(kd->fdalign, sizeof(u_long));
827 	if (kd->iobufsz < dsize) {
828 		newbuf = realloc(kd->iobuf, dsize);
829 		if (newbuf == NULL) {
830 			_kvm_syserr(kd, 0, "cannot allocate I/O buffer");
831 			return (-1);
832 		}
833 		kd->iobuf = newbuf;
834 		kd->iobufsz = dsize;
835 	}
836 	memset(kd->iobuf, 0, dsize);
837 	doff = _kvm_pa2off(kd, pa);
838 	doff -= doff % kd->fdalign;
839 	if (pwrite(kd->pmfd, kd->iobuf, dsize, doff) == -1) {
840 		_kvm_syserr(kd, 0, "cannot invalidate dump - pwrite");
841 		return (-1);
842 	}
843 	return (0);
844 }
845 
846 ssize_t
847 kvm_read(kvm_t *kd, u_long kva, void *buf, size_t len)
848 {
849 	int cc;
850 	void *cp;
851 
852 	if (ISKMEM(kd)) {
853 		/*
854 		 * We're using /dev/kmem.  Just read straight from the
855 		 * device and let the active kernel do the address translation.
856 		 */
857 		errno = 0;
858 		cc = _kvm_pread(kd, kd->vmfd, buf, len, (off_t)kva);
859 		if (cc < 0) {
860 			_kvm_syserr(kd, 0, "kvm_read");
861 			return (-1);
862 		} else if (cc < len)
863 			_kvm_err(kd, kd->program, "short read");
864 		return (cc);
865 	} else if (ISSYSCTL(kd)) {
866 		_kvm_err(kd, kd->program, "kvm_open called with KVM_NO_FILES, "
867 		    "can't use kvm_read");
868 		return (-1);
869 	} else {
870 		if ((kd->kcore_hdr == NULL) || (kd->cpu_data == NULL)) {
871 			_kvm_err(kd, kd->program, "no valid dump header");
872 			return (-1);
873 		}
874 		cp = buf;
875 		while (len > 0) {
876 			paddr_t	pa;
877 			off_t	foff;
878 
879 			cc = _kvm_kvatop(kd, (vaddr_t)kva, &pa);
880 			if (cc == 0)
881 				return (-1);
882 			if (cc > len)
883 				cc = len;
884 			foff = _kvm_pa2off(kd, pa);
885 			errno = 0;
886 			cc = _kvm_pread(kd, kd->pmfd, cp, (size_t)cc, foff);
887 			if (cc < 0) {
888 				_kvm_syserr(kd, kd->program, "kvm_read");
889 				break;
890 			}
891 			/*
892 			 * If kvm_kvatop returns a bogus value or our core
893 			 * file is truncated, we might wind up seeking beyond
894 			 * the end of the core file in which case the read will
895 			 * return 0 (EOF).
896 			 */
897 			if (cc == 0)
898 				break;
899 			cp = (char *)cp + cc;
900 			kva += cc;
901 			len -= cc;
902 		}
903 		return ((char *)cp - (char *)buf);
904 	}
905 	/* NOTREACHED */
906 }
907 
908 ssize_t
909 kvm_write(kvm_t *kd, u_long kva, const void *buf, size_t len)
910 {
911 	int cc;
912 
913 	if (ISKMEM(kd)) {
914 		/*
915 		 * Just like kvm_read, only we write.
916 		 */
917 		errno = 0;
918 		cc = pwrite(kd->vmfd, buf, len, (off_t)kva);
919 		if (cc < 0) {
920 			_kvm_syserr(kd, 0, "kvm_write");
921 			return (-1);
922 		} else if (cc < len)
923 			_kvm_err(kd, kd->program, "short write");
924 		return (cc);
925 	} else if (ISSYSCTL(kd)) {
926 		_kvm_err(kd, kd->program, "kvm_open called with KVM_NO_FILES, "
927 		    "can't use kvm_write");
928 		return (-1);
929 	} else {
930 		_kvm_err(kd, kd->program,
931 		    "kvm_write not implemented for dead kernels");
932 		return (-1);
933 	}
934 	/* NOTREACHED */
935 }
936