/*	$NetBSD: dumpsys.c,v 1.17 2022/08/20 23:48:50 riastradh Exp $	*/

/*-
 * Copyright (c) 1996, 1997, 1998, 2000, 2004, 2006, 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Charles M. Hannum, by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Coyote Point Systems, Inc. which was written under contract to Coyote
 * Point by Jed Davis and Devon O'Dell.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)machdep.c	7.4 (Berkeley) 6/3/91
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: dumpsys.c,v 1.17 2022/08/20 23:48:50 riastradh Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kcore.h>
#include <sys/core.h>
#include <sys/conf.h>
#include <sys/exec.h>
#include <sys/exec_aout.h>

#include <machine/kcore.h>
#include <machine/pmap_private.h>

#include <uvm/uvm_extern.h>

/*
 * Exports, needed by savecore, the debugger or elsewhere in the kernel.
 */

void	dodumpsys(void);
void	dumpsys(void);

struct pcb dumppcb;
uint32_t dumpmag = 0x8fca0101;	/* magic number */
int	dumpsize;		/* pages */
long	dumplo;			/* blocks */
int	sparse_dump = 1;
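
/*
 * A sparse dump records only the pages marked in the bitmap built by
 * cpu_dump_prep_sparse() below (kernel mappings, by default), so it
 * is never larger than a full dump; clearing sparse_dump forces full
 * dumps.  On NetBSD/x86 this knob is also expected to be reachable
 * from userland as a machdep sysctl (machdep.sparse_dump).
 */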

/*
 * Module private.
 */

#define dump_headerbuf_size	PAGE_SIZE
#define dump_headerbuf_end	(dump_headerbuf + dump_headerbuf_size)
#define dump_headerbuf_avail	(dump_headerbuf_end - dump_headerbuf_ptr)
#define BYTES_PER_DUMP		PAGE_SIZE /* must be a multiple of pagesize */
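
/*
 * A worked example of the header-buffer macros above: assuming a
 * 4 KiB PAGE_SIZE, dump_headerbuf_avail starts at 4096 bytes and
 * shrinks as dump_headerbuf_ptr advances; once the buffer fills it
 * is flushed to disk and the pointer reset (see dump_header_addbytes()
 * and dump_header_flush() below).
 */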

static vaddr_t dumpspace;
static paddr_t max_paddr;
static uint8_t *sparse_dump_physmap;

static uint8_t *dump_headerbuf;
static uint8_t *dump_headerbuf_ptr;
static daddr_t dump_header_blkno;

static size_t dump_nmemsegs;
static size_t dump_npages;
static size_t dump_header_size;
static size_t dump_totalbytesleft;

static int cpu_dump(void);
static int cpu_dumpsize(void);
static u_long cpu_dump_mempagecnt(void);

static void dump_misc_init(void);
static void dump_seg_prep(void);
static int dump_seg_iter(int (*)(paddr_t, paddr_t));

static void sparse_dump_reset(void);
static void sparse_dump_mark(vaddr_t, vaddr_t, int);
static void cpu_dump_prep_sparse(void);

static void dump_header_start(void);
static int dump_header_flush(void);
static int dump_header_addbytes(const void *, size_t);
static int dump_header_addseg(paddr_t, paddr_t);
static int dump_header_finish(void);

static int dump_seg_count_range(paddr_t, paddr_t);
static int dumpsys_seg(paddr_t, paddr_t);

/*
 * From machdep.c.
 */

extern phys_ram_seg_t mem_clusters[VM_PHYSSEG_MAX];
extern int mem_cluster_cnt;

void
dodumpsys(void)
{
	const struct bdevsw *bdev;
	int dumpend, psize;
	int error;

	if (dumpdev == NODEV)
		return;

	bdev = bdevsw_lookup(dumpdev);
	if (bdev == NULL || bdev->d_psize == NULL)
		return;

	/*
	 * For dumps during autoconfiguration: if the dump device has
	 * only just been configured, cpu_dumpconf() may not have been
	 * run yet, so run it now.
	 */
	if (dumpsize == 0)
		cpu_dumpconf();
	if (dumplo <= 0 || dumpsize == 0) {
		printf("\ndump to dev %llu,%llu not possible\n",
		    (unsigned long long)major(dumpdev),
		    (unsigned long long)minor(dumpdev));
		return;
	}
	printf("\ndumping to dev %llu,%llu offset %ld\n",
	    (unsigned long long)major(dumpdev),
	    (unsigned long long)minor(dumpdev), dumplo);

	psize = bdev_size(dumpdev);
	printf("dump ");
	if (psize == -1) {
		printf("area unavailable\n");
		return;
	}

#if 0	/* XXX this doesn't work.  grr. */
	/* toss any characters present prior to dump */
	while (sget() != NULL); /* syscons and pccons differ */
#endif

	dump_seg_prep();
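	/*
	 * The dump occupies btodb(dump_header_size) blocks of header
	 * followed by ctod(dump_npages) blocks of page data.  E.g.,
	 * assuming 512-byte disk blocks and 4 KiB pages, ctod(1) == 8,
	 * so 32768 pages of data occupy 262144 blocks after the header.
	 */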
	dumpend = dumplo + btodb(dump_header_size) + ctod(dump_npages);
	if (dumpend > psize) {
		printf("failed: insufficient space (%d < %d)\n",
		    psize, dumpend);
		goto failed;
	}

	dump_header_start();
	if ((error = cpu_dump()) != 0)
		goto err;
	if ((error = dump_header_finish()) != 0)
		goto err;

	if (dump_header_blkno != dumplo + btodb(dump_header_size)) {
		printf("BAD header size (%ld [written] != %ld [expected])\n",
		    (long)(dump_header_blkno - dumplo),
		    (long)btodb(dump_header_size));
		goto failed;
	}

	dump_totalbytesleft = roundup(ptoa(dump_npages), BYTES_PER_DUMP);
	error = dump_seg_iter(dumpsys_seg);

	if (error == 0 && dump_header_blkno != dumpend) {
		printf("BAD dump size (%ld [written] != %ld [expected])\n",
		    (long)(dump_header_blkno - dumplo),
		    (long)(dumpend - dumplo));
		goto failed;
	}

 err:
	switch (error) {

	case ENXIO:
		printf("device bad\n");
		break;

	case EFAULT:
		printf("device not ready\n");
		break;

	case EINVAL:
		printf("area improper\n");
		break;

	case EIO:
		printf("i/o error\n");
		break;

	case EINTR:
		printf("aborted from console\n");
		break;

	case 0:
		printf("succeeded\n");
		break;

	default:
		printf("error %d\n", error);
		break;
	}
 failed:
	printf("\n\n");
	delay(5000000);		/* 5 seconds */
}

/*
 * This is called by main to set dumplo and dumpsize.
 * Dumps always skip the first PAGE_SIZE of disk space
 * in case there might be a disk label stored there.
 * If there is extra space, put dump at the end to
 * reduce the chance that swapping trashes it.
 *
 * Sparse dumps can't be placed as close to the end as possible, because
 * savecore(8) has to know where to start reading in the dump device
 * before it has access to any of the crashed system's state.
 *
 * Note also that a sparse dump will never be larger than a full one:
 * in order to add a phys_ram_seg_t to the header, at least one page
 * must be removed.
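 *
 * Worked example, assuming 512-byte disk blocks and 4 KiB pages:
 * dumping 128 MiB of RAM (32768 pages) needs dumpblks =
 * cpu_dumpsize() + ctod(32768) = 1 + 262144 blocks.  On a partition
 * of nblks = 1048576 blocks that fits, so dumplo = 1048576 - 262145
 * = 786431; on a smaller partition dumplo falls back to ctod(1) = 8
 * and only a sparse dump can hope to fit.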
 */
void
cpu_dumpconf(void)
{
	int nblks, dumpblks;	/* size of dump area */

	if (dumpdev == NODEV)
		goto bad;
	nblks = bdev_size(dumpdev);
	if (nblks <= ctod(1))
		goto bad;

	dumpblks = cpu_dumpsize();
	if (dumpblks < 0)
		goto bad;
	dumpblks += ctod(cpu_dump_mempagecnt());

	/* If dump won't fit (incl. room for possible label): */
	if (dumpblks > (nblks - ctod(1))) {
		/* A sparse dump might (and hopefully will) fit. */
		dumplo = ctod(1);
	} else {
		/* Put dump at end of partition */
		dumplo = nblks - dumpblks;
	}

	/* dumpsize is in page units, and doesn't include headers. */
	dumpsize = cpu_dump_mempagecnt();

	/* Now that we've decided this will work, init ancillary stuff. */
	dump_misc_init();
	return;

 bad:
	dumpsize = 0;
}

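/*
 * reserve_dumppages() is handed a chunk of kernel virtual address
 * space at bootstrap time (the caller is assumed to be the early
 * pmap/machdep startup code) and returns the address past the
 * reserved window; dumpsys_seg() later maps each BYTES_PER_DUMP-sized
 * piece of physical memory at this window before writing it out.
 */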
vaddr_t
reserve_dumppages(vaddr_t p)
{

	dumpspace = p;
	return (p + BYTES_PER_DUMP);
}

/*
 * Perform assorted dump-related initialization tasks.  Assumes that
 * the maximum physical memory address will not increase afterwards.
 */
static void
dump_misc_init(void)
{
	int i;

	if (dump_headerbuf != NULL)
		return; /* already called */

	for (i = 0; i < mem_cluster_cnt; ++i) {
		paddr_t top = mem_clusters[i].start + mem_clusters[i].size;
		if (max_paddr < top)
			max_paddr = top;
	}
#ifdef DUMP_DEBUG
	printf("dump_misc_init: max_paddr = %#" PRIxPADDR "\n", max_paddr);
#endif

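	/*
	 * The sparse-dump bitmap needs one bit per physical page up
	 * to max_paddr: e.g. with 4 GiB of physical address space and
	 * 4 KiB pages that is 1M bits, i.e. 128 KiB, rounded up to a
	 * whole number of pages.
	 */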
	sparse_dump_physmap = (void *)uvm_km_alloc(kernel_map,
	    roundup(max_paddr / (PAGE_SIZE * NBBY), PAGE_SIZE),
	    PAGE_SIZE, UVM_KMF_WIRED|UVM_KMF_ZERO);
	dump_headerbuf = (void *)uvm_km_alloc(kernel_map,
	    dump_headerbuf_size,
	    PAGE_SIZE, UVM_KMF_WIRED|UVM_KMF_ZERO);
	/* XXXjld should check for failure here, disable dumps if so. */
}

/*
 * Clear the set of pages to include in a sparse dump.
 */
static void
sparse_dump_reset(void)
{

	memset(sparse_dump_physmap, 0,
	    roundup(max_paddr / (PAGE_SIZE * NBBY), PAGE_SIZE));
}

/*
 * Include or exclude pages in a sparse dump, by half-open virtual
 * address interval (which may wrap around the end of the space).
 */
static void
sparse_dump_mark(vaddr_t vbegin, vaddr_t vend, int includep)
{
	pmap_t pmap;
	paddr_t p;
	vaddr_t v;

	/*
	 * If a partial page is called for, the whole page must be included.
	 */
	if (includep) {
		vbegin = rounddown(vbegin, PAGE_SIZE);
		vend = roundup(vend, PAGE_SIZE);
	} else {
		vbegin = roundup(vbegin, PAGE_SIZE);
		vend = rounddown(vend, PAGE_SIZE);
	}

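	/*
	 * Note that the loop below compares v != vend rather than
	 * v < vend, so the interval may legitimately wrap around the
	 * top of the address space; cpu_dump_prep_sparse() relies on
	 * this by passing vend == 0.
	 */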
	pmap = pmap_kernel();
	for (v = vbegin; v != vend; v += PAGE_SIZE) {
		if (pmap_extract(pmap, v, &p)) {
			if (includep)
				setbit(sparse_dump_physmap, p/PAGE_SIZE);
			else
				clrbit(sparse_dump_physmap, p/PAGE_SIZE);
		}
	}
}

/*
 * Machine-dependently decides on the contents of a sparse dump, using
 * the above.
 */
static void
cpu_dump_prep_sparse(void)
{

	sparse_dump_reset();
	/* XXX could the alternate recursive page table be skipped? */
	sparse_dump_mark((vaddr_t)PTE_BASE, 0, 1);
	/* Memory for I/O buffers could be unmarked here, for example. */
	/* The kernel text could also be unmarked, but gdb would be upset. */
}

/*
 * Abstractly iterate over the collection of memory segments to be
 * dumped; the callback lacks the customary environment-pointer
 * argument because none of the current users really need one.
 *
 * To be used only after dump_seg_prep is called to set things up.
 */
static int
dump_seg_iter(int (*callback)(paddr_t, paddr_t))
{
	int error, i;

#define CALLBACK(start,size) do {		\
	error = callback(start,size);		\
	if (error)				\
		return error;			\
} while(0)

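	/*
	 * Coalesce runs of consecutively marked pages within each
	 * cluster into single (start, size) callbacks; a run that is
	 * still open when the cluster ends is flushed after the scan
	 * loop.
	 */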
	for (i = 0; i < mem_cluster_cnt; ++i) {
		/*
		 * The bitmap is scanned within each memory segment,
		 * rather than over its entire domain, in case any
		 * pages outside of the memory proper have been mapped
		 * into kva; they might be devices that wouldn't
		 * appreciate being arbitrarily read, and including
		 * them could also break the assumption that a sparse
		 * dump will always be smaller than a full one.
		 */
		if (sparse_dump) {
			paddr_t p, start, end;
			int lastset;

			start = mem_clusters[i].start;
			end = start + mem_clusters[i].size;
			start = rounddown(start, PAGE_SIZE); /* unnecessary? */
			lastset = 0;
			for (p = start; p < end; p += PAGE_SIZE) {
				int thisset = isset(sparse_dump_physmap,
				    p/PAGE_SIZE);

				if (!lastset && thisset)
					start = p;
				if (lastset && !thisset)
					CALLBACK(start, p - start);
				lastset = thisset;
			}
			if (lastset)
				CALLBACK(start, p - start);
		} else
			CALLBACK(mem_clusters[i].start, mem_clusters[i].size);
	}
	return 0;
#undef CALLBACK
}

/*
 * Prepare for an impending core dump: decide what's being dumped and
 * how much space it will take up.
 */
static void
dump_seg_prep(void)
{

	if (sparse_dump)
		cpu_dump_prep_sparse();

	dump_nmemsegs = 0;
	dump_npages = 0;
	dump_seg_iter(dump_seg_count_range);

	dump_header_size = ALIGN(sizeof(kcore_seg_t)) +
	    ALIGN(sizeof(cpu_kcore_hdr_t)) +
	    ALIGN(dump_nmemsegs * sizeof(phys_ram_seg_t));
	dump_header_size = roundup(dump_header_size, dbtob(1));
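	/*
	 * The segment table dominates a sparse dump's header: e.g.,
	 * assuming 8-byte phys_ram_seg_t entries, 1000 discontiguous
	 * segments add about 8 KiB of descriptors, which the roundup()
	 * above pads to a whole number of disk blocks.
	 */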

	/*
	 * savecore(8) will read this to decide how many pages to
	 * copy, and cpu_dumpconf has already used the pessimistic
	 * value to set dumplo, so it's time to tell the truth.
	 */
	dumpsize = dump_npages;	/* XXX could these just be one variable? */
}

static int
dump_seg_count_range(paddr_t start, paddr_t size)
{

	++dump_nmemsegs;
	dump_npages += size / PAGE_SIZE;
	return 0;
}

/*
 * A sparse dump's header may be rather large, due to the number of
 * "segments" emitted.  These routines manage a simple output buffer,
 * so that the header can be written to disk incrementally.
 */
static void
dump_header_start(void)
{

	dump_headerbuf_ptr = dump_headerbuf;
	dump_header_blkno = dumplo;
}

static int
dump_header_flush(void)
{
	const struct bdevsw *bdev;
	size_t to_write;
	int error;

	bdev = bdevsw_lookup(dumpdev);
	to_write = roundup(dump_headerbuf_ptr - dump_headerbuf, dbtob(1));
	error = bdev->d_dump(dumpdev, dump_header_blkno,
	    dump_headerbuf, to_write);
	dump_header_blkno += btodb(to_write);
	dump_headerbuf_ptr = dump_headerbuf;
	return error;
}

static int
dump_header_addbytes(const void *vptr, size_t n)
{
	const char *ptr = vptr;
	int error;

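	/*
	 * Fill and flush the buffer as many times as needed; note that
	 * dump_headerbuf_avail is a macro over the write pointer, so it
	 * re-evaluates to a full buffer after each flush resets
	 * dump_headerbuf_ptr.
	 */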
	while (n > dump_headerbuf_avail) {
		memcpy(dump_headerbuf_ptr, ptr, dump_headerbuf_avail);
		ptr += dump_headerbuf_avail;
		n -= dump_headerbuf_avail;
		dump_headerbuf_ptr = dump_headerbuf_end;
		error = dump_header_flush();
		if (error)
			return error;
	}
	memcpy(dump_headerbuf_ptr, ptr, n);
	dump_headerbuf_ptr += n;

	return 0;
}

static int
dump_header_addseg(paddr_t start, paddr_t size)
{
	phys_ram_seg_t seg = { start, size };

	return dump_header_addbytes(&seg, sizeof(seg));
}

static int
dump_header_finish(void)
{

	memset(dump_headerbuf_ptr, 0, dump_headerbuf_avail);
	return dump_header_flush();
}

/*
 * cpu_dumpsize: calculate size of machine-dependent kernel core dump headers
 * for a full (non-sparse) dump.
 */
static int
cpu_dumpsize(void)
{
	int size;

	size = ALIGN(sizeof(kcore_seg_t)) + ALIGN(sizeof(cpu_kcore_hdr_t)) +
	    ALIGN(mem_cluster_cnt * sizeof(phys_ram_seg_t));
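	/*
	 * The result is in disk blocks: a full dump's header must fit
	 * in exactly one block, so return 1, or -1 on failure.
	 */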
	if (roundup(size, dbtob(1)) != dbtob(1))
		return (-1);

	return (1);
}

/*
 * cpu_dump_mempagecnt: calculate the size of RAM (in pages) to be dumped
 * for a full (non-sparse) dump.
 */
static u_long
cpu_dump_mempagecnt(void)
{
	u_long i, n;

	n = 0;
	for (i = 0; i < mem_cluster_cnt; i++)
		n += atop(mem_clusters[i].size);
	return (n);
}

/*
 * cpu_dump: dump the machine-dependent kernel core dump headers.
 */
static int
cpu_dump(void)
{
	kcore_seg_t seg;
	cpu_kcore_hdr_t cpuhdr;
	const struct bdevsw *bdev;

	bdev = bdevsw_lookup(dumpdev);
	if (bdev == NULL)
		return (ENXIO);

	/*
	 * Generate a segment header.
	 */
	CORE_SETMAGIC(seg, KCORE_MAGIC, MID_MACHINE, CORE_CPU);
	seg.c_size = dump_header_size - ALIGN(sizeof(seg));
	(void)dump_header_addbytes(&seg, ALIGN(sizeof(seg)));

	/*
	 * Add the machine-dependent header info.
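	 *
	 * PDPpaddr is the physical address of the kernel's top-level
	 * page table; the I386_KCORE_PAE bit is folded in so that
	 * consumers of the dump (e.g. libkvm) can tell the PAE and
	 * non-PAE page-table formats apart.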
	 */
	cpuhdr.pdppaddr = PDPpaddr;
	if (use_pae == 1)
		cpuhdr.pdppaddr |= I386_KCORE_PAE;
	cpuhdr.nmemsegs = dump_nmemsegs;
	(void)dump_header_addbytes(&cpuhdr, ALIGN(sizeof(cpuhdr)));

	/*
	 * Write out the memory segment descriptors.
	 */
	return dump_seg_iter(dump_header_addseg);
}

static int
dumpsys_seg(paddr_t maddr, paddr_t bytes)
{
	u_long i, m, n;
	daddr_t blkno;
	const struct bdevsw *bdev;
	int (*dump)(dev_t, daddr_t, void *, size_t);
	int error;

	bdev = bdevsw_lookup(dumpdev);
	dump = bdev->d_dump;

	blkno = dump_header_blkno;
	for (i = 0; i < bytes; i += n, dump_totalbytesleft -= n) {
		/* Print out how many MBs we have left to go. */
		if ((dump_totalbytesleft % (1024*1024)) == 0)
			printf_nolog("%lu ", (unsigned long)
			    (dump_totalbytesleft / (1024 * 1024)));

		/* Limit size for next transfer. */
		n = bytes - i;
		if (n > BYTES_PER_DUMP)
			n = BYTES_PER_DUMP;

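		/*
		 * Map this chunk read-only at the fixed dumpspace
		 * window, replacing the previous chunk's mapping, and
		 * hand it to the driver's d_dump entry point.
		 */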
		for (m = 0; m < n; m += NBPG)
			pmap_kenter_pa(dumpspace + m, maddr + m,
			    VM_PROT_READ, 0);
		pmap_update(pmap_kernel());

		error = (*dump)(dumpdev, blkno, (void *)dumpspace, n);
		if (error)
			return error;
		maddr += n;
		blkno += btodb(n);			/* XXX? */

#if 0	/* XXX this doesn't work.  grr. */
		/* operator aborting dump? */
		if (sget() != NULL)
			return EINTR;
#endif
	}
	dump_header_blkno = blkno;

	return 0;
}