10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 53446Smrj * Common Development and Distribution License (the "License"). 63446Smrj * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 217563SPrasad.Singamsetty@Sun.COM 220Sstevel@tonic-gate /* 2312042SDave.Plauger@Sun.COM * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. 
240Sstevel@tonic-gate */ 250Sstevel@tonic-gate 260Sstevel@tonic-gate #include <sys/types.h> 270Sstevel@tonic-gate #include <sys/param.h> 280Sstevel@tonic-gate #include <sys/systm.h> 290Sstevel@tonic-gate #include <sys/vm.h> 300Sstevel@tonic-gate #include <sys/proc.h> 310Sstevel@tonic-gate #include <sys/file.h> 320Sstevel@tonic-gate #include <sys/conf.h> 330Sstevel@tonic-gate #include <sys/kmem.h> 340Sstevel@tonic-gate #include <sys/mem.h> 350Sstevel@tonic-gate #include <sys/mman.h> 360Sstevel@tonic-gate #include <sys/vnode.h> 370Sstevel@tonic-gate #include <sys/errno.h> 380Sstevel@tonic-gate #include <sys/memlist.h> 390Sstevel@tonic-gate #include <sys/dumphdr.h> 400Sstevel@tonic-gate #include <sys/dumpadm.h> 410Sstevel@tonic-gate #include <sys/ksyms.h> 420Sstevel@tonic-gate #include <sys/compress.h> 430Sstevel@tonic-gate #include <sys/stream.h> 440Sstevel@tonic-gate #include <sys/strsun.h> 450Sstevel@tonic-gate #include <sys/cmn_err.h> 460Sstevel@tonic-gate #include <sys/bitmap.h> 470Sstevel@tonic-gate #include <sys/modctl.h> 480Sstevel@tonic-gate #include <sys/utsname.h> 490Sstevel@tonic-gate #include <sys/systeminfo.h> 500Sstevel@tonic-gate #include <sys/vmem.h> 510Sstevel@tonic-gate #include <sys/log.h> 520Sstevel@tonic-gate #include <sys/var.h> 530Sstevel@tonic-gate #include <sys/debug.h> 540Sstevel@tonic-gate #include <sys/sunddi.h> 550Sstevel@tonic-gate #include <fs/fs_subr.h> 560Sstevel@tonic-gate #include <sys/fs/snode.h> 570Sstevel@tonic-gate #include <sys/ontrap.h> 580Sstevel@tonic-gate #include <sys/panic.h> 590Sstevel@tonic-gate #include <sys/dkio.h> 600Sstevel@tonic-gate #include <sys/vtoc.h> 610Sstevel@tonic-gate #include <sys/errorq.h> 620Sstevel@tonic-gate #include <sys/fm/util.h> 636423Sgw25295 #include <sys/fs/zfs.h> 640Sstevel@tonic-gate 650Sstevel@tonic-gate #include <vm/hat.h> 660Sstevel@tonic-gate #include <vm/as.h> 670Sstevel@tonic-gate #include <vm/page.h> 6810843SDave.Plauger@Sun.COM #include <vm/pvn.h> 690Sstevel@tonic-gate #include 
<vm/seg.h> 700Sstevel@tonic-gate #include <vm/seg_kmem.h> 7111066Srafael.vanoni@sun.com #include <sys/clock_impl.h> 7211480SStuart.Maybee@Sun.COM #include <sys/hold_page.h> 730Sstevel@tonic-gate 7410843SDave.Plauger@Sun.COM #include <bzip2/bzlib.h> 7510843SDave.Plauger@Sun.COM 7610843SDave.Plauger@Sun.COM /* 7710843SDave.Plauger@Sun.COM * Crash dump time is dominated by disk write time. To reduce this, 7810843SDave.Plauger@Sun.COM * the stronger compression method bzip2 is applied to reduce the dump 7910843SDave.Plauger@Sun.COM * size and hence reduce I/O time. However, bzip2 is much more 8010843SDave.Plauger@Sun.COM * computationally expensive than the existing lzjb algorithm, so to 8110843SDave.Plauger@Sun.COM * avoid increasing compression time, CPUs that are otherwise idle 8210843SDave.Plauger@Sun.COM * during panic are employed to parallelize the compression task. 8310843SDave.Plauger@Sun.COM * Many helper CPUs are needed to prevent bzip2 from being a 8410843SDave.Plauger@Sun.COM * bottleneck, and on systems with too few CPUs, the lzjb algorithm is 8510843SDave.Plauger@Sun.COM * parallelized instead. Lastly, I/O and compression are performed by 8610843SDave.Plauger@Sun.COM * different CPUs, and are hence overlapped in time, unlike the older 8710843SDave.Plauger@Sun.COM * serial code. 8810843SDave.Plauger@Sun.COM * 8910843SDave.Plauger@Sun.COM * Another important consideration is the speed of the dump 9010843SDave.Plauger@Sun.COM * device. Faster disks need fewer CPUs in order to benefit from 9110843SDave.Plauger@Sun.COM * parallel lzjb versus parallel bzip2. Therefore, the CPU count 9210843SDave.Plauger@Sun.COM * threshold for switching from parallel lzjb to parallel bzip2 is 9310843SDave.Plauger@Sun.COM * elevated for faster disks. The dump device speed is inferred from 9410843SDave.Plauger@Sun.COM * the setting for dumpbuf.iosize, see dump_update_clevel. 
9510843SDave.Plauger@Sun.COM */ 9610843SDave.Plauger@Sun.COM 9710843SDave.Plauger@Sun.COM /* 9810843SDave.Plauger@Sun.COM * exported vars (none are declared static, so all have external linkage) 9910843SDave.Plauger@Sun.COM */ 10010843SDave.Plauger@Sun.COM kmutex_t dump_lock; /* lock for dump configuration; dumpbuf_resize() and dump_update_clevel() assert it is held */ 10110843SDave.Plauger@Sun.COM dumphdr_t *dumphdr; /* dump header */ 1020Sstevel@tonic-gate int dump_conflags = DUMP_KERNEL; /* dump configuration flags */ 10310843SDave.Plauger@Sun.COM vnode_t *dumpvp; /* dump device vnode pointer */ 10410843SDave.Plauger@Sun.COM u_offset_t dumpvp_size; /* size of dump device, in bytes */ 10510843SDave.Plauger@Sun.COM char *dumppath; /* pathname of dump device */ 10610843SDave.Plauger@Sun.COM int dump_timeout = 120; /* timeout for dumping pages; NOTE(review): units are not visible in this part of the file (presumably seconds) -- confirm */ 10710843SDave.Plauger@Sun.COM int dump_timeleft; /* portion of dump_timeout remaining */ 10810843SDave.Plauger@Sun.COM int dump_ioerr; /* dump i/o error */ 10910843SDave.Plauger@Sun.COM int dump_check_used; /* enable check for used pages */ 11010843SDave.Plauger@Sun.COM 11110843SDave.Plauger@Sun.COM /* 11210843SDave.Plauger@Sun.COM * Tunables for dump compression and parallelism. These can be set via 11310843SDave.Plauger@Sun.COM * /etc/system. 11410843SDave.Plauger@Sun.COM * 11510843SDave.Plauger@Sun.COM * dump_ncpu_low number of helpers for parallel lzjb 11610843SDave.Plauger@Sun.COM * This is also the minimum configuration. 11710843SDave.Plauger@Sun.COM * 11810843SDave.Plauger@Sun.COM * dump_bzip2_level bzip2 compression level: 1-9 11910843SDave.Plauger@Sun.COM * Higher numbers give greater compression, but take more memory 12010843SDave.Plauger@Sun.COM * and time. Memory used per helper is ~(dump_bzip2_level * 1MB). 
12110843SDave.Plauger@Sun.COM * 12210843SDave.Plauger@Sun.COM * dump_plat_mincpu the cross-over limit for using bzip2 (per platform): 12310843SDave.Plauger@Sun.COM * if dump_plat_mincpu == 0, then always do single threaded dump 12410843SDave.Plauger@Sun.COM * if ncpu >= dump_plat_mincpu then try to use bzip2 12510843SDave.Plauger@Sun.COM * 12610843SDave.Plauger@Sun.COM * dump_metrics_on if set, metrics are collected in the kernel, passed 12710843SDave.Plauger@Sun.COM * to savecore via the dump file, and recorded by savecore in 12810843SDave.Plauger@Sun.COM * METRICS.txt. 12910843SDave.Plauger@Sun.COM */ 13010843SDave.Plauger@Sun.COM uint_t dump_ncpu_low = 4; /* minimum config for parallel lzjb */ 13110843SDave.Plauger@Sun.COM uint_t dump_bzip2_level = 1; /* bzip2 level (1-9) */ 13210843SDave.Plauger@Sun.COM 133*12931SDave.Plauger@Sun.COM /* Use dump_plat_mincpu_default unless this variable is set by /etc/system. MINCPU_NOT_SET is the "untouched" sentinel: dump_update_clevel() replaces it with dump_plat_mincpu_default before computing the threshold. */ 134*12931SDave.Plauger@Sun.COM #define MINCPU_NOT_SET ((uint_t)-1) 135*12931SDave.Plauger@Sun.COM uint_t dump_plat_mincpu = MINCPU_NOT_SET; 136*12931SDave.Plauger@Sun.COM 13711178SDave.Plauger@Sun.COM /* tunables for pre-reserved heap; NOTE(review): units and consumers of these two values are outside this part of the file */ 13811178SDave.Plauger@Sun.COM uint_t dump_kmem_permap = 1024; 13911178SDave.Plauger@Sun.COM uint_t dump_kmem_pages = 8; 14011178SDave.Plauger@Sun.COM 14110843SDave.Plauger@Sun.COM /* Define multiple buffers per helper to avoid stalling */ 14210843SDave.Plauger@Sun.COM #define NCBUF_PER_HELPER 2 14310843SDave.Plauger@Sun.COM #define NCMAP_PER_HELPER 4 14410843SDave.Plauger@Sun.COM 14510843SDave.Plauger@Sun.COM /* minimum number of helpers configured; MINHELPERS expands to the dump_ncpu_low variable, so it and MINCBUFS are run-time values, not compile-time constants */ 14610843SDave.Plauger@Sun.COM #define MINHELPERS (dump_ncpu_low) 14710843SDave.Plauger@Sun.COM #define MINCBUFS (MINHELPERS * NCBUF_PER_HELPER) 14810843SDave.Plauger@Sun.COM 14910843SDave.Plauger@Sun.COM /* 15010843SDave.Plauger@Sun.COM * Define constant parameters. 
15110843SDave.Plauger@Sun.COM * 15210843SDave.Plauger@Sun.COM * CBUF_SIZE size of an output buffer 15310843SDave.Plauger@Sun.COM * 15410843SDave.Plauger@Sun.COM * CBUF_MAPSIZE size of virtual range for mapping pages 15510843SDave.Plauger@Sun.COM * 15610843SDave.Plauger@Sun.COM * CBUF_MAPNP size of virtual range in pages 15710843SDave.Plauger@Sun.COM * 15810843SDave.Plauger@Sun.COM */ 15910843SDave.Plauger@Sun.COM #define DUMP_1KB ((size_t)1 << 10) 16010843SDave.Plauger@Sun.COM #define DUMP_1MB ((size_t)1 << 20) 16110843SDave.Plauger@Sun.COM #define CBUF_SIZE ((size_t)1 << 17) /* 128KB */ 16210843SDave.Plauger@Sun.COM #define CBUF_MAPSHIFT (22) /* log2 of CBUF_MAPSIZE */ 16310843SDave.Plauger@Sun.COM #define CBUF_MAPSIZE ((size_t)1 << CBUF_MAPSHIFT) /* 4MB */ 16410843SDave.Plauger@Sun.COM #define CBUF_MAPNP ((size_t)1 << (CBUF_MAPSHIFT - PAGESHIFT)) /* CBUF_MAPSIZE divided by the page size */ 16510843SDave.Plauger@Sun.COM 16610843SDave.Plauger@Sun.COM /* 16710843SDave.Plauger@Sun.COM * Compression metrics are accumulated nano-second subtotals. The 16810843SDave.Plauger@Sun.COM * results are normalized by the number of pages dumped. A report is 16910843SDave.Plauger@Sun.COM * generated when dumpsys() completes and is saved in the dump image 17010843SDave.Plauger@Sun.COM * after the trailing dump header. 17110843SDave.Plauger@Sun.COM * 17210843SDave.Plauger@Sun.COM * Metrics are always collected. Set the variable dump_metrics_on to 17310843SDave.Plauger@Sun.COM * cause metrics to be saved in the crash file, where savecore will 17410843SDave.Plauger@Sun.COM * save it in the file METRICS.txt. 
17510843SDave.Plauger@Sun.COM */ 17610843SDave.Plauger@Sun.COM #define PERPAGES \ 17710843SDave.Plauger@Sun.COM PERPAGE(bitmap) PERPAGE(map) PERPAGE(unmap) \ 17810843SDave.Plauger@Sun.COM PERPAGE(copy) PERPAGE(compress) \ 17910843SDave.Plauger@Sun.COM PERPAGE(write) \ 18010843SDave.Plauger@Sun.COM PERPAGE(inwait) PERPAGE(outwait) 18110843SDave.Plauger@Sun.COM 18210843SDave.Plauger@Sun.COM /* perpage_t: PERPAGES is expanded with PERPAGE(x) temporarily defined as a field declaration, giving one hrtime_t member per listed metric (bitmap, map, unmap, copy, compress, write, inwait, outwait). Adding a name to PERPAGES adds a member here. */ typedef struct perpage { 18310843SDave.Plauger@Sun.COM #define PERPAGE(x) hrtime_t x; 18410843SDave.Plauger@Sun.COM PERPAGES 18510843SDave.Plauger@Sun.COM #undef PERPAGE 18610843SDave.Plauger@Sun.COM } perpage_t; 18710843SDave.Plauger@Sun.COM 18810843SDave.Plauger@Sun.COM /* 18910843SDave.Plauger@Sun.COM * This macro controls the code generation for collecting dump 19010843SDave.Plauger@Sun.COM * performance information. By default, the code is generated, but 19110843SDave.Plauger@Sun.COM * automatic saving of the information is disabled. If dump_metrics_on 19210843SDave.Plauger@Sun.COM * is set to 1, the timing information is passed to savecore via the 19310843SDave.Plauger@Sun.COM * crash file, where it is appended to the file dump-dir/METRICS.txt. 
19410843SDave.Plauger@Sun.COM */ 19510843SDave.Plauger@Sun.COM #define COLLECT_METRICS 19610843SDave.Plauger@Sun.COM 19710843SDave.Plauger@Sun.COM #ifdef COLLECT_METRICS 19810843SDave.Plauger@Sun.COM uint_t dump_metrics_on = 0; /* set to 1 to enable recording metrics */ 19910843SDave.Plauger@Sun.COM 20010843SDave.Plauger@Sun.COM /* Timing macros. v names an accumulator and m one of its fields; the token v##ts must name the companion timestamp object (e.g. perpage / perpagets in dumpsync_t and helper_t). HRSTART/HRBEGIN record gethrtime() in v##ts.m; HRSTOP/HREND add the elapsed time to v.m; HRNORM divides v.m by n. None of the expansions carries a trailing semicolon or a do/while wrapper. NOTE(review): HRBEGIN expands to TWO statements and writes v.size, a member that perpage_t as defined in this file does not have -- it is unsafe as the body of an unbraced if/else and will not compile with a perpage_t accumulator. */ #define HRSTART(v, m) v##ts.m = gethrtime() 20110843SDave.Plauger@Sun.COM #define HRSTOP(v, m) v.m += gethrtime() - v##ts.m 20210843SDave.Plauger@Sun.COM #define HRBEGIN(v, m, s) v##ts.m = gethrtime(); v.size += s 20310843SDave.Plauger@Sun.COM #define HREND(v, m) v.m += gethrtime() - v##ts.m 20410843SDave.Plauger@Sun.COM #define HRNORM(v, m, n) v.m /= (n) 20510843SDave.Plauger@Sun.COM 2060Sstevel@tonic-gate #else /* metrics compiled out: every timing macro expands to nothing */ 20710843SDave.Plauger@Sun.COM #define HRSTART(v, m) 20810843SDave.Plauger@Sun.COM #define HRSTOP(v, m) 20910843SDave.Plauger@Sun.COM #define HRBEGIN(v, m, s) 21010843SDave.Plauger@Sun.COM #define HREND(v, m) 21110843SDave.Plauger@Sun.COM #define HRNORM(v, m, n) 21210843SDave.Plauger@Sun.COM #endif /* COLLECT_METRICS */ 21310843SDave.Plauger@Sun.COM 21410843SDave.Plauger@Sun.COM /* 21510843SDave.Plauger@Sun.COM * Buffers for copying and compressing memory pages. 21610843SDave.Plauger@Sun.COM * 21710843SDave.Plauger@Sun.COM * cbuf_t buffer controllers: used for both input and output. 21810843SDave.Plauger@Sun.COM * 21910843SDave.Plauger@Sun.COM * The buffer state indicates how it is being used: 22010843SDave.Plauger@Sun.COM * 22110843SDave.Plauger@Sun.COM * CBUF_FREEMAP: CBUF_MAPSIZE virtual address range is available for 22210843SDave.Plauger@Sun.COM * mapping input pages. 22310843SDave.Plauger@Sun.COM * 22410843SDave.Plauger@Sun.COM * CBUF_INREADY: input pages are mapped and ready for compression by a 22510843SDave.Plauger@Sun.COM * helper. 22610843SDave.Plauger@Sun.COM * 22710843SDave.Plauger@Sun.COM * CBUF_USEDMAP: mapping has been consumed by a helper. Needs unmap. 
22810843SDave.Plauger@Sun.COM * 22910843SDave.Plauger@Sun.COM * CBUF_FREEBUF: CBUF_SIZE output buffer, which is available. 23010843SDave.Plauger@Sun.COM * 23110843SDave.Plauger@Sun.COM * CBUF_WRITE: CBUF_SIZE block of compressed pages from a helper, 23210843SDave.Plauger@Sun.COM * ready to write out. 23310843SDave.Plauger@Sun.COM * 23410843SDave.Plauger@Sun.COM * CBUF_ERRMSG: CBUF_SIZE block of error messages from a helper 23510843SDave.Plauger@Sun.COM * (reports UE errors.) 23610843SDave.Plauger@Sun.COM */ 23710843SDave.Plauger@Sun.COM 23810843SDave.Plauger@Sun.COM /* cbufstate_t: the first three states describe input (mapping) buffers, the last three describe CBUF_SIZE output buffers. */ typedef enum cbufstate { 23910843SDave.Plauger@Sun.COM CBUF_FREEMAP, 24010843SDave.Plauger@Sun.COM CBUF_INREADY, 24110843SDave.Plauger@Sun.COM CBUF_USEDMAP, 24210843SDave.Plauger@Sun.COM CBUF_FREEBUF, 24310843SDave.Plauger@Sun.COM CBUF_WRITE, 24410843SDave.Plauger@Sun.COM CBUF_ERRMSG 24510843SDave.Plauger@Sun.COM } cbufstate_t; 24610843SDave.Plauger@Sun.COM 24710843SDave.Plauger@Sun.COM typedef struct cbuf cbuf_t; 24810843SDave.Plauger@Sun.COM 24910843SDave.Plauger@Sun.COM /* struct cbuf: controller for one buffer; the same layout is used for the cmap[] (input mapping) and cbuf[] (output) arrays in dumpcfg_t. */ struct cbuf { 25010843SDave.Plauger@Sun.COM cbuf_t *next; /* next in list */ 25110843SDave.Plauger@Sun.COM cbufstate_t state; /* processing state */ 25210843SDave.Plauger@Sun.COM size_t used; /* amount used */ 25310843SDave.Plauger@Sun.COM size_t size; /* mem size */ 25410843SDave.Plauger@Sun.COM char *buf; /* kmem or vmem: dump_update_clevel() releases cbuf[] entries with kmem_free(buf, size) and cmap[] entries with vmem_xfree(heap_arena, buf, CBUF_MAPSIZE) */ 25510843SDave.Plauger@Sun.COM pgcnt_t pagenum; /* index to pfn map */ 25610843SDave.Plauger@Sun.COM pgcnt_t bitnum; /* first set bitnum */ 25710843SDave.Plauger@Sun.COM pfn_t pfn; /* first pfn in mapped range */ 25810843SDave.Plauger@Sun.COM int off; /* byte offset to first pfn */ 25910843SDave.Plauger@Sun.COM }; 2600Sstevel@tonic-gate 2610Sstevel@tonic-gate /* 26210843SDave.Plauger@Sun.COM * cqueue_t queues: a uni-directional channel for communication 26310843SDave.Plauger@Sun.COM * from the master to helper tasks or vice-versa using put and 26410843SDave.Plauger@Sun.COM * get primitives. 
Both mappings and data buffers are passed via 26510843SDave.Plauger@Sun.COM * queues. Producers close a queue when done. The number of 26610843SDave.Plauger@Sun.COM * active producers is reference counted so the consumer can 26710843SDave.Plauger@Sun.COM * detect end of data. Concurrent access is mediated by atomic 26810843SDave.Plauger@Sun.COM * operations for panic dump, or mutex/cv for live dump. 26910843SDave.Plauger@Sun.COM * 27010843SDave.Plauger@Sun.COM * There are four queues, used as follows: 27110843SDave.Plauger@Sun.COM * 27210843SDave.Plauger@Sun.COM * Queue Dataflow NewState 27310843SDave.Plauger@Sun.COM * -------------------------------------------------- 27410843SDave.Plauger@Sun.COM * mainq master -> master FREEMAP 27510843SDave.Plauger@Sun.COM * master has initialized or unmapped an input buffer 27610843SDave.Plauger@Sun.COM * -------------------------------------------------- 27710843SDave.Plauger@Sun.COM * helperq master -> helper INREADY 27810843SDave.Plauger@Sun.COM * master has mapped input for use by helper 27910843SDave.Plauger@Sun.COM * -------------------------------------------------- 28010843SDave.Plauger@Sun.COM * mainq master <- helper USEDMAP 28110843SDave.Plauger@Sun.COM * helper is done with input 28210843SDave.Plauger@Sun.COM * -------------------------------------------------- 28310843SDave.Plauger@Sun.COM * freebufq master -> helper FREEBUF 28410843SDave.Plauger@Sun.COM * master has initialized or written an output buffer 28510843SDave.Plauger@Sun.COM * -------------------------------------------------- 28610843SDave.Plauger@Sun.COM * mainq master <- helper WRITE 28710843SDave.Plauger@Sun.COM * block of compressed pages from a helper 28810843SDave.Plauger@Sun.COM * -------------------------------------------------- 28910843SDave.Plauger@Sun.COM * mainq master <- helper ERRMSG 29010843SDave.Plauger@Sun.COM * error messages from a helper (memory error case) 29110843SDave.Plauger@Sun.COM * 
-------------------------------------------------- 29210843SDave.Plauger@Sun.COM * writerq master <- master WRITE 29310843SDave.Plauger@Sun.COM * non-blocking queue of blocks to write 29410843SDave.Plauger@Sun.COM * -------------------------------------------------- 29510843SDave.Plauger@Sun.COM */ 29610843SDave.Plauger@Sun.COM typedef struct cqueue { 29710843SDave.Plauger@Sun.COM cbuf_t *volatile first; /* first in list; CQ_IS_EMPTY() tests this against NULL */ 29810843SDave.Plauger@Sun.COM cbuf_t *last; /* last in list */ 29910843SDave.Plauger@Sun.COM hrtime_t ts; /* timestamp */ 30010843SDave.Plauger@Sun.COM hrtime_t empty; /* total time empty */ 30110843SDave.Plauger@Sun.COM kmutex_t mutex; /* live state lock */ 30210843SDave.Plauger@Sun.COM kcondvar_t cv; /* live wait var */ 30310843SDave.Plauger@Sun.COM lock_t spinlock; /* panic mode spin lock */ 30410843SDave.Plauger@Sun.COM volatile uint_t open; /* producer ref count; incremented atomically by CQ_OPEN() */ 30510843SDave.Plauger@Sun.COM } cqueue_t; 30610843SDave.Plauger@Sun.COM 30710843SDave.Plauger@Sun.COM /* 30810843SDave.Plauger@Sun.COM * Convenience macros for using the cqueue functions 30910843SDave.Plauger@Sun.COM * Note that the caller must have defined "dumpsync_t *ds" 31010843SDave.Plauger@Sun.COM * The q argument is the NAME of a cqueue_t member of dumpsync_t 31010843SDave.Plauger@Sun.COM * (freebufq, mainq, helperq or writerq), not a pointer: each macro 31010843SDave.Plauger@Sun.COM * expands it as ds->q. CQ_CLOSE, CQ_PUT and CQ_GET also pass ds->live. 31010843SDave.Plauger@Sun.COM */ 31110843SDave.Plauger@Sun.COM #define CQ_IS_EMPTY(q) \ 31210843SDave.Plauger@Sun.COM (ds->q.first == NULL) 31310843SDave.Plauger@Sun.COM 31410843SDave.Plauger@Sun.COM #define CQ_OPEN(q) \ 31510843SDave.Plauger@Sun.COM atomic_inc_uint(&ds->q.open) 31610843SDave.Plauger@Sun.COM 31710843SDave.Plauger@Sun.COM #define CQ_CLOSE(q) \ 31810843SDave.Plauger@Sun.COM dumpsys_close_cq(&ds->q, ds->live) 31910843SDave.Plauger@Sun.COM 32010843SDave.Plauger@Sun.COM #define CQ_PUT(q, cp, st) \ 32110843SDave.Plauger@Sun.COM dumpsys_put_cq(&ds->q, cp, st, ds->live) 32210843SDave.Plauger@Sun.COM 32310843SDave.Plauger@Sun.COM #define CQ_GET(q) \ 32410843SDave.Plauger@Sun.COM dumpsys_get_cq(&ds->q, ds->live) 32510843SDave.Plauger@Sun.COM 32610843SDave.Plauger@Sun.COM /* 32710843SDave.Plauger@Sun.COM 
* Dynamic state when dumpsys() is running. 3280Sstevel@tonic-gate */ 32910843SDave.Plauger@Sun.COM typedef struct dumpsync { 33010843SDave.Plauger@Sun.COM pgcnt_t npages; /* subtotal of pages dumped */ 33110843SDave.Plauger@Sun.COM pgcnt_t pages_mapped; /* subtotal of pages mapped */ 33210843SDave.Plauger@Sun.COM pgcnt_t pages_used; /* subtotal of pages used per map */ 33310843SDave.Plauger@Sun.COM size_t nwrite; /* subtotal of bytes written */ 33410843SDave.Plauger@Sun.COM uint_t live; /* running live dump; handed to the dumpsys_*_cq() functions by the CQ_CLOSE/CQ_PUT/CQ_GET macros */ 33510843SDave.Plauger@Sun.COM uint_t neednl; /* will need to print a newline */ 33610843SDave.Plauger@Sun.COM uint_t percent; /* dump progress */ 33710843SDave.Plauger@Sun.COM uint_t percent_done; /* dump progress reported */ 33810843SDave.Plauger@Sun.COM cqueue_t freebufq; /* free kmem bufs for writing */ 33910843SDave.Plauger@Sun.COM cqueue_t mainq; /* input for main task */ 34010843SDave.Plauger@Sun.COM cqueue_t helperq; /* input for helpers */ 34110843SDave.Plauger@Sun.COM cqueue_t writerq; /* input for writer */ 34210843SDave.Plauger@Sun.COM hrtime_t start; /* start time */ 34310843SDave.Plauger@Sun.COM hrtime_t elapsed; /* elapsed time when completed */ 34410843SDave.Plauger@Sun.COM hrtime_t iotime; /* time spent writing nwrite bytes */ 34510843SDave.Plauger@Sun.COM hrtime_t iowait; /* time spent waiting for output */ 34610843SDave.Plauger@Sun.COM hrtime_t iowaitts; /* iowait timestamp */ 34710843SDave.Plauger@Sun.COM perpage_t perpage; /* metrics */ 34810843SDave.Plauger@Sun.COM perpage_t perpagets; /* start timestamps for perpage; the HR* macros reach this member by pasting "ts" onto "perpage", so the two names must stay paired */ 34910843SDave.Plauger@Sun.COM int dumpcpu; /* master cpu */ 35010843SDave.Plauger@Sun.COM } dumpsync_t; 35110843SDave.Plauger@Sun.COM 35210843SDave.Plauger@Sun.COM static dumpsync_t dumpsync; /* synchronization vars */ 35310843SDave.Plauger@Sun.COM 35410843SDave.Plauger@Sun.COM /* 35510843SDave.Plauger@Sun.COM * helper_t helpers: contains the context for a stream. 
CPUs run in 35610843SDave.Plauger@Sun.COM * parallel at dump time; each CPU creates a single stream of 35710843SDave.Plauger@Sun.COM * compression data. Stream data is divided into CBUF_SIZE blocks. 35810843SDave.Plauger@Sun.COM * The blocks are written in order within a stream. But, blocks from 35910843SDave.Plauger@Sun.COM * multiple streams can be interleaved. Each stream is identified by a 36010843SDave.Plauger@Sun.COM * unique tag. 36110843SDave.Plauger@Sun.COM */ 36210843SDave.Plauger@Sun.COM typedef struct helper { 36310843SDave.Plauger@Sun.COM int helper; /* bound helper id; NOTE(review): presumably also holds the MAINHELPER/FREEHELPER/DONEHELPER sentinels defined below -- their use is outside this view */ 36410843SDave.Plauger@Sun.COM int tag; /* compression stream tag */ 36510843SDave.Plauger@Sun.COM perpage_t perpage; /* per page metrics */ 36610843SDave.Plauger@Sun.COM perpage_t perpagets; /* per page metrics (timestamps) */ 36710843SDave.Plauger@Sun.COM taskqid_t taskqid; /* live dump task ptr */ 36810843SDave.Plauger@Sun.COM int in, out; /* buffer offsets */ 36910843SDave.Plauger@Sun.COM cbuf_t *cpin, *cpout, *cperr; /* cbuf objects in process */ 37010843SDave.Plauger@Sun.COM dumpsync_t *ds; /* pointer to sync vars */ 37110843SDave.Plauger@Sun.COM size_t used; /* counts input consumed */ 37210843SDave.Plauger@Sun.COM char *page; /* buffer for page copy; may be NULL, freed as PAGESIZE bytes by dump_update_clevel() */ 37310843SDave.Plauger@Sun.COM char *lzbuf; /* lzjb output; may be NULL, freed as PAGESIZE bytes by dump_update_clevel() */ 37410843SDave.Plauger@Sun.COM bz_stream bzstream; /* bzip2 state */ 37510843SDave.Plauger@Sun.COM } helper_t; 37610843SDave.Plauger@Sun.COM 37710843SDave.Plauger@Sun.COM #define MAINHELPER (-1) /* helper is also the main task */ 37810843SDave.Plauger@Sun.COM #define FREEHELPER (-2) /* unbound helper */ 37910843SDave.Plauger@Sun.COM #define DONEHELPER (-3) /* helper finished */ 38010843SDave.Plauger@Sun.COM 38110843SDave.Plauger@Sun.COM /* 38210843SDave.Plauger@Sun.COM * configuration vars for dumpsys 38310843SDave.Plauger@Sun.COM */ 38410843SDave.Plauger@Sun.COM typedef struct dumpcfg { 38510843SDave.Plauger@Sun.COM int threshold; /* ncpu threshold for bzip2; dump_update_clevel() sets it to dump_plat_mincpu scaled 2x or 3x for larger dumpbuf.iosize */ 
38610843SDave.Plauger@Sun.COM int nhelper; /* number of helpers; element count of helper[] */ 38710843SDave.Plauger@Sun.COM int nhelper_used; /* actual number of helpers used */ 38810843SDave.Plauger@Sun.COM int ncmap; /* number VA pages for compression; dump_update_clevel() uses it as the element count of cmap[] */ 38910843SDave.Plauger@Sun.COM int ncbuf; /* number of bufs for compression; element count of cbuf[] */ 39010843SDave.Plauger@Sun.COM int ncbuf_used; /* number of bufs in use */ 39110843SDave.Plauger@Sun.COM uint_t clevel; /* dump compression level: 0 single threaded lzjb, 1 parallel lzjb, 2 parallel bzip2 */ 39210843SDave.Plauger@Sun.COM helper_t *helper; /* array of helpers; NULL until first configured (dump_update_clevel() checks this before freeing) */ 39310843SDave.Plauger@Sun.COM cbuf_t *cmap; /* array of input (map) buffers */ 39410843SDave.Plauger@Sun.COM cbuf_t *cbuf; /* array of output buffers */ 39510843SDave.Plauger@Sun.COM ulong_t *helpermap; /* set of dumpsys helper CPU ids */ 39610843SDave.Plauger@Sun.COM ulong_t *bitmap; /* bitmap for marking pages to dump */ 39710843SDave.Plauger@Sun.COM ulong_t *rbitmap; /* bitmap for used CBUF_MAPSIZE ranges */ 39810843SDave.Plauger@Sun.COM pgcnt_t bitmapsize; /* size of bitmap */ 39910843SDave.Plauger@Sun.COM pgcnt_t rbitmapsize; /* size of bitmap for ranges */ 40010843SDave.Plauger@Sun.COM pgcnt_t found4m; /* number ranges allocated by dump */ 40110843SDave.Plauger@Sun.COM pgcnt_t foundsm; /* number small pages allocated by dump */ 40210843SDave.Plauger@Sun.COM pid_t *pids; /* list of process IDs at dump time */ 40310843SDave.Plauger@Sun.COM size_t maxsize; /* memory size needed at dump time */ 40410843SDave.Plauger@Sun.COM size_t maxvmsize; /* size of reserved VM; 0 means nothing is reserved */ 40510843SDave.Plauger@Sun.COM char *maxvm; /* reserved VM for spare pages; maxvmsize bytes, released to heap_arena by dump_update_clevel() */ 40610843SDave.Plauger@Sun.COM lock_t helper_lock; /* protect helper state */ 40710843SDave.Plauger@Sun.COM char helpers_wanted; /* flag to enable parallelism */ 408*12931SDave.Plauger@Sun.COM char helper_present; /* at least one helper showed up */ 40910843SDave.Plauger@Sun.COM } dumpcfg_t; 41010843SDave.Plauger@Sun.COM 41110843SDave.Plauger@Sun.COM static dumpcfg_t dumpcfg; /* config vars */ 
41210843SDave.Plauger@Sun.COM 41310843SDave.Plauger@Sun.COM /* 41410843SDave.Plauger@Sun.COM * The dump I/O buffer. 41510843SDave.Plauger@Sun.COM * 41610843SDave.Plauger@Sun.COM * There is one I/O buffer used by dumpvp_write and dumvp_flush. It is 41710843SDave.Plauger@Sun.COM * sized according to the optimum device transfer speed. 41810843SDave.Plauger@Sun.COM */ 41910843SDave.Plauger@Sun.COM typedef struct dumpbuf { 42010843SDave.Plauger@Sun.COM vnode_t *cdev_vp; /* VCHR open of the dump device */ 42110843SDave.Plauger@Sun.COM len_t vp_limit; /* maximum write offset */ 42210843SDave.Plauger@Sun.COM offset_t vp_off; /* current dump device offset */ 42310843SDave.Plauger@Sun.COM char *cur; /* dump write pointer */ 42410843SDave.Plauger@Sun.COM char *start; /* dump buffer address */ 42510843SDave.Plauger@Sun.COM char *end; /* dump buffer end */ 42610843SDave.Plauger@Sun.COM size_t size; /* size of dumpbuf in bytes */ 42710843SDave.Plauger@Sun.COM size_t iosize; /* best transfer size for device */ 42810843SDave.Plauger@Sun.COM } dumpbuf_t; 42910843SDave.Plauger@Sun.COM 43010843SDave.Plauger@Sun.COM dumpbuf_t dumpbuf; /* I/O buffer */ 43110843SDave.Plauger@Sun.COM 43210843SDave.Plauger@Sun.COM /* 43310843SDave.Plauger@Sun.COM * The dump I/O buffer must be at least one page, at most xfer_size 43410843SDave.Plauger@Sun.COM * bytes, and should scale with physmem in between. The transfer size 43510843SDave.Plauger@Sun.COM * passed in will either represent a global default (maxphys) or the 43610843SDave.Plauger@Sun.COM * best size for the device. The size of the dumpbuf I/O buffer is 43710843SDave.Plauger@Sun.COM * limited by dumpbuf_limit (8MB by default) because the dump 43810843SDave.Plauger@Sun.COM * performance saturates beyond a certain size. The default is to 43910843SDave.Plauger@Sun.COM * select 1/4096 of the memory. 
44010843SDave.Plauger@Sun.COM */ 44110843SDave.Plauger@Sun.COM static int dumpbuf_fraction = 12; /* memory size scale factor */ 44210843SDave.Plauger@Sun.COM static size_t dumpbuf_limit = 8 * DUMP_1MB; /* max I/O buf size */ 44310843SDave.Plauger@Sun.COM 4440Sstevel@tonic-gate static size_t 4450Sstevel@tonic-gate dumpbuf_iosize(size_t xfer_size) 4460Sstevel@tonic-gate { 44710843SDave.Plauger@Sun.COM size_t iosize = ptob(physmem >> dumpbuf_fraction); 44810843SDave.Plauger@Sun.COM 44910843SDave.Plauger@Sun.COM if (iosize < PAGESIZE) 45010843SDave.Plauger@Sun.COM iosize = PAGESIZE; 45110843SDave.Plauger@Sun.COM else if (iosize > xfer_size) 45210843SDave.Plauger@Sun.COM iosize = xfer_size; 45310843SDave.Plauger@Sun.COM if (iosize > dumpbuf_limit) 45410843SDave.Plauger@Sun.COM iosize = dumpbuf_limit; 45510843SDave.Plauger@Sun.COM return (iosize & PAGEMASK); 4560Sstevel@tonic-gate } 4570Sstevel@tonic-gate 45810843SDave.Plauger@Sun.COM /* 45910843SDave.Plauger@Sun.COM * resize the I/O buffer 46010843SDave.Plauger@Sun.COM */ 4610Sstevel@tonic-gate static void 4620Sstevel@tonic-gate dumpbuf_resize(void) 4630Sstevel@tonic-gate { 46410843SDave.Plauger@Sun.COM char *old_buf = dumpbuf.start; 46510843SDave.Plauger@Sun.COM size_t old_size = dumpbuf.size; 4660Sstevel@tonic-gate char *new_buf; 4670Sstevel@tonic-gate size_t new_size; 4680Sstevel@tonic-gate 4690Sstevel@tonic-gate ASSERT(MUTEX_HELD(&dump_lock)); 4700Sstevel@tonic-gate 47110843SDave.Plauger@Sun.COM new_size = dumpbuf_iosize(MAX(dumpbuf.iosize, maxphys)); 47210843SDave.Plauger@Sun.COM if (new_size <= old_size) 4730Sstevel@tonic-gate return; /* no need to reallocate buffer */ 4740Sstevel@tonic-gate 4750Sstevel@tonic-gate new_buf = kmem_alloc(new_size, KM_SLEEP); 47610843SDave.Plauger@Sun.COM dumpbuf.size = new_size; 47710843SDave.Plauger@Sun.COM dumpbuf.start = new_buf; 47810843SDave.Plauger@Sun.COM dumpbuf.end = new_buf + new_size; 4790Sstevel@tonic-gate kmem_free(old_buf, old_size); 4800Sstevel@tonic-gate } 
4810Sstevel@tonic-gate 48210843SDave.Plauger@Sun.COM /* 48310843SDave.Plauger@Sun.COM * dump_update_clevel is called when dumpadm configures the dump device. 48410843SDave.Plauger@Sun.COM * Calculate number of helpers and buffers. 48510843SDave.Plauger@Sun.COM * Allocate the minimum configuration for now. 48610843SDave.Plauger@Sun.COM * 48710843SDave.Plauger@Sun.COM * When the dump file is configured we reserve a minimum amount of 48810843SDave.Plauger@Sun.COM * memory for use at crash time. But we reserve VA for all the memory 48910843SDave.Plauger@Sun.COM * we really want in order to do the fastest dump possible. The VA is 49010843SDave.Plauger@Sun.COM * backed by pages not being dumped, according to the bitmap. If 49110843SDave.Plauger@Sun.COM * there is insufficient spare memory, however, we fall back to the 49210843SDave.Plauger@Sun.COM * minimum. 49310843SDave.Plauger@Sun.COM * 49410843SDave.Plauger@Sun.COM * Live dump (savecore -L) always uses the minimum config. 49510843SDave.Plauger@Sun.COM * 49610843SDave.Plauger@Sun.COM * clevel 0 is single threaded lzjb 49710843SDave.Plauger@Sun.COM * clevel 1 is parallel lzjb 49810843SDave.Plauger@Sun.COM * clevel 2 is parallel bzip2 49910843SDave.Plauger@Sun.COM * 50010843SDave.Plauger@Sun.COM * The ncpu threshold is selected with dump_plat_mincpu. 50110843SDave.Plauger@Sun.COM * On OPL, set_platform_defaults() overrides the sun4u setting. 50210843SDave.Plauger@Sun.COM * The actual values are defined via DUMP_PLAT_*_MINCPU macros. 
50310843SDave.Plauger@Sun.COM * 50410843SDave.Plauger@Sun.COM * Architecture Threshold Algorithm 50510843SDave.Plauger@Sun.COM * sun4u < 51 parallel lzjb 50610843SDave.Plauger@Sun.COM * sun4u >= 51 parallel bzip2(*) 50710843SDave.Plauger@Sun.COM * sun4u OPL < 8 parallel lzjb 50810843SDave.Plauger@Sun.COM * sun4u OPL >= 8 parallel bzip2(*) 50910843SDave.Plauger@Sun.COM * sun4v < 128 parallel lzjb 51010843SDave.Plauger@Sun.COM * sun4v >= 128 parallel bzip2(*) 51110843SDave.Plauger@Sun.COM * x86 < 11 parallel lzjb 51210843SDave.Plauger@Sun.COM * x86 >= 11 parallel bzip2(*) 51310843SDave.Plauger@Sun.COM * 32-bit N/A single-threaded lzjb 51410843SDave.Plauger@Sun.COM * 51510843SDave.Plauger@Sun.COM * (*) bzip2 is only chosen if there is sufficient available 51610843SDave.Plauger@Sun.COM * memory for buffers at dump time. See dumpsys_get_maxmem(). 51710843SDave.Plauger@Sun.COM * 51810843SDave.Plauger@Sun.COM * Faster dump devices have larger I/O buffers. The threshold value is 51910843SDave.Plauger@Sun.COM * increased according to the size of the dump I/O buffer, because 52010843SDave.Plauger@Sun.COM * parallel lzjb performs better with faster disks. For buffers >= 1MB 52110843SDave.Plauger@Sun.COM * the threshold is 3X; for buffers >= 256K threshold is 2X. 52210843SDave.Plauger@Sun.COM * 52310843SDave.Plauger@Sun.COM * For parallel dumps, the number of helpers is ncpu-1. The CPU 52410843SDave.Plauger@Sun.COM * running panic runs the main task. For single-threaded dumps, the 52510843SDave.Plauger@Sun.COM * panic CPU does lzjb compression (it is tagged as MAINHELPER.) 52610843SDave.Plauger@Sun.COM * 52710843SDave.Plauger@Sun.COM * Need multiple buffers per helper so that they do not block waiting 52810843SDave.Plauger@Sun.COM * for the main task. 
52910843SDave.Plauger@Sun.COM * parallel single-threaded 53010843SDave.Plauger@Sun.COM * Number of output buffers: nhelper*2 1 53110843SDave.Plauger@Sun.COM * Number of mapping buffers: nhelper*4 1 53210843SDave.Plauger@Sun.COM * 53310843SDave.Plauger@Sun.COM */ 53410843SDave.Plauger@Sun.COM static void 53510843SDave.Plauger@Sun.COM dump_update_clevel() 53610843SDave.Plauger@Sun.COM { 53710843SDave.Plauger@Sun.COM int tag; 53810843SDave.Plauger@Sun.COM size_t bz2size; 53910843SDave.Plauger@Sun.COM helper_t *hp, *hpend; 54010843SDave.Plauger@Sun.COM cbuf_t *cp, *cpend; 54110843SDave.Plauger@Sun.COM dumpcfg_t *old = &dumpcfg; 54210843SDave.Plauger@Sun.COM dumpcfg_t newcfg = *old; 54310843SDave.Plauger@Sun.COM dumpcfg_t *new = &newcfg; 54410843SDave.Plauger@Sun.COM 54510843SDave.Plauger@Sun.COM ASSERT(MUTEX_HELD(&dump_lock)); 54610843SDave.Plauger@Sun.COM 54710843SDave.Plauger@Sun.COM /* 54810843SDave.Plauger@Sun.COM * Free the previously allocated bufs and VM. 54910843SDave.Plauger@Sun.COM */ 55010843SDave.Plauger@Sun.COM if (old->helper != NULL) { 55110843SDave.Plauger@Sun.COM 55210843SDave.Plauger@Sun.COM /* helpers */ 55310843SDave.Plauger@Sun.COM hpend = &old->helper[old->nhelper]; 55410843SDave.Plauger@Sun.COM for (hp = old->helper; hp != hpend; hp++) { 55510843SDave.Plauger@Sun.COM if (hp->lzbuf != NULL) 55610843SDave.Plauger@Sun.COM kmem_free(hp->lzbuf, PAGESIZE); 55710843SDave.Plauger@Sun.COM if (hp->page != NULL) 55810843SDave.Plauger@Sun.COM kmem_free(hp->page, PAGESIZE); 55910843SDave.Plauger@Sun.COM } 56010843SDave.Plauger@Sun.COM kmem_free(old->helper, old->nhelper * sizeof (helper_t)); 56110843SDave.Plauger@Sun.COM 56210843SDave.Plauger@Sun.COM /* VM space for mapping pages */ 56310843SDave.Plauger@Sun.COM cpend = &old->cmap[old->ncmap]; 56410843SDave.Plauger@Sun.COM for (cp = old->cmap; cp != cpend; cp++) 56510843SDave.Plauger@Sun.COM vmem_xfree(heap_arena, cp->buf, CBUF_MAPSIZE); 56610843SDave.Plauger@Sun.COM kmem_free(old->cmap, old->ncmap * sizeof 
(cbuf_t)); 56710843SDave.Plauger@Sun.COM 56810843SDave.Plauger@Sun.COM /* output bufs */ 56910843SDave.Plauger@Sun.COM cpend = &old->cbuf[old->ncbuf]; 57010843SDave.Plauger@Sun.COM for (cp = old->cbuf; cp != cpend; cp++) 57110843SDave.Plauger@Sun.COM if (cp->buf != NULL) 57210843SDave.Plauger@Sun.COM kmem_free(cp->buf, cp->size); 57310843SDave.Plauger@Sun.COM kmem_free(old->cbuf, old->ncbuf * sizeof (cbuf_t)); 57410843SDave.Plauger@Sun.COM 57510843SDave.Plauger@Sun.COM /* reserved VM for dumpsys_get_maxmem */ 57610843SDave.Plauger@Sun.COM if (old->maxvmsize > 0) 57710843SDave.Plauger@Sun.COM vmem_xfree(heap_arena, old->maxvm, old->maxvmsize); 57810843SDave.Plauger@Sun.COM } 57910843SDave.Plauger@Sun.COM 58010843SDave.Plauger@Sun.COM /* 58110843SDave.Plauger@Sun.COM * Allocate memory and VM. 58210843SDave.Plauger@Sun.COM * One CPU runs dumpsys, the rest are helpers. 58310843SDave.Plauger@Sun.COM */ 58410843SDave.Plauger@Sun.COM new->nhelper = ncpus - 1; 58510843SDave.Plauger@Sun.COM if (new->nhelper < 1) 58610843SDave.Plauger@Sun.COM new->nhelper = 1; 58710843SDave.Plauger@Sun.COM 58810843SDave.Plauger@Sun.COM if (new->nhelper > DUMP_MAX_NHELPER) 58910843SDave.Plauger@Sun.COM new->nhelper = DUMP_MAX_NHELPER; 59010843SDave.Plauger@Sun.COM 591*12931SDave.Plauger@Sun.COM /* use platform default, unless /etc/system overrides */ 592*12931SDave.Plauger@Sun.COM if (dump_plat_mincpu == MINCPU_NOT_SET) 593*12931SDave.Plauger@Sun.COM dump_plat_mincpu = dump_plat_mincpu_default; 594*12931SDave.Plauger@Sun.COM 59510843SDave.Plauger@Sun.COM /* increase threshold for faster disks */ 59610843SDave.Plauger@Sun.COM new->threshold = dump_plat_mincpu; 59710843SDave.Plauger@Sun.COM if (dumpbuf.iosize >= DUMP_1MB) 59810843SDave.Plauger@Sun.COM new->threshold *= 3; 59910843SDave.Plauger@Sun.COM else if (dumpbuf.iosize >= (256 * DUMP_1KB)) 60010843SDave.Plauger@Sun.COM new->threshold *= 2; 60110843SDave.Plauger@Sun.COM 60210843SDave.Plauger@Sun.COM /* figure compression level based upon 
the computed threshold. */ 60310843SDave.Plauger@Sun.COM if (dump_plat_mincpu == 0 || new->nhelper < 2) { 60410843SDave.Plauger@Sun.COM new->clevel = 0; 60510843SDave.Plauger@Sun.COM new->nhelper = 1; 60610843SDave.Plauger@Sun.COM } else if ((new->nhelper + 1) >= new->threshold) { 60710843SDave.Plauger@Sun.COM new->clevel = DUMP_CLEVEL_BZIP2; 60810843SDave.Plauger@Sun.COM } else { 60910843SDave.Plauger@Sun.COM new->clevel = DUMP_CLEVEL_LZJB; 61010843SDave.Plauger@Sun.COM } 61110843SDave.Plauger@Sun.COM 61210843SDave.Plauger@Sun.COM if (new->clevel == 0) { 61310843SDave.Plauger@Sun.COM new->ncbuf = 1; 61410843SDave.Plauger@Sun.COM new->ncmap = 1; 61510843SDave.Plauger@Sun.COM } else { 61610843SDave.Plauger@Sun.COM new->ncbuf = NCBUF_PER_HELPER * new->nhelper; 61710843SDave.Plauger@Sun.COM new->ncmap = NCMAP_PER_HELPER * new->nhelper; 61810843SDave.Plauger@Sun.COM } 61910843SDave.Plauger@Sun.COM 62010843SDave.Plauger@Sun.COM /* 62110843SDave.Plauger@Sun.COM * Allocate new data structures and buffers for MINHELPERS, 62210843SDave.Plauger@Sun.COM * and also figure the max desired size. 
62310843SDave.Plauger@Sun.COM */ 62410843SDave.Plauger@Sun.COM bz2size = BZ2_bzCompressInitSize(dump_bzip2_level); 62510843SDave.Plauger@Sun.COM new->maxsize = 0; 62610843SDave.Plauger@Sun.COM new->maxvmsize = 0; 62710843SDave.Plauger@Sun.COM new->maxvm = NULL; 62810843SDave.Plauger@Sun.COM tag = 1; 62910843SDave.Plauger@Sun.COM new->helper = kmem_zalloc(new->nhelper * sizeof (helper_t), KM_SLEEP); 63010843SDave.Plauger@Sun.COM hpend = &new->helper[new->nhelper]; 63110843SDave.Plauger@Sun.COM for (hp = new->helper; hp != hpend; hp++) { 63210843SDave.Plauger@Sun.COM hp->tag = tag++; 63310843SDave.Plauger@Sun.COM if (hp < &new->helper[MINHELPERS]) { 63410843SDave.Plauger@Sun.COM hp->lzbuf = kmem_alloc(PAGESIZE, KM_SLEEP); 63510843SDave.Plauger@Sun.COM hp->page = kmem_alloc(PAGESIZE, KM_SLEEP); 63610843SDave.Plauger@Sun.COM } else if (new->clevel < DUMP_CLEVEL_BZIP2) { 63710843SDave.Plauger@Sun.COM new->maxsize += 2 * PAGESIZE; 63810843SDave.Plauger@Sun.COM } else { 63910843SDave.Plauger@Sun.COM new->maxsize += PAGESIZE; 64010843SDave.Plauger@Sun.COM } 64110843SDave.Plauger@Sun.COM if (new->clevel >= DUMP_CLEVEL_BZIP2) 64210843SDave.Plauger@Sun.COM new->maxsize += bz2size; 64310843SDave.Plauger@Sun.COM } 64410843SDave.Plauger@Sun.COM 64510843SDave.Plauger@Sun.COM new->cbuf = kmem_zalloc(new->ncbuf * sizeof (cbuf_t), KM_SLEEP); 64610843SDave.Plauger@Sun.COM cpend = &new->cbuf[new->ncbuf]; 64710843SDave.Plauger@Sun.COM for (cp = new->cbuf; cp != cpend; cp++) { 64810843SDave.Plauger@Sun.COM cp->state = CBUF_FREEBUF; 64910843SDave.Plauger@Sun.COM cp->size = CBUF_SIZE; 65010843SDave.Plauger@Sun.COM if (cp < &new->cbuf[MINCBUFS]) 65110843SDave.Plauger@Sun.COM cp->buf = kmem_alloc(cp->size, KM_SLEEP); 65210843SDave.Plauger@Sun.COM else 65310843SDave.Plauger@Sun.COM new->maxsize += cp->size; 65410843SDave.Plauger@Sun.COM } 65510843SDave.Plauger@Sun.COM 65610843SDave.Plauger@Sun.COM new->cmap = kmem_zalloc(new->ncmap * sizeof (cbuf_t), KM_SLEEP); 65710843SDave.Plauger@Sun.COM 
cpend = &new->cmap[new->ncmap]; 65810843SDave.Plauger@Sun.COM for (cp = new->cmap; cp != cpend; cp++) { 65910843SDave.Plauger@Sun.COM cp->state = CBUF_FREEMAP; 66010843SDave.Plauger@Sun.COM cp->size = CBUF_MAPSIZE; 66110843SDave.Plauger@Sun.COM cp->buf = vmem_xalloc(heap_arena, CBUF_MAPSIZE, CBUF_MAPSIZE, 66210843SDave.Plauger@Sun.COM 0, 0, NULL, NULL, VM_SLEEP); 66310843SDave.Plauger@Sun.COM } 66410843SDave.Plauger@Sun.COM 66510843SDave.Plauger@Sun.COM /* reserve VA to be backed with spare pages at crash time */ 66610843SDave.Plauger@Sun.COM if (new->maxsize > 0) { 66710843SDave.Plauger@Sun.COM new->maxsize = P2ROUNDUP(new->maxsize, PAGESIZE); 66810843SDave.Plauger@Sun.COM new->maxvmsize = P2ROUNDUP(new->maxsize, CBUF_MAPSIZE); 66910843SDave.Plauger@Sun.COM new->maxvm = vmem_xalloc(heap_arena, new->maxvmsize, 67010843SDave.Plauger@Sun.COM CBUF_MAPSIZE, 0, 0, NULL, NULL, VM_SLEEP); 67110843SDave.Plauger@Sun.COM } 67210843SDave.Plauger@Sun.COM 67311178SDave.Plauger@Sun.COM /* 67411178SDave.Plauger@Sun.COM * Reserve memory for kmem allocation calls made during crash 67511178SDave.Plauger@Sun.COM * dump. The hat layer allocates memory for each mapping 67611178SDave.Plauger@Sun.COM * created, and the I/O path allocates buffers and data structs. 67711178SDave.Plauger@Sun.COM * Add a few pages for safety. 67811178SDave.Plauger@Sun.COM */ 67911178SDave.Plauger@Sun.COM kmem_dump_init((new->ncmap * dump_kmem_permap) + 68011178SDave.Plauger@Sun.COM (dump_kmem_pages * PAGESIZE)); 68111178SDave.Plauger@Sun.COM 68210843SDave.Plauger@Sun.COM /* set new config pointers */ 68310843SDave.Plauger@Sun.COM *old = *new; 68410843SDave.Plauger@Sun.COM } 68510843SDave.Plauger@Sun.COM 68610843SDave.Plauger@Sun.COM /* 68710843SDave.Plauger@Sun.COM * Define a struct memlist walker to optimize bitnum to pfn 68810843SDave.Plauger@Sun.COM * lookup. The walker maintains the state of the list traversal. 
68910843SDave.Plauger@Sun.COM */ 69010843SDave.Plauger@Sun.COM typedef struct dumpmlw { 69110843SDave.Plauger@Sun.COM struct memlist *mp; /* current memlist */ 69210843SDave.Plauger@Sun.COM pgcnt_t basenum; /* bitnum base offset */ 69310843SDave.Plauger@Sun.COM pgcnt_t mppages; /* current memlist size */ 69410843SDave.Plauger@Sun.COM pgcnt_t mpleft; /* size to end of current memlist */ 69510843SDave.Plauger@Sun.COM pfn_t mpaddr; /* first pfn in memlist */ 69610843SDave.Plauger@Sun.COM } dumpmlw_t; 69710843SDave.Plauger@Sun.COM 69810843SDave.Plauger@Sun.COM /* initialize the walker */ 69910843SDave.Plauger@Sun.COM static inline void 70010843SDave.Plauger@Sun.COM dump_init_memlist_walker(dumpmlw_t *pw) 70110843SDave.Plauger@Sun.COM { 70210843SDave.Plauger@Sun.COM pw->mp = phys_install; 70310843SDave.Plauger@Sun.COM pw->basenum = 0; 70411474SJonathan.Adams@Sun.COM pw->mppages = pw->mp->ml_size >> PAGESHIFT; 70510843SDave.Plauger@Sun.COM pw->mpleft = pw->mppages; 70611474SJonathan.Adams@Sun.COM pw->mpaddr = pw->mp->ml_address >> PAGESHIFT; 70710843SDave.Plauger@Sun.COM } 70810843SDave.Plauger@Sun.COM 70910843SDave.Plauger@Sun.COM /* 71010843SDave.Plauger@Sun.COM * Lookup pfn given bitnum. The memlist can be quite long on some 71110843SDave.Plauger@Sun.COM * systems (e.g.: one per board). To optimize sequential lookups, the 71210843SDave.Plauger@Sun.COM * caller initializes and presents a memlist walker. 
71310843SDave.Plauger@Sun.COM */ 71410843SDave.Plauger@Sun.COM static pfn_t 71510843SDave.Plauger@Sun.COM dump_bitnum_to_pfn(pgcnt_t bitnum, dumpmlw_t *pw) 71610843SDave.Plauger@Sun.COM { 71710843SDave.Plauger@Sun.COM bitnum -= pw->basenum; 71810843SDave.Plauger@Sun.COM while (pw->mp != NULL) { 71910843SDave.Plauger@Sun.COM if (bitnum < pw->mppages) { 72010843SDave.Plauger@Sun.COM pw->mpleft = pw->mppages - bitnum; 72110843SDave.Plauger@Sun.COM return (pw->mpaddr + bitnum); 72210843SDave.Plauger@Sun.COM } 72310843SDave.Plauger@Sun.COM bitnum -= pw->mppages; 72410843SDave.Plauger@Sun.COM pw->basenum += pw->mppages; 72511474SJonathan.Adams@Sun.COM pw->mp = pw->mp->ml_next; 72610843SDave.Plauger@Sun.COM if (pw->mp != NULL) { 72711474SJonathan.Adams@Sun.COM pw->mppages = pw->mp->ml_size >> PAGESHIFT; 72810843SDave.Plauger@Sun.COM pw->mpleft = pw->mppages; 72911474SJonathan.Adams@Sun.COM pw->mpaddr = pw->mp->ml_address >> PAGESHIFT; 73010843SDave.Plauger@Sun.COM } 73110843SDave.Plauger@Sun.COM } 73210843SDave.Plauger@Sun.COM return (PFN_INVALID); 73310843SDave.Plauger@Sun.COM } 73410843SDave.Plauger@Sun.COM 73510843SDave.Plauger@Sun.COM static pgcnt_t 73610843SDave.Plauger@Sun.COM dump_pfn_to_bitnum(pfn_t pfn) 73710843SDave.Plauger@Sun.COM { 73810843SDave.Plauger@Sun.COM struct memlist *mp; 73910843SDave.Plauger@Sun.COM pgcnt_t bitnum = 0; 74010843SDave.Plauger@Sun.COM 74111474SJonathan.Adams@Sun.COM for (mp = phys_install; mp != NULL; mp = mp->ml_next) { 74211474SJonathan.Adams@Sun.COM if (pfn >= (mp->ml_address >> PAGESHIFT) && 74311474SJonathan.Adams@Sun.COM pfn < ((mp->ml_address + mp->ml_size) >> PAGESHIFT)) 74411474SJonathan.Adams@Sun.COM return (bitnum + pfn - (mp->ml_address >> PAGESHIFT)); 74511474SJonathan.Adams@Sun.COM bitnum += mp->ml_size >> PAGESHIFT; 74610843SDave.Plauger@Sun.COM } 74710843SDave.Plauger@Sun.COM return ((pgcnt_t)-1); 74810843SDave.Plauger@Sun.COM } 74910843SDave.Plauger@Sun.COM 75010843SDave.Plauger@Sun.COM /* 75110843SDave.Plauger@Sun.COM 
* Set/test bitmap for a CBUF_MAPSIZE range which includes pfn. The 75210843SDave.Plauger@Sun.COM * mapping of pfn to range index is imperfect because pfn and bitnum 75310843SDave.Plauger@Sun.COM * do not have the same phase. To make sure a CBUF_MAPSIZE range is 75410843SDave.Plauger@Sun.COM * covered, call this for both ends: 75510843SDave.Plauger@Sun.COM * dump_set_used(base) 75610843SDave.Plauger@Sun.COM * dump_set_used(base+CBUF_MAPNP-1) 75710843SDave.Plauger@Sun.COM * 75810843SDave.Plauger@Sun.COM * This is used during a panic dump to mark pages allocated by 75910843SDave.Plauger@Sun.COM * dumpsys_get_maxmem(). The macro IS_DUMP_PAGE(pp) is used by 76010843SDave.Plauger@Sun.COM * page_get_mnode_freelist() to make sure pages used by dump are never 76110843SDave.Plauger@Sun.COM * allocated. 76210843SDave.Plauger@Sun.COM */ 76310843SDave.Plauger@Sun.COM #define CBUF_MAPP2R(pfn) ((pfn) >> (CBUF_MAPSHIFT - PAGESHIFT)) 76410843SDave.Plauger@Sun.COM 76510843SDave.Plauger@Sun.COM static void 76610843SDave.Plauger@Sun.COM dump_set_used(pfn_t pfn) 76710843SDave.Plauger@Sun.COM { 76810843SDave.Plauger@Sun.COM 76910843SDave.Plauger@Sun.COM pgcnt_t bitnum, rbitnum; 77010843SDave.Plauger@Sun.COM 77110843SDave.Plauger@Sun.COM bitnum = dump_pfn_to_bitnum(pfn); 77210843SDave.Plauger@Sun.COM ASSERT(bitnum != (pgcnt_t)-1); 77310843SDave.Plauger@Sun.COM 77410843SDave.Plauger@Sun.COM rbitnum = CBUF_MAPP2R(bitnum); 77510843SDave.Plauger@Sun.COM ASSERT(rbitnum < dumpcfg.rbitmapsize); 77610843SDave.Plauger@Sun.COM 77710843SDave.Plauger@Sun.COM BT_SET(dumpcfg.rbitmap, rbitnum); 77810843SDave.Plauger@Sun.COM } 77910843SDave.Plauger@Sun.COM 78010843SDave.Plauger@Sun.COM int 78110843SDave.Plauger@Sun.COM dump_test_used(pfn_t pfn) 78210843SDave.Plauger@Sun.COM { 78310843SDave.Plauger@Sun.COM pgcnt_t bitnum, rbitnum; 78410843SDave.Plauger@Sun.COM 78510843SDave.Plauger@Sun.COM bitnum = dump_pfn_to_bitnum(pfn); 78610843SDave.Plauger@Sun.COM ASSERT(bitnum != (pgcnt_t)-1); 
78710843SDave.Plauger@Sun.COM 78810843SDave.Plauger@Sun.COM rbitnum = CBUF_MAPP2R(bitnum); 78910843SDave.Plauger@Sun.COM ASSERT(rbitnum < dumpcfg.rbitmapsize); 79010843SDave.Plauger@Sun.COM 79110843SDave.Plauger@Sun.COM return (BT_TEST(dumpcfg.rbitmap, rbitnum)); 79210843SDave.Plauger@Sun.COM } 79310843SDave.Plauger@Sun.COM 79410843SDave.Plauger@Sun.COM /* 79510843SDave.Plauger@Sun.COM * dumpbzalloc and dumpbzfree are callbacks from the bzip2 library. 79610843SDave.Plauger@Sun.COM * dumpsys_get_maxmem() uses them for BZ2_bzCompressInit(). 79710843SDave.Plauger@Sun.COM */ 79810843SDave.Plauger@Sun.COM static void * 79910843SDave.Plauger@Sun.COM dumpbzalloc(void *opaque, int items, int size) 80010843SDave.Plauger@Sun.COM { 80110843SDave.Plauger@Sun.COM size_t *sz; 80210843SDave.Plauger@Sun.COM char *ret; 80310843SDave.Plauger@Sun.COM 80410843SDave.Plauger@Sun.COM ASSERT(opaque != NULL); 80510843SDave.Plauger@Sun.COM sz = opaque; 80610843SDave.Plauger@Sun.COM ret = dumpcfg.maxvm + *sz; 80710843SDave.Plauger@Sun.COM *sz += items * size; 80810843SDave.Plauger@Sun.COM *sz = P2ROUNDUP(*sz, BZ2_BZALLOC_ALIGN); 80910843SDave.Plauger@Sun.COM ASSERT(*sz <= dumpcfg.maxvmsize); 81010843SDave.Plauger@Sun.COM return (ret); 81110843SDave.Plauger@Sun.COM } 81210843SDave.Plauger@Sun.COM 81310843SDave.Plauger@Sun.COM /*ARGSUSED*/ 81410843SDave.Plauger@Sun.COM static void 81510843SDave.Plauger@Sun.COM dumpbzfree(void *opaque, void *addr) 81610843SDave.Plauger@Sun.COM { 81710843SDave.Plauger@Sun.COM } 81810843SDave.Plauger@Sun.COM 81910843SDave.Plauger@Sun.COM /* 82010843SDave.Plauger@Sun.COM * Perform additional checks on the page to see if we can really use 82110843SDave.Plauger@Sun.COM * it. The kernel (kas) pages are always set in the bitmap. However, 82210843SDave.Plauger@Sun.COM * boot memory pages (prom_ppages or P_BOOTPAGES) are not in the 82310843SDave.Plauger@Sun.COM * bitmap. So we check for them. 
82410843SDave.Plauger@Sun.COM */ 82510843SDave.Plauger@Sun.COM static inline int 82610843SDave.Plauger@Sun.COM dump_pfn_check(pfn_t pfn) 82710843SDave.Plauger@Sun.COM { 82810843SDave.Plauger@Sun.COM page_t *pp = page_numtopp_nolock(pfn); 82910843SDave.Plauger@Sun.COM if (pp == NULL || pp->p_pagenum != pfn || 83010843SDave.Plauger@Sun.COM #if defined(__sparc) 83111185SSean.McEnroe@Sun.COM pp->p_vnode == &promvp || 83210843SDave.Plauger@Sun.COM #else 83310843SDave.Plauger@Sun.COM PP_ISBOOTPAGES(pp) || 83410843SDave.Plauger@Sun.COM #endif 83510843SDave.Plauger@Sun.COM pp->p_toxic != 0) 83610843SDave.Plauger@Sun.COM return (0); 83710843SDave.Plauger@Sun.COM return (1); 83810843SDave.Plauger@Sun.COM } 83910843SDave.Plauger@Sun.COM 84010843SDave.Plauger@Sun.COM /* 84110843SDave.Plauger@Sun.COM * Check a range to see if all contained pages are available and 84210843SDave.Plauger@Sun.COM * return non-zero if the range can be used. 84310843SDave.Plauger@Sun.COM */ 84410843SDave.Plauger@Sun.COM static inline int 84510843SDave.Plauger@Sun.COM dump_range_check(pgcnt_t start, pgcnt_t end, pfn_t pfn) 84610843SDave.Plauger@Sun.COM { 84710843SDave.Plauger@Sun.COM for (; start < end; start++, pfn++) { 84810843SDave.Plauger@Sun.COM if (BT_TEST(dumpcfg.bitmap, start)) 84910843SDave.Plauger@Sun.COM return (0); 85010843SDave.Plauger@Sun.COM if (!dump_pfn_check(pfn)) 85110843SDave.Plauger@Sun.COM return (0); 85210843SDave.Plauger@Sun.COM } 85310843SDave.Plauger@Sun.COM return (1); 85410843SDave.Plauger@Sun.COM } 85510843SDave.Plauger@Sun.COM 85610843SDave.Plauger@Sun.COM /* 85710843SDave.Plauger@Sun.COM * dumpsys_get_maxmem() is called during panic. Find unused ranges 85810843SDave.Plauger@Sun.COM * and use them for buffers. If we find enough memory switch to 85910843SDave.Plauger@Sun.COM * parallel bzip2, otherwise use parallel lzjb. 86010843SDave.Plauger@Sun.COM * 86110843SDave.Plauger@Sun.COM * It searches the dump bitmap in 2 passes. 
* The first time it looks
 * for CBUF_MAPSIZE ranges. On the second pass it uses small pages.
 *
 * Every page or range found is mapped with hat_devload() at the next
 * free offset of the VA reserved in cfg->maxvm, and recorded in the
 * range bitmap with dump_set_used().  The mapped memory is then handed
 * out, in order, to helpers (page and lzjb buffers), output buffers
 * and, for bzip2, compression state.
 */
static void
dumpsys_get_maxmem()
{
	dumpcfg_t *cfg = &dumpcfg;
	cbuf_t *endcp = &cfg->cbuf[cfg->ncbuf];
	helper_t *endhp = &cfg->helper[cfg->nhelper];
	pgcnt_t bitnum, end;
	size_t sz, endsz, bz2size;
	pfn_t pfn, off;
	cbuf_t *cp;
	helper_t *hp, *ohp;
	dumpmlw_t mlw;
	int k;

	/*
	 * Fall back to doing a serial dump if no helpers showed
	 * up. It is possible for other CPUs to be stuck in PROM, or
	 * DRd out. panic("sync initiated") in sync_handler() is one
	 * case. A parallel dump will hang (dump time out) unless
	 * there is at least one helper CPU. At this point dumpsys()
	 * has done some I/O, which means there has been plenty of
	 * time for helpers to arrive.
	 */
	if (!cfg->helper_present) {
		cfg->clevel = 0;
		return;
	}

	/*
	 * There may be no point in looking for spare memory. If
	 * dumping all memory, then none is spare. If doing a serial
	 * dump, then already have buffers.
	 */
	if (cfg->maxsize == 0 || cfg->clevel < DUMP_CLEVEL_LZJB ||
	    (dump_conflags & DUMP_ALL) != 0) {
		/* without spare memory, never go beyond parallel lzjb */
		if (cfg->clevel > DUMP_CLEVEL_LZJB)
			cfg->clevel = DUMP_CLEVEL_LZJB;
		return;
	}

	/* sz counts the bytes mapped into cfg->maxvm so far */
	sz = 0;
	cfg->found4m = 0;
	cfg->foundsm = 0;

	/* bitmap of ranges used to estimate which pfns are being used */
	bzero(dumpcfg.rbitmap, BT_SIZEOFMAP(dumpcfg.rbitmapsize));

	/* find ranges that are not being dumped to use for buffers */
	dump_init_memlist_walker(&mlw);
	/* every iteration, including each continue, resumes at end */
	for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum = end) {
		/* reset dump_timeleft to the full dump_timeout */
		dump_timeleft = dump_timeout;
		end = bitnum + CBUF_MAPNP;
		pfn = dump_bitnum_to_pfn(bitnum, &mlw);
		ASSERT(pfn != PFN_INVALID);

		/* skip partial range at end of mem segment */
		if (mlw.mpleft < CBUF_MAPNP) {
			end = bitnum + mlw.mpleft;
			continue;
		}

		/* skip non aligned pages */
		off = P2PHASE(pfn, CBUF_MAPNP);
		if (off != 0) {
			/* shorten the step so the next pfn is aligned */
			end -= off;
			continue;
		}

		/* the whole range must be undumped and usable */
		if (!dump_range_check(bitnum, end, pfn))
			continue;

		ASSERT((sz + CBUF_MAPSIZE) <= cfg->maxvmsize);
		hat_devload(kas.a_hat, cfg->maxvm + sz, CBUF_MAPSIZE, pfn,
		    PROT_READ | PROT_WRITE, HAT_LOAD_NOCONSIST);
		sz += CBUF_MAPSIZE;
		cfg->found4m++;

		/* set the bitmap for both ends to be sure to cover the range */
		dump_set_used(pfn);
		dump_set_used(pfn + CBUF_MAPNP - 1);

		if (sz >= cfg->maxsize)
			goto foundmax;
	}

	/* Add small pages if we can't find enough large pages. */
	dump_init_memlist_walker(&mlw);
	for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum = end) {
		dump_timeleft = dump_timeout;
		end = bitnum + CBUF_MAPNP;
		pfn = dump_bitnum_to_pfn(bitnum, &mlw);
		ASSERT(pfn != PFN_INVALID);

		/* Find any non-aligned pages at start and end of segment. */
		off = P2PHASE(pfn, CBUF_MAPNP);
		if (mlw.mpleft < CBUF_MAPNP) {
			end = bitnum + mlw.mpleft;
		} else if (off != 0) {
			end -= off;
		} else if (cfg->found4m && dump_test_used(pfn)) {
			/* aligned range already marked used: skip it whole */
			continue;
		}

		/* take each page that is not dumped and passes the checks */
		for (; bitnum < end; bitnum++, pfn++) {
			dump_timeleft = dump_timeout;
			if (BT_TEST(dumpcfg.bitmap, bitnum))
				continue;
			if (!dump_pfn_check(pfn))
				continue;
			ASSERT((sz + PAGESIZE) <= cfg->maxvmsize);
			hat_devload(kas.a_hat, cfg->maxvm + sz, PAGESIZE, pfn,
			    PROT_READ | PROT_WRITE, HAT_LOAD_NOCONSIST);
			sz += PAGESIZE;
			cfg->foundsm++;
			dump_set_used(pfn);
			if (sz >= cfg->maxsize)
				goto foundmax;
		}
	}

	/*
	 * Fall back to lzjb if we did not get enough memory for bzip2.
	 * This check is skipped when maxsize was reached, because both
	 * loops above jump straight to foundmax in that case.
	 */
	endsz = (cfg->maxsize * cfg->threshold) / cfg->nhelper;
	if (sz < endsz) {
		cfg->clevel = DUMP_CLEVEL_LZJB;
	}

	/* Allocate memory for as many helpers as we can. */
foundmax:

	/*
	 * Byte offsets into memory found and mapped above: endsz is the
	 * total mapped, and sz restarts at zero as the allocation cursor.
	 */
	endsz = sz;
	sz = 0;

	/* Set the size for bzip2 state. Only bzip2 needs it. */
	bz2size = BZ2_bzCompressInitSize(dump_bzip2_level);

	/* Skip the preallocated output buffers. */
	cp = &cfg->cbuf[MINCBUFS];

	/* Use this to move memory up from the preallocated helpers. */
	ohp = cfg->helper;

	/* Loop over all helpers and allocate memory. */
	for (hp = cfg->helper; hp < endhp; hp++) {

		/* Skip preallocated helpers by checking hp->page. */
		if (hp->page == NULL) {
			if (cfg->clevel <= DUMP_CLEVEL_LZJB) {
				/* lzjb needs 2 1-page buffers */
				if ((sz + (2 * PAGESIZE)) > endsz)
					break;
				hp->page = cfg->maxvm + sz;
				sz += PAGESIZE;
				hp->lzbuf = cfg->maxvm + sz;
				sz += PAGESIZE;

			} else if (ohp->lzbuf != NULL) {
				/* re-use the preallocated lzjb page for bzip2 */
				hp->page = ohp->lzbuf;
				ohp->lzbuf = NULL;
				++ohp;

			} else {
				/* bzip2 needs a 1-page buffer */
				if ((sz + PAGESIZE) > endsz)
					break;
				hp->page = cfg->maxvm + sz;
				sz += PAGESIZE;
			}
		}

		/*
		 * Add output buffers per helper. The number of
		 * buffers per helper is determined by the ratio of
		 * ncbuf to nhelper.
		 */
		for (k = 0; cp < endcp && (sz + CBUF_SIZE) <= endsz &&
		    k < NCBUF_PER_HELPER; k++) {
			cp->state = CBUF_FREEBUF;
			cp->size = CBUF_SIZE;
			cp->buf = cfg->maxvm + sz;
			sz += CBUF_SIZE;
			++cp;
		}

		/*
		 * bzip2 needs compression state. Use the dumpbzalloc
		 * and dumpbzfree callbacks to allocate the memory.
		 * bzip2 does allocation only at init time.
		 */
		if (cfg->clevel >= DUMP_CLEVEL_BZIP2) {
			if ((sz + bz2size) > endsz) {
				/* no room for state: this helper goes unused */
				hp->page = NULL;
				break;
			} else {
				/* dumpbzalloc advances sz through opaque */
				hp->bzstream.opaque = &sz;
				hp->bzstream.bzalloc = dumpbzalloc;
				hp->bzstream.bzfree = dumpbzfree;
				(void) BZ2_bzCompressInit(&hp->bzstream,
				    dump_bzip2_level, 0, 0);
				hp->bzstream.opaque = NULL;
			}
		}
	}

	/* Finish allocating output buffers */
	for (; cp < endcp && (sz + CBUF_SIZE) <= endsz; cp++) {
		cp->state = CBUF_FREEBUF;
		cp->size = CBUF_SIZE;
		cp->buf = cfg->maxvm + sz;
		sz += CBUF_SIZE;
	}

	/* Enable IS_DUMP_PAGE macro, which checks for pages we took. */
	if (cfg->found4m || cfg->foundsm)
		dump_check_used = 1;

	ASSERT(sz <= endsz);
}

/*
 * Set up the dump header and the dump bitmaps; the caller must hold
 * dump_lock.
 *
 * On the first call (dumphdr == NULL) this allocates and fills in the
 * dump header, allocates the dump I/O buffer, the pid array and the
 * helper CPU map, and initializes dumpcfg.helper_lock in the held
 * state.
 *
 * On every call it replaces the page bitmap and the range bitmap if
 * the current number of physical pages differs from the recorded
 * bitmap size.  The new bitmaps come from kmem_alloc() and are not
 * cleared here.
 */
static void
dumphdr_init(void)
{
	pgcnt_t npages = 0;

	ASSERT(MUTEX_HELD(&dump_lock));

	if (dumphdr == NULL) {
		dumphdr = kmem_zalloc(sizeof (dumphdr_t), KM_SLEEP);
		dumphdr->dump_magic = DUMP_MAGIC;
		dumphdr->dump_version = DUMP_VERSION;
		dumphdr->dump_wordsize = DUMP_WORDSIZE;
		dumphdr->dump_pageshift = PAGESHIFT;
		dumphdr->dump_pagesize = PAGESIZE;
		dumphdr->dump_utsname = utsname;
		(void) strcpy(dumphdr->dump_platform, platform);
		dumpbuf.size = dumpbuf_iosize(maxphys);
		dumpbuf.start = kmem_alloc(dumpbuf.size, KM_SLEEP);
		dumpbuf.end = dumpbuf.start + dumpbuf.size;
		dumpcfg.pids = kmem_alloc(v.v_proc * sizeof (pid_t), KM_SLEEP);
		dumpcfg.helpermap = kmem_zalloc(BT_SIZEOFMAP(NCPU), KM_SLEEP);
		LOCK_INIT_HELD(&dumpcfg.helper_lock);
	}

	npages = num_phys_pages();

	if (dumpcfg.bitmapsize != npages) {
		/* one range bit per CBUF_MAPNP pages, rounded up */
		size_t rlen = CBUF_MAPP2R(P2ROUNDUP(npages, CBUF_MAPNP));
		void *map = kmem_alloc(BT_SIZEOFMAP(npages), KM_SLEEP);
		void *rmap = kmem_alloc(BT_SIZEOFMAP(rlen), KM_SLEEP);

		/* free the old maps using the sizes they were made with */
		if (dumpcfg.bitmap != NULL)
			kmem_free(dumpcfg.bitmap, BT_SIZEOFMAP(dumpcfg.
			    bitmapsize));
		if (dumpcfg.rbitmap != NULL)
			kmem_free(dumpcfg.rbitmap, BT_SIZEOFMAP(dumpcfg.
			    rbitmapsize));
		dumpcfg.bitmap = map;
		dumpcfg.bitmapsize = npages;
		dumpcfg.rbitmap = rmap;
		dumpcfg.rbitmapsize = rlen;
	}
}

/*
 * Establish a new dump device.
 */
int
dumpinit(vnode_t *vp, char *name, int justchecking)
{
	vnode_t *cvp;
	vattr_t vattr;
	vnode_t *cdev_vp;
	int error = 0;

	ASSERT(MUTEX_HELD(&dump_lock));

	dumphdr_init();

	cvp = common_specvp(vp);
	if (cvp == dumpvp)
		return (0);

	/*
	 * Determine whether this is a plausible dump device.  We want either:
	 * (1) a real device that's not mounted and has a cb_dump routine, or
	 * (2) a swapfile on some filesystem that has a vop_dump routine.
11510Sstevel@tonic-gate */ 11525331Samw if ((error = VOP_OPEN(&cvp, FREAD | FWRITE, kcred, NULL)) != 0) 11530Sstevel@tonic-gate return (error); 11540Sstevel@tonic-gate 11550Sstevel@tonic-gate vattr.va_mask = AT_SIZE | AT_TYPE | AT_RDEV; 11565331Samw if ((error = VOP_GETATTR(cvp, &vattr, 0, kcred, NULL)) == 0) { 11570Sstevel@tonic-gate if (vattr.va_type == VBLK || vattr.va_type == VCHR) { 11580Sstevel@tonic-gate if (devopsp[getmajor(vattr.va_rdev)]-> 11590Sstevel@tonic-gate devo_cb_ops->cb_dump == nodev) 11600Sstevel@tonic-gate error = ENOTSUP; 11610Sstevel@tonic-gate else if (vfs_devismounted(vattr.va_rdev)) 11620Sstevel@tonic-gate error = EBUSY; 116310588SEric.Taylor@Sun.COM if (strcmp(ddi_driver_name(VTOS(cvp)->s_dip), 116410588SEric.Taylor@Sun.COM ZFS_DRIVER) == 0 && 116510588SEric.Taylor@Sun.COM IS_SWAPVP(common_specvp(cvp))) 116610588SEric.Taylor@Sun.COM error = EBUSY; 11670Sstevel@tonic-gate } else { 11680Sstevel@tonic-gate if (vn_matchopval(cvp, VOPNAME_DUMP, fs_nosys) || 11690Sstevel@tonic-gate !IS_SWAPVP(cvp)) 11700Sstevel@tonic-gate error = ENOTSUP; 11710Sstevel@tonic-gate } 11720Sstevel@tonic-gate } 11730Sstevel@tonic-gate 11740Sstevel@tonic-gate if (error == 0 && vattr.va_size < 2 * DUMP_LOGSIZE + DUMP_ERPTSIZE) 11750Sstevel@tonic-gate error = ENOSPC; 11760Sstevel@tonic-gate 11770Sstevel@tonic-gate if (error || justchecking) { 11785331Samw (void) VOP_CLOSE(cvp, FREAD | FWRITE, 1, (offset_t)0, 11795331Samw kcred, NULL); 11800Sstevel@tonic-gate return (error); 11810Sstevel@tonic-gate } 11820Sstevel@tonic-gate 11830Sstevel@tonic-gate VN_HOLD(cvp); 11840Sstevel@tonic-gate 11850Sstevel@tonic-gate if (dumpvp != NULL) 11860Sstevel@tonic-gate dumpfini(); /* unconfigure the old dump device */ 11870Sstevel@tonic-gate 11880Sstevel@tonic-gate dumpvp = cvp; 11890Sstevel@tonic-gate dumpvp_size = vattr.va_size & -DUMP_OFFSET; 11900Sstevel@tonic-gate dumppath = kmem_alloc(strlen(name) + 1, KM_SLEEP); 11910Sstevel@tonic-gate (void) strcpy(dumppath, name); 
119210843SDave.Plauger@Sun.COM dumpbuf.iosize = 0; 11930Sstevel@tonic-gate 11940Sstevel@tonic-gate /* 11950Sstevel@tonic-gate * If the dump device is a block device, attempt to open up the 11960Sstevel@tonic-gate * corresponding character device and determine its maximum transfer 11970Sstevel@tonic-gate * size. We use this information to potentially resize dumpbuf to a 11980Sstevel@tonic-gate * larger and more optimal size for performing i/o to the dump device. 11990Sstevel@tonic-gate */ 12000Sstevel@tonic-gate if (cvp->v_type == VBLK && 12010Sstevel@tonic-gate (cdev_vp = makespecvp(VTOS(cvp)->s_dev, VCHR)) != NULL) { 12025331Samw if (VOP_OPEN(&cdev_vp, FREAD | FWRITE, kcred, NULL) == 0) { 12030Sstevel@tonic-gate size_t blk_size; 12040Sstevel@tonic-gate struct dk_cinfo dki; 12059889SLarry.Liu@Sun.COM struct dk_minfo minf; 12060Sstevel@tonic-gate 12079889SLarry.Liu@Sun.COM if (VOP_IOCTL(cdev_vp, DKIOCGMEDIAINFO, 12089889SLarry.Liu@Sun.COM (intptr_t)&minf, FKIOCTL, kcred, NULL, NULL) 12099889SLarry.Liu@Sun.COM == 0 && minf.dki_lbsize != 0) 12109889SLarry.Liu@Sun.COM blk_size = minf.dki_lbsize; 12110Sstevel@tonic-gate else 12120Sstevel@tonic-gate blk_size = DEV_BSIZE; 12130Sstevel@tonic-gate 12140Sstevel@tonic-gate if (VOP_IOCTL(cdev_vp, DKIOCINFO, (intptr_t)&dki, 12155331Samw FKIOCTL, kcred, NULL, NULL) == 0) { 121610843SDave.Plauger@Sun.COM dumpbuf.iosize = dki.dki_maxtransfer * blk_size; 12170Sstevel@tonic-gate dumpbuf_resize(); 12180Sstevel@tonic-gate } 12196423Sgw25295 /* 122010588SEric.Taylor@Sun.COM * If we are working with a zvol then dumpify it 122110588SEric.Taylor@Sun.COM * if it's not being used as swap. 
12226423Sgw25295 */ 12236423Sgw25295 if (strcmp(dki.dki_dname, ZVOL_DRIVER) == 0) { 122410588SEric.Taylor@Sun.COM if (IS_SWAPVP(common_specvp(cvp))) 122510588SEric.Taylor@Sun.COM error = EBUSY; 122610588SEric.Taylor@Sun.COM else if ((error = VOP_IOCTL(cdev_vp, 12276423Sgw25295 DKIOCDUMPINIT, NULL, FKIOCTL, kcred, 122810588SEric.Taylor@Sun.COM NULL, NULL)) != 0) 12296423Sgw25295 dumpfini(); 12306423Sgw25295 } 12310Sstevel@tonic-gate 12325331Samw (void) VOP_CLOSE(cdev_vp, FREAD | FWRITE, 1, 0, 12335331Samw kcred, NULL); 12340Sstevel@tonic-gate } 12350Sstevel@tonic-gate 12360Sstevel@tonic-gate VN_RELE(cdev_vp); 12370Sstevel@tonic-gate } 12380Sstevel@tonic-gate 12390Sstevel@tonic-gate cmn_err(CE_CONT, "?dump on %s size %llu MB\n", name, dumpvp_size >> 20); 12400Sstevel@tonic-gate 124110843SDave.Plauger@Sun.COM dump_update_clevel(); 124210843SDave.Plauger@Sun.COM 12436423Sgw25295 return (error); 12440Sstevel@tonic-gate } 12450Sstevel@tonic-gate 12460Sstevel@tonic-gate void 12470Sstevel@tonic-gate dumpfini(void) 12480Sstevel@tonic-gate { 12496423Sgw25295 vattr_t vattr; 12506423Sgw25295 boolean_t is_zfs = B_FALSE; 12516423Sgw25295 vnode_t *cdev_vp; 12520Sstevel@tonic-gate ASSERT(MUTEX_HELD(&dump_lock)); 12530Sstevel@tonic-gate 12540Sstevel@tonic-gate kmem_free(dumppath, strlen(dumppath) + 1); 12550Sstevel@tonic-gate 12566423Sgw25295 /* 12576423Sgw25295 * Determine if we are using zvols for our dump device 12586423Sgw25295 */ 12596423Sgw25295 vattr.va_mask = AT_RDEV; 12606423Sgw25295 if (VOP_GETATTR(dumpvp, &vattr, 0, kcred, NULL) == 0) { 12616423Sgw25295 is_zfs = (getmajor(vattr.va_rdev) == 12626423Sgw25295 ddi_name_to_major(ZFS_DRIVER)) ? B_TRUE : B_FALSE; 12636423Sgw25295 } 12646423Sgw25295 12656423Sgw25295 /* 12666423Sgw25295 * If we have a zvol dump device then we call into zfs so 12676423Sgw25295 * that it may have a chance to cleanup. 
12686423Sgw25295 */ 12696423Sgw25295 if (is_zfs && 12706423Sgw25295 (cdev_vp = makespecvp(VTOS(dumpvp)->s_dev, VCHR)) != NULL) { 12716423Sgw25295 if (VOP_OPEN(&cdev_vp, FREAD | FWRITE, kcred, NULL) == 0) { 12726423Sgw25295 (void) VOP_IOCTL(cdev_vp, DKIOCDUMPFINI, NULL, FKIOCTL, 12736423Sgw25295 kcred, NULL, NULL); 12746423Sgw25295 (void) VOP_CLOSE(cdev_vp, FREAD | FWRITE, 1, 0, 12756423Sgw25295 kcred, NULL); 12766423Sgw25295 } 12776423Sgw25295 VN_RELE(cdev_vp); 12786423Sgw25295 } 12796423Sgw25295 12805331Samw (void) VOP_CLOSE(dumpvp, FREAD | FWRITE, 1, (offset_t)0, kcred, NULL); 12810Sstevel@tonic-gate 12820Sstevel@tonic-gate VN_RELE(dumpvp); 12830Sstevel@tonic-gate 12840Sstevel@tonic-gate dumpvp = NULL; 12850Sstevel@tonic-gate dumpvp_size = 0; 12860Sstevel@tonic-gate dumppath = NULL; 12870Sstevel@tonic-gate } 12880Sstevel@tonic-gate 12890Sstevel@tonic-gate static offset_t 12900Sstevel@tonic-gate dumpvp_flush(void) 12910Sstevel@tonic-gate { 129210843SDave.Plauger@Sun.COM size_t size = P2ROUNDUP(dumpbuf.cur - dumpbuf.start, PAGESIZE); 129310843SDave.Plauger@Sun.COM hrtime_t iotime; 12940Sstevel@tonic-gate int err; 12950Sstevel@tonic-gate 129610843SDave.Plauger@Sun.COM if (dumpbuf.vp_off + size > dumpbuf.vp_limit) { 12970Sstevel@tonic-gate dump_ioerr = ENOSPC; 129810843SDave.Plauger@Sun.COM dumpbuf.vp_off = dumpbuf.vp_limit; 12990Sstevel@tonic-gate } else if (size != 0) { 130010843SDave.Plauger@Sun.COM iotime = gethrtime(); 130110843SDave.Plauger@Sun.COM dumpsync.iowait += iotime - dumpsync.iowaitts; 13020Sstevel@tonic-gate if (panicstr) 130310843SDave.Plauger@Sun.COM err = VOP_DUMP(dumpvp, dumpbuf.start, 130410843SDave.Plauger@Sun.COM lbtodb(dumpbuf.vp_off), btod(size), NULL); 13050Sstevel@tonic-gate else 130610843SDave.Plauger@Sun.COM err = vn_rdwr(UIO_WRITE, dumpbuf.cdev_vp != NULL ? 
130710843SDave.Plauger@Sun.COM dumpbuf.cdev_vp : dumpvp, dumpbuf.start, size, 130810843SDave.Plauger@Sun.COM dumpbuf.vp_off, UIO_SYSSPACE, 0, dumpbuf.vp_limit, 13090Sstevel@tonic-gate kcred, 0); 13100Sstevel@tonic-gate if (err && dump_ioerr == 0) 13110Sstevel@tonic-gate dump_ioerr = err; 131210843SDave.Plauger@Sun.COM dumpsync.iowaitts = gethrtime(); 131310843SDave.Plauger@Sun.COM dumpsync.iotime += dumpsync.iowaitts - iotime; 131410843SDave.Plauger@Sun.COM dumpsync.nwrite += size; 131510843SDave.Plauger@Sun.COM dumpbuf.vp_off += size; 13160Sstevel@tonic-gate } 131710843SDave.Plauger@Sun.COM dumpbuf.cur = dumpbuf.start; 13180Sstevel@tonic-gate dump_timeleft = dump_timeout; 131910843SDave.Plauger@Sun.COM return (dumpbuf.vp_off); 13200Sstevel@tonic-gate } 13210Sstevel@tonic-gate 132210843SDave.Plauger@Sun.COM /* maximize write speed by keeping seek offset aligned with size */ 13230Sstevel@tonic-gate void 13240Sstevel@tonic-gate dumpvp_write(const void *va, size_t size) 13250Sstevel@tonic-gate { 132610843SDave.Plauger@Sun.COM size_t len, off, sz; 132710843SDave.Plauger@Sun.COM 13280Sstevel@tonic-gate while (size != 0) { 132910843SDave.Plauger@Sun.COM len = MIN(size, dumpbuf.end - dumpbuf.cur); 13300Sstevel@tonic-gate if (len == 0) { 133110843SDave.Plauger@Sun.COM off = P2PHASE(dumpbuf.vp_off, dumpbuf.size); 133210843SDave.Plauger@Sun.COM if (off == 0 || !ISP2(dumpbuf.size)) { 133310843SDave.Plauger@Sun.COM (void) dumpvp_flush(); 133410843SDave.Plauger@Sun.COM } else { 133510843SDave.Plauger@Sun.COM sz = dumpbuf.size - off; 133610843SDave.Plauger@Sun.COM dumpbuf.cur = dumpbuf.start + sz; 133710843SDave.Plauger@Sun.COM (void) dumpvp_flush(); 133810843SDave.Plauger@Sun.COM ovbcopy(dumpbuf.start + sz, dumpbuf.start, off); 133910843SDave.Plauger@Sun.COM dumpbuf.cur += off; 134010843SDave.Plauger@Sun.COM } 13410Sstevel@tonic-gate } else { 134210843SDave.Plauger@Sun.COM bcopy(va, dumpbuf.cur, len); 13430Sstevel@tonic-gate va = (char *)va + len; 134410843SDave.Plauger@Sun.COM 
dumpbuf.cur += len; 13450Sstevel@tonic-gate size -= len; 13460Sstevel@tonic-gate } 13470Sstevel@tonic-gate } 13480Sstevel@tonic-gate } 13490Sstevel@tonic-gate 13500Sstevel@tonic-gate /*ARGSUSED*/ 13510Sstevel@tonic-gate static void 13520Sstevel@tonic-gate dumpvp_ksyms_write(const void *src, void *dst, size_t size) 13530Sstevel@tonic-gate { 13540Sstevel@tonic-gate dumpvp_write(src, size); 13550Sstevel@tonic-gate } 13560Sstevel@tonic-gate 13570Sstevel@tonic-gate /* 13580Sstevel@tonic-gate * Mark 'pfn' in the bitmap and dump its translation table entry. 13590Sstevel@tonic-gate */ 13600Sstevel@tonic-gate void 13610Sstevel@tonic-gate dump_addpage(struct as *as, void *va, pfn_t pfn) 13620Sstevel@tonic-gate { 13630Sstevel@tonic-gate mem_vtop_t mem_vtop; 13640Sstevel@tonic-gate pgcnt_t bitnum; 13650Sstevel@tonic-gate 13660Sstevel@tonic-gate if ((bitnum = dump_pfn_to_bitnum(pfn)) != (pgcnt_t)-1) { 136710843SDave.Plauger@Sun.COM if (!BT_TEST(dumpcfg.bitmap, bitnum)) { 13680Sstevel@tonic-gate dumphdr->dump_npages++; 136910843SDave.Plauger@Sun.COM BT_SET(dumpcfg.bitmap, bitnum); 13700Sstevel@tonic-gate } 13710Sstevel@tonic-gate dumphdr->dump_nvtop++; 13720Sstevel@tonic-gate mem_vtop.m_as = as; 13730Sstevel@tonic-gate mem_vtop.m_va = va; 13740Sstevel@tonic-gate mem_vtop.m_pfn = pfn; 13750Sstevel@tonic-gate dumpvp_write(&mem_vtop, sizeof (mem_vtop_t)); 13760Sstevel@tonic-gate } 13770Sstevel@tonic-gate dump_timeleft = dump_timeout; 13780Sstevel@tonic-gate } 13790Sstevel@tonic-gate 13800Sstevel@tonic-gate /* 13810Sstevel@tonic-gate * Mark 'pfn' in the bitmap 13820Sstevel@tonic-gate */ 13830Sstevel@tonic-gate void 13840Sstevel@tonic-gate dump_page(pfn_t pfn) 13850Sstevel@tonic-gate { 13860Sstevel@tonic-gate pgcnt_t bitnum; 13870Sstevel@tonic-gate 13880Sstevel@tonic-gate if ((bitnum = dump_pfn_to_bitnum(pfn)) != (pgcnt_t)-1) { 138910843SDave.Plauger@Sun.COM if (!BT_TEST(dumpcfg.bitmap, bitnum)) { 13900Sstevel@tonic-gate dumphdr->dump_npages++; 139110843SDave.Plauger@Sun.COM 
BT_SET(dumpcfg.bitmap, bitnum); 13920Sstevel@tonic-gate } 13930Sstevel@tonic-gate } 13940Sstevel@tonic-gate dump_timeleft = dump_timeout; 13950Sstevel@tonic-gate } 13960Sstevel@tonic-gate 13970Sstevel@tonic-gate /* 13980Sstevel@tonic-gate * Dump the <as, va, pfn> information for a given address space. 13990Sstevel@tonic-gate * SEGOP_DUMP() will call dump_addpage() for each page in the segment. 14000Sstevel@tonic-gate */ 14010Sstevel@tonic-gate static void 14020Sstevel@tonic-gate dump_as(struct as *as) 14030Sstevel@tonic-gate { 14040Sstevel@tonic-gate struct seg *seg; 14050Sstevel@tonic-gate 14060Sstevel@tonic-gate AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 14070Sstevel@tonic-gate for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) { 14080Sstevel@tonic-gate if (seg->s_as != as) 14090Sstevel@tonic-gate break; 14100Sstevel@tonic-gate if (seg->s_ops == NULL) 14110Sstevel@tonic-gate continue; 14120Sstevel@tonic-gate SEGOP_DUMP(seg); 14130Sstevel@tonic-gate } 14140Sstevel@tonic-gate AS_LOCK_EXIT(as, &as->a_lock); 14150Sstevel@tonic-gate 14160Sstevel@tonic-gate if (seg != NULL) 14170Sstevel@tonic-gate cmn_err(CE_WARN, "invalid segment %p in address space %p", 14180Sstevel@tonic-gate (void *)seg, (void *)as); 14190Sstevel@tonic-gate } 14200Sstevel@tonic-gate 14210Sstevel@tonic-gate static int 14220Sstevel@tonic-gate dump_process(pid_t pid) 14230Sstevel@tonic-gate { 14240Sstevel@tonic-gate proc_t *p = sprlock(pid); 14250Sstevel@tonic-gate 14260Sstevel@tonic-gate if (p == NULL) 14270Sstevel@tonic-gate return (-1); 14280Sstevel@tonic-gate if (p->p_as != &kas) { 14290Sstevel@tonic-gate mutex_exit(&p->p_lock); 14300Sstevel@tonic-gate dump_as(p->p_as); 14310Sstevel@tonic-gate mutex_enter(&p->p_lock); 14320Sstevel@tonic-gate } 14330Sstevel@tonic-gate 14340Sstevel@tonic-gate sprunlock(p); 14350Sstevel@tonic-gate 14360Sstevel@tonic-gate return (0); 14370Sstevel@tonic-gate } 14380Sstevel@tonic-gate 14390Sstevel@tonic-gate void 14400Sstevel@tonic-gate dump_ereports(void) 
14410Sstevel@tonic-gate { 14420Sstevel@tonic-gate u_offset_t dumpvp_start; 14430Sstevel@tonic-gate erpt_dump_t ed; 14440Sstevel@tonic-gate 14450Sstevel@tonic-gate if (dumpvp == NULL || dumphdr == NULL) 14460Sstevel@tonic-gate return; 14470Sstevel@tonic-gate 144810843SDave.Plauger@Sun.COM dumpbuf.cur = dumpbuf.start; 144910843SDave.Plauger@Sun.COM dumpbuf.vp_limit = dumpvp_size - (DUMP_OFFSET + DUMP_LOGSIZE); 145010843SDave.Plauger@Sun.COM dumpvp_start = dumpbuf.vp_limit - DUMP_ERPTSIZE; 145110843SDave.Plauger@Sun.COM dumpbuf.vp_off = dumpvp_start; 14520Sstevel@tonic-gate 14530Sstevel@tonic-gate fm_ereport_dump(); 14540Sstevel@tonic-gate if (panicstr) 14550Sstevel@tonic-gate errorq_dump(); 14560Sstevel@tonic-gate 14570Sstevel@tonic-gate bzero(&ed, sizeof (ed)); /* indicate end of ereports */ 14580Sstevel@tonic-gate dumpvp_write(&ed, sizeof (ed)); 14590Sstevel@tonic-gate (void) dumpvp_flush(); 14600Sstevel@tonic-gate 14610Sstevel@tonic-gate if (!panicstr) { 14620Sstevel@tonic-gate (void) VOP_PUTPAGE(dumpvp, dumpvp_start, 146310843SDave.Plauger@Sun.COM (size_t)(dumpbuf.vp_off - dumpvp_start), 14645331Samw B_INVAL | B_FORCE, kcred, NULL); 14650Sstevel@tonic-gate } 14660Sstevel@tonic-gate } 14670Sstevel@tonic-gate 14680Sstevel@tonic-gate void 14690Sstevel@tonic-gate dump_messages(void) 14700Sstevel@tonic-gate { 14710Sstevel@tonic-gate log_dump_t ld; 14720Sstevel@tonic-gate mblk_t *mctl, *mdata; 14730Sstevel@tonic-gate queue_t *q, *qlast; 14740Sstevel@tonic-gate u_offset_t dumpvp_start; 14750Sstevel@tonic-gate 14760Sstevel@tonic-gate if (dumpvp == NULL || dumphdr == NULL || log_consq == NULL) 14770Sstevel@tonic-gate return; 14780Sstevel@tonic-gate 147910843SDave.Plauger@Sun.COM dumpbuf.cur = dumpbuf.start; 148010843SDave.Plauger@Sun.COM dumpbuf.vp_limit = dumpvp_size - DUMP_OFFSET; 148110843SDave.Plauger@Sun.COM dumpvp_start = dumpbuf.vp_limit - DUMP_LOGSIZE; 148210843SDave.Plauger@Sun.COM dumpbuf.vp_off = dumpvp_start; 14830Sstevel@tonic-gate 14840Sstevel@tonic-gate 
qlast = NULL; 14850Sstevel@tonic-gate do { 14860Sstevel@tonic-gate for (q = log_consq; q->q_next != qlast; q = q->q_next) 14870Sstevel@tonic-gate continue; 14880Sstevel@tonic-gate for (mctl = q->q_first; mctl != NULL; mctl = mctl->b_next) { 14890Sstevel@tonic-gate dump_timeleft = dump_timeout; 14900Sstevel@tonic-gate mdata = mctl->b_cont; 14910Sstevel@tonic-gate ld.ld_magic = LOG_MAGIC; 14920Sstevel@tonic-gate ld.ld_msgsize = MBLKL(mctl->b_cont); 14930Sstevel@tonic-gate ld.ld_csum = checksum32(mctl->b_rptr, MBLKL(mctl)); 14940Sstevel@tonic-gate ld.ld_msum = checksum32(mdata->b_rptr, MBLKL(mdata)); 14950Sstevel@tonic-gate dumpvp_write(&ld, sizeof (ld)); 14960Sstevel@tonic-gate dumpvp_write(mctl->b_rptr, MBLKL(mctl)); 14970Sstevel@tonic-gate dumpvp_write(mdata->b_rptr, MBLKL(mdata)); 14980Sstevel@tonic-gate } 14990Sstevel@tonic-gate } while ((qlast = q) != log_consq); 15000Sstevel@tonic-gate 15010Sstevel@tonic-gate ld.ld_magic = 0; /* indicate end of messages */ 15020Sstevel@tonic-gate dumpvp_write(&ld, sizeof (ld)); 15030Sstevel@tonic-gate (void) dumpvp_flush(); 15040Sstevel@tonic-gate if (!panicstr) { 15050Sstevel@tonic-gate (void) VOP_PUTPAGE(dumpvp, dumpvp_start, 150610843SDave.Plauger@Sun.COM (size_t)(dumpbuf.vp_off - dumpvp_start), 15075331Samw B_INVAL | B_FORCE, kcred, NULL); 15080Sstevel@tonic-gate } 15090Sstevel@tonic-gate } 15100Sstevel@tonic-gate 151110843SDave.Plauger@Sun.COM /* 151210843SDave.Plauger@Sun.COM * The following functions are called on multiple CPUs during dump. 151310843SDave.Plauger@Sun.COM * They must not use most kernel services, because all cross-calls are 151410843SDave.Plauger@Sun.COM * disabled during panic. Therefore, blocking locks and cache flushes 151510843SDave.Plauger@Sun.COM * will not work. 151610843SDave.Plauger@Sun.COM */ 151710843SDave.Plauger@Sun.COM 151811178SDave.Plauger@Sun.COM /* 151911178SDave.Plauger@Sun.COM * Copy pages, trapping ECC errors. 
Also, for robustness, trap data 152011178SDave.Plauger@Sun.COM * access in case something goes wrong in the hat layer and the 152111178SDave.Plauger@Sun.COM * mapping is broken. 152211178SDave.Plauger@Sun.COM */ 152310843SDave.Plauger@Sun.COM static int 15240Sstevel@tonic-gate dump_pagecopy(void *src, void *dst) 15250Sstevel@tonic-gate { 15260Sstevel@tonic-gate long *wsrc = (long *)src; 15270Sstevel@tonic-gate long *wdst = (long *)dst; 15280Sstevel@tonic-gate const ulong_t ncopies = PAGESIZE / sizeof (long); 15290Sstevel@tonic-gate volatile int w = 0; 15300Sstevel@tonic-gate volatile int ueoff = -1; 15310Sstevel@tonic-gate on_trap_data_t otd; 15320Sstevel@tonic-gate 153311178SDave.Plauger@Sun.COM if (on_trap(&otd, OT_DATA_EC | OT_DATA_ACCESS)) { 153410843SDave.Plauger@Sun.COM if (ueoff == -1) 15350Sstevel@tonic-gate ueoff = w * sizeof (long); 153611178SDave.Plauger@Sun.COM /* report "bad ECC" or "bad address" */ 15370Sstevel@tonic-gate #ifdef _LP64 153811178SDave.Plauger@Sun.COM if (otd.ot_trap & OT_DATA_EC) 153911178SDave.Plauger@Sun.COM wdst[w++] = 0x00badecc00badecc; 154011178SDave.Plauger@Sun.COM else 154111178SDave.Plauger@Sun.COM wdst[w++] = 0x00badadd00badadd; 15420Sstevel@tonic-gate #else 154311178SDave.Plauger@Sun.COM if (otd.ot_trap & OT_DATA_EC) 154411178SDave.Plauger@Sun.COM wdst[w++] = 0x00badecc; 154511178SDave.Plauger@Sun.COM else 154611178SDave.Plauger@Sun.COM wdst[w++] = 0x00badadd; 15470Sstevel@tonic-gate #endif 15480Sstevel@tonic-gate } 15490Sstevel@tonic-gate while (w < ncopies) { 15500Sstevel@tonic-gate wdst[w] = wsrc[w]; 15510Sstevel@tonic-gate w++; 15520Sstevel@tonic-gate } 15530Sstevel@tonic-gate no_trap(); 155410843SDave.Plauger@Sun.COM return (ueoff); 15550Sstevel@tonic-gate } 15560Sstevel@tonic-gate 155710843SDave.Plauger@Sun.COM static void 155810843SDave.Plauger@Sun.COM dumpsys_close_cq(cqueue_t *cq, int live) 155910843SDave.Plauger@Sun.COM { 156010843SDave.Plauger@Sun.COM if (live) { 156110843SDave.Plauger@Sun.COM 
mutex_enter(&cq->mutex); 156210843SDave.Plauger@Sun.COM atomic_dec_uint(&cq->open); 156310843SDave.Plauger@Sun.COM cv_signal(&cq->cv); 156410843SDave.Plauger@Sun.COM mutex_exit(&cq->mutex); 156510843SDave.Plauger@Sun.COM } else { 156610843SDave.Plauger@Sun.COM atomic_dec_uint(&cq->open); 156710843SDave.Plauger@Sun.COM } 156810843SDave.Plauger@Sun.COM } 156910843SDave.Plauger@Sun.COM 157010843SDave.Plauger@Sun.COM static inline void 157110843SDave.Plauger@Sun.COM dumpsys_spinlock(lock_t *lp) 157210843SDave.Plauger@Sun.COM { 157310843SDave.Plauger@Sun.COM uint_t backoff = 0; 157410843SDave.Plauger@Sun.COM int loop_count = 0; 157510843SDave.Plauger@Sun.COM 157610843SDave.Plauger@Sun.COM while (LOCK_HELD(lp) || !lock_spin_try(lp)) { 157710843SDave.Plauger@Sun.COM if (++loop_count >= ncpus) { 157810843SDave.Plauger@Sun.COM backoff = mutex_lock_backoff(0); 157910843SDave.Plauger@Sun.COM loop_count = 0; 158010843SDave.Plauger@Sun.COM } else { 158110843SDave.Plauger@Sun.COM backoff = mutex_lock_backoff(backoff); 158210843SDave.Plauger@Sun.COM } 158310843SDave.Plauger@Sun.COM mutex_lock_delay(backoff); 158410843SDave.Plauger@Sun.COM } 158510843SDave.Plauger@Sun.COM } 158610843SDave.Plauger@Sun.COM 158710843SDave.Plauger@Sun.COM static inline void 158810843SDave.Plauger@Sun.COM dumpsys_spinunlock(lock_t *lp) 158910843SDave.Plauger@Sun.COM { 159010843SDave.Plauger@Sun.COM lock_clear(lp); 159110843SDave.Plauger@Sun.COM } 159210843SDave.Plauger@Sun.COM 159310843SDave.Plauger@Sun.COM static inline void 159410843SDave.Plauger@Sun.COM dumpsys_lock(cqueue_t *cq, int live) 159510843SDave.Plauger@Sun.COM { 159610843SDave.Plauger@Sun.COM if (live) 159710843SDave.Plauger@Sun.COM mutex_enter(&cq->mutex); 159810843SDave.Plauger@Sun.COM else 159910843SDave.Plauger@Sun.COM dumpsys_spinlock(&cq->spinlock); 160010843SDave.Plauger@Sun.COM } 160110843SDave.Plauger@Sun.COM 160210843SDave.Plauger@Sun.COM static inline void 160310843SDave.Plauger@Sun.COM dumpsys_unlock(cqueue_t *cq, int live, int 
signal) 160410843SDave.Plauger@Sun.COM { 160510843SDave.Plauger@Sun.COM if (live) { 160610843SDave.Plauger@Sun.COM if (signal) 160710843SDave.Plauger@Sun.COM cv_signal(&cq->cv); 160810843SDave.Plauger@Sun.COM mutex_exit(&cq->mutex); 160910843SDave.Plauger@Sun.COM } else { 161010843SDave.Plauger@Sun.COM dumpsys_spinunlock(&cq->spinlock); 161110843SDave.Plauger@Sun.COM } 161210843SDave.Plauger@Sun.COM } 161310843SDave.Plauger@Sun.COM 161410843SDave.Plauger@Sun.COM static void 161510843SDave.Plauger@Sun.COM dumpsys_wait_cq(cqueue_t *cq, int live) 161610843SDave.Plauger@Sun.COM { 161710843SDave.Plauger@Sun.COM if (live) { 161810843SDave.Plauger@Sun.COM cv_wait(&cq->cv, &cq->mutex); 161910843SDave.Plauger@Sun.COM } else { 162010843SDave.Plauger@Sun.COM dumpsys_spinunlock(&cq->spinlock); 162110843SDave.Plauger@Sun.COM while (cq->open) 162210843SDave.Plauger@Sun.COM if (cq->first) 162310843SDave.Plauger@Sun.COM break; 162410843SDave.Plauger@Sun.COM dumpsys_spinlock(&cq->spinlock); 162510843SDave.Plauger@Sun.COM } 162610843SDave.Plauger@Sun.COM } 162710843SDave.Plauger@Sun.COM 162810843SDave.Plauger@Sun.COM static void 162910843SDave.Plauger@Sun.COM dumpsys_put_cq(cqueue_t *cq, cbuf_t *cp, int newstate, int live) 163010843SDave.Plauger@Sun.COM { 163110843SDave.Plauger@Sun.COM if (cp == NULL) 163210843SDave.Plauger@Sun.COM return; 163310843SDave.Plauger@Sun.COM 163410843SDave.Plauger@Sun.COM dumpsys_lock(cq, live); 163510843SDave.Plauger@Sun.COM 163610843SDave.Plauger@Sun.COM if (cq->ts != 0) { 163710843SDave.Plauger@Sun.COM cq->empty += gethrtime() - cq->ts; 163810843SDave.Plauger@Sun.COM cq->ts = 0; 163910843SDave.Plauger@Sun.COM } 164010843SDave.Plauger@Sun.COM 164110843SDave.Plauger@Sun.COM cp->state = newstate; 164210843SDave.Plauger@Sun.COM cp->next = NULL; 164310843SDave.Plauger@Sun.COM if (cq->last == NULL) 164410843SDave.Plauger@Sun.COM cq->first = cp; 164510843SDave.Plauger@Sun.COM else 164610843SDave.Plauger@Sun.COM cq->last->next = cp; 
164710843SDave.Plauger@Sun.COM cq->last = cp; 164810843SDave.Plauger@Sun.COM 164910843SDave.Plauger@Sun.COM dumpsys_unlock(cq, live, 1); 165010843SDave.Plauger@Sun.COM } 165110843SDave.Plauger@Sun.COM 165210843SDave.Plauger@Sun.COM static cbuf_t * 165310843SDave.Plauger@Sun.COM dumpsys_get_cq(cqueue_t *cq, int live) 165410843SDave.Plauger@Sun.COM { 165510843SDave.Plauger@Sun.COM cbuf_t *cp; 165610843SDave.Plauger@Sun.COM hrtime_t now = gethrtime(); 165710843SDave.Plauger@Sun.COM 165810843SDave.Plauger@Sun.COM dumpsys_lock(cq, live); 165910843SDave.Plauger@Sun.COM 166010843SDave.Plauger@Sun.COM /* CONSTCOND */ 166110843SDave.Plauger@Sun.COM while (1) { 166210843SDave.Plauger@Sun.COM cp = (cbuf_t *)cq->first; 166310843SDave.Plauger@Sun.COM if (cp == NULL) { 166410843SDave.Plauger@Sun.COM if (cq->open == 0) 166510843SDave.Plauger@Sun.COM break; 166610843SDave.Plauger@Sun.COM dumpsys_wait_cq(cq, live); 166710843SDave.Plauger@Sun.COM continue; 166810843SDave.Plauger@Sun.COM } 166910843SDave.Plauger@Sun.COM cq->first = cp->next; 167010843SDave.Plauger@Sun.COM if (cq->first == NULL) { 167110843SDave.Plauger@Sun.COM cq->last = NULL; 167210843SDave.Plauger@Sun.COM cq->ts = now; 167310843SDave.Plauger@Sun.COM } 167410843SDave.Plauger@Sun.COM break; 167510843SDave.Plauger@Sun.COM } 167610843SDave.Plauger@Sun.COM 167710843SDave.Plauger@Sun.COM dumpsys_unlock(cq, live, cq->first != NULL || cq->open == 0); 167810843SDave.Plauger@Sun.COM return (cp); 167910843SDave.Plauger@Sun.COM } 168010843SDave.Plauger@Sun.COM 168110843SDave.Plauger@Sun.COM /* 168210843SDave.Plauger@Sun.COM * Send an error message to the console. If the main task is running 168310843SDave.Plauger@Sun.COM * just write the message via uprintf. If a helper is running the 168410843SDave.Plauger@Sun.COM * message has to be put on a queue for the main task. Setting fmt to 168510843SDave.Plauger@Sun.COM * NULL means flush the error message buffer. 
If fmt is not NULL, just 168610843SDave.Plauger@Sun.COM * add the text to the existing buffer. 168710843SDave.Plauger@Sun.COM */ 168810843SDave.Plauger@Sun.COM static void 168910843SDave.Plauger@Sun.COM dumpsys_errmsg(helper_t *hp, const char *fmt, ...) 169010843SDave.Plauger@Sun.COM { 169110843SDave.Plauger@Sun.COM dumpsync_t *ds = hp->ds; 169210843SDave.Plauger@Sun.COM cbuf_t *cp = hp->cperr; 169310843SDave.Plauger@Sun.COM va_list adx; 169410843SDave.Plauger@Sun.COM 169510843SDave.Plauger@Sun.COM if (hp->helper == MAINHELPER) { 169610843SDave.Plauger@Sun.COM if (fmt != NULL) { 169710843SDave.Plauger@Sun.COM if (ds->neednl) { 169810843SDave.Plauger@Sun.COM uprintf("\n"); 169910843SDave.Plauger@Sun.COM ds->neednl = 0; 170010843SDave.Plauger@Sun.COM } 170110843SDave.Plauger@Sun.COM va_start(adx, fmt); 170210843SDave.Plauger@Sun.COM vuprintf(fmt, adx); 170310843SDave.Plauger@Sun.COM va_end(adx); 170410843SDave.Plauger@Sun.COM } 170510843SDave.Plauger@Sun.COM } else if (fmt == NULL) { 170610843SDave.Plauger@Sun.COM if (cp != NULL) { 170710843SDave.Plauger@Sun.COM CQ_PUT(mainq, cp, CBUF_ERRMSG); 170810843SDave.Plauger@Sun.COM hp->cperr = NULL; 170910843SDave.Plauger@Sun.COM } 171010843SDave.Plauger@Sun.COM } else { 171110843SDave.Plauger@Sun.COM if (hp->cperr == NULL) { 171210843SDave.Plauger@Sun.COM cp = CQ_GET(freebufq); 171310843SDave.Plauger@Sun.COM hp->cperr = cp; 171410843SDave.Plauger@Sun.COM cp->used = 0; 171510843SDave.Plauger@Sun.COM } 171610843SDave.Plauger@Sun.COM va_start(adx, fmt); 171710843SDave.Plauger@Sun.COM cp->used += vsnprintf(cp->buf + cp->used, cp->size - cp->used, 171810843SDave.Plauger@Sun.COM fmt, adx); 171910843SDave.Plauger@Sun.COM va_end(adx); 172010843SDave.Plauger@Sun.COM if ((cp->used + LOG_MSGSIZE) > cp->size) { 172110843SDave.Plauger@Sun.COM CQ_PUT(mainq, cp, CBUF_ERRMSG); 172210843SDave.Plauger@Sun.COM hp->cperr = NULL; 172310843SDave.Plauger@Sun.COM } 172410843SDave.Plauger@Sun.COM } 172510843SDave.Plauger@Sun.COM } 
172610843SDave.Plauger@Sun.COM 172710843SDave.Plauger@Sun.COM /* 172810843SDave.Plauger@Sun.COM * Write an output buffer to the dump file. If the main task is 172910843SDave.Plauger@Sun.COM * running just write the data. If a helper is running the output is 173010843SDave.Plauger@Sun.COM * placed on a queue for the main task. 173110843SDave.Plauger@Sun.COM */ 173210843SDave.Plauger@Sun.COM static void 173310843SDave.Plauger@Sun.COM dumpsys_swrite(helper_t *hp, cbuf_t *cp, size_t used) 173410843SDave.Plauger@Sun.COM { 173510843SDave.Plauger@Sun.COM dumpsync_t *ds = hp->ds; 173610843SDave.Plauger@Sun.COM 173710843SDave.Plauger@Sun.COM if (hp->helper == MAINHELPER) { 173810843SDave.Plauger@Sun.COM HRSTART(ds->perpage, write); 173910843SDave.Plauger@Sun.COM dumpvp_write(cp->buf, used); 174010843SDave.Plauger@Sun.COM HRSTOP(ds->perpage, write); 174110843SDave.Plauger@Sun.COM CQ_PUT(freebufq, cp, CBUF_FREEBUF); 174210843SDave.Plauger@Sun.COM } else { 174310843SDave.Plauger@Sun.COM cp->used = used; 174410843SDave.Plauger@Sun.COM CQ_PUT(mainq, cp, CBUF_WRITE); 174510843SDave.Plauger@Sun.COM } 174610843SDave.Plauger@Sun.COM } 174710843SDave.Plauger@Sun.COM 174810843SDave.Plauger@Sun.COM /* 174910843SDave.Plauger@Sun.COM * Copy one page within the mapped range. The offset starts at 0 and 175010843SDave.Plauger@Sun.COM * is relative to the first pfn. cp->buf + cp->off is the address of 175110843SDave.Plauger@Sun.COM * the first pfn. If dump_pagecopy returns a UE offset, create an 175210843SDave.Plauger@Sun.COM * error message. Returns the offset to the next pfn in the range 175310843SDave.Plauger@Sun.COM * selected by the bitmap. 
175410843SDave.Plauger@Sun.COM */ 175510843SDave.Plauger@Sun.COM static int 175610843SDave.Plauger@Sun.COM dumpsys_copy_page(helper_t *hp, int offset) 175710843SDave.Plauger@Sun.COM { 175810843SDave.Plauger@Sun.COM cbuf_t *cp = hp->cpin; 175910843SDave.Plauger@Sun.COM int ueoff; 176010843SDave.Plauger@Sun.COM 176110843SDave.Plauger@Sun.COM ASSERT(cp->off + offset + PAGESIZE <= cp->size); 176210843SDave.Plauger@Sun.COM ASSERT(BT_TEST(dumpcfg.bitmap, cp->bitnum)); 176310843SDave.Plauger@Sun.COM 176410843SDave.Plauger@Sun.COM ueoff = dump_pagecopy(cp->buf + cp->off + offset, hp->page); 176510843SDave.Plauger@Sun.COM 176610843SDave.Plauger@Sun.COM /* ueoff is the offset in the page to a UE error */ 176710843SDave.Plauger@Sun.COM if (ueoff != -1) { 176810843SDave.Plauger@Sun.COM uint64_t pa = ptob(cp->pfn) + offset + ueoff; 176910843SDave.Plauger@Sun.COM 177011178SDave.Plauger@Sun.COM dumpsys_errmsg(hp, "cpu %d: memory error at PA 0x%08x.%08x\n", 177111178SDave.Plauger@Sun.COM CPU->cpu_id, (uint32_t)(pa >> 32), (uint32_t)pa); 177210843SDave.Plauger@Sun.COM } 177310843SDave.Plauger@Sun.COM 177410843SDave.Plauger@Sun.COM /* 177510843SDave.Plauger@Sun.COM * Advance bitnum and offset to the next input page for the 177610843SDave.Plauger@Sun.COM * next call to this function. 177710843SDave.Plauger@Sun.COM */ 177810843SDave.Plauger@Sun.COM offset += PAGESIZE; 177910843SDave.Plauger@Sun.COM cp->bitnum++; 178010843SDave.Plauger@Sun.COM while (cp->off + offset < cp->size) { 178110843SDave.Plauger@Sun.COM if (BT_TEST(dumpcfg.bitmap, cp->bitnum)) 178210843SDave.Plauger@Sun.COM break; 178310843SDave.Plauger@Sun.COM offset += PAGESIZE; 178410843SDave.Plauger@Sun.COM cp->bitnum++; 178510843SDave.Plauger@Sun.COM } 178610843SDave.Plauger@Sun.COM 178710843SDave.Plauger@Sun.COM return (offset); 178810843SDave.Plauger@Sun.COM } 178910843SDave.Plauger@Sun.COM 179010843SDave.Plauger@Sun.COM /* 179110843SDave.Plauger@Sun.COM * Read the helper queue, and copy one mapped page. 
Return 0 when 179210843SDave.Plauger@Sun.COM * done. Return 1 when a page has been copied into hp->page. 179310843SDave.Plauger@Sun.COM */ 179410843SDave.Plauger@Sun.COM static int 179510843SDave.Plauger@Sun.COM dumpsys_sread(helper_t *hp) 179610843SDave.Plauger@Sun.COM { 179710843SDave.Plauger@Sun.COM dumpsync_t *ds = hp->ds; 179810843SDave.Plauger@Sun.COM 179910843SDave.Plauger@Sun.COM /* CONSTCOND */ 180010843SDave.Plauger@Sun.COM while (1) { 180110843SDave.Plauger@Sun.COM 180210843SDave.Plauger@Sun.COM /* Find the next input buffer. */ 180310843SDave.Plauger@Sun.COM if (hp->cpin == NULL) { 180410843SDave.Plauger@Sun.COM HRSTART(hp->perpage, inwait); 180510843SDave.Plauger@Sun.COM 180610843SDave.Plauger@Sun.COM /* CONSTCOND */ 180710843SDave.Plauger@Sun.COM while (1) { 180810843SDave.Plauger@Sun.COM hp->cpin = CQ_GET(helperq); 180910843SDave.Plauger@Sun.COM dump_timeleft = dump_timeout; 181010843SDave.Plauger@Sun.COM 181110843SDave.Plauger@Sun.COM /* 181210843SDave.Plauger@Sun.COM * NULL return means the helper queue 181310843SDave.Plauger@Sun.COM * is closed and empty. 181410843SDave.Plauger@Sun.COM */ 181510843SDave.Plauger@Sun.COM if (hp->cpin == NULL) 181610843SDave.Plauger@Sun.COM break; 181710843SDave.Plauger@Sun.COM 181810843SDave.Plauger@Sun.COM /* Have input, check for dump I/O error. */ 181910843SDave.Plauger@Sun.COM if (!dump_ioerr) 182010843SDave.Plauger@Sun.COM break; 182110843SDave.Plauger@Sun.COM 182210843SDave.Plauger@Sun.COM /* 182310843SDave.Plauger@Sun.COM * If an I/O error occurs, stay in the 182410843SDave.Plauger@Sun.COM * loop in order to empty the helper 182510843SDave.Plauger@Sun.COM * queue. Return the buffers to the 182610843SDave.Plauger@Sun.COM * main task to unmap and free it. 
182710843SDave.Plauger@Sun.COM */ 182810843SDave.Plauger@Sun.COM hp->cpin->used = 0; 182910843SDave.Plauger@Sun.COM CQ_PUT(mainq, hp->cpin, CBUF_USEDMAP); 183010843SDave.Plauger@Sun.COM } 183110843SDave.Plauger@Sun.COM HRSTOP(hp->perpage, inwait); 183210843SDave.Plauger@Sun.COM 183310843SDave.Plauger@Sun.COM /* Stop here when the helper queue is closed. */ 183410843SDave.Plauger@Sun.COM if (hp->cpin == NULL) 183510843SDave.Plauger@Sun.COM break; 183610843SDave.Plauger@Sun.COM 183710843SDave.Plauger@Sun.COM /* Set the offset=0 to get the first pfn. */ 183810843SDave.Plauger@Sun.COM hp->in = 0; 183910843SDave.Plauger@Sun.COM 184010843SDave.Plauger@Sun.COM /* Set the total processed to 0 */ 184110843SDave.Plauger@Sun.COM hp->used = 0; 184210843SDave.Plauger@Sun.COM } 184310843SDave.Plauger@Sun.COM 184410843SDave.Plauger@Sun.COM /* Process the next page. */ 184510843SDave.Plauger@Sun.COM if (hp->used < hp->cpin->used) { 184610843SDave.Plauger@Sun.COM 184710843SDave.Plauger@Sun.COM /* 184810843SDave.Plauger@Sun.COM * Get the next page from the input buffer and 184910843SDave.Plauger@Sun.COM * return a copy. 185010843SDave.Plauger@Sun.COM */ 185110843SDave.Plauger@Sun.COM ASSERT(hp->in != -1); 185210843SDave.Plauger@Sun.COM HRSTART(hp->perpage, copy); 185310843SDave.Plauger@Sun.COM hp->in = dumpsys_copy_page(hp, hp->in); 185410843SDave.Plauger@Sun.COM hp->used += PAGESIZE; 185510843SDave.Plauger@Sun.COM HRSTOP(hp->perpage, copy); 185610843SDave.Plauger@Sun.COM break; 185710843SDave.Plauger@Sun.COM 185810843SDave.Plauger@Sun.COM } else { 185910843SDave.Plauger@Sun.COM 186010843SDave.Plauger@Sun.COM /* 186110843SDave.Plauger@Sun.COM * Done with the input. Flush the VM and 186210843SDave.Plauger@Sun.COM * return the buffer to the main task. 
186310843SDave.Plauger@Sun.COM */ 186410843SDave.Plauger@Sun.COM if (panicstr && hp->helper != MAINHELPER) 186510843SDave.Plauger@Sun.COM hat_flush_range(kas.a_hat, 186610843SDave.Plauger@Sun.COM hp->cpin->buf, hp->cpin->size); 186710843SDave.Plauger@Sun.COM dumpsys_errmsg(hp, NULL); 186810843SDave.Plauger@Sun.COM CQ_PUT(mainq, hp->cpin, CBUF_USEDMAP); 186910843SDave.Plauger@Sun.COM hp->cpin = NULL; 187010843SDave.Plauger@Sun.COM } 187110843SDave.Plauger@Sun.COM } 187210843SDave.Plauger@Sun.COM 187310843SDave.Plauger@Sun.COM return (hp->cpin != NULL); 187410843SDave.Plauger@Sun.COM } 187510843SDave.Plauger@Sun.COM 187610843SDave.Plauger@Sun.COM /* 187710843SDave.Plauger@Sun.COM * Compress size bytes starting at buf with bzip2 187810843SDave.Plauger@Sun.COM * mode: 187910843SDave.Plauger@Sun.COM * BZ_RUN add one more compressed page 188010843SDave.Plauger@Sun.COM * BZ_FINISH no more input, flush the state 188110843SDave.Plauger@Sun.COM */ 188210843SDave.Plauger@Sun.COM static void 188310843SDave.Plauger@Sun.COM dumpsys_bzrun(helper_t *hp, void *buf, size_t size, int mode) 188410843SDave.Plauger@Sun.COM { 188510843SDave.Plauger@Sun.COM dumpsync_t *ds = hp->ds; 188610843SDave.Plauger@Sun.COM const int CSIZE = sizeof (dumpcsize_t); 188710843SDave.Plauger@Sun.COM bz_stream *ps = &hp->bzstream; 188810843SDave.Plauger@Sun.COM int rc = 0; 188910843SDave.Plauger@Sun.COM uint32_t csize; 189010843SDave.Plauger@Sun.COM dumpcsize_t cs; 189110843SDave.Plauger@Sun.COM 189210843SDave.Plauger@Sun.COM /* Set input pointers to new input page */ 189310843SDave.Plauger@Sun.COM if (size > 0) { 189410843SDave.Plauger@Sun.COM ps->avail_in = size; 189510843SDave.Plauger@Sun.COM ps->next_in = buf; 189610843SDave.Plauger@Sun.COM } 189710843SDave.Plauger@Sun.COM 189810843SDave.Plauger@Sun.COM /* CONSTCOND */ 189910843SDave.Plauger@Sun.COM while (1) { 190010843SDave.Plauger@Sun.COM 190110843SDave.Plauger@Sun.COM /* Quit when all input has been consumed */ 190210843SDave.Plauger@Sun.COM if 
(ps->avail_in == 0 && mode == BZ_RUN) 190310843SDave.Plauger@Sun.COM break; 190410843SDave.Plauger@Sun.COM 190510843SDave.Plauger@Sun.COM /* Get a new output buffer */ 190610843SDave.Plauger@Sun.COM if (hp->cpout == NULL) { 190710843SDave.Plauger@Sun.COM HRSTART(hp->perpage, outwait); 190810843SDave.Plauger@Sun.COM hp->cpout = CQ_GET(freebufq); 190910843SDave.Plauger@Sun.COM HRSTOP(hp->perpage, outwait); 191010843SDave.Plauger@Sun.COM ps->avail_out = hp->cpout->size - CSIZE; 191110843SDave.Plauger@Sun.COM ps->next_out = hp->cpout->buf + CSIZE; 191210843SDave.Plauger@Sun.COM } 191310843SDave.Plauger@Sun.COM 191410843SDave.Plauger@Sun.COM /* Compress input, or finalize */ 191510843SDave.Plauger@Sun.COM HRSTART(hp->perpage, compress); 191610843SDave.Plauger@Sun.COM rc = BZ2_bzCompress(ps, mode); 191710843SDave.Plauger@Sun.COM HRSTOP(hp->perpage, compress); 191810843SDave.Plauger@Sun.COM 191910843SDave.Plauger@Sun.COM /* Check for error */ 192010843SDave.Plauger@Sun.COM if (mode == BZ_RUN && rc != BZ_RUN_OK) { 192110843SDave.Plauger@Sun.COM dumpsys_errmsg(hp, "%d: BZ_RUN error %s at page %lx\n", 192210843SDave.Plauger@Sun.COM hp->helper, BZ2_bzErrorString(rc), 192310843SDave.Plauger@Sun.COM hp->cpin->pagenum); 192410843SDave.Plauger@Sun.COM break; 192510843SDave.Plauger@Sun.COM } 192610843SDave.Plauger@Sun.COM 192710843SDave.Plauger@Sun.COM /* Write the buffer if it is full, or we are flushing */ 192810843SDave.Plauger@Sun.COM if (ps->avail_out == 0 || mode == BZ_FINISH) { 192910843SDave.Plauger@Sun.COM csize = hp->cpout->size - CSIZE - ps->avail_out; 193010843SDave.Plauger@Sun.COM cs = DUMP_SET_TAG(csize, hp->tag); 193110843SDave.Plauger@Sun.COM if (csize > 0) { 193210843SDave.Plauger@Sun.COM (void) memcpy(hp->cpout->buf, &cs, CSIZE); 193310843SDave.Plauger@Sun.COM dumpsys_swrite(hp, hp->cpout, csize + CSIZE); 193410843SDave.Plauger@Sun.COM hp->cpout = NULL; 193510843SDave.Plauger@Sun.COM } 193610843SDave.Plauger@Sun.COM } 193710843SDave.Plauger@Sun.COM 
193810843SDave.Plauger@Sun.COM /* Check for final complete */ 193910843SDave.Plauger@Sun.COM if (mode == BZ_FINISH) { 194010843SDave.Plauger@Sun.COM if (rc == BZ_STREAM_END) 194110843SDave.Plauger@Sun.COM break; 194210843SDave.Plauger@Sun.COM if (rc != BZ_FINISH_OK) { 194310843SDave.Plauger@Sun.COM dumpsys_errmsg(hp, "%d: BZ_FINISH error %s\n", 194410843SDave.Plauger@Sun.COM hp->helper, BZ2_bzErrorString(rc)); 194510843SDave.Plauger@Sun.COM break; 194610843SDave.Plauger@Sun.COM } 194710843SDave.Plauger@Sun.COM } 194810843SDave.Plauger@Sun.COM } 194910843SDave.Plauger@Sun.COM 195010843SDave.Plauger@Sun.COM /* Cleanup state and buffers */ 195110843SDave.Plauger@Sun.COM if (mode == BZ_FINISH) { 195210843SDave.Plauger@Sun.COM 195310843SDave.Plauger@Sun.COM /* Reset state so that it is re-usable. */ 195410843SDave.Plauger@Sun.COM (void) BZ2_bzCompressReset(&hp->bzstream); 195510843SDave.Plauger@Sun.COM 195610843SDave.Plauger@Sun.COM /* Give any unused outout buffer to the main task */ 195710843SDave.Plauger@Sun.COM if (hp->cpout != NULL) { 195810843SDave.Plauger@Sun.COM hp->cpout->used = 0; 195910843SDave.Plauger@Sun.COM CQ_PUT(mainq, hp->cpout, CBUF_ERRMSG); 196010843SDave.Plauger@Sun.COM hp->cpout = NULL; 196110843SDave.Plauger@Sun.COM } 196210843SDave.Plauger@Sun.COM } 196310843SDave.Plauger@Sun.COM } 196410843SDave.Plauger@Sun.COM 196510843SDave.Plauger@Sun.COM static void 196610843SDave.Plauger@Sun.COM dumpsys_bz2compress(helper_t *hp) 196710843SDave.Plauger@Sun.COM { 196810843SDave.Plauger@Sun.COM dumpsync_t *ds = hp->ds; 196910843SDave.Plauger@Sun.COM dumpstreamhdr_t sh; 197010843SDave.Plauger@Sun.COM 197110843SDave.Plauger@Sun.COM (void) strcpy(sh.stream_magic, DUMP_STREAM_MAGIC); 197210843SDave.Plauger@Sun.COM sh.stream_pagenum = (pgcnt_t)-1; 197310843SDave.Plauger@Sun.COM sh.stream_npages = 0; 197410843SDave.Plauger@Sun.COM hp->cpin = NULL; 197510843SDave.Plauger@Sun.COM hp->cpout = NULL; 197610843SDave.Plauger@Sun.COM hp->cperr = NULL; 
197710843SDave.Plauger@Sun.COM hp->in = 0; 197810843SDave.Plauger@Sun.COM hp->out = 0; 197910843SDave.Plauger@Sun.COM hp->bzstream.avail_in = 0; 198010843SDave.Plauger@Sun.COM 198110843SDave.Plauger@Sun.COM /* Bump reference to mainq while we are running */ 198210843SDave.Plauger@Sun.COM CQ_OPEN(mainq); 198310843SDave.Plauger@Sun.COM 198410843SDave.Plauger@Sun.COM /* Get one page at a time */ 198510843SDave.Plauger@Sun.COM while (dumpsys_sread(hp)) { 198610843SDave.Plauger@Sun.COM if (sh.stream_pagenum != hp->cpin->pagenum) { 198710843SDave.Plauger@Sun.COM sh.stream_pagenum = hp->cpin->pagenum; 198810843SDave.Plauger@Sun.COM sh.stream_npages = btop(hp->cpin->used); 198910843SDave.Plauger@Sun.COM dumpsys_bzrun(hp, &sh, sizeof (sh), BZ_RUN); 199010843SDave.Plauger@Sun.COM } 199110843SDave.Plauger@Sun.COM dumpsys_bzrun(hp, hp->page, PAGESIZE, 0); 199210843SDave.Plauger@Sun.COM } 199310843SDave.Plauger@Sun.COM 199410843SDave.Plauger@Sun.COM /* Done with input, flush any partial buffer */ 199510843SDave.Plauger@Sun.COM if (sh.stream_pagenum != (pgcnt_t)-1) { 199610843SDave.Plauger@Sun.COM dumpsys_bzrun(hp, NULL, 0, BZ_FINISH); 199710843SDave.Plauger@Sun.COM dumpsys_errmsg(hp, NULL); 199810843SDave.Plauger@Sun.COM } 199910843SDave.Plauger@Sun.COM 200010843SDave.Plauger@Sun.COM ASSERT(hp->cpin == NULL && hp->cpout == NULL && hp->cperr == NULL); 200110843SDave.Plauger@Sun.COM 200210843SDave.Plauger@Sun.COM /* Decrement main queue count, we are done */ 200310843SDave.Plauger@Sun.COM CQ_CLOSE(mainq); 200410843SDave.Plauger@Sun.COM } 200510843SDave.Plauger@Sun.COM 200610843SDave.Plauger@Sun.COM /* 200710843SDave.Plauger@Sun.COM * Compress with lzjb 200810843SDave.Plauger@Sun.COM * write stream block if full or size==0 200910843SDave.Plauger@Sun.COM * if csize==0 write stream header, else write <csize, data> 201010843SDave.Plauger@Sun.COM * size==0 is a call to flush a buffer 201110843SDave.Plauger@Sun.COM * hp->cpout is the buffer we are flushing or filling 
201210843SDave.Plauger@Sun.COM * hp->out is the next index to fill data 201310843SDave.Plauger@Sun.COM * osize is either csize+data, or the size of a stream header 201410843SDave.Plauger@Sun.COM */ 201510843SDave.Plauger@Sun.COM static void 201610843SDave.Plauger@Sun.COM dumpsys_lzjbrun(helper_t *hp, size_t csize, void *buf, size_t size) 201710843SDave.Plauger@Sun.COM { 201810843SDave.Plauger@Sun.COM dumpsync_t *ds = hp->ds; 201910843SDave.Plauger@Sun.COM const int CSIZE = sizeof (dumpcsize_t); 202010843SDave.Plauger@Sun.COM dumpcsize_t cs; 202110843SDave.Plauger@Sun.COM size_t osize = csize > 0 ? CSIZE + size : size; 202210843SDave.Plauger@Sun.COM 202310843SDave.Plauger@Sun.COM /* If flush, and there is no buffer, just return */ 202410843SDave.Plauger@Sun.COM if (size == 0 && hp->cpout == NULL) 202510843SDave.Plauger@Sun.COM return; 202610843SDave.Plauger@Sun.COM 202710843SDave.Plauger@Sun.COM /* If flush, or cpout is full, write it out */ 202810843SDave.Plauger@Sun.COM if (size == 0 || 202910843SDave.Plauger@Sun.COM hp->cpout != NULL && hp->out + osize > hp->cpout->size) { 203010843SDave.Plauger@Sun.COM 203110843SDave.Plauger@Sun.COM /* Set tag+size word at the front of the stream block. */ 203210843SDave.Plauger@Sun.COM cs = DUMP_SET_TAG(hp->out - CSIZE, hp->tag); 203310843SDave.Plauger@Sun.COM (void) memcpy(hp->cpout->buf, &cs, CSIZE); 203410843SDave.Plauger@Sun.COM 203510843SDave.Plauger@Sun.COM /* Write block to dump file. 
*/ 203610843SDave.Plauger@Sun.COM dumpsys_swrite(hp, hp->cpout, hp->out); 203710843SDave.Plauger@Sun.COM 203810843SDave.Plauger@Sun.COM /* Clear pointer to indicate we need a new buffer */ 203910843SDave.Plauger@Sun.COM hp->cpout = NULL; 204010843SDave.Plauger@Sun.COM 204110843SDave.Plauger@Sun.COM /* flushing, we are done */ 204210843SDave.Plauger@Sun.COM if (size == 0) 204310843SDave.Plauger@Sun.COM return; 204410843SDave.Plauger@Sun.COM } 204510843SDave.Plauger@Sun.COM 204610843SDave.Plauger@Sun.COM /* Get an output buffer if we dont have one. */ 204710843SDave.Plauger@Sun.COM if (hp->cpout == NULL) { 204810843SDave.Plauger@Sun.COM HRSTART(hp->perpage, outwait); 204910843SDave.Plauger@Sun.COM hp->cpout = CQ_GET(freebufq); 205010843SDave.Plauger@Sun.COM HRSTOP(hp->perpage, outwait); 205110843SDave.Plauger@Sun.COM hp->out = CSIZE; 205210843SDave.Plauger@Sun.COM } 205310843SDave.Plauger@Sun.COM 205410843SDave.Plauger@Sun.COM /* Store csize word. This is the size of compressed data. */ 205510843SDave.Plauger@Sun.COM if (csize > 0) { 205610843SDave.Plauger@Sun.COM cs = DUMP_SET_TAG(csize, 0); 205710843SDave.Plauger@Sun.COM (void) memcpy(hp->cpout->buf + hp->out, &cs, CSIZE); 205810843SDave.Plauger@Sun.COM hp->out += CSIZE; 205910843SDave.Plauger@Sun.COM } 206010843SDave.Plauger@Sun.COM 206110843SDave.Plauger@Sun.COM /* Store the data. 
*/ 206210843SDave.Plauger@Sun.COM (void) memcpy(hp->cpout->buf + hp->out, buf, size); 206310843SDave.Plauger@Sun.COM hp->out += size; 206410843SDave.Plauger@Sun.COM } 206510843SDave.Plauger@Sun.COM 206610843SDave.Plauger@Sun.COM static void 206710843SDave.Plauger@Sun.COM dumpsys_lzjbcompress(helper_t *hp) 206810843SDave.Plauger@Sun.COM { 206910843SDave.Plauger@Sun.COM dumpsync_t *ds = hp->ds; 207010843SDave.Plauger@Sun.COM size_t csize; 207110843SDave.Plauger@Sun.COM dumpstreamhdr_t sh; 207210843SDave.Plauger@Sun.COM 207310843SDave.Plauger@Sun.COM (void) strcpy(sh.stream_magic, DUMP_STREAM_MAGIC); 207410843SDave.Plauger@Sun.COM sh.stream_pagenum = (pfn_t)-1; 207510843SDave.Plauger@Sun.COM sh.stream_npages = 0; 207610843SDave.Plauger@Sun.COM hp->cpin = NULL; 207710843SDave.Plauger@Sun.COM hp->cpout = NULL; 207810843SDave.Plauger@Sun.COM hp->cperr = NULL; 207910843SDave.Plauger@Sun.COM hp->in = 0; 208010843SDave.Plauger@Sun.COM hp->out = 0; 208110843SDave.Plauger@Sun.COM 208210843SDave.Plauger@Sun.COM /* Bump reference to mainq while we are running */ 208310843SDave.Plauger@Sun.COM CQ_OPEN(mainq); 208410843SDave.Plauger@Sun.COM 208510843SDave.Plauger@Sun.COM /* Get one page at a time */ 208610843SDave.Plauger@Sun.COM while (dumpsys_sread(hp)) { 208710843SDave.Plauger@Sun.COM 208810843SDave.Plauger@Sun.COM /* Create a stream header for each new input map */ 208910843SDave.Plauger@Sun.COM if (sh.stream_pagenum != hp->cpin->pagenum) { 209010843SDave.Plauger@Sun.COM sh.stream_pagenum = hp->cpin->pagenum; 209110843SDave.Plauger@Sun.COM sh.stream_npages = btop(hp->cpin->used); 209210843SDave.Plauger@Sun.COM dumpsys_lzjbrun(hp, 0, &sh, sizeof (sh)); 209310843SDave.Plauger@Sun.COM } 209410843SDave.Plauger@Sun.COM 209510843SDave.Plauger@Sun.COM /* Compress one page */ 209610843SDave.Plauger@Sun.COM HRSTART(hp->perpage, compress); 209710843SDave.Plauger@Sun.COM csize = compress(hp->page, hp->lzbuf, PAGESIZE); 209810843SDave.Plauger@Sun.COM HRSTOP(hp->perpage, compress); 
209910843SDave.Plauger@Sun.COM 210010843SDave.Plauger@Sun.COM /* Add csize+data to output block */ 210110843SDave.Plauger@Sun.COM ASSERT(csize > 0 && csize <= PAGESIZE); 210210843SDave.Plauger@Sun.COM dumpsys_lzjbrun(hp, csize, hp->lzbuf, csize); 210310843SDave.Plauger@Sun.COM } 210410843SDave.Plauger@Sun.COM 210510843SDave.Plauger@Sun.COM /* Done with input, flush any partial buffer */ 210610843SDave.Plauger@Sun.COM if (sh.stream_pagenum != (pfn_t)-1) { 210710843SDave.Plauger@Sun.COM dumpsys_lzjbrun(hp, 0, NULL, 0); 210810843SDave.Plauger@Sun.COM dumpsys_errmsg(hp, NULL); 210910843SDave.Plauger@Sun.COM } 211010843SDave.Plauger@Sun.COM 211110843SDave.Plauger@Sun.COM ASSERT(hp->cpin == NULL && hp->cpout == NULL && hp->cperr == NULL); 211210843SDave.Plauger@Sun.COM 211310843SDave.Plauger@Sun.COM /* Decrement main queue count, we are done */ 211410843SDave.Plauger@Sun.COM CQ_CLOSE(mainq); 211510843SDave.Plauger@Sun.COM } 211610843SDave.Plauger@Sun.COM 211710843SDave.Plauger@Sun.COM /* 211810843SDave.Plauger@Sun.COM * Dump helper called from panic_idle() to compress pages. CPUs in 211910843SDave.Plauger@Sun.COM * this path must not call most kernel services. 212010843SDave.Plauger@Sun.COM * 212110843SDave.Plauger@Sun.COM * During panic, all but one of the CPUs is idle. These CPUs are used 212210843SDave.Plauger@Sun.COM * as helpers working in parallel to copy and compress memory 212310843SDave.Plauger@Sun.COM * pages. During a panic, however, these processors cannot call any 212410843SDave.Plauger@Sun.COM * kernel services. This is because mutexes become no-ops during 212510843SDave.Plauger@Sun.COM * panic, and, cross-call interrupts are inhibited. Therefore, during 212610843SDave.Plauger@Sun.COM * panic dump the helper CPUs communicate with the panic CPU using 212710843SDave.Plauger@Sun.COM * memory variables. All memory mapping and I/O is performed by the 212810843SDave.Plauger@Sun.COM * panic CPU. 
212912640SDave.Plauger@Sun.COM * 213012640SDave.Plauger@Sun.COM * At dump configuration time, helper_lock is set and helpers_wanted 213112640SDave.Plauger@Sun.COM * is 0. dumpsys() decides whether to set helpers_wanted before 213212640SDave.Plauger@Sun.COM * clearing helper_lock. 213312640SDave.Plauger@Sun.COM * 213412640SDave.Plauger@Sun.COM * At panic time, idle CPUs spin-wait on helper_lock, then alternately 213512640SDave.Plauger@Sun.COM * take the lock and become a helper, or return. 213610843SDave.Plauger@Sun.COM */ 213710843SDave.Plauger@Sun.COM void 213810843SDave.Plauger@Sun.COM dumpsys_helper() 213910843SDave.Plauger@Sun.COM { 2140*12931SDave.Plauger@Sun.COM if (!dumpcfg.helper_present) 2141*12931SDave.Plauger@Sun.COM dumpcfg.helper_present = 1; 214210843SDave.Plauger@Sun.COM dumpsys_spinlock(&dumpcfg.helper_lock); 214310843SDave.Plauger@Sun.COM if (dumpcfg.helpers_wanted) { 214410843SDave.Plauger@Sun.COM helper_t *hp, *hpend = &dumpcfg.helper[dumpcfg.nhelper]; 214510843SDave.Plauger@Sun.COM 214610843SDave.Plauger@Sun.COM for (hp = dumpcfg.helper; hp != hpend; hp++) { 214710843SDave.Plauger@Sun.COM if (hp->helper == FREEHELPER) { 214810843SDave.Plauger@Sun.COM hp->helper = CPU->cpu_id; 214910843SDave.Plauger@Sun.COM BT_SET(dumpcfg.helpermap, CPU->cpu_seqid); 215010843SDave.Plauger@Sun.COM 215110843SDave.Plauger@Sun.COM dumpsys_spinunlock(&dumpcfg.helper_lock); 215210843SDave.Plauger@Sun.COM 215310843SDave.Plauger@Sun.COM if (dumpcfg.clevel < DUMP_CLEVEL_BZIP2) 215410843SDave.Plauger@Sun.COM dumpsys_lzjbcompress(hp); 215510843SDave.Plauger@Sun.COM else 215610843SDave.Plauger@Sun.COM dumpsys_bz2compress(hp); 215710843SDave.Plauger@Sun.COM 215810843SDave.Plauger@Sun.COM hp->helper = DONEHELPER; 215910843SDave.Plauger@Sun.COM return; 216010843SDave.Plauger@Sun.COM } 216110843SDave.Plauger@Sun.COM } 216212640SDave.Plauger@Sun.COM 216312640SDave.Plauger@Sun.COM /* No more helpers are needed. 
*/ 216412640SDave.Plauger@Sun.COM dumpcfg.helpers_wanted = 0; 216512640SDave.Plauger@Sun.COM 216610843SDave.Plauger@Sun.COM } 216710843SDave.Plauger@Sun.COM dumpsys_spinunlock(&dumpcfg.helper_lock); 216810843SDave.Plauger@Sun.COM } 216910843SDave.Plauger@Sun.COM 217010843SDave.Plauger@Sun.COM /* 217112640SDave.Plauger@Sun.COM * No-wait helper callable in spin loops. 217212640SDave.Plauger@Sun.COM * 217312640SDave.Plauger@Sun.COM * Do not wait for helper_lock. Just check helpers_wanted. The caller 217412640SDave.Plauger@Sun.COM * may decide to continue. This is the "c)ontinue, s)ync, r)eset? s" 217512640SDave.Plauger@Sun.COM * case. 217612640SDave.Plauger@Sun.COM */ 217712640SDave.Plauger@Sun.COM void 217812640SDave.Plauger@Sun.COM dumpsys_helper_nw() 217912640SDave.Plauger@Sun.COM { 2180*12931SDave.Plauger@Sun.COM if (!dumpcfg.helper_present) 2181*12931SDave.Plauger@Sun.COM dumpcfg.helper_present = 1; 218212640SDave.Plauger@Sun.COM if (dumpcfg.helpers_wanted) 218312640SDave.Plauger@Sun.COM dumpsys_helper(); 218412640SDave.Plauger@Sun.COM } 218512640SDave.Plauger@Sun.COM 218612640SDave.Plauger@Sun.COM /* 218710843SDave.Plauger@Sun.COM * Dump helper for live dumps. 218810843SDave.Plauger@Sun.COM * These run as a system task. 
218910843SDave.Plauger@Sun.COM */ 219010843SDave.Plauger@Sun.COM static void 219110843SDave.Plauger@Sun.COM dumpsys_live_helper(void *arg) 219210843SDave.Plauger@Sun.COM { 219310843SDave.Plauger@Sun.COM helper_t *hp = arg; 219410843SDave.Plauger@Sun.COM 219510843SDave.Plauger@Sun.COM BT_ATOMIC_SET(dumpcfg.helpermap, CPU->cpu_seqid); 219610843SDave.Plauger@Sun.COM if (dumpcfg.clevel < DUMP_CLEVEL_BZIP2) 219710843SDave.Plauger@Sun.COM dumpsys_lzjbcompress(hp); 219810843SDave.Plauger@Sun.COM else 219910843SDave.Plauger@Sun.COM dumpsys_bz2compress(hp); 220010843SDave.Plauger@Sun.COM } 220110843SDave.Plauger@Sun.COM 220210843SDave.Plauger@Sun.COM /* 220310843SDave.Plauger@Sun.COM * Compress one page with lzjb (single threaded case) 220410843SDave.Plauger@Sun.COM */ 220510843SDave.Plauger@Sun.COM static void 220610843SDave.Plauger@Sun.COM dumpsys_lzjb_page(helper_t *hp, cbuf_t *cp) 220710843SDave.Plauger@Sun.COM { 220810843SDave.Plauger@Sun.COM dumpsync_t *ds = hp->ds; 220910843SDave.Plauger@Sun.COM uint32_t csize; 221010843SDave.Plauger@Sun.COM 221110843SDave.Plauger@Sun.COM hp->helper = MAINHELPER; 221210843SDave.Plauger@Sun.COM hp->in = 0; 221310843SDave.Plauger@Sun.COM hp->used = 0; 221410843SDave.Plauger@Sun.COM hp->cpin = cp; 221510843SDave.Plauger@Sun.COM while (hp->used < cp->used) { 221610843SDave.Plauger@Sun.COM HRSTART(hp->perpage, copy); 221710843SDave.Plauger@Sun.COM hp->in = dumpsys_copy_page(hp, hp->in); 221810843SDave.Plauger@Sun.COM hp->used += PAGESIZE; 221910843SDave.Plauger@Sun.COM HRSTOP(hp->perpage, copy); 222010843SDave.Plauger@Sun.COM 222110843SDave.Plauger@Sun.COM HRSTART(hp->perpage, compress); 222210843SDave.Plauger@Sun.COM csize = compress(hp->page, hp->lzbuf, PAGESIZE); 222310843SDave.Plauger@Sun.COM HRSTOP(hp->perpage, compress); 222410843SDave.Plauger@Sun.COM 222510843SDave.Plauger@Sun.COM HRSTART(hp->perpage, write); 222610843SDave.Plauger@Sun.COM dumpvp_write(&csize, sizeof (csize)); 222710843SDave.Plauger@Sun.COM dumpvp_write(hp->lzbuf, 
csize); 222810843SDave.Plauger@Sun.COM HRSTOP(hp->perpage, write); 222910843SDave.Plauger@Sun.COM } 223010843SDave.Plauger@Sun.COM CQ_PUT(mainq, hp->cpin, CBUF_USEDMAP); 223110843SDave.Plauger@Sun.COM hp->cpin = NULL; 223210843SDave.Plauger@Sun.COM } 223310843SDave.Plauger@Sun.COM 223410843SDave.Plauger@Sun.COM /* 223510843SDave.Plauger@Sun.COM * Main task to dump pages. This is called on the dump CPU. 223610843SDave.Plauger@Sun.COM */ 223710843SDave.Plauger@Sun.COM static void 223810843SDave.Plauger@Sun.COM dumpsys_main_task(void *arg) 223910843SDave.Plauger@Sun.COM { 224010843SDave.Plauger@Sun.COM dumpsync_t *ds = arg; 224110843SDave.Plauger@Sun.COM pgcnt_t pagenum = 0, bitnum = 0, hibitnum; 224210843SDave.Plauger@Sun.COM dumpmlw_t mlw; 224310843SDave.Plauger@Sun.COM cbuf_t *cp; 224410843SDave.Plauger@Sun.COM pgcnt_t baseoff, pfnoff; 224510843SDave.Plauger@Sun.COM pfn_t base, pfn; 224610843SDave.Plauger@Sun.COM int sec; 224710843SDave.Plauger@Sun.COM 224810843SDave.Plauger@Sun.COM dump_init_memlist_walker(&mlw); 224910843SDave.Plauger@Sun.COM 225010843SDave.Plauger@Sun.COM /* CONSTCOND */ 225110843SDave.Plauger@Sun.COM while (1) { 225210843SDave.Plauger@Sun.COM 225310843SDave.Plauger@Sun.COM if (ds->percent > ds->percent_done) { 225410843SDave.Plauger@Sun.COM ds->percent_done = ds->percent; 225510843SDave.Plauger@Sun.COM sec = (gethrtime() - ds->start) / 1000 / 1000 / 1000; 225610843SDave.Plauger@Sun.COM uprintf("^\r%2d:%02d %3d%% done", 225710843SDave.Plauger@Sun.COM sec / 60, sec % 60, ds->percent); 225810843SDave.Plauger@Sun.COM ds->neednl = 1; 225910843SDave.Plauger@Sun.COM } 226010843SDave.Plauger@Sun.COM 226110843SDave.Plauger@Sun.COM while (CQ_IS_EMPTY(mainq) && !CQ_IS_EMPTY(writerq)) { 226210843SDave.Plauger@Sun.COM 226310843SDave.Plauger@Sun.COM /* the writerq never blocks */ 226410843SDave.Plauger@Sun.COM cp = CQ_GET(writerq); 226510843SDave.Plauger@Sun.COM if (cp == NULL) 226610843SDave.Plauger@Sun.COM break; 226710843SDave.Plauger@Sun.COM 
226810843SDave.Plauger@Sun.COM dump_timeleft = dump_timeout; 226910843SDave.Plauger@Sun.COM 227010843SDave.Plauger@Sun.COM HRSTART(ds->perpage, write); 227110843SDave.Plauger@Sun.COM dumpvp_write(cp->buf, cp->used); 227210843SDave.Plauger@Sun.COM HRSTOP(ds->perpage, write); 227310843SDave.Plauger@Sun.COM 227410843SDave.Plauger@Sun.COM CQ_PUT(freebufq, cp, CBUF_FREEBUF); 227510843SDave.Plauger@Sun.COM } 227610843SDave.Plauger@Sun.COM 227710843SDave.Plauger@Sun.COM /* 227810843SDave.Plauger@Sun.COM * Wait here for some buffers to process. Returns NULL 227910843SDave.Plauger@Sun.COM * when all helpers have terminated and all buffers 228010843SDave.Plauger@Sun.COM * have been processed. 228110843SDave.Plauger@Sun.COM */ 228210843SDave.Plauger@Sun.COM cp = CQ_GET(mainq); 228310843SDave.Plauger@Sun.COM 228410843SDave.Plauger@Sun.COM if (cp == NULL) { 228510843SDave.Plauger@Sun.COM 228610843SDave.Plauger@Sun.COM /* Drain the write queue. */ 228710843SDave.Plauger@Sun.COM if (!CQ_IS_EMPTY(writerq)) 228810843SDave.Plauger@Sun.COM continue; 228910843SDave.Plauger@Sun.COM 229010843SDave.Plauger@Sun.COM /* Main task exits here. */ 229110843SDave.Plauger@Sun.COM break; 229210843SDave.Plauger@Sun.COM } 229310843SDave.Plauger@Sun.COM 229410843SDave.Plauger@Sun.COM dump_timeleft = dump_timeout; 229510843SDave.Plauger@Sun.COM 229610843SDave.Plauger@Sun.COM switch (cp->state) { 229710843SDave.Plauger@Sun.COM 229810843SDave.Plauger@Sun.COM case CBUF_FREEMAP: 229910843SDave.Plauger@Sun.COM 230010843SDave.Plauger@Sun.COM /* 230110843SDave.Plauger@Sun.COM * Note that we drop CBUF_FREEMAP buffers on 230210843SDave.Plauger@Sun.COM * the floor (they will not be on any cqueue) 230310843SDave.Plauger@Sun.COM * when we no longer need them. 
230410843SDave.Plauger@Sun.COM */ 230510843SDave.Plauger@Sun.COM if (bitnum >= dumpcfg.bitmapsize) 230610843SDave.Plauger@Sun.COM break; 230710843SDave.Plauger@Sun.COM 230810843SDave.Plauger@Sun.COM if (dump_ioerr) { 230910843SDave.Plauger@Sun.COM bitnum = dumpcfg.bitmapsize; 231010843SDave.Plauger@Sun.COM CQ_CLOSE(helperq); 231110843SDave.Plauger@Sun.COM break; 231210843SDave.Plauger@Sun.COM } 231310843SDave.Plauger@Sun.COM 231410843SDave.Plauger@Sun.COM HRSTART(ds->perpage, bitmap); 231510843SDave.Plauger@Sun.COM for (; bitnum < dumpcfg.bitmapsize; bitnum++) 231610843SDave.Plauger@Sun.COM if (BT_TEST(dumpcfg.bitmap, bitnum)) 231710843SDave.Plauger@Sun.COM break; 231810843SDave.Plauger@Sun.COM HRSTOP(ds->perpage, bitmap); 231910843SDave.Plauger@Sun.COM dump_timeleft = dump_timeout; 232010843SDave.Plauger@Sun.COM 232110843SDave.Plauger@Sun.COM if (bitnum >= dumpcfg.bitmapsize) { 232210843SDave.Plauger@Sun.COM CQ_CLOSE(helperq); 232310843SDave.Plauger@Sun.COM break; 232410843SDave.Plauger@Sun.COM } 232510843SDave.Plauger@Sun.COM 232610843SDave.Plauger@Sun.COM /* 232710843SDave.Plauger@Sun.COM * Try to map CBUF_MAPSIZE ranges. Can't 232810843SDave.Plauger@Sun.COM * assume that memory segment size is a 232910843SDave.Plauger@Sun.COM * multiple of CBUF_MAPSIZE. Can't assume that 233010843SDave.Plauger@Sun.COM * the segment starts on a CBUF_MAPSIZE 233110843SDave.Plauger@Sun.COM * boundary. 
233210843SDave.Plauger@Sun.COM */ 233310843SDave.Plauger@Sun.COM pfn = dump_bitnum_to_pfn(bitnum, &mlw); 233410843SDave.Plauger@Sun.COM ASSERT(pfn != PFN_INVALID); 233510843SDave.Plauger@Sun.COM ASSERT(bitnum + mlw.mpleft <= dumpcfg.bitmapsize); 233610843SDave.Plauger@Sun.COM 233710843SDave.Plauger@Sun.COM base = P2ALIGN(pfn, CBUF_MAPNP); 233810843SDave.Plauger@Sun.COM if (base < mlw.mpaddr) { 233910843SDave.Plauger@Sun.COM base = mlw.mpaddr; 234010843SDave.Plauger@Sun.COM baseoff = P2PHASE(base, CBUF_MAPNP); 234110843SDave.Plauger@Sun.COM } else { 234210843SDave.Plauger@Sun.COM baseoff = 0; 234310843SDave.Plauger@Sun.COM } 234410843SDave.Plauger@Sun.COM 234510843SDave.Plauger@Sun.COM pfnoff = pfn - base; 234610843SDave.Plauger@Sun.COM if (pfnoff + mlw.mpleft < CBUF_MAPNP) { 234710843SDave.Plauger@Sun.COM hibitnum = bitnum + mlw.mpleft; 234810843SDave.Plauger@Sun.COM cp->size = ptob(pfnoff + mlw.mpleft); 234910843SDave.Plauger@Sun.COM } else { 235010843SDave.Plauger@Sun.COM hibitnum = bitnum - pfnoff + CBUF_MAPNP - 235110843SDave.Plauger@Sun.COM baseoff; 235210843SDave.Plauger@Sun.COM cp->size = CBUF_MAPSIZE - ptob(baseoff); 235310843SDave.Plauger@Sun.COM } 235410843SDave.Plauger@Sun.COM 235510843SDave.Plauger@Sun.COM cp->pfn = pfn; 235610843SDave.Plauger@Sun.COM cp->bitnum = bitnum++; 235710843SDave.Plauger@Sun.COM cp->pagenum = pagenum++; 235810843SDave.Plauger@Sun.COM cp->off = ptob(pfnoff); 235910843SDave.Plauger@Sun.COM 236010843SDave.Plauger@Sun.COM for (; bitnum < hibitnum; bitnum++) 236110843SDave.Plauger@Sun.COM if (BT_TEST(dumpcfg.bitmap, bitnum)) 236210843SDave.Plauger@Sun.COM pagenum++; 236310843SDave.Plauger@Sun.COM 236410843SDave.Plauger@Sun.COM dump_timeleft = dump_timeout; 236510843SDave.Plauger@Sun.COM cp->used = ptob(pagenum - cp->pagenum); 236610843SDave.Plauger@Sun.COM 236710843SDave.Plauger@Sun.COM HRSTART(ds->perpage, map); 236810843SDave.Plauger@Sun.COM hat_devload(kas.a_hat, cp->buf, cp->size, base, 236910843SDave.Plauger@Sun.COM PROT_READ, 
HAT_LOAD_NOCONSIST); 237010843SDave.Plauger@Sun.COM HRSTOP(ds->perpage, map); 237110843SDave.Plauger@Sun.COM 237210843SDave.Plauger@Sun.COM ds->pages_mapped += btop(cp->size); 237310843SDave.Plauger@Sun.COM ds->pages_used += pagenum - cp->pagenum; 237410843SDave.Plauger@Sun.COM 237510843SDave.Plauger@Sun.COM CQ_OPEN(mainq); 237610843SDave.Plauger@Sun.COM 237710843SDave.Plauger@Sun.COM /* 237810843SDave.Plauger@Sun.COM * If there are no helpers the main task does 237910843SDave.Plauger@Sun.COM * non-streams lzjb compress. 238010843SDave.Plauger@Sun.COM */ 238110843SDave.Plauger@Sun.COM if (dumpcfg.clevel == 0) { 238210843SDave.Plauger@Sun.COM dumpsys_lzjb_page(dumpcfg.helper, cp); 238310843SDave.Plauger@Sun.COM break; 238410843SDave.Plauger@Sun.COM } 238510843SDave.Plauger@Sun.COM 238610843SDave.Plauger@Sun.COM /* pass mapped pages to a helper */ 238710843SDave.Plauger@Sun.COM CQ_PUT(helperq, cp, CBUF_INREADY); 238810843SDave.Plauger@Sun.COM 238910843SDave.Plauger@Sun.COM /* the last page was done */ 239010843SDave.Plauger@Sun.COM if (bitnum >= dumpcfg.bitmapsize) 239110843SDave.Plauger@Sun.COM CQ_CLOSE(helperq); 239210843SDave.Plauger@Sun.COM 239310843SDave.Plauger@Sun.COM break; 239410843SDave.Plauger@Sun.COM 239510843SDave.Plauger@Sun.COM case CBUF_USEDMAP: 239610843SDave.Plauger@Sun.COM 239710843SDave.Plauger@Sun.COM ds->npages += btop(cp->used); 239810843SDave.Plauger@Sun.COM 239910843SDave.Plauger@Sun.COM HRSTART(ds->perpage, unmap); 240010843SDave.Plauger@Sun.COM hat_unload(kas.a_hat, cp->buf, cp->size, HAT_UNLOAD); 240110843SDave.Plauger@Sun.COM HRSTOP(ds->perpage, unmap); 240210843SDave.Plauger@Sun.COM 240310843SDave.Plauger@Sun.COM if (bitnum < dumpcfg.bitmapsize) 240410843SDave.Plauger@Sun.COM CQ_PUT(mainq, cp, CBUF_FREEMAP); 240510843SDave.Plauger@Sun.COM CQ_CLOSE(mainq); 240610843SDave.Plauger@Sun.COM 240710843SDave.Plauger@Sun.COM ASSERT(ds->npages <= dumphdr->dump_npages); 240810843SDave.Plauger@Sun.COM ds->percent = ds->npages * 100LL / 
dumphdr->dump_npages; 240910843SDave.Plauger@Sun.COM break; 241010843SDave.Plauger@Sun.COM 241110843SDave.Plauger@Sun.COM case CBUF_WRITE: 241210843SDave.Plauger@Sun.COM 241310843SDave.Plauger@Sun.COM CQ_PUT(writerq, cp, CBUF_WRITE); 241410843SDave.Plauger@Sun.COM break; 241510843SDave.Plauger@Sun.COM 241610843SDave.Plauger@Sun.COM case CBUF_ERRMSG: 241710843SDave.Plauger@Sun.COM 241810843SDave.Plauger@Sun.COM if (cp->used > 0) { 241910843SDave.Plauger@Sun.COM cp->buf[cp->size - 2] = '\n'; 242010843SDave.Plauger@Sun.COM cp->buf[cp->size - 1] = '\0'; 242110843SDave.Plauger@Sun.COM if (ds->neednl) { 242210843SDave.Plauger@Sun.COM uprintf("\n%s", cp->buf); 242310843SDave.Plauger@Sun.COM ds->neednl = 0; 242410843SDave.Plauger@Sun.COM } else { 242510843SDave.Plauger@Sun.COM uprintf("%s", cp->buf); 242610843SDave.Plauger@Sun.COM } 242711178SDave.Plauger@Sun.COM /* wait for console output */ 242811178SDave.Plauger@Sun.COM drv_usecwait(200000); 242911178SDave.Plauger@Sun.COM dump_timeleft = dump_timeout; 243010843SDave.Plauger@Sun.COM } 243110843SDave.Plauger@Sun.COM CQ_PUT(freebufq, cp, CBUF_FREEBUF); 243210843SDave.Plauger@Sun.COM break; 243310843SDave.Plauger@Sun.COM 243410843SDave.Plauger@Sun.COM default: 243510843SDave.Plauger@Sun.COM uprintf("dump: unexpected buffer state %d, " 243610843SDave.Plauger@Sun.COM "buffer will be lost\n", cp->state); 243710843SDave.Plauger@Sun.COM break; 243810843SDave.Plauger@Sun.COM 243910843SDave.Plauger@Sun.COM } /* end switch */ 244010843SDave.Plauger@Sun.COM 244110843SDave.Plauger@Sun.COM } /* end while(1) */ 244210843SDave.Plauger@Sun.COM } 244310843SDave.Plauger@Sun.COM 244410843SDave.Plauger@Sun.COM #ifdef COLLECT_METRICS 244510843SDave.Plauger@Sun.COM size_t 244610843SDave.Plauger@Sun.COM dumpsys_metrics(dumpsync_t *ds, char *buf, size_t size) 244710843SDave.Plauger@Sun.COM { 244810843SDave.Plauger@Sun.COM dumpcfg_t *cfg = &dumpcfg; 244910843SDave.Plauger@Sun.COM int myid = CPU->cpu_seqid; 245010843SDave.Plauger@Sun.COM int i, 
compress_ratio; 245110843SDave.Plauger@Sun.COM int sec, iorate; 245210843SDave.Plauger@Sun.COM helper_t *hp, *hpend = &cfg->helper[cfg->nhelper]; 245310843SDave.Plauger@Sun.COM char *e = buf + size; 245410843SDave.Plauger@Sun.COM char *p = buf; 245510843SDave.Plauger@Sun.COM 245610843SDave.Plauger@Sun.COM sec = ds->elapsed / (1000 * 1000 * 1000ULL); 245710843SDave.Plauger@Sun.COM if (sec < 1) 245810843SDave.Plauger@Sun.COM sec = 1; 245910843SDave.Plauger@Sun.COM 246010843SDave.Plauger@Sun.COM if (ds->iotime < 1) 246110843SDave.Plauger@Sun.COM ds->iotime = 1; 246210843SDave.Plauger@Sun.COM iorate = (ds->nwrite * 100000ULL) / ds->iotime; 246310843SDave.Plauger@Sun.COM 246410843SDave.Plauger@Sun.COM compress_ratio = 100LL * ds->npages / btopr(ds->nwrite + 1); 246510843SDave.Plauger@Sun.COM 246610843SDave.Plauger@Sun.COM #define P(...) (p += p < e ? snprintf(p, e - p, __VA_ARGS__) : 0) 246710843SDave.Plauger@Sun.COM 246810843SDave.Plauger@Sun.COM P("Master cpu_seqid,%d\n", CPU->cpu_seqid); 246910843SDave.Plauger@Sun.COM P("Master cpu_id,%d\n", CPU->cpu_id); 247010843SDave.Plauger@Sun.COM P("dump_flags,0x%x\n", dumphdr->dump_flags); 247110843SDave.Plauger@Sun.COM P("dump_ioerr,%d\n", dump_ioerr); 247210843SDave.Plauger@Sun.COM 247310843SDave.Plauger@Sun.COM P("Helpers:\n"); 247410843SDave.Plauger@Sun.COM for (i = 0; i < ncpus; i++) { 247510843SDave.Plauger@Sun.COM if ((i & 15) == 0) 247610843SDave.Plauger@Sun.COM P(",,%03d,", i); 247710843SDave.Plauger@Sun.COM if (i == myid) 247810843SDave.Plauger@Sun.COM P(" M"); 247910843SDave.Plauger@Sun.COM else if (BT_TEST(cfg->helpermap, i)) 248010843SDave.Plauger@Sun.COM P("%4d", cpu_seq[i]->cpu_id); 248110843SDave.Plauger@Sun.COM else 248210843SDave.Plauger@Sun.COM P(" *"); 248310843SDave.Plauger@Sun.COM if ((i & 15) == 15) 248410843SDave.Plauger@Sun.COM P("\n"); 248510843SDave.Plauger@Sun.COM } 248610843SDave.Plauger@Sun.COM 248710843SDave.Plauger@Sun.COM P("ncbuf_used,%d\n", cfg->ncbuf_used); 248810843SDave.Plauger@Sun.COM 
P("ncmap,%d\n", cfg->ncmap); 248910843SDave.Plauger@Sun.COM 249010843SDave.Plauger@Sun.COM P("Found %ldM ranges,%ld\n", (CBUF_MAPSIZE / DUMP_1MB), cfg->found4m); 249110843SDave.Plauger@Sun.COM P("Found small pages,%ld\n", cfg->foundsm); 249210843SDave.Plauger@Sun.COM 249310843SDave.Plauger@Sun.COM P("Compression level,%d\n", cfg->clevel); 249410843SDave.Plauger@Sun.COM P("Compression type,%s %s\n", cfg->clevel == 0 ? "serial" : "parallel", 249510843SDave.Plauger@Sun.COM cfg->clevel >= DUMP_CLEVEL_BZIP2 ? "bzip2" : "lzjb"); 249610843SDave.Plauger@Sun.COM P("Compression ratio,%d.%02d\n", compress_ratio / 100, compress_ratio % 249710843SDave.Plauger@Sun.COM 100); 249810843SDave.Plauger@Sun.COM P("nhelper_used,%d\n", cfg->nhelper_used); 249910843SDave.Plauger@Sun.COM 250010843SDave.Plauger@Sun.COM P("Dump I/O rate MBS,%d.%02d\n", iorate / 100, iorate % 100); 250110843SDave.Plauger@Sun.COM P("..total bytes,%lld\n", (u_longlong_t)ds->nwrite); 250210843SDave.Plauger@Sun.COM P("..total nsec,%lld\n", (u_longlong_t)ds->iotime); 250310843SDave.Plauger@Sun.COM P("dumpbuf.iosize,%ld\n", dumpbuf.iosize); 250410843SDave.Plauger@Sun.COM P("dumpbuf.size,%ld\n", dumpbuf.size); 250510843SDave.Plauger@Sun.COM 250610843SDave.Plauger@Sun.COM P("Dump pages/sec,%llu\n", (u_longlong_t)ds->npages / sec); 250710843SDave.Plauger@Sun.COM P("Dump pages,%llu\n", (u_longlong_t)ds->npages); 250810843SDave.Plauger@Sun.COM P("Dump time,%d\n", sec); 250910843SDave.Plauger@Sun.COM 251010843SDave.Plauger@Sun.COM if (ds->pages_mapped > 0) 251110843SDave.Plauger@Sun.COM P("per-cent map utilization,%d\n", (int)((100 * ds->pages_used) 251210843SDave.Plauger@Sun.COM / ds->pages_mapped)); 251310843SDave.Plauger@Sun.COM 251410843SDave.Plauger@Sun.COM P("\nPer-page metrics:\n"); 251510843SDave.Plauger@Sun.COM if (ds->npages > 0) { 251610843SDave.Plauger@Sun.COM for (hp = cfg->helper; hp != hpend; hp++) { 251710843SDave.Plauger@Sun.COM #define PERPAGE(x) ds->perpage.x += hp->perpage.x; 
251810843SDave.Plauger@Sun.COM PERPAGES; 251910843SDave.Plauger@Sun.COM #undef PERPAGE 252010843SDave.Plauger@Sun.COM } 252110843SDave.Plauger@Sun.COM #define PERPAGE(x) \ 252210843SDave.Plauger@Sun.COM P("%s nsec/page,%d\n", #x, (int)(ds->perpage.x / ds->npages)); 252310843SDave.Plauger@Sun.COM PERPAGES; 252410843SDave.Plauger@Sun.COM #undef PERPAGE 252510843SDave.Plauger@Sun.COM P("freebufq.empty,%d\n", (int)(ds->freebufq.empty / 252610843SDave.Plauger@Sun.COM ds->npages)); 252710843SDave.Plauger@Sun.COM P("helperq.empty,%d\n", (int)(ds->helperq.empty / 252810843SDave.Plauger@Sun.COM ds->npages)); 252910843SDave.Plauger@Sun.COM P("writerq.empty,%d\n", (int)(ds->writerq.empty / 253010843SDave.Plauger@Sun.COM ds->npages)); 253110843SDave.Plauger@Sun.COM P("mainq.empty,%d\n", (int)(ds->mainq.empty / ds->npages)); 253210843SDave.Plauger@Sun.COM 253310843SDave.Plauger@Sun.COM P("I/O wait nsec/page,%llu\n", (u_longlong_t)(ds->iowait / 253410843SDave.Plauger@Sun.COM ds->npages)); 253510843SDave.Plauger@Sun.COM } 253610843SDave.Plauger@Sun.COM #undef P 253710843SDave.Plauger@Sun.COM if (p < e) 253810843SDave.Plauger@Sun.COM bzero(p, e - p); 253910843SDave.Plauger@Sun.COM return (p - buf); 254010843SDave.Plauger@Sun.COM } 254110843SDave.Plauger@Sun.COM #endif /* COLLECT_METRICS */ 254210843SDave.Plauger@Sun.COM 25430Sstevel@tonic-gate /* 25440Sstevel@tonic-gate * Dump the system. 
25450Sstevel@tonic-gate */ 25460Sstevel@tonic-gate void 25470Sstevel@tonic-gate dumpsys(void) 25480Sstevel@tonic-gate { 254910843SDave.Plauger@Sun.COM dumpsync_t *ds = &dumpsync; 255010843SDave.Plauger@Sun.COM taskq_t *livetaskq = NULL; 25510Sstevel@tonic-gate pfn_t pfn; 25520Sstevel@tonic-gate pgcnt_t bitnum; 25530Sstevel@tonic-gate proc_t *p; 255410843SDave.Plauger@Sun.COM helper_t *hp, *hpend = &dumpcfg.helper[dumpcfg.nhelper]; 255510843SDave.Plauger@Sun.COM cbuf_t *cp; 25560Sstevel@tonic-gate pid_t npids, pidx; 25570Sstevel@tonic-gate char *content; 255811178SDave.Plauger@Sun.COM char *buf; 255911178SDave.Plauger@Sun.COM size_t size; 256010843SDave.Plauger@Sun.COM int save_dump_clevel; 256110843SDave.Plauger@Sun.COM dumpmlw_t mlw; 256210843SDave.Plauger@Sun.COM dumpcsize_t datatag; 256310843SDave.Plauger@Sun.COM dumpdatahdr_t datahdr; 25640Sstevel@tonic-gate 25650Sstevel@tonic-gate if (dumpvp == NULL || dumphdr == NULL) { 25660Sstevel@tonic-gate uprintf("skipping system dump - no dump device configured\n"); 256710843SDave.Plauger@Sun.COM if (panicstr) { 256810843SDave.Plauger@Sun.COM dumpcfg.helpers_wanted = 0; 256910843SDave.Plauger@Sun.COM dumpsys_spinunlock(&dumpcfg.helper_lock); 257010843SDave.Plauger@Sun.COM } 25710Sstevel@tonic-gate return; 25720Sstevel@tonic-gate } 257310843SDave.Plauger@Sun.COM dumpbuf.cur = dumpbuf.start; 257410843SDave.Plauger@Sun.COM 257510843SDave.Plauger@Sun.COM /* clear the sync variables */ 257610843SDave.Plauger@Sun.COM ASSERT(dumpcfg.nhelper > 0); 257710843SDave.Plauger@Sun.COM bzero(ds, sizeof (*ds)); 257810843SDave.Plauger@Sun.COM ds->dumpcpu = CPU->cpu_id; 25790Sstevel@tonic-gate 25800Sstevel@tonic-gate /* 25810Sstevel@tonic-gate * Calculate the starting block for dump. If we're dumping on a 25820Sstevel@tonic-gate * swap device, start 1/5 of the way in; otherwise, start at the 25830Sstevel@tonic-gate * beginning. And never use the first page -- it may be a disk label. 
25840Sstevel@tonic-gate */ 25850Sstevel@tonic-gate if (dumpvp->v_flag & VISSWAP) 25860Sstevel@tonic-gate dumphdr->dump_start = P2ROUNDUP(dumpvp_size / 5, DUMP_OFFSET); 25870Sstevel@tonic-gate else 25880Sstevel@tonic-gate dumphdr->dump_start = DUMP_OFFSET; 25890Sstevel@tonic-gate 259010843SDave.Plauger@Sun.COM dumphdr->dump_flags = DF_VALID | DF_COMPLETE | DF_LIVE | DF_COMPRESSED; 25910Sstevel@tonic-gate dumphdr->dump_crashtime = gethrestime_sec(); 25920Sstevel@tonic-gate dumphdr->dump_npages = 0; 25930Sstevel@tonic-gate dumphdr->dump_nvtop = 0; 259410843SDave.Plauger@Sun.COM bzero(dumpcfg.bitmap, BT_SIZEOFMAP(dumpcfg.bitmapsize)); 25950Sstevel@tonic-gate dump_timeleft = dump_timeout; 25960Sstevel@tonic-gate 25970Sstevel@tonic-gate if (panicstr) { 25980Sstevel@tonic-gate dumphdr->dump_flags &= ~DF_LIVE; 25995331Samw (void) VOP_DUMPCTL(dumpvp, DUMP_FREE, NULL, NULL); 26005331Samw (void) VOP_DUMPCTL(dumpvp, DUMP_ALLOC, NULL, NULL); 26010Sstevel@tonic-gate (void) vsnprintf(dumphdr->dump_panicstring, DUMP_PANICSIZE, 26020Sstevel@tonic-gate panicstr, panicargs); 260310843SDave.Plauger@Sun.COM 26040Sstevel@tonic-gate } 26050Sstevel@tonic-gate 26060Sstevel@tonic-gate if (dump_conflags & DUMP_ALL) 26070Sstevel@tonic-gate content = "all"; 26080Sstevel@tonic-gate else if (dump_conflags & DUMP_CURPROC) 26090Sstevel@tonic-gate content = "kernel + curproc"; 26100Sstevel@tonic-gate else 26110Sstevel@tonic-gate content = "kernel"; 26120Sstevel@tonic-gate uprintf("dumping to %s, offset %lld, content: %s\n", dumppath, 26130Sstevel@tonic-gate dumphdr->dump_start, content); 26140Sstevel@tonic-gate 261510843SDave.Plauger@Sun.COM /* Make sure nodename is current */ 261610843SDave.Plauger@Sun.COM bcopy(utsname.nodename, dumphdr->dump_utsname.nodename, SYS_NMLN); 261710843SDave.Plauger@Sun.COM 261810843SDave.Plauger@Sun.COM /* 261910843SDave.Plauger@Sun.COM * If this is a live dump, try to open a VCHR vnode for better 262010843SDave.Plauger@Sun.COM * performance. 
We must take care to flush the buffer cache 262110843SDave.Plauger@Sun.COM * first. 262210843SDave.Plauger@Sun.COM */ 262310843SDave.Plauger@Sun.COM if (!panicstr) { 262410843SDave.Plauger@Sun.COM vnode_t *cdev_vp, *cmn_cdev_vp; 262510843SDave.Plauger@Sun.COM 262610843SDave.Plauger@Sun.COM ASSERT(dumpbuf.cdev_vp == NULL); 262710843SDave.Plauger@Sun.COM cdev_vp = makespecvp(VTOS(dumpvp)->s_dev, VCHR); 262810843SDave.Plauger@Sun.COM if (cdev_vp != NULL) { 262910843SDave.Plauger@Sun.COM cmn_cdev_vp = common_specvp(cdev_vp); 263010843SDave.Plauger@Sun.COM if (VOP_OPEN(&cmn_cdev_vp, FREAD | FWRITE, kcred, NULL) 263110843SDave.Plauger@Sun.COM == 0) { 263210843SDave.Plauger@Sun.COM if (vn_has_cached_data(dumpvp)) 263310843SDave.Plauger@Sun.COM (void) pvn_vplist_dirty(dumpvp, 0, NULL, 263410843SDave.Plauger@Sun.COM B_INVAL | B_TRUNC, kcred); 263510843SDave.Plauger@Sun.COM dumpbuf.cdev_vp = cmn_cdev_vp; 263610843SDave.Plauger@Sun.COM } else { 263710843SDave.Plauger@Sun.COM VN_RELE(cdev_vp); 263810843SDave.Plauger@Sun.COM } 263910843SDave.Plauger@Sun.COM } 264010843SDave.Plauger@Sun.COM } 264110843SDave.Plauger@Sun.COM 26420Sstevel@tonic-gate /* 264311066Srafael.vanoni@sun.com * Store a hires timestamp so we can look it up during debugging. 264411066Srafael.vanoni@sun.com */ 264511066Srafael.vanoni@sun.com lbolt_debug_entry(); 264611066Srafael.vanoni@sun.com 264711066Srafael.vanoni@sun.com /* 26480Sstevel@tonic-gate * Leave room for the message and ereport save areas and terminal dump 26490Sstevel@tonic-gate * header. 26500Sstevel@tonic-gate */ 265110843SDave.Plauger@Sun.COM dumpbuf.vp_limit = dumpvp_size - DUMP_LOGSIZE - DUMP_OFFSET - 265210843SDave.Plauger@Sun.COM DUMP_ERPTSIZE; 26530Sstevel@tonic-gate 26540Sstevel@tonic-gate /* 26550Sstevel@tonic-gate * Write out the symbol table. It's no longer compressed, 26560Sstevel@tonic-gate * so its 'size' and 'csize' are equal. 
26570Sstevel@tonic-gate */ 265810843SDave.Plauger@Sun.COM dumpbuf.vp_off = dumphdr->dump_ksyms = dumphdr->dump_start + PAGESIZE; 26590Sstevel@tonic-gate dumphdr->dump_ksyms_size = dumphdr->dump_ksyms_csize = 26600Sstevel@tonic-gate ksyms_snapshot(dumpvp_ksyms_write, NULL, LONG_MAX); 26610Sstevel@tonic-gate 26620Sstevel@tonic-gate /* 26630Sstevel@tonic-gate * Write out the translation map. 26640Sstevel@tonic-gate */ 26650Sstevel@tonic-gate dumphdr->dump_map = dumpvp_flush(); 26660Sstevel@tonic-gate dump_as(&kas); 26673446Smrj dumphdr->dump_nvtop += dump_plat_addr(); 26680Sstevel@tonic-gate 26690Sstevel@tonic-gate /* 26700Sstevel@tonic-gate * call into hat, which may have unmapped pages that also need to 26710Sstevel@tonic-gate * be in the dump 26720Sstevel@tonic-gate */ 26730Sstevel@tonic-gate hat_dump(); 26740Sstevel@tonic-gate 26750Sstevel@tonic-gate if (dump_conflags & DUMP_ALL) { 26760Sstevel@tonic-gate mutex_enter(&pidlock); 26770Sstevel@tonic-gate 26780Sstevel@tonic-gate for (npids = 0, p = practive; p != NULL; p = p->p_next) 267910843SDave.Plauger@Sun.COM dumpcfg.pids[npids++] = p->p_pid; 26800Sstevel@tonic-gate 26810Sstevel@tonic-gate mutex_exit(&pidlock); 26820Sstevel@tonic-gate 26830Sstevel@tonic-gate for (pidx = 0; pidx < npids; pidx++) 268410843SDave.Plauger@Sun.COM (void) dump_process(dumpcfg.pids[pidx]); 268510843SDave.Plauger@Sun.COM 268612042SDave.Plauger@Sun.COM dump_init_memlist_walker(&mlw); 268710843SDave.Plauger@Sun.COM for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum++) { 26880Sstevel@tonic-gate dump_timeleft = dump_timeout; 268911480SStuart.Maybee@Sun.COM pfn = dump_bitnum_to_pfn(bitnum, &mlw); 269011480SStuart.Maybee@Sun.COM /* 269111480SStuart.Maybee@Sun.COM * Some hypervisors do not have all pages available to 269211480SStuart.Maybee@Sun.COM * be accessed by the guest OS. Check for page 269311480SStuart.Maybee@Sun.COM * accessibility. 
269411480SStuart.Maybee@Sun.COM */ 269511480SStuart.Maybee@Sun.COM if (plat_hold_page(pfn, PLAT_HOLD_NO_LOCK, NULL) != 269611480SStuart.Maybee@Sun.COM PLAT_HOLD_OK) 269711480SStuart.Maybee@Sun.COM continue; 269810843SDave.Plauger@Sun.COM BT_SET(dumpcfg.bitmap, bitnum); 26990Sstevel@tonic-gate } 270010843SDave.Plauger@Sun.COM dumphdr->dump_npages = dumpcfg.bitmapsize; 27010Sstevel@tonic-gate dumphdr->dump_flags |= DF_ALL; 27020Sstevel@tonic-gate 27030Sstevel@tonic-gate } else if (dump_conflags & DUMP_CURPROC) { 27040Sstevel@tonic-gate /* 27050Sstevel@tonic-gate * Determine which pid is to be dumped. If we're panicking, we 27060Sstevel@tonic-gate * dump the process associated with panic_thread (if any). If 27070Sstevel@tonic-gate * this is a live dump, we dump the process associated with 27080Sstevel@tonic-gate * curthread. 27090Sstevel@tonic-gate */ 27100Sstevel@tonic-gate npids = 0; 27110Sstevel@tonic-gate if (panicstr) { 27120Sstevel@tonic-gate if (panic_thread != NULL && 27130Sstevel@tonic-gate panic_thread->t_procp != NULL && 27140Sstevel@tonic-gate panic_thread->t_procp != &p0) { 271510843SDave.Plauger@Sun.COM dumpcfg.pids[npids++] = 27160Sstevel@tonic-gate panic_thread->t_procp->p_pid; 27170Sstevel@tonic-gate } 27180Sstevel@tonic-gate } else { 271910843SDave.Plauger@Sun.COM dumpcfg.pids[npids++] = curthread->t_procp->p_pid; 27200Sstevel@tonic-gate } 27210Sstevel@tonic-gate 272210843SDave.Plauger@Sun.COM if (npids && dump_process(dumpcfg.pids[0]) == 0) 27230Sstevel@tonic-gate dumphdr->dump_flags |= DF_CURPROC; 27240Sstevel@tonic-gate else 27250Sstevel@tonic-gate dumphdr->dump_flags |= DF_KERNEL; 27260Sstevel@tonic-gate 27270Sstevel@tonic-gate } else { 27280Sstevel@tonic-gate dumphdr->dump_flags |= DF_KERNEL; 27290Sstevel@tonic-gate } 27300Sstevel@tonic-gate 27310Sstevel@tonic-gate dumphdr->dump_hashmask = (1 << highbit(dumphdr->dump_nvtop - 1)) - 1; 27320Sstevel@tonic-gate 27330Sstevel@tonic-gate /* 27340Sstevel@tonic-gate * Write out the pfn table. 
27350Sstevel@tonic-gate */ 27360Sstevel@tonic-gate dumphdr->dump_pfn = dumpvp_flush(); 273710843SDave.Plauger@Sun.COM dump_init_memlist_walker(&mlw); 273810843SDave.Plauger@Sun.COM for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum++) { 27390Sstevel@tonic-gate dump_timeleft = dump_timeout; 274010843SDave.Plauger@Sun.COM if (!BT_TEST(dumpcfg.bitmap, bitnum)) 27410Sstevel@tonic-gate continue; 274210843SDave.Plauger@Sun.COM pfn = dump_bitnum_to_pfn(bitnum, &mlw); 27430Sstevel@tonic-gate ASSERT(pfn != PFN_INVALID); 27440Sstevel@tonic-gate dumpvp_write(&pfn, sizeof (pfn_t)); 27450Sstevel@tonic-gate } 27463446Smrj dump_plat_pfn(); 27470Sstevel@tonic-gate 27480Sstevel@tonic-gate /* 27490Sstevel@tonic-gate * Write out all the pages. 275010843SDave.Plauger@Sun.COM * Map pages, copy them handling UEs, compress, and write them out. 275110843SDave.Plauger@Sun.COM * Cooperate with any helpers running on CPUs in panic_idle(). 27520Sstevel@tonic-gate */ 27530Sstevel@tonic-gate dumphdr->dump_data = dumpvp_flush(); 275410843SDave.Plauger@Sun.COM 275510843SDave.Plauger@Sun.COM bzero(dumpcfg.helpermap, BT_SIZEOFMAP(NCPU)); 275610843SDave.Plauger@Sun.COM ds->live = dumpcfg.clevel > 0 && 275710843SDave.Plauger@Sun.COM (dumphdr->dump_flags & DF_LIVE) != 0; 275810843SDave.Plauger@Sun.COM 275910843SDave.Plauger@Sun.COM save_dump_clevel = dumpcfg.clevel; 276010843SDave.Plauger@Sun.COM if (panicstr) 276110843SDave.Plauger@Sun.COM dumpsys_get_maxmem(); 276210843SDave.Plauger@Sun.COM else if (dumpcfg.clevel >= DUMP_CLEVEL_BZIP2) 276310843SDave.Plauger@Sun.COM dumpcfg.clevel = DUMP_CLEVEL_LZJB; 276410843SDave.Plauger@Sun.COM 276510843SDave.Plauger@Sun.COM dumpcfg.nhelper_used = 0; 276610843SDave.Plauger@Sun.COM for (hp = dumpcfg.helper; hp != hpend; hp++) { 276710843SDave.Plauger@Sun.COM if (hp->page == NULL) { 276810843SDave.Plauger@Sun.COM hp->helper = DONEHELPER; 27690Sstevel@tonic-gate continue; 27700Sstevel@tonic-gate } 277110843SDave.Plauger@Sun.COM ++dumpcfg.nhelper_used; 
277210843SDave.Plauger@Sun.COM hp->helper = FREEHELPER; 277310843SDave.Plauger@Sun.COM hp->taskqid = NULL; 277410843SDave.Plauger@Sun.COM hp->ds = ds; 277510843SDave.Plauger@Sun.COM bzero(&hp->perpage, sizeof (hp->perpage)); 277610843SDave.Plauger@Sun.COM if (dumpcfg.clevel >= DUMP_CLEVEL_BZIP2) 277710843SDave.Plauger@Sun.COM (void) BZ2_bzCompressReset(&hp->bzstream); 277810843SDave.Plauger@Sun.COM } 277910843SDave.Plauger@Sun.COM 278010843SDave.Plauger@Sun.COM CQ_OPEN(freebufq); 278110843SDave.Plauger@Sun.COM CQ_OPEN(helperq); 278210843SDave.Plauger@Sun.COM 278310843SDave.Plauger@Sun.COM dumpcfg.ncbuf_used = 0; 278410843SDave.Plauger@Sun.COM for (cp = dumpcfg.cbuf; cp != &dumpcfg.cbuf[dumpcfg.ncbuf]; cp++) { 278510843SDave.Plauger@Sun.COM if (cp->buf != NULL) { 278610843SDave.Plauger@Sun.COM CQ_PUT(freebufq, cp, CBUF_FREEBUF); 278710843SDave.Plauger@Sun.COM ++dumpcfg.ncbuf_used; 27880Sstevel@tonic-gate } 27890Sstevel@tonic-gate } 279010843SDave.Plauger@Sun.COM 279110843SDave.Plauger@Sun.COM for (cp = dumpcfg.cmap; cp != &dumpcfg.cmap[dumpcfg.ncmap]; cp++) 279210843SDave.Plauger@Sun.COM CQ_PUT(mainq, cp, CBUF_FREEMAP); 279310843SDave.Plauger@Sun.COM 279410843SDave.Plauger@Sun.COM ds->start = gethrtime(); 279510843SDave.Plauger@Sun.COM ds->iowaitts = ds->start; 279610843SDave.Plauger@Sun.COM 279710843SDave.Plauger@Sun.COM /* start helpers */ 279810843SDave.Plauger@Sun.COM if (ds->live) { 279910843SDave.Plauger@Sun.COM int n = dumpcfg.nhelper_used; 280010843SDave.Plauger@Sun.COM int pri = MINCLSYSPRI - 25; 280110843SDave.Plauger@Sun.COM 280210843SDave.Plauger@Sun.COM livetaskq = taskq_create("LiveDump", n, pri, n, n, 280310843SDave.Plauger@Sun.COM TASKQ_PREPOPULATE); 280410843SDave.Plauger@Sun.COM for (hp = dumpcfg.helper; hp != hpend; hp++) { 280510843SDave.Plauger@Sun.COM if (hp->page == NULL) 280610843SDave.Plauger@Sun.COM continue; 280710843SDave.Plauger@Sun.COM hp->helper = hp - dumpcfg.helper; 280810843SDave.Plauger@Sun.COM hp->taskqid = 
taskq_dispatch(livetaskq, 280910843SDave.Plauger@Sun.COM dumpsys_live_helper, (void *)hp, TQ_NOSLEEP); 281010843SDave.Plauger@Sun.COM } 281110843SDave.Plauger@Sun.COM 281210843SDave.Plauger@Sun.COM } else { 281311178SDave.Plauger@Sun.COM if (panicstr) 281411178SDave.Plauger@Sun.COM kmem_dump_begin(); 281510843SDave.Plauger@Sun.COM dumpcfg.helpers_wanted = dumpcfg.clevel > 0; 281610843SDave.Plauger@Sun.COM dumpsys_spinunlock(&dumpcfg.helper_lock); 281710843SDave.Plauger@Sun.COM } 281810843SDave.Plauger@Sun.COM 281910843SDave.Plauger@Sun.COM /* run main task */ 282010843SDave.Plauger@Sun.COM dumpsys_main_task(ds); 282110843SDave.Plauger@Sun.COM 282210843SDave.Plauger@Sun.COM ds->elapsed = gethrtime() - ds->start; 282310843SDave.Plauger@Sun.COM if (ds->elapsed < 1) 282410843SDave.Plauger@Sun.COM ds->elapsed = 1; 282510843SDave.Plauger@Sun.COM 282610843SDave.Plauger@Sun.COM if (livetaskq != NULL) 282710843SDave.Plauger@Sun.COM taskq_destroy(livetaskq); 282810843SDave.Plauger@Sun.COM 282910843SDave.Plauger@Sun.COM if (ds->neednl) { 283010843SDave.Plauger@Sun.COM uprintf("\n"); 283110843SDave.Plauger@Sun.COM ds->neednl = 0; 283210843SDave.Plauger@Sun.COM } 283310843SDave.Plauger@Sun.COM 283410843SDave.Plauger@Sun.COM /* record actual pages dumped */ 283510843SDave.Plauger@Sun.COM dumphdr->dump_npages = ds->npages; 283610843SDave.Plauger@Sun.COM 283710843SDave.Plauger@Sun.COM /* platform-specific data */ 283810843SDave.Plauger@Sun.COM dumphdr->dump_npages += dump_plat_data(dumpcfg.cbuf[0].buf); 283910843SDave.Plauger@Sun.COM 284010843SDave.Plauger@Sun.COM /* note any errors by clearing DF_COMPLETE */ 284110843SDave.Plauger@Sun.COM if (dump_ioerr || ds->npages < dumphdr->dump_npages) 284210843SDave.Plauger@Sun.COM dumphdr->dump_flags &= ~DF_COMPLETE; 284310843SDave.Plauger@Sun.COM 284410843SDave.Plauger@Sun.COM /* end of stream blocks */ 284510843SDave.Plauger@Sun.COM datatag = 0; 284610843SDave.Plauger@Sun.COM dumpvp_write(&datatag, sizeof (datatag)); 
284710843SDave.Plauger@Sun.COM 284811178SDave.Plauger@Sun.COM bzero(&datahdr, sizeof (datahdr)); 284911178SDave.Plauger@Sun.COM 285011178SDave.Plauger@Sun.COM /* buffer for metrics */ 285111178SDave.Plauger@Sun.COM buf = dumpcfg.cbuf[0].buf; 285211178SDave.Plauger@Sun.COM size = MIN(dumpcfg.cbuf[0].size, DUMP_OFFSET - sizeof (dumphdr_t) - 285311178SDave.Plauger@Sun.COM sizeof (dumpdatahdr_t)); 285411178SDave.Plauger@Sun.COM 285511178SDave.Plauger@Sun.COM /* finish the kmem intercepts, collect kmem verbose info */ 285611178SDave.Plauger@Sun.COM if (panicstr) { 285711178SDave.Plauger@Sun.COM datahdr.dump_metrics = kmem_dump_finish(buf, size); 285811178SDave.Plauger@Sun.COM buf += datahdr.dump_metrics; 285911178SDave.Plauger@Sun.COM size -= datahdr.dump_metrics; 286011178SDave.Plauger@Sun.COM } 286111178SDave.Plauger@Sun.COM 286210843SDave.Plauger@Sun.COM /* compression info in data header */ 286310843SDave.Plauger@Sun.COM datahdr.dump_datahdr_magic = DUMP_DATAHDR_MAGIC; 286410843SDave.Plauger@Sun.COM datahdr.dump_datahdr_version = DUMP_DATAHDR_VERSION; 286510843SDave.Plauger@Sun.COM datahdr.dump_maxcsize = CBUF_SIZE; 286610843SDave.Plauger@Sun.COM datahdr.dump_maxrange = CBUF_MAPSIZE / PAGESIZE; 286710843SDave.Plauger@Sun.COM datahdr.dump_nstreams = dumpcfg.nhelper_used; 286810843SDave.Plauger@Sun.COM datahdr.dump_clevel = dumpcfg.clevel; 286910843SDave.Plauger@Sun.COM #ifdef COLLECT_METRICS 287010843SDave.Plauger@Sun.COM if (dump_metrics_on) 287111178SDave.Plauger@Sun.COM datahdr.dump_metrics += dumpsys_metrics(ds, buf, size); 287210843SDave.Plauger@Sun.COM #endif 287310843SDave.Plauger@Sun.COM datahdr.dump_data_csize = dumpvp_flush() - dumphdr->dump_data; 28740Sstevel@tonic-gate 28750Sstevel@tonic-gate /* 28760Sstevel@tonic-gate * Write out the initial and terminal dump headers. 
28770Sstevel@tonic-gate */ 287810843SDave.Plauger@Sun.COM dumpbuf.vp_off = dumphdr->dump_start; 28790Sstevel@tonic-gate dumpvp_write(dumphdr, sizeof (dumphdr_t)); 28800Sstevel@tonic-gate (void) dumpvp_flush(); 28810Sstevel@tonic-gate 288210843SDave.Plauger@Sun.COM dumpbuf.vp_limit = dumpvp_size; 288310843SDave.Plauger@Sun.COM dumpbuf.vp_off = dumpbuf.vp_limit - DUMP_OFFSET; 28840Sstevel@tonic-gate dumpvp_write(dumphdr, sizeof (dumphdr_t)); 288510843SDave.Plauger@Sun.COM dumpvp_write(&datahdr, sizeof (dumpdatahdr_t)); 288610843SDave.Plauger@Sun.COM dumpvp_write(dumpcfg.cbuf[0].buf, datahdr.dump_metrics); 288710843SDave.Plauger@Sun.COM 28880Sstevel@tonic-gate (void) dumpvp_flush(); 28890Sstevel@tonic-gate 289010843SDave.Plauger@Sun.COM uprintf("\r%3d%% done: %llu pages dumped, ", 289110843SDave.Plauger@Sun.COM ds->percent_done, (u_longlong_t)ds->npages); 28920Sstevel@tonic-gate 28930Sstevel@tonic-gate if (dump_ioerr == 0) { 28940Sstevel@tonic-gate uprintf("dump succeeded\n"); 28950Sstevel@tonic-gate } else { 28960Sstevel@tonic-gate uprintf("dump failed: error %d\n", dump_ioerr); 289710843SDave.Plauger@Sun.COM #ifdef DEBUG 289810843SDave.Plauger@Sun.COM if (panicstr) 28990Sstevel@tonic-gate debug_enter("dump failed"); 290010843SDave.Plauger@Sun.COM #endif 29010Sstevel@tonic-gate } 29020Sstevel@tonic-gate 29030Sstevel@tonic-gate /* 29040Sstevel@tonic-gate * Write out all undelivered messages. This has to be the *last* 29050Sstevel@tonic-gate * thing we do because the dump process itself emits messages. 
29060Sstevel@tonic-gate */ 29070Sstevel@tonic-gate if (panicstr) { 29080Sstevel@tonic-gate dump_ereports(); 29090Sstevel@tonic-gate dump_messages(); 29100Sstevel@tonic-gate } 29110Sstevel@tonic-gate 29120Sstevel@tonic-gate delay(2 * hz); /* let people see the 'done' message */ 29130Sstevel@tonic-gate dump_timeleft = 0; 29140Sstevel@tonic-gate dump_ioerr = 0; 291510843SDave.Plauger@Sun.COM 291610843SDave.Plauger@Sun.COM /* restore settings after live dump completes */ 291710843SDave.Plauger@Sun.COM if (!panicstr) { 291810843SDave.Plauger@Sun.COM dumpcfg.clevel = save_dump_clevel; 291910843SDave.Plauger@Sun.COM 292010843SDave.Plauger@Sun.COM /* release any VCHR open of the dump device */ 292110843SDave.Plauger@Sun.COM if (dumpbuf.cdev_vp != NULL) { 292210843SDave.Plauger@Sun.COM (void) VOP_CLOSE(dumpbuf.cdev_vp, FREAD | FWRITE, 1, 0, 292310843SDave.Plauger@Sun.COM kcred, NULL); 292410843SDave.Plauger@Sun.COM VN_RELE(dumpbuf.cdev_vp); 292510843SDave.Plauger@Sun.COM dumpbuf.cdev_vp = NULL; 292610843SDave.Plauger@Sun.COM } 292710843SDave.Plauger@Sun.COM } 29280Sstevel@tonic-gate } 29290Sstevel@tonic-gate 29300Sstevel@tonic-gate /* 29310Sstevel@tonic-gate * This function is called whenever the memory size, as represented 29320Sstevel@tonic-gate * by the phys_install list, changes. 29330Sstevel@tonic-gate */ 29340Sstevel@tonic-gate void 29350Sstevel@tonic-gate dump_resize() 29360Sstevel@tonic-gate { 29370Sstevel@tonic-gate mutex_enter(&dump_lock); 29380Sstevel@tonic-gate dumphdr_init(); 29390Sstevel@tonic-gate dumpbuf_resize(); 294010843SDave.Plauger@Sun.COM dump_update_clevel(); 29410Sstevel@tonic-gate mutex_exit(&dump_lock); 29420Sstevel@tonic-gate } 29436423Sgw25295 29446423Sgw25295 /* 29456423Sgw25295 * This function allows for dynamic resizing of a dump area. It assumes that 29466423Sgw25295 * the underlying device has update its appropriate size(9P). 
29476423Sgw25295 */ 29486423Sgw25295 int 29496423Sgw25295 dumpvp_resize() 29506423Sgw25295 { 29516423Sgw25295 int error; 29526423Sgw25295 vattr_t vattr; 29536423Sgw25295 29546423Sgw25295 mutex_enter(&dump_lock); 29556423Sgw25295 vattr.va_mask = AT_SIZE; 29566423Sgw25295 if ((error = VOP_GETATTR(dumpvp, &vattr, 0, kcred, NULL)) != 0) { 29576423Sgw25295 mutex_exit(&dump_lock); 29586423Sgw25295 return (error); 29596423Sgw25295 } 29606423Sgw25295 29616423Sgw25295 if (error == 0 && vattr.va_size < 2 * DUMP_LOGSIZE + DUMP_ERPTSIZE) { 29626423Sgw25295 mutex_exit(&dump_lock); 29636423Sgw25295 return (ENOSPC); 29646423Sgw25295 } 29656423Sgw25295 29666423Sgw25295 dumpvp_size = vattr.va_size & -DUMP_OFFSET; 29676423Sgw25295 mutex_exit(&dump_lock); 29686423Sgw25295 return (0); 29696423Sgw25295 } 2970