10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 53446Smrj * Common Development and Distribution License (the "License"). 63446Smrj * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 217563SPrasad.Singamsetty@Sun.COM 220Sstevel@tonic-gate /* 23*11474SJonathan.Adams@Sun.COM * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 240Sstevel@tonic-gate * Use is subject to license terms. 250Sstevel@tonic-gate */ 260Sstevel@tonic-gate 270Sstevel@tonic-gate #include <sys/types.h> 280Sstevel@tonic-gate #include <sys/param.h> 290Sstevel@tonic-gate #include <sys/systm.h> 300Sstevel@tonic-gate #include <sys/vm.h> 310Sstevel@tonic-gate #include <sys/proc.h> 320Sstevel@tonic-gate #include <sys/file.h> 330Sstevel@tonic-gate #include <sys/conf.h> 340Sstevel@tonic-gate #include <sys/kmem.h> 350Sstevel@tonic-gate #include <sys/mem.h> 360Sstevel@tonic-gate #include <sys/mman.h> 370Sstevel@tonic-gate #include <sys/vnode.h> 380Sstevel@tonic-gate #include <sys/errno.h> 390Sstevel@tonic-gate #include <sys/memlist.h> 400Sstevel@tonic-gate #include <sys/dumphdr.h> 410Sstevel@tonic-gate #include <sys/dumpadm.h> 420Sstevel@tonic-gate #include <sys/ksyms.h> 430Sstevel@tonic-gate #include <sys/compress.h> 440Sstevel@tonic-gate #include <sys/stream.h> 450Sstevel@tonic-gate #include <sys/strsun.h> 460Sstevel@tonic-gate #include <sys/cmn_err.h> 470Sstevel@tonic-gate #include <sys/bitmap.h> 480Sstevel@tonic-gate #include <sys/modctl.h> 490Sstevel@tonic-gate #include <sys/utsname.h> 500Sstevel@tonic-gate #include <sys/systeminfo.h> 510Sstevel@tonic-gate #include <sys/vmem.h> 520Sstevel@tonic-gate #include <sys/log.h> 530Sstevel@tonic-gate #include <sys/var.h> 540Sstevel@tonic-gate #include <sys/debug.h> 550Sstevel@tonic-gate #include <sys/sunddi.h> 560Sstevel@tonic-gate #include <fs/fs_subr.h> 570Sstevel@tonic-gate #include <sys/fs/snode.h> 580Sstevel@tonic-gate #include <sys/ontrap.h> 590Sstevel@tonic-gate #include <sys/panic.h> 600Sstevel@tonic-gate #include <sys/dkio.h> 610Sstevel@tonic-gate #include <sys/vtoc.h> 620Sstevel@tonic-gate #include <sys/errorq.h> 630Sstevel@tonic-gate #include <sys/fm/util.h> 646423Sgw25295 #include <sys/fs/zfs.h> 650Sstevel@tonic-gate 660Sstevel@tonic-gate #include <vm/hat.h> 670Sstevel@tonic-gate #include <vm/as.h> 680Sstevel@tonic-gate #include <vm/page.h> 6910843SDave.Plauger@Sun.COM #include <vm/pvn.h> 700Sstevel@tonic-gate #include <vm/seg.h> 710Sstevel@tonic-gate #include <vm/seg_kmem.h> 7211066Srafael.vanoni@sun.com #include <sys/clock_impl.h> 730Sstevel@tonic-gate 7410843SDave.Plauger@Sun.COM #include <bzip2/bzlib.h> 7510843SDave.Plauger@Sun.COM 7610843SDave.Plauger@Sun.COM /* 7710843SDave.Plauger@Sun.COM * Crash dump time is dominated by disk write time. To reduce this, 7810843SDave.Plauger@Sun.COM * the stronger compression method bzip2 is applied to reduce the dump 7910843SDave.Plauger@Sun.COM * size and hence reduce I/O time. However, bzip2 is much more 8010843SDave.Plauger@Sun.COM * computationally expensive than the existing lzjb algorithm, so to 8110843SDave.Plauger@Sun.COM * avoid increasing compression time, CPUs that are otherwise idle 8210843SDave.Plauger@Sun.COM * during panic are employed to parallelize the compression task. 8310843SDave.Plauger@Sun.COM * Many helper CPUs are needed to prevent bzip2 from being a 8410843SDave.Plauger@Sun.COM * bottleneck, and on systems with too few CPUs, the lzjb algorithm is 8510843SDave.Plauger@Sun.COM * parallelized instead. Lastly, I/O and compression are performed by 8610843SDave.Plauger@Sun.COM * different CPUs, and are hence overlapped in time, unlike the older 8710843SDave.Plauger@Sun.COM * serial code. 8810843SDave.Plauger@Sun.COM * 8910843SDave.Plauger@Sun.COM * Another important consideration is the speed of the dump 9010843SDave.Plauger@Sun.COM * device. Faster disks need less CPUs in order to benefit from 9110843SDave.Plauger@Sun.COM * parallel lzjb versus parallel bzip2. Therefore, the CPU count 9210843SDave.Plauger@Sun.COM * threshold for switching from parallel lzjb to paralled bzip2 is 9310843SDave.Plauger@Sun.COM * elevated for faster disks. The dump device speed is adduced from 9410843SDave.Plauger@Sun.COM * the setting for dumpbuf.iosize, see dump_update_clevel. 9510843SDave.Plauger@Sun.COM */ 9610843SDave.Plauger@Sun.COM 9710843SDave.Plauger@Sun.COM /* 9810843SDave.Plauger@Sun.COM * exported vars 9910843SDave.Plauger@Sun.COM */ 10010843SDave.Plauger@Sun.COM kmutex_t dump_lock; /* lock for dump configuration */ 10110843SDave.Plauger@Sun.COM dumphdr_t *dumphdr; /* dump header */ 1020Sstevel@tonic-gate int dump_conflags = DUMP_KERNEL; /* dump configuration flags */ 10310843SDave.Plauger@Sun.COM vnode_t *dumpvp; /* dump device vnode pointer */ 10410843SDave.Plauger@Sun.COM u_offset_t dumpvp_size; /* size of dump device, in bytes */ 10510843SDave.Plauger@Sun.COM char *dumppath; /* pathname of dump device */ 10610843SDave.Plauger@Sun.COM int dump_timeout = 120; /* timeout for dumping pages */ 10710843SDave.Plauger@Sun.COM int dump_timeleft; /* portion of dump_timeout remaining */ 10810843SDave.Plauger@Sun.COM int dump_ioerr; /* dump i/o error */ 10910843SDave.Plauger@Sun.COM int dump_check_used; /* enable check for used pages */ 11010843SDave.Plauger@Sun.COM 11110843SDave.Plauger@Sun.COM /* 11210843SDave.Plauger@Sun.COM * Tunables for dump compression and parallelism. These can be set via 11310843SDave.Plauger@Sun.COM * /etc/system. 11410843SDave.Plauger@Sun.COM * 11510843SDave.Plauger@Sun.COM * dump_ncpu_low number of helpers for parallel lzjb 11610843SDave.Plauger@Sun.COM * This is also the minimum configuration. 11710843SDave.Plauger@Sun.COM * 11810843SDave.Plauger@Sun.COM * dump_bzip2_level bzip2 compression level: 1-9 11910843SDave.Plauger@Sun.COM * Higher numbers give greater compression, but take more memory 12010843SDave.Plauger@Sun.COM * and time. Memory used per helper is ~(dump_bzip2_level * 1MB). 12110843SDave.Plauger@Sun.COM * 12210843SDave.Plauger@Sun.COM * dump_plat_mincpu the cross-over limit for using bzip2 (per platform): 12310843SDave.Plauger@Sun.COM * if dump_plat_mincpu == 0, then always do single threaded dump 12410843SDave.Plauger@Sun.COM * if ncpu >= dump_plat_mincpu then try to use bzip2 12510843SDave.Plauger@Sun.COM * 12610843SDave.Plauger@Sun.COM * dump_metrics_on if set, metrics are collected in the kernel, passed 12710843SDave.Plauger@Sun.COM * to savecore via the dump file, and recorded by savecore in 12810843SDave.Plauger@Sun.COM * METRICS.txt. 12910843SDave.Plauger@Sun.COM */ 13010843SDave.Plauger@Sun.COM uint_t dump_ncpu_low = 4; /* minimum config for parallel lzjb */ 13110843SDave.Plauger@Sun.COM uint_t dump_bzip2_level = 1; /* bzip2 level (1-9) */ 13210843SDave.Plauger@Sun.COM 13311178SDave.Plauger@Sun.COM /* tunables for pre-reserved heap */ 13411178SDave.Plauger@Sun.COM uint_t dump_kmem_permap = 1024; 13511178SDave.Plauger@Sun.COM uint_t dump_kmem_pages = 8; 13611178SDave.Plauger@Sun.COM 13710843SDave.Plauger@Sun.COM /* Define multiple buffers per helper to avoid stalling */ 13810843SDave.Plauger@Sun.COM #define NCBUF_PER_HELPER 2 13910843SDave.Plauger@Sun.COM #define NCMAP_PER_HELPER 4 14010843SDave.Plauger@Sun.COM 14110843SDave.Plauger@Sun.COM /* minimum number of helpers configured */ 14210843SDave.Plauger@Sun.COM #define MINHELPERS (dump_ncpu_low) 14310843SDave.Plauger@Sun.COM #define MINCBUFS (MINHELPERS * NCBUF_PER_HELPER) 14410843SDave.Plauger@Sun.COM 14510843SDave.Plauger@Sun.COM /* 14610843SDave.Plauger@Sun.COM * Define constant parameters. 14710843SDave.Plauger@Sun.COM * 14810843SDave.Plauger@Sun.COM * CBUF_SIZE size of an output buffer 14910843SDave.Plauger@Sun.COM * 15010843SDave.Plauger@Sun.COM * CBUF_MAPSIZE size of virtual range for mapping pages 15110843SDave.Plauger@Sun.COM * 15210843SDave.Plauger@Sun.COM * CBUF_MAPNP size of virtual range in pages 15310843SDave.Plauger@Sun.COM * 15410843SDave.Plauger@Sun.COM */ 15510843SDave.Plauger@Sun.COM #define DUMP_1KB ((size_t)1 << 10) 15610843SDave.Plauger@Sun.COM #define DUMP_1MB ((size_t)1 << 20) 15710843SDave.Plauger@Sun.COM #define CBUF_SIZE ((size_t)1 << 17) 15810843SDave.Plauger@Sun.COM #define CBUF_MAPSHIFT (22) 15910843SDave.Plauger@Sun.COM #define CBUF_MAPSIZE ((size_t)1 << CBUF_MAPSHIFT) 16010843SDave.Plauger@Sun.COM #define CBUF_MAPNP ((size_t)1 << (CBUF_MAPSHIFT - PAGESHIFT)) 16110843SDave.Plauger@Sun.COM 16210843SDave.Plauger@Sun.COM /* 16310843SDave.Plauger@Sun.COM * Compression metrics are accumulated nano-second subtotals. The 16410843SDave.Plauger@Sun.COM * results are normalized by the number of pages dumped. A report is 16510843SDave.Plauger@Sun.COM * generated when dumpsys() completes and is saved in the dump image 16610843SDave.Plauger@Sun.COM * after the trailing dump header. 16710843SDave.Plauger@Sun.COM * 16810843SDave.Plauger@Sun.COM * Metrics are always collected. Set the variable dump_metrics_on to 16910843SDave.Plauger@Sun.COM * cause metrics to be saved in the crash file, where savecore will 17010843SDave.Plauger@Sun.COM * save it in the file METRICS.txt. 17110843SDave.Plauger@Sun.COM */ 17210843SDave.Plauger@Sun.COM #define PERPAGES \ 17310843SDave.Plauger@Sun.COM PERPAGE(bitmap) PERPAGE(map) PERPAGE(unmap) \ 17410843SDave.Plauger@Sun.COM PERPAGE(copy) PERPAGE(compress) \ 17510843SDave.Plauger@Sun.COM PERPAGE(write) \ 17610843SDave.Plauger@Sun.COM PERPAGE(inwait) PERPAGE(outwait) 17710843SDave.Plauger@Sun.COM 17810843SDave.Plauger@Sun.COM typedef struct perpage { 17910843SDave.Plauger@Sun.COM #define PERPAGE(x) hrtime_t x; 18010843SDave.Plauger@Sun.COM PERPAGES 18110843SDave.Plauger@Sun.COM #undef PERPAGE 18210843SDave.Plauger@Sun.COM } perpage_t; 18310843SDave.Plauger@Sun.COM 18410843SDave.Plauger@Sun.COM /* 18510843SDave.Plauger@Sun.COM * This macro controls the code generation for collecting dump 18610843SDave.Plauger@Sun.COM * performance information. By default, the code is generated, but 18710843SDave.Plauger@Sun.COM * automatic saving of the information is disabled. If dump_metrics_on 18810843SDave.Plauger@Sun.COM * is set to 1, the timing information is passed to savecore via the 18910843SDave.Plauger@Sun.COM * crash file, where it is appended to the file dump-dir/METRICS.txt. 19010843SDave.Plauger@Sun.COM */ 19110843SDave.Plauger@Sun.COM #define COLLECT_METRICS 19210843SDave.Plauger@Sun.COM 19310843SDave.Plauger@Sun.COM #ifdef COLLECT_METRICS 19410843SDave.Plauger@Sun.COM uint_t dump_metrics_on = 0; /* set to 1 to enable recording metrics */ 19510843SDave.Plauger@Sun.COM 19610843SDave.Plauger@Sun.COM #define HRSTART(v, m) v##ts.m = gethrtime() 19710843SDave.Plauger@Sun.COM #define HRSTOP(v, m) v.m += gethrtime() - v##ts.m 19810843SDave.Plauger@Sun.COM #define HRBEGIN(v, m, s) v##ts.m = gethrtime(); v.size += s 19910843SDave.Plauger@Sun.COM #define HREND(v, m) v.m += gethrtime() - v##ts.m 20010843SDave.Plauger@Sun.COM #define HRNORM(v, m, n) v.m /= (n) 20110843SDave.Plauger@Sun.COM 2020Sstevel@tonic-gate #else 20310843SDave.Plauger@Sun.COM #define HRSTART(v, m) 20410843SDave.Plauger@Sun.COM #define HRSTOP(v, m) 20510843SDave.Plauger@Sun.COM #define HRBEGIN(v, m, s) 20610843SDave.Plauger@Sun.COM #define HREND(v, m) 20710843SDave.Plauger@Sun.COM #define HRNORM(v, m, n) 20810843SDave.Plauger@Sun.COM #endif /* COLLECT_METRICS */ 20910843SDave.Plauger@Sun.COM 21010843SDave.Plauger@Sun.COM /* 21110843SDave.Plauger@Sun.COM * Buffers for copying and compressing memory pages. 21210843SDave.Plauger@Sun.COM * 21310843SDave.Plauger@Sun.COM * cbuf_t buffer controllers: used for both input and output. 21410843SDave.Plauger@Sun.COM * 21510843SDave.Plauger@Sun.COM * The buffer state indicates how it is being used: 21610843SDave.Plauger@Sun.COM * 21710843SDave.Plauger@Sun.COM * CBUF_FREEMAP: CBUF_MAPSIZE virtual address range is available for 21810843SDave.Plauger@Sun.COM * mapping input pages. 21910843SDave.Plauger@Sun.COM * 22010843SDave.Plauger@Sun.COM * CBUF_INREADY: input pages are mapped and ready for compression by a 22110843SDave.Plauger@Sun.COM * helper. 22210843SDave.Plauger@Sun.COM * 22310843SDave.Plauger@Sun.COM * CBUF_USEDMAP: mapping has been consumed by a helper. Needs unmap. 22410843SDave.Plauger@Sun.COM * 22510843SDave.Plauger@Sun.COM * CBUF_FREEBUF: CBUF_SIZE output buffer, which is available. 22610843SDave.Plauger@Sun.COM * 22710843SDave.Plauger@Sun.COM * CBUF_WRITE: CBUF_SIZE block of compressed pages from a helper, 22810843SDave.Plauger@Sun.COM * ready to write out. 22910843SDave.Plauger@Sun.COM * 23010843SDave.Plauger@Sun.COM * CBUF_ERRMSG: CBUF_SIZE block of error messages from a helper 23110843SDave.Plauger@Sun.COM * (reports UE errors.) 23210843SDave.Plauger@Sun.COM */ 23310843SDave.Plauger@Sun.COM 23410843SDave.Plauger@Sun.COM typedef enum cbufstate { 23510843SDave.Plauger@Sun.COM CBUF_FREEMAP, 23610843SDave.Plauger@Sun.COM CBUF_INREADY, 23710843SDave.Plauger@Sun.COM CBUF_USEDMAP, 23810843SDave.Plauger@Sun.COM CBUF_FREEBUF, 23910843SDave.Plauger@Sun.COM CBUF_WRITE, 24010843SDave.Plauger@Sun.COM CBUF_ERRMSG 24110843SDave.Plauger@Sun.COM } cbufstate_t; 24210843SDave.Plauger@Sun.COM 24310843SDave.Plauger@Sun.COM typedef struct cbuf cbuf_t; 24410843SDave.Plauger@Sun.COM 24510843SDave.Plauger@Sun.COM struct cbuf { 24610843SDave.Plauger@Sun.COM cbuf_t *next; /* next in list */ 24710843SDave.Plauger@Sun.COM cbufstate_t state; /* processing state */ 24810843SDave.Plauger@Sun.COM size_t used; /* amount used */ 24910843SDave.Plauger@Sun.COM size_t size; /* mem size */ 25010843SDave.Plauger@Sun.COM char *buf; /* kmem or vmem */ 25110843SDave.Plauger@Sun.COM pgcnt_t pagenum; /* index to pfn map */ 25210843SDave.Plauger@Sun.COM pgcnt_t bitnum; /* first set bitnum */ 25310843SDave.Plauger@Sun.COM pfn_t pfn; /* first pfn in mapped range */ 25410843SDave.Plauger@Sun.COM int off; /* byte offset to first pfn */ 25510843SDave.Plauger@Sun.COM }; 2560Sstevel@tonic-gate 2570Sstevel@tonic-gate /* 25810843SDave.Plauger@Sun.COM * cqueue_t queues: a uni-directional channel for communication 25910843SDave.Plauger@Sun.COM * from the master to helper tasks or vice-versa using put and 26010843SDave.Plauger@Sun.COM * get primitives. Both mappings and data buffers are passed via 26110843SDave.Plauger@Sun.COM * queues. Producers close a queue when done. The number of 26210843SDave.Plauger@Sun.COM * active producers is reference counted so the consumer can 26310843SDave.Plauger@Sun.COM * detect end of data. Concurrent access is mediated by atomic 26410843SDave.Plauger@Sun.COM * operations for panic dump, or mutex/cv for live dump. 26510843SDave.Plauger@Sun.COM * 26610843SDave.Plauger@Sun.COM * There a four queues, used as follows: 26710843SDave.Plauger@Sun.COM * 26810843SDave.Plauger@Sun.COM * Queue Dataflow NewState 26910843SDave.Plauger@Sun.COM * -------------------------------------------------- 27010843SDave.Plauger@Sun.COM * mainq master -> master FREEMAP 27110843SDave.Plauger@Sun.COM * master has initialized or unmapped an input buffer 27210843SDave.Plauger@Sun.COM * -------------------------------------------------- 27310843SDave.Plauger@Sun.COM * helperq master -> helper INREADY 27410843SDave.Plauger@Sun.COM * master has mapped input for use by helper 27510843SDave.Plauger@Sun.COM * -------------------------------------------------- 27610843SDave.Plauger@Sun.COM * mainq master <- helper USEDMAP 27710843SDave.Plauger@Sun.COM * helper is done with input 27810843SDave.Plauger@Sun.COM * -------------------------------------------------- 27910843SDave.Plauger@Sun.COM * freebufq master -> helper FREEBUF 28010843SDave.Plauger@Sun.COM * master has initialized or written an output buffer 28110843SDave.Plauger@Sun.COM * -------------------------------------------------- 28210843SDave.Plauger@Sun.COM * mainq master <- helper WRITE 28310843SDave.Plauger@Sun.COM * block of compressed pages from a helper 28410843SDave.Plauger@Sun.COM * -------------------------------------------------- 28510843SDave.Plauger@Sun.COM * mainq master <- helper ERRMSG 28610843SDave.Plauger@Sun.COM * error messages from a helper (memory error case) 28710843SDave.Plauger@Sun.COM * -------------------------------------------------- 28810843SDave.Plauger@Sun.COM * writerq master <- master WRITE 28910843SDave.Plauger@Sun.COM * non-blocking queue of blocks to write 29010843SDave.Plauger@Sun.COM * -------------------------------------------------- 29110843SDave.Plauger@Sun.COM */ 29210843SDave.Plauger@Sun.COM typedef struct cqueue { 29310843SDave.Plauger@Sun.COM cbuf_t *volatile first; /* first in list */ 29410843SDave.Plauger@Sun.COM cbuf_t *last; /* last in list */ 29510843SDave.Plauger@Sun.COM hrtime_t ts; /* timestamp */ 29610843SDave.Plauger@Sun.COM hrtime_t empty; /* total time empty */ 29710843SDave.Plauger@Sun.COM kmutex_t mutex; /* live state lock */ 29810843SDave.Plauger@Sun.COM kcondvar_t cv; /* live wait var */ 29910843SDave.Plauger@Sun.COM lock_t spinlock; /* panic mode spin lock */ 30010843SDave.Plauger@Sun.COM volatile uint_t open; /* producer ref count */ 30110843SDave.Plauger@Sun.COM } cqueue_t; 30210843SDave.Plauger@Sun.COM 30310843SDave.Plauger@Sun.COM /* 30410843SDave.Plauger@Sun.COM * Convenience macros for using the cqueue functions 30510843SDave.Plauger@Sun.COM * Note that the caller must have defined "dumpsync_t *ds" 30610843SDave.Plauger@Sun.COM */ 30710843SDave.Plauger@Sun.COM #define CQ_IS_EMPTY(q) \ 30810843SDave.Plauger@Sun.COM (ds->q.first == NULL) 30910843SDave.Plauger@Sun.COM 31010843SDave.Plauger@Sun.COM #define CQ_OPEN(q) \ 31110843SDave.Plauger@Sun.COM atomic_inc_uint(&ds->q.open) 31210843SDave.Plauger@Sun.COM 31310843SDave.Plauger@Sun.COM #define CQ_CLOSE(q) \ 31410843SDave.Plauger@Sun.COM dumpsys_close_cq(&ds->q, ds->live) 31510843SDave.Plauger@Sun.COM 31610843SDave.Plauger@Sun.COM #define CQ_PUT(q, cp, st) \ 31710843SDave.Plauger@Sun.COM dumpsys_put_cq(&ds->q, cp, st, ds->live) 31810843SDave.Plauger@Sun.COM 31910843SDave.Plauger@Sun.COM #define CQ_GET(q) \ 32010843SDave.Plauger@Sun.COM dumpsys_get_cq(&ds->q, ds->live) 32110843SDave.Plauger@Sun.COM 32210843SDave.Plauger@Sun.COM /* 32310843SDave.Plauger@Sun.COM * Dynamic state when dumpsys() is running. 3240Sstevel@tonic-gate */ 32510843SDave.Plauger@Sun.COM typedef struct dumpsync { 32610843SDave.Plauger@Sun.COM pgcnt_t npages; /* subtotal of pages dumped */ 32710843SDave.Plauger@Sun.COM pgcnt_t pages_mapped; /* subtotal of pages mapped */ 32810843SDave.Plauger@Sun.COM pgcnt_t pages_used; /* subtotal of pages used per map */ 32910843SDave.Plauger@Sun.COM size_t nwrite; /* subtotal of bytes written */ 33010843SDave.Plauger@Sun.COM uint_t live; /* running live dump */ 33110843SDave.Plauger@Sun.COM uint_t neednl; /* will need to print a newline */ 33210843SDave.Plauger@Sun.COM uint_t percent; /* dump progress */ 33310843SDave.Plauger@Sun.COM uint_t percent_done; /* dump progress reported */ 33410843SDave.Plauger@Sun.COM cqueue_t freebufq; /* free kmem bufs for writing */ 33510843SDave.Plauger@Sun.COM cqueue_t mainq; /* input for main task */ 33610843SDave.Plauger@Sun.COM cqueue_t helperq; /* input for helpers */ 33710843SDave.Plauger@Sun.COM cqueue_t writerq; /* input for writer */ 33810843SDave.Plauger@Sun.COM hrtime_t start; /* start time */ 33910843SDave.Plauger@Sun.COM hrtime_t elapsed; /* elapsed time when completed */ 34010843SDave.Plauger@Sun.COM hrtime_t iotime; /* time spent writing nwrite bytes */ 34110843SDave.Plauger@Sun.COM hrtime_t iowait; /* time spent waiting for output */ 34210843SDave.Plauger@Sun.COM hrtime_t iowaitts; /* iowait timestamp */ 34310843SDave.Plauger@Sun.COM perpage_t perpage; /* metrics */ 34410843SDave.Plauger@Sun.COM perpage_t perpagets; 34510843SDave.Plauger@Sun.COM int dumpcpu; /* master cpu */ 34610843SDave.Plauger@Sun.COM } dumpsync_t; 34710843SDave.Plauger@Sun.COM 34810843SDave.Plauger@Sun.COM static dumpsync_t dumpsync; /* synchronization vars */ 34910843SDave.Plauger@Sun.COM 35010843SDave.Plauger@Sun.COM /* 35110843SDave.Plauger@Sun.COM * helper_t helpers: contains the context for a stream. CPUs run in 35210843SDave.Plauger@Sun.COM * parallel at dump time; each CPU creates a single stream of 35310843SDave.Plauger@Sun.COM * compression data. Stream data is divided into CBUF_SIZE blocks. 35410843SDave.Plauger@Sun.COM * The blocks are written in order within a stream. But, blocks from 35510843SDave.Plauger@Sun.COM * multiple streams can be interleaved. Each stream is identified by a 35610843SDave.Plauger@Sun.COM * unique tag. 35710843SDave.Plauger@Sun.COM */ 35810843SDave.Plauger@Sun.COM typedef struct helper { 35910843SDave.Plauger@Sun.COM int helper; /* bound helper id */ 36010843SDave.Plauger@Sun.COM int tag; /* compression stream tag */ 36110843SDave.Plauger@Sun.COM perpage_t perpage; /* per page metrics */ 36210843SDave.Plauger@Sun.COM perpage_t perpagets; /* per page metrics (timestamps) */ 36310843SDave.Plauger@Sun.COM taskqid_t taskqid; /* live dump task ptr */ 36410843SDave.Plauger@Sun.COM int in, out; /* buffer offsets */ 36510843SDave.Plauger@Sun.COM cbuf_t *cpin, *cpout, *cperr; /* cbuf objects in process */ 36610843SDave.Plauger@Sun.COM dumpsync_t *ds; /* pointer to sync vars */ 36710843SDave.Plauger@Sun.COM size_t used; /* counts input consumed */ 36810843SDave.Plauger@Sun.COM char *page; /* buffer for page copy */ 36910843SDave.Plauger@Sun.COM char *lzbuf; /* lzjb output */ 37010843SDave.Plauger@Sun.COM bz_stream bzstream; /* bzip2 state */ 37110843SDave.Plauger@Sun.COM } helper_t; 37210843SDave.Plauger@Sun.COM 37310843SDave.Plauger@Sun.COM #define MAINHELPER (-1) /* helper is also the main task */ 37410843SDave.Plauger@Sun.COM #define FREEHELPER (-2) /* unbound helper */ 37510843SDave.Plauger@Sun.COM #define DONEHELPER (-3) /* helper finished */ 37610843SDave.Plauger@Sun.COM 37710843SDave.Plauger@Sun.COM /* 37810843SDave.Plauger@Sun.COM * configuration vars for dumpsys 37910843SDave.Plauger@Sun.COM */ 38010843SDave.Plauger@Sun.COM typedef struct dumpcfg { 38110843SDave.Plauger@Sun.COM int threshold; /* ncpu threshold for bzip2 */ 38210843SDave.Plauger@Sun.COM int nhelper; /* number of helpers */ 38310843SDave.Plauger@Sun.COM int nhelper_used; /* actual number of helpers used */ 38410843SDave.Plauger@Sun.COM int ncmap; /* number VA pages for compression */ 38510843SDave.Plauger@Sun.COM int ncbuf; /* number of bufs for compression */ 38610843SDave.Plauger@Sun.COM int ncbuf_used; /* number of bufs in use */ 38710843SDave.Plauger@Sun.COM uint_t clevel; /* dump compression level */ 38810843SDave.Plauger@Sun.COM helper_t *helper; /* array of helpers */ 38910843SDave.Plauger@Sun.COM cbuf_t *cmap; /* array of input (map) buffers */ 39010843SDave.Plauger@Sun.COM cbuf_t *cbuf; /* array of output buffers */ 39110843SDave.Plauger@Sun.COM ulong_t *helpermap; /* set of dumpsys helper CPU ids */ 39210843SDave.Plauger@Sun.COM ulong_t *bitmap; /* bitmap for marking pages to dump */ 39310843SDave.Plauger@Sun.COM ulong_t *rbitmap; /* bitmap for used CBUF_MAPSIZE ranges */ 39410843SDave.Plauger@Sun.COM pgcnt_t bitmapsize; /* size of bitmap */ 39510843SDave.Plauger@Sun.COM pgcnt_t rbitmapsize; /* size of bitmap for ranges */ 39610843SDave.Plauger@Sun.COM pgcnt_t found4m; /* number ranges allocated by dump */ 39710843SDave.Plauger@Sun.COM pgcnt_t foundsm; /* number small pages allocated by dump */ 39810843SDave.Plauger@Sun.COM pid_t *pids; /* list of process IDs at dump time */ 39910843SDave.Plauger@Sun.COM size_t maxsize; /* memory size needed at dump time */ 40010843SDave.Plauger@Sun.COM size_t maxvmsize; /* size of reserved VM */ 40110843SDave.Plauger@Sun.COM char *maxvm; /* reserved VM for spare pages */ 40210843SDave.Plauger@Sun.COM lock_t helper_lock; /* protect helper state */ 40310843SDave.Plauger@Sun.COM char helpers_wanted; /* flag to enable parallelism */ 40410843SDave.Plauger@Sun.COM } dumpcfg_t; 40510843SDave.Plauger@Sun.COM 40610843SDave.Plauger@Sun.COM static dumpcfg_t dumpcfg; /* config vars */ 40710843SDave.Plauger@Sun.COM 40810843SDave.Plauger@Sun.COM /* 40910843SDave.Plauger@Sun.COM * The dump I/O buffer. 41010843SDave.Plauger@Sun.COM * 41110843SDave.Plauger@Sun.COM * There is one I/O buffer used by dumpvp_write and dumvp_flush. It is 41210843SDave.Plauger@Sun.COM * sized according to the optimum device transfer speed. 41310843SDave.Plauger@Sun.COM */ 41410843SDave.Plauger@Sun.COM typedef struct dumpbuf { 41510843SDave.Plauger@Sun.COM vnode_t *cdev_vp; /* VCHR open of the dump device */ 41610843SDave.Plauger@Sun.COM len_t vp_limit; /* maximum write offset */ 41710843SDave.Plauger@Sun.COM offset_t vp_off; /* current dump device offset */ 41810843SDave.Plauger@Sun.COM char *cur; /* dump write pointer */ 41910843SDave.Plauger@Sun.COM char *start; /* dump buffer address */ 42010843SDave.Plauger@Sun.COM char *end; /* dump buffer end */ 42110843SDave.Plauger@Sun.COM size_t size; /* size of dumpbuf in bytes */ 42210843SDave.Plauger@Sun.COM size_t iosize; /* best transfer size for device */ 42310843SDave.Plauger@Sun.COM } dumpbuf_t; 42410843SDave.Plauger@Sun.COM 42510843SDave.Plauger@Sun.COM dumpbuf_t dumpbuf; /* I/O buffer */ 42610843SDave.Plauger@Sun.COM 42710843SDave.Plauger@Sun.COM /* 42810843SDave.Plauger@Sun.COM * The dump I/O buffer must be at least one page, at most xfer_size 42910843SDave.Plauger@Sun.COM * bytes, and should scale with physmem in between. The transfer size 43010843SDave.Plauger@Sun.COM * passed in will either represent a global default (maxphys) or the 43110843SDave.Plauger@Sun.COM * best size for the device. The size of the dumpbuf I/O buffer is 43210843SDave.Plauger@Sun.COM * limited by dumpbuf_limit (8MB by default) because the dump 43310843SDave.Plauger@Sun.COM * performance saturates beyond a certain size. The default is to 43410843SDave.Plauger@Sun.COM * select 1/4096 of the memory. 43510843SDave.Plauger@Sun.COM */ 43610843SDave.Plauger@Sun.COM static int dumpbuf_fraction = 12; /* memory size scale factor */ 43710843SDave.Plauger@Sun.COM static size_t dumpbuf_limit = 8 * DUMP_1MB; /* max I/O buf size */ 43810843SDave.Plauger@Sun.COM 4390Sstevel@tonic-gate static size_t 4400Sstevel@tonic-gate dumpbuf_iosize(size_t xfer_size) 4410Sstevel@tonic-gate { 44210843SDave.Plauger@Sun.COM size_t iosize = ptob(physmem >> dumpbuf_fraction); 44310843SDave.Plauger@Sun.COM 44410843SDave.Plauger@Sun.COM if (iosize < PAGESIZE) 44510843SDave.Plauger@Sun.COM iosize = PAGESIZE; 44610843SDave.Plauger@Sun.COM else if (iosize > xfer_size) 44710843SDave.Plauger@Sun.COM iosize = xfer_size; 44810843SDave.Plauger@Sun.COM if (iosize > dumpbuf_limit) 44910843SDave.Plauger@Sun.COM iosize = dumpbuf_limit; 45010843SDave.Plauger@Sun.COM return (iosize & PAGEMASK); 4510Sstevel@tonic-gate } 4520Sstevel@tonic-gate 45310843SDave.Plauger@Sun.COM /* 45410843SDave.Plauger@Sun.COM * resize the I/O buffer 45510843SDave.Plauger@Sun.COM */ 4560Sstevel@tonic-gate static void 4570Sstevel@tonic-gate dumpbuf_resize(void) 4580Sstevel@tonic-gate { 45910843SDave.Plauger@Sun.COM char *old_buf = dumpbuf.start; 46010843SDave.Plauger@Sun.COM size_t old_size = dumpbuf.size; 4610Sstevel@tonic-gate char *new_buf; 4620Sstevel@tonic-gate size_t new_size; 4630Sstevel@tonic-gate 4640Sstevel@tonic-gate ASSERT(MUTEX_HELD(&dump_lock)); 4650Sstevel@tonic-gate 46610843SDave.Plauger@Sun.COM new_size = dumpbuf_iosize(MAX(dumpbuf.iosize, maxphys)); 46710843SDave.Plauger@Sun.COM if (new_size <= old_size) 4680Sstevel@tonic-gate return; /* no need to reallocate buffer */ 4690Sstevel@tonic-gate 4700Sstevel@tonic-gate new_buf = kmem_alloc(new_size, KM_SLEEP); 47110843SDave.Plauger@Sun.COM dumpbuf.size = new_size; 47210843SDave.Plauger@Sun.COM dumpbuf.start = new_buf; 47310843SDave.Plauger@Sun.COM dumpbuf.end = new_buf + new_size; 4740Sstevel@tonic-gate kmem_free(old_buf, old_size); 4750Sstevel@tonic-gate } 4760Sstevel@tonic-gate 47710843SDave.Plauger@Sun.COM /* 47810843SDave.Plauger@Sun.COM * dump_update_clevel is called when dumpadm configures the dump device. 47910843SDave.Plauger@Sun.COM * Calculate number of helpers and buffers. 48010843SDave.Plauger@Sun.COM * Allocate the minimum configuration for now. 48110843SDave.Plauger@Sun.COM * 48210843SDave.Plauger@Sun.COM * When the dump file is configured we reserve a minimum amount of 48310843SDave.Plauger@Sun.COM * memory for use at crash time. But we reserve VA for all the memory 48410843SDave.Plauger@Sun.COM * we really want in order to do the fastest dump possible. The VA is 48510843SDave.Plauger@Sun.COM * backed by pages not being dumped, according to the bitmap. If 48610843SDave.Plauger@Sun.COM * there is insufficient spare memory, however, we fall back to the 48710843SDave.Plauger@Sun.COM * minimum. 48810843SDave.Plauger@Sun.COM * 48910843SDave.Plauger@Sun.COM * Live dump (savecore -L) always uses the minimum config. 49010843SDave.Plauger@Sun.COM * 49110843SDave.Plauger@Sun.COM * clevel 0 is single threaded lzjb 49210843SDave.Plauger@Sun.COM * clevel 1 is parallel lzjb 49310843SDave.Plauger@Sun.COM * clevel 2 is parallel bzip2 49410843SDave.Plauger@Sun.COM * 49510843SDave.Plauger@Sun.COM * The ncpu threshold is selected with dump_plat_mincpu. 49610843SDave.Plauger@Sun.COM * On OPL, set_platform_defaults() overrides the sun4u setting. 49710843SDave.Plauger@Sun.COM * The actual values are defined via DUMP_PLAT_*_MINCPU macros. 49810843SDave.Plauger@Sun.COM * 49910843SDave.Plauger@Sun.COM * Architecture Threshold Algorithm 50010843SDave.Plauger@Sun.COM * sun4u < 51 parallel lzjb 50110843SDave.Plauger@Sun.COM * sun4u >= 51 parallel bzip2(*) 50210843SDave.Plauger@Sun.COM * sun4u OPL < 8 parallel lzjb 50310843SDave.Plauger@Sun.COM * sun4u OPL >= 8 parallel bzip2(*) 50410843SDave.Plauger@Sun.COM * sun4v < 128 parallel lzjb 50510843SDave.Plauger@Sun.COM * sun4v >= 128 parallel bzip2(*) 50610843SDave.Plauger@Sun.COM * x86 < 11 parallel lzjb 50710843SDave.Plauger@Sun.COM * x86 >= 11 parallel bzip2(*) 50810843SDave.Plauger@Sun.COM * 32-bit N/A single-threaded lzjb 50910843SDave.Plauger@Sun.COM * 51010843SDave.Plauger@Sun.COM * (*) bzip2 is only chosen if there is sufficient available 51110843SDave.Plauger@Sun.COM * memory for buffers at dump time. See dumpsys_get_maxmem(). 51210843SDave.Plauger@Sun.COM * 51310843SDave.Plauger@Sun.COM * Faster dump devices have larger I/O buffers. The threshold value is 51410843SDave.Plauger@Sun.COM * increased according to the size of the dump I/O buffer, because 51510843SDave.Plauger@Sun.COM * parallel lzjb performs better with faster disks. For buffers >= 1MB 51610843SDave.Plauger@Sun.COM * the threshold is 3X; for buffers >= 256K threshold is 2X. 51710843SDave.Plauger@Sun.COM * 51810843SDave.Plauger@Sun.COM * For parallel dumps, the number of helpers is ncpu-1. The CPU 51910843SDave.Plauger@Sun.COM * running panic runs the main task. For single-threaded dumps, the 52010843SDave.Plauger@Sun.COM * panic CPU does lzjb compression (it is tagged as MAINHELPER.) 52110843SDave.Plauger@Sun.COM * 52210843SDave.Plauger@Sun.COM * Need multiple buffers per helper so that they do not block waiting 52310843SDave.Plauger@Sun.COM * for the main task. 52410843SDave.Plauger@Sun.COM * parallel single-threaded 52510843SDave.Plauger@Sun.COM * Number of output buffers: nhelper*2 1 52610843SDave.Plauger@Sun.COM * Number of mapping buffers: nhelper*4 1 52710843SDave.Plauger@Sun.COM * 52810843SDave.Plauger@Sun.COM */ 52910843SDave.Plauger@Sun.COM static void 53010843SDave.Plauger@Sun.COM dump_update_clevel() 53110843SDave.Plauger@Sun.COM { 53210843SDave.Plauger@Sun.COM int tag; 53310843SDave.Plauger@Sun.COM size_t bz2size; 53410843SDave.Plauger@Sun.COM helper_t *hp, *hpend; 53510843SDave.Plauger@Sun.COM cbuf_t *cp, *cpend; 53610843SDave.Plauger@Sun.COM dumpcfg_t *old = &dumpcfg; 53710843SDave.Plauger@Sun.COM dumpcfg_t newcfg = *old; 53810843SDave.Plauger@Sun.COM dumpcfg_t *new = &newcfg; 53910843SDave.Plauger@Sun.COM 54010843SDave.Plauger@Sun.COM ASSERT(MUTEX_HELD(&dump_lock)); 54110843SDave.Plauger@Sun.COM 54210843SDave.Plauger@Sun.COM /* 54310843SDave.Plauger@Sun.COM * Free the previously allocated bufs and VM. 54410843SDave.Plauger@Sun.COM */ 54510843SDave.Plauger@Sun.COM if (old->helper != NULL) { 54610843SDave.Plauger@Sun.COM 54710843SDave.Plauger@Sun.COM /* helpers */ 54810843SDave.Plauger@Sun.COM hpend = &old->helper[old->nhelper]; 54910843SDave.Plauger@Sun.COM for (hp = old->helper; hp != hpend; hp++) { 55010843SDave.Plauger@Sun.COM if (hp->lzbuf != NULL) 55110843SDave.Plauger@Sun.COM kmem_free(hp->lzbuf, PAGESIZE); 55210843SDave.Plauger@Sun.COM if (hp->page != NULL) 55310843SDave.Plauger@Sun.COM kmem_free(hp->page, PAGESIZE); 55410843SDave.Plauger@Sun.COM } 55510843SDave.Plauger@Sun.COM kmem_free(old->helper, old->nhelper * sizeof (helper_t)); 55610843SDave.Plauger@Sun.COM 55710843SDave.Plauger@Sun.COM /* VM space for mapping pages */ 55810843SDave.Plauger@Sun.COM cpend = &old->cmap[old->ncmap]; 55910843SDave.Plauger@Sun.COM for (cp = old->cmap; cp != cpend; cp++) 56010843SDave.Plauger@Sun.COM vmem_xfree(heap_arena, cp->buf, CBUF_MAPSIZE); 56110843SDave.Plauger@Sun.COM kmem_free(old->cmap, old->ncmap * sizeof (cbuf_t)); 56210843SDave.Plauger@Sun.COM 56310843SDave.Plauger@Sun.COM /* output bufs */ 56410843SDave.Plauger@Sun.COM cpend = &old->cbuf[old->ncbuf]; 56510843SDave.Plauger@Sun.COM for (cp = old->cbuf; cp != cpend; cp++) 56610843SDave.Plauger@Sun.COM if (cp->buf != NULL) 56710843SDave.Plauger@Sun.COM kmem_free(cp->buf, cp->size); 56810843SDave.Plauger@Sun.COM kmem_free(old->cbuf, old->ncbuf * sizeof (cbuf_t)); 56910843SDave.Plauger@Sun.COM 57010843SDave.Plauger@Sun.COM /* reserved VM for dumpsys_get_maxmem */ 57110843SDave.Plauger@Sun.COM if (old->maxvmsize > 0) 57210843SDave.Plauger@Sun.COM vmem_xfree(heap_arena, old->maxvm, old->maxvmsize); 57310843SDave.Plauger@Sun.COM } 57410843SDave.Plauger@Sun.COM 57510843SDave.Plauger@Sun.COM /* 57610843SDave.Plauger@Sun.COM * Allocate memory and VM. 57710843SDave.Plauger@Sun.COM * One CPU runs dumpsys, the rest are helpers. 57810843SDave.Plauger@Sun.COM */ 57910843SDave.Plauger@Sun.COM new->nhelper = ncpus - 1; 58010843SDave.Plauger@Sun.COM if (new->nhelper < 1) 58110843SDave.Plauger@Sun.COM new->nhelper = 1; 58210843SDave.Plauger@Sun.COM 58310843SDave.Plauger@Sun.COM if (new->nhelper > DUMP_MAX_NHELPER) 58410843SDave.Plauger@Sun.COM new->nhelper = DUMP_MAX_NHELPER; 58510843SDave.Plauger@Sun.COM 58610843SDave.Plauger@Sun.COM /* increase threshold for faster disks */ 58710843SDave.Plauger@Sun.COM new->threshold = dump_plat_mincpu; 58810843SDave.Plauger@Sun.COM if (dumpbuf.iosize >= DUMP_1MB) 58910843SDave.Plauger@Sun.COM new->threshold *= 3; 59010843SDave.Plauger@Sun.COM else if (dumpbuf.iosize >= (256 * DUMP_1KB)) 59110843SDave.Plauger@Sun.COM new->threshold *= 2; 59210843SDave.Plauger@Sun.COM 59310843SDave.Plauger@Sun.COM /* figure compression level based upon the computed threshold. */ 59410843SDave.Plauger@Sun.COM if (dump_plat_mincpu == 0 || new->nhelper < 2) { 59510843SDave.Plauger@Sun.COM new->clevel = 0; 59610843SDave.Plauger@Sun.COM new->nhelper = 1; 59710843SDave.Plauger@Sun.COM } else if ((new->nhelper + 1) >= new->threshold) { 59810843SDave.Plauger@Sun.COM new->clevel = DUMP_CLEVEL_BZIP2; 59910843SDave.Plauger@Sun.COM } else { 60010843SDave.Plauger@Sun.COM new->clevel = DUMP_CLEVEL_LZJB; 60110843SDave.Plauger@Sun.COM } 60210843SDave.Plauger@Sun.COM 60310843SDave.Plauger@Sun.COM if (new->clevel == 0) { 60410843SDave.Plauger@Sun.COM new->ncbuf = 1; 60510843SDave.Plauger@Sun.COM new->ncmap = 1; 60610843SDave.Plauger@Sun.COM } else { 60710843SDave.Plauger@Sun.COM new->ncbuf = NCBUF_PER_HELPER * new->nhelper; 60810843SDave.Plauger@Sun.COM new->ncmap = NCMAP_PER_HELPER * new->nhelper; 60910843SDave.Plauger@Sun.COM } 61010843SDave.Plauger@Sun.COM 61110843SDave.Plauger@Sun.COM /* 61210843SDave.Plauger@Sun.COM * Allocate new data structures and buffers for MINHELPERS, 61310843SDave.Plauger@Sun.COM * and also figure the max desired size. 61410843SDave.Plauger@Sun.COM */ 61510843SDave.Plauger@Sun.COM bz2size = BZ2_bzCompressInitSize(dump_bzip2_level); 61610843SDave.Plauger@Sun.COM new->maxsize = 0; 61710843SDave.Plauger@Sun.COM new->maxvmsize = 0; 61810843SDave.Plauger@Sun.COM new->maxvm = NULL; 61910843SDave.Plauger@Sun.COM tag = 1; 62010843SDave.Plauger@Sun.COM new->helper = kmem_zalloc(new->nhelper * sizeof (helper_t), KM_SLEEP); 62110843SDave.Plauger@Sun.COM hpend = &new->helper[new->nhelper]; 62210843SDave.Plauger@Sun.COM for (hp = new->helper; hp != hpend; hp++) { 62310843SDave.Plauger@Sun.COM hp->tag = tag++; 62410843SDave.Plauger@Sun.COM if (hp < &new->helper[MINHELPERS]) { 62510843SDave.Plauger@Sun.COM hp->lzbuf = kmem_alloc(PAGESIZE, KM_SLEEP); 62610843SDave.Plauger@Sun.COM hp->page = kmem_alloc(PAGESIZE, KM_SLEEP); 62710843SDave.Plauger@Sun.COM } else if (new->clevel < DUMP_CLEVEL_BZIP2) { 62810843SDave.Plauger@Sun.COM new->maxsize += 2 * PAGESIZE; 62910843SDave.Plauger@Sun.COM } else { 63010843SDave.Plauger@Sun.COM new->maxsize += PAGESIZE; 63110843SDave.Plauger@Sun.COM } 63210843SDave.Plauger@Sun.COM if (new->clevel >= DUMP_CLEVEL_BZIP2) 63310843SDave.Plauger@Sun.COM new->maxsize += bz2size; 63410843SDave.Plauger@Sun.COM } 63510843SDave.Plauger@Sun.COM 63610843SDave.Plauger@Sun.COM new->cbuf = kmem_zalloc(new->ncbuf * sizeof (cbuf_t), KM_SLEEP); 63710843SDave.Plauger@Sun.COM cpend = &new->cbuf[new->ncbuf]; 63810843SDave.Plauger@Sun.COM for (cp = new->cbuf; cp != cpend; cp++) { 63910843SDave.Plauger@Sun.COM cp->state = CBUF_FREEBUF; 64010843SDave.Plauger@Sun.COM cp->size = CBUF_SIZE; 64110843SDave.Plauger@Sun.COM if (cp < &new->cbuf[MINCBUFS]) 64210843SDave.Plauger@Sun.COM cp->buf = kmem_alloc(cp->size, KM_SLEEP); 64310843SDave.Plauger@Sun.COM else 64410843SDave.Plauger@Sun.COM new->maxsize += cp->size; 64510843SDave.Plauger@Sun.COM } 64610843SDave.Plauger@Sun.COM 64710843SDave.Plauger@Sun.COM new->cmap = kmem_zalloc(new->ncmap * sizeof (cbuf_t), KM_SLEEP); 64810843SDave.Plauger@Sun.COM cpend = &new->cmap[new->ncmap]; 64910843SDave.Plauger@Sun.COM for (cp = new->cmap; cp != cpend; cp++) { 65010843SDave.Plauger@Sun.COM cp->state = CBUF_FREEMAP; 65110843SDave.Plauger@Sun.COM cp->size = CBUF_MAPSIZE; 65210843SDave.Plauger@Sun.COM cp->buf = vmem_xalloc(heap_arena, CBUF_MAPSIZE, CBUF_MAPSIZE, 65310843SDave.Plauger@Sun.COM 0, 0, NULL, NULL, VM_SLEEP); 65410843SDave.Plauger@Sun.COM } 65510843SDave.Plauger@Sun.COM 65610843SDave.Plauger@Sun.COM /* reserve VA to be backed with spare pages at crash time */ 65710843SDave.Plauger@Sun.COM if (new->maxsize > 0) { 65810843SDave.Plauger@Sun.COM new->maxsize = P2ROUNDUP(new->maxsize, PAGESIZE); 65910843SDave.Plauger@Sun.COM new->maxvmsize = P2ROUNDUP(new->maxsize, CBUF_MAPSIZE); 66010843SDave.Plauger@Sun.COM new->maxvm = vmem_xalloc(heap_arena, new->maxvmsize, 66110843SDave.Plauger@Sun.COM CBUF_MAPSIZE, 0, 0, NULL, NULL, VM_SLEEP); 66210843SDave.Plauger@Sun.COM } 66310843SDave.Plauger@Sun.COM 66411178SDave.Plauger@Sun.COM /* 66511178SDave.Plauger@Sun.COM * Reserve memory for kmem allocation calls made during crash 66611178SDave.Plauger@Sun.COM * dump. The hat layer allocates memory for each mapping 66711178SDave.Plauger@Sun.COM * created, and the I/O path allocates buffers and data structs. 66811178SDave.Plauger@Sun.COM * Add a few pages for safety. 66911178SDave.Plauger@Sun.COM */ 67011178SDave.Plauger@Sun.COM kmem_dump_init((new->ncmap * dump_kmem_permap) + 67111178SDave.Plauger@Sun.COM (dump_kmem_pages * PAGESIZE)); 67211178SDave.Plauger@Sun.COM 67310843SDave.Plauger@Sun.COM /* set new config pointers */ 67410843SDave.Plauger@Sun.COM *old = *new; 67510843SDave.Plauger@Sun.COM } 67610843SDave.Plauger@Sun.COM 67710843SDave.Plauger@Sun.COM /* 67810843SDave.Plauger@Sun.COM * Define a struct memlist walker to optimize bitnum to pfn 67910843SDave.Plauger@Sun.COM * lookup. The walker maintains the state of the list traversal. 68010843SDave.Plauger@Sun.COM */ 68110843SDave.Plauger@Sun.COM typedef struct dumpmlw { 68210843SDave.Plauger@Sun.COM struct memlist *mp; /* current memlist */ 68310843SDave.Plauger@Sun.COM pgcnt_t basenum; /* bitnum base offset */ 68410843SDave.Plauger@Sun.COM pgcnt_t mppages; /* current memlist size */ 68510843SDave.Plauger@Sun.COM pgcnt_t mpleft; /* size to end of current memlist */ 68610843SDave.Plauger@Sun.COM pfn_t mpaddr; /* first pfn in memlist */ 68710843SDave.Plauger@Sun.COM } dumpmlw_t; 68810843SDave.Plauger@Sun.COM 68910843SDave.Plauger@Sun.COM /* initialize the walker */ 69010843SDave.Plauger@Sun.COM static inline void 69110843SDave.Plauger@Sun.COM dump_init_memlist_walker(dumpmlw_t *pw) 69210843SDave.Plauger@Sun.COM { 69310843SDave.Plauger@Sun.COM pw->mp = phys_install; 69410843SDave.Plauger@Sun.COM pw->basenum = 0; 695*11474SJonathan.Adams@Sun.COM pw->mppages = pw->mp->ml_size >> PAGESHIFT; 69610843SDave.Plauger@Sun.COM pw->mpleft = pw->mppages; 697*11474SJonathan.Adams@Sun.COM pw->mpaddr = pw->mp->ml_address >> PAGESHIFT; 69810843SDave.Plauger@Sun.COM } 69910843SDave.Plauger@Sun.COM 70010843SDave.Plauger@Sun.COM /* 70110843SDave.Plauger@Sun.COM * Lookup pfn given bitnum. The memlist can be quite long on some 70210843SDave.Plauger@Sun.COM * systems (e.g.: one per board). To optimize sequential lookups, the 70310843SDave.Plauger@Sun.COM * caller initializes and presents a memlist walker. 70410843SDave.Plauger@Sun.COM */ 70510843SDave.Plauger@Sun.COM static pfn_t 70610843SDave.Plauger@Sun.COM dump_bitnum_to_pfn(pgcnt_t bitnum, dumpmlw_t *pw) 70710843SDave.Plauger@Sun.COM { 70810843SDave.Plauger@Sun.COM bitnum -= pw->basenum; 70910843SDave.Plauger@Sun.COM while (pw->mp != NULL) { 71010843SDave.Plauger@Sun.COM if (bitnum < pw->mppages) { 71110843SDave.Plauger@Sun.COM pw->mpleft = pw->mppages - bitnum; 71210843SDave.Plauger@Sun.COM return (pw->mpaddr + bitnum); 71310843SDave.Plauger@Sun.COM } 71410843SDave.Plauger@Sun.COM bitnum -= pw->mppages; 71510843SDave.Plauger@Sun.COM pw->basenum += pw->mppages; 716*11474SJonathan.Adams@Sun.COM pw->mp = pw->mp->ml_next; 71710843SDave.Plauger@Sun.COM if (pw->mp != NULL) { 718*11474SJonathan.Adams@Sun.COM pw->mppages = pw->mp->ml_size >> PAGESHIFT; 71910843SDave.Plauger@Sun.COM pw->mpleft = pw->mppages; 720*11474SJonathan.Adams@Sun.COM pw->mpaddr = pw->mp->ml_address >> PAGESHIFT; 72110843SDave.Plauger@Sun.COM } 72210843SDave.Plauger@Sun.COM } 72310843SDave.Plauger@Sun.COM return (PFN_INVALID); 72410843SDave.Plauger@Sun.COM } 72510843SDave.Plauger@Sun.COM 72610843SDave.Plauger@Sun.COM static pgcnt_t 72710843SDave.Plauger@Sun.COM dump_pfn_to_bitnum(pfn_t pfn) 72810843SDave.Plauger@Sun.COM { 72910843SDave.Plauger@Sun.COM struct memlist *mp; 73010843SDave.Plauger@Sun.COM pgcnt_t bitnum = 0; 73110843SDave.Plauger@Sun.COM 732*11474SJonathan.Adams@Sun.COM for (mp = phys_install; mp != NULL; mp = mp->ml_next) { 733*11474SJonathan.Adams@Sun.COM if (pfn >= (mp->ml_address >> PAGESHIFT) && 734*11474SJonathan.Adams@Sun.COM pfn < ((mp->ml_address + mp->ml_size) >> PAGESHIFT)) 735*11474SJonathan.Adams@Sun.COM return (bitnum + pfn - (mp->ml_address >> PAGESHIFT)); 736*11474SJonathan.Adams@Sun.COM bitnum += mp->ml_size >> PAGESHIFT; 73710843SDave.Plauger@Sun.COM } 73810843SDave.Plauger@Sun.COM return ((pgcnt_t)-1); 73910843SDave.Plauger@Sun.COM } 74010843SDave.Plauger@Sun.COM 74110843SDave.Plauger@Sun.COM /* 74210843SDave.Plauger@Sun.COM * Set/test bitmap for a CBUF_MAPSIZE range which includes pfn. The 74310843SDave.Plauger@Sun.COM * mapping of pfn to range index is imperfect because pfn and bitnum 74410843SDave.Plauger@Sun.COM * do not have the same phase. To make sure a CBUF_MAPSIZE range is 74510843SDave.Plauger@Sun.COM * covered, call this for both ends: 74610843SDave.Plauger@Sun.COM * dump_set_used(base) 74710843SDave.Plauger@Sun.COM * dump_set_used(base+CBUF_MAPNP-1) 74810843SDave.Plauger@Sun.COM * 74910843SDave.Plauger@Sun.COM * This is used during a panic dump to mark pages allocated by 75010843SDave.Plauger@Sun.COM * dumpsys_get_maxmem(). The macro IS_DUMP_PAGE(pp) is used by 75110843SDave.Plauger@Sun.COM * page_get_mnode_freelist() to make sure pages used by dump are never 75210843SDave.Plauger@Sun.COM * allocated. 75310843SDave.Plauger@Sun.COM */ 75410843SDave.Plauger@Sun.COM #define CBUF_MAPP2R(pfn) ((pfn) >> (CBUF_MAPSHIFT - PAGESHIFT)) 75510843SDave.Plauger@Sun.COM 75610843SDave.Plauger@Sun.COM static void 75710843SDave.Plauger@Sun.COM dump_set_used(pfn_t pfn) 75810843SDave.Plauger@Sun.COM { 75910843SDave.Plauger@Sun.COM 76010843SDave.Plauger@Sun.COM pgcnt_t bitnum, rbitnum; 76110843SDave.Plauger@Sun.COM 76210843SDave.Plauger@Sun.COM bitnum = dump_pfn_to_bitnum(pfn); 76310843SDave.Plauger@Sun.COM ASSERT(bitnum != (pgcnt_t)-1); 76410843SDave.Plauger@Sun.COM 76510843SDave.Plauger@Sun.COM rbitnum = CBUF_MAPP2R(bitnum); 76610843SDave.Plauger@Sun.COM ASSERT(rbitnum < dumpcfg.rbitmapsize); 76710843SDave.Plauger@Sun.COM 76810843SDave.Plauger@Sun.COM BT_SET(dumpcfg.rbitmap, rbitnum); 76910843SDave.Plauger@Sun.COM } 77010843SDave.Plauger@Sun.COM 77110843SDave.Plauger@Sun.COM int 77210843SDave.Plauger@Sun.COM dump_test_used(pfn_t pfn) 77310843SDave.Plauger@Sun.COM { 77410843SDave.Plauger@Sun.COM pgcnt_t bitnum, rbitnum; 77510843SDave.Plauger@Sun.COM 77610843SDave.Plauger@Sun.COM bitnum = dump_pfn_to_bitnum(pfn); 77710843SDave.Plauger@Sun.COM ASSERT(bitnum != (pgcnt_t)-1); 77810843SDave.Plauger@Sun.COM 77910843SDave.Plauger@Sun.COM rbitnum = CBUF_MAPP2R(bitnum); 78010843SDave.Plauger@Sun.COM ASSERT(rbitnum < dumpcfg.rbitmapsize); 78110843SDave.Plauger@Sun.COM 78210843SDave.Plauger@Sun.COM return (BT_TEST(dumpcfg.rbitmap, rbitnum)); 78310843SDave.Plauger@Sun.COM } 78410843SDave.Plauger@Sun.COM 78510843SDave.Plauger@Sun.COM /* 78610843SDave.Plauger@Sun.COM * dumpbzalloc and dumpbzfree are callbacks from the bzip2 library. 78710843SDave.Plauger@Sun.COM * dumpsys_get_maxmem() uses them for BZ2_bzCompressInit(). 78810843SDave.Plauger@Sun.COM */ 78910843SDave.Plauger@Sun.COM static void * 79010843SDave.Plauger@Sun.COM dumpbzalloc(void *opaque, int items, int size) 79110843SDave.Plauger@Sun.COM { 79210843SDave.Plauger@Sun.COM size_t *sz; 79310843SDave.Plauger@Sun.COM char *ret; 79410843SDave.Plauger@Sun.COM 79510843SDave.Plauger@Sun.COM ASSERT(opaque != NULL); 79610843SDave.Plauger@Sun.COM sz = opaque; 79710843SDave.Plauger@Sun.COM ret = dumpcfg.maxvm + *sz; 79810843SDave.Plauger@Sun.COM *sz += items * size; 79910843SDave.Plauger@Sun.COM *sz = P2ROUNDUP(*sz, BZ2_BZALLOC_ALIGN); 80010843SDave.Plauger@Sun.COM ASSERT(*sz <= dumpcfg.maxvmsize); 80110843SDave.Plauger@Sun.COM return (ret); 80210843SDave.Plauger@Sun.COM } 80310843SDave.Plauger@Sun.COM 80410843SDave.Plauger@Sun.COM /*ARGSUSED*/ 80510843SDave.Plauger@Sun.COM static void 80610843SDave.Plauger@Sun.COM dumpbzfree(void *opaque, void *addr) 80710843SDave.Plauger@Sun.COM { 80810843SDave.Plauger@Sun.COM } 80910843SDave.Plauger@Sun.COM 81010843SDave.Plauger@Sun.COM /* 81110843SDave.Plauger@Sun.COM * Perform additional checks on the page to see if we can really use 81210843SDave.Plauger@Sun.COM * it. The kernel (kas) pages are always set in the bitmap. However, 81310843SDave.Plauger@Sun.COM * boot memory pages (prom_ppages or P_BOOTPAGES) are not in the 81410843SDave.Plauger@Sun.COM * bitmap. So we check for them. 81510843SDave.Plauger@Sun.COM */ 81610843SDave.Plauger@Sun.COM static inline int 81710843SDave.Plauger@Sun.COM dump_pfn_check(pfn_t pfn) 81810843SDave.Plauger@Sun.COM { 81910843SDave.Plauger@Sun.COM page_t *pp = page_numtopp_nolock(pfn); 82010843SDave.Plauger@Sun.COM if (pp == NULL || pp->p_pagenum != pfn || 82110843SDave.Plauger@Sun.COM #if defined(__sparc) 82211185SSean.McEnroe@Sun.COM pp->p_vnode == &promvp || 82310843SDave.Plauger@Sun.COM #else 82410843SDave.Plauger@Sun.COM PP_ISBOOTPAGES(pp) || 82510843SDave.Plauger@Sun.COM #endif 82610843SDave.Plauger@Sun.COM pp->p_toxic != 0) 82710843SDave.Plauger@Sun.COM return (0); 82810843SDave.Plauger@Sun.COM return (1); 82910843SDave.Plauger@Sun.COM } 83010843SDave.Plauger@Sun.COM 83110843SDave.Plauger@Sun.COM /* 83210843SDave.Plauger@Sun.COM * Check a range to see if all contained pages are available and 83310843SDave.Plauger@Sun.COM * return non-zero if the range can be used. 83410843SDave.Plauger@Sun.COM */ 83510843SDave.Plauger@Sun.COM static inline int 83610843SDave.Plauger@Sun.COM dump_range_check(pgcnt_t start, pgcnt_t end, pfn_t pfn) 83710843SDave.Plauger@Sun.COM { 83810843SDave.Plauger@Sun.COM for (; start < end; start++, pfn++) { 83910843SDave.Plauger@Sun.COM if (BT_TEST(dumpcfg.bitmap, start)) 84010843SDave.Plauger@Sun.COM return (0); 84110843SDave.Plauger@Sun.COM if (!dump_pfn_check(pfn)) 84210843SDave.Plauger@Sun.COM return (0); 84310843SDave.Plauger@Sun.COM } 84410843SDave.Plauger@Sun.COM return (1); 84510843SDave.Plauger@Sun.COM } 84610843SDave.Plauger@Sun.COM 84710843SDave.Plauger@Sun.COM /* 84810843SDave.Plauger@Sun.COM * dumpsys_get_maxmem() is called during panic. Find unused ranges 84910843SDave.Plauger@Sun.COM * and use them for buffers. If we find enough memory switch to 85010843SDave.Plauger@Sun.COM * parallel bzip2, otherwise use parallel lzjb. 85110843SDave.Plauger@Sun.COM * 85210843SDave.Plauger@Sun.COM * It searches the dump bitmap in 2 passes. The first time it looks 85310843SDave.Plauger@Sun.COM * for CBUF_MAPSIZE ranges. On the second pass it uses small pages. 85410843SDave.Plauger@Sun.COM */ 85510843SDave.Plauger@Sun.COM static void 85610843SDave.Plauger@Sun.COM dumpsys_get_maxmem() 85710843SDave.Plauger@Sun.COM { 85810843SDave.Plauger@Sun.COM dumpcfg_t *cfg = &dumpcfg; 85910843SDave.Plauger@Sun.COM cbuf_t *endcp = &cfg->cbuf[cfg->ncbuf]; 86010843SDave.Plauger@Sun.COM helper_t *endhp = &cfg->helper[cfg->nhelper]; 86110843SDave.Plauger@Sun.COM pgcnt_t bitnum, end; 86210843SDave.Plauger@Sun.COM size_t sz, endsz, bz2size; 86310843SDave.Plauger@Sun.COM pfn_t pfn, off; 86410843SDave.Plauger@Sun.COM cbuf_t *cp; 86510843SDave.Plauger@Sun.COM helper_t *hp, *ohp; 86610843SDave.Plauger@Sun.COM dumpmlw_t mlw; 86710843SDave.Plauger@Sun.COM int k; 86810843SDave.Plauger@Sun.COM 86910843SDave.Plauger@Sun.COM if (cfg->maxsize == 0 || cfg->clevel < DUMP_CLEVEL_LZJB || 87010843SDave.Plauger@Sun.COM (dump_conflags & DUMP_ALL) != 0) 87110843SDave.Plauger@Sun.COM return; 87210843SDave.Plauger@Sun.COM 87310843SDave.Plauger@Sun.COM sz = 0; 87410843SDave.Plauger@Sun.COM cfg->found4m = 0; 87510843SDave.Plauger@Sun.COM cfg->foundsm = 0; 87610843SDave.Plauger@Sun.COM 87710843SDave.Plauger@Sun.COM /* bitmap of ranges used to estimate which pfns are being used */ 87810843SDave.Plauger@Sun.COM bzero(dumpcfg.rbitmap, BT_SIZEOFMAP(dumpcfg.rbitmapsize)); 87910843SDave.Plauger@Sun.COM 88010843SDave.Plauger@Sun.COM /* find ranges that are not being dumped to use for buffers */ 88110843SDave.Plauger@Sun.COM dump_init_memlist_walker(&mlw); 88210843SDave.Plauger@Sun.COM for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum = end) { 88310843SDave.Plauger@Sun.COM dump_timeleft = dump_timeout; 88410843SDave.Plauger@Sun.COM end = bitnum + CBUF_MAPNP; 88510843SDave.Plauger@Sun.COM pfn = dump_bitnum_to_pfn(bitnum, &mlw); 88610843SDave.Plauger@Sun.COM ASSERT(pfn != PFN_INVALID); 88710843SDave.Plauger@Sun.COM 88810843SDave.Plauger@Sun.COM /* skip partial range at end of mem segment */ 88910843SDave.Plauger@Sun.COM if (mlw.mpleft < CBUF_MAPNP) { 89010843SDave.Plauger@Sun.COM end = bitnum + mlw.mpleft; 89110843SDave.Plauger@Sun.COM continue; 89210843SDave.Plauger@Sun.COM } 89310843SDave.Plauger@Sun.COM 89410843SDave.Plauger@Sun.COM /* skip non aligned pages */ 89510843SDave.Plauger@Sun.COM off = P2PHASE(pfn, CBUF_MAPNP); 89610843SDave.Plauger@Sun.COM if (off != 0) { 89710843SDave.Plauger@Sun.COM end -= off; 89810843SDave.Plauger@Sun.COM continue; 89910843SDave.Plauger@Sun.COM } 90010843SDave.Plauger@Sun.COM 90110843SDave.Plauger@Sun.COM if (!dump_range_check(bitnum, end, pfn)) 90210843SDave.Plauger@Sun.COM continue; 90310843SDave.Plauger@Sun.COM 90410843SDave.Plauger@Sun.COM ASSERT((sz + CBUF_MAPSIZE) <= cfg->maxvmsize); 90510843SDave.Plauger@Sun.COM hat_devload(kas.a_hat, cfg->maxvm + sz, CBUF_MAPSIZE, pfn, 90610843SDave.Plauger@Sun.COM PROT_READ | PROT_WRITE, HAT_LOAD_NOCONSIST); 90710843SDave.Plauger@Sun.COM sz += CBUF_MAPSIZE; 90810843SDave.Plauger@Sun.COM cfg->found4m++; 90910843SDave.Plauger@Sun.COM 91010843SDave.Plauger@Sun.COM /* set the bitmap for both ends to be sure to cover the range */ 91110843SDave.Plauger@Sun.COM dump_set_used(pfn); 91210843SDave.Plauger@Sun.COM dump_set_used(pfn + CBUF_MAPNP - 1); 91310843SDave.Plauger@Sun.COM 91410843SDave.Plauger@Sun.COM if (sz >= cfg->maxsize) 91510843SDave.Plauger@Sun.COM goto foundmax; 91610843SDave.Plauger@Sun.COM } 91710843SDave.Plauger@Sun.COM 91810843SDave.Plauger@Sun.COM /* Add small pages if we can't find enough large pages. */ 91910843SDave.Plauger@Sun.COM dump_init_memlist_walker(&mlw); 92010843SDave.Plauger@Sun.COM for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum = end) { 92110843SDave.Plauger@Sun.COM dump_timeleft = dump_timeout; 92210843SDave.Plauger@Sun.COM end = bitnum + CBUF_MAPNP; 92310843SDave.Plauger@Sun.COM pfn = dump_bitnum_to_pfn(bitnum, &mlw); 92410843SDave.Plauger@Sun.COM ASSERT(pfn != PFN_INVALID); 92510843SDave.Plauger@Sun.COM 92610843SDave.Plauger@Sun.COM /* Find any non-aligned pages at start and end of segment. */ 92710843SDave.Plauger@Sun.COM off = P2PHASE(pfn, CBUF_MAPNP); 92810843SDave.Plauger@Sun.COM if (mlw.mpleft < CBUF_MAPNP) { 92910843SDave.Plauger@Sun.COM end = bitnum + mlw.mpleft; 93010843SDave.Plauger@Sun.COM } else if (off != 0) { 93110843SDave.Plauger@Sun.COM end -= off; 93210843SDave.Plauger@Sun.COM } else if (cfg->found4m && dump_test_used(pfn)) { 93310843SDave.Plauger@Sun.COM continue; 93410843SDave.Plauger@Sun.COM } 93510843SDave.Plauger@Sun.COM 93610843SDave.Plauger@Sun.COM for (; bitnum < end; bitnum++, pfn++) { 93710843SDave.Plauger@Sun.COM dump_timeleft = dump_timeout; 93810843SDave.Plauger@Sun.COM if (BT_TEST(dumpcfg.bitmap, bitnum)) 93910843SDave.Plauger@Sun.COM continue; 94010843SDave.Plauger@Sun.COM if (!dump_pfn_check(pfn)) 94110843SDave.Plauger@Sun.COM continue; 94210843SDave.Plauger@Sun.COM ASSERT((sz + PAGESIZE) <= cfg->maxvmsize); 94310843SDave.Plauger@Sun.COM hat_devload(kas.a_hat, cfg->maxvm + sz, PAGESIZE, pfn, 94410843SDave.Plauger@Sun.COM PROT_READ | PROT_WRITE, HAT_LOAD_NOCONSIST); 94510843SDave.Plauger@Sun.COM sz += PAGESIZE; 94610843SDave.Plauger@Sun.COM cfg->foundsm++; 94710843SDave.Plauger@Sun.COM dump_set_used(pfn); 94810843SDave.Plauger@Sun.COM if (sz >= cfg->maxsize) 94910843SDave.Plauger@Sun.COM goto foundmax; 95010843SDave.Plauger@Sun.COM } 95110843SDave.Plauger@Sun.COM } 95210843SDave.Plauger@Sun.COM 95310843SDave.Plauger@Sun.COM /* Fall back to lzjb if we did not get enough memory for bzip2. */ 95410843SDave.Plauger@Sun.COM endsz = (cfg->maxsize * cfg->threshold) / cfg->nhelper; 95510843SDave.Plauger@Sun.COM if (sz < endsz) { 95610843SDave.Plauger@Sun.COM cfg->clevel = DUMP_CLEVEL_LZJB; 95710843SDave.Plauger@Sun.COM } 95810843SDave.Plauger@Sun.COM 95910843SDave.Plauger@Sun.COM /* Allocate memory for as many helpers as we can. */ 96010843SDave.Plauger@Sun.COM foundmax: 96110843SDave.Plauger@Sun.COM 96210843SDave.Plauger@Sun.COM /* Byte offsets into memory found and mapped above */ 96310843SDave.Plauger@Sun.COM endsz = sz; 96410843SDave.Plauger@Sun.COM sz = 0; 96510843SDave.Plauger@Sun.COM 96610843SDave.Plauger@Sun.COM /* Set the size for bzip2 state. Only bzip2 needs it. */ 96710843SDave.Plauger@Sun.COM bz2size = BZ2_bzCompressInitSize(dump_bzip2_level); 96810843SDave.Plauger@Sun.COM 96910843SDave.Plauger@Sun.COM /* Skip the preallocate output buffers. */ 97010843SDave.Plauger@Sun.COM cp = &cfg->cbuf[MINCBUFS]; 97110843SDave.Plauger@Sun.COM 97210843SDave.Plauger@Sun.COM /* Use this to move memory up from the preallocated helpers. */ 97310843SDave.Plauger@Sun.COM ohp = cfg->helper; 97410843SDave.Plauger@Sun.COM 97510843SDave.Plauger@Sun.COM /* Loop over all helpers and allocate memory. */ 97610843SDave.Plauger@Sun.COM for (hp = cfg->helper; hp < endhp; hp++) { 97710843SDave.Plauger@Sun.COM 97810843SDave.Plauger@Sun.COM /* Skip preallocated helpers by checking hp->page. */ 97910843SDave.Plauger@Sun.COM if (hp->page == NULL) { 98010843SDave.Plauger@Sun.COM if (cfg->clevel <= DUMP_CLEVEL_LZJB) { 98110843SDave.Plauger@Sun.COM /* lzjb needs 2 1-page buffers */ 98210843SDave.Plauger@Sun.COM if ((sz + (2 * PAGESIZE)) > endsz) 98310843SDave.Plauger@Sun.COM break; 98410843SDave.Plauger@Sun.COM hp->page = cfg->maxvm + sz; 98510843SDave.Plauger@Sun.COM sz += PAGESIZE; 98610843SDave.Plauger@Sun.COM hp->lzbuf = cfg->maxvm + sz; 98710843SDave.Plauger@Sun.COM sz += PAGESIZE; 98810843SDave.Plauger@Sun.COM 98910843SDave.Plauger@Sun.COM } else if (ohp->lzbuf != NULL) { 99010843SDave.Plauger@Sun.COM /* re-use the preallocted lzjb page for bzip2 */ 99110843SDave.Plauger@Sun.COM hp->page = ohp->lzbuf; 99210843SDave.Plauger@Sun.COM ohp->lzbuf = NULL; 99310843SDave.Plauger@Sun.COM ++ohp; 99410843SDave.Plauger@Sun.COM 99510843SDave.Plauger@Sun.COM } else { 99610843SDave.Plauger@Sun.COM /* bzip2 needs a 1-page buffer */ 99710843SDave.Plauger@Sun.COM if ((sz + PAGESIZE) > endsz) 99810843SDave.Plauger@Sun.COM break; 99910843SDave.Plauger@Sun.COM hp->page = cfg->maxvm + sz; 100010843SDave.Plauger@Sun.COM sz += PAGESIZE; 100110843SDave.Plauger@Sun.COM } 100210843SDave.Plauger@Sun.COM } 100310843SDave.Plauger@Sun.COM 100410843SDave.Plauger@Sun.COM /* 100510843SDave.Plauger@Sun.COM * Add output buffers per helper. The number of 100610843SDave.Plauger@Sun.COM * buffers per helper is determined by the ratio of 100710843SDave.Plauger@Sun.COM * ncbuf to nhelper. 100810843SDave.Plauger@Sun.COM */ 100910843SDave.Plauger@Sun.COM for (k = 0; cp < endcp && (sz + CBUF_SIZE) <= endsz && 101010843SDave.Plauger@Sun.COM k < NCBUF_PER_HELPER; k++) { 101110843SDave.Plauger@Sun.COM cp->state = CBUF_FREEBUF; 101210843SDave.Plauger@Sun.COM cp->size = CBUF_SIZE; 101310843SDave.Plauger@Sun.COM cp->buf = cfg->maxvm + sz; 101410843SDave.Plauger@Sun.COM sz += CBUF_SIZE; 101510843SDave.Plauger@Sun.COM ++cp; 101610843SDave.Plauger@Sun.COM } 101710843SDave.Plauger@Sun.COM 101810843SDave.Plauger@Sun.COM /* 101910843SDave.Plauger@Sun.COM * bzip2 needs compression state. Use the dumpbzalloc 102010843SDave.Plauger@Sun.COM * and dumpbzfree callbacks to allocate the memory. 102110843SDave.Plauger@Sun.COM * bzip2 does allocation only at init time. 102210843SDave.Plauger@Sun.COM */ 102310843SDave.Plauger@Sun.COM if (cfg->clevel >= DUMP_CLEVEL_BZIP2) { 102410843SDave.Plauger@Sun.COM if ((sz + bz2size) > endsz) { 102510843SDave.Plauger@Sun.COM hp->page = NULL; 102610843SDave.Plauger@Sun.COM break; 102710843SDave.Plauger@Sun.COM } else { 102810843SDave.Plauger@Sun.COM hp->bzstream.opaque = &sz; 102910843SDave.Plauger@Sun.COM hp->bzstream.bzalloc = dumpbzalloc; 103010843SDave.Plauger@Sun.COM hp->bzstream.bzfree = dumpbzfree; 103110843SDave.Plauger@Sun.COM (void) BZ2_bzCompressInit(&hp->bzstream, 103210843SDave.Plauger@Sun.COM dump_bzip2_level, 0, 0); 103310843SDave.Plauger@Sun.COM hp->bzstream.opaque = NULL; 103410843SDave.Plauger@Sun.COM } 103510843SDave.Plauger@Sun.COM } 103610843SDave.Plauger@Sun.COM } 103710843SDave.Plauger@Sun.COM 103810843SDave.Plauger@Sun.COM /* Finish allocating output buffers */ 103910843SDave.Plauger@Sun.COM for (; cp < endcp && (sz + CBUF_SIZE) <= endsz; cp++) { 104010843SDave.Plauger@Sun.COM cp->state = CBUF_FREEBUF; 104110843SDave.Plauger@Sun.COM cp->size = CBUF_SIZE; 104210843SDave.Plauger@Sun.COM cp->buf = cfg->maxvm + sz; 104310843SDave.Plauger@Sun.COM sz += CBUF_SIZE; 104410843SDave.Plauger@Sun.COM } 104510843SDave.Plauger@Sun.COM 104610843SDave.Plauger@Sun.COM /* Enable IS_DUMP_PAGE macro, which checks for pages we took. */ 104710843SDave.Plauger@Sun.COM if (cfg->found4m || cfg->foundsm) 104810843SDave.Plauger@Sun.COM dump_check_used = 1; 104910843SDave.Plauger@Sun.COM 105010843SDave.Plauger@Sun.COM ASSERT(sz <= endsz); 105110843SDave.Plauger@Sun.COM } 105210843SDave.Plauger@Sun.COM 10530Sstevel@tonic-gate static void 10540Sstevel@tonic-gate dumphdr_init(void) 10550Sstevel@tonic-gate { 10560Sstevel@tonic-gate pgcnt_t npages = 0; 10570Sstevel@tonic-gate 10580Sstevel@tonic-gate ASSERT(MUTEX_HELD(&dump_lock)); 10590Sstevel@tonic-gate 10600Sstevel@tonic-gate if (dumphdr == NULL) { 10610Sstevel@tonic-gate dumphdr = kmem_zalloc(sizeof (dumphdr_t), KM_SLEEP); 10620Sstevel@tonic-gate dumphdr->dump_magic = DUMP_MAGIC; 10630Sstevel@tonic-gate dumphdr->dump_version = DUMP_VERSION; 10640Sstevel@tonic-gate dumphdr->dump_wordsize = DUMP_WORDSIZE; 10650Sstevel@tonic-gate dumphdr->dump_pageshift = PAGESHIFT; 10660Sstevel@tonic-gate dumphdr->dump_pagesize = PAGESIZE; 10670Sstevel@tonic-gate dumphdr->dump_utsname = utsname; 10680Sstevel@tonic-gate (void) strcpy(dumphdr->dump_platform, platform); 106910843SDave.Plauger@Sun.COM dumpbuf.size = dumpbuf_iosize(maxphys); 107010843SDave.Plauger@Sun.COM dumpbuf.start = kmem_alloc(dumpbuf.size, KM_SLEEP); 107110843SDave.Plauger@Sun.COM dumpbuf.end = dumpbuf.start + dumpbuf.size; 107210843SDave.Plauger@Sun.COM dumpcfg.pids = kmem_alloc(v.v_proc * sizeof (pid_t), KM_SLEEP); 107310843SDave.Plauger@Sun.COM dumpcfg.helpermap = kmem_zalloc(BT_SIZEOFMAP(NCPU), KM_SLEEP); 107410843SDave.Plauger@Sun.COM LOCK_INIT_HELD(&dumpcfg.helper_lock); 10750Sstevel@tonic-gate } 10760Sstevel@tonic-gate 10775084Sjohnlev npages = num_phys_pages(); 10780Sstevel@tonic-gate 107910843SDave.Plauger@Sun.COM if (dumpcfg.bitmapsize != npages) { 108010843SDave.Plauger@Sun.COM size_t rlen = CBUF_MAPP2R(P2ROUNDUP(npages, CBUF_MAPNP)); 10810Sstevel@tonic-gate void *map = kmem_alloc(BT_SIZEOFMAP(npages), KM_SLEEP); 108210843SDave.Plauger@Sun.COM void *rmap = kmem_alloc(BT_SIZEOFMAP(rlen), KM_SLEEP); 108310843SDave.Plauger@Sun.COM 108410843SDave.Plauger@Sun.COM if (dumpcfg.bitmap != NULL) 108510843SDave.Plauger@Sun.COM kmem_free(dumpcfg.bitmap, BT_SIZEOFMAP(dumpcfg. 108610843SDave.Plauger@Sun.COM bitmapsize)); 108710843SDave.Plauger@Sun.COM if (dumpcfg.rbitmap != NULL) 108810843SDave.Plauger@Sun.COM kmem_free(dumpcfg.rbitmap, BT_SIZEOFMAP(dumpcfg. 108910843SDave.Plauger@Sun.COM rbitmapsize)); 109010843SDave.Plauger@Sun.COM dumpcfg.bitmap = map; 109110843SDave.Plauger@Sun.COM dumpcfg.bitmapsize = npages; 109210843SDave.Plauger@Sun.COM dumpcfg.rbitmap = rmap; 109310843SDave.Plauger@Sun.COM dumpcfg.rbitmapsize = rlen; 10940Sstevel@tonic-gate } 10950Sstevel@tonic-gate } 10960Sstevel@tonic-gate 10970Sstevel@tonic-gate /* 10980Sstevel@tonic-gate * Establish a new dump device. 10990Sstevel@tonic-gate */ 11000Sstevel@tonic-gate int 11010Sstevel@tonic-gate dumpinit(vnode_t *vp, char *name, int justchecking) 11020Sstevel@tonic-gate { 11030Sstevel@tonic-gate vnode_t *cvp; 11040Sstevel@tonic-gate vattr_t vattr; 11050Sstevel@tonic-gate vnode_t *cdev_vp; 11060Sstevel@tonic-gate int error = 0; 11070Sstevel@tonic-gate 11080Sstevel@tonic-gate ASSERT(MUTEX_HELD(&dump_lock)); 11090Sstevel@tonic-gate 11100Sstevel@tonic-gate dumphdr_init(); 11110Sstevel@tonic-gate 11120Sstevel@tonic-gate cvp = common_specvp(vp); 11130Sstevel@tonic-gate if (cvp == dumpvp) 11140Sstevel@tonic-gate return (0); 11150Sstevel@tonic-gate 11160Sstevel@tonic-gate /* 11170Sstevel@tonic-gate * Determine whether this is a plausible dump device. We want either: 11180Sstevel@tonic-gate * (1) a real device that's not mounted and has a cb_dump routine, or 11190Sstevel@tonic-gate * (2) a swapfile on some filesystem that has a vop_dump routine. 11200Sstevel@tonic-gate */ 11215331Samw if ((error = VOP_OPEN(&cvp, FREAD | FWRITE, kcred, NULL)) != 0) 11220Sstevel@tonic-gate return (error); 11230Sstevel@tonic-gate 11240Sstevel@tonic-gate vattr.va_mask = AT_SIZE | AT_TYPE | AT_RDEV; 11255331Samw if ((error = VOP_GETATTR(cvp, &vattr, 0, kcred, NULL)) == 0) { 11260Sstevel@tonic-gate if (vattr.va_type == VBLK || vattr.va_type == VCHR) { 11270Sstevel@tonic-gate if (devopsp[getmajor(vattr.va_rdev)]-> 11280Sstevel@tonic-gate devo_cb_ops->cb_dump == nodev) 11290Sstevel@tonic-gate error = ENOTSUP; 11300Sstevel@tonic-gate else if (vfs_devismounted(vattr.va_rdev)) 11310Sstevel@tonic-gate error = EBUSY; 113210588SEric.Taylor@Sun.COM if (strcmp(ddi_driver_name(VTOS(cvp)->s_dip), 113310588SEric.Taylor@Sun.COM ZFS_DRIVER) == 0 && 113410588SEric.Taylor@Sun.COM IS_SWAPVP(common_specvp(cvp))) 113510588SEric.Taylor@Sun.COM error = EBUSY; 11360Sstevel@tonic-gate } else { 11370Sstevel@tonic-gate if (vn_matchopval(cvp, VOPNAME_DUMP, fs_nosys) || 11380Sstevel@tonic-gate !IS_SWAPVP(cvp)) 11390Sstevel@tonic-gate error = ENOTSUP; 11400Sstevel@tonic-gate } 11410Sstevel@tonic-gate } 11420Sstevel@tonic-gate 11430Sstevel@tonic-gate if (error == 0 && vattr.va_size < 2 * DUMP_LOGSIZE + DUMP_ERPTSIZE) 11440Sstevel@tonic-gate error = ENOSPC; 11450Sstevel@tonic-gate 11460Sstevel@tonic-gate if (error || justchecking) { 11475331Samw (void) VOP_CLOSE(cvp, FREAD | FWRITE, 1, (offset_t)0, 11485331Samw kcred, NULL); 11490Sstevel@tonic-gate return (error); 11500Sstevel@tonic-gate } 11510Sstevel@tonic-gate 11520Sstevel@tonic-gate VN_HOLD(cvp); 11530Sstevel@tonic-gate 11540Sstevel@tonic-gate if (dumpvp != NULL) 11550Sstevel@tonic-gate dumpfini(); /* unconfigure the old dump device */ 11560Sstevel@tonic-gate 11570Sstevel@tonic-gate dumpvp = cvp; 11580Sstevel@tonic-gate dumpvp_size = vattr.va_size & -DUMP_OFFSET; 11590Sstevel@tonic-gate dumppath = kmem_alloc(strlen(name) + 1, KM_SLEEP); 11600Sstevel@tonic-gate (void) strcpy(dumppath, name); 116110843SDave.Plauger@Sun.COM dumpbuf.iosize = 0; 11620Sstevel@tonic-gate 11630Sstevel@tonic-gate /* 11640Sstevel@tonic-gate * If the dump device is a block device, attempt to open up the 11650Sstevel@tonic-gate * corresponding character device and determine its maximum transfer 11660Sstevel@tonic-gate * size. We use this information to potentially resize dumpbuf to a 11670Sstevel@tonic-gate * larger and more optimal size for performing i/o to the dump device. 11680Sstevel@tonic-gate */ 11690Sstevel@tonic-gate if (cvp->v_type == VBLK && 11700Sstevel@tonic-gate (cdev_vp = makespecvp(VTOS(cvp)->s_dev, VCHR)) != NULL) { 11715331Samw if (VOP_OPEN(&cdev_vp, FREAD | FWRITE, kcred, NULL) == 0) { 11720Sstevel@tonic-gate size_t blk_size; 11730Sstevel@tonic-gate struct dk_cinfo dki; 11749889SLarry.Liu@Sun.COM struct dk_minfo minf; 11750Sstevel@tonic-gate 11769889SLarry.Liu@Sun.COM if (VOP_IOCTL(cdev_vp, DKIOCGMEDIAINFO, 11779889SLarry.Liu@Sun.COM (intptr_t)&minf, FKIOCTL, kcred, NULL, NULL) 11789889SLarry.Liu@Sun.COM == 0 && minf.dki_lbsize != 0) 11799889SLarry.Liu@Sun.COM blk_size = minf.dki_lbsize; 11800Sstevel@tonic-gate else 11810Sstevel@tonic-gate blk_size = DEV_BSIZE; 11820Sstevel@tonic-gate 11830Sstevel@tonic-gate if (VOP_IOCTL(cdev_vp, DKIOCINFO, (intptr_t)&dki, 11845331Samw FKIOCTL, kcred, NULL, NULL) == 0) { 118510843SDave.Plauger@Sun.COM dumpbuf.iosize = dki.dki_maxtransfer * blk_size; 11860Sstevel@tonic-gate dumpbuf_resize(); 11870Sstevel@tonic-gate } 11886423Sgw25295 /* 118910588SEric.Taylor@Sun.COM * If we are working with a zvol then dumpify it 119010588SEric.Taylor@Sun.COM * if it's not being used as swap. 11916423Sgw25295 */ 11926423Sgw25295 if (strcmp(dki.dki_dname, ZVOL_DRIVER) == 0) { 119310588SEric.Taylor@Sun.COM if (IS_SWAPVP(common_specvp(cvp))) 119410588SEric.Taylor@Sun.COM error = EBUSY; 119510588SEric.Taylor@Sun.COM else if ((error = VOP_IOCTL(cdev_vp, 11966423Sgw25295 DKIOCDUMPINIT, NULL, FKIOCTL, kcred, 119710588SEric.Taylor@Sun.COM NULL, NULL)) != 0) 11986423Sgw25295 dumpfini(); 11996423Sgw25295 } 12000Sstevel@tonic-gate 12015331Samw (void) VOP_CLOSE(cdev_vp, FREAD | FWRITE, 1, 0, 12025331Samw kcred, NULL); 12030Sstevel@tonic-gate } 12040Sstevel@tonic-gate 12050Sstevel@tonic-gate VN_RELE(cdev_vp); 12060Sstevel@tonic-gate } 12070Sstevel@tonic-gate 12080Sstevel@tonic-gate cmn_err(CE_CONT, "?dump on %s size %llu MB\n", name, dumpvp_size >> 20); 12090Sstevel@tonic-gate 121010843SDave.Plauger@Sun.COM dump_update_clevel(); 121110843SDave.Plauger@Sun.COM 12126423Sgw25295 return (error); 12130Sstevel@tonic-gate } 12140Sstevel@tonic-gate 12150Sstevel@tonic-gate void 12160Sstevel@tonic-gate dumpfini(void) 12170Sstevel@tonic-gate { 12186423Sgw25295 vattr_t vattr; 12196423Sgw25295 boolean_t is_zfs = B_FALSE; 12206423Sgw25295 vnode_t *cdev_vp; 12210Sstevel@tonic-gate ASSERT(MUTEX_HELD(&dump_lock)); 12220Sstevel@tonic-gate 12230Sstevel@tonic-gate kmem_free(dumppath, strlen(dumppath) + 1); 12240Sstevel@tonic-gate 12256423Sgw25295 /* 12266423Sgw25295 * Determine if we are using zvols for our dump device 12276423Sgw25295 */ 12286423Sgw25295 vattr.va_mask = AT_RDEV; 12296423Sgw25295 if (VOP_GETATTR(dumpvp, &vattr, 0, kcred, NULL) == 0) { 12306423Sgw25295 is_zfs = (getmajor(vattr.va_rdev) == 12316423Sgw25295 ddi_name_to_major(ZFS_DRIVER)) ? B_TRUE : B_FALSE; 12326423Sgw25295 } 12336423Sgw25295 12346423Sgw25295 /* 12356423Sgw25295 * If we have a zvol dump device then we call into zfs so 12366423Sgw25295 * that it may have a chance to cleanup. 12376423Sgw25295 */ 12386423Sgw25295 if (is_zfs && 12396423Sgw25295 (cdev_vp = makespecvp(VTOS(dumpvp)->s_dev, VCHR)) != NULL) { 12406423Sgw25295 if (VOP_OPEN(&cdev_vp, FREAD | FWRITE, kcred, NULL) == 0) { 12416423Sgw25295 (void) VOP_IOCTL(cdev_vp, DKIOCDUMPFINI, NULL, FKIOCTL, 12426423Sgw25295 kcred, NULL, NULL); 12436423Sgw25295 (void) VOP_CLOSE(cdev_vp, FREAD | FWRITE, 1, 0, 12446423Sgw25295 kcred, NULL); 12456423Sgw25295 } 12466423Sgw25295 VN_RELE(cdev_vp); 12476423Sgw25295 } 12486423Sgw25295 12495331Samw (void) VOP_CLOSE(dumpvp, FREAD | FWRITE, 1, (offset_t)0, kcred, NULL); 12500Sstevel@tonic-gate 12510Sstevel@tonic-gate VN_RELE(dumpvp); 12520Sstevel@tonic-gate 12530Sstevel@tonic-gate dumpvp = NULL; 12540Sstevel@tonic-gate dumpvp_size = 0; 12550Sstevel@tonic-gate dumppath = NULL; 12560Sstevel@tonic-gate } 12570Sstevel@tonic-gate 12580Sstevel@tonic-gate static offset_t 12590Sstevel@tonic-gate dumpvp_flush(void) 12600Sstevel@tonic-gate { 126110843SDave.Plauger@Sun.COM size_t size = P2ROUNDUP(dumpbuf.cur - dumpbuf.start, PAGESIZE); 126210843SDave.Plauger@Sun.COM hrtime_t iotime; 12630Sstevel@tonic-gate int err; 12640Sstevel@tonic-gate 126510843SDave.Plauger@Sun.COM if (dumpbuf.vp_off + size > dumpbuf.vp_limit) { 12660Sstevel@tonic-gate dump_ioerr = ENOSPC; 126710843SDave.Plauger@Sun.COM dumpbuf.vp_off = dumpbuf.vp_limit; 12680Sstevel@tonic-gate } else if (size != 0) { 126910843SDave.Plauger@Sun.COM iotime = gethrtime(); 127010843SDave.Plauger@Sun.COM dumpsync.iowait += iotime - dumpsync.iowaitts; 12710Sstevel@tonic-gate if (panicstr) 127210843SDave.Plauger@Sun.COM err = VOP_DUMP(dumpvp, dumpbuf.start, 127310843SDave.Plauger@Sun.COM lbtodb(dumpbuf.vp_off), btod(size), NULL); 12740Sstevel@tonic-gate else 127510843SDave.Plauger@Sun.COM err = vn_rdwr(UIO_WRITE, dumpbuf.cdev_vp != NULL ? 127610843SDave.Plauger@Sun.COM dumpbuf.cdev_vp : dumpvp, dumpbuf.start, size, 127710843SDave.Plauger@Sun.COM dumpbuf.vp_off, UIO_SYSSPACE, 0, dumpbuf.vp_limit, 12780Sstevel@tonic-gate kcred, 0); 12790Sstevel@tonic-gate if (err && dump_ioerr == 0) 12800Sstevel@tonic-gate dump_ioerr = err; 128110843SDave.Plauger@Sun.COM dumpsync.iowaitts = gethrtime(); 128210843SDave.Plauger@Sun.COM dumpsync.iotime += dumpsync.iowaitts - iotime; 128310843SDave.Plauger@Sun.COM dumpsync.nwrite += size; 128410843SDave.Plauger@Sun.COM dumpbuf.vp_off += size; 12850Sstevel@tonic-gate } 128610843SDave.Plauger@Sun.COM dumpbuf.cur = dumpbuf.start; 12870Sstevel@tonic-gate dump_timeleft = dump_timeout; 128810843SDave.Plauger@Sun.COM return (dumpbuf.vp_off); 12890Sstevel@tonic-gate } 12900Sstevel@tonic-gate 129110843SDave.Plauger@Sun.COM /* maximize write speed by keeping seek offset aligned with size */ 12920Sstevel@tonic-gate void 12930Sstevel@tonic-gate dumpvp_write(const void *va, size_t size) 12940Sstevel@tonic-gate { 129510843SDave.Plauger@Sun.COM size_t len, off, sz; 129610843SDave.Plauger@Sun.COM 12970Sstevel@tonic-gate while (size != 0) { 129810843SDave.Plauger@Sun.COM len = MIN(size, dumpbuf.end - dumpbuf.cur); 12990Sstevel@tonic-gate if (len == 0) { 130010843SDave.Plauger@Sun.COM off = P2PHASE(dumpbuf.vp_off, dumpbuf.size); 130110843SDave.Plauger@Sun.COM if (off == 0 || !ISP2(dumpbuf.size)) { 130210843SDave.Plauger@Sun.COM (void) dumpvp_flush(); 130310843SDave.Plauger@Sun.COM } else { 130410843SDave.Plauger@Sun.COM sz = dumpbuf.size - off; 130510843SDave.Plauger@Sun.COM dumpbuf.cur = dumpbuf.start + sz; 130610843SDave.Plauger@Sun.COM (void) dumpvp_flush(); 130710843SDave.Plauger@Sun.COM ovbcopy(dumpbuf.start + sz, dumpbuf.start, off); 130810843SDave.Plauger@Sun.COM dumpbuf.cur += off; 130910843SDave.Plauger@Sun.COM } 13100Sstevel@tonic-gate } else { 131110843SDave.Plauger@Sun.COM bcopy(va, dumpbuf.cur, len); 13120Sstevel@tonic-gate va = (char *)va + len; 131310843SDave.Plauger@Sun.COM dumpbuf.cur += len; 13140Sstevel@tonic-gate size -= len; 13150Sstevel@tonic-gate } 13160Sstevel@tonic-gate } 13170Sstevel@tonic-gate } 13180Sstevel@tonic-gate 13190Sstevel@tonic-gate /*ARGSUSED*/ 13200Sstevel@tonic-gate static void 13210Sstevel@tonic-gate dumpvp_ksyms_write(const void *src, void *dst, size_t size) 13220Sstevel@tonic-gate { 13230Sstevel@tonic-gate dumpvp_write(src, size); 13240Sstevel@tonic-gate } 13250Sstevel@tonic-gate 13260Sstevel@tonic-gate /* 13270Sstevel@tonic-gate * Mark 'pfn' in the bitmap and dump its translation table entry. 13280Sstevel@tonic-gate */ 13290Sstevel@tonic-gate void 13300Sstevel@tonic-gate dump_addpage(struct as *as, void *va, pfn_t pfn) 13310Sstevel@tonic-gate { 13320Sstevel@tonic-gate mem_vtop_t mem_vtop; 13330Sstevel@tonic-gate pgcnt_t bitnum; 13340Sstevel@tonic-gate 13350Sstevel@tonic-gate if ((bitnum = dump_pfn_to_bitnum(pfn)) != (pgcnt_t)-1) { 133610843SDave.Plauger@Sun.COM if (!BT_TEST(dumpcfg.bitmap, bitnum)) { 13370Sstevel@tonic-gate dumphdr->dump_npages++; 133810843SDave.Plauger@Sun.COM BT_SET(dumpcfg.bitmap, bitnum); 13390Sstevel@tonic-gate } 13400Sstevel@tonic-gate dumphdr->dump_nvtop++; 13410Sstevel@tonic-gate mem_vtop.m_as = as; 13420Sstevel@tonic-gate mem_vtop.m_va = va; 13430Sstevel@tonic-gate mem_vtop.m_pfn = pfn; 13440Sstevel@tonic-gate dumpvp_write(&mem_vtop, sizeof (mem_vtop_t)); 13450Sstevel@tonic-gate } 13460Sstevel@tonic-gate dump_timeleft = dump_timeout; 13470Sstevel@tonic-gate } 13480Sstevel@tonic-gate 13490Sstevel@tonic-gate /* 13500Sstevel@tonic-gate * Mark 'pfn' in the bitmap 13510Sstevel@tonic-gate */ 13520Sstevel@tonic-gate void 13530Sstevel@tonic-gate dump_page(pfn_t pfn) 13540Sstevel@tonic-gate { 13550Sstevel@tonic-gate pgcnt_t bitnum; 13560Sstevel@tonic-gate 13570Sstevel@tonic-gate if ((bitnum = dump_pfn_to_bitnum(pfn)) != (pgcnt_t)-1) { 135810843SDave.Plauger@Sun.COM if (!BT_TEST(dumpcfg.bitmap, bitnum)) { 13590Sstevel@tonic-gate dumphdr->dump_npages++; 136010843SDave.Plauger@Sun.COM BT_SET(dumpcfg.bitmap, bitnum); 13610Sstevel@tonic-gate } 13620Sstevel@tonic-gate } 13630Sstevel@tonic-gate dump_timeleft = dump_timeout; 13640Sstevel@tonic-gate } 13650Sstevel@tonic-gate 13660Sstevel@tonic-gate /* 13670Sstevel@tonic-gate * Dump the <as, va, pfn> information for a given address space. 13680Sstevel@tonic-gate * SEGOP_DUMP() will call dump_addpage() for each page in the segment. 13690Sstevel@tonic-gate */ 13700Sstevel@tonic-gate static void 13710Sstevel@tonic-gate dump_as(struct as *as) 13720Sstevel@tonic-gate { 13730Sstevel@tonic-gate struct seg *seg; 13740Sstevel@tonic-gate 13750Sstevel@tonic-gate AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 13760Sstevel@tonic-gate for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) { 13770Sstevel@tonic-gate if (seg->s_as != as) 13780Sstevel@tonic-gate break; 13790Sstevel@tonic-gate if (seg->s_ops == NULL) 13800Sstevel@tonic-gate continue; 13810Sstevel@tonic-gate SEGOP_DUMP(seg); 13820Sstevel@tonic-gate } 13830Sstevel@tonic-gate AS_LOCK_EXIT(as, &as->a_lock); 13840Sstevel@tonic-gate 13850Sstevel@tonic-gate if (seg != NULL) 13860Sstevel@tonic-gate cmn_err(CE_WARN, "invalid segment %p in address space %p", 13870Sstevel@tonic-gate (void *)seg, (void *)as); 13880Sstevel@tonic-gate } 13890Sstevel@tonic-gate 13900Sstevel@tonic-gate static int 13910Sstevel@tonic-gate dump_process(pid_t pid) 13920Sstevel@tonic-gate { 13930Sstevel@tonic-gate proc_t *p = sprlock(pid); 13940Sstevel@tonic-gate 13950Sstevel@tonic-gate if (p == NULL) 13960Sstevel@tonic-gate return (-1); 13970Sstevel@tonic-gate if (p->p_as != &kas) { 13980Sstevel@tonic-gate mutex_exit(&p->p_lock); 13990Sstevel@tonic-gate dump_as(p->p_as); 14000Sstevel@tonic-gate mutex_enter(&p->p_lock); 14010Sstevel@tonic-gate } 14020Sstevel@tonic-gate 14030Sstevel@tonic-gate sprunlock(p); 14040Sstevel@tonic-gate 14050Sstevel@tonic-gate return (0); 14060Sstevel@tonic-gate } 14070Sstevel@tonic-gate 14080Sstevel@tonic-gate void 14090Sstevel@tonic-gate dump_ereports(void) 14100Sstevel@tonic-gate { 14110Sstevel@tonic-gate u_offset_t dumpvp_start; 14120Sstevel@tonic-gate erpt_dump_t ed; 14130Sstevel@tonic-gate 14140Sstevel@tonic-gate if (dumpvp == NULL || dumphdr == NULL) 14150Sstevel@tonic-gate return; 14160Sstevel@tonic-gate 141710843SDave.Plauger@Sun.COM dumpbuf.cur = dumpbuf.start; 141810843SDave.Plauger@Sun.COM dumpbuf.vp_limit = dumpvp_size - (DUMP_OFFSET + DUMP_LOGSIZE); 141910843SDave.Plauger@Sun.COM dumpvp_start = dumpbuf.vp_limit - DUMP_ERPTSIZE; 142010843SDave.Plauger@Sun.COM dumpbuf.vp_off = dumpvp_start; 14210Sstevel@tonic-gate 14220Sstevel@tonic-gate fm_ereport_dump(); 14230Sstevel@tonic-gate if (panicstr) 14240Sstevel@tonic-gate errorq_dump(); 14250Sstevel@tonic-gate 14260Sstevel@tonic-gate bzero(&ed, sizeof (ed)); /* indicate end of ereports */ 14270Sstevel@tonic-gate dumpvp_write(&ed, sizeof (ed)); 14280Sstevel@tonic-gate (void) dumpvp_flush(); 14290Sstevel@tonic-gate 14300Sstevel@tonic-gate if (!panicstr) { 14310Sstevel@tonic-gate (void) VOP_PUTPAGE(dumpvp, dumpvp_start, 143210843SDave.Plauger@Sun.COM (size_t)(dumpbuf.vp_off - dumpvp_start), 14335331Samw B_INVAL | B_FORCE, kcred, NULL); 14340Sstevel@tonic-gate } 14350Sstevel@tonic-gate } 14360Sstevel@tonic-gate 14370Sstevel@tonic-gate void 14380Sstevel@tonic-gate dump_messages(void) 14390Sstevel@tonic-gate { 14400Sstevel@tonic-gate log_dump_t ld; 14410Sstevel@tonic-gate mblk_t *mctl, *mdata; 14420Sstevel@tonic-gate queue_t *q, *qlast; 14430Sstevel@tonic-gate u_offset_t dumpvp_start; 14440Sstevel@tonic-gate 14450Sstevel@tonic-gate if (dumpvp == NULL || dumphdr == NULL || log_consq == NULL) 14460Sstevel@tonic-gate return; 14470Sstevel@tonic-gate 144810843SDave.Plauger@Sun.COM dumpbuf.cur = dumpbuf.start; 144910843SDave.Plauger@Sun.COM dumpbuf.vp_limit = dumpvp_size - DUMP_OFFSET; 145010843SDave.Plauger@Sun.COM dumpvp_start = dumpbuf.vp_limit - DUMP_LOGSIZE; 145110843SDave.Plauger@Sun.COM dumpbuf.vp_off = dumpvp_start; 14520Sstevel@tonic-gate 14530Sstevel@tonic-gate qlast = NULL; 14540Sstevel@tonic-gate do { 14550Sstevel@tonic-gate for (q = log_consq; q->q_next != qlast; q = q->q_next) 14560Sstevel@tonic-gate continue; 14570Sstevel@tonic-gate for (mctl = q->q_first; mctl != NULL; mctl = mctl->b_next) { 14580Sstevel@tonic-gate dump_timeleft = dump_timeout; 14590Sstevel@tonic-gate mdata = mctl->b_cont; 14600Sstevel@tonic-gate ld.ld_magic = LOG_MAGIC; 14610Sstevel@tonic-gate ld.ld_msgsize = MBLKL(mctl->b_cont); 14620Sstevel@tonic-gate ld.ld_csum = checksum32(mctl->b_rptr, MBLKL(mctl)); 14630Sstevel@tonic-gate ld.ld_msum = checksum32(mdata->b_rptr, MBLKL(mdata)); 14640Sstevel@tonic-gate dumpvp_write(&ld, sizeof (ld)); 14650Sstevel@tonic-gate dumpvp_write(mctl->b_rptr, MBLKL(mctl)); 14660Sstevel@tonic-gate dumpvp_write(mdata->b_rptr, MBLKL(mdata)); 14670Sstevel@tonic-gate } 14680Sstevel@tonic-gate } while ((qlast = q) != log_consq); 14690Sstevel@tonic-gate 14700Sstevel@tonic-gate ld.ld_magic = 0; /* indicate end of messages */ 14710Sstevel@tonic-gate dumpvp_write(&ld, sizeof (ld)); 14720Sstevel@tonic-gate (void) dumpvp_flush(); 14730Sstevel@tonic-gate if (!panicstr) { 14740Sstevel@tonic-gate (void) VOP_PUTPAGE(dumpvp, dumpvp_start, 147510843SDave.Plauger@Sun.COM (size_t)(dumpbuf.vp_off - dumpvp_start), 14765331Samw B_INVAL | B_FORCE, kcred, NULL); 14770Sstevel@tonic-gate } 14780Sstevel@tonic-gate } 14790Sstevel@tonic-gate 148010843SDave.Plauger@Sun.COM /* 148110843SDave.Plauger@Sun.COM * The following functions are called on multiple CPUs during dump. 148210843SDave.Plauger@Sun.COM * They must not use most kernel services, because all cross-calls are 148310843SDave.Plauger@Sun.COM * disabled during panic. Therefore, blocking locks and cache flushes 148410843SDave.Plauger@Sun.COM * will not work. 148510843SDave.Plauger@Sun.COM */ 148610843SDave.Plauger@Sun.COM 148711178SDave.Plauger@Sun.COM /* 148811178SDave.Plauger@Sun.COM * Copy pages, trapping ECC errors. Also, for robustness, trap data 148911178SDave.Plauger@Sun.COM * access in case something goes wrong in the hat layer and the 149011178SDave.Plauger@Sun.COM * mapping is broken. 149111178SDave.Plauger@Sun.COM */ 149210843SDave.Plauger@Sun.COM static int 14930Sstevel@tonic-gate dump_pagecopy(void *src, void *dst) 14940Sstevel@tonic-gate { 14950Sstevel@tonic-gate long *wsrc = (long *)src; 14960Sstevel@tonic-gate long *wdst = (long *)dst; 14970Sstevel@tonic-gate const ulong_t ncopies = PAGESIZE / sizeof (long); 14980Sstevel@tonic-gate volatile int w = 0; 14990Sstevel@tonic-gate volatile int ueoff = -1; 15000Sstevel@tonic-gate on_trap_data_t otd; 15010Sstevel@tonic-gate 150211178SDave.Plauger@Sun.COM if (on_trap(&otd, OT_DATA_EC | OT_DATA_ACCESS)) { 150310843SDave.Plauger@Sun.COM if (ueoff == -1) 15040Sstevel@tonic-gate ueoff = w * sizeof (long); 150511178SDave.Plauger@Sun.COM /* report "bad ECC" or "bad address" */ 15060Sstevel@tonic-gate #ifdef _LP64 150711178SDave.Plauger@Sun.COM if (otd.ot_trap & OT_DATA_EC) 150811178SDave.Plauger@Sun.COM wdst[w++] = 0x00badecc00badecc; 150911178SDave.Plauger@Sun.COM else 151011178SDave.Plauger@Sun.COM wdst[w++] = 0x00badadd00badadd; 15110Sstevel@tonic-gate #else 151211178SDave.Plauger@Sun.COM if (otd.ot_trap & OT_DATA_EC) 151311178SDave.Plauger@Sun.COM wdst[w++] = 0x00badecc; 151411178SDave.Plauger@Sun.COM else 151511178SDave.Plauger@Sun.COM wdst[w++] = 0x00badadd; 15160Sstevel@tonic-gate #endif 15170Sstevel@tonic-gate } 15180Sstevel@tonic-gate while (w < ncopies) { 15190Sstevel@tonic-gate wdst[w] = wsrc[w]; 15200Sstevel@tonic-gate w++; 15210Sstevel@tonic-gate } 15220Sstevel@tonic-gate no_trap(); 152310843SDave.Plauger@Sun.COM return (ueoff); 15240Sstevel@tonic-gate } 15250Sstevel@tonic-gate 152610843SDave.Plauger@Sun.COM static void 152710843SDave.Plauger@Sun.COM dumpsys_close_cq(cqueue_t *cq, int live) 152810843SDave.Plauger@Sun.COM { 152910843SDave.Plauger@Sun.COM if (live) { 153010843SDave.Plauger@Sun.COM mutex_enter(&cq->mutex); 153110843SDave.Plauger@Sun.COM atomic_dec_uint(&cq->open); 153210843SDave.Plauger@Sun.COM cv_signal(&cq->cv); 153310843SDave.Plauger@Sun.COM mutex_exit(&cq->mutex); 153410843SDave.Plauger@Sun.COM } else { 153510843SDave.Plauger@Sun.COM atomic_dec_uint(&cq->open); 153610843SDave.Plauger@Sun.COM } 153710843SDave.Plauger@Sun.COM } 153810843SDave.Plauger@Sun.COM 153910843SDave.Plauger@Sun.COM static inline void 154010843SDave.Plauger@Sun.COM dumpsys_spinlock(lock_t *lp) 154110843SDave.Plauger@Sun.COM { 154210843SDave.Plauger@Sun.COM uint_t backoff = 0; 154310843SDave.Plauger@Sun.COM int loop_count = 0; 154410843SDave.Plauger@Sun.COM 154510843SDave.Plauger@Sun.COM while (LOCK_HELD(lp) || !lock_spin_try(lp)) { 154610843SDave.Plauger@Sun.COM if (++loop_count >= ncpus) { 154710843SDave.Plauger@Sun.COM backoff = mutex_lock_backoff(0); 154810843SDave.Plauger@Sun.COM loop_count = 0; 154910843SDave.Plauger@Sun.COM } else { 155010843SDave.Plauger@Sun.COM backoff = mutex_lock_backoff(backoff); 155110843SDave.Plauger@Sun.COM } 155210843SDave.Plauger@Sun.COM mutex_lock_delay(backoff); 155310843SDave.Plauger@Sun.COM } 155410843SDave.Plauger@Sun.COM } 155510843SDave.Plauger@Sun.COM 155610843SDave.Plauger@Sun.COM static inline void 155710843SDave.Plauger@Sun.COM dumpsys_spinunlock(lock_t *lp) 155810843SDave.Plauger@Sun.COM { 155910843SDave.Plauger@Sun.COM lock_clear(lp); 156010843SDave.Plauger@Sun.COM } 156110843SDave.Plauger@Sun.COM 156210843SDave.Plauger@Sun.COM static inline void 156310843SDave.Plauger@Sun.COM dumpsys_lock(cqueue_t *cq, int live) 156410843SDave.Plauger@Sun.COM { 156510843SDave.Plauger@Sun.COM if (live) 156610843SDave.Plauger@Sun.COM mutex_enter(&cq->mutex); 156710843SDave.Plauger@Sun.COM else 156810843SDave.Plauger@Sun.COM dumpsys_spinlock(&cq->spinlock); 156910843SDave.Plauger@Sun.COM } 157010843SDave.Plauger@Sun.COM 157110843SDave.Plauger@Sun.COM static inline void 157210843SDave.Plauger@Sun.COM dumpsys_unlock(cqueue_t *cq, int live, int signal) 157310843SDave.Plauger@Sun.COM { 157410843SDave.Plauger@Sun.COM if (live) { 157510843SDave.Plauger@Sun.COM if (signal) 157610843SDave.Plauger@Sun.COM cv_signal(&cq->cv); 157710843SDave.Plauger@Sun.COM mutex_exit(&cq->mutex); 157810843SDave.Plauger@Sun.COM } else { 157910843SDave.Plauger@Sun.COM dumpsys_spinunlock(&cq->spinlock); 158010843SDave.Plauger@Sun.COM } 158110843SDave.Plauger@Sun.COM } 158210843SDave.Plauger@Sun.COM 158310843SDave.Plauger@Sun.COM static void 158410843SDave.Plauger@Sun.COM dumpsys_wait_cq(cqueue_t *cq, int live) 158510843SDave.Plauger@Sun.COM { 158610843SDave.Plauger@Sun.COM if (live) { 158710843SDave.Plauger@Sun.COM cv_wait(&cq->cv, &cq->mutex); 158810843SDave.Plauger@Sun.COM } else { 158910843SDave.Plauger@Sun.COM dumpsys_spinunlock(&cq->spinlock); 159010843SDave.Plauger@Sun.COM while (cq->open) 159110843SDave.Plauger@Sun.COM if (cq->first) 159210843SDave.Plauger@Sun.COM break; 159310843SDave.Plauger@Sun.COM dumpsys_spinlock(&cq->spinlock); 159410843SDave.Plauger@Sun.COM } 159510843SDave.Plauger@Sun.COM } 159610843SDave.Plauger@Sun.COM 159710843SDave.Plauger@Sun.COM static void 159810843SDave.Plauger@Sun.COM dumpsys_put_cq(cqueue_t *cq, cbuf_t *cp, int newstate, int live) 159910843SDave.Plauger@Sun.COM { 160010843SDave.Plauger@Sun.COM if (cp == NULL) 160110843SDave.Plauger@Sun.COM return; 160210843SDave.Plauger@Sun.COM 160310843SDave.Plauger@Sun.COM dumpsys_lock(cq, live); 160410843SDave.Plauger@Sun.COM 160510843SDave.Plauger@Sun.COM if (cq->ts != 0) { 160610843SDave.Plauger@Sun.COM cq->empty += gethrtime() - cq->ts; 160710843SDave.Plauger@Sun.COM cq->ts = 0; 160810843SDave.Plauger@Sun.COM } 160910843SDave.Plauger@Sun.COM 161010843SDave.Plauger@Sun.COM cp->state = newstate; 161110843SDave.Plauger@Sun.COM cp->next = NULL; 161210843SDave.Plauger@Sun.COM if (cq->last == NULL) 161310843SDave.Plauger@Sun.COM cq->first = cp; 161410843SDave.Plauger@Sun.COM else 161510843SDave.Plauger@Sun.COM cq->last->next = cp; 161610843SDave.Plauger@Sun.COM cq->last = cp; 161710843SDave.Plauger@Sun.COM 161810843SDave.Plauger@Sun.COM dumpsys_unlock(cq, live, 1); 161910843SDave.Plauger@Sun.COM } 162010843SDave.Plauger@Sun.COM 162110843SDave.Plauger@Sun.COM static cbuf_t * 162210843SDave.Plauger@Sun.COM dumpsys_get_cq(cqueue_t *cq, int live) 162310843SDave.Plauger@Sun.COM { 162410843SDave.Plauger@Sun.COM cbuf_t *cp; 162510843SDave.Plauger@Sun.COM hrtime_t now = gethrtime(); 162610843SDave.Plauger@Sun.COM 162710843SDave.Plauger@Sun.COM dumpsys_lock(cq, live); 162810843SDave.Plauger@Sun.COM 162910843SDave.Plauger@Sun.COM /* CONSTCOND */ 163010843SDave.Plauger@Sun.COM while (1) { 163110843SDave.Plauger@Sun.COM cp = (cbuf_t *)cq->first; 163210843SDave.Plauger@Sun.COM if (cp == NULL) { 163310843SDave.Plauger@Sun.COM if (cq->open == 0) 163410843SDave.Plauger@Sun.COM break; 163510843SDave.Plauger@Sun.COM dumpsys_wait_cq(cq, live); 163610843SDave.Plauger@Sun.COM continue; 163710843SDave.Plauger@Sun.COM } 163810843SDave.Plauger@Sun.COM cq->first = cp->next; 163910843SDave.Plauger@Sun.COM if (cq->first == NULL) { 164010843SDave.Plauger@Sun.COM cq->last = NULL; 164110843SDave.Plauger@Sun.COM cq->ts = now; 164210843SDave.Plauger@Sun.COM } 164310843SDave.Plauger@Sun.COM break; 164410843SDave.Plauger@Sun.COM } 164510843SDave.Plauger@Sun.COM 164610843SDave.Plauger@Sun.COM dumpsys_unlock(cq, live, cq->first != NULL || cq->open == 0); 164710843SDave.Plauger@Sun.COM return (cp); 164810843SDave.Plauger@Sun.COM } 164910843SDave.Plauger@Sun.COM 165010843SDave.Plauger@Sun.COM /* 165110843SDave.Plauger@Sun.COM * Send an error message to the console. If the main task is running 165210843SDave.Plauger@Sun.COM * just write the message via uprintf. If a helper is running the 165310843SDave.Plauger@Sun.COM * message has to be put on a queue for the main task. Setting fmt to 165410843SDave.Plauger@Sun.COM * NULL means flush the error message buffer. If fmt is not NULL, just 165510843SDave.Plauger@Sun.COM * add the text to the existing buffer. 165610843SDave.Plauger@Sun.COM */ 165710843SDave.Plauger@Sun.COM static void 165810843SDave.Plauger@Sun.COM dumpsys_errmsg(helper_t *hp, const char *fmt, ...) 165910843SDave.Plauger@Sun.COM { 166010843SDave.Plauger@Sun.COM dumpsync_t *ds = hp->ds; 166110843SDave.Plauger@Sun.COM cbuf_t *cp = hp->cperr; 166210843SDave.Plauger@Sun.COM va_list adx; 166310843SDave.Plauger@Sun.COM 166410843SDave.Plauger@Sun.COM if (hp->helper == MAINHELPER) { 166510843SDave.Plauger@Sun.COM if (fmt != NULL) { 166610843SDave.Plauger@Sun.COM if (ds->neednl) { 166710843SDave.Plauger@Sun.COM uprintf("\n"); 166810843SDave.Plauger@Sun.COM ds->neednl = 0; 166910843SDave.Plauger@Sun.COM } 167010843SDave.Plauger@Sun.COM va_start(adx, fmt); 167110843SDave.Plauger@Sun.COM vuprintf(fmt, adx); 167210843SDave.Plauger@Sun.COM va_end(adx); 167310843SDave.Plauger@Sun.COM } 167410843SDave.Plauger@Sun.COM } else if (fmt == NULL) { 167510843SDave.Plauger@Sun.COM if (cp != NULL) { 167610843SDave.Plauger@Sun.COM CQ_PUT(mainq, cp, CBUF_ERRMSG); 167710843SDave.Plauger@Sun.COM hp->cperr = NULL; 167810843SDave.Plauger@Sun.COM } 167910843SDave.Plauger@Sun.COM } else { 168010843SDave.Plauger@Sun.COM if (hp->cperr == NULL) { 168110843SDave.Plauger@Sun.COM cp = CQ_GET(freebufq); 168210843SDave.Plauger@Sun.COM hp->cperr = cp; 168310843SDave.Plauger@Sun.COM cp->used = 0; 168410843SDave.Plauger@Sun.COM } 168510843SDave.Plauger@Sun.COM va_start(adx, fmt); 168610843SDave.Plauger@Sun.COM cp->used += vsnprintf(cp->buf + cp->used, cp->size - cp->used, 168710843SDave.Plauger@Sun.COM fmt, adx); 168810843SDave.Plauger@Sun.COM va_end(adx); 168910843SDave.Plauger@Sun.COM if ((cp->used + LOG_MSGSIZE) > cp->size) { 169010843SDave.Plauger@Sun.COM CQ_PUT(mainq, cp, CBUF_ERRMSG); 169110843SDave.Plauger@Sun.COM hp->cperr = NULL; 169210843SDave.Plauger@Sun.COM } 169310843SDave.Plauger@Sun.COM } 169410843SDave.Plauger@Sun.COM } 169510843SDave.Plauger@Sun.COM 169610843SDave.Plauger@Sun.COM /* 169710843SDave.Plauger@Sun.COM * Write an output buffer to the dump file. If the main task is 169810843SDave.Plauger@Sun.COM * running just write the data. If a helper is running the output is 169910843SDave.Plauger@Sun.COM * placed on a queue for the main task. 170010843SDave.Plauger@Sun.COM */ 170110843SDave.Plauger@Sun.COM static void 170210843SDave.Plauger@Sun.COM dumpsys_swrite(helper_t *hp, cbuf_t *cp, size_t used) 170310843SDave.Plauger@Sun.COM { 170410843SDave.Plauger@Sun.COM dumpsync_t *ds = hp->ds; 170510843SDave.Plauger@Sun.COM 170610843SDave.Plauger@Sun.COM if (hp->helper == MAINHELPER) { 170710843SDave.Plauger@Sun.COM HRSTART(ds->perpage, write); 170810843SDave.Plauger@Sun.COM dumpvp_write(cp->buf, used); 170910843SDave.Plauger@Sun.COM HRSTOP(ds->perpage, write); 171010843SDave.Plauger@Sun.COM CQ_PUT(freebufq, cp, CBUF_FREEBUF); 171110843SDave.Plauger@Sun.COM } else { 171210843SDave.Plauger@Sun.COM cp->used = used; 171310843SDave.Plauger@Sun.COM CQ_PUT(mainq, cp, CBUF_WRITE); 171410843SDave.Plauger@Sun.COM } 171510843SDave.Plauger@Sun.COM } 171610843SDave.Plauger@Sun.COM 171710843SDave.Plauger@Sun.COM /* 171810843SDave.Plauger@Sun.COM * Copy one page within the mapped range. The offset starts at 0 and 171910843SDave.Plauger@Sun.COM * is relative to the first pfn. cp->buf + cp->off is the address of 172010843SDave.Plauger@Sun.COM * the first pfn. If dump_pagecopy returns a UE offset, create an 172110843SDave.Plauger@Sun.COM * error message. Returns the offset to the next pfn in the range 172210843SDave.Plauger@Sun.COM * selected by the bitmap. 172310843SDave.Plauger@Sun.COM */ 172410843SDave.Plauger@Sun.COM static int 172510843SDave.Plauger@Sun.COM dumpsys_copy_page(helper_t *hp, int offset) 172610843SDave.Plauger@Sun.COM { 172710843SDave.Plauger@Sun.COM cbuf_t *cp = hp->cpin; 172810843SDave.Plauger@Sun.COM int ueoff; 172910843SDave.Plauger@Sun.COM 173010843SDave.Plauger@Sun.COM ASSERT(cp->off + offset + PAGESIZE <= cp->size); 173110843SDave.Plauger@Sun.COM ASSERT(BT_TEST(dumpcfg.bitmap, cp->bitnum)); 173210843SDave.Plauger@Sun.COM 173310843SDave.Plauger@Sun.COM ueoff = dump_pagecopy(cp->buf + cp->off + offset, hp->page); 173410843SDave.Plauger@Sun.COM 173510843SDave.Plauger@Sun.COM /* ueoff is the offset in the page to a UE error */ 173610843SDave.Plauger@Sun.COM if (ueoff != -1) { 173710843SDave.Plauger@Sun.COM uint64_t pa = ptob(cp->pfn) + offset + ueoff; 173810843SDave.Plauger@Sun.COM 173911178SDave.Plauger@Sun.COM dumpsys_errmsg(hp, "cpu %d: memory error at PA 0x%08x.%08x\n", 174011178SDave.Plauger@Sun.COM CPU->cpu_id, (uint32_t)(pa >> 32), (uint32_t)pa); 174110843SDave.Plauger@Sun.COM } 174210843SDave.Plauger@Sun.COM 174310843SDave.Plauger@Sun.COM /* 174410843SDave.Plauger@Sun.COM * Advance bitnum and offset to the next input page for the 174510843SDave.Plauger@Sun.COM * next call to this function. 174610843SDave.Plauger@Sun.COM */ 174710843SDave.Plauger@Sun.COM offset += PAGESIZE; 174810843SDave.Plauger@Sun.COM cp->bitnum++; 174910843SDave.Plauger@Sun.COM while (cp->off + offset < cp->size) { 175010843SDave.Plauger@Sun.COM if (BT_TEST(dumpcfg.bitmap, cp->bitnum)) 175110843SDave.Plauger@Sun.COM break; 175210843SDave.Plauger@Sun.COM offset += PAGESIZE; 175310843SDave.Plauger@Sun.COM cp->bitnum++; 175410843SDave.Plauger@Sun.COM } 175510843SDave.Plauger@Sun.COM 175610843SDave.Plauger@Sun.COM return (offset); 175710843SDave.Plauger@Sun.COM } 175810843SDave.Plauger@Sun.COM 175910843SDave.Plauger@Sun.COM /* 176010843SDave.Plauger@Sun.COM * Read the helper queue, and copy one mapped page. Return 0 when 176110843SDave.Plauger@Sun.COM * done. Return 1 when a page has been copied into hp->page. 176210843SDave.Plauger@Sun.COM */ 176310843SDave.Plauger@Sun.COM static int 176410843SDave.Plauger@Sun.COM dumpsys_sread(helper_t *hp) 176510843SDave.Plauger@Sun.COM { 176610843SDave.Plauger@Sun.COM dumpsync_t *ds = hp->ds; 176710843SDave.Plauger@Sun.COM 176810843SDave.Plauger@Sun.COM /* CONSTCOND */ 176910843SDave.Plauger@Sun.COM while (1) { 177010843SDave.Plauger@Sun.COM 177110843SDave.Plauger@Sun.COM /* Find the next input buffer. */ 177210843SDave.Plauger@Sun.COM if (hp->cpin == NULL) { 177310843SDave.Plauger@Sun.COM HRSTART(hp->perpage, inwait); 177410843SDave.Plauger@Sun.COM 177510843SDave.Plauger@Sun.COM /* CONSTCOND */ 177610843SDave.Plauger@Sun.COM while (1) { 177710843SDave.Plauger@Sun.COM hp->cpin = CQ_GET(helperq); 177810843SDave.Plauger@Sun.COM dump_timeleft = dump_timeout; 177910843SDave.Plauger@Sun.COM 178010843SDave.Plauger@Sun.COM /* 178110843SDave.Plauger@Sun.COM * NULL return means the helper queue 178210843SDave.Plauger@Sun.COM * is closed and empty. 178310843SDave.Plauger@Sun.COM */ 178410843SDave.Plauger@Sun.COM if (hp->cpin == NULL) 178510843SDave.Plauger@Sun.COM break; 178610843SDave.Plauger@Sun.COM 178710843SDave.Plauger@Sun.COM /* Have input, check for dump I/O error. */ 178810843SDave.Plauger@Sun.COM if (!dump_ioerr) 178910843SDave.Plauger@Sun.COM break; 179010843SDave.Plauger@Sun.COM 179110843SDave.Plauger@Sun.COM /* 179210843SDave.Plauger@Sun.COM * If an I/O error occurs, stay in the 179310843SDave.Plauger@Sun.COM * loop in order to empty the helper 179410843SDave.Plauger@Sun.COM * queue. Return the buffers to the 179510843SDave.Plauger@Sun.COM * main task to unmap and free it. 179610843SDave.Plauger@Sun.COM */ 179710843SDave.Plauger@Sun.COM hp->cpin->used = 0; 179810843SDave.Plauger@Sun.COM CQ_PUT(mainq, hp->cpin, CBUF_USEDMAP); 179910843SDave.Plauger@Sun.COM } 180010843SDave.Plauger@Sun.COM HRSTOP(hp->perpage, inwait); 180110843SDave.Plauger@Sun.COM 180210843SDave.Plauger@Sun.COM /* Stop here when the helper queue is closed. */ 180310843SDave.Plauger@Sun.COM if (hp->cpin == NULL) 180410843SDave.Plauger@Sun.COM break; 180510843SDave.Plauger@Sun.COM 180610843SDave.Plauger@Sun.COM /* Set the offset=0 to get the first pfn. */ 180710843SDave.Plauger@Sun.COM hp->in = 0; 180810843SDave.Plauger@Sun.COM 180910843SDave.Plauger@Sun.COM /* Set the total processed to 0 */ 181010843SDave.Plauger@Sun.COM hp->used = 0; 181110843SDave.Plauger@Sun.COM } 181210843SDave.Plauger@Sun.COM 181310843SDave.Plauger@Sun.COM /* Process the next page. */ 181410843SDave.Plauger@Sun.COM if (hp->used < hp->cpin->used) { 181510843SDave.Plauger@Sun.COM 181610843SDave.Plauger@Sun.COM /* 181710843SDave.Plauger@Sun.COM * Get the next page from the input buffer and 181810843SDave.Plauger@Sun.COM * return a copy. 181910843SDave.Plauger@Sun.COM */ 182010843SDave.Plauger@Sun.COM ASSERT(hp->in != -1); 182110843SDave.Plauger@Sun.COM HRSTART(hp->perpage, copy); 182210843SDave.Plauger@Sun.COM hp->in = dumpsys_copy_page(hp, hp->in); 182310843SDave.Plauger@Sun.COM hp->used += PAGESIZE; 182410843SDave.Plauger@Sun.COM HRSTOP(hp->perpage, copy); 182510843SDave.Plauger@Sun.COM break; 182610843SDave.Plauger@Sun.COM 182710843SDave.Plauger@Sun.COM } else { 182810843SDave.Plauger@Sun.COM 182910843SDave.Plauger@Sun.COM /* 183010843SDave.Plauger@Sun.COM * Done with the input. Flush the VM and 183110843SDave.Plauger@Sun.COM * return the buffer to the main task. 183210843SDave.Plauger@Sun.COM */ 183310843SDave.Plauger@Sun.COM if (panicstr && hp->helper != MAINHELPER) 183410843SDave.Plauger@Sun.COM hat_flush_range(kas.a_hat, 183510843SDave.Plauger@Sun.COM hp->cpin->buf, hp->cpin->size); 183610843SDave.Plauger@Sun.COM dumpsys_errmsg(hp, NULL); 183710843SDave.Plauger@Sun.COM CQ_PUT(mainq, hp->cpin, CBUF_USEDMAP); 183810843SDave.Plauger@Sun.COM hp->cpin = NULL; 183910843SDave.Plauger@Sun.COM } 184010843SDave.Plauger@Sun.COM } 184110843SDave.Plauger@Sun.COM 184210843SDave.Plauger@Sun.COM return (hp->cpin != NULL); 184310843SDave.Plauger@Sun.COM } 184410843SDave.Plauger@Sun.COM 184510843SDave.Plauger@Sun.COM /* 184610843SDave.Plauger@Sun.COM * Compress size bytes starting at buf with bzip2 184710843SDave.Plauger@Sun.COM * mode: 184810843SDave.Plauger@Sun.COM * BZ_RUN add one more compressed page 184910843SDave.Plauger@Sun.COM * BZ_FINISH no more input, flush the state 185010843SDave.Plauger@Sun.COM */ 185110843SDave.Plauger@Sun.COM static void 185210843SDave.Plauger@Sun.COM dumpsys_bzrun(helper_t *hp, void *buf, size_t size, int mode) 185310843SDave.Plauger@Sun.COM { 185410843SDave.Plauger@Sun.COM dumpsync_t *ds = hp->ds; 185510843SDave.Plauger@Sun.COM const int CSIZE = sizeof (dumpcsize_t); 185610843SDave.Plauger@Sun.COM bz_stream *ps = &hp->bzstream; 185710843SDave.Plauger@Sun.COM int rc = 0; 185810843SDave.Plauger@Sun.COM uint32_t csize; 185910843SDave.Plauger@Sun.COM dumpcsize_t cs; 186010843SDave.Plauger@Sun.COM 186110843SDave.Plauger@Sun.COM /* Set input pointers to new input page */ 186210843SDave.Plauger@Sun.COM if (size > 0) { 186310843SDave.Plauger@Sun.COM ps->avail_in = size; 186410843SDave.Plauger@Sun.COM ps->next_in = buf; 186510843SDave.Plauger@Sun.COM } 186610843SDave.Plauger@Sun.COM 186710843SDave.Plauger@Sun.COM /* CONSTCOND */ 186810843SDave.Plauger@Sun.COM while (1) { 186910843SDave.Plauger@Sun.COM 187010843SDave.Plauger@Sun.COM /* Quit when all input has been consumed */ 187110843SDave.Plauger@Sun.COM if (ps->avail_in == 0 && mode == BZ_RUN) 187210843SDave.Plauger@Sun.COM break; 187310843SDave.Plauger@Sun.COM 187410843SDave.Plauger@Sun.COM /* Get a new output buffer */ 187510843SDave.Plauger@Sun.COM if (hp->cpout == NULL) { 187610843SDave.Plauger@Sun.COM HRSTART(hp->perpage, outwait); 187710843SDave.Plauger@Sun.COM hp->cpout = CQ_GET(freebufq); 187810843SDave.Plauger@Sun.COM HRSTOP(hp->perpage, outwait); 187910843SDave.Plauger@Sun.COM ps->avail_out = hp->cpout->size - CSIZE; 188010843SDave.Plauger@Sun.COM ps->next_out = hp->cpout->buf + CSIZE; 188110843SDave.Plauger@Sun.COM } 188210843SDave.Plauger@Sun.COM 188310843SDave.Plauger@Sun.COM /* Compress input, or finalize */ 188410843SDave.Plauger@Sun.COM HRSTART(hp->perpage, compress); 188510843SDave.Plauger@Sun.COM rc = BZ2_bzCompress(ps, mode); 188610843SDave.Plauger@Sun.COM HRSTOP(hp->perpage, compress); 188710843SDave.Plauger@Sun.COM 188810843SDave.Plauger@Sun.COM /* Check for error */ 188910843SDave.Plauger@Sun.COM if (mode == BZ_RUN && rc != BZ_RUN_OK) { 189010843SDave.Plauger@Sun.COM dumpsys_errmsg(hp, "%d: BZ_RUN error %s at page %lx\n", 189110843SDave.Plauger@Sun.COM hp->helper, BZ2_bzErrorString(rc), 189210843SDave.Plauger@Sun.COM hp->cpin->pagenum); 189310843SDave.Plauger@Sun.COM break; 189410843SDave.Plauger@Sun.COM } 189510843SDave.Plauger@Sun.COM 189610843SDave.Plauger@Sun.COM /* Write the buffer if it is full, or we are flushing */ 189710843SDave.Plauger@Sun.COM if (ps->avail_out == 0 || mode == BZ_FINISH) { 189810843SDave.Plauger@Sun.COM csize = hp->cpout->size - CSIZE - ps->avail_out; 189910843SDave.Plauger@Sun.COM cs = DUMP_SET_TAG(csize, hp->tag); 190010843SDave.Plauger@Sun.COM if (csize > 0) { 190110843SDave.Plauger@Sun.COM (void) memcpy(hp->cpout->buf, &cs, CSIZE); 190210843SDave.Plauger@Sun.COM dumpsys_swrite(hp, hp->cpout, csize + CSIZE); 190310843SDave.Plauger@Sun.COM hp->cpout = NULL; 190410843SDave.Plauger@Sun.COM } 190510843SDave.Plauger@Sun.COM } 190610843SDave.Plauger@Sun.COM 190710843SDave.Plauger@Sun.COM /* Check for final complete */ 190810843SDave.Plauger@Sun.COM if (mode == BZ_FINISH) { 190910843SDave.Plauger@Sun.COM if (rc == BZ_STREAM_END) 191010843SDave.Plauger@Sun.COM break; 191110843SDave.Plauger@Sun.COM if (rc != BZ_FINISH_OK) { 191210843SDave.Plauger@Sun.COM dumpsys_errmsg(hp, "%d: BZ_FINISH error %s\n", 191310843SDave.Plauger@Sun.COM hp->helper, BZ2_bzErrorString(rc)); 191410843SDave.Plauger@Sun.COM break; 191510843SDave.Plauger@Sun.COM } 191610843SDave.Plauger@Sun.COM } 191710843SDave.Plauger@Sun.COM } 191810843SDave.Plauger@Sun.COM 191910843SDave.Plauger@Sun.COM /* Cleanup state and buffers */ 192010843SDave.Plauger@Sun.COM if (mode == BZ_FINISH) { 192110843SDave.Plauger@Sun.COM 192210843SDave.Plauger@Sun.COM /* Reset state so that it is re-usable. */ 192310843SDave.Plauger@Sun.COM (void) BZ2_bzCompressReset(&hp->bzstream); 192410843SDave.Plauger@Sun.COM 192510843SDave.Plauger@Sun.COM /* Give any unused outout buffer to the main task */ 192610843SDave.Plauger@Sun.COM if (hp->cpout != NULL) { 192710843SDave.Plauger@Sun.COM hp->cpout->used = 0; 192810843SDave.Plauger@Sun.COM CQ_PUT(mainq, hp->cpout, CBUF_ERRMSG); 192910843SDave.Plauger@Sun.COM hp->cpout = NULL; 193010843SDave.Plauger@Sun.COM } 193110843SDave.Plauger@Sun.COM } 193210843SDave.Plauger@Sun.COM } 193310843SDave.Plauger@Sun.COM 193410843SDave.Plauger@Sun.COM static void 193510843SDave.Plauger@Sun.COM dumpsys_bz2compress(helper_t *hp) 193610843SDave.Plauger@Sun.COM { 193710843SDave.Plauger@Sun.COM dumpsync_t *ds = hp->ds; 193810843SDave.Plauger@Sun.COM dumpstreamhdr_t sh; 193910843SDave.Plauger@Sun.COM 194010843SDave.Plauger@Sun.COM (void) strcpy(sh.stream_magic, DUMP_STREAM_MAGIC); 194110843SDave.Plauger@Sun.COM sh.stream_pagenum = (pgcnt_t)-1; 194210843SDave.Plauger@Sun.COM sh.stream_npages = 0; 194310843SDave.Plauger@Sun.COM hp->cpin = NULL; 194410843SDave.Plauger@Sun.COM hp->cpout = NULL; 194510843SDave.Plauger@Sun.COM hp->cperr = NULL; 194610843SDave.Plauger@Sun.COM hp->in = 0; 194710843SDave.Plauger@Sun.COM hp->out = 0; 194810843SDave.Plauger@Sun.COM hp->bzstream.avail_in = 0; 194910843SDave.Plauger@Sun.COM 195010843SDave.Plauger@Sun.COM /* Bump reference to mainq while we are running */ 195110843SDave.Plauger@Sun.COM CQ_OPEN(mainq); 195210843SDave.Plauger@Sun.COM 195310843SDave.Plauger@Sun.COM /* Get one page at a time */ 195410843SDave.Plauger@Sun.COM while (dumpsys_sread(hp)) { 195510843SDave.Plauger@Sun.COM if (sh.stream_pagenum != hp->cpin->pagenum) { 195610843SDave.Plauger@Sun.COM sh.stream_pagenum = hp->cpin->pagenum; 195710843SDave.Plauger@Sun.COM sh.stream_npages = btop(hp->cpin->used); 195810843SDave.Plauger@Sun.COM dumpsys_bzrun(hp, &sh, sizeof (sh), BZ_RUN); 195910843SDave.Plauger@Sun.COM } 196010843SDave.Plauger@Sun.COM dumpsys_bzrun(hp, hp->page, PAGESIZE, 0); 196110843SDave.Plauger@Sun.COM } 196210843SDave.Plauger@Sun.COM 196310843SDave.Plauger@Sun.COM /* Done with input, flush any partial buffer */ 196410843SDave.Plauger@Sun.COM if (sh.stream_pagenum != (pgcnt_t)-1) { 196510843SDave.Plauger@Sun.COM dumpsys_bzrun(hp, NULL, 0, BZ_FINISH); 196610843SDave.Plauger@Sun.COM dumpsys_errmsg(hp, NULL); 196710843SDave.Plauger@Sun.COM } 196810843SDave.Plauger@Sun.COM 196910843SDave.Plauger@Sun.COM ASSERT(hp->cpin == NULL && hp->cpout == NULL && hp->cperr == NULL); 197010843SDave.Plauger@Sun.COM 197110843SDave.Plauger@Sun.COM /* Decrement main queue count, we are done */ 197210843SDave.Plauger@Sun.COM CQ_CLOSE(mainq); 197310843SDave.Plauger@Sun.COM } 197410843SDave.Plauger@Sun.COM 197510843SDave.Plauger@Sun.COM /* 197610843SDave.Plauger@Sun.COM * Compress with lzjb 197710843SDave.Plauger@Sun.COM * write stream block if full or size==0 197810843SDave.Plauger@Sun.COM * if csize==0 write stream header, else write <csize, data> 197910843SDave.Plauger@Sun.COM * size==0 is a call to flush a buffer 198010843SDave.Plauger@Sun.COM * hp->cpout is the buffer we are flushing or filling 198110843SDave.Plauger@Sun.COM * hp->out is the next index to fill data 198210843SDave.Plauger@Sun.COM * osize is either csize+data, or the size of a stream header 198310843SDave.Plauger@Sun.COM */ 198410843SDave.Plauger@Sun.COM static void 198510843SDave.Plauger@Sun.COM dumpsys_lzjbrun(helper_t *hp, size_t csize, void *buf, size_t size) 198610843SDave.Plauger@Sun.COM { 198710843SDave.Plauger@Sun.COM dumpsync_t *ds = hp->ds; 198810843SDave.Plauger@Sun.COM const int CSIZE = sizeof (dumpcsize_t); 198910843SDave.Plauger@Sun.COM dumpcsize_t cs; 199010843SDave.Plauger@Sun.COM size_t osize = csize > 0 ? CSIZE + size : size; 199110843SDave.Plauger@Sun.COM 199210843SDave.Plauger@Sun.COM /* If flush, and there is no buffer, just return */ 199310843SDave.Plauger@Sun.COM if (size == 0 && hp->cpout == NULL) 199410843SDave.Plauger@Sun.COM return; 199510843SDave.Plauger@Sun.COM 199610843SDave.Plauger@Sun.COM /* If flush, or cpout is full, write it out */ 199710843SDave.Plauger@Sun.COM if (size == 0 || 199810843SDave.Plauger@Sun.COM hp->cpout != NULL && hp->out + osize > hp->cpout->size) { 199910843SDave.Plauger@Sun.COM 200010843SDave.Plauger@Sun.COM /* Set tag+size word at the front of the stream block. */ 200110843SDave.Plauger@Sun.COM cs = DUMP_SET_TAG(hp->out - CSIZE, hp->tag); 200210843SDave.Plauger@Sun.COM (void) memcpy(hp->cpout->buf, &cs, CSIZE); 200310843SDave.Plauger@Sun.COM 200410843SDave.Plauger@Sun.COM /* Write block to dump file. */ 200510843SDave.Plauger@Sun.COM dumpsys_swrite(hp, hp->cpout, hp->out); 200610843SDave.Plauger@Sun.COM 200710843SDave.Plauger@Sun.COM /* Clear pointer to indicate we need a new buffer */ 200810843SDave.Plauger@Sun.COM hp->cpout = NULL; 200910843SDave.Plauger@Sun.COM 201010843SDave.Plauger@Sun.COM /* flushing, we are done */ 201110843SDave.Plauger@Sun.COM if (size == 0) 201210843SDave.Plauger@Sun.COM return; 201310843SDave.Plauger@Sun.COM } 201410843SDave.Plauger@Sun.COM 201510843SDave.Plauger@Sun.COM /* Get an output buffer if we dont have one. */ 201610843SDave.Plauger@Sun.COM if (hp->cpout == NULL) { 201710843SDave.Plauger@Sun.COM HRSTART(hp->perpage, outwait); 201810843SDave.Plauger@Sun.COM hp->cpout = CQ_GET(freebufq); 201910843SDave.Plauger@Sun.COM HRSTOP(hp->perpage, outwait); 202010843SDave.Plauger@Sun.COM hp->out = CSIZE; 202110843SDave.Plauger@Sun.COM } 202210843SDave.Plauger@Sun.COM 202310843SDave.Plauger@Sun.COM /* Store csize word. This is the size of compressed data. */ 202410843SDave.Plauger@Sun.COM if (csize > 0) { 202510843SDave.Plauger@Sun.COM cs = DUMP_SET_TAG(csize, 0); 202610843SDave.Plauger@Sun.COM (void) memcpy(hp->cpout->buf + hp->out, &cs, CSIZE); 202710843SDave.Plauger@Sun.COM hp->out += CSIZE; 202810843SDave.Plauger@Sun.COM } 202910843SDave.Plauger@Sun.COM 203010843SDave.Plauger@Sun.COM /* Store the data. */ 203110843SDave.Plauger@Sun.COM (void) memcpy(hp->cpout->buf + hp->out, buf, size); 203210843SDave.Plauger@Sun.COM hp->out += size; 203310843SDave.Plauger@Sun.COM } 203410843SDave.Plauger@Sun.COM 203510843SDave.Plauger@Sun.COM static void 203610843SDave.Plauger@Sun.COM dumpsys_lzjbcompress(helper_t *hp) 203710843SDave.Plauger@Sun.COM { 203810843SDave.Plauger@Sun.COM dumpsync_t *ds = hp->ds; 203910843SDave.Plauger@Sun.COM size_t csize; 204010843SDave.Plauger@Sun.COM dumpstreamhdr_t sh; 204110843SDave.Plauger@Sun.COM 204210843SDave.Plauger@Sun.COM (void) strcpy(sh.stream_magic, DUMP_STREAM_MAGIC); 204310843SDave.Plauger@Sun.COM sh.stream_pagenum = (pfn_t)-1; 204410843SDave.Plauger@Sun.COM sh.stream_npages = 0; 204510843SDave.Plauger@Sun.COM hp->cpin = NULL; 204610843SDave.Plauger@Sun.COM hp->cpout = NULL; 204710843SDave.Plauger@Sun.COM hp->cperr = NULL; 204810843SDave.Plauger@Sun.COM hp->in = 0; 204910843SDave.Plauger@Sun.COM hp->out = 0; 205010843SDave.Plauger@Sun.COM 205110843SDave.Plauger@Sun.COM /* Bump reference to mainq while we are running */ 205210843SDave.Plauger@Sun.COM CQ_OPEN(mainq); 205310843SDave.Plauger@Sun.COM 205410843SDave.Plauger@Sun.COM /* Get one page at a time */ 205510843SDave.Plauger@Sun.COM while (dumpsys_sread(hp)) { 205610843SDave.Plauger@Sun.COM 205710843SDave.Plauger@Sun.COM /* Create a stream header for each new input map */ 205810843SDave.Plauger@Sun.COM if (sh.stream_pagenum != hp->cpin->pagenum) { 205910843SDave.Plauger@Sun.COM sh.stream_pagenum = hp->cpin->pagenum; 206010843SDave.Plauger@Sun.COM sh.stream_npages = btop(hp->cpin->used); 206110843SDave.Plauger@Sun.COM dumpsys_lzjbrun(hp, 0, &sh, sizeof (sh)); 206210843SDave.Plauger@Sun.COM } 206310843SDave.Plauger@Sun.COM 206410843SDave.Plauger@Sun.COM /* Compress one page */ 206510843SDave.Plauger@Sun.COM HRSTART(hp->perpage, compress); 206610843SDave.Plauger@Sun.COM csize = compress(hp->page, hp->lzbuf, PAGESIZE); 206710843SDave.Plauger@Sun.COM HRSTOP(hp->perpage, compress); 206810843SDave.Plauger@Sun.COM 206910843SDave.Plauger@Sun.COM /* Add csize+data to output block */ 207010843SDave.Plauger@Sun.COM ASSERT(csize > 0 && csize <= PAGESIZE); 207110843SDave.Plauger@Sun.COM dumpsys_lzjbrun(hp, csize, hp->lzbuf, csize); 207210843SDave.Plauger@Sun.COM } 207310843SDave.Plauger@Sun.COM 207410843SDave.Plauger@Sun.COM /* Done with input, flush any partial buffer */ 207510843SDave.Plauger@Sun.COM if (sh.stream_pagenum != (pfn_t)-1) { 207610843SDave.Plauger@Sun.COM dumpsys_lzjbrun(hp, 0, NULL, 0); 207710843SDave.Plauger@Sun.COM dumpsys_errmsg(hp, NULL); 207810843SDave.Plauger@Sun.COM } 207910843SDave.Plauger@Sun.COM 208010843SDave.Plauger@Sun.COM ASSERT(hp->cpin == NULL && hp->cpout == NULL && hp->cperr == NULL); 208110843SDave.Plauger@Sun.COM 208210843SDave.Plauger@Sun.COM /* Decrement main queue count, we are done */ 208310843SDave.Plauger@Sun.COM CQ_CLOSE(mainq); 208410843SDave.Plauger@Sun.COM } 208510843SDave.Plauger@Sun.COM 208610843SDave.Plauger@Sun.COM /* 208710843SDave.Plauger@Sun.COM * Dump helper called from panic_idle() to compress pages. CPUs in 208810843SDave.Plauger@Sun.COM * this path must not call most kernel services. 208910843SDave.Plauger@Sun.COM * 209010843SDave.Plauger@Sun.COM * During panic, all but one of the CPUs is idle. These CPUs are used 209110843SDave.Plauger@Sun.COM * as helpers working in parallel to copy and compress memory 209210843SDave.Plauger@Sun.COM * pages. During a panic, however, these processors cannot call any 209310843SDave.Plauger@Sun.COM * kernel services. This is because mutexes become no-ops during 209410843SDave.Plauger@Sun.COM * panic, and, cross-call interrupts are inhibited. Therefore, during 209510843SDave.Plauger@Sun.COM * panic dump the helper CPUs communicate with the panic CPU using 209610843SDave.Plauger@Sun.COM * memory variables. All memory mapping and I/O is performed by the 209710843SDave.Plauger@Sun.COM * panic CPU. 209810843SDave.Plauger@Sun.COM */ 209910843SDave.Plauger@Sun.COM void 210010843SDave.Plauger@Sun.COM dumpsys_helper() 210110843SDave.Plauger@Sun.COM { 210210843SDave.Plauger@Sun.COM dumpsys_spinlock(&dumpcfg.helper_lock); 210310843SDave.Plauger@Sun.COM if (dumpcfg.helpers_wanted) { 210410843SDave.Plauger@Sun.COM helper_t *hp, *hpend = &dumpcfg.helper[dumpcfg.nhelper]; 210510843SDave.Plauger@Sun.COM 210610843SDave.Plauger@Sun.COM for (hp = dumpcfg.helper; hp != hpend; hp++) { 210710843SDave.Plauger@Sun.COM if (hp->helper == FREEHELPER) { 210810843SDave.Plauger@Sun.COM hp->helper = CPU->cpu_id; 210910843SDave.Plauger@Sun.COM BT_SET(dumpcfg.helpermap, CPU->cpu_seqid); 211010843SDave.Plauger@Sun.COM 211110843SDave.Plauger@Sun.COM dumpsys_spinunlock(&dumpcfg.helper_lock); 211210843SDave.Plauger@Sun.COM 211310843SDave.Plauger@Sun.COM if (dumpcfg.clevel < DUMP_CLEVEL_BZIP2) 211410843SDave.Plauger@Sun.COM dumpsys_lzjbcompress(hp); 211510843SDave.Plauger@Sun.COM else 211610843SDave.Plauger@Sun.COM dumpsys_bz2compress(hp); 211710843SDave.Plauger@Sun.COM 211810843SDave.Plauger@Sun.COM hp->helper = DONEHELPER; 211910843SDave.Plauger@Sun.COM return; 212010843SDave.Plauger@Sun.COM } 212110843SDave.Plauger@Sun.COM } 212210843SDave.Plauger@Sun.COM } 212310843SDave.Plauger@Sun.COM dumpsys_spinunlock(&dumpcfg.helper_lock); 212410843SDave.Plauger@Sun.COM } 212510843SDave.Plauger@Sun.COM 212610843SDave.Plauger@Sun.COM /* 212710843SDave.Plauger@Sun.COM * Dump helper for live dumps. 212810843SDave.Plauger@Sun.COM * These run as a system task. 212910843SDave.Plauger@Sun.COM */ 213010843SDave.Plauger@Sun.COM static void 213110843SDave.Plauger@Sun.COM dumpsys_live_helper(void *arg) 213210843SDave.Plauger@Sun.COM { 213310843SDave.Plauger@Sun.COM helper_t *hp = arg; 213410843SDave.Plauger@Sun.COM 213510843SDave.Plauger@Sun.COM BT_ATOMIC_SET(dumpcfg.helpermap, CPU->cpu_seqid); 213610843SDave.Plauger@Sun.COM if (dumpcfg.clevel < DUMP_CLEVEL_BZIP2) 213710843SDave.Plauger@Sun.COM dumpsys_lzjbcompress(hp); 213810843SDave.Plauger@Sun.COM else 213910843SDave.Plauger@Sun.COM dumpsys_bz2compress(hp); 214010843SDave.Plauger@Sun.COM } 214110843SDave.Plauger@Sun.COM 214210843SDave.Plauger@Sun.COM /* 214310843SDave.Plauger@Sun.COM * Compress one page with lzjb (single threaded case) 214410843SDave.Plauger@Sun.COM */ 214510843SDave.Plauger@Sun.COM static void 214610843SDave.Plauger@Sun.COM dumpsys_lzjb_page(helper_t *hp, cbuf_t *cp) 214710843SDave.Plauger@Sun.COM { 214810843SDave.Plauger@Sun.COM dumpsync_t *ds = hp->ds; 214910843SDave.Plauger@Sun.COM uint32_t csize; 215010843SDave.Plauger@Sun.COM 215110843SDave.Plauger@Sun.COM hp->helper = MAINHELPER; 215210843SDave.Plauger@Sun.COM hp->in = 0; 215310843SDave.Plauger@Sun.COM hp->used = 0; 215410843SDave.Plauger@Sun.COM hp->cpin = cp; 215510843SDave.Plauger@Sun.COM while (hp->used < cp->used) { 215610843SDave.Plauger@Sun.COM HRSTART(hp->perpage, copy); 215710843SDave.Plauger@Sun.COM hp->in = dumpsys_copy_page(hp, hp->in); 215810843SDave.Plauger@Sun.COM hp->used += PAGESIZE; 215910843SDave.Plauger@Sun.COM HRSTOP(hp->perpage, copy); 216010843SDave.Plauger@Sun.COM 216110843SDave.Plauger@Sun.COM HRSTART(hp->perpage, compress); 216210843SDave.Plauger@Sun.COM csize = compress(hp->page, hp->lzbuf, PAGESIZE); 216310843SDave.Plauger@Sun.COM HRSTOP(hp->perpage, compress); 216410843SDave.Plauger@Sun.COM 216510843SDave.Plauger@Sun.COM HRSTART(hp->perpage, write); 216610843SDave.Plauger@Sun.COM dumpvp_write(&csize, sizeof (csize)); 216710843SDave.Plauger@Sun.COM dumpvp_write(hp->lzbuf, csize); 216810843SDave.Plauger@Sun.COM HRSTOP(hp->perpage, write); 216910843SDave.Plauger@Sun.COM } 217010843SDave.Plauger@Sun.COM CQ_PUT(mainq, hp->cpin, CBUF_USEDMAP); 217110843SDave.Plauger@Sun.COM hp->cpin = NULL; 217210843SDave.Plauger@Sun.COM } 217310843SDave.Plauger@Sun.COM 217410843SDave.Plauger@Sun.COM /* 217510843SDave.Plauger@Sun.COM * Main task to dump pages. This is called on the dump CPU. 217610843SDave.Plauger@Sun.COM */ 217710843SDave.Plauger@Sun.COM static void 217810843SDave.Plauger@Sun.COM dumpsys_main_task(void *arg) 217910843SDave.Plauger@Sun.COM { 218010843SDave.Plauger@Sun.COM dumpsync_t *ds = arg; 218110843SDave.Plauger@Sun.COM pgcnt_t pagenum = 0, bitnum = 0, hibitnum; 218210843SDave.Plauger@Sun.COM dumpmlw_t mlw; 218310843SDave.Plauger@Sun.COM cbuf_t *cp; 218410843SDave.Plauger@Sun.COM pgcnt_t baseoff, pfnoff; 218510843SDave.Plauger@Sun.COM pfn_t base, pfn; 218610843SDave.Plauger@Sun.COM int sec; 218710843SDave.Plauger@Sun.COM 218810843SDave.Plauger@Sun.COM dump_init_memlist_walker(&mlw); 218910843SDave.Plauger@Sun.COM 219010843SDave.Plauger@Sun.COM /* CONSTCOND */ 219110843SDave.Plauger@Sun.COM while (1) { 219210843SDave.Plauger@Sun.COM 219310843SDave.Plauger@Sun.COM if (ds->percent > ds->percent_done) { 219410843SDave.Plauger@Sun.COM ds->percent_done = ds->percent; 219510843SDave.Plauger@Sun.COM sec = (gethrtime() - ds->start) / 1000 / 1000 / 1000; 219610843SDave.Plauger@Sun.COM uprintf("^\r%2d:%02d %3d%% done", 219710843SDave.Plauger@Sun.COM sec / 60, sec % 60, ds->percent); 219810843SDave.Plauger@Sun.COM ds->neednl = 1; 219910843SDave.Plauger@Sun.COM } 220010843SDave.Plauger@Sun.COM 220110843SDave.Plauger@Sun.COM while (CQ_IS_EMPTY(mainq) && !CQ_IS_EMPTY(writerq)) { 220210843SDave.Plauger@Sun.COM 220310843SDave.Plauger@Sun.COM /* the writerq never blocks */ 220410843SDave.Plauger@Sun.COM cp = CQ_GET(writerq); 220510843SDave.Plauger@Sun.COM if (cp == NULL) 220610843SDave.Plauger@Sun.COM break; 220710843SDave.Plauger@Sun.COM 220810843SDave.Plauger@Sun.COM dump_timeleft = dump_timeout; 220910843SDave.Plauger@Sun.COM 221010843SDave.Plauger@Sun.COM HRSTART(ds->perpage, write); 221110843SDave.Plauger@Sun.COM dumpvp_write(cp->buf, cp->used); 221210843SDave.Plauger@Sun.COM HRSTOP(ds->perpage, write); 221310843SDave.Plauger@Sun.COM 221410843SDave.Plauger@Sun.COM CQ_PUT(freebufq, cp, CBUF_FREEBUF); 221510843SDave.Plauger@Sun.COM } 221610843SDave.Plauger@Sun.COM 221710843SDave.Plauger@Sun.COM /* 221810843SDave.Plauger@Sun.COM * Wait here for some buffers to process. Returns NULL 221910843SDave.Plauger@Sun.COM * when all helpers have terminated and all buffers 222010843SDave.Plauger@Sun.COM * have been processed. 222110843SDave.Plauger@Sun.COM */ 222210843SDave.Plauger@Sun.COM cp = CQ_GET(mainq); 222310843SDave.Plauger@Sun.COM 222410843SDave.Plauger@Sun.COM if (cp == NULL) { 222510843SDave.Plauger@Sun.COM 222610843SDave.Plauger@Sun.COM /* Drain the write queue. */ 222710843SDave.Plauger@Sun.COM if (!CQ_IS_EMPTY(writerq)) 222810843SDave.Plauger@Sun.COM continue; 222910843SDave.Plauger@Sun.COM 223010843SDave.Plauger@Sun.COM /* Main task exits here. */ 223110843SDave.Plauger@Sun.COM break; 223210843SDave.Plauger@Sun.COM } 223310843SDave.Plauger@Sun.COM 223410843SDave.Plauger@Sun.COM dump_timeleft = dump_timeout; 223510843SDave.Plauger@Sun.COM 223610843SDave.Plauger@Sun.COM switch (cp->state) { 223710843SDave.Plauger@Sun.COM 223810843SDave.Plauger@Sun.COM case CBUF_FREEMAP: 223910843SDave.Plauger@Sun.COM 224010843SDave.Plauger@Sun.COM /* 224110843SDave.Plauger@Sun.COM * Note that we drop CBUF_FREEMAP buffers on 224210843SDave.Plauger@Sun.COM * the floor (they will not be on any cqueue) 224310843SDave.Plauger@Sun.COM * when we no longer need them. 224410843SDave.Plauger@Sun.COM */ 224510843SDave.Plauger@Sun.COM if (bitnum >= dumpcfg.bitmapsize) 224610843SDave.Plauger@Sun.COM break; 224710843SDave.Plauger@Sun.COM 224810843SDave.Plauger@Sun.COM if (dump_ioerr) { 224910843SDave.Plauger@Sun.COM bitnum = dumpcfg.bitmapsize; 225010843SDave.Plauger@Sun.COM CQ_CLOSE(helperq); 225110843SDave.Plauger@Sun.COM break; 225210843SDave.Plauger@Sun.COM } 225310843SDave.Plauger@Sun.COM 225410843SDave.Plauger@Sun.COM HRSTART(ds->perpage, bitmap); 225510843SDave.Plauger@Sun.COM for (; bitnum < dumpcfg.bitmapsize; bitnum++) 225610843SDave.Plauger@Sun.COM if (BT_TEST(dumpcfg.bitmap, bitnum)) 225710843SDave.Plauger@Sun.COM break; 225810843SDave.Plauger@Sun.COM HRSTOP(ds->perpage, bitmap); 225910843SDave.Plauger@Sun.COM dump_timeleft = dump_timeout; 226010843SDave.Plauger@Sun.COM 226110843SDave.Plauger@Sun.COM if (bitnum >= dumpcfg.bitmapsize) { 226210843SDave.Plauger@Sun.COM CQ_CLOSE(helperq); 226310843SDave.Plauger@Sun.COM break; 226410843SDave.Plauger@Sun.COM } 226510843SDave.Plauger@Sun.COM 226610843SDave.Plauger@Sun.COM /* 226710843SDave.Plauger@Sun.COM * Try to map CBUF_MAPSIZE ranges. Can't 226810843SDave.Plauger@Sun.COM * assume that memory segment size is a 226910843SDave.Plauger@Sun.COM * multiple of CBUF_MAPSIZE. Can't assume that 227010843SDave.Plauger@Sun.COM * the segment starts on a CBUF_MAPSIZE 227110843SDave.Plauger@Sun.COM * boundary. 227210843SDave.Plauger@Sun.COM */ 227310843SDave.Plauger@Sun.COM pfn = dump_bitnum_to_pfn(bitnum, &mlw); 227410843SDave.Plauger@Sun.COM ASSERT(pfn != PFN_INVALID); 227510843SDave.Plauger@Sun.COM ASSERT(bitnum + mlw.mpleft <= dumpcfg.bitmapsize); 227610843SDave.Plauger@Sun.COM 227710843SDave.Plauger@Sun.COM base = P2ALIGN(pfn, CBUF_MAPNP); 227810843SDave.Plauger@Sun.COM if (base < mlw.mpaddr) { 227910843SDave.Plauger@Sun.COM base = mlw.mpaddr; 228010843SDave.Plauger@Sun.COM baseoff = P2PHASE(base, CBUF_MAPNP); 228110843SDave.Plauger@Sun.COM } else { 228210843SDave.Plauger@Sun.COM baseoff = 0; 228310843SDave.Plauger@Sun.COM } 228410843SDave.Plauger@Sun.COM 228510843SDave.Plauger@Sun.COM pfnoff = pfn - base; 228610843SDave.Plauger@Sun.COM if (pfnoff + mlw.mpleft < CBUF_MAPNP) { 228710843SDave.Plauger@Sun.COM hibitnum = bitnum + mlw.mpleft; 228810843SDave.Plauger@Sun.COM cp->size = ptob(pfnoff + mlw.mpleft); 228910843SDave.Plauger@Sun.COM } else { 229010843SDave.Plauger@Sun.COM hibitnum = bitnum - pfnoff + CBUF_MAPNP - 229110843SDave.Plauger@Sun.COM baseoff; 229210843SDave.Plauger@Sun.COM cp->size = CBUF_MAPSIZE - ptob(baseoff); 229310843SDave.Plauger@Sun.COM } 229410843SDave.Plauger@Sun.COM 229510843SDave.Plauger@Sun.COM cp->pfn = pfn; 229610843SDave.Plauger@Sun.COM cp->bitnum = bitnum++; 229710843SDave.Plauger@Sun.COM cp->pagenum = pagenum++; 229810843SDave.Plauger@Sun.COM cp->off = ptob(pfnoff); 229910843SDave.Plauger@Sun.COM 230010843SDave.Plauger@Sun.COM for (; bitnum < hibitnum; bitnum++) 230110843SDave.Plauger@Sun.COM if (BT_TEST(dumpcfg.bitmap, bitnum)) 230210843SDave.Plauger@Sun.COM pagenum++; 230310843SDave.Plauger@Sun.COM 230410843SDave.Plauger@Sun.COM dump_timeleft = dump_timeout; 230510843SDave.Plauger@Sun.COM cp->used = ptob(pagenum - cp->pagenum); 230610843SDave.Plauger@Sun.COM 230710843SDave.Plauger@Sun.COM HRSTART(ds->perpage, map); 230810843SDave.Plauger@Sun.COM hat_devload(kas.a_hat, cp->buf, cp->size, base, 230910843SDave.Plauger@Sun.COM PROT_READ, HAT_LOAD_NOCONSIST); 231010843SDave.Plauger@Sun.COM HRSTOP(ds->perpage, map); 231110843SDave.Plauger@Sun.COM 231210843SDave.Plauger@Sun.COM ds->pages_mapped += btop(cp->size); 231310843SDave.Plauger@Sun.COM ds->pages_used += pagenum - cp->pagenum; 231410843SDave.Plauger@Sun.COM 231510843SDave.Plauger@Sun.COM CQ_OPEN(mainq); 231610843SDave.Plauger@Sun.COM 231710843SDave.Plauger@Sun.COM /* 231810843SDave.Plauger@Sun.COM * If there are no helpers the main task does 231910843SDave.Plauger@Sun.COM * non-streams lzjb compress. 232010843SDave.Plauger@Sun.COM */ 232110843SDave.Plauger@Sun.COM if (dumpcfg.clevel == 0) { 232210843SDave.Plauger@Sun.COM dumpsys_lzjb_page(dumpcfg.helper, cp); 232310843SDave.Plauger@Sun.COM break; 232410843SDave.Plauger@Sun.COM } 232510843SDave.Plauger@Sun.COM 232610843SDave.Plauger@Sun.COM /* pass mapped pages to a helper */ 232710843SDave.Plauger@Sun.COM CQ_PUT(helperq, cp, CBUF_INREADY); 232810843SDave.Plauger@Sun.COM 232910843SDave.Plauger@Sun.COM /* the last page was done */ 233010843SDave.Plauger@Sun.COM if (bitnum >= dumpcfg.bitmapsize) 233110843SDave.Plauger@Sun.COM CQ_CLOSE(helperq); 233210843SDave.Plauger@Sun.COM 233310843SDave.Plauger@Sun.COM break; 233410843SDave.Plauger@Sun.COM 233510843SDave.Plauger@Sun.COM case CBUF_USEDMAP: 233610843SDave.Plauger@Sun.COM 233710843SDave.Plauger@Sun.COM ds->npages += btop(cp->used); 233810843SDave.Plauger@Sun.COM 233910843SDave.Plauger@Sun.COM HRSTART(ds->perpage, unmap); 234010843SDave.Plauger@Sun.COM hat_unload(kas.a_hat, cp->buf, cp->size, HAT_UNLOAD); 234110843SDave.Plauger@Sun.COM HRSTOP(ds->perpage, unmap); 234210843SDave.Plauger@Sun.COM 234310843SDave.Plauger@Sun.COM if (bitnum < dumpcfg.bitmapsize) 234410843SDave.Plauger@Sun.COM CQ_PUT(mainq, cp, CBUF_FREEMAP); 234510843SDave.Plauger@Sun.COM CQ_CLOSE(mainq); 234610843SDave.Plauger@Sun.COM 234710843SDave.Plauger@Sun.COM ASSERT(ds->npages <= dumphdr->dump_npages); 234810843SDave.Plauger@Sun.COM ds->percent = ds->npages * 100LL / dumphdr->dump_npages; 234910843SDave.Plauger@Sun.COM break; 235010843SDave.Plauger@Sun.COM 235110843SDave.Plauger@Sun.COM case CBUF_WRITE: 235210843SDave.Plauger@Sun.COM 235310843SDave.Plauger@Sun.COM CQ_PUT(writerq, cp, CBUF_WRITE); 235410843SDave.Plauger@Sun.COM break; 235510843SDave.Plauger@Sun.COM 235610843SDave.Plauger@Sun.COM case CBUF_ERRMSG: 235710843SDave.Plauger@Sun.COM 235810843SDave.Plauger@Sun.COM if (cp->used > 0) { 235910843SDave.Plauger@Sun.COM cp->buf[cp->size - 2] = '\n'; 236010843SDave.Plauger@Sun.COM cp->buf[cp->size - 1] = '\0'; 236110843SDave.Plauger@Sun.COM if (ds->neednl) { 236210843SDave.Plauger@Sun.COM uprintf("\n%s", cp->buf); 236310843SDave.Plauger@Sun.COM ds->neednl = 0; 236410843SDave.Plauger@Sun.COM } else { 236510843SDave.Plauger@Sun.COM uprintf("%s", cp->buf); 236610843SDave.Plauger@Sun.COM } 236711178SDave.Plauger@Sun.COM /* wait for console output */ 236811178SDave.Plauger@Sun.COM drv_usecwait(200000); 236911178SDave.Plauger@Sun.COM dump_timeleft = dump_timeout; 237010843SDave.Plauger@Sun.COM } 237110843SDave.Plauger@Sun.COM CQ_PUT(freebufq, cp, CBUF_FREEBUF); 237210843SDave.Plauger@Sun.COM break; 237310843SDave.Plauger@Sun.COM 237410843SDave.Plauger@Sun.COM default: 237510843SDave.Plauger@Sun.COM uprintf("dump: unexpected buffer state %d, " 237610843SDave.Plauger@Sun.COM "buffer will be lost\n", cp->state); 237710843SDave.Plauger@Sun.COM break; 237810843SDave.Plauger@Sun.COM 237910843SDave.Plauger@Sun.COM } /* end switch */ 238010843SDave.Plauger@Sun.COM 238110843SDave.Plauger@Sun.COM } /* end while(1) */ 238210843SDave.Plauger@Sun.COM } 238310843SDave.Plauger@Sun.COM 238410843SDave.Plauger@Sun.COM #ifdef COLLECT_METRICS 238510843SDave.Plauger@Sun.COM size_t 238610843SDave.Plauger@Sun.COM dumpsys_metrics(dumpsync_t *ds, char *buf, size_t size) 238710843SDave.Plauger@Sun.COM { 238810843SDave.Plauger@Sun.COM dumpcfg_t *cfg = &dumpcfg; 238910843SDave.Plauger@Sun.COM int myid = CPU->cpu_seqid; 239010843SDave.Plauger@Sun.COM int i, compress_ratio; 239110843SDave.Plauger@Sun.COM int sec, iorate; 239210843SDave.Plauger@Sun.COM helper_t *hp, *hpend = &cfg->helper[cfg->nhelper]; 239310843SDave.Plauger@Sun.COM char *e = buf + size; 239410843SDave.Plauger@Sun.COM char *p = buf; 239510843SDave.Plauger@Sun.COM 239610843SDave.Plauger@Sun.COM sec = ds->elapsed / (1000 * 1000 * 1000ULL); 239710843SDave.Plauger@Sun.COM if (sec < 1) 239810843SDave.Plauger@Sun.COM sec = 1; 239910843SDave.Plauger@Sun.COM 240010843SDave.Plauger@Sun.COM if (ds->iotime < 1) 240110843SDave.Plauger@Sun.COM ds->iotime = 1; 240210843SDave.Plauger@Sun.COM iorate = (ds->nwrite * 100000ULL) / ds->iotime; 240310843SDave.Plauger@Sun.COM 240410843SDave.Plauger@Sun.COM compress_ratio = 100LL * ds->npages / btopr(ds->nwrite + 1); 240510843SDave.Plauger@Sun.COM 240610843SDave.Plauger@Sun.COM #define P(...) (p += p < e ? snprintf(p, e - p, __VA_ARGS__) : 0) 240710843SDave.Plauger@Sun.COM 240810843SDave.Plauger@Sun.COM P("Master cpu_seqid,%d\n", CPU->cpu_seqid); 240910843SDave.Plauger@Sun.COM P("Master cpu_id,%d\n", CPU->cpu_id); 241010843SDave.Plauger@Sun.COM P("dump_flags,0x%x\n", dumphdr->dump_flags); 241110843SDave.Plauger@Sun.COM P("dump_ioerr,%d\n", dump_ioerr); 241210843SDave.Plauger@Sun.COM 241310843SDave.Plauger@Sun.COM P("Helpers:\n"); 241410843SDave.Plauger@Sun.COM for (i = 0; i < ncpus; i++) { 241510843SDave.Plauger@Sun.COM if ((i & 15) == 0) 241610843SDave.Plauger@Sun.COM P(",,%03d,", i); 241710843SDave.Plauger@Sun.COM if (i == myid) 241810843SDave.Plauger@Sun.COM P(" M"); 241910843SDave.Plauger@Sun.COM else if (BT_TEST(cfg->helpermap, i)) 242010843SDave.Plauger@Sun.COM P("%4d", cpu_seq[i]->cpu_id); 242110843SDave.Plauger@Sun.COM else 242210843SDave.Plauger@Sun.COM P(" *"); 242310843SDave.Plauger@Sun.COM if ((i & 15) == 15) 242410843SDave.Plauger@Sun.COM P("\n"); 242510843SDave.Plauger@Sun.COM } 242610843SDave.Plauger@Sun.COM 242710843SDave.Plauger@Sun.COM P("ncbuf_used,%d\n", cfg->ncbuf_used); 242810843SDave.Plauger@Sun.COM P("ncmap,%d\n", cfg->ncmap); 242910843SDave.Plauger@Sun.COM 243010843SDave.Plauger@Sun.COM P("Found %ldM ranges,%ld\n", (CBUF_MAPSIZE / DUMP_1MB), cfg->found4m); 243110843SDave.Plauger@Sun.COM P("Found small pages,%ld\n", cfg->foundsm); 243210843SDave.Plauger@Sun.COM 243310843SDave.Plauger@Sun.COM P("Compression level,%d\n", cfg->clevel); 243410843SDave.Plauger@Sun.COM P("Compression type,%s %s\n", cfg->clevel == 0 ? "serial" : "parallel", 243510843SDave.Plauger@Sun.COM cfg->clevel >= DUMP_CLEVEL_BZIP2 ? "bzip2" : "lzjb"); 243610843SDave.Plauger@Sun.COM P("Compression ratio,%d.%02d\n", compress_ratio / 100, compress_ratio % 243710843SDave.Plauger@Sun.COM 100); 243810843SDave.Plauger@Sun.COM P("nhelper_used,%d\n", cfg->nhelper_used); 243910843SDave.Plauger@Sun.COM 244010843SDave.Plauger@Sun.COM P("Dump I/O rate MBS,%d.%02d\n", iorate / 100, iorate % 100); 244110843SDave.Plauger@Sun.COM P("..total bytes,%lld\n", (u_longlong_t)ds->nwrite); 244210843SDave.Plauger@Sun.COM P("..total nsec,%lld\n", (u_longlong_t)ds->iotime); 244310843SDave.Plauger@Sun.COM P("dumpbuf.iosize,%ld\n", dumpbuf.iosize); 244410843SDave.Plauger@Sun.COM P("dumpbuf.size,%ld\n", dumpbuf.size); 244510843SDave.Plauger@Sun.COM 244610843SDave.Plauger@Sun.COM P("Dump pages/sec,%llu\n", (u_longlong_t)ds->npages / sec); 244710843SDave.Plauger@Sun.COM P("Dump pages,%llu\n", (u_longlong_t)ds->npages); 244810843SDave.Plauger@Sun.COM P("Dump time,%d\n", sec); 244910843SDave.Plauger@Sun.COM 245010843SDave.Plauger@Sun.COM if (ds->pages_mapped > 0) 245110843SDave.Plauger@Sun.COM P("per-cent map utilization,%d\n", (int)((100 * ds->pages_used) 245210843SDave.Plauger@Sun.COM / ds->pages_mapped)); 245310843SDave.Plauger@Sun.COM 245410843SDave.Plauger@Sun.COM P("\nPer-page metrics:\n"); 245510843SDave.Plauger@Sun.COM if (ds->npages > 0) { 245610843SDave.Plauger@Sun.COM for (hp = cfg->helper; hp != hpend; hp++) { 245710843SDave.Plauger@Sun.COM #define PERPAGE(x) ds->perpage.x += hp->perpage.x; 245810843SDave.Plauger@Sun.COM PERPAGES; 245910843SDave.Plauger@Sun.COM #undef PERPAGE 246010843SDave.Plauger@Sun.COM } 246110843SDave.Plauger@Sun.COM #define PERPAGE(x) \ 246210843SDave.Plauger@Sun.COM P("%s nsec/page,%d\n", #x, (int)(ds->perpage.x / ds->npages)); 246310843SDave.Plauger@Sun.COM PERPAGES; 246410843SDave.Plauger@Sun.COM #undef PERPAGE 246510843SDave.Plauger@Sun.COM P("freebufq.empty,%d\n", (int)(ds->freebufq.empty / 246610843SDave.Plauger@Sun.COM ds->npages)); 246710843SDave.Plauger@Sun.COM P("helperq.empty,%d\n", (int)(ds->helperq.empty / 246810843SDave.Plauger@Sun.COM ds->npages)); 246910843SDave.Plauger@Sun.COM P("writerq.empty,%d\n", (int)(ds->writerq.empty / 247010843SDave.Plauger@Sun.COM ds->npages)); 247110843SDave.Plauger@Sun.COM P("mainq.empty,%d\n", (int)(ds->mainq.empty / ds->npages)); 247210843SDave.Plauger@Sun.COM 247310843SDave.Plauger@Sun.COM P("I/O wait nsec/page,%llu\n", (u_longlong_t)(ds->iowait / 247410843SDave.Plauger@Sun.COM ds->npages)); 247510843SDave.Plauger@Sun.COM } 247610843SDave.Plauger@Sun.COM #undef P 247710843SDave.Plauger@Sun.COM if (p < e) 247810843SDave.Plauger@Sun.COM bzero(p, e - p); 247910843SDave.Plauger@Sun.COM return (p - buf); 248010843SDave.Plauger@Sun.COM } 248110843SDave.Plauger@Sun.COM #endif /* COLLECT_METRICS */ 248210843SDave.Plauger@Sun.COM 24830Sstevel@tonic-gate /* 24840Sstevel@tonic-gate * Dump the system. 24850Sstevel@tonic-gate */ 24860Sstevel@tonic-gate void 24870Sstevel@tonic-gate dumpsys(void) 24880Sstevel@tonic-gate { 248910843SDave.Plauger@Sun.COM dumpsync_t *ds = &dumpsync; 249010843SDave.Plauger@Sun.COM taskq_t *livetaskq = NULL; 24910Sstevel@tonic-gate pfn_t pfn; 24920Sstevel@tonic-gate pgcnt_t bitnum; 24930Sstevel@tonic-gate proc_t *p; 249410843SDave.Plauger@Sun.COM helper_t *hp, *hpend = &dumpcfg.helper[dumpcfg.nhelper]; 249510843SDave.Plauger@Sun.COM cbuf_t *cp; 24960Sstevel@tonic-gate pid_t npids, pidx; 24970Sstevel@tonic-gate char *content; 249811178SDave.Plauger@Sun.COM char *buf; 249911178SDave.Plauger@Sun.COM size_t size; 250010843SDave.Plauger@Sun.COM int save_dump_clevel; 250110843SDave.Plauger@Sun.COM dumpmlw_t mlw; 250210843SDave.Plauger@Sun.COM dumpcsize_t datatag; 250310843SDave.Plauger@Sun.COM dumpdatahdr_t datahdr; 25040Sstevel@tonic-gate 25050Sstevel@tonic-gate if (dumpvp == NULL || dumphdr == NULL) { 25060Sstevel@tonic-gate uprintf("skipping system dump - no dump device configured\n"); 250710843SDave.Plauger@Sun.COM if (panicstr) { 250810843SDave.Plauger@Sun.COM dumpcfg.helpers_wanted = 0; 250910843SDave.Plauger@Sun.COM dumpsys_spinunlock(&dumpcfg.helper_lock); 251010843SDave.Plauger@Sun.COM } 25110Sstevel@tonic-gate return; 25120Sstevel@tonic-gate } 251310843SDave.Plauger@Sun.COM dumpbuf.cur = dumpbuf.start; 251410843SDave.Plauger@Sun.COM 251510843SDave.Plauger@Sun.COM /* clear the sync variables */ 251610843SDave.Plauger@Sun.COM ASSERT(dumpcfg.nhelper > 0); 251710843SDave.Plauger@Sun.COM bzero(ds, sizeof (*ds)); 251810843SDave.Plauger@Sun.COM ds->dumpcpu = CPU->cpu_id; 25190Sstevel@tonic-gate 25200Sstevel@tonic-gate /* 25210Sstevel@tonic-gate * Calculate the starting block for dump. If we're dumping on a 25220Sstevel@tonic-gate * swap device, start 1/5 of the way in; otherwise, start at the 25230Sstevel@tonic-gate * beginning. And never use the first page -- it may be a disk label. 25240Sstevel@tonic-gate */ 25250Sstevel@tonic-gate if (dumpvp->v_flag & VISSWAP) 25260Sstevel@tonic-gate dumphdr->dump_start = P2ROUNDUP(dumpvp_size / 5, DUMP_OFFSET); 25270Sstevel@tonic-gate else 25280Sstevel@tonic-gate dumphdr->dump_start = DUMP_OFFSET; 25290Sstevel@tonic-gate 253010843SDave.Plauger@Sun.COM dumphdr->dump_flags = DF_VALID | DF_COMPLETE | DF_LIVE | DF_COMPRESSED; 25310Sstevel@tonic-gate dumphdr->dump_crashtime = gethrestime_sec(); 25320Sstevel@tonic-gate dumphdr->dump_npages = 0; 25330Sstevel@tonic-gate dumphdr->dump_nvtop = 0; 253410843SDave.Plauger@Sun.COM bzero(dumpcfg.bitmap, BT_SIZEOFMAP(dumpcfg.bitmapsize)); 25350Sstevel@tonic-gate dump_timeleft = dump_timeout; 25360Sstevel@tonic-gate 25370Sstevel@tonic-gate if (panicstr) { 25380Sstevel@tonic-gate dumphdr->dump_flags &= ~DF_LIVE; 25395331Samw (void) VOP_DUMPCTL(dumpvp, DUMP_FREE, NULL, NULL); 25405331Samw (void) VOP_DUMPCTL(dumpvp, DUMP_ALLOC, NULL, NULL); 25410Sstevel@tonic-gate (void) vsnprintf(dumphdr->dump_panicstring, DUMP_PANICSIZE, 25420Sstevel@tonic-gate panicstr, panicargs); 254310843SDave.Plauger@Sun.COM 25440Sstevel@tonic-gate } 25450Sstevel@tonic-gate 25460Sstevel@tonic-gate if (dump_conflags & DUMP_ALL) 25470Sstevel@tonic-gate content = "all"; 25480Sstevel@tonic-gate else if (dump_conflags & DUMP_CURPROC) 25490Sstevel@tonic-gate content = "kernel + curproc"; 25500Sstevel@tonic-gate else 25510Sstevel@tonic-gate content = "kernel"; 25520Sstevel@tonic-gate uprintf("dumping to %s, offset %lld, content: %s\n", dumppath, 25530Sstevel@tonic-gate dumphdr->dump_start, content); 25540Sstevel@tonic-gate 255510843SDave.Plauger@Sun.COM /* Make sure nodename is current */ 255610843SDave.Plauger@Sun.COM bcopy(utsname.nodename, dumphdr->dump_utsname.nodename, SYS_NMLN); 255710843SDave.Plauger@Sun.COM 255810843SDave.Plauger@Sun.COM /* 255910843SDave.Plauger@Sun.COM * If this is a live dump, try to open a VCHR vnode for better 256010843SDave.Plauger@Sun.COM * performance. We must take care to flush the buffer cache 256110843SDave.Plauger@Sun.COM * first. 256210843SDave.Plauger@Sun.COM */ 256310843SDave.Plauger@Sun.COM if (!panicstr) { 256410843SDave.Plauger@Sun.COM vnode_t *cdev_vp, *cmn_cdev_vp; 256510843SDave.Plauger@Sun.COM 256610843SDave.Plauger@Sun.COM ASSERT(dumpbuf.cdev_vp == NULL); 256710843SDave.Plauger@Sun.COM cdev_vp = makespecvp(VTOS(dumpvp)->s_dev, VCHR); 256810843SDave.Plauger@Sun.COM if (cdev_vp != NULL) { 256910843SDave.Plauger@Sun.COM cmn_cdev_vp = common_specvp(cdev_vp); 257010843SDave.Plauger@Sun.COM if (VOP_OPEN(&cmn_cdev_vp, FREAD | FWRITE, kcred, NULL) 257110843SDave.Plauger@Sun.COM == 0) { 257210843SDave.Plauger@Sun.COM if (vn_has_cached_data(dumpvp)) 257310843SDave.Plauger@Sun.COM (void) pvn_vplist_dirty(dumpvp, 0, NULL, 257410843SDave.Plauger@Sun.COM B_INVAL | B_TRUNC, kcred); 257510843SDave.Plauger@Sun.COM dumpbuf.cdev_vp = cmn_cdev_vp; 257610843SDave.Plauger@Sun.COM } else { 257710843SDave.Plauger@Sun.COM VN_RELE(cdev_vp); 257810843SDave.Plauger@Sun.COM } 257910843SDave.Plauger@Sun.COM } 258010843SDave.Plauger@Sun.COM } 258110843SDave.Plauger@Sun.COM 25820Sstevel@tonic-gate /* 258311066Srafael.vanoni@sun.com * Store a hires timestamp so we can look it up during debugging. 258411066Srafael.vanoni@sun.com */ 258511066Srafael.vanoni@sun.com lbolt_debug_entry(); 258611066Srafael.vanoni@sun.com 258711066Srafael.vanoni@sun.com /* 25880Sstevel@tonic-gate * Leave room for the message and ereport save areas and terminal dump 25890Sstevel@tonic-gate * header. 25900Sstevel@tonic-gate */ 259110843SDave.Plauger@Sun.COM dumpbuf.vp_limit = dumpvp_size - DUMP_LOGSIZE - DUMP_OFFSET - 259210843SDave.Plauger@Sun.COM DUMP_ERPTSIZE; 25930Sstevel@tonic-gate 25940Sstevel@tonic-gate /* 25950Sstevel@tonic-gate * Write out the symbol table. It's no longer compressed, 25960Sstevel@tonic-gate * so its 'size' and 'csize' are equal. 25970Sstevel@tonic-gate */ 259810843SDave.Plauger@Sun.COM dumpbuf.vp_off = dumphdr->dump_ksyms = dumphdr->dump_start + PAGESIZE; 25990Sstevel@tonic-gate dumphdr->dump_ksyms_size = dumphdr->dump_ksyms_csize = 26000Sstevel@tonic-gate ksyms_snapshot(dumpvp_ksyms_write, NULL, LONG_MAX); 26010Sstevel@tonic-gate 26020Sstevel@tonic-gate /* 26030Sstevel@tonic-gate * Write out the translation map. 26040Sstevel@tonic-gate */ 26050Sstevel@tonic-gate dumphdr->dump_map = dumpvp_flush(); 26060Sstevel@tonic-gate dump_as(&kas); 26073446Smrj dumphdr->dump_nvtop += dump_plat_addr(); 26080Sstevel@tonic-gate 26090Sstevel@tonic-gate /* 26100Sstevel@tonic-gate * call into hat, which may have unmapped pages that also need to 26110Sstevel@tonic-gate * be in the dump 26120Sstevel@tonic-gate */ 26130Sstevel@tonic-gate hat_dump(); 26140Sstevel@tonic-gate 26150Sstevel@tonic-gate if (dump_conflags & DUMP_ALL) { 26160Sstevel@tonic-gate mutex_enter(&pidlock); 26170Sstevel@tonic-gate 26180Sstevel@tonic-gate for (npids = 0, p = practive; p != NULL; p = p->p_next) 261910843SDave.Plauger@Sun.COM dumpcfg.pids[npids++] = p->p_pid; 26200Sstevel@tonic-gate 26210Sstevel@tonic-gate mutex_exit(&pidlock); 26220Sstevel@tonic-gate 26230Sstevel@tonic-gate for (pidx = 0; pidx < npids; pidx++) 262410843SDave.Plauger@Sun.COM (void) dump_process(dumpcfg.pids[pidx]); 262510843SDave.Plauger@Sun.COM 262610843SDave.Plauger@Sun.COM for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum++) { 26270Sstevel@tonic-gate dump_timeleft = dump_timeout; 262810843SDave.Plauger@Sun.COM BT_SET(dumpcfg.bitmap, bitnum); 26290Sstevel@tonic-gate } 263010843SDave.Plauger@Sun.COM dumphdr->dump_npages = dumpcfg.bitmapsize; 26310Sstevel@tonic-gate dumphdr->dump_flags |= DF_ALL; 26320Sstevel@tonic-gate 26330Sstevel@tonic-gate } else if (dump_conflags & DUMP_CURPROC) { 26340Sstevel@tonic-gate /* 26350Sstevel@tonic-gate * Determine which pid is to be dumped. If we're panicking, we 26360Sstevel@tonic-gate * dump the process associated with panic_thread (if any). If 26370Sstevel@tonic-gate * this is a live dump, we dump the process associated with 26380Sstevel@tonic-gate * curthread. 26390Sstevel@tonic-gate */ 26400Sstevel@tonic-gate npids = 0; 26410Sstevel@tonic-gate if (panicstr) { 26420Sstevel@tonic-gate if (panic_thread != NULL && 26430Sstevel@tonic-gate panic_thread->t_procp != NULL && 26440Sstevel@tonic-gate panic_thread->t_procp != &p0) { 264510843SDave.Plauger@Sun.COM dumpcfg.pids[npids++] = 26460Sstevel@tonic-gate panic_thread->t_procp->p_pid; 26470Sstevel@tonic-gate } 26480Sstevel@tonic-gate } else { 264910843SDave.Plauger@Sun.COM dumpcfg.pids[npids++] = curthread->t_procp->p_pid; 26500Sstevel@tonic-gate } 26510Sstevel@tonic-gate 265210843SDave.Plauger@Sun.COM if (npids && dump_process(dumpcfg.pids[0]) == 0) 26530Sstevel@tonic-gate dumphdr->dump_flags |= DF_CURPROC; 26540Sstevel@tonic-gate else 26550Sstevel@tonic-gate dumphdr->dump_flags |= DF_KERNEL; 26560Sstevel@tonic-gate 26570Sstevel@tonic-gate } else { 26580Sstevel@tonic-gate dumphdr->dump_flags |= DF_KERNEL; 26590Sstevel@tonic-gate } 26600Sstevel@tonic-gate 26610Sstevel@tonic-gate dumphdr->dump_hashmask = (1 << highbit(dumphdr->dump_nvtop - 1)) - 1; 26620Sstevel@tonic-gate 26630Sstevel@tonic-gate /* 26640Sstevel@tonic-gate * Write out the pfn table. 26650Sstevel@tonic-gate */ 26660Sstevel@tonic-gate dumphdr->dump_pfn = dumpvp_flush(); 266710843SDave.Plauger@Sun.COM dump_init_memlist_walker(&mlw); 266810843SDave.Plauger@Sun.COM for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum++) { 26690Sstevel@tonic-gate dump_timeleft = dump_timeout; 267010843SDave.Plauger@Sun.COM if (!BT_TEST(dumpcfg.bitmap, bitnum)) 26710Sstevel@tonic-gate continue; 267210843SDave.Plauger@Sun.COM pfn = dump_bitnum_to_pfn(bitnum, &mlw); 26730Sstevel@tonic-gate ASSERT(pfn != PFN_INVALID); 26740Sstevel@tonic-gate dumpvp_write(&pfn, sizeof (pfn_t)); 26750Sstevel@tonic-gate } 26763446Smrj dump_plat_pfn(); 26770Sstevel@tonic-gate 26780Sstevel@tonic-gate /* 26790Sstevel@tonic-gate * Write out all the pages. 268010843SDave.Plauger@Sun.COM * Map pages, copy them handling UEs, compress, and write them out. 268110843SDave.Plauger@Sun.COM * Cooperate with any helpers running on CPUs in panic_idle(). 26820Sstevel@tonic-gate */ 26830Sstevel@tonic-gate dumphdr->dump_data = dumpvp_flush(); 268410843SDave.Plauger@Sun.COM 268510843SDave.Plauger@Sun.COM bzero(dumpcfg.helpermap, BT_SIZEOFMAP(NCPU)); 268610843SDave.Plauger@Sun.COM ds->live = dumpcfg.clevel > 0 && 268710843SDave.Plauger@Sun.COM (dumphdr->dump_flags & DF_LIVE) != 0; 268810843SDave.Plauger@Sun.COM 268910843SDave.Plauger@Sun.COM save_dump_clevel = dumpcfg.clevel; 269010843SDave.Plauger@Sun.COM if (panicstr) 269110843SDave.Plauger@Sun.COM dumpsys_get_maxmem(); 269210843SDave.Plauger@Sun.COM else if (dumpcfg.clevel >= DUMP_CLEVEL_BZIP2) 269310843SDave.Plauger@Sun.COM dumpcfg.clevel = DUMP_CLEVEL_LZJB; 269410843SDave.Plauger@Sun.COM 269510843SDave.Plauger@Sun.COM dumpcfg.nhelper_used = 0; 269610843SDave.Plauger@Sun.COM for (hp = dumpcfg.helper; hp != hpend; hp++) { 269710843SDave.Plauger@Sun.COM if (hp->page == NULL) { 269810843SDave.Plauger@Sun.COM hp->helper = DONEHELPER; 26990Sstevel@tonic-gate continue; 27000Sstevel@tonic-gate } 270110843SDave.Plauger@Sun.COM ++dumpcfg.nhelper_used; 270210843SDave.Plauger@Sun.COM hp->helper = FREEHELPER; 270310843SDave.Plauger@Sun.COM hp->taskqid = NULL; 270410843SDave.Plauger@Sun.COM hp->ds = ds; 270510843SDave.Plauger@Sun.COM bzero(&hp->perpage, sizeof (hp->perpage)); 270610843SDave.Plauger@Sun.COM if (dumpcfg.clevel >= DUMP_CLEVEL_BZIP2) 270710843SDave.Plauger@Sun.COM (void) BZ2_bzCompressReset(&hp->bzstream); 270810843SDave.Plauger@Sun.COM } 270910843SDave.Plauger@Sun.COM 271010843SDave.Plauger@Sun.COM CQ_OPEN(freebufq); 271110843SDave.Plauger@Sun.COM CQ_OPEN(helperq); 271210843SDave.Plauger@Sun.COM 271310843SDave.Plauger@Sun.COM dumpcfg.ncbuf_used = 0; 271410843SDave.Plauger@Sun.COM for (cp = dumpcfg.cbuf; cp != &dumpcfg.cbuf[dumpcfg.ncbuf]; cp++) { 271510843SDave.Plauger@Sun.COM if (cp->buf != NULL) { 271610843SDave.Plauger@Sun.COM CQ_PUT(freebufq, cp, CBUF_FREEBUF); 271710843SDave.Plauger@Sun.COM ++dumpcfg.ncbuf_used; 27180Sstevel@tonic-gate } 27190Sstevel@tonic-gate } 272010843SDave.Plauger@Sun.COM 272110843SDave.Plauger@Sun.COM for (cp = dumpcfg.cmap; cp != &dumpcfg.cmap[dumpcfg.ncmap]; cp++) 272210843SDave.Plauger@Sun.COM CQ_PUT(mainq, cp, CBUF_FREEMAP); 272310843SDave.Plauger@Sun.COM 272410843SDave.Plauger@Sun.COM ds->start = gethrtime(); 272510843SDave.Plauger@Sun.COM ds->iowaitts = ds->start; 272610843SDave.Plauger@Sun.COM 272710843SDave.Plauger@Sun.COM /* start helpers */ 272810843SDave.Plauger@Sun.COM if (ds->live) { 272910843SDave.Plauger@Sun.COM int n = dumpcfg.nhelper_used; 273010843SDave.Plauger@Sun.COM int pri = MINCLSYSPRI - 25; 273110843SDave.Plauger@Sun.COM 273210843SDave.Plauger@Sun.COM livetaskq = taskq_create("LiveDump", n, pri, n, n, 273310843SDave.Plauger@Sun.COM TASKQ_PREPOPULATE); 273410843SDave.Plauger@Sun.COM for (hp = dumpcfg.helper; hp != hpend; hp++) { 273510843SDave.Plauger@Sun.COM if (hp->page == NULL) 273610843SDave.Plauger@Sun.COM continue; 273710843SDave.Plauger@Sun.COM hp->helper = hp - dumpcfg.helper; 273810843SDave.Plauger@Sun.COM hp->taskqid = taskq_dispatch(livetaskq, 273910843SDave.Plauger@Sun.COM dumpsys_live_helper, (void *)hp, TQ_NOSLEEP); 274010843SDave.Plauger@Sun.COM } 274110843SDave.Plauger@Sun.COM 274210843SDave.Plauger@Sun.COM } else { 274311178SDave.Plauger@Sun.COM if (panicstr) 274411178SDave.Plauger@Sun.COM kmem_dump_begin(); 274510843SDave.Plauger@Sun.COM dumpcfg.helpers_wanted = dumpcfg.clevel > 0; 274610843SDave.Plauger@Sun.COM dumpsys_spinunlock(&dumpcfg.helper_lock); 274710843SDave.Plauger@Sun.COM } 274810843SDave.Plauger@Sun.COM 274910843SDave.Plauger@Sun.COM /* run main task */ 275010843SDave.Plauger@Sun.COM dumpsys_main_task(ds); 275110843SDave.Plauger@Sun.COM 275210843SDave.Plauger@Sun.COM ds->elapsed = gethrtime() - ds->start; 275310843SDave.Plauger@Sun.COM if (ds->elapsed < 1) 275410843SDave.Plauger@Sun.COM ds->elapsed = 1; 275510843SDave.Plauger@Sun.COM 275610843SDave.Plauger@Sun.COM if (livetaskq != NULL) 275710843SDave.Plauger@Sun.COM taskq_destroy(livetaskq); 275810843SDave.Plauger@Sun.COM 275910843SDave.Plauger@Sun.COM if (ds->neednl) { 276010843SDave.Plauger@Sun.COM uprintf("\n"); 276110843SDave.Plauger@Sun.COM ds->neednl = 0; 276210843SDave.Plauger@Sun.COM } 276310843SDave.Plauger@Sun.COM 276410843SDave.Plauger@Sun.COM /* record actual pages dumped */ 276510843SDave.Plauger@Sun.COM dumphdr->dump_npages = ds->npages; 276610843SDave.Plauger@Sun.COM 276710843SDave.Plauger@Sun.COM /* platform-specific data */ 276810843SDave.Plauger@Sun.COM dumphdr->dump_npages += dump_plat_data(dumpcfg.cbuf[0].buf); 276910843SDave.Plauger@Sun.COM 277010843SDave.Plauger@Sun.COM /* note any errors by clearing DF_COMPLETE */ 277110843SDave.Plauger@Sun.COM if (dump_ioerr || ds->npages < dumphdr->dump_npages) 277210843SDave.Plauger@Sun.COM dumphdr->dump_flags &= ~DF_COMPLETE; 277310843SDave.Plauger@Sun.COM 277410843SDave.Plauger@Sun.COM /* end of stream blocks */ 277510843SDave.Plauger@Sun.COM datatag = 0; 277610843SDave.Plauger@Sun.COM dumpvp_write(&datatag, sizeof (datatag)); 277710843SDave.Plauger@Sun.COM 277811178SDave.Plauger@Sun.COM bzero(&datahdr, sizeof (datahdr)); 277911178SDave.Plauger@Sun.COM 278011178SDave.Plauger@Sun.COM /* buffer for metrics */ 278111178SDave.Plauger@Sun.COM buf = dumpcfg.cbuf[0].buf; 278211178SDave.Plauger@Sun.COM size = MIN(dumpcfg.cbuf[0].size, DUMP_OFFSET - sizeof (dumphdr_t) - 278311178SDave.Plauger@Sun.COM sizeof (dumpdatahdr_t)); 278411178SDave.Plauger@Sun.COM 278511178SDave.Plauger@Sun.COM /* finish the kmem intercepts, collect kmem verbose info */ 278611178SDave.Plauger@Sun.COM if (panicstr) { 278711178SDave.Plauger@Sun.COM datahdr.dump_metrics = kmem_dump_finish(buf, size); 278811178SDave.Plauger@Sun.COM buf += datahdr.dump_metrics; 278911178SDave.Plauger@Sun.COM size -= datahdr.dump_metrics; 279011178SDave.Plauger@Sun.COM } 279111178SDave.Plauger@Sun.COM 279210843SDave.Plauger@Sun.COM /* compression info in data header */ 279310843SDave.Plauger@Sun.COM datahdr.dump_datahdr_magic = DUMP_DATAHDR_MAGIC; 279410843SDave.Plauger@Sun.COM datahdr.dump_datahdr_version = DUMP_DATAHDR_VERSION; 279510843SDave.Plauger@Sun.COM datahdr.dump_maxcsize = CBUF_SIZE; 279610843SDave.Plauger@Sun.COM datahdr.dump_maxrange = CBUF_MAPSIZE / PAGESIZE; 279710843SDave.Plauger@Sun.COM datahdr.dump_nstreams = dumpcfg.nhelper_used; 279810843SDave.Plauger@Sun.COM datahdr.dump_clevel = dumpcfg.clevel; 279910843SDave.Plauger@Sun.COM #ifdef COLLECT_METRICS 280010843SDave.Plauger@Sun.COM if (dump_metrics_on) 280111178SDave.Plauger@Sun.COM datahdr.dump_metrics += dumpsys_metrics(ds, buf, size); 280210843SDave.Plauger@Sun.COM #endif 280310843SDave.Plauger@Sun.COM datahdr.dump_data_csize = dumpvp_flush() - dumphdr->dump_data; 28040Sstevel@tonic-gate 28050Sstevel@tonic-gate /* 28060Sstevel@tonic-gate * Write out the initial and terminal dump headers. 28070Sstevel@tonic-gate */ 280810843SDave.Plauger@Sun.COM dumpbuf.vp_off = dumphdr->dump_start; 28090Sstevel@tonic-gate dumpvp_write(dumphdr, sizeof (dumphdr_t)); 28100Sstevel@tonic-gate (void) dumpvp_flush(); 28110Sstevel@tonic-gate 281210843SDave.Plauger@Sun.COM dumpbuf.vp_limit = dumpvp_size; 281310843SDave.Plauger@Sun.COM dumpbuf.vp_off = dumpbuf.vp_limit - DUMP_OFFSET; 28140Sstevel@tonic-gate dumpvp_write(dumphdr, sizeof (dumphdr_t)); 281510843SDave.Plauger@Sun.COM dumpvp_write(&datahdr, sizeof (dumpdatahdr_t)); 281610843SDave.Plauger@Sun.COM dumpvp_write(dumpcfg.cbuf[0].buf, datahdr.dump_metrics); 281710843SDave.Plauger@Sun.COM 28180Sstevel@tonic-gate (void) dumpvp_flush(); 28190Sstevel@tonic-gate 282010843SDave.Plauger@Sun.COM uprintf("\r%3d%% done: %llu pages dumped, ", 282110843SDave.Plauger@Sun.COM ds->percent_done, (u_longlong_t)ds->npages); 28220Sstevel@tonic-gate 28230Sstevel@tonic-gate if (dump_ioerr == 0) { 28240Sstevel@tonic-gate uprintf("dump succeeded\n"); 28250Sstevel@tonic-gate } else { 28260Sstevel@tonic-gate uprintf("dump failed: error %d\n", dump_ioerr); 282710843SDave.Plauger@Sun.COM #ifdef DEBUG 282810843SDave.Plauger@Sun.COM if (panicstr) 28290Sstevel@tonic-gate debug_enter("dump failed"); 283010843SDave.Plauger@Sun.COM #endif 28310Sstevel@tonic-gate } 28320Sstevel@tonic-gate 28330Sstevel@tonic-gate /* 28340Sstevel@tonic-gate * Write out all undelivered messages. This has to be the *last* 28350Sstevel@tonic-gate * thing we do because the dump process itself emits messages. 28360Sstevel@tonic-gate */ 28370Sstevel@tonic-gate if (panicstr) { 28380Sstevel@tonic-gate dump_ereports(); 28390Sstevel@tonic-gate dump_messages(); 28400Sstevel@tonic-gate } 28410Sstevel@tonic-gate 28420Sstevel@tonic-gate delay(2 * hz); /* let people see the 'done' message */ 28430Sstevel@tonic-gate dump_timeleft = 0; 28440Sstevel@tonic-gate dump_ioerr = 0; 284510843SDave.Plauger@Sun.COM 284610843SDave.Plauger@Sun.COM /* restore settings after live dump completes */ 284710843SDave.Plauger@Sun.COM if (!panicstr) { 284810843SDave.Plauger@Sun.COM dumpcfg.clevel = save_dump_clevel; 284910843SDave.Plauger@Sun.COM 285010843SDave.Plauger@Sun.COM /* release any VCHR open of the dump device */ 285110843SDave.Plauger@Sun.COM if (dumpbuf.cdev_vp != NULL) { 285210843SDave.Plauger@Sun.COM (void) VOP_CLOSE(dumpbuf.cdev_vp, FREAD | FWRITE, 1, 0, 285310843SDave.Plauger@Sun.COM kcred, NULL); 285410843SDave.Plauger@Sun.COM VN_RELE(dumpbuf.cdev_vp); 285510843SDave.Plauger@Sun.COM dumpbuf.cdev_vp = NULL; 285610843SDave.Plauger@Sun.COM } 285710843SDave.Plauger@Sun.COM } 28580Sstevel@tonic-gate } 28590Sstevel@tonic-gate 28600Sstevel@tonic-gate /* 28610Sstevel@tonic-gate * This function is called whenever the memory size, as represented 28620Sstevel@tonic-gate * by the phys_install list, changes. 28630Sstevel@tonic-gate */ 28640Sstevel@tonic-gate void 28650Sstevel@tonic-gate dump_resize() 28660Sstevel@tonic-gate { 28670Sstevel@tonic-gate mutex_enter(&dump_lock); 28680Sstevel@tonic-gate dumphdr_init(); 28690Sstevel@tonic-gate dumpbuf_resize(); 287010843SDave.Plauger@Sun.COM dump_update_clevel(); 28710Sstevel@tonic-gate mutex_exit(&dump_lock); 28720Sstevel@tonic-gate } 28736423Sgw25295 28746423Sgw25295 /* 28756423Sgw25295 * This function allows for dynamic resizing of a dump area. It assumes that 28766423Sgw25295 * the underlying device has update its appropriate size(9P). 28776423Sgw25295 */ 28786423Sgw25295 int 28796423Sgw25295 dumpvp_resize() 28806423Sgw25295 { 28816423Sgw25295 int error; 28826423Sgw25295 vattr_t vattr; 28836423Sgw25295 28846423Sgw25295 mutex_enter(&dump_lock); 28856423Sgw25295 vattr.va_mask = AT_SIZE; 28866423Sgw25295 if ((error = VOP_GETATTR(dumpvp, &vattr, 0, kcred, NULL)) != 0) { 28876423Sgw25295 mutex_exit(&dump_lock); 28886423Sgw25295 return (error); 28896423Sgw25295 } 28906423Sgw25295 28916423Sgw25295 if (error == 0 && vattr.va_size < 2 * DUMP_LOGSIZE + DUMP_ERPTSIZE) { 28926423Sgw25295 mutex_exit(&dump_lock); 28936423Sgw25295 return (ENOSPC); 28946423Sgw25295 } 28956423Sgw25295 28966423Sgw25295 dumpvp_size = vattr.va_size & -DUMP_OFFSET; 28976423Sgw25295 mutex_exit(&dump_lock); 28986423Sgw25295 return (0); 28996423Sgw25295 } 2900