10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*1366Spetede * Common Development and Distribution License (the "License"). 6*1366Spetede * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 22*1366Spetede * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 230Sstevel@tonic-gate * Use is subject to license terms. 240Sstevel@tonic-gate */ 250Sstevel@tonic-gate 260Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 270Sstevel@tonic-gate 280Sstevel@tonic-gate /* 290Sstevel@tonic-gate * NAME: raid.c 300Sstevel@tonic-gate * 310Sstevel@tonic-gate * DESCRIPTION: Main RAID driver source file containing open, close and I/O 320Sstevel@tonic-gate * operations. 330Sstevel@tonic-gate * 340Sstevel@tonic-gate * ROUTINES PROVIDED FOR EXTERNAL USE: 350Sstevel@tonic-gate * raid_open() - open the RAID metadevice for access. 360Sstevel@tonic-gate * raid_internal_open() - internal open routine of RAID metdevice. 
370Sstevel@tonic-gate * md_raid_strategy() - perform normal I/O operations, 380Sstevel@tonic-gate * such as read and write. 390Sstevel@tonic-gate * raid_close() - close the RAID metadevice. 400Sstevel@tonic-gate * raid_internal_close() - internal close routine of RAID metadevice. 410Sstevel@tonic-gate * raid_snarf() - initialize and clean up MDD records. 420Sstevel@tonic-gate * raid_halt() - reset the RAID metadevice 430Sstevel@tonic-gate * raid_line() - return the line # of this segment 440Sstevel@tonic-gate * raid_dcolumn() - return the data column # of this segment 450Sstevel@tonic-gate * raid_pcolumn() - return the parity column # of this segment 460Sstevel@tonic-gate */ 470Sstevel@tonic-gate 480Sstevel@tonic-gate #include <sys/param.h> 490Sstevel@tonic-gate #include <sys/systm.h> 500Sstevel@tonic-gate #include <sys/conf.h> 510Sstevel@tonic-gate #include <sys/file.h> 520Sstevel@tonic-gate #include <sys/user.h> 530Sstevel@tonic-gate #include <sys/uio.h> 540Sstevel@tonic-gate #include <sys/t_lock.h> 550Sstevel@tonic-gate #include <sys/buf.h> 560Sstevel@tonic-gate #include <sys/dkio.h> 570Sstevel@tonic-gate #include <sys/vtoc.h> 580Sstevel@tonic-gate #include <sys/kmem.h> 590Sstevel@tonic-gate #include <vm/page.h> 600Sstevel@tonic-gate #include <sys/cmn_err.h> 610Sstevel@tonic-gate #include <sys/sysmacros.h> 620Sstevel@tonic-gate #include <sys/types.h> 630Sstevel@tonic-gate #include <sys/mkdev.h> 640Sstevel@tonic-gate #include <sys/stat.h> 650Sstevel@tonic-gate #include <sys/open.h> 660Sstevel@tonic-gate #include <sys/modctl.h> 670Sstevel@tonic-gate #include <sys/ddi.h> 680Sstevel@tonic-gate #include <sys/sunddi.h> 690Sstevel@tonic-gate #include <sys/debug.h> 700Sstevel@tonic-gate #include <sys/lvm/md_raid.h> 710Sstevel@tonic-gate #include <sys/lvm/mdvar.h> 720Sstevel@tonic-gate #include <sys/lvm/md_convert.h> 730Sstevel@tonic-gate 740Sstevel@tonic-gate #include <sys/sysevent/eventdefs.h> 750Sstevel@tonic-gate #include <sys/sysevent/svm.h> 760Sstevel@tonic-gate 
770Sstevel@tonic-gate md_ops_t raid_md_ops; 780Sstevel@tonic-gate #ifndef lint 79*1366Spetede char _depends_on[] = "drv/md"; 800Sstevel@tonic-gate md_ops_t *md_interface_ops = &raid_md_ops; 810Sstevel@tonic-gate #endif /* lint */ 820Sstevel@tonic-gate 830Sstevel@tonic-gate extern unit_t md_nunits; 840Sstevel@tonic-gate extern unit_t md_nsets; 850Sstevel@tonic-gate extern md_set_t md_set[]; 860Sstevel@tonic-gate extern int md_status; 870Sstevel@tonic-gate extern major_t md_major; 880Sstevel@tonic-gate extern mdq_anchor_t md_done_daemon; 890Sstevel@tonic-gate extern mdq_anchor_t md_mstr_daemon; 900Sstevel@tonic-gate extern int md_sleep_for_test; 910Sstevel@tonic-gate extern clock_t md_hz; 920Sstevel@tonic-gate 930Sstevel@tonic-gate extern md_event_queue_t *md_event_queue; 940Sstevel@tonic-gate 950Sstevel@tonic-gate 960Sstevel@tonic-gate int pchunks = 16; 970Sstevel@tonic-gate int phigh = 1024; 980Sstevel@tonic-gate int plow = 128; 990Sstevel@tonic-gate int cchunks = 64; 1000Sstevel@tonic-gate int chigh = 1024; 1010Sstevel@tonic-gate int clow = 512; 1020Sstevel@tonic-gate int bchunks = 32; 1030Sstevel@tonic-gate int bhigh = 256; 1040Sstevel@tonic-gate int blow = 128; 1050Sstevel@tonic-gate 1060Sstevel@tonic-gate int raid_total_io = 0; 1070Sstevel@tonic-gate int raid_reads = 0; 1080Sstevel@tonic-gate int raid_writes = 0; 1090Sstevel@tonic-gate int raid_no_bpmaps = 0; 1100Sstevel@tonic-gate int raid_512 = 0; 1110Sstevel@tonic-gate int raid_1024 = 0; 1120Sstevel@tonic-gate int raid_1024_8192 = 0; 1130Sstevel@tonic-gate int raid_8192 = 0; 1140Sstevel@tonic-gate int raid_8192_bigger = 0; 1150Sstevel@tonic-gate int raid_line_lock_wait = 0; 1160Sstevel@tonic-gate 1170Sstevel@tonic-gate int data_buffer_waits = 0; 1180Sstevel@tonic-gate int parity_buffer_waits = 0; 1190Sstevel@tonic-gate 1200Sstevel@tonic-gate /* writer line locks */ 1210Sstevel@tonic-gate int raid_writer_locks = 0; /* total writer locks */ 1220Sstevel@tonic-gate int raid_write_waits = 0; /* total writer locks 
that waited */ 1230Sstevel@tonic-gate int raid_full_line_writes = 0; /* total full line writes */ 1240Sstevel@tonic-gate int raid_write_queue_length = 0; /* wait queue length */ 1250Sstevel@tonic-gate int raid_max_write_q_length = 0; /* maximum queue length */ 1260Sstevel@tonic-gate int raid_write_locks_active = 0; /* writer locks at any time */ 1270Sstevel@tonic-gate int raid_max_write_locks = 0; /* maximum writer locks active */ 1280Sstevel@tonic-gate 1290Sstevel@tonic-gate /* read line locks */ 1300Sstevel@tonic-gate int raid_reader_locks = 0; /* total reader locks held */ 1310Sstevel@tonic-gate int raid_reader_locks_active = 0; /* reader locks held */ 1320Sstevel@tonic-gate int raid_max_reader_locks = 0; /* maximum reader locks held in run */ 1330Sstevel@tonic-gate int raid_read_overlaps = 0; /* number of times 2 reads hit same line */ 1340Sstevel@tonic-gate int raid_read_waits = 0; /* times a reader waited on writer */ 1350Sstevel@tonic-gate 1360Sstevel@tonic-gate /* prewrite stats */ 1370Sstevel@tonic-gate int raid_prewrite_waits = 0; /* number of waits for a pw slot */ 1380Sstevel@tonic-gate int raid_pw = 0; /* number of pw slots in use */ 1390Sstevel@tonic-gate int raid_prewrite_max = 0; /* maximum number of pw slots in use */ 1400Sstevel@tonic-gate int raid_pw_invalidates = 0; 1410Sstevel@tonic-gate 1420Sstevel@tonic-gate static clock_t md_wr_wait = 0; 1430Sstevel@tonic-gate 1440Sstevel@tonic-gate int nv_available = 0; /* presence of nv-ram support in device */ 1450Sstevel@tonic-gate int nv_prewrite = 1; /* mark prewrites with nv_available */ 1460Sstevel@tonic-gate int nv_parity = 1; /* mark parity with nv_available */ 1470Sstevel@tonic-gate 1480Sstevel@tonic-gate kmem_cache_t *raid_parent_cache = NULL; 1490Sstevel@tonic-gate kmem_cache_t *raid_child_cache = NULL; 1500Sstevel@tonic-gate kmem_cache_t *raid_cbuf_cache = NULL; 1510Sstevel@tonic-gate 1520Sstevel@tonic-gate int raid_internal_open(minor_t mnum, int flag, int otyp, 1530Sstevel@tonic-gate int 
md_oflags); 1540Sstevel@tonic-gate 1550Sstevel@tonic-gate static void freebuffers(md_raidcs_t *cs); 1560Sstevel@tonic-gate static int raid_read(mr_unit_t *un, md_raidcs_t *cs); 1570Sstevel@tonic-gate static void raid_read_io(mr_unit_t *un, md_raidcs_t *cs); 1580Sstevel@tonic-gate static int raid_write(mr_unit_t *un, md_raidcs_t *cs); 1590Sstevel@tonic-gate static void raid_write_io(mr_unit_t *un, md_raidcs_t *cs); 1600Sstevel@tonic-gate static void raid_stage(md_raidcs_t *cs); 1610Sstevel@tonic-gate static void raid_enqueue(md_raidcs_t *cs); 1620Sstevel@tonic-gate static diskaddr_t raid_line(diskaddr_t segment, mr_unit_t *un); 1630Sstevel@tonic-gate uint_t raid_dcolumn(diskaddr_t segment, mr_unit_t *un); 1640Sstevel@tonic-gate static void getpbuffer(md_raidcs_t *cs); 1650Sstevel@tonic-gate static void getdbuffer(md_raidcs_t *cs); 1660Sstevel@tonic-gate static void raid_done(buf_t *bp); 1670Sstevel@tonic-gate static void raid_io_startup(mr_unit_t *un); 1680Sstevel@tonic-gate 1690Sstevel@tonic-gate static rus_state_t 1700Sstevel@tonic-gate raid_col2unit(rcs_state_t state, rus_state_t unitstate) 1710Sstevel@tonic-gate { 1720Sstevel@tonic-gate switch (state) { 1730Sstevel@tonic-gate case RCS_INIT: 1740Sstevel@tonic-gate return (RUS_INIT); 1750Sstevel@tonic-gate case RCS_OKAY: 1760Sstevel@tonic-gate return (RUS_OKAY); 1770Sstevel@tonic-gate case RCS_RESYNC: 1780Sstevel@tonic-gate if (unitstate & RUS_LAST_ERRED) 1790Sstevel@tonic-gate return (RUS_LAST_ERRED); 1800Sstevel@tonic-gate else 1810Sstevel@tonic-gate return (RUS_ERRED); 1820Sstevel@tonic-gate case RCS_ERRED: 1830Sstevel@tonic-gate return (RUS_ERRED); 1840Sstevel@tonic-gate case RCS_LAST_ERRED: 1850Sstevel@tonic-gate return (RUS_ERRED); 1860Sstevel@tonic-gate default: 1870Sstevel@tonic-gate break; 1880Sstevel@tonic-gate } 1890Sstevel@tonic-gate panic("raid_col2unit"); 1900Sstevel@tonic-gate /*NOTREACHED*/ 1910Sstevel@tonic-gate } 1920Sstevel@tonic-gate 1930Sstevel@tonic-gate void 1940Sstevel@tonic-gate 
raid_set_state(mr_unit_t *un, int col, rcs_state_t newstate, int force) 1950Sstevel@tonic-gate { 1960Sstevel@tonic-gate 1970Sstevel@tonic-gate rus_state_t unitstate, origstate; 1980Sstevel@tonic-gate rcs_state_t colstate; 1990Sstevel@tonic-gate rcs_state_t orig_colstate; 2000Sstevel@tonic-gate int errcnt = 0, 2010Sstevel@tonic-gate okaycnt = 0, 2020Sstevel@tonic-gate resynccnt = 0; 2030Sstevel@tonic-gate int i; 2040Sstevel@tonic-gate char *devname; 2050Sstevel@tonic-gate 2060Sstevel@tonic-gate ASSERT(un); 2070Sstevel@tonic-gate ASSERT(col < un->un_totalcolumncnt); 2080Sstevel@tonic-gate ASSERT(newstate & 2090Sstevel@tonic-gate (RCS_INIT | RCS_INIT_ERRED | RCS_OKAY | RCS_RESYNC | RCS_ERRED | 2100Sstevel@tonic-gate RCS_LAST_ERRED | RCS_REGEN)); 2110Sstevel@tonic-gate ASSERT((newstate & 2120Sstevel@tonic-gate ~(RCS_INIT | RCS_INIT_ERRED | RCS_OKAY | RCS_RESYNC | RCS_ERRED | 2130Sstevel@tonic-gate RCS_LAST_ERRED | RCS_REGEN)) 2140Sstevel@tonic-gate == 0); 2150Sstevel@tonic-gate 2160Sstevel@tonic-gate ASSERT(MDI_UNIT(MD_SID(un)) ? 
UNIT_WRITER_HELD(un) : 1); 2170Sstevel@tonic-gate 2180Sstevel@tonic-gate unitstate = un->un_state; 2190Sstevel@tonic-gate origstate = unitstate; 2200Sstevel@tonic-gate 2210Sstevel@tonic-gate if (force) { 2220Sstevel@tonic-gate un->un_column[col].un_devstate = newstate; 2230Sstevel@tonic-gate un->un_state = raid_col2unit(newstate, unitstate); 2240Sstevel@tonic-gate uniqtime32(&un->un_column[col].un_devtimestamp); 2250Sstevel@tonic-gate uniqtime32(&un->un_timestamp); 2260Sstevel@tonic-gate return; 2270Sstevel@tonic-gate } 2280Sstevel@tonic-gate 2290Sstevel@tonic-gate ASSERT(un->un_state & 2300Sstevel@tonic-gate (RUS_INIT | RUS_OKAY | RUS_ERRED | RUS_DOI | RUS_LAST_ERRED | 2310Sstevel@tonic-gate RUS_REGEN)); 2320Sstevel@tonic-gate ASSERT((un->un_state & ~(RUS_INIT | 2330Sstevel@tonic-gate RUS_OKAY | RUS_ERRED | RUS_DOI | RUS_LAST_ERRED | RUS_REGEN)) == 0); 2340Sstevel@tonic-gate 2350Sstevel@tonic-gate if (un->un_column[col].un_devstate == newstate) 2360Sstevel@tonic-gate return; 2370Sstevel@tonic-gate 2380Sstevel@tonic-gate if (newstate == RCS_REGEN) { 2390Sstevel@tonic-gate if (raid_state_cnt(un, RCS_OKAY) != un->un_totalcolumncnt) 2400Sstevel@tonic-gate return; 2410Sstevel@tonic-gate un->un_state = RUS_REGEN; 2420Sstevel@tonic-gate return; 2430Sstevel@tonic-gate } 2440Sstevel@tonic-gate 2450Sstevel@tonic-gate orig_colstate = un->un_column[col].un_devstate; 2460Sstevel@tonic-gate 2470Sstevel@tonic-gate /* 2480Sstevel@tonic-gate * if there is another column in the error state then this 2490Sstevel@tonic-gate * column should go to the last errored state 2500Sstevel@tonic-gate */ 2510Sstevel@tonic-gate for (i = 0; i < un->un_totalcolumncnt; i++) { 2520Sstevel@tonic-gate if (i == col) 2530Sstevel@tonic-gate colstate = newstate; 2540Sstevel@tonic-gate else 2550Sstevel@tonic-gate colstate = un->un_column[i].un_devstate; 2560Sstevel@tonic-gate if (colstate & (RCS_ERRED | RCS_LAST_ERRED | RCS_INIT_ERRED)) 2570Sstevel@tonic-gate errcnt++; 2580Sstevel@tonic-gate if (colstate & 
RCS_OKAY) 2590Sstevel@tonic-gate okaycnt++; 2600Sstevel@tonic-gate if (colstate & RCS_RESYNC) 2610Sstevel@tonic-gate resynccnt++; 2620Sstevel@tonic-gate } 2630Sstevel@tonic-gate ASSERT(resynccnt < 2); 2640Sstevel@tonic-gate 2650Sstevel@tonic-gate if (okaycnt == un->un_totalcolumncnt) 2660Sstevel@tonic-gate unitstate = RUS_OKAY; 2670Sstevel@tonic-gate else if (errcnt > 1) { 2680Sstevel@tonic-gate unitstate = RUS_LAST_ERRED; 2690Sstevel@tonic-gate if (newstate & RCS_ERRED) 2700Sstevel@tonic-gate newstate = RCS_LAST_ERRED; 2710Sstevel@tonic-gate } else if (errcnt == 1) 2720Sstevel@tonic-gate if (!(unitstate & RUS_LAST_ERRED)) 2730Sstevel@tonic-gate unitstate = RUS_ERRED; 2740Sstevel@tonic-gate 2750Sstevel@tonic-gate if (un->un_state == RUS_DOI) 2760Sstevel@tonic-gate unitstate = RUS_DOI; 2770Sstevel@tonic-gate 2780Sstevel@tonic-gate un->un_column[col].un_devstate = newstate; 2790Sstevel@tonic-gate uniqtime32(&un->un_column[col].un_devtimestamp); 2800Sstevel@tonic-gate /* 2810Sstevel@tonic-gate * if there are last errored column being brought back online 2820Sstevel@tonic-gate * by open or snarf, then be sure to clear the RUS_LAST_ERRED 2830Sstevel@tonic-gate * bit to allow writes. If there is a real error then the 2840Sstevel@tonic-gate * column will go back into last erred. 2850Sstevel@tonic-gate */ 2860Sstevel@tonic-gate if ((raid_state_cnt(un, RCS_LAST_ERRED) == 0) && 2870Sstevel@tonic-gate (raid_state_cnt(un, RCS_ERRED) == 1)) 2880Sstevel@tonic-gate unitstate = RUS_ERRED; 2890Sstevel@tonic-gate 2900Sstevel@tonic-gate un->un_state = unitstate; 2910Sstevel@tonic-gate uniqtime32(&un->un_timestamp); 2920Sstevel@tonic-gate 2930Sstevel@tonic-gate if ((! 
(origstate & (RUS_ERRED|RUS_LAST_ERRED|RUS_DOI))) && 2940Sstevel@tonic-gate (unitstate & (RUS_ERRED|RUS_LAST_ERRED|RUS_DOI))) { 2950Sstevel@tonic-gate devname = md_devname(MD_UN2SET(un), 2960Sstevel@tonic-gate un->un_column[col].un_dev, NULL, 0); 2970Sstevel@tonic-gate 2980Sstevel@tonic-gate cmn_err(CE_WARN, "md: %s: %s needs maintenance", 2990Sstevel@tonic-gate md_shortname(MD_SID(un)), devname); 3000Sstevel@tonic-gate 3010Sstevel@tonic-gate if (unitstate & RUS_LAST_ERRED) { 3020Sstevel@tonic-gate cmn_err(CE_WARN, "md: %s: %s last erred", 3030Sstevel@tonic-gate md_shortname(MD_SID(un)), devname); 3040Sstevel@tonic-gate 3050Sstevel@tonic-gate } else if (un->un_column[col].un_devflags & 3060Sstevel@tonic-gate MD_RAID_DEV_ISOPEN) { 3070Sstevel@tonic-gate /* 3080Sstevel@tonic-gate * Close the broken device and clear the open flag on 3090Sstevel@tonic-gate * it. We have to check that the device is open, 3100Sstevel@tonic-gate * otherwise the first open on it has resulted in the 3110Sstevel@tonic-gate * error that is being processed and the actual un_dev 3120Sstevel@tonic-gate * will be NODEV64. 3130Sstevel@tonic-gate */ 3140Sstevel@tonic-gate md_layered_close(un->un_column[col].un_dev, 3150Sstevel@tonic-gate MD_OFLG_NULL); 3160Sstevel@tonic-gate un->un_column[col].un_devflags &= ~MD_RAID_DEV_ISOPEN; 3170Sstevel@tonic-gate } 3180Sstevel@tonic-gate } else if (orig_colstate == RCS_LAST_ERRED && newstate == RCS_ERRED && 3190Sstevel@tonic-gate un->un_column[col].un_devflags & MD_RAID_DEV_ISOPEN) { 3200Sstevel@tonic-gate /* 3210Sstevel@tonic-gate * Similar to logic above except no log messages since we 3220Sstevel@tonic-gate * are just transitioning from Last Erred to Erred. 
3230Sstevel@tonic-gate */ 3240Sstevel@tonic-gate md_layered_close(un->un_column[col].un_dev, MD_OFLG_NULL); 3250Sstevel@tonic-gate un->un_column[col].un_devflags &= ~MD_RAID_DEV_ISOPEN; 3260Sstevel@tonic-gate } 3270Sstevel@tonic-gate 3280Sstevel@tonic-gate /* 3290Sstevel@tonic-gate * If a resync has completed, see if there is a Last Erred 3300Sstevel@tonic-gate * component that we can change to the Erred state. 3310Sstevel@tonic-gate */ 3320Sstevel@tonic-gate if ((orig_colstate == RCS_RESYNC) && (newstate == RCS_OKAY)) { 3330Sstevel@tonic-gate for (i = 0; i < un->un_totalcolumncnt; i++) { 3340Sstevel@tonic-gate if (i != col && 3350Sstevel@tonic-gate (un->un_column[i].un_devstate & RCS_LAST_ERRED)) { 3360Sstevel@tonic-gate raid_set_state(un, i, RCS_ERRED, 0); 3370Sstevel@tonic-gate break; 3380Sstevel@tonic-gate } 3390Sstevel@tonic-gate } 3400Sstevel@tonic-gate } 3410Sstevel@tonic-gate } 3420Sstevel@tonic-gate 3430Sstevel@tonic-gate /* 3440Sstevel@tonic-gate * NAME: erred_check_line 3450Sstevel@tonic-gate * 3460Sstevel@tonic-gate * DESCRIPTION: Return the type of write to perform on an erred column based 3470Sstevel@tonic-gate * upon any resync activity. 3480Sstevel@tonic-gate * 3490Sstevel@tonic-gate * if a column is being resynced and the write is above the 3500Sstevel@tonic-gate * resync point may have to write to the target being resynced. 3510Sstevel@tonic-gate * 3520Sstevel@tonic-gate * Column state may make it impossible to do the write 3530Sstevel@tonic-gate * in which case RCL_EIO or RCL_ENXIO is returned. 3540Sstevel@tonic-gate * 3550Sstevel@tonic-gate * If a column cannot be written directly, RCL_ERRED is 3560Sstevel@tonic-gate * returned and processing should proceed accordingly. 
3570Sstevel@tonic-gate * 3580Sstevel@tonic-gate * PARAMETERS: minor_t mnum - minor number identity of metadevice 3590Sstevel@tonic-gate * md_raidcs_t *cs - child save structure 3600Sstevel@tonic-gate * mr_column_t *dcolumn - pointer to data column structure 3610Sstevel@tonic-gate * mr_column_t *pcolumn - pointer to parity column structure 3620Sstevel@tonic-gate * 3630Sstevel@tonic-gate * RETURNS: RCL_OKAY, RCL_ERRED 3640Sstevel@tonic-gate * 3650Sstevel@tonic-gate * LOCKS: Expects Line Writer Lock and Unit Resource Lock to be held 3660Sstevel@tonic-gate * across call. 3670Sstevel@tonic-gate */ 3680Sstevel@tonic-gate 3690Sstevel@tonic-gate static int 3700Sstevel@tonic-gate erred_check_line(mr_unit_t *un, md_raidcs_t *cs, mr_column_t *column) 3710Sstevel@tonic-gate { 3720Sstevel@tonic-gate 3730Sstevel@tonic-gate ASSERT(un != NULL); 3740Sstevel@tonic-gate ASSERT(cs->cs_flags & MD_RCS_LLOCKD); 3750Sstevel@tonic-gate 3760Sstevel@tonic-gate if (column->un_devstate & RCS_OKAY) 3770Sstevel@tonic-gate return (RCL_OKAY); 3780Sstevel@tonic-gate 3790Sstevel@tonic-gate if (column->un_devstate & RCS_ERRED) 3800Sstevel@tonic-gate return (RCL_ERRED); /* do not read from errored disk */ 3810Sstevel@tonic-gate 3820Sstevel@tonic-gate /* 3830Sstevel@tonic-gate * for the last errored case their are two considerations. 3840Sstevel@tonic-gate * When the last errored column is the only errored column then 3850Sstevel@tonic-gate * do treat it like a maintenance column, not doing I/O from 3860Sstevel@tonic-gate * it. When it there are other failures then just attempt 3870Sstevel@tonic-gate * to use it. 
3880Sstevel@tonic-gate */ 3890Sstevel@tonic-gate if (column->un_devstate & RCS_LAST_ERRED) 3900Sstevel@tonic-gate return (RCL_ERRED); 3910Sstevel@tonic-gate 3920Sstevel@tonic-gate ASSERT(column->un_devstate & RCS_RESYNC); 3930Sstevel@tonic-gate 3940Sstevel@tonic-gate /* 3950Sstevel@tonic-gate * When a resync from a hotspare is being done (copy resync) 3960Sstevel@tonic-gate * then always treat it as an OKAY column, since no regen 3970Sstevel@tonic-gate * is required. 3980Sstevel@tonic-gate */ 3990Sstevel@tonic-gate if (column->un_devflags & MD_RAID_COPY_RESYNC) { 4000Sstevel@tonic-gate return (RCL_OKAY); 4010Sstevel@tonic-gate } 4020Sstevel@tonic-gate 4030Sstevel@tonic-gate mutex_enter(&un->un_mx); 4040Sstevel@tonic-gate if (cs->cs_line < un->un_resync_line_index) { 4050Sstevel@tonic-gate mutex_exit(&un->un_mx); 4060Sstevel@tonic-gate return (RCL_OKAY); 4070Sstevel@tonic-gate } 4080Sstevel@tonic-gate mutex_exit(&un->un_mx); 4090Sstevel@tonic-gate return (RCL_ERRED); 4100Sstevel@tonic-gate 4110Sstevel@tonic-gate } 4120Sstevel@tonic-gate 4130Sstevel@tonic-gate /* 4140Sstevel@tonic-gate * NAMES: raid_state_cnt 4150Sstevel@tonic-gate * 4160Sstevel@tonic-gate * DESCRIPTION: counts number of column in a specific state 4170Sstevel@tonic-gate * 4180Sstevel@tonic-gate * PARAMETERS: md_raid_t *un 4190Sstevel@tonic-gate * rcs_state state 4200Sstevel@tonic-gate */ 4210Sstevel@tonic-gate int 4220Sstevel@tonic-gate raid_state_cnt(mr_unit_t *un, rcs_state_t state) 4230Sstevel@tonic-gate { 4240Sstevel@tonic-gate int i, retval = 0; 4250Sstevel@tonic-gate 4260Sstevel@tonic-gate for (i = 0; i < un->un_totalcolumncnt; i++) 4270Sstevel@tonic-gate if (un->un_column[i].un_devstate & state) 4280Sstevel@tonic-gate retval++; 4290Sstevel@tonic-gate return (retval); 4300Sstevel@tonic-gate } 4310Sstevel@tonic-gate 4320Sstevel@tonic-gate /* 4330Sstevel@tonic-gate * NAMES: raid_io_overlaps 4340Sstevel@tonic-gate * 4350Sstevel@tonic-gate * DESCRIPTION: checkst for overlap of 2 child save 
structures 4360Sstevel@tonic-gate * 4370Sstevel@tonic-gate * PARAMETERS: md_raidcs_t cs1 4380Sstevel@tonic-gate * md_raidcs_t cs2 4390Sstevel@tonic-gate * 4400Sstevel@tonic-gate * RETURNS: 0 - no overlap 4410Sstevel@tonic-gate * 1 - overlap 4420Sstevel@tonic-gate */ 4430Sstevel@tonic-gate int 4440Sstevel@tonic-gate raid_io_overlaps(md_raidcs_t *cs1, md_raidcs_t *cs2) 4450Sstevel@tonic-gate { 4460Sstevel@tonic-gate if (cs1->cs_blkno > cs2->cs_lastblk) 4470Sstevel@tonic-gate return (0); 4480Sstevel@tonic-gate if (cs1->cs_lastblk < cs2->cs_blkno) 4490Sstevel@tonic-gate return (0); 4500Sstevel@tonic-gate return (1); 4510Sstevel@tonic-gate } 4520Sstevel@tonic-gate 4530Sstevel@tonic-gate /* 4540Sstevel@tonic-gate * NAMES: raid_parent_constructor 4550Sstevel@tonic-gate * DESCRIPTION: parent structure constructor routine 4560Sstevel@tonic-gate * PARAMETERS: 4570Sstevel@tonic-gate */ 4580Sstevel@tonic-gate /*ARGSUSED1*/ 4590Sstevel@tonic-gate static int 4600Sstevel@tonic-gate raid_parent_constructor(void *p, void *d1, int d2) 4610Sstevel@tonic-gate { 4620Sstevel@tonic-gate mutex_init(&((md_raidps_t *)p)->ps_mx, 4630Sstevel@tonic-gate NULL, MUTEX_DEFAULT, NULL); 4640Sstevel@tonic-gate mutex_init(&((md_raidps_t *)p)->ps_mapin_mx, 4650Sstevel@tonic-gate NULL, MUTEX_DEFAULT, NULL); 4660Sstevel@tonic-gate return (0); 4670Sstevel@tonic-gate } 4680Sstevel@tonic-gate 4690Sstevel@tonic-gate void 4700Sstevel@tonic-gate raid_parent_init(md_raidps_t *ps) 4710Sstevel@tonic-gate { 4720Sstevel@tonic-gate bzero(ps, offsetof(md_raidps_t, ps_mx)); 4730Sstevel@tonic-gate ((md_raidps_t *)ps)->ps_flags = MD_RPS_INUSE; 4740Sstevel@tonic-gate ((md_raidps_t *)ps)->ps_magic = RAID_PSMAGIC; 4750Sstevel@tonic-gate } 4760Sstevel@tonic-gate 4770Sstevel@tonic-gate /*ARGSUSED1*/ 4780Sstevel@tonic-gate static void 4790Sstevel@tonic-gate raid_parent_destructor(void *p, void *d) 4800Sstevel@tonic-gate { 4810Sstevel@tonic-gate mutex_destroy(&((md_raidps_t *)p)->ps_mx); 4820Sstevel@tonic-gate 
mutex_destroy(&((md_raidps_t *)p)->ps_mapin_mx); 4830Sstevel@tonic-gate } 4840Sstevel@tonic-gate 4850Sstevel@tonic-gate /* 4860Sstevel@tonic-gate * NAMES: raid_child_constructor 4870Sstevel@tonic-gate * DESCRIPTION: child structure constructor routine 4880Sstevel@tonic-gate * PARAMETERS: 4890Sstevel@tonic-gate */ 4900Sstevel@tonic-gate /*ARGSUSED1*/ 4910Sstevel@tonic-gate static int 4920Sstevel@tonic-gate raid_child_constructor(void *p, void *d1, int d2) 4930Sstevel@tonic-gate { 4940Sstevel@tonic-gate md_raidcs_t *cs = (md_raidcs_t *)p; 4950Sstevel@tonic-gate mutex_init(&cs->cs_mx, NULL, MUTEX_DEFAULT, NULL); 4960Sstevel@tonic-gate bioinit(&cs->cs_dbuf); 4970Sstevel@tonic-gate bioinit(&cs->cs_pbuf); 4980Sstevel@tonic-gate bioinit(&cs->cs_hbuf); 4990Sstevel@tonic-gate return (0); 5000Sstevel@tonic-gate } 5010Sstevel@tonic-gate 5020Sstevel@tonic-gate void 5030Sstevel@tonic-gate raid_child_init(md_raidcs_t *cs) 5040Sstevel@tonic-gate { 5050Sstevel@tonic-gate bzero(cs, offsetof(md_raidcs_t, cs_mx)); 5060Sstevel@tonic-gate 5070Sstevel@tonic-gate md_bioreset(&cs->cs_dbuf); 5080Sstevel@tonic-gate md_bioreset(&cs->cs_pbuf); 5090Sstevel@tonic-gate md_bioreset(&cs->cs_hbuf); 5100Sstevel@tonic-gate 5110Sstevel@tonic-gate ((md_raidcs_t *)cs)->cs_dbuf.b_chain = 5120Sstevel@tonic-gate ((md_raidcs_t *)cs)->cs_pbuf.b_chain = 5130Sstevel@tonic-gate ((md_raidcs_t *)cs)->cs_hbuf.b_chain = 5140Sstevel@tonic-gate (struct buf *)(cs); 5150Sstevel@tonic-gate 5160Sstevel@tonic-gate cs->cs_magic = RAID_CSMAGIC; 5170Sstevel@tonic-gate cs->cs_line = MD_DISKADDR_ERROR; 5180Sstevel@tonic-gate cs->cs_dpwslot = -1; 5190Sstevel@tonic-gate cs->cs_ppwslot = -1; 5200Sstevel@tonic-gate } 5210Sstevel@tonic-gate 5220Sstevel@tonic-gate /*ARGSUSED1*/ 5230Sstevel@tonic-gate static void 5240Sstevel@tonic-gate raid_child_destructor(void *p, void *d) 5250Sstevel@tonic-gate { 5260Sstevel@tonic-gate biofini(&((md_raidcs_t *)p)->cs_dbuf); 5270Sstevel@tonic-gate biofini(&((md_raidcs_t *)p)->cs_hbuf); 
5280Sstevel@tonic-gate biofini(&((md_raidcs_t *)p)->cs_pbuf); 5290Sstevel@tonic-gate mutex_destroy(&((md_raidcs_t *)p)->cs_mx); 5300Sstevel@tonic-gate } 5310Sstevel@tonic-gate 5320Sstevel@tonic-gate /*ARGSUSED1*/ 5330Sstevel@tonic-gate static int 5340Sstevel@tonic-gate raid_cbuf_constructor(void *p, void *d1, int d2) 5350Sstevel@tonic-gate { 5360Sstevel@tonic-gate bioinit(&((md_raidcbuf_t *)p)->cbuf_bp); 5370Sstevel@tonic-gate return (0); 5380Sstevel@tonic-gate } 5390Sstevel@tonic-gate 5400Sstevel@tonic-gate static void 5410Sstevel@tonic-gate raid_cbuf_init(md_raidcbuf_t *cb) 5420Sstevel@tonic-gate { 5430Sstevel@tonic-gate bzero(cb, offsetof(md_raidcbuf_t, cbuf_bp)); 5440Sstevel@tonic-gate md_bioreset(&cb->cbuf_bp); 5450Sstevel@tonic-gate cb->cbuf_magic = RAID_BUFMAGIC; 5460Sstevel@tonic-gate cb->cbuf_pwslot = -1; 5470Sstevel@tonic-gate cb->cbuf_flags = CBUF_WRITE; 5480Sstevel@tonic-gate } 5490Sstevel@tonic-gate 5500Sstevel@tonic-gate /*ARGSUSED1*/ 5510Sstevel@tonic-gate static void 5520Sstevel@tonic-gate raid_cbuf_destructor(void *p, void *d) 5530Sstevel@tonic-gate { 5540Sstevel@tonic-gate biofini(&((md_raidcbuf_t *)p)->cbuf_bp); 5550Sstevel@tonic-gate } 5560Sstevel@tonic-gate 5570Sstevel@tonic-gate /* 5580Sstevel@tonic-gate * NAMES: raid_run_queue 5590Sstevel@tonic-gate * DESCRIPTION: spawn a backend processing daemon for RAID metadevice. 
5600Sstevel@tonic-gate * PARAMETERS: 5610Sstevel@tonic-gate */ 5620Sstevel@tonic-gate /*ARGSUSED*/ 5630Sstevel@tonic-gate static void 5640Sstevel@tonic-gate raid_run_queue(void *d) 5650Sstevel@tonic-gate { 5660Sstevel@tonic-gate if (!(md_status & MD_GBL_DAEMONS_LIVE)) 5670Sstevel@tonic-gate md_daemon(1, &md_done_daemon); 5680Sstevel@tonic-gate } 5690Sstevel@tonic-gate 5700Sstevel@tonic-gate /* 5710Sstevel@tonic-gate * NAME: raid_build_pwslot 5720Sstevel@tonic-gate * DESCRIPTION: builds mr_pw_reserve for the column 5730Sstevel@tonic-gate * PARAMETERS: un is the pointer to the unit structure 5740Sstevel@tonic-gate * colindex is the column to create the structure for 5750Sstevel@tonic-gate */ 5760Sstevel@tonic-gate int 5770Sstevel@tonic-gate raid_build_pw_reservation(mr_unit_t *un, int colindex) 5780Sstevel@tonic-gate { 5790Sstevel@tonic-gate mr_pw_reserve_t *pw; 5800Sstevel@tonic-gate mr_scoreboard_t *sb; 5810Sstevel@tonic-gate int i; 5820Sstevel@tonic-gate 5830Sstevel@tonic-gate pw = (mr_pw_reserve_t *) kmem_zalloc(sizeof (mr_pw_reserve_t) + 5840Sstevel@tonic-gate (sizeof (mr_scoreboard_t) * un->un_pwcnt), KM_SLEEP); 5850Sstevel@tonic-gate pw->pw_magic = RAID_PWMAGIC; 5860Sstevel@tonic-gate pw->pw_column = colindex; 5870Sstevel@tonic-gate pw->pw_free = un->un_pwcnt; 5880Sstevel@tonic-gate sb = &pw->pw_sb[0]; 5890Sstevel@tonic-gate for (i = 0; i < un->un_pwcnt; i++) { 5900Sstevel@tonic-gate sb[i].sb_column = colindex; 5910Sstevel@tonic-gate sb[i].sb_flags = SB_UNUSED; 5920Sstevel@tonic-gate sb[i].sb_start_blk = 0; 5930Sstevel@tonic-gate sb[i].sb_last_blk = 0; 5940Sstevel@tonic-gate sb[i].sb_cs = NULL; 5950Sstevel@tonic-gate } 5960Sstevel@tonic-gate un->un_column_ic[colindex].un_pw_reserve = pw; 5970Sstevel@tonic-gate return (0); 5980Sstevel@tonic-gate } 5990Sstevel@tonic-gate /* 6000Sstevel@tonic-gate * NAME: raid_free_pw_reservation 6010Sstevel@tonic-gate * DESCRIPTION: RAID metadevice pre-write slot structure destroy routine 6020Sstevel@tonic-gate * PARAMETERS: 
mr_unit_t *un - pointer to a unit structure 6030Sstevel@tonic-gate * int colindex - index of the column whose pre-write slot struct 6040Sstevel@tonic-gate * is to be destroyed. 6050Sstevel@tonic-gate */ 6060Sstevel@tonic-gate void 6070Sstevel@tonic-gate raid_free_pw_reservation(mr_unit_t *un, int colindex) 6080Sstevel@tonic-gate { 6090Sstevel@tonic-gate mr_pw_reserve_t *pw = un->un_column_ic[colindex].un_pw_reserve; 6100Sstevel@tonic-gate 6110Sstevel@tonic-gate kmem_free(pw, sizeof (mr_pw_reserve_t) + 6120Sstevel@tonic-gate (sizeof (mr_scoreboard_t) * un->un_pwcnt)); 6130Sstevel@tonic-gate } 6140Sstevel@tonic-gate 6150Sstevel@tonic-gate /* 6160Sstevel@tonic-gate * NAME: raid_cancel_pwslot 6170Sstevel@tonic-gate * DESCRIPTION: RAID metadevice write routine 6180Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to a child structure 6190Sstevel@tonic-gate */ 6200Sstevel@tonic-gate static void 6210Sstevel@tonic-gate raid_cancel_pwslot(md_raidcs_t *cs) 6220Sstevel@tonic-gate { 6230Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 6240Sstevel@tonic-gate mr_pw_reserve_t *pw; 6250Sstevel@tonic-gate mr_scoreboard_t *sb; 6260Sstevel@tonic-gate mr_column_ic_t *col; 6270Sstevel@tonic-gate md_raidcbuf_t *cbuf; 6280Sstevel@tonic-gate int broadcast = 0; 6290Sstevel@tonic-gate 6300Sstevel@tonic-gate if (cs->cs_ps->ps_flags & MD_RPS_READ) 6310Sstevel@tonic-gate return; 6320Sstevel@tonic-gate if (cs->cs_dpwslot != -1) { 6330Sstevel@tonic-gate col = &un->un_column_ic[cs->cs_dcolumn]; 6340Sstevel@tonic-gate pw = col->un_pw_reserve; 6350Sstevel@tonic-gate sb = &pw->pw_sb[cs->cs_dpwslot]; 6360Sstevel@tonic-gate sb->sb_flags = SB_AVAIL; 6370Sstevel@tonic-gate if ((pw->pw_free++ == 0) || (un->un_rflags & MD_RFLAG_NEEDPW)) 6380Sstevel@tonic-gate broadcast++; 6390Sstevel@tonic-gate sb->sb_cs = NULL; 6400Sstevel@tonic-gate } 6410Sstevel@tonic-gate 6420Sstevel@tonic-gate if (cs->cs_ppwslot != -1) { 6430Sstevel@tonic-gate col = &un->un_column_ic[cs->cs_pcolumn]; 6440Sstevel@tonic-gate pw = 
col->un_pw_reserve;
		sb = &pw->pw_sb[cs->cs_ppwslot];
		sb->sb_flags = SB_AVAIL;
		/* waking first waiter on a previously-exhausted reserve */
		if ((pw->pw_free++ == 0) || (un->un_rflags & MD_RFLAG_NEEDPW))
			broadcast++;
		sb->sb_cs = NULL;
	}

	/* release the prewrite slot held by each cbuf in this child */
	for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) {
		if (cbuf->cbuf_pwslot == -1)
			continue;
		col = &un->un_column_ic[cbuf->cbuf_column];
		pw = col->un_pw_reserve;
		sb = &pw->pw_sb[cbuf->cbuf_pwslot];
		sb->sb_flags = SB_AVAIL;
		if ((pw->pw_free++ == 0) || (un->un_rflags & MD_RFLAG_NEEDPW))
			broadcast++;
		sb->sb_cs = NULL;
	}
	if (broadcast) {
		cv_broadcast(&un->un_cv);
		return;
	}
	mutex_enter(&un->un_mx);
	if (un->un_rflags & MD_RFLAG_NEEDPW)
		cv_broadcast(&un->un_cv);
	mutex_exit(&un->un_mx);
}

/*
 * NAMES:	raid_free_pwinvalidate
 * DESCRIPTION: drain a child's prewrite-invalidate list.  Each entry's
 *		scoreboard slot is returned to SB_UNUSED, its data buffer
 *		and cbuf are freed, and waiters blocked on prewrite-slot
 *		exhaustion are woken.  Runs under un_linlck_mx while
 *		touching the scoreboard.
 * PARAMETERS:	md_raidcs_t *cs - child structure whose cs_pw_inval_list
 *		is consumed (left NULL on return)
 */
static void
raid_free_pwinvalidate(md_raidcs_t *cs)
{
	md_raidcbuf_t		*cbuf;
	md_raidcbuf_t		*cbuf_to_free;
	mr_unit_t		*un = cs->cs_un;
	mdi_unit_t		*ui = MDI_UNIT(MD_SID(un));
	mr_pw_reserve_t		*pw;
	mr_scoreboard_t		*sb;
	int			broadcast = 0;

	cbuf = cs->cs_pw_inval_list;
	ASSERT(cbuf);
	mutex_enter(&un->un_linlck_mx);
	while (cbuf) {
		pw = un->un_column_ic[cbuf->cbuf_column].un_pw_reserve;
		sb = &pw->pw_sb[0];
		ASSERT(sb[cbuf->cbuf_pwslot].sb_flags & SB_INVAL_PEND);
		sb[cbuf->cbuf_pwslot].sb_flags = SB_UNUSED;
		sb[cbuf->cbuf_pwslot].sb_cs = NULL;
		if ((pw->pw_free++ == 0) || (un->un_rflags & MD_RFLAG_NEEDPW))
			broadcast++;
		/* advance before freeing: cbuf_next lives in the freed node */
		cbuf_to_free = cbuf;
		cbuf = cbuf->cbuf_next;
		kmem_free(cbuf_to_free->cbuf_buffer, dbtob(un->un_iosize));
		kmem_cache_free(raid_cbuf_cache, cbuf_to_free);
	}
	cs->cs_pw_inval_list = (md_raidcbuf_t *)NULL;
	/*
	 * now that there is a free prewrite slot, check to see if there
	 * are any io operations waiting; first wake up the raid_io_startup
	 * then signal the processes waiting in raid_write.
	 */
	if (ui->ui_io_lock->io_list_front)
		raid_io_startup(un);
	mutex_exit(&un->un_linlck_mx);
	if (broadcast) {
		cv_broadcast(&un->un_cv);
		return;
	}
	mutex_enter(&un->un_mx);
	if (un->un_rflags & MD_RFLAG_NEEDPW)
		cv_broadcast(&un->un_cv);
	mutex_exit(&un->un_mx);
}

/*
 * NAMES:	raid_get_pwslot
 * DESCRIPTION: claim a prewrite scoreboard slot on the given column for
 *		this child's block range.  Prefers a slot whose recorded
 *		range overlaps the new range (so replay ordering stays
 *		correct); otherwise takes the first available slot.
 *		Caller must have verified free slots exist (raid_check_pw).
 * PARAMETERS:	md_raidcs_t *cs - child describing the block range
 *		int column - column index whose reserve is searched
 * RETURNS:	index of the claimed slot (marked SB_INUSE)
 */
static int
raid_get_pwslot(md_raidcs_t *cs, int column)
{
	mr_scoreboard_t	*sb;
	mr_pw_reserve_t	*pw;
	mr_unit_t	*un = cs->cs_un;
	diskaddr_t	start_blk = cs->cs_blkno;
	diskaddr_t	last_blk = cs->cs_lastblk;
	int		i;
	int		pwcnt = un->un_pwcnt;
	int		avail = -1;
	int		use = -1;
	int		flags;

	/* start with the data column */
	pw = cs->cs_un->un_column_ic[column].un_pw_reserve;
	sb = &pw->pw_sb[0];
	ASSERT(pw->pw_free > 0);
	for (i = 0; i < pwcnt; i++) {
		flags = sb[i].sb_flags;
		if (flags & SB_INVAL_PEND)
			continue;

		if ((avail == -1) && (flags & (SB_AVAIL | SB_UNUSED)))
			avail = i;

		if ((start_blk > sb[i].sb_last_blk) ||
		    (last_blk < sb[i].sb_start_blk))
			continue;

		/* OVERLAP */
		ASSERT(! (sb[i].sb_flags & SB_INUSE));

		/*
		 * raid_invalidate_pwslot attempts to zero out prewrite entry
		 * in parallel with other disk reads/writes related to current
		 * transaction. however cs_frags accounting for this case is
		 * broken because raid_write_io resets cs_frags i.e. ignoring
		 * that it could have been set to > 0 value by
		 * raid_invalidate_pwslot. While this can be fixed an
		 * additional problem is that we don't seem to handle
		 * correctly the case of getting a disk error for prewrite
		 * entry invalidation.
		 * It does not look like we really need
		 * to invalidate prewrite slots because raid_replay sorts
		 * prewrite id's in ascending order and during recovery the
		 * latest prewrite entry for the same block will be replayed
		 * last. That's why i ifdef'd out the call to
		 * raid_invalidate_pwslot. --aguzovsk@east
		 */

		if (use == -1) {
			use = i;
		}
	}

	ASSERT(avail != -1);
	pw->pw_free--;
	if (use == -1)
		use = avail;

	ASSERT(! (sb[use].sb_flags & SB_INUSE));
	sb[use].sb_flags = SB_INUSE;
	sb[use].sb_cs = cs;
	sb[use].sb_start_blk = start_blk;
	sb[use].sb_last_blk = last_blk;
	ASSERT((use >= 0) && (use < un->un_pwcnt));
	return (use);
}

/*
 * NAMES:	raid_check_pw
 * DESCRIPTION: test whether every column this child needs still has at
 *		least one free prewrite slot.  For a full-line request
 *		(MD_RCS_LINE) all columns are checked; otherwise only the
 *		data and parity columns.
 * PARAMETERS:	md_raidcs_t *cs - child structure
 * RETURNS:	0 if slots are available, 1 if any needed reserve is empty
 */
static int
raid_check_pw(md_raidcs_t *cs)
{

	mr_unit_t	*un = cs->cs_un;
	int		i;

	ASSERT(! (cs->cs_flags & MD_RCS_HAVE_PW_SLOTS));
	/*
	 * check to be sure there is a prewrite slot available
	 * if not just return.
	 */
	if (cs->cs_flags & MD_RCS_LINE) {
		for (i = 0; i < un->un_totalcolumncnt; i++)
			if (un->un_column_ic[i].un_pw_reserve->pw_free <= 0)
				return (1);
		return (0);
	}

	if (un->un_column_ic[cs->cs_dcolumn].un_pw_reserve->pw_free <= 0)
		return (1);
	if (un->un_column_ic[cs->cs_pcolumn].un_pw_reserve->pw_free <= 0)
		return (1);
	return (0);
}

/*
 * NAMES:	raid_alloc_pwslot
 * DESCRIPTION: assign a new prewrite id to the child and claim prewrite
 *		slots on the data column, every cbuf column, and the
 *		parity column.  Fails (without claiming anything) if any
 *		needed reserve is exhausted.
 * PARAMETERS:	md_raidcs_t *cs - child structure
 * RETURNS:	0 on success (MD_RCS_HAVE_PW_SLOTS set), 1 if no slots
 */
static int
raid_alloc_pwslot(md_raidcs_t *cs)
{
	mr_unit_t	*un = cs->cs_un;
	md_raidcbuf_t	*cbuf;

	ASSERT(! (cs->cs_flags & MD_RCS_HAVE_PW_SLOTS));
	if (raid_check_pw(cs))
		return (1);

	/* un_pwid is shared; bump it under the unit mutex */
	mutex_enter(&un->un_mx);
	un->un_pwid++;
	cs->cs_pwid = un->un_pwid;
	mutex_exit(&un->un_mx);

	cs->cs_dpwslot = raid_get_pwslot(cs, cs->cs_dcolumn);
	for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) {
		cbuf->cbuf_pwslot = raid_get_pwslot(cs, cbuf->cbuf_column);
	}
	cs->cs_ppwslot = raid_get_pwslot(cs, cs->cs_pcolumn);

	cs->cs_flags |= MD_RCS_HAVE_PW_SLOTS;

	return (0);
}

/*
 * NAMES:	raid_build_incore
 * DESCRIPTION: RAID metadevice incore structure building routine
 * PARAMETERS:	void *p - pointer to a unit structure
 *		int snarfing - a flag to indicate snarfing is required
 * RETURNS:	0 on success, 1 on failure (unit being reset, or a
 *		prewrite reservation could not be built)
 */
int
raid_build_incore(void *p, int snarfing)
{
	mr_unit_t	*un = (mr_unit_t *)p;
	minor_t		mnum = MD_SID(un);
	mddb_recid_t	hs_recid = 0;
	int		i;
	int		preserve_flags;
	mr_column_t	*column;
	int		iosize;
	md_dev64_t	hs, dev;
	int		resync_cnt = 0,
	    error_cnt = 0;

	hs = NODEV64;
	dev = NODEV64;

	/* clear out bogus pointer incase we return(1) prior to alloc */
	un->mr_ic = NULL;

	if (MD_STATUS(un) & MD_UN_BEING_RESET) {
		mddb_setrecprivate(un->c.un_record_id, MD_PRV_PENDCLEAN);
		return (1);
	}

	/* already built for this minor number — nothing to do */
	if (MD_UNIT(mnum) != NULL)
		return (0);

	if (snarfing)
		MD_STATUS(un) = 0;

	un->mr_ic = (mr_unit_ic_t *)kmem_zalloc(sizeof (*un->mr_ic),
	    KM_SLEEP);

	un->un_column_ic = (mr_column_ic_t *)
	    kmem_zalloc(sizeof (mr_column_ic_t) *
		un->un_totalcolumncnt, KM_SLEEP);

	for (i = 0; i < un->un_totalcolumncnt; i++) {

		column	= &un->un_column[i];
		preserve_flags = column->un_devflags &
		    (MD_RAID_COPY_RESYNC | MD_RAID_REGEN_RESYNC);
		column->un_devflags &=
		    ~(MD_RAID_ALT_ISOPEN | MD_RAID_DEV_ISOPEN |
		    MD_RAID_WRITE_ALT);
		if (raid_build_pw_reservation(un, i) != 0) {
			/*
			 * could not build pwslot
			 * NOTE(review): mr_ic/un_column_ic allocated above
			 * are not freed here — presumably the caller's
			 * failure path tears the unit down; verify.
			 */
			return (1);
		}

		if (snarfing) {
			set_t		setno = MD_MIN2SET(mnum);
			dev =  md_getdevnum(setno, mddb_getsidenum(setno),
			    column->un_orig_key, MD_NOTRUST_DEVT);
			/*
			 * Comment out instead of remove so we have history
			 * In the pre-SVM releases stored devt is used so
			 * as long as there is one snarf is always happy
			 * even the component is powered off.  This is not
			 * the case in current SVM implementation.  NODEV64
			 * can be returned and in this case since we resolve
			 * the devt at 'open' time (first use of metadevice)
			 * we will allow snarf continue.
			 *
			 * if (dev == NODEV64)
			 *	return (1);
			 */

			/*
			 * Setup un_orig_dev from device id info if the device
			 * is valid (not NODEV64).
			 */
			if (dev != NODEV64)
				column->un_orig_dev = dev;

			if (column->un_devstate & RCS_RESYNC)
				resync_cnt++;
			if (column->un_devstate & (RCS_ERRED | RCS_LAST_ERRED))
				error_cnt++;

			if (HOTSPARED(un, i)) {
				(void) md_hot_spare_ifc(HS_MKDEV,
				    0, 0, 0, &column->un_hs_id, NULL,
				    &hs, NULL);
				/*
				 * Same here
				 *
				 * if (hs == NODEV64)
				 *	return (1);
				 */
			}

			if (HOTSPARED(un, i)) {
				if (column->un_devstate &
				    (RCS_OKAY | RCS_LAST_ERRED)) {
					/* hotspare carries live data */
					column->un_dev = hs;
					column->un_pwstart =
					    column->un_hs_pwstart;
					column->un_devstart =
					    column->un_hs_devstart;
					preserve_flags &=
					    ~(MD_RAID_COPY_RESYNC |
					    MD_RAID_REGEN_RESYNC);
				} else if (column->un_devstate & RCS_RESYNC) {
					/*
					 * if previous system was 4.0 set
					 * the direction flags
					 */
					if ((preserve_flags &
					    (MD_RAID_COPY_RESYNC |
					    MD_RAID_REGEN_RESYNC)) == 0) {
						if (column->un_alt_dev !=
						    NODEV64)
							preserve_flags |=
							    MD_RAID_COPY_RESYNC;
						else
							preserve_flags |=
							    MD_RAID_REGEN_RESYNC;
					}
				}
			} else { /* no hot spares */
				column->un_dev = dev;
				column->un_pwstart = column->un_orig_pwstart;
				column->un_devstart = column->un_orig_devstart;
				if (column->un_devstate & RCS_RESYNC) {
					preserve_flags |= MD_RAID_REGEN_RESYNC;
					preserve_flags &= ~MD_RAID_COPY_RESYNC;
				}
			}
			if (! (column->un_devstate & RCS_RESYNC)) {
				preserve_flags &=
				    ~(MD_RAID_REGEN_RESYNC |
				    MD_RAID_COPY_RESYNC);
			}

			column->un_devflags = preserve_flags;
			column->un_alt_dev = NODEV64;
			column->un_alt_pwstart = 0;
			column->un_alt_devstart = 0;
			un->un_resync_line_index = 0;
			un->un_resync_index = 0;
			un->un_percent_done = 0;
		}
	}

	/*
	 * A unit with both resyncing and erred columns cannot finish the
	 * resync: demote resyncing columns to erred and erred to last-erred.
	 */
	if (resync_cnt && error_cnt) {
		for (i = 0; i < un->un_totalcolumncnt; i++) {
			column = &un->un_column[i];
			if (HOTSPARED(un, i) &&
			    (column->un_devstate & RCS_RESYNC) &&
			    (column->un_devflags & MD_RAID_COPY_RESYNC))
				/* hotspare has data */
				continue;

			if (HOTSPARED(un, i) &&
			    (column->un_devstate & RCS_RESYNC)) {
				/* hotspare does not have data */
				raid_hs_release(HS_FREE, un, &hs_recid, i);
				column->un_dev = column->un_orig_dev;
				column->un_pwstart = column->un_orig_pwstart;
				column->un_devstart = column->un_orig_devstart;
				mddb_setrecprivate(hs_recid, MD_PRV_PENDCOM);
			}

			if (column->un_devstate & RCS_ERRED)
				column->un_devstate = RCS_LAST_ERRED;

			if (column->un_devstate & RCS_RESYNC)
				column->un_devstate = RCS_ERRED;
		}
	}
	mddb_setrecprivate(un->c.un_record_id, MD_PRV_PENDCOM);

	un->un_pwid = 1; /* or some other possible value */
	un->un_magic = RAID_UNMAGIC;
	iosize = un->un_iosize;
	un->un_pbuffer = kmem_alloc(dbtob(iosize), KM_SLEEP);
	un->un_dbuffer = kmem_alloc(dbtob(iosize), KM_SLEEP);
	mutex_init(&un->un_linlck_mx, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&un->un_linlck_cv, NULL, CV_DEFAULT, NULL);
	un->un_linlck_chn = NULL;
	MD_UNIT(mnum) = un;


	return (0);
}

/*
 * NAMES:	reset_raid
 * DESCRIPTION: RAID metadevice reset routine
 * PARAMETERS:	mr_unit_t *un - pointer to a unit structure
 *		minor_t mnum - RAID metadevice minor number
 *		int removing - a flag to imply removing device name from
 *		MDDB database.
 */
void
reset_raid(mr_unit_t *un, minor_t mnum, int removing)
{
	int		i, n = 0;
	sv_dev_t	*sv;
	mr_column_t	*column;
	int		column_cnt = un->un_totalcolumncnt;
	mddb_recid_t	*recids, vtoc_id;
	int		hserr;

	/* no I/O may be pending on the unit being reset */
	ASSERT((MDI_UNIT(mnum)->ui_io_lock->io_list_front == NULL) &&
	    (MDI_UNIT(mnum)->ui_io_lock->io_list_back == NULL));

	md_destroy_unit_incore(mnum, &raid_md_ops);

	MD_UNIT(mnum) = NULL;

	if (un->un_pbuffer) {
		kmem_free(un->un_pbuffer, dbtob(un->un_iosize));
		un->un_pbuffer = NULL;
	}
	if (un->un_dbuffer) {
		kmem_free(un->un_dbuffer, dbtob(un->un_iosize));
		un->un_dbuffer = NULL;
	}

	/* free all pre-write slots created during build incore */
	for (i = 0; i < un->un_totalcolumncnt; i++)
		raid_free_pw_reservation(un, i);

	kmem_free(un->un_column_ic, sizeof (mr_column_ic_t) *
	    un->un_totalcolumncnt);

	kmem_free(un->mr_ic, sizeof (*un->mr_ic));

	/* incore teardown only; record removal below happens on request */
	if (!removing)
		return;

	/* sv: one entry per column; recids: columns + hsp + terminator */
	sv = (sv_dev_t *)kmem_zalloc((column_cnt + 1) * sizeof (sv_dev_t),
	    KM_SLEEP);

	recids = (mddb_recid_t *)
	    kmem_zalloc((column_cnt + 2) * sizeof (mddb_recid_t), KM_SLEEP);

	for (i = 0; i < column_cnt; i++) {
		md_unit_t	*comp_un;
		md_dev64_t	comp_dev;

		column = &un->un_column[i];
		sv[i].setno = MD_MIN2SET(mnum);
		sv[i].key = column->un_orig_key;
		if (HOTSPARED(un, i)) {
			if (column->un_devstate & (RCS_ERRED | RCS_LAST_ERRED))
				hserr = HS_BAD;
			else
				hserr = HS_FREE;
			raid_hs_release(hserr, un, &recids[n++], i);
		}
		/*
		 * deparent any metadevices.
		 * NOTE: currently soft partitions are the only metadevices
		 * allowed in RAID metadevices.
		 */
		comp_dev = column->un_dev;
		if (md_getmajor(comp_dev) == md_major) {
			comp_un = MD_UNIT(md_getminor(comp_dev));
			recids[n++] = MD_RECID(comp_un);
			md_reset_parent(comp_dev);
		}
	}
	/* decrement the reference count of the old hsp */
	if (un->un_hsp_id != -1)
		(void) md_hot_spare_ifc(HSP_DECREF, un->un_hsp_id, 0, 0,
		    &recids[n++], NULL, NULL, NULL);
	recids[n] = 0;
	MD_STATUS(un) |= MD_UN_BEING_RESET;
	vtoc_id = un->c.un_vtoc_id;

	raid_commit(un, recids);


	/* Remove the unit structure */
	mddb_deleterec_wrapper(un->c.un_record_id);

	/* Remove the vtoc, if present */
	if (vtoc_id)
		mddb_deleterec_wrapper(vtoc_id);
	md_rem_names(sv, column_cnt);
	kmem_free(sv, (column_cnt + 1) * sizeof (sv_dev_t));
	kmem_free(recids, (column_cnt + 2) * sizeof (mddb_recid_t));

	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, SVM_TAG_METADEVICE,
	    MD_MIN2SET(mnum), mnum);
}

/*
 * NAMES:	raid_error_parent
 * DESCRIPTION: mark a parent structure in error
 * PARAMETERS:	md_raidcs_t *cs - pointer to child structure
 *		int	error - error value to set
 * NOTE:	(TBR) - this routine currently is not in use.
 */
static void
raid_error_parent(md_raidps_t *ps, int error)
{
	mutex_enter(&ps->ps_mx);
	ps->ps_flags |= MD_RPS_ERROR;
	ps->ps_error = error;
	mutex_exit(&ps->ps_mx);
}

/*
 * The following defines tell raid_free_parent
 *	RFP_RLS_LOCK		release the unit reader lock when done.
 *	RFP_DECR_PWFRAGS	decrement ps_pwfrags
 *	RFP_DECR_FRAGS		decrement ps_frags
 *	RFP_DECR_READFRAGS	read keeps FRAGS and PWFRAGS in lockstep
 */
#define	RFP_RLS_LOCK		0x00001
#define	RFP_DECR_PWFRAGS	0x00002
#define	RFP_DECR_FRAGS		0x00004
#define	RFP_DECR_READFRAGS	(RFP_DECR_PWFRAGS | RFP_DECR_FRAGS)

/*
 * NAMES:	raid_free_parent
 * DESCRIPTION: drop fragment references on a parent structure; when the
 *		prewrite fragment count reaches zero the parent's buf is
 *		biodone'd, and when the overall fragment count reaches
 *		zero the parent itself is torn down and freed.
 * PARAMETERS:	md_raidps_t *ps - pointer to parent structure
 *		int todo - RFP_* flags indicating what needs to be done
 */
static void
raid_free_parent(md_raidps_t *ps, int todo)
{
	mdi_unit_t	*ui = ps->ps_ui;

	ASSERT(ps->ps_magic == RAID_PSMAGIC);
	ASSERT(ps->ps_flags & MD_RPS_INUSE);
	mutex_enter(&ps->ps_mx);
	if (todo & RFP_DECR_PWFRAGS) {
		ASSERT(ps->ps_pwfrags);
		ps->ps_pwfrags--;
		/* last prewrite fragment: complete the parent I/O once */
		if (ps->ps_pwfrags == 0 && (! (ps->ps_flags & MD_RPS_IODONE))) {
			if (ps->ps_flags & MD_RPS_ERROR) {
				ps->ps_bp->b_flags |= B_ERROR;
				ps->ps_bp->b_error = ps->ps_error;
			}
			md_kstat_done(ui, ps->ps_bp, 0);
			biodone(ps->ps_bp);
			ps->ps_flags |= MD_RPS_IODONE;
		}
	}

	if (todo & RFP_DECR_FRAGS) {
		ASSERT(ps->ps_frags);
		ps->ps_frags--;
	}

	/* other fragments still outstanding — parent stays alive */
	if (ps->ps_frags != 0) {
		mutex_exit(&ps->ps_mx);
		return;
	}

	ASSERT((ps->ps_frags == 0) && (ps->ps_pwfrags == 0));
	mutex_exit(&ps->ps_mx);

	if (todo & RFP_RLS_LOCK)
		md_io_readerexit(ui);

	/* during panic the structure is abandoned, not freed */
	if (panicstr) {
		ps->ps_flags |= MD_RPS_DONE;
		return;
	}

	if (ps->ps_flags & MD_RPS_HSREQ)
		(void) raid_hotspares();

	ASSERT(todo & RFP_RLS_LOCK);
	ps->ps_flags &= ~MD_RPS_INUSE;

	md_dec_iocount(MD_MIN2SET(ps->ps_un->c.un_self_id));

	kmem_cache_free(raid_parent_cache, ps);
}

/*
 * NAMES:	raid_free_child
 * DESCRIPTION: free a child structure
 * PARAMETERS:	md_raidcs_t *cs - pointer to child structure
 *		int drop_locks - 0 for no locks held
 * NOTE:	(TBR) - this routine currently is not in use.
 */
static void
raid_free_child(md_raidcs_t *cs, int drop_locks)
{
	mr_unit_t	*un = cs->cs_un;
	md_raidcbuf_t	*cbuf, *cbuf1;

	/* return any pending prewrite-invalidate entries first */
	if (cs->cs_pw_inval_list)
		raid_free_pwinvalidate(cs);

	if (drop_locks) {
		ASSERT(cs->cs_flags & MD_RCS_LLOCKD &&
		    (cs->cs_flags & (MD_RCS_READER | MD_RCS_WRITER)));
		md_unit_readerexit(MDI_UNIT(MD_SID(un)));
		raid_line_exit(cs);
	} else {
		ASSERT(!(cs->cs_flags & MD_RCS_LLOCKD));
	}

	freebuffers(cs);
	/* release the cbuf chain; cbuf buffers were freed above */
	cbuf = cs->cs_buflist;
	while (cbuf) {
		cbuf1 = cbuf->cbuf_next;
		kmem_cache_free(raid_cbuf_cache, cbuf);
		cbuf = cbuf1;
	}
	if (cs->cs_dbuf.b_flags & B_REMAPPED)
		bp_mapout(&cs->cs_dbuf);
	kmem_cache_free(raid_child_cache, cs);
}

/*
 * NAME:	raid_regen_parity
 *
 * DESCRIPTION:	This routine is used to regenerate the parity blocks
 *		for the entire raid device.  It is called from
 *		both the regen thread and the IO path.
12690Sstevel@tonic-gate * 12700Sstevel@tonic-gate * On error the entire device is marked as in error by 12710Sstevel@tonic-gate * placing the erroring device in error and all other 12720Sstevel@tonic-gate * devices in last_errored. 12730Sstevel@tonic-gate * 12740Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs 12750Sstevel@tonic-gate */ 12760Sstevel@tonic-gate void 12770Sstevel@tonic-gate raid_regen_parity(md_raidcs_t *cs) 12780Sstevel@tonic-gate { 12790Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 12800Sstevel@tonic-gate mdi_unit_t *ui = MDI_UNIT(un->c.un_self_id); 12810Sstevel@tonic-gate caddr_t buffer; 12820Sstevel@tonic-gate caddr_t parity_buffer; 12830Sstevel@tonic-gate buf_t *bp; 12840Sstevel@tonic-gate uint_t *dbuf, *pbuf; 12850Sstevel@tonic-gate uint_t colcnt = un->un_totalcolumncnt; 12860Sstevel@tonic-gate int column; 12870Sstevel@tonic-gate int parity_column = cs->cs_pcolumn; 12880Sstevel@tonic-gate size_t bcount; 12890Sstevel@tonic-gate int j; 12900Sstevel@tonic-gate 12910Sstevel@tonic-gate /* 12920Sstevel@tonic-gate * This routine uses the data and parity buffers allocated to a 12930Sstevel@tonic-gate * write. In the case of a read the buffers are allocated and 12940Sstevel@tonic-gate * freed at the end. 
12950Sstevel@tonic-gate */ 12960Sstevel@tonic-gate 12970Sstevel@tonic-gate ASSERT(IO_READER_HELD(un)); 12980Sstevel@tonic-gate ASSERT(cs->cs_flags & MD_RCS_LLOCKD); 12990Sstevel@tonic-gate ASSERT(UNIT_READER_HELD(un)); 13000Sstevel@tonic-gate 13010Sstevel@tonic-gate if (raid_state_cnt(un, RCS_OKAY) != colcnt) 13020Sstevel@tonic-gate return; 13030Sstevel@tonic-gate 13040Sstevel@tonic-gate if (cs->cs_flags & MD_RCS_READER) { 13050Sstevel@tonic-gate getpbuffer(cs); 13060Sstevel@tonic-gate getdbuffer(cs); 13070Sstevel@tonic-gate } 13080Sstevel@tonic-gate ASSERT(cs->cs_dbuffer && cs->cs_pbuffer); 13090Sstevel@tonic-gate bcount = cs->cs_bcount; 13100Sstevel@tonic-gate buffer = cs->cs_dbuffer; 13110Sstevel@tonic-gate parity_buffer = cs->cs_pbuffer; 13120Sstevel@tonic-gate bzero(parity_buffer, bcount); 13130Sstevel@tonic-gate bp = &cs->cs_dbuf; 13140Sstevel@tonic-gate for (column = 0; column < colcnt; column++) { 13150Sstevel@tonic-gate if (column == parity_column) 13160Sstevel@tonic-gate continue; 13170Sstevel@tonic-gate reset_buf(bp, B_READ | B_BUSY, bcount); 13180Sstevel@tonic-gate bp->b_un.b_addr = buffer; 13190Sstevel@tonic-gate bp->b_edev = md_dev64_to_dev(un->un_column[column].un_dev); 13200Sstevel@tonic-gate bp->b_lblkno = cs->cs_blkno + un->un_column[column].un_devstart; 13210Sstevel@tonic-gate bp->b_bcount = bcount; 13220Sstevel@tonic-gate bp->b_bufsize = bcount; 13230Sstevel@tonic-gate (void) md_call_strategy(bp, MD_STR_NOTTOP, NULL); 13240Sstevel@tonic-gate if (biowait(bp)) 13250Sstevel@tonic-gate goto bail; 13260Sstevel@tonic-gate pbuf = (uint_t *)(void *)parity_buffer; 13270Sstevel@tonic-gate dbuf = (uint_t *)(void *)buffer; 13280Sstevel@tonic-gate for (j = 0; j < (bcount / (sizeof (uint_t))); j++) { 13290Sstevel@tonic-gate *pbuf = *pbuf ^ *dbuf; 13300Sstevel@tonic-gate pbuf++; 13310Sstevel@tonic-gate dbuf++; 13320Sstevel@tonic-gate } 13330Sstevel@tonic-gate } 13340Sstevel@tonic-gate 13350Sstevel@tonic-gate reset_buf(bp, B_WRITE | B_BUSY, cs->cs_bcount); 
13360Sstevel@tonic-gate bp->b_un.b_addr = parity_buffer; 13370Sstevel@tonic-gate bp->b_edev = md_dev64_to_dev(un->un_column[parity_column].un_dev); 13380Sstevel@tonic-gate bp->b_lblkno = cs->cs_blkno + un->un_column[parity_column].un_devstart; 13390Sstevel@tonic-gate bp->b_bcount = bcount; 13400Sstevel@tonic-gate bp->b_bufsize = bcount; 13410Sstevel@tonic-gate (void) md_call_strategy(bp, MD_STR_NOTTOP, NULL); 13420Sstevel@tonic-gate if (biowait(bp)) 13430Sstevel@tonic-gate goto bail; 13440Sstevel@tonic-gate 13450Sstevel@tonic-gate if (cs->cs_flags & MD_RCS_READER) { 13460Sstevel@tonic-gate freebuffers(cs); 13470Sstevel@tonic-gate cs->cs_pbuffer = NULL; 13480Sstevel@tonic-gate cs->cs_dbuffer = NULL; 13490Sstevel@tonic-gate } 13500Sstevel@tonic-gate bp->b_chain = (struct buf *)cs; 13510Sstevel@tonic-gate return; 13520Sstevel@tonic-gate bail: 13530Sstevel@tonic-gate if (cs->cs_flags & MD_RCS_READER) { 13540Sstevel@tonic-gate freebuffers(cs); 13550Sstevel@tonic-gate cs->cs_pbuffer = NULL; 13560Sstevel@tonic-gate cs->cs_dbuffer = NULL; 13570Sstevel@tonic-gate } 13580Sstevel@tonic-gate md_unit_readerexit(ui); 13590Sstevel@tonic-gate un = md_unit_writerlock(ui); 13600Sstevel@tonic-gate raid_set_state(un, column, RCS_ERRED, 0); 13610Sstevel@tonic-gate for (column = 0; column < colcnt; column++) 13620Sstevel@tonic-gate raid_set_state(un, column, RCS_ERRED, 0); 13630Sstevel@tonic-gate raid_commit(un, NULL); 13640Sstevel@tonic-gate md_unit_writerexit(ui); 13650Sstevel@tonic-gate un = md_unit_readerlock(ui); 13660Sstevel@tonic-gate bp->b_chain = (struct buf *)cs; 13670Sstevel@tonic-gate } 13680Sstevel@tonic-gate 13690Sstevel@tonic-gate /* 13700Sstevel@tonic-gate * NAMES: raid_error_state 13710Sstevel@tonic-gate * DESCRIPTION: check unit and column states' impact on I/O error 13720Sstevel@tonic-gate * NOTE: the state now may not be the state when the 13730Sstevel@tonic-gate * I/O completed due to race conditions. 
 * PARAMETERS:	mr_unit_t *un - pointer to raid unit structure
 *		md_raidcs_t *cs - pointer to child structure
 *		buf_t	*bp - pointer to buffer structure
 * RETURNS:	0 if the failing device is no longer part of the unit
 *		(a replace raced in), EIO otherwise.
 */
static int
raid_error_state(mr_unit_t *un, buf_t *bp)
{
	int		column;
	int		i;

	ASSERT(IO_READER_HELD(un));
	ASSERT(UNIT_WRITER_HELD(un));

	/*
	 * Map the failing buf's device back to a column index; a buf may
	 * target either the column's device or its alternate device.
	 */
	column = -1;
	for (i = 0; i < un->un_totalcolumncnt; i++) {
		if (un->un_column[i].un_dev == md_expldev(bp->b_edev)) {
			column = i;
			break;
		}
		if (un->un_column[i].un_alt_dev == md_expldev(bp->b_edev)) {
			column = i;
			break;
		}
	}

	/* in case a replace snuck in while waiting on unit writer lock */

	if (column == -1) {
		return (0);
	}

	/* err the column, persist the new state, and raise an event */
	(void) raid_set_state(un, column, RCS_ERRED, 0);
	ASSERT(un->un_state & (RUS_ERRED | RUS_LAST_ERRED));

	raid_commit(un, NULL);
	if (un->un_state & RUS_ERRED) {
		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));
	} else if (un->un_state & RUS_LAST_ERRED) {
		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));
	}

	return (EIO);
}

/*
 * NAME:	raid_mapin_buf
 * DESCRIPTION: wait for the input buffer header to be mapped in
 * PARAMETERS:	md_raidcs_t *cs - child structure whose cs_addr is set from
 *		the (possibly newly mapped) parent buffer.
 */
static void
raid_mapin_buf(md_raidcs_t *cs)
{
	md_raidps_t	*ps = cs->cs_ps;

	/*
	 * Check to see if the buffer is mapped.  If all is ok, compute the
	 * offset of the data and return.  Since it is expensive to grab
	 * a mutex this is only done if the mapin is not complete.
	 * Once the mutex is acquired it is possible that the mapin was
	 * not done, so recheck and if necessary do the mapin
	 * (double-checked pattern around ps_mapin_mx).
	 */
	if (ps->ps_mapin > 0) {
		cs->cs_addr = ps->ps_addr + cs->cs_offset;
		return;
	}
	mutex_enter(&ps->ps_mapin_mx);
	if (ps->ps_mapin > 0) {
		cs->cs_addr = ps->ps_addr + cs->cs_offset;
		mutex_exit(&ps->ps_mapin_mx);
		return;
	}
	bp_mapin(ps->ps_bp);
	/*
	 * get the new b_addr out of the parent since bp_mapin just changed it
	 */
	ps->ps_addr = ps->ps_bp->b_un.b_addr;
	cs->cs_addr = ps->ps_addr + cs->cs_offset;
	ps->ps_mapin++;
	mutex_exit(&ps->ps_mapin_mx);
}

/*
 * NAMES:	raid_read_no_retry
 * DESCRIPTION: I/O retry routine for a RAID metadevice read;
 *		the read failed while attempting to regenerate the data,
 *		no retry is possible (error occurred in
 *		raid_raidregenloop()), so the parent is failed with EIO.
 * PARAMETERS:	mr_unit_t   *un - pointer to raid unit structure
 *		md_raidcs_t *cs - pointer to child structure
 */
/*ARGSUSED*/
static void
raid_read_no_retry(mr_unit_t *un, md_raidcs_t *cs)
{
	md_raidps_t	*ps = cs->cs_ps;

	/* propagate the failure to the parent, then tear down the child */
	raid_error_parent(ps, EIO);
	raid_free_child(cs, 1);

	/* decrement readfrags */
	raid_free_parent(ps, RFP_DECR_READFRAGS | RFP_RLS_LOCK);
}

/*
 * NAMES:	raid_read_retry
 * DESCRIPTION: I/O retry routine for a RAID metadevice read; resets the
 *		child's data and parity bufs to a pristine state and
 *		re-submits the I/O in recovery mode via raid_read_io().
 * PARAMETERS:	md_raidcs_t *cs - pointer to child structure
 */
static void
raid_read_retry(mr_unit_t *un, md_raidcs_t *cs)
{
	/* re-initialize the buf_t structure for raid_read() */
	cs->cs_dbuf.b_chain = (struct buf *)cs;
	cs->cs_dbuf.b_back = &cs->cs_dbuf;
	cs->cs_dbuf.b_forw = &cs->cs_dbuf;
	cs->cs_dbuf.b_flags = B_BUSY;	/* initialize flags */
	cs->cs_dbuf.b_error = 0;	/* initialize error */
	cs->cs_dbuf.b_offset = -1;
	/* Initialize semaphores */
	sema_init(&cs->cs_dbuf.b_io, 0, NULL,
	    SEMA_DEFAULT, NULL);
	sema_init(&cs->cs_dbuf.b_sem, 0, NULL,
	    SEMA_DEFAULT, NULL);

	/* same reset for the parity buf */
	cs->cs_pbuf.b_chain = (struct buf *)cs;
	cs->cs_pbuf.b_back = &cs->cs_pbuf;
	cs->cs_pbuf.b_forw = &cs->cs_pbuf;
	cs->cs_pbuf.b_flags = B_BUSY;	/* initialize flags */
	cs->cs_pbuf.b_error = 0;	/* initialize error */
	cs->cs_pbuf.b_offset = -1;
	sema_init(&cs->cs_pbuf.b_io, 0, NULL,
	    SEMA_DEFAULT, NULL);
	sema_init(&cs->cs_pbuf.b_sem, 0, NULL,
	    SEMA_DEFAULT, NULL);

	cs->cs_flags &= ~MD_RCS_ERROR;	/* reset child error flag */
	cs->cs_flags |= MD_RCS_RECOVERY;  /* set RECOVERY flag */

	/*
	 * Re-scheduling I/O with raid_read_io() is simpler: basically,
	 * raid_read_io() is invoked again with the same child structure.
	 * (NOTE: we aren't supposed to do any error recovery when an I/O
	 * error occurred in raid_raidregenloop().)
	 */
	raid_mapin_buf(cs);
	raid_read_io(un, cs);
}

/*
 * NAMES:	raid_rderr
 * DESCRIPTION: I/O error handling routine for a RAID metadevice read
 * PARAMETERS:	md_raidcs_t *cs - pointer to child structure
 * LOCKS:	must obtain unit writer lock while calling raid_error_state
 *		since a unit or column state transition may take place.
 *		must obtain unit reader lock to retry I/O.
 */
/*ARGSUSED*/
static void
raid_rderr(md_raidcs_t *cs)
{
	md_raidps_t	*ps;
	mdi_unit_t	*ui;
	mr_unit_t	*un;
	int		error = 0;

	ps = cs->cs_ps;
	ui = ps->ps_ui;
	un = (mr_unit_t *)md_unit_writerlock(ui);
	ASSERT(un != 0);

	/*
	 * Record error state for whichever buf(s) failed.
	 * NOTE(review): `error' is accumulated here but never consulted
	 * afterwards — the retry routine runs regardless.  Confirm
	 * whether that is intentional.
	 */
	if (cs->cs_dbuf.b_flags & B_ERROR)
		error = raid_error_state(un, &cs->cs_dbuf);
	if (cs->cs_pbuf.b_flags & B_ERROR)
		error |= raid_error_state(un, &cs->cs_pbuf);

	md_unit_writerexit(ui);

	/* request hotspare processing for the parent */
	ps->ps_flags |= MD_RPS_HSREQ;

	/* retry requires only the unit reader lock */
	un = (mr_unit_t *)md_unit_readerlock(ui);
	ASSERT(un != 0);
	/* now attempt the appropriate retry routine */
	(*(cs->cs_retry_call))(un, cs);
}


/*
 * NAMES:	raid_read_error
 * DESCRIPTION: I/O error handling routine for a RAID metadevice read;
 *		logs a warning for newly failing devices, then hands the
 *		child to the master daemon for state-change processing.
 * PARAMETERS:	md_raidcs_t *cs - pointer to child structure
 */
/*ARGSUSED*/
static void
raid_read_error(md_raidcs_t *cs)
{
	md_raidps_t	*ps;
	mdi_unit_t	*ui;
	mr_unit_t	*un;
	set_t		setno;

	ps = cs->cs_ps;
	ui = ps->ps_ui;
	un = cs->cs_un;

	setno = MD_UN2SET(un);

	/* warn only if the column is not already marked (LAST_)ERRED */
	if ((cs->cs_dbuf.b_flags & B_ERROR) &&
	    (COLUMN_STATE(un, cs->cs_dcolumn) != RCS_ERRED) &&
	    (COLUMN_STATE(un, cs->cs_dcolumn) != RCS_LAST_ERRED))
		cmn_err(CE_WARN, "md %s: read error on %s",
		    md_shortname(MD_SID(un)),
		    md_devname(setno, md_expldev(cs->cs_dbuf.b_edev), NULL, 0));

	if ((cs->cs_pbuf.b_flags & B_ERROR) &&
	    (COLUMN_STATE(un, cs->cs_pcolumn) != RCS_ERRED) &&
	    (COLUMN_STATE(un, cs->cs_pcolumn) != RCS_LAST_ERRED))
		cmn_err(CE_WARN, "md %s: read error on %s",
		    md_shortname(MD_SID(un)),
		    md_devname(setno, md_expldev(cs->cs_pbuf.b_edev), NULL, 0));

	md_unit_readerexit(ui);

	ASSERT(cs->cs_frags == 0);

	/* now schedule processing for possible state change */
	daemon_request(&md_mstr_daemon, raid_rderr,
	    (daemon_queue_t *)cs, REQ_OLD);

}

/*
 * NAMES:	getdbuffer
 * DESCRIPTION: data buffer allocation for a child structure
 * PARAMETERS:	md_raidcs_t *cs - pointer to child structure
 *
 * NOTE: always get dbuffer before pbuffer
 *	 and get both buffers before pwslot
 *	 otherwise a deadlock could be introduced.
 */
static void
getdbuffer(md_raidcs_t *cs)
{
	mr_unit_t	*un;

	/*
	 * Fast path: try a non-sleeping allocation.  On failure, fall
	 * back to the unit's single pre-reserved data buffer, sleeping
	 * on un_cv until it is available.
	 */
	cs->cs_dbuffer = kmem_alloc(cs->cs_bcount + DEV_BSIZE, KM_NOSLEEP);
	if (cs->cs_dbuffer != NULL)
		return;
	un = cs->cs_ps->ps_un;
	mutex_enter(&un->un_mx);
	while (un->un_dbuffer == NULL) {
		STAT_INC(data_buffer_waits);
		un->un_rflags |= MD_RFLAG_NEEDBUF;
		cv_wait(&un->un_cv, &un->un_mx);
	}
	cs->cs_dbuffer = un->un_dbuffer;
	/* MD_RCS_UNDBUF tells freebuffers() to return it to the unit */
	cs->cs_flags |= MD_RCS_UNDBUF;
	un->un_dbuffer = NULL;
	mutex_exit(&un->un_mx);
}

/*
 * NAMES:	getpbuffer
 * DESCRIPTION: parity buffer allocation for a child structure
 * PARAMETERS:	md_raidcs_t *cs - pointer to child structure
 *
 * NOTE: always get dbuffer before pbuffer
 *	 and get both buffers before pwslot
 *	 otherwise a deadlock could be introduced.
 */
static void
getpbuffer(md_raidcs_t *cs)
{
	mr_unit_t	*un;

	/* same fast-path / reserved-buffer protocol as getdbuffer() */
	cs->cs_pbuffer = kmem_alloc(cs->cs_bcount + DEV_BSIZE, KM_NOSLEEP);
	if (cs->cs_pbuffer != NULL)
		return;
	un = cs->cs_ps->ps_un;
	mutex_enter(&un->un_mx);
	while (un->un_pbuffer == NULL) {
		STAT_INC(parity_buffer_waits);
		un->un_rflags |= MD_RFLAG_NEEDBUF;
		cv_wait(&un->un_cv, &un->un_mx);
	}
	cs->cs_pbuffer = un->un_pbuffer;
	/* MD_RCS_UNPBUF tells freebuffers() to return it to the unit */
	cs->cs_flags |= MD_RCS_UNPBUF;
	un->un_pbuffer = NULL;
	mutex_exit(&un->un_mx);
}

/*
 * NAMES:	getresources
 * DESCRIPTION: acquire the data buffer, the parity buffer, and one
 *		buffer per chained cbuf for a full-line write.
 * PARAMETERS:	md_raidcs_t *cs - pointer to child structure
 */
static void
getresources(md_raidcs_t *cs)
{
	md_raidcbuf_t	*cbuf;
	/*
	 * NOTE: always get dbuffer before pbuffer
	 *	 and get both buffers before pwslot
	 *	 otherwise a deadlock could be introduced.
	 */
	getdbuffer(cs);
	getpbuffer(cs);
	for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next)
		cbuf->cbuf_buffer =
		    kmem_alloc(cs->cs_bcount + DEV_BSIZE, KM_SLEEP);
}
/*
 * NAMES:	freebuffers
 * DESCRIPTION: child structure buffer freeing routine
 * PARAMETERS:	md_raidcs_t *cs - pointer to child structure
 */
static void
freebuffers(md_raidcs_t *cs)
{
	mr_unit_t	*un;
	md_raidcbuf_t	*cbuf;

	/* free buffers used for full line write */
	for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) {
		if (cbuf->cbuf_buffer == NULL)
			continue;
		kmem_free(cbuf->cbuf_buffer, cbuf->cbuf_bcount + DEV_BSIZE);
		cbuf->cbuf_buffer = NULL;
		cbuf->cbuf_bcount = 0;
	}

	/*
	 * `un' is only needed (and only initialized) when a buffer was
	 * borrowed from the unit; every use below is guarded by the
	 * MD_RCS_UNDBUF/MD_RCS_UNPBUF flags.
	 */
	if (cs->cs_flags & (MD_RCS_UNDBUF | MD_RCS_UNPBUF)) {
		un = cs->cs_un;
		mutex_enter(&un->un_mx);
	}
	if (cs->cs_dbuffer) {
		if (cs->cs_flags & MD_RCS_UNDBUF)
			un->un_dbuffer = cs->cs_dbuffer;
		else
			kmem_free(cs->cs_dbuffer, cs->cs_bcount + DEV_BSIZE);
	}
	if (cs->cs_pbuffer) {
		if (cs->cs_flags & MD_RCS_UNPBUF)
			un->un_pbuffer = cs->cs_pbuffer;
		else
			kmem_free(cs->cs_pbuffer, cs->cs_bcount + DEV_BSIZE);
	}
	if (cs->cs_flags & (MD_RCS_UNDBUF | MD_RCS_UNPBUF)) {
		/* wake any getdbuffer()/getpbuffer() waiter */
		un->un_rflags &= ~MD_RFLAG_NEEDBUF;
		cv_broadcast(&un->un_cv);
		mutex_exit(&un->un_mx);
	}
}

/*
 * NAMES:	raid_line_reader_lock, raid_line_writer_lock
 * DESCRIPTION: RAID metadevice line reader and writer lock routines
 *		data column # and parity column #.
 * PARAMETERS:	md_raidcs_t *cs - pointer to child structure
 */

void
raid_line_reader_lock(md_raidcs_t *cs, int resync_thread)
{
	mr_unit_t	*un;
	md_raidcs_t	*cs1;

	ASSERT(cs->cs_line != MD_DISKADDR_ERROR);
	un = cs->cs_un;
	cs->cs_flags |= MD_RCS_READER;
	STAT_CHECK(raid_line_lock_wait, MUTEX_HELD(&un->un_linlck_mx));
	/* in panic context we run lockless rather than deadlock */
	if (!panicstr)
		mutex_enter(&un->un_linlck_mx);
	/*
	 * A reader only conflicts with an overlapping WRITER on the lock
	 * chain; the chain is rescanned from the head after every wakeup
	 * because it may have changed while we slept.
	 */
	cs1 = un->un_linlck_chn;
	while (cs1 != NULL) {
		for (cs1 = un->un_linlck_chn; cs1; cs1 = cs1->cs_linlck_next)
			if (raid_io_overlaps(cs, cs1) == 1)
				if (cs1->cs_flags & MD_RCS_WRITER)
					break;

		if (cs1 != NULL) {
			if (panicstr)
				panic("md; raid line write lock held");
			un->un_linlck_flg = 1;
			cv_wait(&un->un_linlck_cv, &un->un_linlck_mx);
17530Sstevel@tonic-gate STAT_INC(raid_read_waits); 17540Sstevel@tonic-gate } 17550Sstevel@tonic-gate } 17560Sstevel@tonic-gate STAT_MAX(raid_max_reader_locks, raid_reader_locks_active); 17570Sstevel@tonic-gate STAT_INC(raid_reader_locks); 17580Sstevel@tonic-gate cs1 = un->un_linlck_chn; 17590Sstevel@tonic-gate if (cs1 != NULL) 17600Sstevel@tonic-gate cs1->cs_linlck_prev = cs; 17610Sstevel@tonic-gate cs->cs_linlck_next = cs1; 17620Sstevel@tonic-gate cs->cs_linlck_prev = NULL; 17630Sstevel@tonic-gate un->un_linlck_chn = cs; 17640Sstevel@tonic-gate cs->cs_flags |= MD_RCS_LLOCKD; 17650Sstevel@tonic-gate if (resync_thread) { 17660Sstevel@tonic-gate diskaddr_t lastblk = cs->cs_blkno + cs->cs_blkcnt - 1; 17670Sstevel@tonic-gate diskaddr_t line = (lastblk + 1) / un->un_segsize; 17680Sstevel@tonic-gate ASSERT(raid_state_cnt(un, RCS_RESYNC)); 17690Sstevel@tonic-gate mutex_enter(&un->un_mx); 17700Sstevel@tonic-gate un->un_resync_line_index = line; 17710Sstevel@tonic-gate mutex_exit(&un->un_mx); 17720Sstevel@tonic-gate } 17730Sstevel@tonic-gate if (!panicstr) 17740Sstevel@tonic-gate mutex_exit(&un->un_linlck_mx); 17750Sstevel@tonic-gate } 17760Sstevel@tonic-gate 17770Sstevel@tonic-gate int 17780Sstevel@tonic-gate raid_line_writer_lock(md_raidcs_t *cs, int lock) 17790Sstevel@tonic-gate { 17800Sstevel@tonic-gate mr_unit_t *un; 17810Sstevel@tonic-gate md_raidcs_t *cs1; 17820Sstevel@tonic-gate 17830Sstevel@tonic-gate ASSERT(cs->cs_line != MD_DISKADDR_ERROR); 17840Sstevel@tonic-gate cs->cs_flags |= MD_RCS_WRITER; 17850Sstevel@tonic-gate un = cs->cs_ps->ps_un; 17860Sstevel@tonic-gate 17870Sstevel@tonic-gate STAT_CHECK(raid_line_lock_wait, MUTEX_HELD(&un->un_linlck_mx)); 17880Sstevel@tonic-gate if (lock && !panicstr) 17890Sstevel@tonic-gate mutex_enter(&un->un_linlck_mx); 17900Sstevel@tonic-gate ASSERT(MUTEX_HELD(&un->un_linlck_mx)); 17910Sstevel@tonic-gate 17920Sstevel@tonic-gate cs1 = un->un_linlck_chn; 17930Sstevel@tonic-gate for (cs1 = un->un_linlck_chn; cs1; cs1 = 
cs1->cs_linlck_next) 17940Sstevel@tonic-gate if (raid_io_overlaps(cs, cs1)) 17950Sstevel@tonic-gate break; 17960Sstevel@tonic-gate 17970Sstevel@tonic-gate if (cs1 != NULL) { 17980Sstevel@tonic-gate if (panicstr) 17990Sstevel@tonic-gate panic("md: line writer lock inaccessible"); 18000Sstevel@tonic-gate goto no_lock_exit; 18010Sstevel@tonic-gate } 18020Sstevel@tonic-gate 18030Sstevel@tonic-gate if (raid_alloc_pwslot(cs)) { 18040Sstevel@tonic-gate if (panicstr) 18050Sstevel@tonic-gate panic("md: no prewrite slots"); 18060Sstevel@tonic-gate STAT_INC(raid_prewrite_waits); 18070Sstevel@tonic-gate goto no_lock_exit; 18080Sstevel@tonic-gate } 18090Sstevel@tonic-gate 18100Sstevel@tonic-gate cs1 = un->un_linlck_chn; 18110Sstevel@tonic-gate if (cs1 != NULL) 18120Sstevel@tonic-gate cs1->cs_linlck_prev = cs; 18130Sstevel@tonic-gate cs->cs_linlck_next = cs1; 18140Sstevel@tonic-gate cs->cs_linlck_prev = NULL; 18150Sstevel@tonic-gate un->un_linlck_chn = cs; 18160Sstevel@tonic-gate cs->cs_flags |= MD_RCS_LLOCKD; 18170Sstevel@tonic-gate cs->cs_flags &= ~MD_RCS_WAITING; 18180Sstevel@tonic-gate STAT_INC(raid_writer_locks); 18190Sstevel@tonic-gate STAT_MAX(raid_max_write_locks, raid_write_locks_active); 18200Sstevel@tonic-gate if (lock && !panicstr) 18210Sstevel@tonic-gate mutex_exit(&un->un_linlck_mx); 18220Sstevel@tonic-gate return (0); 18230Sstevel@tonic-gate 18240Sstevel@tonic-gate no_lock_exit: 18250Sstevel@tonic-gate /* if this is already queued then do not requeue it */ 18260Sstevel@tonic-gate ASSERT(! 
(cs->cs_flags & MD_RCS_LLOCKD)); 18270Sstevel@tonic-gate if (!lock || (cs->cs_flags & MD_RCS_WAITING)) 18280Sstevel@tonic-gate return (1); 18290Sstevel@tonic-gate cs->cs_flags |= MD_RCS_WAITING; 18300Sstevel@tonic-gate cs->cs_un = un; 18310Sstevel@tonic-gate raid_enqueue(cs); 18320Sstevel@tonic-gate if (lock && !panicstr) 18330Sstevel@tonic-gate mutex_exit(&un->un_linlck_mx); 18340Sstevel@tonic-gate return (1); 18350Sstevel@tonic-gate } 18360Sstevel@tonic-gate 18370Sstevel@tonic-gate static void 18380Sstevel@tonic-gate raid_startio(md_raidcs_t *cs) 18390Sstevel@tonic-gate { 18400Sstevel@tonic-gate mdi_unit_t *ui = cs->cs_ps->ps_ui; 18410Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 18420Sstevel@tonic-gate 18430Sstevel@tonic-gate un = md_unit_readerlock(ui); 18440Sstevel@tonic-gate raid_write_io(un, cs); 18450Sstevel@tonic-gate } 18460Sstevel@tonic-gate 18470Sstevel@tonic-gate void 18480Sstevel@tonic-gate raid_io_startup(mr_unit_t *un) 18490Sstevel@tonic-gate { 18500Sstevel@tonic-gate md_raidcs_t *waiting_list, *cs1; 18510Sstevel@tonic-gate md_raidcs_t *previous = NULL, *next = NULL; 18520Sstevel@tonic-gate mdi_unit_t *ui = MDI_UNIT(un->c.un_self_id); 18530Sstevel@tonic-gate kmutex_t *io_list_mutex = &ui->ui_io_lock->io_list_mutex; 18540Sstevel@tonic-gate 18550Sstevel@tonic-gate ASSERT(MUTEX_HELD(&un->un_linlck_mx)); 18560Sstevel@tonic-gate mutex_enter(io_list_mutex); 18570Sstevel@tonic-gate 18580Sstevel@tonic-gate /* 18590Sstevel@tonic-gate * check to be sure there are no reader locks outstanding. If 18600Sstevel@tonic-gate * there are not then pass on the writer lock. 18610Sstevel@tonic-gate */ 18620Sstevel@tonic-gate waiting_list = ui->ui_io_lock->io_list_front; 18630Sstevel@tonic-gate while (waiting_list) { 18640Sstevel@tonic-gate ASSERT(waiting_list->cs_flags & MD_RCS_WAITING); 18650Sstevel@tonic-gate ASSERT(! 
(waiting_list->cs_flags & MD_RCS_LLOCKD)); 18660Sstevel@tonic-gate for (cs1 = un->un_linlck_chn; cs1; cs1 = cs1->cs_linlck_next) 18670Sstevel@tonic-gate if (raid_io_overlaps(waiting_list, cs1) == 1) 18680Sstevel@tonic-gate break; 18690Sstevel@tonic-gate /* 18700Sstevel@tonic-gate * there was an IOs that overlaps this io so go onto 18710Sstevel@tonic-gate * the next io in the waiting list 18720Sstevel@tonic-gate */ 18730Sstevel@tonic-gate if (cs1) { 18740Sstevel@tonic-gate previous = waiting_list; 18750Sstevel@tonic-gate waiting_list = waiting_list->cs_linlck_next; 18760Sstevel@tonic-gate continue; 18770Sstevel@tonic-gate } 18780Sstevel@tonic-gate 18790Sstevel@tonic-gate /* 18800Sstevel@tonic-gate * There are no IOs that overlap this, so remove it from 18810Sstevel@tonic-gate * the waiting queue, and start it 18820Sstevel@tonic-gate */ 18830Sstevel@tonic-gate 18840Sstevel@tonic-gate if (raid_check_pw(waiting_list)) { 18850Sstevel@tonic-gate ASSERT(waiting_list->cs_flags & MD_RCS_WAITING); 18860Sstevel@tonic-gate previous = waiting_list; 18870Sstevel@tonic-gate waiting_list = waiting_list->cs_linlck_next; 18880Sstevel@tonic-gate continue; 18890Sstevel@tonic-gate } 18900Sstevel@tonic-gate ASSERT(waiting_list->cs_flags & MD_RCS_WAITING); 18910Sstevel@tonic-gate 18920Sstevel@tonic-gate next = waiting_list->cs_linlck_next; 18930Sstevel@tonic-gate if (previous) 18940Sstevel@tonic-gate previous->cs_linlck_next = next; 18950Sstevel@tonic-gate else 18960Sstevel@tonic-gate ui->ui_io_lock->io_list_front = next; 18970Sstevel@tonic-gate 18980Sstevel@tonic-gate if (ui->ui_io_lock->io_list_front == NULL) 18990Sstevel@tonic-gate ui->ui_io_lock->io_list_back = NULL; 19000Sstevel@tonic-gate 19010Sstevel@tonic-gate if (ui->ui_io_lock->io_list_back == waiting_list) 19020Sstevel@tonic-gate ui->ui_io_lock->io_list_back = previous; 19030Sstevel@tonic-gate 19040Sstevel@tonic-gate waiting_list->cs_linlck_next = NULL; 19050Sstevel@tonic-gate waiting_list->cs_flags &= ~MD_RCS_WAITING; 
19060Sstevel@tonic-gate STAT_DEC(raid_write_queue_length); 19070Sstevel@tonic-gate if (raid_line_writer_lock(waiting_list, 0)) 19080Sstevel@tonic-gate panic("region locking corrupted"); 19090Sstevel@tonic-gate 19100Sstevel@tonic-gate ASSERT(waiting_list->cs_flags & MD_RCS_LLOCKD); 19110Sstevel@tonic-gate daemon_request(&md_mstr_daemon, raid_startio, 19120Sstevel@tonic-gate (daemon_queue_t *)waiting_list, REQ_OLD); 19130Sstevel@tonic-gate waiting_list = next; 19140Sstevel@tonic-gate 19150Sstevel@tonic-gate } 19160Sstevel@tonic-gate mutex_exit(io_list_mutex); 19170Sstevel@tonic-gate } 19180Sstevel@tonic-gate 19190Sstevel@tonic-gate void 19200Sstevel@tonic-gate raid_line_exit(md_raidcs_t *cs) 19210Sstevel@tonic-gate { 19220Sstevel@tonic-gate mr_unit_t *un; 19230Sstevel@tonic-gate 19240Sstevel@tonic-gate un = cs->cs_ps->ps_un; 19250Sstevel@tonic-gate STAT_CHECK(raid_line_lock_wait, MUTEX_HELD(&un->un_linlck_mx)); 19260Sstevel@tonic-gate mutex_enter(&un->un_linlck_mx); 19270Sstevel@tonic-gate if (cs->cs_flags & MD_RCS_READER) 19280Sstevel@tonic-gate STAT_DEC(raid_reader_locks_active); 19290Sstevel@tonic-gate else 19300Sstevel@tonic-gate STAT_DEC(raid_write_locks_active); 19310Sstevel@tonic-gate 19320Sstevel@tonic-gate if (cs->cs_linlck_prev) 19330Sstevel@tonic-gate cs->cs_linlck_prev->cs_linlck_next = cs->cs_linlck_next; 19340Sstevel@tonic-gate else 19350Sstevel@tonic-gate un->un_linlck_chn = cs->cs_linlck_next; 19360Sstevel@tonic-gate if (cs->cs_linlck_next) 19370Sstevel@tonic-gate cs->cs_linlck_next->cs_linlck_prev = cs->cs_linlck_prev; 19380Sstevel@tonic-gate 19390Sstevel@tonic-gate cs->cs_flags &= ~MD_RCS_LLOCKD; 19400Sstevel@tonic-gate 19410Sstevel@tonic-gate if (un->un_linlck_flg) 19420Sstevel@tonic-gate cv_broadcast(&un->un_linlck_cv); 19430Sstevel@tonic-gate 19440Sstevel@tonic-gate un->un_linlck_flg = 0; 19450Sstevel@tonic-gate cs->cs_line = MD_DISKADDR_ERROR; 19460Sstevel@tonic-gate 19470Sstevel@tonic-gate raid_cancel_pwslot(cs); 19480Sstevel@tonic-gate /* 
	 * now that the lock is dropped go ahead and see if there are any
	 * other writes that can be started up
	 */
	raid_io_startup(un);

	mutex_exit(&un->un_linlck_mx);
}

/*
 * NAMES:	raid_line, raid_pcolumn, raid_dcolumn
 * DESCRIPTION: RAID metadevice APIs for mapping segment # to line #,
 *		data column # and parity column #.
 * PARAMETERS:	int segment - segment number
 *		mr_unit_t *un - pointer to an unit structure
 * RETURNS:	raid_line returns line #
 *		raid_dcolumn returns data column #
 *		raid_pcolumn returns parity column #
 *
 * NOTE: segments past max_orig_segment live on columns appended after
 * the original set (presumably concatenated expansion columns, each
 * holding un_segsincolumn consecutive segments — see the layout example
 * in the raid_iosetup() header below); within the original columns the
 * parity rotates by one column per line.
 */
static diskaddr_t
raid_line(diskaddr_t segment, mr_unit_t *un)
{
	diskaddr_t	adj_seg;
	diskaddr_t	line;
	diskaddr_t	max_orig_segment;

	max_orig_segment = (un->un_origcolumncnt - 1) * un->un_segsincolumn;
	if (segment >= max_orig_segment) {
		/* appended column: line is the offset within that column */
		adj_seg = segment - max_orig_segment;
		line = adj_seg % un->un_segsincolumn;
	} else {
		/* original layout: origcolumncnt - 1 data segments per line */
		line = segment / (un->un_origcolumncnt - 1);
	}
	return (line);
}

uint_t
raid_dcolumn(diskaddr_t segment, mr_unit_t *un)
{
	diskaddr_t	adj_seg;
	diskaddr_t	line;
	diskaddr_t	max_orig_segment;
	uint_t		column;

	max_orig_segment = (un->un_origcolumncnt - 1) * un->un_segsincolumn;
	if (segment >= max_orig_segment) {
		/* appended columns are filled one whole column at a time */
		adj_seg = segment - max_orig_segment;
		column = un->un_origcolumncnt  +
		    (uint_t)(adj_seg / un->un_segsincolumn);
	} else {
		/* rotate left by one column per line to skip the parity */
		line = segment / (un->un_origcolumncnt - 1);
		column = (uint_t)((segment %
		    (un->un_origcolumncnt - 1) + line) % un->un_origcolumncnt);
	}
	return (column);
}

uint_t
raid_pcolumn(diskaddr_t segment, mr_unit_t *un)
{
	diskaddr_t	adj_seg;
	diskaddr_t	line;
	diskaddr_t	max_orig_segment;
	uint_t		column;

	max_orig_segment = (un->un_origcolumncnt - 1) * un->un_segsincolumn;
	if (segment >= max_orig_segment) {
		adj_seg = segment - max_orig_segment;
		line = adj_seg % un->un_segsincolumn;
	} else {
		line = segment / (un->un_origcolumncnt - 1);
	}
	/* parity rotates: line 0 holds it on the last original column */
	column = (uint_t)((line + (un->un_origcolumncnt - 1))
	    % un->un_origcolumncnt);
	return (column);
}


/*
 * Is called in raid_iosetup to probe each column to ensure
 * that all the columns are in 'okay' state and meet the
 * 'full line' requirement.
 * If any column is in error,
 * we don't want to enable the 'full line' flag.  Previously,
 * we would do so and disable it only when an error is
 * detected after the first 'full line' io which is too late
 * and leads to the potential data corruption.
 *
 * RETURNS: 0 - every column resolved and a probe read succeeded
 *	    1 - some column could not be resolved by device id, or its
 *		probe read failed
 */
static int
raid_check_cols(mr_unit_t *un)
{
	buf_t		bp;
	char		*buf;
	mr_column_t	*colptr;
	minor_t		mnum = MD_SID(un);
	int		i;
	int		err = 0;

	/* one-sector scratch buffer shared by all probes */
	buf = kmem_zalloc((uint_t)DEV_BSIZE, KM_SLEEP);

	for (i = 0; i < un->un_totalcolumncnt; i++) {
		md_dev64_t tmpdev;

		colptr = &un->un_column[i];

		tmpdev = colptr->un_dev;
		/*
		 * Open by device id
		 * If this device is hotspared
		 * use the hotspare key
		 */
		tmpdev = md_resolve_bydevid(mnum, tmpdev, HOTSPARED(un, i) ?
		    colptr->un_hs_key : colptr->un_orig_key);

		if (tmpdev == NODEV64) {
			err = 1;
			break;
		}

		colptr->un_dev = tmpdev;

		/*
		 * Synchronous one-sector probe read at un_pwstart
		 * (presumably the column's prewrite area — confirm).
		 * The on-stack buf is re-zeroed for every column.
		 */
		bzero((caddr_t)&bp, sizeof (buf_t));
		bp.b_back = &bp;
		bp.b_forw = &bp;
		bp.b_flags = (B_READ | B_BUSY);
		sema_init(&bp.b_io, 0, NULL,
		    SEMA_DEFAULT, NULL);
		sema_init(&bp.b_sem, 0, NULL,
		    SEMA_DEFAULT, NULL);
		bp.b_edev = md_dev64_to_dev(colptr->un_dev);
		bp.b_lblkno = colptr->un_pwstart;
		bp.b_bcount = DEV_BSIZE;
		bp.b_bufsize = DEV_BSIZE;
		bp.b_un.b_addr = (caddr_t)buf;
		(void) md_call_strategy(&bp, 0, NULL);
		if (biowait(&bp)) {
			err = 1;
			break;
		}
	}

	kmem_free(buf, DEV_BSIZE);
	return (err);
}

/*
 * NAME:	raid_iosetup
 * DESCRIPTION: RAID metadevice specific I/O set up routine which does
 *		all the necessary calculations to determine the location
 *		of the segment for the I/O.
 * PARAMETERS:	mr_unit_t *un - unit number of RAID metadevice
 *		diskaddr_t blkno - block number of the I/O attempt
 *		size_t blkcnt - block count for this I/O
 *		md_raidcs_t *cs - child structure for each segmented I/O
 *
 * RETURNS:	the number of blocks NOT covered by this child I/O
 *		("leftover"); the caller issues further children for them.
 *
 * NOTE:	The following is an example of a raid disk layout:
 *
 *		Total Column = 5
 *		Original Column = 4
 *		Segment Per Column = 10
 *
 *			Col#0	Col#1	Col#2	Col#3	Col#4	Col#5	Col#6
 *		-------------------------------------------------------------
 *		line#0	Seg#0	Seg#1	Seg#2	Parity	Seg#30	Seg#40
 *		line#1	Parity	Seg#3	Seg#4	Seg#5	Seg#31
 *		line#2	Seg#8	Parity	Seg#6	Seg#7	Seg#32
 *		line#3	Seg#10	Seg#11	Parity	Seg#9	Seg#33
 *		line#4	Seg#12	Seg#13	Seg#14	Parity	Seg#34
 *		line#5	Parity	Seg#15	Seg#16	Seg#17	Seg#35
 *		line#6	Seg#20	Parity	Seg#18	Seg#19	Seg#36
 *		line#7	Seg#22	Seg#23	Parity	Seg#21	Seg#37
 *		line#8	Seg#24	Seg#25	Seg#26	Parity	Seg#38
 *		line#9	Parity	Seg#27	Seg#28	Seg#29	Seg#39
 */
static size_t
raid_iosetup(
	mr_unit_t	*un,
	diskaddr_t	blkno,
	size_t		blkcnt,
	md_raidcs_t	*cs
)
{
	diskaddr_t	segment;
	diskaddr_t	segstart;
	diskaddr_t	segoff;
	size_t		leftover;
	diskaddr_t	line;
	uint_t		iosize;
	uint_t		colcnt;

	/* calculate the segment# and offset for the block */
	segment = blkno / un->un_segsize;
	segstart = segment * un->un_segsize;
	segoff = blkno - segstart;
	iosize = un->un_iosize - 1;
	colcnt = un->un_totalcolumncnt - 1;
	line = raid_line(segment, un);
	cs->cs_dcolumn = raid_dcolumn(segment, un);
	cs->cs_pcolumn = raid_pcolumn(segment, un);
	cs->cs_dflags = un->un_column[cs->cs_dcolumn].un_devflags;
	cs->cs_pflags = un->un_column[cs->cs_pcolumn].un_devflags;
	cs->cs_line = line;

	/*
	 * Full-line write fast path: only taken when the request is a
	 * segment-aligned write covering a whole line, no columns have
	 * been attached or errored, the io/segment size limits hold,
	 * and raid_check_cols() proves every column is readable (see
	 * the comment above raid_check_cols for why probing up front
	 * is required to avoid data corruption).
	 */
	if ((cs->cs_ps->ps_flags & MD_RPS_WRITE) &&
	    (UNIT_STATE(un) & RCS_OKAY) &&
	    (segoff == 0) &&
	    (un->un_totalcolumncnt == un->un_origcolumncnt) &&
	    (un->un_segsize < un->un_iosize) &&
	    (un->un_iosize <= un->un_maxio) &&
	    (blkno == line * un->un_segsize * colcnt) &&
	    (blkcnt >= ((un->un_totalcolumncnt -1) * un->un_segsize)) &&
	    (raid_state_cnt(un, RCS_OKAY) == un->un_origcolumncnt) &&
	    (raid_check_cols(un) == 0)) {

		md_raidcbuf_t	**cbufp;
		md_raidcbuf_t	*cbuf;
		int		i, j;

		STAT_INC(raid_full_line_writes);
		leftover = blkcnt - (un->un_segsize * colcnt);
		ASSERT(blkcnt >= (un->un_segsize * colcnt));
		cs->cs_blkno = line * un->un_segsize;
		cs->cs_blkcnt = un->un_segsize;
		cs->cs_lastblk = cs->cs_blkno + cs->cs_blkcnt - 1;
		cs->cs_bcount = dbtob(cs->cs_blkcnt);
		cs->cs_flags |= MD_RCS_LINE;

		/*
		 * Allocate one child buffer per data column other than
		 * the primary data and parity columns, chained off
		 * cs_buflist in column rotation order.
		 */
		cbufp = &cs->cs_buflist;
		for (i = 0; i < un->un_totalcolumncnt; i++) {
			j = cs->cs_dcolumn + i;
			j = j % un->un_totalcolumncnt;

			if ((j == cs->cs_dcolumn) || (j == cs->cs_pcolumn))
				continue;
			cbuf = kmem_cache_alloc(raid_cbuf_cache,
			    MD_ALLOCFLAGS);
			raid_cbuf_init(cbuf);
			cbuf->cbuf_un = cs->cs_un;
			cbuf->cbuf_ps = cs->cs_ps;
			cbuf->cbuf_column = j;
			cbuf->cbuf_bcount = dbtob(un->un_segsize);
			*cbufp = cbuf;
			cbufp = &cbuf->cbuf_next;
		}
		return (leftover);
	}

	/* clip the request to the remainder of the current segment */
	leftover = blkcnt - (un->un_segsize - segoff);
	if (blkcnt > (un->un_segsize - segoff))
		blkcnt -= leftover;
	else
		leftover = 0;

	/* further clip to the unit's maximum transfer size */
	if (blkcnt > (size_t)iosize) {
		leftover += (blkcnt - iosize);
		blkcnt = iosize;
	}

	/* calculate the line# and column# for the segment */
	cs->cs_flags &= ~MD_RCS_LINE;
	cs->cs_blkno = line * un->un_segsize + segoff;
	cs->cs_blkcnt = (uint_t)blkcnt;
	cs->cs_lastblk = cs->cs_blkno + cs->cs_blkcnt - 1;
22100Sstevel@tonic-gate cs->cs_bcount = dbtob((uint_t)blkcnt); 22110Sstevel@tonic-gate return (leftover); 22120Sstevel@tonic-gate } 22130Sstevel@tonic-gate 22140Sstevel@tonic-gate /* 22150Sstevel@tonic-gate * NAME: raid_done 22160Sstevel@tonic-gate * DESCRIPTION: RAID metadevice I/O done interrupt routine 22170Sstevel@tonic-gate * PARAMETERS: struct buf *bp - pointer to a buffer structure 22180Sstevel@tonic-gate */ 22190Sstevel@tonic-gate static void 22200Sstevel@tonic-gate raid_done(struct buf *bp) 22210Sstevel@tonic-gate { 22220Sstevel@tonic-gate md_raidcs_t *cs; 22230Sstevel@tonic-gate int flags, frags; 22240Sstevel@tonic-gate 22250Sstevel@tonic-gate sema_v(&bp->b_io); 22260Sstevel@tonic-gate cs = (md_raidcs_t *)bp->b_chain; 22270Sstevel@tonic-gate 22280Sstevel@tonic-gate ASSERT(cs != NULL); 22290Sstevel@tonic-gate 22300Sstevel@tonic-gate mutex_enter(&cs->cs_mx); 22310Sstevel@tonic-gate if (bp->b_flags & B_ERROR) { 22320Sstevel@tonic-gate cs->cs_flags |= MD_RCS_ERROR; 22330Sstevel@tonic-gate cs->cs_flags &= ~(MD_RCS_ISCALL); 22340Sstevel@tonic-gate } 22350Sstevel@tonic-gate 22360Sstevel@tonic-gate flags = cs->cs_flags; 22370Sstevel@tonic-gate frags = --cs->cs_frags; 22380Sstevel@tonic-gate mutex_exit(&cs->cs_mx); 22390Sstevel@tonic-gate if (frags != 0) { 22400Sstevel@tonic-gate return; 22410Sstevel@tonic-gate } 22420Sstevel@tonic-gate 22430Sstevel@tonic-gate if (flags & MD_RCS_ERROR) { 22440Sstevel@tonic-gate if (cs->cs_error_call) { 22450Sstevel@tonic-gate daemon_request(&md_done_daemon, cs->cs_error_call, 22460Sstevel@tonic-gate (daemon_queue_t *)cs, REQ_OLD); 22470Sstevel@tonic-gate } 22480Sstevel@tonic-gate return; 22490Sstevel@tonic-gate } 22500Sstevel@tonic-gate 22510Sstevel@tonic-gate if (flags & MD_RCS_ISCALL) { 22520Sstevel@tonic-gate cs->cs_flags &= ~(MD_RCS_ISCALL); 22530Sstevel@tonic-gate (*(cs->cs_call))(cs); 22540Sstevel@tonic-gate return; 22550Sstevel@tonic-gate } 22560Sstevel@tonic-gate daemon_request(&md_done_daemon, cs->cs_call, 
22570Sstevel@tonic-gate (daemon_queue_t *)cs, REQ_OLD); 22580Sstevel@tonic-gate } 22590Sstevel@tonic-gate /* 22600Sstevel@tonic-gate * the flag RIO_EXTRA is used when dealing with a column in the process 22610Sstevel@tonic-gate * of being resynced. During the resync, writes may have to take place 22620Sstevel@tonic-gate * on both the original component and a hotspare component. 22630Sstevel@tonic-gate */ 22640Sstevel@tonic-gate #define RIO_DATA 0x00100 /* use data buffer & data column */ 22650Sstevel@tonic-gate #define RIO_PARITY 0x00200 /* use parity buffer & parity column */ 22660Sstevel@tonic-gate #define RIO_WRITE 0x00400 /* issue a write */ 22670Sstevel@tonic-gate #define RIO_READ 0x00800 /* issue a read */ 22680Sstevel@tonic-gate #define RIO_PWIO 0x01000 /* do the I/O to the prewrite entry */ 22690Sstevel@tonic-gate #define RIO_ALT 0x02000 /* do write to alternate device */ 22700Sstevel@tonic-gate #define RIO_EXTRA 0x04000 /* use extra buffer */ 22710Sstevel@tonic-gate 22720Sstevel@tonic-gate #define RIO_COLMASK 0x000ff 22730Sstevel@tonic-gate 22740Sstevel@tonic-gate #define RIO_PREWRITE RIO_WRITE | RIO_PWIO 22750Sstevel@tonic-gate 22760Sstevel@tonic-gate /* 22770Sstevel@tonic-gate * NAME: raidio 22780Sstevel@tonic-gate * DESCRIPTION: RAID metadevice write routine 22790Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to a child structure 22800Sstevel@tonic-gate */ 22810Sstevel@tonic-gate static void 22820Sstevel@tonic-gate raidio(md_raidcs_t *cs, int flags) 22830Sstevel@tonic-gate { 22840Sstevel@tonic-gate buf_t *bp; 22850Sstevel@tonic-gate int column; 22860Sstevel@tonic-gate int flag; 22870Sstevel@tonic-gate void *private; 22880Sstevel@tonic-gate mr_unit_t *un; 22890Sstevel@tonic-gate int iosize; 22900Sstevel@tonic-gate diskaddr_t pwstart; 22910Sstevel@tonic-gate diskaddr_t devstart; 22920Sstevel@tonic-gate md_dev64_t dev; 22930Sstevel@tonic-gate 22940Sstevel@tonic-gate un = cs->cs_un; 22950Sstevel@tonic-gate 22960Sstevel@tonic-gate 
	ASSERT(IO_READER_HELD(un));
	ASSERT(UNIT_READER_HELD(un));

	/* select the buf and buffer: data vs parity, normal vs extra */
	if (flags & RIO_DATA) {
		if (flags & RIO_EXTRA)
			bp = &cs->cs_hbuf;
		else
			bp = &cs->cs_dbuf;
		bp->b_un.b_addr = cs->cs_dbuffer;
		column = cs->cs_dcolumn;
	} else {
		if (flags & RIO_EXTRA)
			bp = &cs->cs_hbuf;
		else
			bp = &cs->cs_pbuf;
		bp->b_un.b_addr = cs->cs_pbuffer;
		column = cs->cs_pcolumn;
	}
	/* low byte of flags, when nonzero, carries an explicit column# + 1 */
	if (flags & RIO_COLMASK)
		column = (flags & RIO_COLMASK) - 1;

	bp->b_bcount = cs->cs_bcount;
	bp->b_bufsize = cs->cs_bcount;
	iosize = un->un_iosize;

	/* check if the hotspared device will be used */
	if (flags & RIO_ALT && (flags & RIO_WRITE)) {
		pwstart = un->un_column[column].un_alt_pwstart;
		devstart = un->un_column[column].un_alt_devstart;
		dev = un->un_column[column].un_alt_dev;
	} else {
		pwstart = un->un_column[column].un_pwstart;
		devstart = un->un_column[column].un_devstart;
		dev = un->un_column[column].un_dev;
	}

	/* if not writing to log skip log header */
	if ((flags & RIO_PWIO) == 0) {
		bp->b_lblkno = devstart + cs->cs_blkno;
		bp->b_un.b_addr += DEV_BSIZE;
	} else {
		/* pre-write I/O includes the one-block header */
		bp->b_bcount += DEV_BSIZE;
		bp->b_bufsize = bp->b_bcount;
		if (flags & RIO_DATA) {
			bp->b_lblkno = cs->cs_dpwslot * iosize + pwstart;
		} else { /* not DATA -> PARITY */
			bp->b_lblkno = cs->cs_ppwslot * iosize + pwstart;
		}
	}

	/* reset direction/error bits; bufs are reused across stages */
	bp->b_flags &= ~(B_READ | B_WRITE | B_ERROR | nv_available);
	bp->b_flags |= B_BUSY;
	if (flags & RIO_READ) {
		bp->b_flags |= B_READ;
	} else {
		bp->b_flags |= B_WRITE;
		/* tag NVRAM-cacheable writes when the policy allows it */
		if ((nv_available && nv_parity && (flags & RIO_PARITY)) ||
		    (nv_available && nv_prewrite && (flags & RIO_PWIO)))
			bp->b_flags |= nv_available;
	}
	bp->b_iodone = (int (*)())raid_done;
	bp->b_edev = md_dev64_to_dev(dev);

	ASSERT((bp->b_edev != 0) && (bp->b_edev != NODEV));

	private = cs->cs_strategy_private;
	flag = cs->cs_strategy_flag;

	md_call_strategy(bp, flag, private);
}

/*
 * NAME:	genstandardparity
 * DESCRIPTION: generate parity for a single-segment write: copies the
 *		user data into the data buffer, XORs it into the parity
 *		buffer, and fills in the pre-write headers (with XOR
 *		checksums) for both buffers.
 * PARAMETERS:	md_raidcs_t *cs - pointer to a child structure
 */
static void
genstandardparity(md_raidcs_t *cs)
{
	uint_t	*dbuf, *pbuf;
	size_t	wordcnt;
	uint_t	dsum = 0;	/* XOR checksum of the data buffer */
	uint_t	psum = 0;	/* XOR checksum of the parity buffer */

	ASSERT((cs->cs_bcount & 0x3) == 0);

	wordcnt = cs->cs_bcount / sizeof (uint_t);

	/* skip the one-block pre-write header at the head of each buffer */
	dbuf = (uint_t *)(void *)(cs->cs_dbuffer + DEV_BSIZE);
	pbuf = (uint_t *)(void *)(cs->cs_pbuffer + DEV_BSIZE);

	/* Word aligned */
	if (((uintptr_t)cs->cs_addr & 0x3) == 0) {
		uint_t	*uwbuf = (uint_t *)(void *)(cs->cs_addr);
		uint_t	uval;

		while (wordcnt--) {
			uval = *uwbuf++;
			/* new parity = old parity ^ old data ^ new data */
			psum ^= (*pbuf = ((*pbuf ^ *dbuf) ^ uval));
			++pbuf;
			*dbuf = uval;
			dsum ^= uval;
			++dbuf;
		}
	} else {
		/* unaligned source: assemble each word a byte at a time */
		uchar_t	*ubbuf = (uchar_t *)(cs->cs_addr);
		union {
			uint_t	wb;
			uchar_t	bb[4];
		} cb;

		while (wordcnt--) {
			cb.bb[0] = *ubbuf++;
			cb.bb[1] = *ubbuf++;
			cb.bb[2] = *ubbuf++;
			cb.bb[3] = *ubbuf++;
			psum ^= (*pbuf = ((*pbuf ^ *dbuf) ^ cb.wb));
			++pbuf;
			*dbuf = cb.wb;
			dsum ^= cb.wb;
			++dbuf;
		}
	}

	/* pre-write headers: each names the other column (2 columns total) */
	RAID_FILLIN_RPW(cs->cs_dbuffer, cs->cs_un, dsum, cs->cs_pcolumn,
	    cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid,
	    2, cs->cs_dcolumn, RAID_PWMAGIC);

	RAID_FILLIN_RPW(cs->cs_pbuffer, cs->cs_un, psum, cs->cs_dcolumn,
	    cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid,
	    2, cs->cs_pcolumn, RAID_PWMAGIC);
}

/*
 * NAME:	genlineparity
 * DESCRIPTION: generate parity for a full-line write: copies one segment
 *		of user data per column, accumulating parity across all
 *		data columns, writes each column's pre-write entry, and
 *		finally issues the parity pre-write.
 * PARAMETERS:	md_raidcs_t *cs - pointer to a child structure
 */
static void
genlineparity(md_raidcs_t *cs)
{

	mr_unit_t	*un = cs->cs_un;
	md_raidcbuf_t	*cbuf;
	uint_t		*pbuf, *dbuf;
	uint_t		*uwbuf;		/* word-aligned cursor into user data */
	uchar_t		*ubbuf;		/* byte cursor for unaligned user data */
	size_t		wordcnt;
	uint_t		psum = 0, dsum = 0;
	size_t		count = un->un_segsize * DEV_BSIZE;
	uint_t		col;
	buf_t		*bp;

	ASSERT((cs->cs_bcount & 0x3) == 0);

	/* skip the one-block pre-write header at the head of each buffer */
	pbuf = (uint_t *)(void *)(cs->cs_pbuffer + DEV_BSIZE);
	dbuf = (uint_t *)(void *)(cs->cs_dbuffer + DEV_BSIZE);
	uwbuf = (uint_t *)(void *)(cs->cs_addr);
	ubbuf = (uchar_t *)(void *)(cs->cs_addr);

	wordcnt = count / sizeof (uint_t);

	/*
	 * First segment: seed both the data buffer and the parity
	 * accumulator with a straight copy of the user data.
	 */
	/* Word aligned */
	if (((uintptr_t)cs->cs_addr & 0x3) == 0) {
		uint_t	uval;

		while (wordcnt--) {
			uval = *uwbuf++;
			*dbuf = uval;
			*pbuf = uval;
			dsum ^= uval;
			++pbuf;
			++dbuf;
		}
	} else {
		union {
			uint_t	wb;
			uchar_t	bb[4];
		} cb;

		while (wordcnt--) {
			cb.bb[0] = *ubbuf++;
			cb.bb[1] = *ubbuf++;
			cb.bb[2] = *ubbuf++;
			cb.bb[3] = *ubbuf++;
			*dbuf = cb.wb;
			*pbuf = cb.wb;
			dsum ^= cb.wb;
			++pbuf;
			++dbuf;
		}
	}

	RAID_FILLIN_RPW(cs->cs_dbuffer, un, dsum, cs->cs_pcolumn,
	    cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid,
	    un->un_totalcolumncnt, cs->cs_dcolumn, RAID_PWMAGIC);

	/* first data segment can go to its pre-write area already */
	raidio(cs, RIO_PREWRITE | RIO_DATA);

	/*
	 * Remaining data columns: copy the next segment of user data
	 * into each cbuf, folding it into the running parity buffer.
	 */
	for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) {

		dsum = 0;
		pbuf = (uint_t *)(void *)(cs->cs_pbuffer + DEV_BSIZE);
		dbuf = (uint_t *)(void *)(cbuf->cbuf_buffer + DEV_BSIZE);

		wordcnt = count / sizeof (uint_t);

		col = cbuf->cbuf_column;

		/* Word aligned */
		if (((uintptr_t)cs->cs_addr & 0x3) == 0) {
			uint_t	uval;

			/*
			 * Only calculate psum when working on the last
			 * data buffer.
			 */
			if (cbuf->cbuf_next == NULL) {
				psum = 0;
				while (wordcnt--) {
					uval = *uwbuf++;
					*dbuf = uval;
					psum ^= (*pbuf ^= uval);
					dsum ^= uval;
					++dbuf;
					++pbuf;
				}
			} else {
				while (wordcnt--) {
					uval = *uwbuf++;
					*dbuf = uval;
					*pbuf ^= uval;
					dsum ^= uval;
					++dbuf;
					++pbuf;
				}
			}
		} else {
			/* unaligned source: byte-assemble each word */
			union {
				uint_t	wb;
				uchar_t	bb[4];
			} cb;

			/*
			 * Only calculate psum when working on the last
			 * data buffer.
			 */
			if (cbuf->cbuf_next == NULL) {
				psum = 0;
				while (wordcnt--) {
					cb.bb[0] = *ubbuf++;
					cb.bb[1] = *ubbuf++;
					cb.bb[2] = *ubbuf++;
					cb.bb[3] = *ubbuf++;
					*dbuf = cb.wb;
					psum ^= (*pbuf ^= cb.wb);
					dsum ^= cb.wb;
					++dbuf;
					++pbuf;
				}
			} else {
				while (wordcnt--) {
					cb.bb[0] = *ubbuf++;
					cb.bb[1] = *ubbuf++;
					cb.bb[2] = *ubbuf++;
					cb.bb[3] = *ubbuf++;
					*dbuf = cb.wb;
					*pbuf ^= cb.wb;
					dsum ^= cb.wb;
					++dbuf;
					++pbuf;
				}
			}
		}
		RAID_FILLIN_RPW(cbuf->cbuf_buffer, un, dsum, cs->cs_pcolumn,
		    cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid,
		    un->un_totalcolumncnt, col, RAID_PWMAGIC);

		/*
		 * fill in buffer for write to prewrite area
		 */
		bp = &cbuf->cbuf_bp;
		bp->b_un.b_addr = cbuf->cbuf_buffer;
		bp->b_bcount = cbuf->cbuf_bcount + DEV_BSIZE;
		bp->b_bufsize = bp->b_bcount;
		bp->b_lblkno = (cbuf->cbuf_pwslot * un->un_iosize) +
		    un->un_column[col].un_pwstart;
		bp->b_flags = B_WRITE | B_BUSY;
		if (nv_available && nv_prewrite)
			bp->b_flags |= nv_available;
		bp->b_iodone = (int (*)())raid_done;
		bp->b_edev = md_dev64_to_dev(un->un_column[col].un_dev);
		bp->b_chain = (struct buf *)cs;
		md_call_strategy(bp,
		    cs->cs_strategy_flag, cs->cs_strategy_private);
	}

	/* parity pre-write header names all columns of the line */
	RAID_FILLIN_RPW(cs->cs_pbuffer, un, psum, cs->cs_dcolumn,
	    cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid,
	    un->un_totalcolumncnt, cs->cs_pcolumn, RAID_PWMAGIC);

	raidio(cs, RIO_PREWRITE | RIO_PARITY);
}

/*
 * NAME:	raid_readregenloop
 * DESCRIPTION: RAID metadevice read-regeneration loop.  Reconstructs the
 *		data of an errored column by reading every other column in
 *		turn (via raidio) and XORing it into the data buffer; when
 *		all columns have been folded in, copies the regenerated
 *		data to the caller's buffer.
 *		(Original header mislabeled this a "write routine".)
 * PARAMETERS:	md_raidcs_t *cs - pointer to a child structure
 */
static void
raid_readregenloop(md_raidcs_t *cs)
{
	mr_unit_t	*un;
	md_raidps_t	*ps;
	uint_t		*dbuf;
	uint_t		*pbuf;
	size_t		wordcnt;

	un = cs->cs_un;

	/*
	 * XOR the parity with data bytes, must skip the
	 * pre-write entry header in all data/parity buffers
	 */
	wordcnt = cs->cs_bcount / sizeof (uint_t);
	dbuf = (uint_t *)(void *)(cs->cs_dbuffer + DEV_BSIZE);
	pbuf = (uint_t *)(void *)(cs->cs_pbuffer + DEV_BSIZE);
	while (wordcnt--)
		*dbuf++ ^= *pbuf++;

	/* bump up the loop count */
	cs->cs_loop++;

	/* skip the errored component */
	if (cs->cs_loop == cs->cs_dcolumn)
		cs->cs_loop++;

	if (cs->cs_loop != un->un_totalcolumncnt) {
		/* more columns to fold in: read the next one into pbuffer */
		cs->cs_frags = 1;
		raidio(cs, RIO_PARITY | RIO_READ | (cs->cs_loop + 1));
		return;
	}
	/* reached the end of the loop: data fully regenerated */
	ps = cs->cs_ps;
	bcopy(cs->cs_dbuffer + DEV_BSIZE, cs->cs_addr, cs->cs_bcount);
	raid_free_child(cs, 1);

	/* decrement readfrags */
	raid_free_parent(ps, RFP_DECR_READFRAGS | RFP_RLS_LOCK);
}

/*
 * NAME:	raid_read_io
 * DESCRIPTION: RAID metadevice read I/O routine
 * PARAMETERS:	mr_unit_t *un - pointer to a unit structure
 *		md_raidcs_t *cs - pointer to a child structure
 */
static void
raid_read_io(mr_unit_t *un, md_raidcs_t *cs)
{
	int	flag;
	void		*private;
	buf_t		*bp;
	buf_t		*pb = cs->cs_ps->ps_bp;
	mr_column_t	*column;

	flag = cs->cs_strategy_flag;
	private = cs->cs_strategy_private;
	column = &un->un_column[cs->cs_dcolumn];

	/*
	 * The component to be read is good, simply set up bp structure
	 * and call low level md routine doing the read.
	 */

	if (COLUMN_ISOKAY(un, cs->cs_dcolumn) ||
	    (COLUMN_ISLASTERR(un, cs->cs_dcolumn) &&
	    (cs->cs_flags & MD_RCS_RECOVERY) == 0)) {
		dev_t ddi_dev; /* needed for bioclone, so not md_dev64_t */
		ddi_dev = md_dev64_to_dev(column->un_dev);

		bp = &cs->cs_dbuf;
		bp = md_bioclone(pb, cs->cs_offset, cs->cs_bcount, ddi_dev,
		    column->un_devstart + cs->cs_blkno,
		    (int (*)())raid_done, bp, KM_NOSLEEP);

		bp->b_chain = (buf_t *)cs;

		cs->cs_frags = 1;
		cs->cs_error_call = raid_read_error;
		cs->cs_retry_call = raid_read_retry;
		cs->cs_flags |= MD_RCS_ISCALL;
		cs->cs_stage = RAID_READ_DONE;
		cs->cs_call = raid_stage;

		ASSERT(bp->b_edev != 0);

		md_call_strategy(bp, flag, private);
		return;
	}

	/*
	 * The component to be read is bad, have to go through
	 * raid specific method to read data from other members.
	 */
	cs->cs_loop = 0;
	/*
	 * NOTE: always get dbuffer before pbuffer
	 *	 and get both buffers before pwslot
	 *	 otherwise a deadlock could be introduced.
	 */
	raid_mapin_buf(cs);
	getdbuffer(cs);
	getpbuffer(cs);
	/* never start the regen loop on the errored (data) column */
	if (cs->cs_loop == cs->cs_dcolumn)
		cs->cs_loop++;

	/* zero out data buffer for use as a data sink */
	bzero(cs->cs_dbuffer + DEV_BSIZE, cs->cs_bcount);
	cs->cs_stage = RAID_NONE;
	cs->cs_call = raid_readregenloop;
	cs->cs_error_call = raid_read_error;
	cs->cs_retry_call = raid_read_no_retry;
	cs->cs_frags = 1;

	/* use parity buffer to read other columns */
	raidio(cs, RIO_PARITY | RIO_READ | (cs->cs_loop + 1));
}

/*
 * NAME:	raid_read
 * DESCRIPTION: RAID metadevice read routine: validates the request range,
 *		triggers parity regeneration when the unit is in REGEN
 *		state, then hands off to raid_read_io.
 *		(Original header mislabeled this a "write routine".)
 * PARAMETERS:	mr_unit_t *un - pointer to a unit structure
 *		md_raidcs_t *cs - pointer to a child structure
 */
static int
raid_read(mr_unit_t *un, md_raidcs_t *cs)
{
	int		error = 0;
	md_raidps_t	*ps;
	mdi_unit_t	*ui;
	minor_t		mnum;

	ASSERT(IO_READER_HELD(un));
	ps = cs->cs_ps;
	ui = ps->ps_ui;
	raid_line_reader_lock(cs, 0);
	/* re-acquire the unit under the reader lock; it may have moved */
	un = (mr_unit_t *)md_unit_readerlock(ui);
	ASSERT(UNIT_STATE(un) != RUS_INIT);
	mnum = MD_SID(un);
	cs->cs_un = un;

	/* make
sure the read doesn't go beyond the end of the column */ 27440Sstevel@tonic-gate if (cs->cs_blkno + cs->cs_blkcnt > 27450Sstevel@tonic-gate un->un_segsize * un->un_segsincolumn) { 27460Sstevel@tonic-gate error = ENXIO; 27470Sstevel@tonic-gate } 27480Sstevel@tonic-gate if (error) 27490Sstevel@tonic-gate goto rerror; 27500Sstevel@tonic-gate 27510Sstevel@tonic-gate if (un->un_state & RUS_REGEN) { 27520Sstevel@tonic-gate raid_regen_parity(cs); 27530Sstevel@tonic-gate un = MD_UNIT(mnum); 27540Sstevel@tonic-gate cs->cs_un = un; 27550Sstevel@tonic-gate } 27560Sstevel@tonic-gate 27570Sstevel@tonic-gate raid_read_io(un, cs); 27580Sstevel@tonic-gate return (0); 27590Sstevel@tonic-gate 27600Sstevel@tonic-gate rerror: 27610Sstevel@tonic-gate raid_error_parent(ps, error); 27620Sstevel@tonic-gate raid_free_child(cs, 1); 27630Sstevel@tonic-gate /* decrement readfrags */ 27640Sstevel@tonic-gate raid_free_parent(ps, RFP_DECR_READFRAGS | RFP_RLS_LOCK); 27650Sstevel@tonic-gate return (0); 27660Sstevel@tonic-gate } 27670Sstevel@tonic-gate 27680Sstevel@tonic-gate /* 27690Sstevel@tonic-gate * NAME: raid_write_err_retry 27700Sstevel@tonic-gate * DESCRIPTION: RAID metadevice write retry routine 27710Sstevel@tonic-gate * write was for parity or data only; 27720Sstevel@tonic-gate * complete write with error, no recovery possible 27730Sstevel@tonic-gate * PARAMETERS: mr_unit_t *un - pointer to a unit structure 27740Sstevel@tonic-gate * md_raidcs_t *cs - pointer to a child structure 27750Sstevel@tonic-gate */ 27760Sstevel@tonic-gate /*ARGSUSED*/ 27770Sstevel@tonic-gate static void 27780Sstevel@tonic-gate raid_write_err_retry(mr_unit_t *un, md_raidcs_t *cs) 27790Sstevel@tonic-gate { 27800Sstevel@tonic-gate md_raidps_t *ps = cs->cs_ps; 27810Sstevel@tonic-gate int flags = RFP_DECR_FRAGS | RFP_RLS_LOCK; 27820Sstevel@tonic-gate 27830Sstevel@tonic-gate /* decrement pwfrags if needed, and frags */ 27840Sstevel@tonic-gate if (!(cs->cs_flags & MD_RCS_PWDONE)) 27850Sstevel@tonic-gate flags |= 
RFP_DECR_PWFRAGS; 27860Sstevel@tonic-gate raid_error_parent(ps, EIO); 27870Sstevel@tonic-gate raid_free_child(cs, 1); 27880Sstevel@tonic-gate raid_free_parent(ps, flags); 27890Sstevel@tonic-gate } 27900Sstevel@tonic-gate 27910Sstevel@tonic-gate /* 27920Sstevel@tonic-gate * NAME: raid_write_err_retry 27930Sstevel@tonic-gate * DESCRIPTION: RAID metadevice write retry routine 27940Sstevel@tonic-gate * write is too far along to retry and parent 27950Sstevel@tonic-gate * has already been signaled with iodone. 27960Sstevel@tonic-gate * PARAMETERS: mr_unit_t *un - pointer to a unit structure 27970Sstevel@tonic-gate * md_raidcs_t *cs - pointer to a child structure 27980Sstevel@tonic-gate */ 27990Sstevel@tonic-gate /*ARGSUSED*/ 28000Sstevel@tonic-gate static void 28010Sstevel@tonic-gate raid_write_no_retry(mr_unit_t *un, md_raidcs_t *cs) 28020Sstevel@tonic-gate { 28030Sstevel@tonic-gate md_raidps_t *ps = cs->cs_ps; 28040Sstevel@tonic-gate int flags = RFP_DECR_FRAGS | RFP_RLS_LOCK; 28050Sstevel@tonic-gate 28060Sstevel@tonic-gate /* decrement pwfrags if needed, and frags */ 28070Sstevel@tonic-gate if (!(cs->cs_flags & MD_RCS_PWDONE)) 28080Sstevel@tonic-gate flags |= RFP_DECR_PWFRAGS; 28090Sstevel@tonic-gate raid_free_child(cs, 1); 28100Sstevel@tonic-gate raid_free_parent(ps, flags); 28110Sstevel@tonic-gate } 28120Sstevel@tonic-gate 28130Sstevel@tonic-gate /* 28140Sstevel@tonic-gate * NAME: raid_write_retry 28150Sstevel@tonic-gate * DESCRIPTION: RAID metadevice write retry routine 28160Sstevel@tonic-gate * PARAMETERS: mr_unit_t *un - pointer to a unit structure 28170Sstevel@tonic-gate * md_raidcs_t *cs - pointer to a child structure 28180Sstevel@tonic-gate */ 28190Sstevel@tonic-gate static void 28200Sstevel@tonic-gate raid_write_retry(mr_unit_t *un, md_raidcs_t *cs) 28210Sstevel@tonic-gate { 28220Sstevel@tonic-gate md_raidps_t *ps; 28230Sstevel@tonic-gate 28240Sstevel@tonic-gate ps = cs->cs_ps; 28250Sstevel@tonic-gate 28260Sstevel@tonic-gate /* re-initialize the buf_t 
structure for raid_write() */ 28270Sstevel@tonic-gate cs->cs_dbuf.b_chain = (struct buf *)cs; 28280Sstevel@tonic-gate cs->cs_dbuf.b_back = &cs->cs_dbuf; 28290Sstevel@tonic-gate cs->cs_dbuf.b_forw = &cs->cs_dbuf; 28300Sstevel@tonic-gate cs->cs_dbuf.b_flags = B_BUSY; /* initialize flags */ 28310Sstevel@tonic-gate cs->cs_dbuf.b_error = 0; /* initialize error */ 28320Sstevel@tonic-gate cs->cs_dbuf.b_offset = -1; 28330Sstevel@tonic-gate /* Initialize semaphores */ 28340Sstevel@tonic-gate sema_init(&cs->cs_dbuf.b_io, 0, NULL, 28350Sstevel@tonic-gate SEMA_DEFAULT, NULL); 28360Sstevel@tonic-gate sema_init(&cs->cs_dbuf.b_sem, 0, NULL, 28370Sstevel@tonic-gate SEMA_DEFAULT, NULL); 28380Sstevel@tonic-gate 28390Sstevel@tonic-gate cs->cs_pbuf.b_chain = (struct buf *)cs; 28400Sstevel@tonic-gate cs->cs_pbuf.b_back = &cs->cs_pbuf; 28410Sstevel@tonic-gate cs->cs_pbuf.b_forw = &cs->cs_pbuf; 28420Sstevel@tonic-gate cs->cs_pbuf.b_flags = B_BUSY; /* initialize flags */ 28430Sstevel@tonic-gate cs->cs_pbuf.b_error = 0; /* initialize error */ 28440Sstevel@tonic-gate cs->cs_pbuf.b_offset = -1; 28450Sstevel@tonic-gate sema_init(&cs->cs_pbuf.b_io, 0, NULL, 28460Sstevel@tonic-gate SEMA_DEFAULT, NULL); 28470Sstevel@tonic-gate sema_init(&cs->cs_pbuf.b_sem, 0, NULL, 28480Sstevel@tonic-gate SEMA_DEFAULT, NULL); 28490Sstevel@tonic-gate 28500Sstevel@tonic-gate cs->cs_hbuf.b_chain = (struct buf *)cs; 28510Sstevel@tonic-gate cs->cs_hbuf.b_back = &cs->cs_hbuf; 28520Sstevel@tonic-gate cs->cs_hbuf.b_forw = &cs->cs_hbuf; 28530Sstevel@tonic-gate cs->cs_hbuf.b_flags = B_BUSY; /* initialize flags */ 28540Sstevel@tonic-gate cs->cs_hbuf.b_error = 0; /* initialize error */ 28550Sstevel@tonic-gate cs->cs_hbuf.b_offset = -1; 28560Sstevel@tonic-gate sema_init(&cs->cs_hbuf.b_io, 0, NULL, 28570Sstevel@tonic-gate SEMA_DEFAULT, NULL); 28580Sstevel@tonic-gate sema_init(&cs->cs_hbuf.b_sem, 0, NULL, 28590Sstevel@tonic-gate SEMA_DEFAULT, NULL); 28600Sstevel@tonic-gate 28610Sstevel@tonic-gate cs->cs_flags &= 
~(MD_RCS_ERROR); 28620Sstevel@tonic-gate /* 28630Sstevel@tonic-gate * If we have already done'ed the i/o but have done prewrite 28640Sstevel@tonic-gate * on this child, then reset PWDONE flag and bump pwfrags before 28650Sstevel@tonic-gate * restarting i/o. 28660Sstevel@tonic-gate * If pwfrags is zero, we have already 'iodone'd the i/o so 28670Sstevel@tonic-gate * leave things alone. We don't want to re-'done' it. 28680Sstevel@tonic-gate */ 28690Sstevel@tonic-gate mutex_enter(&ps->ps_mx); 28700Sstevel@tonic-gate if (cs->cs_flags & MD_RCS_PWDONE) { 28710Sstevel@tonic-gate cs->cs_flags &= ~MD_RCS_PWDONE; 28720Sstevel@tonic-gate ps->ps_pwfrags++; 28730Sstevel@tonic-gate } 28740Sstevel@tonic-gate mutex_exit(&ps->ps_mx); 28750Sstevel@tonic-gate raid_write_io(un, cs); 28760Sstevel@tonic-gate } 28770Sstevel@tonic-gate 28780Sstevel@tonic-gate /* 28790Sstevel@tonic-gate * NAME: raid_wrerr 28800Sstevel@tonic-gate * DESCRIPTION: RAID metadevice write routine 28810Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to a child structure 28820Sstevel@tonic-gate * LOCKS: must obtain unit writer lock while calling raid_error_state 28830Sstevel@tonic-gate * since a unit or column state transition may take place. 28840Sstevel@tonic-gate * must obtain unit reader lock to retry I/O. 
28850Sstevel@tonic-gate */ 28860Sstevel@tonic-gate static void 28870Sstevel@tonic-gate raid_wrerr(md_raidcs_t *cs) 28880Sstevel@tonic-gate { 28890Sstevel@tonic-gate md_raidps_t *ps; 28900Sstevel@tonic-gate mdi_unit_t *ui; 28910Sstevel@tonic-gate mr_unit_t *un; 28920Sstevel@tonic-gate md_raidcbuf_t *cbuf; 28930Sstevel@tonic-gate 28940Sstevel@tonic-gate ps = cs->cs_ps; 28950Sstevel@tonic-gate ui = ps->ps_ui; 28960Sstevel@tonic-gate 28970Sstevel@tonic-gate un = (mr_unit_t *)md_unit_writerlock(ui); 28980Sstevel@tonic-gate ASSERT(un != 0); 28990Sstevel@tonic-gate 29000Sstevel@tonic-gate if (cs->cs_dbuf.b_flags & B_ERROR) 29010Sstevel@tonic-gate (void) raid_error_state(un, &cs->cs_dbuf); 29020Sstevel@tonic-gate if (cs->cs_pbuf.b_flags & B_ERROR) 29030Sstevel@tonic-gate (void) raid_error_state(un, &cs->cs_pbuf); 29040Sstevel@tonic-gate if (cs->cs_hbuf.b_flags & B_ERROR) 29050Sstevel@tonic-gate (void) raid_error_state(un, &cs->cs_hbuf); 29060Sstevel@tonic-gate for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) 29070Sstevel@tonic-gate if (cbuf->cbuf_bp.b_flags & B_ERROR) 29080Sstevel@tonic-gate (void) raid_error_state(un, &cbuf->cbuf_bp); 29090Sstevel@tonic-gate 29100Sstevel@tonic-gate md_unit_writerexit(ui); 29110Sstevel@tonic-gate 29120Sstevel@tonic-gate ps->ps_flags |= MD_RPS_HSREQ; 29130Sstevel@tonic-gate 29140Sstevel@tonic-gate un = (mr_unit_t *)md_unit_readerlock(ui); 29150Sstevel@tonic-gate 29160Sstevel@tonic-gate /* now attempt the appropriate retry routine */ 29170Sstevel@tonic-gate (*(cs->cs_retry_call))(un, cs); 29180Sstevel@tonic-gate } 29190Sstevel@tonic-gate /* 29200Sstevel@tonic-gate * NAMES: raid_write_error 29210Sstevel@tonic-gate * DESCRIPTION: I/O error handling routine for a RAID metadevice write 29220Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to child structure 29230Sstevel@tonic-gate */ 29240Sstevel@tonic-gate /*ARGSUSED*/ 29250Sstevel@tonic-gate static void 29260Sstevel@tonic-gate raid_write_error(md_raidcs_t *cs) 
29270Sstevel@tonic-gate { 29280Sstevel@tonic-gate md_raidps_t *ps; 29290Sstevel@tonic-gate mdi_unit_t *ui; 29300Sstevel@tonic-gate mr_unit_t *un; 29310Sstevel@tonic-gate md_raidcbuf_t *cbuf; 29320Sstevel@tonic-gate set_t setno; 29330Sstevel@tonic-gate 29340Sstevel@tonic-gate ps = cs->cs_ps; 29350Sstevel@tonic-gate ui = ps->ps_ui; 29360Sstevel@tonic-gate un = cs->cs_un; 29370Sstevel@tonic-gate 29380Sstevel@tonic-gate setno = MD_UN2SET(un); 29390Sstevel@tonic-gate 29400Sstevel@tonic-gate /* 29410Sstevel@tonic-gate * locate each buf that is in error on this io and then 29420Sstevel@tonic-gate * output an error message 29430Sstevel@tonic-gate */ 29440Sstevel@tonic-gate if ((cs->cs_dbuf.b_flags & B_ERROR) && 29450Sstevel@tonic-gate (COLUMN_STATE(un, cs->cs_dcolumn) != RCS_ERRED) && 29460Sstevel@tonic-gate (COLUMN_STATE(un, cs->cs_dcolumn) != RCS_LAST_ERRED)) 29470Sstevel@tonic-gate cmn_err(CE_WARN, "md %s: write error on %s", 29480Sstevel@tonic-gate md_shortname(MD_SID(un)), 29490Sstevel@tonic-gate md_devname(setno, md_expldev(cs->cs_dbuf.b_edev), NULL, 0)); 29500Sstevel@tonic-gate 29510Sstevel@tonic-gate if ((cs->cs_pbuf.b_flags & B_ERROR) && 29520Sstevel@tonic-gate (COLUMN_STATE(un, cs->cs_pcolumn) != RCS_ERRED) && 29530Sstevel@tonic-gate (COLUMN_STATE(un, cs->cs_pcolumn) != RCS_LAST_ERRED)) 29540Sstevel@tonic-gate cmn_err(CE_WARN, "md %s: write error on %s", 29550Sstevel@tonic-gate md_shortname(MD_SID(un)), 29560Sstevel@tonic-gate md_devname(setno, md_expldev(cs->cs_pbuf.b_edev), NULL, 0)); 29570Sstevel@tonic-gate 29580Sstevel@tonic-gate for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) 29590Sstevel@tonic-gate if ((cbuf->cbuf_bp.b_flags & B_ERROR) && 29600Sstevel@tonic-gate (COLUMN_STATE(un, cbuf->cbuf_column) != RCS_ERRED) && 29610Sstevel@tonic-gate (COLUMN_STATE(un, cbuf->cbuf_column) != RCS_LAST_ERRED)) 29620Sstevel@tonic-gate cmn_err(CE_WARN, "md %s: write error on %s", 29630Sstevel@tonic-gate md_shortname(MD_SID(un)), 29640Sstevel@tonic-gate 
md_devname(setno, md_expldev(cbuf->cbuf_bp.b_edev), 29650Sstevel@tonic-gate NULL, 0)); 29660Sstevel@tonic-gate 29670Sstevel@tonic-gate md_unit_readerexit(ui); 29680Sstevel@tonic-gate 29690Sstevel@tonic-gate ASSERT(cs->cs_frags == 0); 29700Sstevel@tonic-gate 29710Sstevel@tonic-gate /* now schedule processing for possible state change */ 29720Sstevel@tonic-gate daemon_request(&md_mstr_daemon, raid_wrerr, 29730Sstevel@tonic-gate (daemon_queue_t *)cs, REQ_OLD); 29740Sstevel@tonic-gate 29750Sstevel@tonic-gate } 29760Sstevel@tonic-gate 29770Sstevel@tonic-gate /* 29780Sstevel@tonic-gate * NAME: raid_write_ponly 29790Sstevel@tonic-gate * DESCRIPTION: RAID metadevice write routine 29800Sstevel@tonic-gate * in the case where only the parity column can be written 29810Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to a child structure 29820Sstevel@tonic-gate */ 29830Sstevel@tonic-gate static void 29840Sstevel@tonic-gate raid_write_ponly(md_raidcs_t *cs) 29850Sstevel@tonic-gate { 29860Sstevel@tonic-gate md_raidps_t *ps; 29870Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 29880Sstevel@tonic-gate 29890Sstevel@tonic-gate ps = cs->cs_ps; 29900Sstevel@tonic-gate /* decrement pwfrags if needed, but not frags */ 29910Sstevel@tonic-gate ASSERT(!(cs->cs_flags & MD_RCS_PWDONE)); 29920Sstevel@tonic-gate raid_free_parent(ps, RFP_DECR_PWFRAGS); 29930Sstevel@tonic-gate cs->cs_flags |= MD_RCS_PWDONE; 29940Sstevel@tonic-gate cs->cs_frags = 1; 29950Sstevel@tonic-gate cs->cs_stage = RAID_WRITE_PONLY_DONE; 29960Sstevel@tonic-gate cs->cs_call = raid_stage; 29970Sstevel@tonic-gate cs->cs_error_call = raid_write_error; 29980Sstevel@tonic-gate cs->cs_retry_call = raid_write_no_retry; 29990Sstevel@tonic-gate if (WRITE_ALT(un, cs->cs_pcolumn)) { 30000Sstevel@tonic-gate cs->cs_frags++; 30010Sstevel@tonic-gate raidio(cs, RIO_ALT | RIO_EXTRA | RIO_PARITY | RIO_WRITE); 30020Sstevel@tonic-gate } 30030Sstevel@tonic-gate raidio(cs, RIO_PARITY | RIO_WRITE); 30040Sstevel@tonic-gate } 
30050Sstevel@tonic-gate 30060Sstevel@tonic-gate /* 30070Sstevel@tonic-gate * NAME: raid_write_ploop 30080Sstevel@tonic-gate * DESCRIPTION: RAID metadevice write routine, constructs parity from 30090Sstevel@tonic-gate * data in other columns. 30100Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to a child structure 30110Sstevel@tonic-gate */ 30120Sstevel@tonic-gate static void 30130Sstevel@tonic-gate raid_write_ploop(md_raidcs_t *cs) 30140Sstevel@tonic-gate { 30150Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 30160Sstevel@tonic-gate uint_t *dbuf; 30170Sstevel@tonic-gate uint_t *pbuf; 30180Sstevel@tonic-gate size_t wordcnt; 30190Sstevel@tonic-gate uint_t psum = 0; 30200Sstevel@tonic-gate 30210Sstevel@tonic-gate wordcnt = cs->cs_bcount / sizeof (uint_t); 30220Sstevel@tonic-gate dbuf = (uint_t *)(void *)(cs->cs_dbuffer + DEV_BSIZE); 30230Sstevel@tonic-gate pbuf = (uint_t *)(void *)(cs->cs_pbuffer + DEV_BSIZE); 30240Sstevel@tonic-gate while (wordcnt--) 30250Sstevel@tonic-gate *pbuf++ ^= *dbuf++; 30260Sstevel@tonic-gate cs->cs_loop++; 30270Sstevel@tonic-gate 30280Sstevel@tonic-gate /* 30290Sstevel@tonic-gate * build parity from scratch using new data, 30300Sstevel@tonic-gate * skip reading the data and parity columns. 
30310Sstevel@tonic-gate */ 30320Sstevel@tonic-gate while (cs->cs_loop == cs->cs_dcolumn || cs->cs_loop == cs->cs_pcolumn) 30330Sstevel@tonic-gate cs->cs_loop++; 30340Sstevel@tonic-gate 30350Sstevel@tonic-gate if (cs->cs_loop != un->un_totalcolumncnt) { 30360Sstevel@tonic-gate cs->cs_frags = 1; 30370Sstevel@tonic-gate raidio(cs, RIO_DATA | RIO_READ | (cs->cs_loop + 1)); 30380Sstevel@tonic-gate return; 30390Sstevel@tonic-gate } 30400Sstevel@tonic-gate 30410Sstevel@tonic-gate /* construct checksum for parity buffer */ 30420Sstevel@tonic-gate wordcnt = cs->cs_bcount / sizeof (uint_t); 30430Sstevel@tonic-gate pbuf = (uint_t *)(void *)(cs->cs_pbuffer + DEV_BSIZE); 30440Sstevel@tonic-gate while (wordcnt--) { 30450Sstevel@tonic-gate psum ^= *pbuf; 30460Sstevel@tonic-gate pbuf++; 30470Sstevel@tonic-gate } 30480Sstevel@tonic-gate RAID_FILLIN_RPW(cs->cs_pbuffer, un, psum, -1, 30490Sstevel@tonic-gate cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid, 30500Sstevel@tonic-gate 1, cs->cs_pcolumn, RAID_PWMAGIC); 30510Sstevel@tonic-gate 30520Sstevel@tonic-gate cs->cs_stage = RAID_NONE; 30530Sstevel@tonic-gate cs->cs_call = raid_write_ponly; 30540Sstevel@tonic-gate cs->cs_error_call = raid_write_error; 30550Sstevel@tonic-gate cs->cs_retry_call = raid_write_err_retry; 30560Sstevel@tonic-gate cs->cs_frags = 1; 30570Sstevel@tonic-gate if (WRITE_ALT(un, cs->cs_pcolumn)) { 30580Sstevel@tonic-gate cs->cs_frags++; 30590Sstevel@tonic-gate raidio(cs, RIO_ALT | RIO_EXTRA | RIO_PARITY | RIO_PREWRITE); 30600Sstevel@tonic-gate } 30610Sstevel@tonic-gate raidio(cs, RIO_PARITY | RIO_PREWRITE); 30620Sstevel@tonic-gate } 30630Sstevel@tonic-gate 30640Sstevel@tonic-gate /* 30650Sstevel@tonic-gate * NAME: raid_write_donly 30660Sstevel@tonic-gate * DESCRIPTION: RAID metadevice write routine 30670Sstevel@tonic-gate * Completed writing data to prewrite entry 30680Sstevel@tonic-gate * in the case where only the data column can be written 30690Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to a child 
structure 30700Sstevel@tonic-gate */ 30710Sstevel@tonic-gate static void 30720Sstevel@tonic-gate raid_write_donly(md_raidcs_t *cs) 30730Sstevel@tonic-gate { 30740Sstevel@tonic-gate md_raidps_t *ps; 30750Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 30760Sstevel@tonic-gate 30770Sstevel@tonic-gate ps = cs->cs_ps; 30780Sstevel@tonic-gate /* WARNING: don't release unit reader lock here... */ 30790Sstevel@tonic-gate /* decrement pwfrags if needed, but not frags */ 30800Sstevel@tonic-gate ASSERT(!(cs->cs_flags & MD_RCS_PWDONE)); 30810Sstevel@tonic-gate raid_free_parent(ps, RFP_DECR_PWFRAGS); 30820Sstevel@tonic-gate cs->cs_flags |= MD_RCS_PWDONE; 30830Sstevel@tonic-gate cs->cs_frags = 1; 30840Sstevel@tonic-gate cs->cs_stage = RAID_WRITE_DONLY_DONE; 30850Sstevel@tonic-gate cs->cs_call = raid_stage; 30860Sstevel@tonic-gate cs->cs_error_call = raid_write_error; 30870Sstevel@tonic-gate cs->cs_retry_call = raid_write_err_retry; 30880Sstevel@tonic-gate if (WRITE_ALT(un, cs->cs_dcolumn)) { 30890Sstevel@tonic-gate cs->cs_frags++; 30900Sstevel@tonic-gate raidio(cs, RIO_ALT | RIO_EXTRA | RIO_DATA | RIO_WRITE); 30910Sstevel@tonic-gate } 30920Sstevel@tonic-gate raidio(cs, RIO_DATA | RIO_WRITE); 30930Sstevel@tonic-gate } 30940Sstevel@tonic-gate 30950Sstevel@tonic-gate /* 30960Sstevel@tonic-gate * NAME: raid_write_got_old 30970Sstevel@tonic-gate * DESCRIPTION: RAID metadevice write routine 30980Sstevel@tonic-gate * completed read of old data and old parity 30990Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to a child structure 31000Sstevel@tonic-gate */ 31010Sstevel@tonic-gate static void 31020Sstevel@tonic-gate raid_write_got_old(md_raidcs_t *cs) 31030Sstevel@tonic-gate { 31040Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 31050Sstevel@tonic-gate 31060Sstevel@tonic-gate ASSERT(IO_READER_HELD(cs->cs_un)); 31070Sstevel@tonic-gate ASSERT(UNIT_READER_HELD(cs->cs_un)); 31080Sstevel@tonic-gate 31090Sstevel@tonic-gate raid_mapin_buf(cs); 31100Sstevel@tonic-gate 
genstandardparity(cs); 31110Sstevel@tonic-gate cs->cs_frags = 2; 31120Sstevel@tonic-gate cs->cs_call = raid_stage; 31130Sstevel@tonic-gate cs->cs_stage = RAID_PREWRITE_DONE; 31140Sstevel@tonic-gate cs->cs_error_call = raid_write_error; 31150Sstevel@tonic-gate cs->cs_retry_call = raid_write_retry; 31160Sstevel@tonic-gate 31170Sstevel@tonic-gate if (WRITE_ALT(un, cs->cs_dcolumn)) { 31180Sstevel@tonic-gate cs->cs_frags++; 31190Sstevel@tonic-gate raidio(cs, RIO_ALT | RIO_EXTRA | RIO_DATA | RIO_PREWRITE); 31200Sstevel@tonic-gate } 31210Sstevel@tonic-gate 31220Sstevel@tonic-gate if (WRITE_ALT(un, cs->cs_pcolumn)) { 31230Sstevel@tonic-gate cs->cs_frags++; 31240Sstevel@tonic-gate raidio(cs, RIO_ALT | RIO_EXTRA | RIO_PARITY | RIO_PREWRITE); 31250Sstevel@tonic-gate } 31260Sstevel@tonic-gate ASSERT(cs->cs_frags < 4); 31270Sstevel@tonic-gate raidio(cs, RIO_DATA | RIO_PREWRITE); 31280Sstevel@tonic-gate raidio(cs, RIO_PARITY | RIO_PREWRITE); 31290Sstevel@tonic-gate } 31300Sstevel@tonic-gate 31310Sstevel@tonic-gate /* 31320Sstevel@tonic-gate * NAME: raid_write_io 31330Sstevel@tonic-gate * DESCRIPTION: RAID metadevice write I/O routine 31340Sstevel@tonic-gate * PARAMETERS: mr_unit_t *un - pointer to a unit structure 31350Sstevel@tonic-gate * md_raidcs_t *cs - pointer to a child structure 31360Sstevel@tonic-gate */ 31370Sstevel@tonic-gate 31380Sstevel@tonic-gate /*ARGSUSED*/ 31390Sstevel@tonic-gate static void 31400Sstevel@tonic-gate raid_write_io(mr_unit_t *un, md_raidcs_t *cs) 31410Sstevel@tonic-gate { 31420Sstevel@tonic-gate md_raidps_t *ps = cs->cs_ps; 31430Sstevel@tonic-gate uint_t *dbuf; 31440Sstevel@tonic-gate uint_t *ubuf; 31450Sstevel@tonic-gate size_t wordcnt; 31460Sstevel@tonic-gate uint_t dsum = 0; 31470Sstevel@tonic-gate int pcheck; 31480Sstevel@tonic-gate int dcheck; 31490Sstevel@tonic-gate 31500Sstevel@tonic-gate ASSERT((un->un_column[cs->cs_pcolumn].un_devstate & 31510Sstevel@tonic-gate RCS_INIT) == 0); 31520Sstevel@tonic-gate 
ASSERT((un->un_column[cs->cs_dcolumn].un_devstate & 31530Sstevel@tonic-gate RCS_INIT) == 0); 31540Sstevel@tonic-gate ASSERT(IO_READER_HELD(un)); 31550Sstevel@tonic-gate ASSERT(UNIT_READER_HELD(un)); 31560Sstevel@tonic-gate ASSERT(cs->cs_flags & MD_RCS_HAVE_PW_SLOTS); 31570Sstevel@tonic-gate if (cs->cs_flags & MD_RCS_LINE) { 31580Sstevel@tonic-gate 31590Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 31600Sstevel@tonic-gate 31610Sstevel@tonic-gate ASSERT(un->un_origcolumncnt == un->un_totalcolumncnt); 31620Sstevel@tonic-gate raid_mapin_buf(cs); 31630Sstevel@tonic-gate cs->cs_frags = un->un_origcolumncnt; 31640Sstevel@tonic-gate cs->cs_call = raid_stage; 31650Sstevel@tonic-gate cs->cs_error_call = raid_write_error; 31660Sstevel@tonic-gate cs->cs_retry_call = raid_write_no_retry; 31670Sstevel@tonic-gate cs->cs_stage = RAID_LINE_PWDONE; 31680Sstevel@tonic-gate genlineparity(cs); 31690Sstevel@tonic-gate return; 31700Sstevel@tonic-gate } 31710Sstevel@tonic-gate 31720Sstevel@tonic-gate pcheck = erred_check_line(un, cs, &un->un_column[cs->cs_pcolumn]); 31730Sstevel@tonic-gate dcheck = erred_check_line(un, cs, &un->un_column[cs->cs_dcolumn]); 31740Sstevel@tonic-gate cs->cs_resync_check = pcheck << RCL_PARITY_OFFSET || dcheck; 31750Sstevel@tonic-gate 31760Sstevel@tonic-gate if (pcheck == RCL_ERRED && dcheck == RCL_ERRED) { 31770Sstevel@tonic-gate int err = EIO; 31780Sstevel@tonic-gate 31790Sstevel@tonic-gate if ((un->un_column[cs->cs_pcolumn].un_devstate == 31800Sstevel@tonic-gate RCS_LAST_ERRED) || 31810Sstevel@tonic-gate (un->un_column[cs->cs_dcolumn].un_devstate == 31820Sstevel@tonic-gate RCS_LAST_ERRED)) 31830Sstevel@tonic-gate err = ENXIO; 31840Sstevel@tonic-gate raid_error_parent(ps, err); 31850Sstevel@tonic-gate ASSERT(!(cs->cs_flags & MD_RCS_PWDONE)); 31860Sstevel@tonic-gate raid_free_child(cs, 1); 31870Sstevel@tonic-gate raid_free_parent(ps, RFP_DECR_FRAGS 31880Sstevel@tonic-gate | RFP_RLS_LOCK | RFP_DECR_PWFRAGS); 31890Sstevel@tonic-gate return; 
31900Sstevel@tonic-gate } 31910Sstevel@tonic-gate 31920Sstevel@tonic-gate if (pcheck & RCL_ERRED) { 31930Sstevel@tonic-gate /* 31940Sstevel@tonic-gate * handle case of only having data drive 31950Sstevel@tonic-gate */ 31960Sstevel@tonic-gate raid_mapin_buf(cs); 31970Sstevel@tonic-gate wordcnt = cs->cs_bcount / sizeof (uint_t); 31980Sstevel@tonic-gate 31990Sstevel@tonic-gate dbuf = (uint_t *)(void *)(cs->cs_dbuffer + DEV_BSIZE); 32000Sstevel@tonic-gate ubuf = (uint_t *)(void *)(cs->cs_addr); 32010Sstevel@tonic-gate 32020Sstevel@tonic-gate while (wordcnt--) { 32030Sstevel@tonic-gate *dbuf = *ubuf; 32040Sstevel@tonic-gate dsum ^= *ubuf; 32050Sstevel@tonic-gate dbuf++; 32060Sstevel@tonic-gate ubuf++; 32070Sstevel@tonic-gate } 32080Sstevel@tonic-gate RAID_FILLIN_RPW(cs->cs_dbuffer, un, dsum, -1, 32090Sstevel@tonic-gate cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid, 32100Sstevel@tonic-gate 1, cs->cs_dcolumn, RAID_PWMAGIC); 32110Sstevel@tonic-gate cs->cs_frags = 1; 32120Sstevel@tonic-gate cs->cs_stage = RAID_NONE; 32130Sstevel@tonic-gate cs->cs_call = raid_write_donly; 32140Sstevel@tonic-gate cs->cs_error_call = raid_write_error; 32150Sstevel@tonic-gate cs->cs_retry_call = raid_write_err_retry; 32160Sstevel@tonic-gate if (WRITE_ALT(un, cs->cs_dcolumn)) { 32170Sstevel@tonic-gate cs->cs_frags++; 32180Sstevel@tonic-gate raidio(cs, RIO_DATA | RIO_ALT | RIO_EXTRA | 32190Sstevel@tonic-gate RIO_PREWRITE); 32200Sstevel@tonic-gate } 32210Sstevel@tonic-gate raidio(cs, RIO_DATA | RIO_PREWRITE); 32220Sstevel@tonic-gate return; 32230Sstevel@tonic-gate } 32240Sstevel@tonic-gate 32250Sstevel@tonic-gate if (dcheck & RCL_ERRED) { 32260Sstevel@tonic-gate /* 32270Sstevel@tonic-gate * handle case of only having parity drive 32280Sstevel@tonic-gate * build parity from scratch using new data, 32290Sstevel@tonic-gate * skip reading the data and parity columns. 
32300Sstevel@tonic-gate */ 32310Sstevel@tonic-gate raid_mapin_buf(cs); 32320Sstevel@tonic-gate cs->cs_loop = 0; 32330Sstevel@tonic-gate while (cs->cs_loop == cs->cs_dcolumn || 32340Sstevel@tonic-gate cs->cs_loop == cs->cs_pcolumn) 32350Sstevel@tonic-gate cs->cs_loop++; 32360Sstevel@tonic-gate 32370Sstevel@tonic-gate /* copy new data in to begin building parity */ 32380Sstevel@tonic-gate bcopy(cs->cs_addr, cs->cs_pbuffer + DEV_BSIZE, cs->cs_bcount); 32390Sstevel@tonic-gate cs->cs_stage = RAID_NONE; 32400Sstevel@tonic-gate cs->cs_call = raid_write_ploop; 32410Sstevel@tonic-gate cs->cs_error_call = raid_write_error; 32420Sstevel@tonic-gate cs->cs_retry_call = raid_write_err_retry; 32430Sstevel@tonic-gate cs->cs_frags = 1; 32440Sstevel@tonic-gate raidio(cs, RIO_DATA | RIO_READ | (cs->cs_loop + 1)); 32450Sstevel@tonic-gate return; 32460Sstevel@tonic-gate } 32470Sstevel@tonic-gate /* 32480Sstevel@tonic-gate * handle normal cases 32490Sstevel@tonic-gate * read old data and old parity 32500Sstevel@tonic-gate */ 32510Sstevel@tonic-gate cs->cs_frags = 2; 32520Sstevel@tonic-gate cs->cs_stage = RAID_NONE; 32530Sstevel@tonic-gate cs->cs_call = raid_write_got_old; 32540Sstevel@tonic-gate cs->cs_error_call = raid_write_error; 32550Sstevel@tonic-gate cs->cs_retry_call = raid_write_retry; 32560Sstevel@tonic-gate ASSERT(ps->ps_magic == RAID_PSMAGIC); 32570Sstevel@tonic-gate raidio(cs, RIO_DATA | RIO_READ); 32580Sstevel@tonic-gate raidio(cs, RIO_PARITY | RIO_READ); 32590Sstevel@tonic-gate } 32600Sstevel@tonic-gate 32610Sstevel@tonic-gate static void 32620Sstevel@tonic-gate raid_enqueue(md_raidcs_t *cs) 32630Sstevel@tonic-gate { 32640Sstevel@tonic-gate mdi_unit_t *ui = cs->cs_ps->ps_ui; 32650Sstevel@tonic-gate kmutex_t *io_list_mutex = &ui->ui_io_lock->io_list_mutex; 32660Sstevel@tonic-gate md_raidcs_t *cs1; 32670Sstevel@tonic-gate 32680Sstevel@tonic-gate mutex_enter(io_list_mutex); 32690Sstevel@tonic-gate ASSERT(! 
(cs->cs_flags & MD_RCS_LLOCKD));
	/* append cs at the tail of the singly-linked pending list */
	if (ui->ui_io_lock->io_list_front == NULL) {
		ui->ui_io_lock->io_list_front = cs;
		ui->ui_io_lock->io_list_back = cs;
	} else {
		cs1 = ui->ui_io_lock->io_list_back;
		cs1->cs_linlck_next = cs;
		ui->ui_io_lock->io_list_back = cs;
	}
	STAT_INC(raid_write_waits);
	STAT_MAX(raid_max_write_q_length, raid_write_queue_length);
	cs->cs_linlck_next = NULL;
	mutex_exit(io_list_mutex);
}

/*
 * NAME:	raid_write
 * DESCRIPTION: RAID metadevice write routine.  Validates the request,
 *		throttles on prewrite availability, takes the line writer
 *		lock, and starts the write via raid_write_io().
 * PARAMETERS:	mr_unit_t *un - pointer to a unit structure
 *		md_raidcs_t *cs - pointer to a child structure
 * RETURNS:	always 0; errors are reported to the parent via
 *		raid_error_parent().
 */

/*ARGSUSED*/
static int
raid_write(mr_unit_t *un, md_raidcs_t *cs)
{
	int		error = 0;
	md_raidps_t	*ps;
	mdi_unit_t	*ui;
	minor_t		mnum;
	clock_t		timeout;

	ASSERT(IO_READER_HELD(un));
	ps = cs->cs_ps;
	ui = ps->ps_ui;

	ASSERT(UNIT_STATE(un) != RUS_INIT);
	if (UNIT_STATE(un) == RUS_LAST_ERRED)
		error = EIO;

	/* make sure the write doesn't go beyond the column */
	if (cs->cs_blkno + cs->cs_blkcnt > un->un_segsize * un->un_segsincolumn)
		error = ENXIO;
	if (error)
		goto werror;

	getresources(cs);

	/*
	 * this is an advisory loop that keeps the waiting lists short
	 * to reduce cpu time.  Since there is a race introduced by not
	 * acquiring all the correct mutexes, use a cv_timedwait to be
	 * sure the write always will wake up and start.
	 */
	while (raid_check_pw(cs)) {
		mutex_enter(&un->un_mx);
		(void) drv_getparm(LBOLT, &timeout);
		timeout += md_wr_wait;
		un->un_rflags |= MD_RFLAG_NEEDPW;
		STAT_INC(raid_prewrite_waits);
		(void) cv_timedwait(&un->un_cv, &un->un_mx, timeout);
		un->un_rflags &= ~MD_RFLAG_NEEDPW;
		mutex_exit(&un->un_mx);
	}

	/* nonzero return means the i/o was queued for later restart */
	if (raid_line_writer_lock(cs, 1))
		return (0);

	un = (mr_unit_t *)md_unit_readerlock(ui);
	cs->cs_un = un;
	mnum = MD_SID(un);

	if (un->un_state & RUS_REGEN) {
		/* raid_regen_parity may replace the unit structure */
		raid_regen_parity(cs);
		un = MD_UNIT(mnum);
		cs->cs_un = un;
	}

	raid_write_io(un, cs);
	return (0);
werror:
	/* acquire unit reader lock since raid_free_child always drops it */
	raid_error_parent(ps, error);
	raid_free_child(cs, 0);
	/* decrement both pwfrags and frags */
	raid_free_parent(ps, RFP_DECR_PWFRAGS | RFP_DECR_FRAGS | RFP_RLS_LOCK);
	return (0);
}


/*
 * NAMES:	raid_stage
 * DESCRIPTION: post-processing routine for a RAID metadevice; advances a
 *		child i/o through its state machine as each stage of the
 *		write (prewrite, real write) or read completes.
 * PARAMETERS:	md_raidcs_t *cs - pointer to child structure
 */
static void
raid_stage(md_raidcs_t *cs)
{
	md_raidps_t	*ps = cs->cs_ps;
	mr_unit_t	*un = cs->cs_un;
	md_raidcbuf_t	*cbuf;
	buf_t		*bp;
	void		*private;
	int		flag;

	switch (cs->cs_stage) {
	case RAID_READ_DONE:
		raid_free_child(cs, 1);
		/* decrement readfrags */
		raid_free_parent(ps, RFP_DECR_READFRAGS | RFP_RLS_LOCK);
		return;

	case RAID_WRITE_DONE:
	case RAID_WRITE_PONLY_DONE:
	case RAID_WRITE_DONLY_DONE:
		/*
		 * Completed writing real parity and/or data.
		 */
		ASSERT(cs->cs_flags & MD_RCS_PWDONE);
		raid_free_child(cs, 1);
		/* decrement frags but not pwfrags */
		raid_free_parent(ps, RFP_DECR_FRAGS | RFP_RLS_LOCK);
		return;

	case RAID_PREWRITE_DONE:
		/*
		 * completed writing data and parity to prewrite entries
		 */
		/*
		 * WARNING: don't release unit reader lock here..
		 * decrement pwfrags but not frags
		 */
		raid_free_parent(ps, RFP_DECR_PWFRAGS);
		cs->cs_flags |= MD_RCS_PWDONE;
		cs->cs_frags = 2;
		cs->cs_stage = RAID_WRITE_DONE;
		cs->cs_call = raid_stage;
		cs->cs_error_call = raid_write_error;
		cs->cs_retry_call = raid_write_no_retry;
		if (WRITE_ALT(un, cs->cs_pcolumn)) {
			cs->cs_frags++;
			raidio(cs, RIO_ALT | RIO_EXTRA | RIO_PARITY |
			    RIO_WRITE);
		}
		if (WRITE_ALT(un, cs->cs_dcolumn)) {
			cs->cs_frags++;
			raidio(cs, RIO_ALT | RIO_EXTRA | RIO_DATA | RIO_WRITE);
		}
		ASSERT(cs->cs_frags < 4);
		raidio(cs, RIO_DATA | RIO_WRITE);
		raidio(cs, RIO_PARITY | RIO_WRITE);
		if (cs->cs_pw_inval_list) {
			raid_free_pwinvalidate(cs);
		}
		return;

	case RAID_LINE_PWDONE:
		ASSERT(cs->cs_frags == 0);
		raid_free_parent(ps, RFP_DECR_PWFRAGS);
		cs->cs_flags |= MD_RCS_PWDONE;
		cs->cs_frags = un->un_origcolumncnt;
		cs->cs_call = raid_stage;
		cs->cs_error_call = raid_write_error;
		cs->cs_retry_call = raid_write_no_retry;
		cs->cs_stage = RAID_WRITE_DONE;
		for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) {
			/*
			 * fill in buffer for write to prewrite area
			 */
			bp = &cbuf->cbuf_bp;
			bp->b_back = bp;
			bp->b_forw = bp;
			bp->b_un.b_addr = cbuf->cbuf_buffer + DEV_BSIZE;
			bp->b_bcount = cbuf->cbuf_bcount;
			bp->b_bufsize = cbuf->cbuf_bcount;
			bp->b_lblkno =
			    un->un_column[cbuf->cbuf_column].un_devstart +
			    cs->cs_blkno;
			bp->b_flags &= ~(B_READ | B_WRITE | B_ERROR);
			bp->b_flags &= ~nv_available;
			bp->b_flags |= B_WRITE | B_BUSY;
			bp->b_iodone = (int (*)())raid_done;
			bp->b_edev = md_dev64_to_dev(
			    un->un_column[cbuf->cbuf_column].un_dev);
			bp->b_chain = (struct buf *)cs;
			private = cs->cs_strategy_private;
			flag = cs->cs_strategy_flag;
			md_call_strategy(bp, flag, private);
		}
		raidio(cs, RIO_DATA | RIO_WRITE);
		raidio(cs, RIO_PARITY | RIO_WRITE);
		if (cs->cs_pw_inval_list) {
			raid_free_pwinvalidate(cs);
		}
		return;

	default:
		ASSERT(0);
		break;
	}
}
/*
 * NAME:	md_raid_strategy
 * DESCRIPTION: RAID metadevice I/O operations entry point.
 * PARAMETERS:	buf_t	*pb - pointer to a user I/O buffer
 *		int	 flag - metadevice specific flag
 *		void	*private - carry over flag ??
 *
 */

void
md_raid_strategy(buf_t *pb, int flag, void *private)
{
	md_raidps_t	*ps;
	md_raidcs_t	*cs;
	int		doing_writes;
	int		err;
	mr_unit_t	*un;
	mdi_unit_t	*ui;
	size_t		count;
	diskaddr_t	blkno;
	caddr_t		addr;
	off_t		offset;
	int		colcnt;
	minor_t		mnum;
	set_t		setno;

	ui = MDI_UNIT(getminor(pb->b_edev));
	md_kstat_waitq_enter(ui);
	un = (mr_unit_t *)md_io_readerlock(ui);
	setno = MD_MIN2SET(getminor(pb->b_edev));

	if ((flag & MD_NOBLOCK) == 0) {
		/* fail the i/o if the set has reached its i/o count limit */
		if (md_inc_iocount(setno) != 0) {
			pb->b_flags |= B_ERROR;
			pb->b_error = ENXIO;
			pb->b_resid = pb->b_bcount;
			md_io_readerexit(ui);
			biodone(pb);
			return;
		}
	} else {
		md_inc_iocount_noblock(setno);
	}

	mnum = MD_SID(un);
	/* number of data columns (total minus the parity column) */
	colcnt = un->un_totalcolumncnt - 1;
	count = pb->b_bcount;

	STAT_CHECK(raid_512, count == 512);
	STAT_CHECK(raid_1024, count == 1024);
	STAT_CHECK(raid_1024_8192, count > 1024 && count < 8192);
	STAT_CHECK(raid_8192, count == 8192);
	STAT_CHECK(raid_8192_bigger, count > 8192);

	(void *) md_unit_readerlock(ui);
	if (!(flag & MD_STR_NOTTOP)) {
		err = md_checkbuf(ui, (md_unit_t *)un, pb); /* check and map */
		if (err != 0) {
			/*
			 * NOTE(review): no md_unit_readerexit() on this
			 * path — presumably md_checkbuf() releases the
			 * unit lock (or completes pb) on failure; verify
			 * md_checkbuf's contract before changing.
			 */
			md_kstat_waitq_exit(ui);
			md_io_readerexit(ui);
			return;
		}
	}
	md_unit_readerexit(ui);

	STAT_INC(raid_total_io);

	/* allocate a parent structure for the user I/O */
	ps = kmem_cache_alloc(raid_parent_cache, MD_ALLOCFLAGS);
	raid_parent_init(ps);

	/*
	 * Save essential information from the original buffhdr
	 * in the md_save structure.
	 */
	ps->ps_un = un;
	ps->ps_ui = ui;
	ps->ps_bp = pb;
	ps->ps_addr = pb->b_un.b_addr;

	if ((pb->b_flags & B_READ) == 0) {
		ps->ps_flags |= MD_RPS_WRITE;
		doing_writes = 1;
		STAT_INC(raid_writes);
	} else {
		ps->ps_flags |= MD_RPS_READ;
		doing_writes = 0;
		STAT_INC(raid_reads);
	}

	count = lbtodb(pb->b_bcount);	/* transfer count (in blocks) */
	blkno = pb->b_lblkno;		/* block number on device */
	addr = 0;
	offset = 0;
	/* start at one frag each; the loop below adds one per extra child */
	ps->ps_pwfrags = 1;
	ps->ps_frags = 1;
	md_kstat_waitq_to_runq(ui);

	/*
	 * Split the request into children, one per segment/line, and
	 * issue each as it is built.  raid_iosetup() returns the count
	 * still remaining after carving off this child.
	 */
	do {
		cs = kmem_cache_alloc(raid_child_cache, MD_ALLOCFLAGS);
		raid_child_init(cs);
		cs->cs_ps = ps;
		cs->cs_un = un;
		cs->cs_mdunit = mnum;
		cs->cs_strategy_flag = flag;
		cs->cs_strategy_private = private;
		cs->cs_addr = addr;
		cs->cs_offset = offset;
		count = raid_iosetup(un, blkno, count, cs);
		if (cs->cs_flags & MD_RCS_LINE) {
			/* full-line child covers every data column */
			blkno += (cs->cs_blkcnt * colcnt);
			offset += (cs->cs_bcount * colcnt);
		} else {
			blkno += cs->cs_blkcnt;
			offset += cs->cs_bcount;
		}
		/* for each cs bump up the ps_pwfrags and ps_frags fields */
		if (count) {
			mutex_enter(&ps->ps_mx);
			ps->ps_pwfrags++;
			ps->ps_frags++;
			mutex_exit(&ps->ps_mx);
			if (doing_writes)
				(void) raid_write(un, cs);
			else
				(void) raid_read(un, cs);
		}
	} while (count);
	/* issue the final (or only) child outside the loop */
	if (doing_writes) {
		(void) raid_write(un, cs);
	} else
		(void) raid_read(un, cs);

	/*
	 * When panicking, poll the done daemon until this request
	 * completes so the dump I/O finishes synchronously.
	 */
	if (! (flag & MD_STR_NOTTOP) && panicstr) {
		while (! (ps->ps_flags & MD_RPS_DONE)) {
			md_daemon(1, &md_done_daemon);
			drv_usecwait(10);
		}
		kmem_cache_free(raid_parent_cache, ps);
	}
}

/*
 * NAMES:	raid_snarf
 * DESCRIPTION: RAID metadevice SNARF entry point
 * PARAMETERS:	md_snarfcmd_t cmd,
 *		set_t	setno
 * RETURNS:
 */
static int
raid_snarf(md_snarfcmd_t cmd, set_t setno)
{
	mr_unit_t	*un;
	mddb_recid_t	recid;
	int		gotsomething;
	int		all_raid_gotten;
	mddb_type_t	typ1;
	uint_t		ncol;
	mddb_de_ic_t	*dep;
	mddb_rb32_t	*rbp;
	size_t		newreqsize;
	mr_unit_t	*big_un;
	mr_unit32_od_t	*small_un;

	if (cmd == MD_SNARF_CLEANUP)
		return (0);

	all_raid_gotten = 1;
	gotsomething = 0;
	typ1 = (mddb_type_t)md_getshared_key(setno,
	    raid_md_ops.md_driver.md_drivername);
	recid = mddb_makerecid(setno, 0);

	/* walk every RAID record in this set's metadb */
	while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) {
		if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) {
			continue;
		}

		dep = mddb_getrecdep(recid);
		dep->de_flags = MDDB_F_RAID;
		rbp = dep->de_rb;
		if ((rbp->rb_revision == MDDB_REV_RB) &&
		    ((rbp->rb_private & MD_PRV_CONVD) == 0)) {
			/*
			 * This means, we have an old and small record
			 * and this record hasn't already been converted.
			 * Before we create an incore metadevice from this
			 * we have to convert it to a big record.
			 */
			small_un = (mr_unit32_od_t *)mddb_getrecaddr(recid);
			ncol = small_un->un_totalcolumncnt;
			/* mr_unit_t already contains one mr_column_t */
			newreqsize = sizeof (mr_unit_t) +
			    ((ncol - 1) * sizeof (mr_column_t));
			big_un = (mr_unit_t *)kmem_zalloc(newreqsize, KM_SLEEP);
			raid_convert((caddr_t)small_un, (caddr_t)big_un,
			    SMALL_2_BIG);
			/* hand ownership of the new buffer to the mddb */
			kmem_free(small_un, dep->de_reqsize);
			dep->de_rb_userdata = big_un;
			dep->de_reqsize = newreqsize;
			un = big_un;
			rbp->rb_private |= MD_PRV_CONVD;
		} else {
			/* Big device */
			un = (mr_unit_t *)mddb_getrecaddr(recid);
		}

		/* Set revision and flag accordingly */
		if (rbp->rb_revision == MDDB_REV_RB) {
			un->c.un_revision = MD_32BIT_META_DEV;
		} else {
			un->c.un_revision = MD_64BIT_META_DEV;
			un->c.un_flag |= MD_EFILABEL;
		}

		/*
		 * Create minor device node for snarfed entry.
		 */
		(void) md_create_minor_node(MD_MIN2SET(MD_SID(un)), MD_SID(un));

		/* duplicate unit: mark record for deletion and move on */
		if (MD_UNIT(MD_SID(un)) != NULL) {
			mddb_setrecprivate(recid, MD_PRV_PENDDEL);
			continue;
		}
		all_raid_gotten = 0;
		if (raid_build_incore((void *)un, 1) == 0) {
			mddb_setrecprivate(recid, MD_PRV_GOTIT);
			md_create_unit_incore(MD_SID(un), &raid_md_ops,
			    1);
			gotsomething = 1;
		} else if (un->mr_ic) {
			/* build failed: release the incore column state */
			kmem_free(un->un_column_ic, sizeof (mr_column_ic_t) *
			    un->un_totalcolumncnt);
			kmem_free(un->mr_ic, sizeof (*un->mr_ic));
		}
	}

	if (!all_raid_gotten) {
		return (gotsomething);
	}

	/* everything was gotten before; flag leftover records for deletion */
	recid = mddb_makerecid(setno, 0);
	while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0)
		if (!(mddb_getrecprivate(recid) & MD_PRV_GOTIT))
			mddb_setrecprivate(recid, MD_PRV_PENDDEL);

	return (0);
}

/*
 * NAMES:	raid_halt
 * DESCRIPTION: RAID metadevice HALT entry point
 * PARAMETERS:	md_haltcmd_t cmd -
 *		set_t	setno -
 * RETURNS:
 */
static int
raid_halt(md_haltcmd_t cmd, set_t setno)
{
	set_t		i;
	mdi_unit_t	*ui;
	minor_t		mnum;

	if (cmd == MD_HALT_CLOSE)
		return (0);

	if (cmd == MD_HALT_OPEN)
		return (0);

	if (cmd == MD_HALT_UNLOAD)
		return (0);

	/* MD_HALT_CHECK: report busy (1) if any RAID unit is still open */
	if (cmd == MD_HALT_CHECK) {
		for (i = 0; i < md_nunits; i++) {
			mnum = MD_MKMIN(setno, i);
			if ((ui = MDI_UNIT(mnum)) == NULL)
				continue;
			/* skip units not owned by this driver */
			if (ui->ui_opsindex != raid_md_ops.md_selfindex)
				continue;
			if (md_unit_isopen(ui))
				return (1);
		}
		return (0);
	}

	if (cmd != MD_HALT_DOIT)
		return (1);

	/* MD_HALT_DOIT: tear down every RAID unit in the set */
	for (i = 0; i < md_nunits; i++) {
		mnum = MD_MKMIN(setno, i);
		if ((ui = MDI_UNIT(mnum)) == NULL)
			continue;
		if (ui->ui_opsindex != raid_md_ops.md_selfindex)
			continue;
		reset_raid((mr_unit_t *)MD_UNIT(mnum), mnum, 0);
	}
	return (0);
}

/*
 * NAMES:	raid_close_all_devs
 * DESCRIPTION: Close all the devices of the unit.
 * PARAMETERS:	mr_unit_t *un - pointer to unit structure
 *		int init_pw   - nonzero: re-initialize the prewrite area
 *				of each RCS_OKAY column before closing
 *		int md_cflags - close flags passed to md_layered_close
 * RETURNS:
 */
void
raid_close_all_devs(mr_unit_t *un, int init_pw, int md_cflags)
{
	int		i;
	mr_column_t	*device;

	for (i = 0; i < un->un_totalcolumncnt; i++) {
		device = &un->un_column[i];
		if (device->un_devflags & MD_RAID_DEV_ISOPEN) {
			/* an open column must have a real underlying dev */
			ASSERT((device->un_dev != (md_dev64_t)0) &&
			    (device->un_dev != NODEV64));
			if ((device->un_devstate & RCS_OKAY) && init_pw)
				(void) init_pw_area(un, device->un_dev,
				    device->un_pwstart, i);
			md_layered_close(device->un_dev, md_cflags);
			device->un_devflags &= ~MD_RAID_DEV_ISOPEN;
		}
	}
}

/*
 * NAMES:	raid_open_all_devs
 * DESCRIPTION: Open all the components (columns) of the device unit.
 * PARAMETERS:	mr_unit_t *un - pointer to unit structure
 * RETURNS:	0 when the unit can run (at most one column failed to
 *		open), nonzero when more than one column failed.
 */
static int
raid_open_all_devs(mr_unit_t *un, int md_oflags)
{
	minor_t		mnum = MD_SID(un);
	int		i;
	int		not_opened = 0;
	int		commit = 0;
	int		col = -1;	/* the single unopened column, if any */
	mr_column_t	*device;
	set_t		setno = MD_MIN2SET(MD_SID(un));
	side_t		side = mddb_getsidenum(setno);
	mdkey_t		key;
	mdi_unit_t	*ui = MDI_UNIT(mnum);

	ui->ui_tstate &= ~MD_INACCESSIBLE;

	for (i = 0; i < un->un_totalcolumncnt; i++) {
		md_dev64_t tmpdev;

		device = &un->un_column[i];

		/* already-errored columns count as unopened */
		if (COLUMN_STATE(un, i) & RCS_ERRED) {
			not_opened++;
			continue;
		}

		if (device->un_devflags & MD_RAID_DEV_ISOPEN)
			continue;

		tmpdev = device->un_dev;
		/*
		 * Open by device id
		 */
		key = HOTSPARED(un, i) ?
		    device->un_hs_key : device->un_orig_key;
		if ((md_getmajor(tmpdev) != md_major) &&
		    md_devid_found(setno, side, key) == 1) {
			tmpdev = md_resolve_bydevid(mnum, tmpdev, key);
		}
		if (md_layered_open(mnum, &tmpdev, md_oflags)) {
			/* remember the (possibly re-resolved) dev anyway */
			device->un_dev = tmpdev;
			not_opened++;
			continue;
		}
		device->un_dev = tmpdev;
		device->un_devflags |= MD_RAID_DEV_ISOPEN;
	}

	/* if open errors and errored devices are 1 then device can run */
	if (not_opened > 1) {
		cmn_err(CE_WARN,
		    "md: %s failed to open. open error on %s\n",
		    md_shortname(MD_SID(un)),
		    md_devname(MD_UN2SET(un), device->un_orig_dev,
		    NULL, 0));

		ui->ui_tstate |= MD_INACCESSIBLE;

		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OPEN_FAIL, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));

		return (not_opened > 1);
	}

	for (i = 0; i < un->un_totalcolumncnt; i++) {
		device = &un->un_column[i];
		if (device->un_devflags & MD_RAID_DEV_ISOPEN) {
			if (device->un_devstate & RCS_LAST_ERRED) {
				/*
				 * At this point in time there is a possibility
				 * that errors were the result of a controller
				 * failure with more than a single column on it
				 * so clear out last errored columns and let
				 * errors re-occur is necessary.
				 */
				raid_set_state(un, i, RCS_OKAY, 0);
				commit++;
			}
			continue;
		}
		/* at most one column may have failed to open here */
		ASSERT(col == -1);
		col = i;
	}

	if (col != -1) {
		raid_set_state(un, col, RCS_ERRED, 0);
		commit++;
	}

	/* persist any state changes to the metadb */
	if (commit)
		raid_commit(un, NULL);

	if (col != -1) {
		if (COLUMN_STATE(un, col) & RCS_ERRED) {
			SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED,
			    SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
		} else if (COLUMN_STATE(un, col) & RCS_LAST_ERRED) {
			SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED,
			    SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
		}
	}

	return (0);
}

/*
 * NAMES:	raid_internal_open
 * DESCRIPTION: Do the actual RAID open
 * PARAMETERS:	minor_t mnum - minor number of the RAID device
 *		int flag -
 *		int otyp -
 *		int md_oflags - RAID open flags
 * RETURNS:	0 if successful, nonzero otherwise
 */
int
raid_internal_open(minor_t mnum, int flag, int otyp, int md_oflags)
{
	mr_unit_t	*un;
	mdi_unit_t	*ui;
	int		err = 0;
	int		replay_error = 0;

	ui = MDI_UNIT(mnum);
	ASSERT(ui != NULL);

	un = (mr_unit_t *)md_unit_openclose_enter(ui);
	/*
	 * this MUST be checked before md_unit_isopen is checked.
	 * raid_init_columns sets md_unit_isopen to block reset, halt.
	 */
	if ((UNIT_STATE(un) & (RUS_INIT | RUS_DOI)) &&
	    !(md_oflags & MD_OFLG_ISINIT)) {
		md_unit_openclose_exit(ui);
		return (EAGAIN);
	}

	/* already open (or init open): just bump the open count */
	if ((md_oflags & MD_OFLG_ISINIT) || md_unit_isopen(ui)) {
		err = md_unit_incopen(mnum, flag, otyp);
		goto out;
	}

	/* first open: upgrade from reader to writer lock */
	md_unit_readerexit(ui);

	un = (mr_unit_t *)md_unit_writerlock(ui);
	if (raid_open_all_devs(un, md_oflags) == 0) {
		if ((err = md_unit_incopen(mnum, flag, otyp)) != 0) {
			md_unit_writerexit(ui);
			un = (mr_unit_t *)md_unit_readerlock(ui);
			raid_close_all_devs(un, 0, md_oflags);
			goto out;
		}
	} else {
		/*
		 * if this unit contains more than two errored components
		 * should return error and close all opened devices
		 */

		md_unit_writerexit(ui);
		un = (mr_unit_t *)md_unit_readerlock(ui);
		raid_close_all_devs(un, 0, md_oflags);
		md_unit_openclose_exit(ui);
		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OPEN_FAIL, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));
		return (ENXIO);
	}

	/* replay the prewrite area once per unit lifetime */
	if (!(MD_STATUS(un) & MD_UN_REPLAYED)) {
		replay_error = raid_replay(un);
		MD_STATUS(un) |= MD_UN_REPLAYED;
	}

	md_unit_writerexit(ui);
	un = (mr_unit_t *)md_unit_readerlock(ui);

	/* read-only replay is acceptable for a read-only open */
	if ((replay_error == RAID_RPLY_READONLY) &&
	    ((flag & (FREAD | FWRITE)) == FREAD)) {
		md_unit_openclose_exit(ui);
		return (0);
	}

	/* allocate hotspare if possible */
	(void) raid_hotspares();


out:
	md_unit_openclose_exit(ui);
	return (err);
}
/*
 * NAMES:	raid_open
 * DESCRIPTION: RAID metadevice OPEN entry point
 * PARAMETERS:	dev_t dev -
 *		int flag -
 *		int otyp -
 *		cred_t * cred_p -
 *		int md_oflags -
 * RETURNS:
 */
/*ARGSUSED1*/
static int
raid_open(dev_t *dev, int flag, int otyp, cred_t *cred_p, int md_oflags)
{
	int		error = 0;

	if (error = raid_internal_open(getminor(*dev), flag, otyp, md_oflags)) {
		return (error);
	}
	return (0);
}

/*
 * NAMES:	raid_internal_close
 * DESCRIPTION: RAID metadevice CLOSE actual implementation
 * PARAMETERS:	minor_t - minor number of the RAID device
 *		int otyp -
 *		int init_pw -
 *		int md_cflags - RAID close flags
 * RETURNS:	0 if successful, nonzero otherwise
 */
/*ARGSUSED*/
int
raid_internal_close(minor_t mnum, int otyp, int init_pw, int md_cflags)
{
	mdi_unit_t	*ui = MDI_UNIT(mnum);
	mr_unit_t	*un;
	int		err = 0;

	/* single thread */
	un = (mr_unit_t *)md_unit_openclose_enter(ui);

	/* count closed */
	if ((err = md_unit_decopen(mnum, otyp)) != 0)
		goto out;

	/* close devices, if necessary */
	if (! md_unit_isopen(ui) || (md_cflags & MD_OFLG_PROBEDEV)) {
		raid_close_all_devs(un, init_pw, md_cflags);
	}

	/* unlock, return success */
out:
	md_unit_openclose_exit(ui);
	return (err);
}

/*
 * NAMES:	raid_close
 * DESCRIPTION: RAID metadevice close entry point
 * PARAMETERS:	dev_t dev -
 *		int flag -
 *		int otyp -
 *		cred_t * cred_p -
 *		int md_oflags -
 * RETURNS:
 */
/*ARGSUSED1*/
static int
raid_close(dev_t dev, int flag, int otyp, cred_t *cred_p, int md_cflags)
{
	int		retval;

	/* hold off new I/O while the unit is being closed */
	(void) md_io_writerlock(MDI_UNIT(getminor(dev)));
	retval = raid_internal_close(getminor(dev), otyp, 1, md_cflags);
	(void) md_io_writerexit(MDI_UNIT(getminor(dev)));
	return (retval);
}

/*
 * raid_probe_close_all_devs
 *
 * Close every column that was opened by the probe path
 * (MD_RAID_DEV_PROBEOPEN), leaving normally-opened columns alone.
 */
void
raid_probe_close_all_devs(mr_unit_t *un)
{
	int		i;
	mr_column_t	*device;

	for (i = 0; i < un->un_totalcolumncnt; i++) {
		device = &un->un_column[i];

		if (device->un_devflags & MD_RAID_DEV_PROBEOPEN) {
md_layered_close(device->un_dev, 40810Sstevel@tonic-gate MD_OFLG_PROBEDEV); 40820Sstevel@tonic-gate device->un_devflags &= ~MD_RAID_DEV_PROBEOPEN; 40830Sstevel@tonic-gate } 40840Sstevel@tonic-gate } 40850Sstevel@tonic-gate } 40860Sstevel@tonic-gate /* 40870Sstevel@tonic-gate * Raid_probe_dev: 40880Sstevel@tonic-gate * 40890Sstevel@tonic-gate * On entry the unit writerlock is held 40900Sstevel@tonic-gate */ 40910Sstevel@tonic-gate static int 40920Sstevel@tonic-gate raid_probe_dev(mdi_unit_t *ui, minor_t mnum) 40930Sstevel@tonic-gate { 40940Sstevel@tonic-gate mr_unit_t *un; 40950Sstevel@tonic-gate int i; 40960Sstevel@tonic-gate int not_opened = 0; 40970Sstevel@tonic-gate int commit = 0; 40980Sstevel@tonic-gate int col = -1; 40990Sstevel@tonic-gate mr_column_t *device; 41000Sstevel@tonic-gate int md_devopen = 0; 41010Sstevel@tonic-gate 41020Sstevel@tonic-gate if (md_unit_isopen(ui)) 41030Sstevel@tonic-gate md_devopen++; 41040Sstevel@tonic-gate 41050Sstevel@tonic-gate un = MD_UNIT(mnum); 41060Sstevel@tonic-gate /* 41070Sstevel@tonic-gate * If the state has been set to LAST_ERRED because 41080Sstevel@tonic-gate * of an error when the raid device was open at some 41090Sstevel@tonic-gate * point in the past, don't probe. We really don't want 41100Sstevel@tonic-gate * to reset the state in this case. 
41110Sstevel@tonic-gate */ 41120Sstevel@tonic-gate if (UNIT_STATE(un) == RUS_LAST_ERRED) 41130Sstevel@tonic-gate return (0); 41140Sstevel@tonic-gate 41150Sstevel@tonic-gate ui->ui_tstate &= ~MD_INACCESSIBLE; 41160Sstevel@tonic-gate 41170Sstevel@tonic-gate for (i = 0; i < un->un_totalcolumncnt; i++) { 41180Sstevel@tonic-gate md_dev64_t tmpdev; 41190Sstevel@tonic-gate 41200Sstevel@tonic-gate device = &un->un_column[i]; 41210Sstevel@tonic-gate if (COLUMN_STATE(un, i) & RCS_ERRED) { 41220Sstevel@tonic-gate not_opened++; 41230Sstevel@tonic-gate continue; 41240Sstevel@tonic-gate } 41250Sstevel@tonic-gate 41260Sstevel@tonic-gate tmpdev = device->un_dev; 41270Sstevel@tonic-gate /* 41280Sstevel@tonic-gate * Currently the flags passed are not needed since 41290Sstevel@tonic-gate * there cannot be an underlying metadevice. However 41300Sstevel@tonic-gate * they are kept here for consistency. 41310Sstevel@tonic-gate * 41320Sstevel@tonic-gate * Open by device id 41330Sstevel@tonic-gate */ 41340Sstevel@tonic-gate tmpdev = md_resolve_bydevid(mnum, tmpdev, HOTSPARED(un, i)? 41350Sstevel@tonic-gate device->un_hs_key : device->un_orig_key); 41360Sstevel@tonic-gate if (md_layered_open(mnum, &tmpdev, 41370Sstevel@tonic-gate MD_OFLG_CONT_ERRS | MD_OFLG_PROBEDEV)) { 41380Sstevel@tonic-gate device->un_dev = tmpdev; 41390Sstevel@tonic-gate not_opened++; 41400Sstevel@tonic-gate continue; 41410Sstevel@tonic-gate } 41420Sstevel@tonic-gate device->un_dev = tmpdev; 41430Sstevel@tonic-gate 41440Sstevel@tonic-gate device->un_devflags |= MD_RAID_DEV_PROBEOPEN; 41450Sstevel@tonic-gate } 41460Sstevel@tonic-gate 41470Sstevel@tonic-gate /* 41480Sstevel@tonic-gate * The code below is careful on setting the LAST_ERRED state. 41490Sstevel@tonic-gate * 41500Sstevel@tonic-gate * If open errors and exactly one device has failed we can run. 41510Sstevel@tonic-gate * If more then one device fails we have to figure out when to set 41520Sstevel@tonic-gate * LAST_ERRED state. 
The rationale is to avoid unnecessary resyncs 41530Sstevel@tonic-gate * since they are painful and time consuming. 41540Sstevel@tonic-gate * 41550Sstevel@tonic-gate * When more than one component/column fails there are 2 scenerios. 41560Sstevel@tonic-gate * 41570Sstevel@tonic-gate * 1. Metadevice has NOT been opened: In this case, the behavior 41580Sstevel@tonic-gate * mimics the open symantics. ie. Only the first failed device 41590Sstevel@tonic-gate * is ERRED and LAST_ERRED is not set. 41600Sstevel@tonic-gate * 41610Sstevel@tonic-gate * 2. Metadevice has been opened: Here the read/write sematics are 41620Sstevel@tonic-gate * followed. The first failed devicce is ERRED and on the next 41630Sstevel@tonic-gate * failed device LAST_ERRED is set. 41640Sstevel@tonic-gate */ 41650Sstevel@tonic-gate 41660Sstevel@tonic-gate if (not_opened > 1 && !md_devopen) { 41670Sstevel@tonic-gate cmn_err(CE_WARN, 41680Sstevel@tonic-gate "md: %s failed to open. open error on %s\n", 41690Sstevel@tonic-gate md_shortname(MD_SID(un)), 41700Sstevel@tonic-gate md_devname(MD_UN2SET(un), device->un_orig_dev, 41710Sstevel@tonic-gate NULL, 0)); 41720Sstevel@tonic-gate SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OPEN_FAIL, SVM_TAG_METADEVICE, 41730Sstevel@tonic-gate MD_UN2SET(un), MD_SID(un)); 41740Sstevel@tonic-gate raid_probe_close_all_devs(un); 41750Sstevel@tonic-gate ui->ui_tstate |= MD_INACCESSIBLE; 41760Sstevel@tonic-gate return (not_opened > 1); 41770Sstevel@tonic-gate } 41780Sstevel@tonic-gate 41790Sstevel@tonic-gate if (!md_devopen) { 41800Sstevel@tonic-gate for (i = 0; i < un->un_totalcolumncnt; i++) { 41810Sstevel@tonic-gate device = &un->un_column[i]; 41820Sstevel@tonic-gate if (device->un_devflags & MD_RAID_DEV_PROBEOPEN) { 41830Sstevel@tonic-gate if (device->un_devstate & RCS_LAST_ERRED) { 41840Sstevel@tonic-gate /* 41850Sstevel@tonic-gate * At this point in time there is a 41860Sstevel@tonic-gate * possibility that errors were the 41870Sstevel@tonic-gate * result of a controller failure with 
41880Sstevel@tonic-gate * more than a single column on it so 41890Sstevel@tonic-gate * clear out last errored columns and 41900Sstevel@tonic-gate * let errors re-occur is necessary. 41910Sstevel@tonic-gate */ 41920Sstevel@tonic-gate raid_set_state(un, i, RCS_OKAY, 0); 41930Sstevel@tonic-gate commit++; 41940Sstevel@tonic-gate } 41950Sstevel@tonic-gate continue; 41960Sstevel@tonic-gate } 41970Sstevel@tonic-gate ASSERT(col == -1); 41980Sstevel@tonic-gate /* 41990Sstevel@tonic-gate * note if multiple devices are failing then only 42000Sstevel@tonic-gate * the last one is marked as error 42010Sstevel@tonic-gate */ 42020Sstevel@tonic-gate col = i; 42030Sstevel@tonic-gate } 42040Sstevel@tonic-gate 42050Sstevel@tonic-gate if (col != -1) { 42060Sstevel@tonic-gate raid_set_state(un, col, RCS_ERRED, 0); 42070Sstevel@tonic-gate commit++; 42080Sstevel@tonic-gate } 42090Sstevel@tonic-gate 42100Sstevel@tonic-gate } else { 42110Sstevel@tonic-gate for (i = 0; i < un->un_totalcolumncnt; i++) { 42120Sstevel@tonic-gate device = &un->un_column[i]; 42130Sstevel@tonic-gate 42140Sstevel@tonic-gate /* if we have LAST_ERRED go ahead and commit. 
*/ 42150Sstevel@tonic-gate if (un->un_state & RUS_LAST_ERRED) 42160Sstevel@tonic-gate break; 42170Sstevel@tonic-gate /* 42180Sstevel@tonic-gate * could not open the component 42190Sstevel@tonic-gate */ 42200Sstevel@tonic-gate 42210Sstevel@tonic-gate if (!(device->un_devflags & MD_RAID_DEV_PROBEOPEN)) { 42220Sstevel@tonic-gate col = i; 42230Sstevel@tonic-gate raid_set_state(un, col, RCS_ERRED, 0); 42240Sstevel@tonic-gate commit++; 42250Sstevel@tonic-gate } 42260Sstevel@tonic-gate } 42270Sstevel@tonic-gate } 42280Sstevel@tonic-gate 42290Sstevel@tonic-gate if (commit) 42300Sstevel@tonic-gate raid_commit(un, NULL); 42310Sstevel@tonic-gate 42320Sstevel@tonic-gate if (col != -1) { 42330Sstevel@tonic-gate if (COLUMN_STATE(un, col) & RCS_ERRED) { 42340Sstevel@tonic-gate SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, 42350Sstevel@tonic-gate SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un)); 42360Sstevel@tonic-gate } else if (COLUMN_STATE(un, col) & RCS_LAST_ERRED) { 42370Sstevel@tonic-gate SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED, 42380Sstevel@tonic-gate SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un)); 42390Sstevel@tonic-gate } 42400Sstevel@tonic-gate } 42410Sstevel@tonic-gate 42420Sstevel@tonic-gate raid_probe_close_all_devs(un); 42430Sstevel@tonic-gate return (0); 42440Sstevel@tonic-gate } 42450Sstevel@tonic-gate 42460Sstevel@tonic-gate static int 42470Sstevel@tonic-gate raid_imp_set( 42480Sstevel@tonic-gate set_t setno 42490Sstevel@tonic-gate ) 42500Sstevel@tonic-gate { 42510Sstevel@tonic-gate mddb_recid_t recid; 42520Sstevel@tonic-gate int i, gotsomething; 42530Sstevel@tonic-gate mddb_type_t typ1; 42540Sstevel@tonic-gate mddb_de_ic_t *dep; 42550Sstevel@tonic-gate mddb_rb32_t *rbp; 42560Sstevel@tonic-gate mr_unit_t *un64; 42570Sstevel@tonic-gate mr_unit32_od_t *un32; 42580Sstevel@tonic-gate minor_t *self_id; /* minor needs to be updated */ 42590Sstevel@tonic-gate md_parent_t *parent_id; /* parent needs to be updated */ 42600Sstevel@tonic-gate mddb_recid_t *record_id; /* record id 
needs to be updated */ 42610Sstevel@tonic-gate hsp_t *hsp_id; 42620Sstevel@tonic-gate 42630Sstevel@tonic-gate gotsomething = 0; 42640Sstevel@tonic-gate 42650Sstevel@tonic-gate typ1 = (mddb_type_t)md_getshared_key(setno, 42660Sstevel@tonic-gate raid_md_ops.md_driver.md_drivername); 42670Sstevel@tonic-gate recid = mddb_makerecid(setno, 0); 42680Sstevel@tonic-gate 42690Sstevel@tonic-gate while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) { 42700Sstevel@tonic-gate if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) 42710Sstevel@tonic-gate continue; 42720Sstevel@tonic-gate 42730Sstevel@tonic-gate dep = mddb_getrecdep(recid); 42740Sstevel@tonic-gate rbp = dep->de_rb; 42750Sstevel@tonic-gate 42760Sstevel@tonic-gate if (rbp->rb_revision == MDDB_REV_RB) { 42770Sstevel@tonic-gate /* 42780Sstevel@tonic-gate * Small device 42790Sstevel@tonic-gate */ 42800Sstevel@tonic-gate un32 = (mr_unit32_od_t *)mddb_getrecaddr(recid); 42810Sstevel@tonic-gate self_id = &(un32->c.un_self_id); 42820Sstevel@tonic-gate parent_id = &(un32->c.un_parent); 42830Sstevel@tonic-gate record_id = &(un32->c.un_record_id); 42840Sstevel@tonic-gate hsp_id = &(un32->un_hsp_id); 42850Sstevel@tonic-gate 42860Sstevel@tonic-gate for (i = 0; i < un32->un_totalcolumncnt; i++) { 42870Sstevel@tonic-gate mr_column32_od_t *device; 42880Sstevel@tonic-gate 42890Sstevel@tonic-gate device = &un32->un_column[i]; 42900Sstevel@tonic-gate if (!md_update_minor(setno, mddb_getsidenum 42910Sstevel@tonic-gate (setno), device->un_orig_key)) 42920Sstevel@tonic-gate goto out; 42930Sstevel@tonic-gate 42940Sstevel@tonic-gate if (device->un_hs_id != 0) 42950Sstevel@tonic-gate device->un_hs_id = MAKERECID( 42960Sstevel@tonic-gate setno, device->un_hs_id); 42970Sstevel@tonic-gate } 42980Sstevel@tonic-gate } else { 42990Sstevel@tonic-gate un64 = (mr_unit_t *)mddb_getrecaddr(recid); 43000Sstevel@tonic-gate self_id = &(un64->c.un_self_id); 43010Sstevel@tonic-gate parent_id = &(un64->c.un_parent); 43020Sstevel@tonic-gate record_id = 
&(un64->c.un_record_id); 43030Sstevel@tonic-gate hsp_id = &(un64->un_hsp_id); 43040Sstevel@tonic-gate 43050Sstevel@tonic-gate for (i = 0; i < un64->un_totalcolumncnt; i++) { 43060Sstevel@tonic-gate mr_column_t *device; 43070Sstevel@tonic-gate 43080Sstevel@tonic-gate device = &un64->un_column[i]; 43090Sstevel@tonic-gate if (!md_update_minor(setno, mddb_getsidenum 43100Sstevel@tonic-gate (setno), device->un_orig_key)) 43110Sstevel@tonic-gate goto out; 43120Sstevel@tonic-gate 43130Sstevel@tonic-gate if (device->un_hs_id != 0) 43140Sstevel@tonic-gate device->un_hs_id = MAKERECID( 43150Sstevel@tonic-gate setno, device->un_hs_id); 43160Sstevel@tonic-gate } 43170Sstevel@tonic-gate } 43180Sstevel@tonic-gate 43190Sstevel@tonic-gate /* 43200Sstevel@tonic-gate * Update unit with the imported setno 43210Sstevel@tonic-gate */ 43220Sstevel@tonic-gate mddb_setrecprivate(recid, MD_PRV_GOTIT); 43230Sstevel@tonic-gate 43240Sstevel@tonic-gate *self_id = MD_MKMIN(setno, MD_MIN2UNIT(*self_id)); 43250Sstevel@tonic-gate 43260Sstevel@tonic-gate if (*hsp_id != -1) 43270Sstevel@tonic-gate *hsp_id = MAKERECID(setno, DBID(*hsp_id)); 43280Sstevel@tonic-gate 43290Sstevel@tonic-gate if (*parent_id != MD_NO_PARENT) 43300Sstevel@tonic-gate *parent_id = MD_MKMIN(setno, MD_MIN2UNIT(*parent_id)); 43310Sstevel@tonic-gate *record_id = MAKERECID(setno, DBID(*record_id)); 43320Sstevel@tonic-gate gotsomething = 1; 43330Sstevel@tonic-gate } 43340Sstevel@tonic-gate 43350Sstevel@tonic-gate out: 43360Sstevel@tonic-gate return (gotsomething); 43370Sstevel@tonic-gate } 43380Sstevel@tonic-gate 43390Sstevel@tonic-gate static md_named_services_t raid_named_services[] = { 43400Sstevel@tonic-gate {raid_hotspares, "poke hotspares" }, 43410Sstevel@tonic-gate {raid_rename_check, MDRNM_CHECK }, 43420Sstevel@tonic-gate {raid_rename_lock, MDRNM_LOCK }, 43430Sstevel@tonic-gate {(intptr_t (*)()) raid_rename_unlock, MDRNM_UNLOCK }, 43440Sstevel@tonic-gate {(intptr_t (*)()) raid_probe_dev, "probe open test" }, 
43450Sstevel@tonic-gate {NULL, 0 } 43460Sstevel@tonic-gate }; 43470Sstevel@tonic-gate 43480Sstevel@tonic-gate md_ops_t raid_md_ops = { 43490Sstevel@tonic-gate raid_open, /* open */ 43500Sstevel@tonic-gate raid_close, /* close */ 43510Sstevel@tonic-gate md_raid_strategy, /* strategy */ 43520Sstevel@tonic-gate NULL, /* print */ 43530Sstevel@tonic-gate NULL, /* dump */ 43540Sstevel@tonic-gate NULL, /* read */ 43550Sstevel@tonic-gate NULL, /* write */ 43560Sstevel@tonic-gate md_raid_ioctl, /* ioctl, */ 43570Sstevel@tonic-gate raid_snarf, /* raid_snarf */ 43580Sstevel@tonic-gate raid_halt, /* raid_halt */ 43590Sstevel@tonic-gate NULL, /* aread */ 43600Sstevel@tonic-gate NULL, /* awrite */ 43610Sstevel@tonic-gate raid_imp_set, /* import set */ 43620Sstevel@tonic-gate raid_named_services 43630Sstevel@tonic-gate }; 43640Sstevel@tonic-gate 43650Sstevel@tonic-gate static void 43660Sstevel@tonic-gate init_init() 43670Sstevel@tonic-gate { 43680Sstevel@tonic-gate /* default to a second */ 43690Sstevel@tonic-gate if (md_wr_wait == 0) 43700Sstevel@tonic-gate md_wr_wait = md_hz >> 1; 43710Sstevel@tonic-gate 43720Sstevel@tonic-gate raid_parent_cache = kmem_cache_create("md_raid_parent", 43730Sstevel@tonic-gate sizeof (md_raidps_t), 0, raid_parent_constructor, 43740Sstevel@tonic-gate raid_parent_destructor, raid_run_queue, NULL, NULL, 0); 43750Sstevel@tonic-gate raid_child_cache = kmem_cache_create("md_raid_child", 43760Sstevel@tonic-gate sizeof (md_raidcs_t) - sizeof (buf_t) + biosize(), 0, 43770Sstevel@tonic-gate raid_child_constructor, raid_child_destructor, 43780Sstevel@tonic-gate raid_run_queue, NULL, NULL, 0); 43790Sstevel@tonic-gate raid_cbuf_cache = kmem_cache_create("md_raid_cbufs", 43800Sstevel@tonic-gate sizeof (md_raidcbuf_t), 0, raid_cbuf_constructor, 43810Sstevel@tonic-gate raid_cbuf_destructor, raid_run_queue, NULL, NULL, 0); 43820Sstevel@tonic-gate } 43830Sstevel@tonic-gate 43840Sstevel@tonic-gate static void 43850Sstevel@tonic-gate fini_uninit() 
43860Sstevel@tonic-gate { 43870Sstevel@tonic-gate kmem_cache_destroy(raid_parent_cache); 43880Sstevel@tonic-gate kmem_cache_destroy(raid_child_cache); 43890Sstevel@tonic-gate kmem_cache_destroy(raid_cbuf_cache); 43900Sstevel@tonic-gate raid_parent_cache = raid_child_cache = raid_cbuf_cache = NULL; 43910Sstevel@tonic-gate } 43920Sstevel@tonic-gate 43930Sstevel@tonic-gate /* define the module linkage */ 43940Sstevel@tonic-gate MD_PLUGIN_MISC_MODULE("raid module %I%", init_init(), fini_uninit()) 4395