10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 51366Spetede * Common Development and Distribution License (the "License"). 61366Spetede * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 221366Spetede * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 230Sstevel@tonic-gate * Use is subject to license terms. 240Sstevel@tonic-gate */ 250Sstevel@tonic-gate 260Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 270Sstevel@tonic-gate 280Sstevel@tonic-gate /* 290Sstevel@tonic-gate * NAME: raid.c 300Sstevel@tonic-gate * 310Sstevel@tonic-gate * DESCRIPTION: Main RAID driver source file containing open, close and I/O 320Sstevel@tonic-gate * operations. 330Sstevel@tonic-gate * 340Sstevel@tonic-gate * ROUTINES PROVIDED FOR EXTERNAL USE: 350Sstevel@tonic-gate * raid_open() - open the RAID metadevice for access. 360Sstevel@tonic-gate * raid_internal_open() - internal open routine of RAID metdevice. 
370Sstevel@tonic-gate * md_raid_strategy() - perform normal I/O operations, 380Sstevel@tonic-gate * such as read and write. 390Sstevel@tonic-gate * raid_close() - close the RAID metadevice. 400Sstevel@tonic-gate * raid_internal_close() - internal close routine of RAID metadevice. 410Sstevel@tonic-gate * raid_snarf() - initialize and clean up MDD records. 420Sstevel@tonic-gate * raid_halt() - reset the RAID metadevice 430Sstevel@tonic-gate * raid_line() - return the line # of this segment 440Sstevel@tonic-gate * raid_dcolumn() - return the data column # of this segment 450Sstevel@tonic-gate * raid_pcolumn() - return the parity column # of this segment 460Sstevel@tonic-gate */ 470Sstevel@tonic-gate 480Sstevel@tonic-gate #include <sys/param.h> 490Sstevel@tonic-gate #include <sys/systm.h> 500Sstevel@tonic-gate #include <sys/conf.h> 510Sstevel@tonic-gate #include <sys/file.h> 520Sstevel@tonic-gate #include <sys/user.h> 530Sstevel@tonic-gate #include <sys/uio.h> 540Sstevel@tonic-gate #include <sys/t_lock.h> 550Sstevel@tonic-gate #include <sys/buf.h> 560Sstevel@tonic-gate #include <sys/dkio.h> 570Sstevel@tonic-gate #include <sys/vtoc.h> 580Sstevel@tonic-gate #include <sys/kmem.h> 590Sstevel@tonic-gate #include <vm/page.h> 600Sstevel@tonic-gate #include <sys/cmn_err.h> 610Sstevel@tonic-gate #include <sys/sysmacros.h> 620Sstevel@tonic-gate #include <sys/types.h> 630Sstevel@tonic-gate #include <sys/mkdev.h> 640Sstevel@tonic-gate #include <sys/stat.h> 650Sstevel@tonic-gate #include <sys/open.h> 660Sstevel@tonic-gate #include <sys/modctl.h> 670Sstevel@tonic-gate #include <sys/ddi.h> 680Sstevel@tonic-gate #include <sys/sunddi.h> 690Sstevel@tonic-gate #include <sys/debug.h> 700Sstevel@tonic-gate #include <sys/lvm/md_raid.h> 710Sstevel@tonic-gate #include <sys/lvm/mdvar.h> 720Sstevel@tonic-gate #include <sys/lvm/md_convert.h> 730Sstevel@tonic-gate 740Sstevel@tonic-gate #include <sys/sysevent/eventdefs.h> 750Sstevel@tonic-gate #include <sys/sysevent/svm.h> 760Sstevel@tonic-gate 
770Sstevel@tonic-gate md_ops_t raid_md_ops; 780Sstevel@tonic-gate #ifndef lint 791366Spetede char _depends_on[] = "drv/md"; 800Sstevel@tonic-gate md_ops_t *md_interface_ops = &raid_md_ops; 810Sstevel@tonic-gate #endif /* lint */ 820Sstevel@tonic-gate 830Sstevel@tonic-gate extern unit_t md_nunits; 840Sstevel@tonic-gate extern unit_t md_nsets; 850Sstevel@tonic-gate extern md_set_t md_set[]; 860Sstevel@tonic-gate extern int md_status; 870Sstevel@tonic-gate extern major_t md_major; 880Sstevel@tonic-gate extern mdq_anchor_t md_done_daemon; 890Sstevel@tonic-gate extern mdq_anchor_t md_mstr_daemon; 900Sstevel@tonic-gate extern int md_sleep_for_test; 910Sstevel@tonic-gate extern clock_t md_hz; 920Sstevel@tonic-gate 930Sstevel@tonic-gate extern md_event_queue_t *md_event_queue; 940Sstevel@tonic-gate 950Sstevel@tonic-gate 960Sstevel@tonic-gate int pchunks = 16; 970Sstevel@tonic-gate int phigh = 1024; 980Sstevel@tonic-gate int plow = 128; 990Sstevel@tonic-gate int cchunks = 64; 1000Sstevel@tonic-gate int chigh = 1024; 1010Sstevel@tonic-gate int clow = 512; 1020Sstevel@tonic-gate int bchunks = 32; 1030Sstevel@tonic-gate int bhigh = 256; 1040Sstevel@tonic-gate int blow = 128; 1050Sstevel@tonic-gate 1060Sstevel@tonic-gate int raid_total_io = 0; 1070Sstevel@tonic-gate int raid_reads = 0; 1080Sstevel@tonic-gate int raid_writes = 0; 1090Sstevel@tonic-gate int raid_no_bpmaps = 0; 1100Sstevel@tonic-gate int raid_512 = 0; 1110Sstevel@tonic-gate int raid_1024 = 0; 1120Sstevel@tonic-gate int raid_1024_8192 = 0; 1130Sstevel@tonic-gate int raid_8192 = 0; 1140Sstevel@tonic-gate int raid_8192_bigger = 0; 1150Sstevel@tonic-gate int raid_line_lock_wait = 0; 1160Sstevel@tonic-gate 1170Sstevel@tonic-gate int data_buffer_waits = 0; 1180Sstevel@tonic-gate int parity_buffer_waits = 0; 1190Sstevel@tonic-gate 1200Sstevel@tonic-gate /* writer line locks */ 1210Sstevel@tonic-gate int raid_writer_locks = 0; /* total writer locks */ 1220Sstevel@tonic-gate int raid_write_waits = 0; /* total writer locks 
that waited */ 1230Sstevel@tonic-gate int raid_full_line_writes = 0; /* total full line writes */ 1240Sstevel@tonic-gate int raid_write_queue_length = 0; /* wait queue length */ 1250Sstevel@tonic-gate int raid_max_write_q_length = 0; /* maximum queue length */ 1260Sstevel@tonic-gate int raid_write_locks_active = 0; /* writer locks at any time */ 1270Sstevel@tonic-gate int raid_max_write_locks = 0; /* maximum writer locks active */ 1280Sstevel@tonic-gate 1290Sstevel@tonic-gate /* read line locks */ 1300Sstevel@tonic-gate int raid_reader_locks = 0; /* total reader locks held */ 1310Sstevel@tonic-gate int raid_reader_locks_active = 0; /* reader locks held */ 1320Sstevel@tonic-gate int raid_max_reader_locks = 0; /* maximum reader locks held in run */ 1330Sstevel@tonic-gate int raid_read_overlaps = 0; /* number of times 2 reads hit same line */ 1340Sstevel@tonic-gate int raid_read_waits = 0; /* times a reader waited on writer */ 1350Sstevel@tonic-gate 1360Sstevel@tonic-gate /* prewrite stats */ 1370Sstevel@tonic-gate int raid_prewrite_waits = 0; /* number of waits for a pw slot */ 1380Sstevel@tonic-gate int raid_pw = 0; /* number of pw slots in use */ 1390Sstevel@tonic-gate int raid_prewrite_max = 0; /* maximum number of pw slots in use */ 1400Sstevel@tonic-gate int raid_pw_invalidates = 0; 1410Sstevel@tonic-gate 1420Sstevel@tonic-gate static clock_t md_wr_wait = 0; 1430Sstevel@tonic-gate 1440Sstevel@tonic-gate int nv_available = 0; /* presence of nv-ram support in device */ 1450Sstevel@tonic-gate int nv_prewrite = 1; /* mark prewrites with nv_available */ 1460Sstevel@tonic-gate int nv_parity = 1; /* mark parity with nv_available */ 1470Sstevel@tonic-gate 1480Sstevel@tonic-gate kmem_cache_t *raid_parent_cache = NULL; 1490Sstevel@tonic-gate kmem_cache_t *raid_child_cache = NULL; 1500Sstevel@tonic-gate kmem_cache_t *raid_cbuf_cache = NULL; 1510Sstevel@tonic-gate 1520Sstevel@tonic-gate int raid_internal_open(minor_t mnum, int flag, int otyp, 1530Sstevel@tonic-gate int 
md_oflags); 1540Sstevel@tonic-gate 1550Sstevel@tonic-gate static void freebuffers(md_raidcs_t *cs); 1560Sstevel@tonic-gate static int raid_read(mr_unit_t *un, md_raidcs_t *cs); 1570Sstevel@tonic-gate static void raid_read_io(mr_unit_t *un, md_raidcs_t *cs); 1580Sstevel@tonic-gate static int raid_write(mr_unit_t *un, md_raidcs_t *cs); 1590Sstevel@tonic-gate static void raid_write_io(mr_unit_t *un, md_raidcs_t *cs); 1600Sstevel@tonic-gate static void raid_stage(md_raidcs_t *cs); 1610Sstevel@tonic-gate static void raid_enqueue(md_raidcs_t *cs); 1620Sstevel@tonic-gate static diskaddr_t raid_line(diskaddr_t segment, mr_unit_t *un); 1630Sstevel@tonic-gate uint_t raid_dcolumn(diskaddr_t segment, mr_unit_t *un); 1640Sstevel@tonic-gate static void getpbuffer(md_raidcs_t *cs); 1650Sstevel@tonic-gate static void getdbuffer(md_raidcs_t *cs); 1660Sstevel@tonic-gate static void raid_done(buf_t *bp); 1670Sstevel@tonic-gate static void raid_io_startup(mr_unit_t *un); 1680Sstevel@tonic-gate 1690Sstevel@tonic-gate static rus_state_t 1700Sstevel@tonic-gate raid_col2unit(rcs_state_t state, rus_state_t unitstate) 1710Sstevel@tonic-gate { 1720Sstevel@tonic-gate switch (state) { 1730Sstevel@tonic-gate case RCS_INIT: 1740Sstevel@tonic-gate return (RUS_INIT); 1750Sstevel@tonic-gate case RCS_OKAY: 1760Sstevel@tonic-gate return (RUS_OKAY); 1770Sstevel@tonic-gate case RCS_RESYNC: 1780Sstevel@tonic-gate if (unitstate & RUS_LAST_ERRED) 1790Sstevel@tonic-gate return (RUS_LAST_ERRED); 1800Sstevel@tonic-gate else 1810Sstevel@tonic-gate return (RUS_ERRED); 1820Sstevel@tonic-gate case RCS_ERRED: 1830Sstevel@tonic-gate return (RUS_ERRED); 1840Sstevel@tonic-gate case RCS_LAST_ERRED: 1850Sstevel@tonic-gate return (RUS_ERRED); 1860Sstevel@tonic-gate default: 1870Sstevel@tonic-gate break; 1880Sstevel@tonic-gate } 1890Sstevel@tonic-gate panic("raid_col2unit"); 1900Sstevel@tonic-gate /*NOTREACHED*/ 1910Sstevel@tonic-gate } 1920Sstevel@tonic-gate 1930Sstevel@tonic-gate void 1940Sstevel@tonic-gate 
raid_set_state(mr_unit_t *un, int col, rcs_state_t newstate, int force) 1950Sstevel@tonic-gate { 1960Sstevel@tonic-gate 1970Sstevel@tonic-gate rus_state_t unitstate, origstate; 1980Sstevel@tonic-gate rcs_state_t colstate; 1990Sstevel@tonic-gate rcs_state_t orig_colstate; 2000Sstevel@tonic-gate int errcnt = 0, 2010Sstevel@tonic-gate okaycnt = 0, 2020Sstevel@tonic-gate resynccnt = 0; 2030Sstevel@tonic-gate int i; 2040Sstevel@tonic-gate char *devname; 2050Sstevel@tonic-gate 2060Sstevel@tonic-gate ASSERT(un); 2070Sstevel@tonic-gate ASSERT(col < un->un_totalcolumncnt); 2080Sstevel@tonic-gate ASSERT(newstate & 2090Sstevel@tonic-gate (RCS_INIT | RCS_INIT_ERRED | RCS_OKAY | RCS_RESYNC | RCS_ERRED | 2100Sstevel@tonic-gate RCS_LAST_ERRED | RCS_REGEN)); 2110Sstevel@tonic-gate ASSERT((newstate & 2120Sstevel@tonic-gate ~(RCS_INIT | RCS_INIT_ERRED | RCS_OKAY | RCS_RESYNC | RCS_ERRED | 2130Sstevel@tonic-gate RCS_LAST_ERRED | RCS_REGEN)) 2140Sstevel@tonic-gate == 0); 2150Sstevel@tonic-gate 2160Sstevel@tonic-gate ASSERT(MDI_UNIT(MD_SID(un)) ? 
UNIT_WRITER_HELD(un) : 1); 2170Sstevel@tonic-gate 2180Sstevel@tonic-gate unitstate = un->un_state; 2190Sstevel@tonic-gate origstate = unitstate; 2200Sstevel@tonic-gate 2210Sstevel@tonic-gate if (force) { 2220Sstevel@tonic-gate un->un_column[col].un_devstate = newstate; 2230Sstevel@tonic-gate un->un_state = raid_col2unit(newstate, unitstate); 2240Sstevel@tonic-gate uniqtime32(&un->un_column[col].un_devtimestamp); 2250Sstevel@tonic-gate uniqtime32(&un->un_timestamp); 2260Sstevel@tonic-gate return; 2270Sstevel@tonic-gate } 2280Sstevel@tonic-gate 2290Sstevel@tonic-gate ASSERT(un->un_state & 2300Sstevel@tonic-gate (RUS_INIT | RUS_OKAY | RUS_ERRED | RUS_DOI | RUS_LAST_ERRED | 2310Sstevel@tonic-gate RUS_REGEN)); 2320Sstevel@tonic-gate ASSERT((un->un_state & ~(RUS_INIT | 2330Sstevel@tonic-gate RUS_OKAY | RUS_ERRED | RUS_DOI | RUS_LAST_ERRED | RUS_REGEN)) == 0); 2340Sstevel@tonic-gate 2350Sstevel@tonic-gate if (un->un_column[col].un_devstate == newstate) 2360Sstevel@tonic-gate return; 2370Sstevel@tonic-gate 2380Sstevel@tonic-gate if (newstate == RCS_REGEN) { 2390Sstevel@tonic-gate if (raid_state_cnt(un, RCS_OKAY) != un->un_totalcolumncnt) 2400Sstevel@tonic-gate return; 2410Sstevel@tonic-gate un->un_state = RUS_REGEN; 2420Sstevel@tonic-gate return; 2430Sstevel@tonic-gate } 2440Sstevel@tonic-gate 2450Sstevel@tonic-gate orig_colstate = un->un_column[col].un_devstate; 2460Sstevel@tonic-gate 2470Sstevel@tonic-gate /* 2480Sstevel@tonic-gate * if there is another column in the error state then this 2490Sstevel@tonic-gate * column should go to the last errored state 2500Sstevel@tonic-gate */ 2510Sstevel@tonic-gate for (i = 0; i < un->un_totalcolumncnt; i++) { 2520Sstevel@tonic-gate if (i == col) 2530Sstevel@tonic-gate colstate = newstate; 2540Sstevel@tonic-gate else 2550Sstevel@tonic-gate colstate = un->un_column[i].un_devstate; 2560Sstevel@tonic-gate if (colstate & (RCS_ERRED | RCS_LAST_ERRED | RCS_INIT_ERRED)) 2570Sstevel@tonic-gate errcnt++; 2580Sstevel@tonic-gate if (colstate & 
RCS_OKAY) 2590Sstevel@tonic-gate okaycnt++; 2600Sstevel@tonic-gate if (colstate & RCS_RESYNC) 2610Sstevel@tonic-gate resynccnt++; 2620Sstevel@tonic-gate } 2630Sstevel@tonic-gate ASSERT(resynccnt < 2); 2640Sstevel@tonic-gate 2650Sstevel@tonic-gate if (okaycnt == un->un_totalcolumncnt) 2660Sstevel@tonic-gate unitstate = RUS_OKAY; 2670Sstevel@tonic-gate else if (errcnt > 1) { 2680Sstevel@tonic-gate unitstate = RUS_LAST_ERRED; 2690Sstevel@tonic-gate if (newstate & RCS_ERRED) 2700Sstevel@tonic-gate newstate = RCS_LAST_ERRED; 2710Sstevel@tonic-gate } else if (errcnt == 1) 2720Sstevel@tonic-gate if (!(unitstate & RUS_LAST_ERRED)) 2730Sstevel@tonic-gate unitstate = RUS_ERRED; 2740Sstevel@tonic-gate 2750Sstevel@tonic-gate if (un->un_state == RUS_DOI) 2760Sstevel@tonic-gate unitstate = RUS_DOI; 2770Sstevel@tonic-gate 2780Sstevel@tonic-gate un->un_column[col].un_devstate = newstate; 2790Sstevel@tonic-gate uniqtime32(&un->un_column[col].un_devtimestamp); 2800Sstevel@tonic-gate /* 2810Sstevel@tonic-gate * if there are last errored column being brought back online 2820Sstevel@tonic-gate * by open or snarf, then be sure to clear the RUS_LAST_ERRED 2830Sstevel@tonic-gate * bit to allow writes. If there is a real error then the 2840Sstevel@tonic-gate * column will go back into last erred. 2850Sstevel@tonic-gate */ 2860Sstevel@tonic-gate if ((raid_state_cnt(un, RCS_LAST_ERRED) == 0) && 2870Sstevel@tonic-gate (raid_state_cnt(un, RCS_ERRED) == 1)) 2880Sstevel@tonic-gate unitstate = RUS_ERRED; 2890Sstevel@tonic-gate 2900Sstevel@tonic-gate un->un_state = unitstate; 2910Sstevel@tonic-gate uniqtime32(&un->un_timestamp); 2920Sstevel@tonic-gate 2930Sstevel@tonic-gate if ((! 
(origstate & (RUS_ERRED|RUS_LAST_ERRED|RUS_DOI))) && 2940Sstevel@tonic-gate (unitstate & (RUS_ERRED|RUS_LAST_ERRED|RUS_DOI))) { 2950Sstevel@tonic-gate devname = md_devname(MD_UN2SET(un), 2960Sstevel@tonic-gate un->un_column[col].un_dev, NULL, 0); 2970Sstevel@tonic-gate 2980Sstevel@tonic-gate cmn_err(CE_WARN, "md: %s: %s needs maintenance", 2990Sstevel@tonic-gate md_shortname(MD_SID(un)), devname); 3000Sstevel@tonic-gate 3010Sstevel@tonic-gate if (unitstate & RUS_LAST_ERRED) { 3020Sstevel@tonic-gate cmn_err(CE_WARN, "md: %s: %s last erred", 3030Sstevel@tonic-gate md_shortname(MD_SID(un)), devname); 3040Sstevel@tonic-gate 3050Sstevel@tonic-gate } else if (un->un_column[col].un_devflags & 3060Sstevel@tonic-gate MD_RAID_DEV_ISOPEN) { 3070Sstevel@tonic-gate /* 3080Sstevel@tonic-gate * Close the broken device and clear the open flag on 3090Sstevel@tonic-gate * it. We have to check that the device is open, 3100Sstevel@tonic-gate * otherwise the first open on it has resulted in the 3110Sstevel@tonic-gate * error that is being processed and the actual un_dev 3120Sstevel@tonic-gate * will be NODEV64. 3130Sstevel@tonic-gate */ 3140Sstevel@tonic-gate md_layered_close(un->un_column[col].un_dev, 3150Sstevel@tonic-gate MD_OFLG_NULL); 3160Sstevel@tonic-gate un->un_column[col].un_devflags &= ~MD_RAID_DEV_ISOPEN; 3170Sstevel@tonic-gate } 3180Sstevel@tonic-gate } else if (orig_colstate == RCS_LAST_ERRED && newstate == RCS_ERRED && 3190Sstevel@tonic-gate un->un_column[col].un_devflags & MD_RAID_DEV_ISOPEN) { 3200Sstevel@tonic-gate /* 3210Sstevel@tonic-gate * Similar to logic above except no log messages since we 3220Sstevel@tonic-gate * are just transitioning from Last Erred to Erred. 
3230Sstevel@tonic-gate */ 3240Sstevel@tonic-gate md_layered_close(un->un_column[col].un_dev, MD_OFLG_NULL); 3250Sstevel@tonic-gate un->un_column[col].un_devflags &= ~MD_RAID_DEV_ISOPEN; 3260Sstevel@tonic-gate } 3270Sstevel@tonic-gate 3280Sstevel@tonic-gate /* 3290Sstevel@tonic-gate * If a resync has completed, see if there is a Last Erred 3300Sstevel@tonic-gate * component that we can change to the Erred state. 3310Sstevel@tonic-gate */ 3320Sstevel@tonic-gate if ((orig_colstate == RCS_RESYNC) && (newstate == RCS_OKAY)) { 3330Sstevel@tonic-gate for (i = 0; i < un->un_totalcolumncnt; i++) { 3340Sstevel@tonic-gate if (i != col && 3350Sstevel@tonic-gate (un->un_column[i].un_devstate & RCS_LAST_ERRED)) { 3360Sstevel@tonic-gate raid_set_state(un, i, RCS_ERRED, 0); 3370Sstevel@tonic-gate break; 3380Sstevel@tonic-gate } 3390Sstevel@tonic-gate } 3400Sstevel@tonic-gate } 3410Sstevel@tonic-gate } 3420Sstevel@tonic-gate 3430Sstevel@tonic-gate /* 3440Sstevel@tonic-gate * NAME: erred_check_line 3450Sstevel@tonic-gate * 3460Sstevel@tonic-gate * DESCRIPTION: Return the type of write to perform on an erred column based 3470Sstevel@tonic-gate * upon any resync activity. 3480Sstevel@tonic-gate * 3490Sstevel@tonic-gate * if a column is being resynced and the write is above the 3500Sstevel@tonic-gate * resync point may have to write to the target being resynced. 3510Sstevel@tonic-gate * 3520Sstevel@tonic-gate * Column state may make it impossible to do the write 3530Sstevel@tonic-gate * in which case RCL_EIO or RCL_ENXIO is returned. 3540Sstevel@tonic-gate * 3550Sstevel@tonic-gate * If a column cannot be written directly, RCL_ERRED is 3560Sstevel@tonic-gate * returned and processing should proceed accordingly. 
3570Sstevel@tonic-gate * 3580Sstevel@tonic-gate * PARAMETERS: minor_t mnum - minor number identity of metadevice 3590Sstevel@tonic-gate * md_raidcs_t *cs - child save structure 3600Sstevel@tonic-gate * mr_column_t *dcolumn - pointer to data column structure 3610Sstevel@tonic-gate * mr_column_t *pcolumn - pointer to parity column structure 3620Sstevel@tonic-gate * 3630Sstevel@tonic-gate * RETURNS: RCL_OKAY, RCL_ERRED 3640Sstevel@tonic-gate * 3650Sstevel@tonic-gate * LOCKS: Expects Line Writer Lock and Unit Resource Lock to be held 3660Sstevel@tonic-gate * across call. 3670Sstevel@tonic-gate */ 3680Sstevel@tonic-gate 3690Sstevel@tonic-gate static int 3700Sstevel@tonic-gate erred_check_line(mr_unit_t *un, md_raidcs_t *cs, mr_column_t *column) 3710Sstevel@tonic-gate { 3720Sstevel@tonic-gate 3730Sstevel@tonic-gate ASSERT(un != NULL); 3740Sstevel@tonic-gate ASSERT(cs->cs_flags & MD_RCS_LLOCKD); 3750Sstevel@tonic-gate 3760Sstevel@tonic-gate if (column->un_devstate & RCS_OKAY) 3770Sstevel@tonic-gate return (RCL_OKAY); 3780Sstevel@tonic-gate 3790Sstevel@tonic-gate if (column->un_devstate & RCS_ERRED) 3800Sstevel@tonic-gate return (RCL_ERRED); /* do not read from errored disk */ 3810Sstevel@tonic-gate 3820Sstevel@tonic-gate /* 3830Sstevel@tonic-gate * for the last errored case their are two considerations. 3840Sstevel@tonic-gate * When the last errored column is the only errored column then 3850Sstevel@tonic-gate * do treat it like a maintenance column, not doing I/O from 3860Sstevel@tonic-gate * it. When it there are other failures then just attempt 3870Sstevel@tonic-gate * to use it. 
3880Sstevel@tonic-gate */ 3890Sstevel@tonic-gate if (column->un_devstate & RCS_LAST_ERRED) 3900Sstevel@tonic-gate return (RCL_ERRED); 3910Sstevel@tonic-gate 3920Sstevel@tonic-gate ASSERT(column->un_devstate & RCS_RESYNC); 3930Sstevel@tonic-gate 3940Sstevel@tonic-gate /* 3950Sstevel@tonic-gate * When a resync from a hotspare is being done (copy resync) 3960Sstevel@tonic-gate * then always treat it as an OKAY column, since no regen 3970Sstevel@tonic-gate * is required. 3980Sstevel@tonic-gate */ 3990Sstevel@tonic-gate if (column->un_devflags & MD_RAID_COPY_RESYNC) { 4000Sstevel@tonic-gate return (RCL_OKAY); 4010Sstevel@tonic-gate } 4020Sstevel@tonic-gate 4030Sstevel@tonic-gate mutex_enter(&un->un_mx); 4040Sstevel@tonic-gate if (cs->cs_line < un->un_resync_line_index) { 4050Sstevel@tonic-gate mutex_exit(&un->un_mx); 4060Sstevel@tonic-gate return (RCL_OKAY); 4070Sstevel@tonic-gate } 4080Sstevel@tonic-gate mutex_exit(&un->un_mx); 4090Sstevel@tonic-gate return (RCL_ERRED); 4100Sstevel@tonic-gate 4110Sstevel@tonic-gate } 4120Sstevel@tonic-gate 4130Sstevel@tonic-gate /* 4140Sstevel@tonic-gate * NAMES: raid_state_cnt 4150Sstevel@tonic-gate * 4160Sstevel@tonic-gate * DESCRIPTION: counts number of column in a specific state 4170Sstevel@tonic-gate * 4180Sstevel@tonic-gate * PARAMETERS: md_raid_t *un 4190Sstevel@tonic-gate * rcs_state state 4200Sstevel@tonic-gate */ 4210Sstevel@tonic-gate int 4220Sstevel@tonic-gate raid_state_cnt(mr_unit_t *un, rcs_state_t state) 4230Sstevel@tonic-gate { 4240Sstevel@tonic-gate int i, retval = 0; 4250Sstevel@tonic-gate 4260Sstevel@tonic-gate for (i = 0; i < un->un_totalcolumncnt; i++) 4270Sstevel@tonic-gate if (un->un_column[i].un_devstate & state) 4280Sstevel@tonic-gate retval++; 4290Sstevel@tonic-gate return (retval); 4300Sstevel@tonic-gate } 4310Sstevel@tonic-gate 4320Sstevel@tonic-gate /* 4330Sstevel@tonic-gate * NAMES: raid_io_overlaps 4340Sstevel@tonic-gate * 4350Sstevel@tonic-gate * DESCRIPTION: checkst for overlap of 2 child save 
structures 4360Sstevel@tonic-gate * 4370Sstevel@tonic-gate * PARAMETERS: md_raidcs_t cs1 4380Sstevel@tonic-gate * md_raidcs_t cs2 4390Sstevel@tonic-gate * 4400Sstevel@tonic-gate * RETURNS: 0 - no overlap 4410Sstevel@tonic-gate * 1 - overlap 4420Sstevel@tonic-gate */ 4430Sstevel@tonic-gate int 4440Sstevel@tonic-gate raid_io_overlaps(md_raidcs_t *cs1, md_raidcs_t *cs2) 4450Sstevel@tonic-gate { 4460Sstevel@tonic-gate if (cs1->cs_blkno > cs2->cs_lastblk) 4470Sstevel@tonic-gate return (0); 4480Sstevel@tonic-gate if (cs1->cs_lastblk < cs2->cs_blkno) 4490Sstevel@tonic-gate return (0); 4500Sstevel@tonic-gate return (1); 4510Sstevel@tonic-gate } 4520Sstevel@tonic-gate 4530Sstevel@tonic-gate /* 4540Sstevel@tonic-gate * NAMES: raid_parent_constructor 4550Sstevel@tonic-gate * DESCRIPTION: parent structure constructor routine 4560Sstevel@tonic-gate * PARAMETERS: 4570Sstevel@tonic-gate */ 4580Sstevel@tonic-gate /*ARGSUSED1*/ 4590Sstevel@tonic-gate static int 4600Sstevel@tonic-gate raid_parent_constructor(void *p, void *d1, int d2) 4610Sstevel@tonic-gate { 4620Sstevel@tonic-gate mutex_init(&((md_raidps_t *)p)->ps_mx, 4630Sstevel@tonic-gate NULL, MUTEX_DEFAULT, NULL); 4640Sstevel@tonic-gate mutex_init(&((md_raidps_t *)p)->ps_mapin_mx, 4650Sstevel@tonic-gate NULL, MUTEX_DEFAULT, NULL); 4660Sstevel@tonic-gate return (0); 4670Sstevel@tonic-gate } 4680Sstevel@tonic-gate 4690Sstevel@tonic-gate void 4700Sstevel@tonic-gate raid_parent_init(md_raidps_t *ps) 4710Sstevel@tonic-gate { 4720Sstevel@tonic-gate bzero(ps, offsetof(md_raidps_t, ps_mx)); 4730Sstevel@tonic-gate ((md_raidps_t *)ps)->ps_flags = MD_RPS_INUSE; 4740Sstevel@tonic-gate ((md_raidps_t *)ps)->ps_magic = RAID_PSMAGIC; 4750Sstevel@tonic-gate } 4760Sstevel@tonic-gate 4770Sstevel@tonic-gate /*ARGSUSED1*/ 4780Sstevel@tonic-gate static void 4790Sstevel@tonic-gate raid_parent_destructor(void *p, void *d) 4800Sstevel@tonic-gate { 4810Sstevel@tonic-gate mutex_destroy(&((md_raidps_t *)p)->ps_mx); 4820Sstevel@tonic-gate 
mutex_destroy(&((md_raidps_t *)p)->ps_mapin_mx); 4830Sstevel@tonic-gate } 4840Sstevel@tonic-gate 4850Sstevel@tonic-gate /* 4860Sstevel@tonic-gate * NAMES: raid_child_constructor 4870Sstevel@tonic-gate * DESCRIPTION: child structure constructor routine 4880Sstevel@tonic-gate * PARAMETERS: 4890Sstevel@tonic-gate */ 4900Sstevel@tonic-gate /*ARGSUSED1*/ 4910Sstevel@tonic-gate static int 4920Sstevel@tonic-gate raid_child_constructor(void *p, void *d1, int d2) 4930Sstevel@tonic-gate { 4940Sstevel@tonic-gate md_raidcs_t *cs = (md_raidcs_t *)p; 4950Sstevel@tonic-gate mutex_init(&cs->cs_mx, NULL, MUTEX_DEFAULT, NULL); 4960Sstevel@tonic-gate bioinit(&cs->cs_dbuf); 4970Sstevel@tonic-gate bioinit(&cs->cs_pbuf); 4980Sstevel@tonic-gate bioinit(&cs->cs_hbuf); 4990Sstevel@tonic-gate return (0); 5000Sstevel@tonic-gate } 5010Sstevel@tonic-gate 5020Sstevel@tonic-gate void 5030Sstevel@tonic-gate raid_child_init(md_raidcs_t *cs) 5040Sstevel@tonic-gate { 5050Sstevel@tonic-gate bzero(cs, offsetof(md_raidcs_t, cs_mx)); 5060Sstevel@tonic-gate 5070Sstevel@tonic-gate md_bioreset(&cs->cs_dbuf); 5080Sstevel@tonic-gate md_bioreset(&cs->cs_pbuf); 5090Sstevel@tonic-gate md_bioreset(&cs->cs_hbuf); 5100Sstevel@tonic-gate 5110Sstevel@tonic-gate ((md_raidcs_t *)cs)->cs_dbuf.b_chain = 5120Sstevel@tonic-gate ((md_raidcs_t *)cs)->cs_pbuf.b_chain = 5130Sstevel@tonic-gate ((md_raidcs_t *)cs)->cs_hbuf.b_chain = 5140Sstevel@tonic-gate (struct buf *)(cs); 5150Sstevel@tonic-gate 5160Sstevel@tonic-gate cs->cs_magic = RAID_CSMAGIC; 5170Sstevel@tonic-gate cs->cs_line = MD_DISKADDR_ERROR; 5180Sstevel@tonic-gate cs->cs_dpwslot = -1; 5190Sstevel@tonic-gate cs->cs_ppwslot = -1; 5200Sstevel@tonic-gate } 5210Sstevel@tonic-gate 5220Sstevel@tonic-gate /*ARGSUSED1*/ 5230Sstevel@tonic-gate static void 5240Sstevel@tonic-gate raid_child_destructor(void *p, void *d) 5250Sstevel@tonic-gate { 5260Sstevel@tonic-gate biofini(&((md_raidcs_t *)p)->cs_dbuf); 5270Sstevel@tonic-gate biofini(&((md_raidcs_t *)p)->cs_hbuf); 
5280Sstevel@tonic-gate biofini(&((md_raidcs_t *)p)->cs_pbuf); 5290Sstevel@tonic-gate mutex_destroy(&((md_raidcs_t *)p)->cs_mx); 5300Sstevel@tonic-gate } 5310Sstevel@tonic-gate 5320Sstevel@tonic-gate /*ARGSUSED1*/ 5330Sstevel@tonic-gate static int 5340Sstevel@tonic-gate raid_cbuf_constructor(void *p, void *d1, int d2) 5350Sstevel@tonic-gate { 5360Sstevel@tonic-gate bioinit(&((md_raidcbuf_t *)p)->cbuf_bp); 5370Sstevel@tonic-gate return (0); 5380Sstevel@tonic-gate } 5390Sstevel@tonic-gate 5400Sstevel@tonic-gate static void 5410Sstevel@tonic-gate raid_cbuf_init(md_raidcbuf_t *cb) 5420Sstevel@tonic-gate { 5430Sstevel@tonic-gate bzero(cb, offsetof(md_raidcbuf_t, cbuf_bp)); 5440Sstevel@tonic-gate md_bioreset(&cb->cbuf_bp); 5450Sstevel@tonic-gate cb->cbuf_magic = RAID_BUFMAGIC; 5460Sstevel@tonic-gate cb->cbuf_pwslot = -1; 5470Sstevel@tonic-gate cb->cbuf_flags = CBUF_WRITE; 5480Sstevel@tonic-gate } 5490Sstevel@tonic-gate 5500Sstevel@tonic-gate /*ARGSUSED1*/ 5510Sstevel@tonic-gate static void 5520Sstevel@tonic-gate raid_cbuf_destructor(void *p, void *d) 5530Sstevel@tonic-gate { 5540Sstevel@tonic-gate biofini(&((md_raidcbuf_t *)p)->cbuf_bp); 5550Sstevel@tonic-gate } 5560Sstevel@tonic-gate 5570Sstevel@tonic-gate /* 5580Sstevel@tonic-gate * NAMES: raid_run_queue 5590Sstevel@tonic-gate * DESCRIPTION: spawn a backend processing daemon for RAID metadevice. 
5600Sstevel@tonic-gate * PARAMETERS: 5610Sstevel@tonic-gate */ 5620Sstevel@tonic-gate /*ARGSUSED*/ 5630Sstevel@tonic-gate static void 5640Sstevel@tonic-gate raid_run_queue(void *d) 5650Sstevel@tonic-gate { 5660Sstevel@tonic-gate if (!(md_status & MD_GBL_DAEMONS_LIVE)) 5670Sstevel@tonic-gate md_daemon(1, &md_done_daemon); 5680Sstevel@tonic-gate } 5690Sstevel@tonic-gate 5700Sstevel@tonic-gate /* 5710Sstevel@tonic-gate * NAME: raid_build_pwslot 5720Sstevel@tonic-gate * DESCRIPTION: builds mr_pw_reserve for the column 5730Sstevel@tonic-gate * PARAMETERS: un is the pointer to the unit structure 5740Sstevel@tonic-gate * colindex is the column to create the structure for 5750Sstevel@tonic-gate */ 5760Sstevel@tonic-gate int 5770Sstevel@tonic-gate raid_build_pw_reservation(mr_unit_t *un, int colindex) 5780Sstevel@tonic-gate { 5790Sstevel@tonic-gate mr_pw_reserve_t *pw; 5800Sstevel@tonic-gate mr_scoreboard_t *sb; 5810Sstevel@tonic-gate int i; 5820Sstevel@tonic-gate 5830Sstevel@tonic-gate pw = (mr_pw_reserve_t *) kmem_zalloc(sizeof (mr_pw_reserve_t) + 5840Sstevel@tonic-gate (sizeof (mr_scoreboard_t) * un->un_pwcnt), KM_SLEEP); 5850Sstevel@tonic-gate pw->pw_magic = RAID_PWMAGIC; 5860Sstevel@tonic-gate pw->pw_column = colindex; 5870Sstevel@tonic-gate pw->pw_free = un->un_pwcnt; 5880Sstevel@tonic-gate sb = &pw->pw_sb[0]; 5890Sstevel@tonic-gate for (i = 0; i < un->un_pwcnt; i++) { 5900Sstevel@tonic-gate sb[i].sb_column = colindex; 5910Sstevel@tonic-gate sb[i].sb_flags = SB_UNUSED; 5920Sstevel@tonic-gate sb[i].sb_start_blk = 0; 5930Sstevel@tonic-gate sb[i].sb_last_blk = 0; 5940Sstevel@tonic-gate sb[i].sb_cs = NULL; 5950Sstevel@tonic-gate } 5960Sstevel@tonic-gate un->un_column_ic[colindex].un_pw_reserve = pw; 5970Sstevel@tonic-gate return (0); 5980Sstevel@tonic-gate } 5990Sstevel@tonic-gate /* 6000Sstevel@tonic-gate * NAME: raid_free_pw_reservation 6010Sstevel@tonic-gate * DESCRIPTION: RAID metadevice pre-write slot structure destroy routine 6020Sstevel@tonic-gate * PARAMETERS: 
mr_unit_t *un - pointer to a unit structure 6030Sstevel@tonic-gate * int colindex - index of the column whose pre-write slot struct 6040Sstevel@tonic-gate * is to be destroyed. 6050Sstevel@tonic-gate */ 6060Sstevel@tonic-gate void 6070Sstevel@tonic-gate raid_free_pw_reservation(mr_unit_t *un, int colindex) 6080Sstevel@tonic-gate { 6090Sstevel@tonic-gate mr_pw_reserve_t *pw = un->un_column_ic[colindex].un_pw_reserve; 6100Sstevel@tonic-gate 6110Sstevel@tonic-gate kmem_free(pw, sizeof (mr_pw_reserve_t) + 6120Sstevel@tonic-gate (sizeof (mr_scoreboard_t) * un->un_pwcnt)); 6130Sstevel@tonic-gate } 6140Sstevel@tonic-gate 6150Sstevel@tonic-gate /* 6160Sstevel@tonic-gate * NAME: raid_cancel_pwslot 6170Sstevel@tonic-gate * DESCRIPTION: RAID metadevice write routine 6180Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to a child structure 6190Sstevel@tonic-gate */ 6200Sstevel@tonic-gate static void 6210Sstevel@tonic-gate raid_cancel_pwslot(md_raidcs_t *cs) 6220Sstevel@tonic-gate { 6230Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 6240Sstevel@tonic-gate mr_pw_reserve_t *pw; 6250Sstevel@tonic-gate mr_scoreboard_t *sb; 6260Sstevel@tonic-gate mr_column_ic_t *col; 6270Sstevel@tonic-gate md_raidcbuf_t *cbuf; 6280Sstevel@tonic-gate int broadcast = 0; 6290Sstevel@tonic-gate 6300Sstevel@tonic-gate if (cs->cs_ps->ps_flags & MD_RPS_READ) 6310Sstevel@tonic-gate return; 6320Sstevel@tonic-gate if (cs->cs_dpwslot != -1) { 6330Sstevel@tonic-gate col = &un->un_column_ic[cs->cs_dcolumn]; 6340Sstevel@tonic-gate pw = col->un_pw_reserve; 6350Sstevel@tonic-gate sb = &pw->pw_sb[cs->cs_dpwslot]; 6360Sstevel@tonic-gate sb->sb_flags = SB_AVAIL; 6370Sstevel@tonic-gate if ((pw->pw_free++ == 0) || (un->un_rflags & MD_RFLAG_NEEDPW)) 6380Sstevel@tonic-gate broadcast++; 6390Sstevel@tonic-gate sb->sb_cs = NULL; 6400Sstevel@tonic-gate } 6410Sstevel@tonic-gate 6420Sstevel@tonic-gate if (cs->cs_ppwslot != -1) { 6430Sstevel@tonic-gate col = &un->un_column_ic[cs->cs_pcolumn]; 6440Sstevel@tonic-gate pw = 
col->un_pw_reserve; 6450Sstevel@tonic-gate sb = &pw->pw_sb[cs->cs_ppwslot]; 6460Sstevel@tonic-gate sb->sb_flags = SB_AVAIL; 6470Sstevel@tonic-gate if ((pw->pw_free++ == 0) || (un->un_rflags & MD_RFLAG_NEEDPW)) 6480Sstevel@tonic-gate broadcast++; 6490Sstevel@tonic-gate sb->sb_cs = NULL; 6500Sstevel@tonic-gate } 6510Sstevel@tonic-gate 6520Sstevel@tonic-gate for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) { 6530Sstevel@tonic-gate if (cbuf->cbuf_pwslot == -1) 6540Sstevel@tonic-gate continue; 6550Sstevel@tonic-gate col = &un->un_column_ic[cbuf->cbuf_column]; 6560Sstevel@tonic-gate pw = col->un_pw_reserve; 6570Sstevel@tonic-gate sb = &pw->pw_sb[cbuf->cbuf_pwslot]; 6580Sstevel@tonic-gate sb->sb_flags = SB_AVAIL; 6590Sstevel@tonic-gate if ((pw->pw_free++ == 0) || (un->un_rflags & MD_RFLAG_NEEDPW)) 6600Sstevel@tonic-gate broadcast++; 6610Sstevel@tonic-gate sb->sb_cs = NULL; 6620Sstevel@tonic-gate } 6630Sstevel@tonic-gate if (broadcast) { 6640Sstevel@tonic-gate cv_broadcast(&un->un_cv); 6650Sstevel@tonic-gate return; 6660Sstevel@tonic-gate } 6670Sstevel@tonic-gate mutex_enter(&un->un_mx); 6680Sstevel@tonic-gate if (un->un_rflags & MD_RFLAG_NEEDPW) 6690Sstevel@tonic-gate cv_broadcast(&un->un_cv); 6700Sstevel@tonic-gate mutex_exit(&un->un_mx); 6710Sstevel@tonic-gate } 6720Sstevel@tonic-gate 6730Sstevel@tonic-gate static void 6740Sstevel@tonic-gate raid_free_pwinvalidate(md_raidcs_t *cs) 6750Sstevel@tonic-gate { 6760Sstevel@tonic-gate md_raidcbuf_t *cbuf; 6770Sstevel@tonic-gate md_raidcbuf_t *cbuf_to_free; 6780Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 6790Sstevel@tonic-gate mdi_unit_t *ui = MDI_UNIT(MD_SID(un)); 6800Sstevel@tonic-gate mr_pw_reserve_t *pw; 6810Sstevel@tonic-gate mr_scoreboard_t *sb; 6820Sstevel@tonic-gate int broadcast = 0; 6830Sstevel@tonic-gate 6840Sstevel@tonic-gate cbuf = cs->cs_pw_inval_list; 6850Sstevel@tonic-gate ASSERT(cbuf); 6860Sstevel@tonic-gate mutex_enter(&un->un_linlck_mx); 6870Sstevel@tonic-gate while (cbuf) { 6880Sstevel@tonic-gate 
pw = un->un_column_ic[cbuf->cbuf_column].un_pw_reserve; 6890Sstevel@tonic-gate sb = &pw->pw_sb[0]; 6900Sstevel@tonic-gate ASSERT(sb[cbuf->cbuf_pwslot].sb_flags & SB_INVAL_PEND); 6910Sstevel@tonic-gate sb[cbuf->cbuf_pwslot].sb_flags = SB_UNUSED; 6920Sstevel@tonic-gate sb[cbuf->cbuf_pwslot].sb_cs = NULL; 6930Sstevel@tonic-gate if ((pw->pw_free++ == 0) || (un->un_rflags & MD_RFLAG_NEEDPW)) 6940Sstevel@tonic-gate broadcast++; 6950Sstevel@tonic-gate cbuf_to_free = cbuf; 6960Sstevel@tonic-gate cbuf = cbuf->cbuf_next; 6970Sstevel@tonic-gate kmem_free(cbuf_to_free->cbuf_buffer, dbtob(un->un_iosize)); 6980Sstevel@tonic-gate kmem_cache_free(raid_cbuf_cache, cbuf_to_free); 6990Sstevel@tonic-gate } 7000Sstevel@tonic-gate cs->cs_pw_inval_list = (md_raidcbuf_t *)NULL; 7010Sstevel@tonic-gate /* 7020Sstevel@tonic-gate * now that there is a free prewrite slot, check to see if there 7030Sstevel@tonic-gate * are any io operations waiting first wake up the raid_io_startup 7040Sstevel@tonic-gate * then signal the the processes waiting in raid_write. 
7050Sstevel@tonic-gate */ 7060Sstevel@tonic-gate if (ui->ui_io_lock->io_list_front) 7070Sstevel@tonic-gate raid_io_startup(un); 7080Sstevel@tonic-gate mutex_exit(&un->un_linlck_mx); 7090Sstevel@tonic-gate if (broadcast) { 7100Sstevel@tonic-gate cv_broadcast(&un->un_cv); 7110Sstevel@tonic-gate return; 7120Sstevel@tonic-gate } 7130Sstevel@tonic-gate mutex_enter(&un->un_mx); 7140Sstevel@tonic-gate if (un->un_rflags & MD_RFLAG_NEEDPW) 7150Sstevel@tonic-gate cv_broadcast(&un->un_cv); 7160Sstevel@tonic-gate mutex_exit(&un->un_mx); 7170Sstevel@tonic-gate } 7180Sstevel@tonic-gate 7190Sstevel@tonic-gate 7200Sstevel@tonic-gate static int 7210Sstevel@tonic-gate raid_get_pwslot(md_raidcs_t *cs, int column) 7220Sstevel@tonic-gate { 7230Sstevel@tonic-gate mr_scoreboard_t *sb; 7240Sstevel@tonic-gate mr_pw_reserve_t *pw; 7250Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 7260Sstevel@tonic-gate diskaddr_t start_blk = cs->cs_blkno; 7270Sstevel@tonic-gate diskaddr_t last_blk = cs->cs_lastblk; 7280Sstevel@tonic-gate int i; 7290Sstevel@tonic-gate int pwcnt = un->un_pwcnt; 7300Sstevel@tonic-gate int avail = -1; 7310Sstevel@tonic-gate int use = -1; 7320Sstevel@tonic-gate int flags; 7330Sstevel@tonic-gate 7340Sstevel@tonic-gate 7350Sstevel@tonic-gate /* start with the data column */ 7360Sstevel@tonic-gate pw = cs->cs_un->un_column_ic[column].un_pw_reserve; 7370Sstevel@tonic-gate sb = &pw->pw_sb[0]; 7380Sstevel@tonic-gate ASSERT(pw->pw_free > 0); 7390Sstevel@tonic-gate for (i = 0; i < pwcnt; i++) { 7400Sstevel@tonic-gate flags = sb[i].sb_flags; 7410Sstevel@tonic-gate if (flags & SB_INVAL_PEND) 7420Sstevel@tonic-gate continue; 7430Sstevel@tonic-gate 7440Sstevel@tonic-gate if ((avail == -1) && (flags & (SB_AVAIL | SB_UNUSED))) 7450Sstevel@tonic-gate avail = i; 7460Sstevel@tonic-gate 7470Sstevel@tonic-gate if ((start_blk > sb[i].sb_last_blk) || 7480Sstevel@tonic-gate (last_blk < sb[i].sb_start_blk)) 7490Sstevel@tonic-gate continue; 7500Sstevel@tonic-gate 7510Sstevel@tonic-gate /* OVERLAP */ 
7520Sstevel@tonic-gate ASSERT(! (sb[i].sb_flags & SB_INUSE)); 7530Sstevel@tonic-gate 7540Sstevel@tonic-gate /* 7550Sstevel@tonic-gate * raid_invalidate_pwslot attempts to zero out prewrite entry 7560Sstevel@tonic-gate * in parallel with other disk reads/writes related to current 7570Sstevel@tonic-gate * transaction. however cs_frags accounting for this case is 7580Sstevel@tonic-gate * broken because raid_write_io resets cs_frags i.e. ignoring 7590Sstevel@tonic-gate * that it could have been been set to > 0 value by 7600Sstevel@tonic-gate * raid_invalidate_pwslot. While this can be fixed an 7610Sstevel@tonic-gate * additional problem is that we don't seem to handle 7620Sstevel@tonic-gate * correctly the case of getting a disk error for prewrite 7630Sstevel@tonic-gate * entry invalidation. 7640Sstevel@tonic-gate * It does not look like we really need 7650Sstevel@tonic-gate * to invalidate prewrite slots because raid_replay sorts 7660Sstevel@tonic-gate * prewrite id's in ascending order and during recovery the 7670Sstevel@tonic-gate * latest prewrite entry for the same block will be replay 7680Sstevel@tonic-gate * last. That's why i ifdef'd out the call to 7690Sstevel@tonic-gate * raid_invalidate_pwslot. --aguzovsk@east 7700Sstevel@tonic-gate */ 7710Sstevel@tonic-gate 7720Sstevel@tonic-gate if (use == -1) { 7730Sstevel@tonic-gate use = i; 7740Sstevel@tonic-gate } 7750Sstevel@tonic-gate } 7760Sstevel@tonic-gate 7770Sstevel@tonic-gate ASSERT(avail != -1); 7780Sstevel@tonic-gate pw->pw_free--; 7790Sstevel@tonic-gate if (use == -1) 7800Sstevel@tonic-gate use = avail; 7810Sstevel@tonic-gate 7820Sstevel@tonic-gate ASSERT(! 
(sb[use].sb_flags & SB_INUSE)); 7830Sstevel@tonic-gate sb[use].sb_flags = SB_INUSE; 7840Sstevel@tonic-gate sb[use].sb_cs = cs; 7850Sstevel@tonic-gate sb[use].sb_start_blk = start_blk; 7860Sstevel@tonic-gate sb[use].sb_last_blk = last_blk; 7870Sstevel@tonic-gate ASSERT((use >= 0) && (use < un->un_pwcnt)); 7880Sstevel@tonic-gate return (use); 7890Sstevel@tonic-gate } 7900Sstevel@tonic-gate 7910Sstevel@tonic-gate static int 7920Sstevel@tonic-gate raid_check_pw(md_raidcs_t *cs) 7930Sstevel@tonic-gate { 7940Sstevel@tonic-gate 7950Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 7960Sstevel@tonic-gate int i; 7970Sstevel@tonic-gate 7980Sstevel@tonic-gate ASSERT(! (cs->cs_flags & MD_RCS_HAVE_PW_SLOTS)); 7990Sstevel@tonic-gate /* 8000Sstevel@tonic-gate * check to be sure there is a prewrite slot available 8010Sstevel@tonic-gate * if not just return. 8020Sstevel@tonic-gate */ 8030Sstevel@tonic-gate if (cs->cs_flags & MD_RCS_LINE) { 8040Sstevel@tonic-gate for (i = 0; i < un->un_totalcolumncnt; i++) 8050Sstevel@tonic-gate if (un->un_column_ic[i].un_pw_reserve->pw_free <= 0) 8060Sstevel@tonic-gate return (1); 8070Sstevel@tonic-gate return (0); 8080Sstevel@tonic-gate } 8090Sstevel@tonic-gate 8100Sstevel@tonic-gate if (un->un_column_ic[cs->cs_dcolumn].un_pw_reserve->pw_free <= 0) 8110Sstevel@tonic-gate return (1); 8120Sstevel@tonic-gate if (un->un_column_ic[cs->cs_pcolumn].un_pw_reserve->pw_free <= 0) 8130Sstevel@tonic-gate return (1); 8140Sstevel@tonic-gate return (0); 8150Sstevel@tonic-gate } 8160Sstevel@tonic-gate static int 8170Sstevel@tonic-gate raid_alloc_pwslot(md_raidcs_t *cs) 8180Sstevel@tonic-gate { 8190Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 8200Sstevel@tonic-gate md_raidcbuf_t *cbuf; 8210Sstevel@tonic-gate 8220Sstevel@tonic-gate ASSERT(! 
(cs->cs_flags & MD_RCS_HAVE_PW_SLOTS)); 8230Sstevel@tonic-gate if (raid_check_pw(cs)) 8240Sstevel@tonic-gate return (1); 8250Sstevel@tonic-gate 8260Sstevel@tonic-gate mutex_enter(&un->un_mx); 8270Sstevel@tonic-gate un->un_pwid++; 8280Sstevel@tonic-gate cs->cs_pwid = un->un_pwid; 8290Sstevel@tonic-gate mutex_exit(&un->un_mx); 8300Sstevel@tonic-gate 8310Sstevel@tonic-gate cs->cs_dpwslot = raid_get_pwslot(cs, cs->cs_dcolumn); 8320Sstevel@tonic-gate for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) { 8330Sstevel@tonic-gate cbuf->cbuf_pwslot = raid_get_pwslot(cs, cbuf->cbuf_column); 8340Sstevel@tonic-gate } 8350Sstevel@tonic-gate cs->cs_ppwslot = raid_get_pwslot(cs, cs->cs_pcolumn); 8360Sstevel@tonic-gate 8370Sstevel@tonic-gate cs->cs_flags |= MD_RCS_HAVE_PW_SLOTS; 8380Sstevel@tonic-gate 8390Sstevel@tonic-gate return (0); 8400Sstevel@tonic-gate } 8410Sstevel@tonic-gate 8420Sstevel@tonic-gate /* 8430Sstevel@tonic-gate * NAMES: raid_build_incore 8440Sstevel@tonic-gate * DESCRIPTION: RAID metadevice incore structure building routine 8450Sstevel@tonic-gate * PARAMETERS: void *p - pointer to a unit structure 8460Sstevel@tonic-gate * int snarfing - a flag to indicate snarfing is required 8470Sstevel@tonic-gate */ 8480Sstevel@tonic-gate int 8490Sstevel@tonic-gate raid_build_incore(void *p, int snarfing) 8500Sstevel@tonic-gate { 8510Sstevel@tonic-gate mr_unit_t *un = (mr_unit_t *)p; 8520Sstevel@tonic-gate minor_t mnum = MD_SID(un); 8530Sstevel@tonic-gate mddb_recid_t hs_recid = 0; 8540Sstevel@tonic-gate int i; 8550Sstevel@tonic-gate int preserve_flags; 8560Sstevel@tonic-gate mr_column_t *column; 8570Sstevel@tonic-gate int iosize; 8580Sstevel@tonic-gate md_dev64_t hs, dev; 8590Sstevel@tonic-gate int resync_cnt = 0, 8600Sstevel@tonic-gate error_cnt = 0; 8610Sstevel@tonic-gate 8620Sstevel@tonic-gate hs = NODEV64; 8630Sstevel@tonic-gate dev = NODEV64; 8640Sstevel@tonic-gate 8650Sstevel@tonic-gate /* clear out bogus pointer incase we return(1) prior to alloc */ 
8660Sstevel@tonic-gate un->mr_ic = NULL; 8670Sstevel@tonic-gate 8680Sstevel@tonic-gate if (MD_STATUS(un) & MD_UN_BEING_RESET) { 8690Sstevel@tonic-gate mddb_setrecprivate(un->c.un_record_id, MD_PRV_PENDCLEAN); 8700Sstevel@tonic-gate return (1); 8710Sstevel@tonic-gate } 8720Sstevel@tonic-gate 8730Sstevel@tonic-gate if (MD_UNIT(mnum) != NULL) 8740Sstevel@tonic-gate return (0); 8750Sstevel@tonic-gate 8760Sstevel@tonic-gate if (snarfing) 8770Sstevel@tonic-gate MD_STATUS(un) = 0; 8780Sstevel@tonic-gate 8790Sstevel@tonic-gate un->mr_ic = (mr_unit_ic_t *)kmem_zalloc(sizeof (*un->mr_ic), 8800Sstevel@tonic-gate KM_SLEEP); 8810Sstevel@tonic-gate 8820Sstevel@tonic-gate un->un_column_ic = (mr_column_ic_t *) 8830Sstevel@tonic-gate kmem_zalloc(sizeof (mr_column_ic_t) * 8840Sstevel@tonic-gate un->un_totalcolumncnt, KM_SLEEP); 8850Sstevel@tonic-gate 8860Sstevel@tonic-gate for (i = 0; i < un->un_totalcolumncnt; i++) { 8870Sstevel@tonic-gate 8880Sstevel@tonic-gate column = &un->un_column[i]; 8890Sstevel@tonic-gate preserve_flags = column->un_devflags & 8900Sstevel@tonic-gate (MD_RAID_COPY_RESYNC | MD_RAID_REGEN_RESYNC); 8910Sstevel@tonic-gate column->un_devflags &= 8920Sstevel@tonic-gate ~(MD_RAID_ALT_ISOPEN | MD_RAID_DEV_ISOPEN | 8930Sstevel@tonic-gate MD_RAID_WRITE_ALT); 8940Sstevel@tonic-gate if (raid_build_pw_reservation(un, i) != 0) { 8950Sstevel@tonic-gate /* could not build pwslot */ 8960Sstevel@tonic-gate return (1); 8970Sstevel@tonic-gate } 8980Sstevel@tonic-gate 8990Sstevel@tonic-gate if (snarfing) { 9000Sstevel@tonic-gate set_t setno = MD_MIN2SET(mnum); 9010Sstevel@tonic-gate dev = md_getdevnum(setno, mddb_getsidenum(setno), 9020Sstevel@tonic-gate column->un_orig_key, MD_NOTRUST_DEVT); 9030Sstevel@tonic-gate /* 9040Sstevel@tonic-gate * Comment out instead of remove so we have history 9050Sstevel@tonic-gate * In the pre-SVM releases stored devt is used so 9060Sstevel@tonic-gate * as long as there is one snarf is always happy 9070Sstevel@tonic-gate * even the component is 
powered off. This is not 9080Sstevel@tonic-gate * the case in current SVM implementation. NODEV64 9090Sstevel@tonic-gate * can be returned and in this case since we resolve 9100Sstevel@tonic-gate * the devt at 'open' time (first use of metadevice) 9110Sstevel@tonic-gate * we will allow snarf continue. 9120Sstevel@tonic-gate * 9130Sstevel@tonic-gate * if (dev == NODEV64) 9140Sstevel@tonic-gate * return (1); 9150Sstevel@tonic-gate */ 9160Sstevel@tonic-gate 9170Sstevel@tonic-gate /* 9180Sstevel@tonic-gate * Setup un_orig_dev from device id info if the device 9190Sstevel@tonic-gate * is valid (not NODEV64). 9200Sstevel@tonic-gate */ 9210Sstevel@tonic-gate if (dev != NODEV64) 9220Sstevel@tonic-gate column->un_orig_dev = dev; 9230Sstevel@tonic-gate 9240Sstevel@tonic-gate if (column->un_devstate & RCS_RESYNC) 9250Sstevel@tonic-gate resync_cnt++; 9260Sstevel@tonic-gate if (column->un_devstate & (RCS_ERRED | RCS_LAST_ERRED)) 9270Sstevel@tonic-gate error_cnt++; 9280Sstevel@tonic-gate 9290Sstevel@tonic-gate if (HOTSPARED(un, i)) { 9300Sstevel@tonic-gate (void) md_hot_spare_ifc(HS_MKDEV, 9310Sstevel@tonic-gate 0, 0, 0, &column->un_hs_id, NULL, 9320Sstevel@tonic-gate &hs, NULL); 9330Sstevel@tonic-gate /* 9340Sstevel@tonic-gate * Same here 9350Sstevel@tonic-gate * 9360Sstevel@tonic-gate * if (hs == NODEV64) 9370Sstevel@tonic-gate * return (1); 9380Sstevel@tonic-gate */ 9390Sstevel@tonic-gate } 9400Sstevel@tonic-gate 9410Sstevel@tonic-gate if (HOTSPARED(un, i)) { 9420Sstevel@tonic-gate if (column->un_devstate & 9430Sstevel@tonic-gate (RCS_OKAY | RCS_LAST_ERRED)) { 9440Sstevel@tonic-gate column->un_dev = hs; 9450Sstevel@tonic-gate column->un_pwstart = 9460Sstevel@tonic-gate column->un_hs_pwstart; 9470Sstevel@tonic-gate column->un_devstart = 9480Sstevel@tonic-gate column->un_hs_devstart; 9490Sstevel@tonic-gate preserve_flags &= 9500Sstevel@tonic-gate ~(MD_RAID_COPY_RESYNC | 9510Sstevel@tonic-gate MD_RAID_REGEN_RESYNC); 9520Sstevel@tonic-gate } else if (column->un_devstate & 
RCS_RESYNC) { 9530Sstevel@tonic-gate /* 9540Sstevel@tonic-gate * if previous system was 4.0 set 9550Sstevel@tonic-gate * the direction flags 9560Sstevel@tonic-gate */ 9570Sstevel@tonic-gate if ((preserve_flags & 9580Sstevel@tonic-gate (MD_RAID_COPY_RESYNC | 9590Sstevel@tonic-gate MD_RAID_REGEN_RESYNC)) == 0) { 9600Sstevel@tonic-gate if (column->un_alt_dev != NODEV64) 9610Sstevel@tonic-gate preserve_flags |= 9620Sstevel@tonic-gate MD_RAID_COPY_RESYNC; 9630Sstevel@tonic-gate else 9640Sstevel@tonic-gate preserve_flags |= 9650Sstevel@tonic-gate MD_RAID_REGEN_RESYNC; 9660Sstevel@tonic-gate } 9670Sstevel@tonic-gate } 9680Sstevel@tonic-gate } else { /* no hot spares */ 9690Sstevel@tonic-gate column->un_dev = dev; 9700Sstevel@tonic-gate column->un_pwstart = column->un_orig_pwstart; 9710Sstevel@tonic-gate column->un_devstart = column->un_orig_devstart; 9720Sstevel@tonic-gate if (column->un_devstate & RCS_RESYNC) { 9730Sstevel@tonic-gate preserve_flags |= MD_RAID_REGEN_RESYNC; 9740Sstevel@tonic-gate preserve_flags &= ~MD_RAID_COPY_RESYNC; 9750Sstevel@tonic-gate } 9760Sstevel@tonic-gate } 9770Sstevel@tonic-gate if (! 
(column->un_devstate & RCS_RESYNC)) { 9780Sstevel@tonic-gate preserve_flags &= 9790Sstevel@tonic-gate ~(MD_RAID_REGEN_RESYNC | 9800Sstevel@tonic-gate MD_RAID_COPY_RESYNC); 9810Sstevel@tonic-gate } 9820Sstevel@tonic-gate 9830Sstevel@tonic-gate column->un_devflags = preserve_flags; 9840Sstevel@tonic-gate column->un_alt_dev = NODEV64; 9850Sstevel@tonic-gate column->un_alt_pwstart = 0; 9860Sstevel@tonic-gate column->un_alt_devstart = 0; 9870Sstevel@tonic-gate un->un_resync_line_index = 0; 9880Sstevel@tonic-gate un->un_resync_index = 0; 9890Sstevel@tonic-gate un->un_percent_done = 0; 9900Sstevel@tonic-gate } 9910Sstevel@tonic-gate } 9920Sstevel@tonic-gate 9930Sstevel@tonic-gate if (resync_cnt && error_cnt) { 9940Sstevel@tonic-gate for (i = 0; i < un->un_totalcolumncnt; i++) { 9950Sstevel@tonic-gate column = &un->un_column[i]; 9960Sstevel@tonic-gate if (HOTSPARED(un, i) && 9970Sstevel@tonic-gate (column->un_devstate & RCS_RESYNC) && 9980Sstevel@tonic-gate (column->un_devflags & MD_RAID_COPY_RESYNC)) 9990Sstevel@tonic-gate /* hotspare has data */ 10000Sstevel@tonic-gate continue; 10010Sstevel@tonic-gate 10020Sstevel@tonic-gate if (HOTSPARED(un, i) && 10030Sstevel@tonic-gate (column->un_devstate & RCS_RESYNC)) { 10040Sstevel@tonic-gate /* hotspare does not have data */ 10050Sstevel@tonic-gate raid_hs_release(HS_FREE, un, &hs_recid, i); 10060Sstevel@tonic-gate column->un_dev = column->un_orig_dev; 10070Sstevel@tonic-gate column->un_pwstart = column->un_orig_pwstart; 10080Sstevel@tonic-gate column->un_devstart = column->un_orig_devstart; 10090Sstevel@tonic-gate mddb_setrecprivate(hs_recid, MD_PRV_PENDCOM); 10100Sstevel@tonic-gate } 10110Sstevel@tonic-gate 10120Sstevel@tonic-gate if (column->un_devstate & RCS_ERRED) 10130Sstevel@tonic-gate column->un_devstate = RCS_LAST_ERRED; 10140Sstevel@tonic-gate 10150Sstevel@tonic-gate if (column->un_devstate & RCS_RESYNC) 10160Sstevel@tonic-gate column->un_devstate = RCS_ERRED; 10170Sstevel@tonic-gate } 10180Sstevel@tonic-gate } 
10190Sstevel@tonic-gate mddb_setrecprivate(un->c.un_record_id, MD_PRV_PENDCOM); 10200Sstevel@tonic-gate 10210Sstevel@tonic-gate un->un_pwid = 1; /* or some other possible value */ 10220Sstevel@tonic-gate un->un_magic = RAID_UNMAGIC; 10230Sstevel@tonic-gate iosize = un->un_iosize; 10240Sstevel@tonic-gate un->un_pbuffer = kmem_alloc(dbtob(iosize), KM_SLEEP); 10250Sstevel@tonic-gate un->un_dbuffer = kmem_alloc(dbtob(iosize), KM_SLEEP); 10260Sstevel@tonic-gate mutex_init(&un->un_linlck_mx, NULL, MUTEX_DEFAULT, NULL); 10270Sstevel@tonic-gate cv_init(&un->un_linlck_cv, NULL, CV_DEFAULT, NULL); 10280Sstevel@tonic-gate un->un_linlck_chn = NULL; 10290Sstevel@tonic-gate MD_UNIT(mnum) = un; 10300Sstevel@tonic-gate 10310Sstevel@tonic-gate 10320Sstevel@tonic-gate return (0); 10330Sstevel@tonic-gate } 10340Sstevel@tonic-gate 10350Sstevel@tonic-gate /* 10360Sstevel@tonic-gate * NAMES: reset_raid 10370Sstevel@tonic-gate * DESCRIPTION: RAID metadevice reset routine 10380Sstevel@tonic-gate * PARAMETERS: mr_unit_t *un - pointer to a unit structure 10390Sstevel@tonic-gate * minor_t mnum - RAID metadevice minor number 10400Sstevel@tonic-gate * int removing - a flag to imply removing device name from 10410Sstevel@tonic-gate * MDDB database. 
10420Sstevel@tonic-gate */ 10430Sstevel@tonic-gate void 10440Sstevel@tonic-gate reset_raid(mr_unit_t *un, minor_t mnum, int removing) 10450Sstevel@tonic-gate { 10460Sstevel@tonic-gate int i, n = 0; 10470Sstevel@tonic-gate sv_dev_t *sv; 10480Sstevel@tonic-gate mr_column_t *column; 10490Sstevel@tonic-gate int column_cnt = un->un_totalcolumncnt; 10500Sstevel@tonic-gate mddb_recid_t *recids, vtoc_id; 10510Sstevel@tonic-gate int hserr; 10520Sstevel@tonic-gate 10530Sstevel@tonic-gate ASSERT((MDI_UNIT(mnum)->ui_io_lock->io_list_front == NULL) && 10540Sstevel@tonic-gate (MDI_UNIT(mnum)->ui_io_lock->io_list_back == NULL)); 10550Sstevel@tonic-gate 10560Sstevel@tonic-gate md_destroy_unit_incore(mnum, &raid_md_ops); 10570Sstevel@tonic-gate 10580Sstevel@tonic-gate MD_UNIT(mnum) = NULL; 10590Sstevel@tonic-gate 10600Sstevel@tonic-gate if (un->un_pbuffer) { 10610Sstevel@tonic-gate kmem_free(un->un_pbuffer, dbtob(un->un_iosize)); 10620Sstevel@tonic-gate un->un_pbuffer = NULL; 10630Sstevel@tonic-gate } 10640Sstevel@tonic-gate if (un->un_dbuffer) { 10650Sstevel@tonic-gate kmem_free(un->un_dbuffer, dbtob(un->un_iosize)); 10660Sstevel@tonic-gate un->un_dbuffer = NULL; 10670Sstevel@tonic-gate } 10680Sstevel@tonic-gate 10690Sstevel@tonic-gate /* free all pre-write slots created during build incore */ 10700Sstevel@tonic-gate for (i = 0; i < un->un_totalcolumncnt; i++) 10710Sstevel@tonic-gate raid_free_pw_reservation(un, i); 10720Sstevel@tonic-gate 10730Sstevel@tonic-gate kmem_free(un->un_column_ic, sizeof (mr_column_ic_t) * 10740Sstevel@tonic-gate un->un_totalcolumncnt); 10750Sstevel@tonic-gate 10760Sstevel@tonic-gate kmem_free(un->mr_ic, sizeof (*un->mr_ic)); 10770Sstevel@tonic-gate 10781623Stw21770 /* 10791623Stw21770 * Attempt release of its minor node 10801623Stw21770 */ 1081*2077Stw21770 md_remove_minor_node(mnum); 10821623Stw21770 10830Sstevel@tonic-gate if (!removing) 10840Sstevel@tonic-gate return; 10850Sstevel@tonic-gate 10860Sstevel@tonic-gate sv = (sv_dev_t 
*)kmem_zalloc((column_cnt + 1) * sizeof (sv_dev_t), 10870Sstevel@tonic-gate KM_SLEEP); 10880Sstevel@tonic-gate 10890Sstevel@tonic-gate recids = (mddb_recid_t *) 10900Sstevel@tonic-gate kmem_zalloc((column_cnt + 2) * sizeof (mddb_recid_t), KM_SLEEP); 10910Sstevel@tonic-gate 10920Sstevel@tonic-gate for (i = 0; i < column_cnt; i++) { 10930Sstevel@tonic-gate md_unit_t *comp_un; 10940Sstevel@tonic-gate md_dev64_t comp_dev; 10950Sstevel@tonic-gate 10960Sstevel@tonic-gate column = &un->un_column[i]; 10970Sstevel@tonic-gate sv[i].setno = MD_MIN2SET(mnum); 10980Sstevel@tonic-gate sv[i].key = column->un_orig_key; 10990Sstevel@tonic-gate if (HOTSPARED(un, i)) { 11000Sstevel@tonic-gate if (column->un_devstate & (RCS_ERRED | RCS_LAST_ERRED)) 11010Sstevel@tonic-gate hserr = HS_BAD; 11020Sstevel@tonic-gate else 11030Sstevel@tonic-gate hserr = HS_FREE; 11040Sstevel@tonic-gate raid_hs_release(hserr, un, &recids[n++], i); 11050Sstevel@tonic-gate } 11060Sstevel@tonic-gate /* 11070Sstevel@tonic-gate * deparent any metadevices. 11080Sstevel@tonic-gate * NOTE: currently soft partitions are the only metadevices 11090Sstevel@tonic-gate * allowed in RAID metadevices. 
11100Sstevel@tonic-gate */ 11110Sstevel@tonic-gate comp_dev = column->un_dev; 11120Sstevel@tonic-gate if (md_getmajor(comp_dev) == md_major) { 11130Sstevel@tonic-gate comp_un = MD_UNIT(md_getminor(comp_dev)); 11140Sstevel@tonic-gate recids[n++] = MD_RECID(comp_un); 11150Sstevel@tonic-gate md_reset_parent(comp_dev); 11160Sstevel@tonic-gate } 11170Sstevel@tonic-gate } 11180Sstevel@tonic-gate /* decrement the reference count of the old hsp */ 11190Sstevel@tonic-gate if (un->un_hsp_id != -1) 11200Sstevel@tonic-gate (void) md_hot_spare_ifc(HSP_DECREF, un->un_hsp_id, 0, 0, 11210Sstevel@tonic-gate &recids[n++], NULL, NULL, NULL); 11220Sstevel@tonic-gate recids[n] = 0; 11230Sstevel@tonic-gate MD_STATUS(un) |= MD_UN_BEING_RESET; 11240Sstevel@tonic-gate vtoc_id = un->c.un_vtoc_id; 11250Sstevel@tonic-gate 11260Sstevel@tonic-gate raid_commit(un, recids); 11270Sstevel@tonic-gate 11281623Stw21770 /* 11291623Stw21770 * Remove self from the namespace 11301623Stw21770 */ 11311623Stw21770 if (un->c.un_revision & MD_FN_META_DEV) { 11321623Stw21770 (void) md_rem_selfname(un->c.un_self_id); 11331623Stw21770 } 11340Sstevel@tonic-gate 11350Sstevel@tonic-gate /* Remove the unit structure */ 11360Sstevel@tonic-gate mddb_deleterec_wrapper(un->c.un_record_id); 11370Sstevel@tonic-gate 11380Sstevel@tonic-gate /* Remove the vtoc, if present */ 11390Sstevel@tonic-gate if (vtoc_id) 11400Sstevel@tonic-gate mddb_deleterec_wrapper(vtoc_id); 11410Sstevel@tonic-gate md_rem_names(sv, column_cnt); 11420Sstevel@tonic-gate kmem_free(sv, (column_cnt + 1) * sizeof (sv_dev_t)); 11430Sstevel@tonic-gate kmem_free(recids, (column_cnt + 2) * sizeof (mddb_recid_t)); 11440Sstevel@tonic-gate 11450Sstevel@tonic-gate SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, SVM_TAG_METADEVICE, 11460Sstevel@tonic-gate MD_MIN2SET(mnum), mnum); 11470Sstevel@tonic-gate } 11480Sstevel@tonic-gate 11490Sstevel@tonic-gate /* 11500Sstevel@tonic-gate * NAMES: raid_error_parent 11510Sstevel@tonic-gate * DESCRIPTION: mark a parent structure in 
error 11520Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to child structure 11530Sstevel@tonic-gate * int error - error value to set 11540Sstevel@tonic-gate * NOTE: (TBR) - this routine currently is not in use. 11550Sstevel@tonic-gate */ 11560Sstevel@tonic-gate static void 11570Sstevel@tonic-gate raid_error_parent(md_raidps_t *ps, int error) 11580Sstevel@tonic-gate { 11590Sstevel@tonic-gate mutex_enter(&ps->ps_mx); 11600Sstevel@tonic-gate ps->ps_flags |= MD_RPS_ERROR; 11610Sstevel@tonic-gate ps->ps_error = error; 11620Sstevel@tonic-gate mutex_exit(&ps->ps_mx); 11630Sstevel@tonic-gate } 11640Sstevel@tonic-gate 11650Sstevel@tonic-gate /* 11660Sstevel@tonic-gate * The following defines tell raid_free_parent 11670Sstevel@tonic-gate * RFP_RLS_LOCK release the unit reader lock when done. 11680Sstevel@tonic-gate * RFP_DECR_PWFRAGS decrement ps_pwfrags 11690Sstevel@tonic-gate * RFP_DECR_FRAGS decrement ps_frags 11700Sstevel@tonic-gate * RFP_DECR_READFRAGS read keeps FRAGS and PWFRAGS in lockstep 11710Sstevel@tonic-gate */ 11720Sstevel@tonic-gate #define RFP_RLS_LOCK 0x00001 11730Sstevel@tonic-gate #define RFP_DECR_PWFRAGS 0x00002 11740Sstevel@tonic-gate #define RFP_DECR_FRAGS 0x00004 11750Sstevel@tonic-gate #define RFP_DECR_READFRAGS (RFP_DECR_PWFRAGS | RFP_DECR_FRAGS) 11760Sstevel@tonic-gate 11770Sstevel@tonic-gate /* 11780Sstevel@tonic-gate * NAMES: raid_free_parent 11790Sstevel@tonic-gate * DESCRIPTION: free a parent structure 11800Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to child structure 11810Sstevel@tonic-gate * int todo - indicates what needs to be done 11820Sstevel@tonic-gate */ 11830Sstevel@tonic-gate static void 11840Sstevel@tonic-gate raid_free_parent(md_raidps_t *ps, int todo) 11850Sstevel@tonic-gate { 11860Sstevel@tonic-gate mdi_unit_t *ui = ps->ps_ui; 11870Sstevel@tonic-gate 11880Sstevel@tonic-gate ASSERT(ps->ps_magic == RAID_PSMAGIC); 11890Sstevel@tonic-gate ASSERT(ps->ps_flags & MD_RPS_INUSE); 11900Sstevel@tonic-gate 
mutex_enter(&ps->ps_mx); 11910Sstevel@tonic-gate if (todo & RFP_DECR_PWFRAGS) { 11920Sstevel@tonic-gate ASSERT(ps->ps_pwfrags); 11930Sstevel@tonic-gate ps->ps_pwfrags--; 11940Sstevel@tonic-gate if (ps->ps_pwfrags == 0 && (! (ps->ps_flags & MD_RPS_IODONE))) { 11950Sstevel@tonic-gate if (ps->ps_flags & MD_RPS_ERROR) { 11960Sstevel@tonic-gate ps->ps_bp->b_flags |= B_ERROR; 11970Sstevel@tonic-gate ps->ps_bp->b_error = ps->ps_error; 11980Sstevel@tonic-gate } 11990Sstevel@tonic-gate md_kstat_done(ui, ps->ps_bp, 0); 12000Sstevel@tonic-gate biodone(ps->ps_bp); 12010Sstevel@tonic-gate ps->ps_flags |= MD_RPS_IODONE; 12020Sstevel@tonic-gate } 12030Sstevel@tonic-gate } 12040Sstevel@tonic-gate 12050Sstevel@tonic-gate if (todo & RFP_DECR_FRAGS) { 12060Sstevel@tonic-gate ASSERT(ps->ps_frags); 12070Sstevel@tonic-gate ps->ps_frags--; 12080Sstevel@tonic-gate } 12090Sstevel@tonic-gate 12100Sstevel@tonic-gate if (ps->ps_frags != 0) { 12110Sstevel@tonic-gate mutex_exit(&ps->ps_mx); 12120Sstevel@tonic-gate return; 12130Sstevel@tonic-gate } 12140Sstevel@tonic-gate 12150Sstevel@tonic-gate ASSERT((ps->ps_frags == 0) && (ps->ps_pwfrags == 0)); 12160Sstevel@tonic-gate mutex_exit(&ps->ps_mx); 12170Sstevel@tonic-gate 12180Sstevel@tonic-gate if (todo & RFP_RLS_LOCK) 12190Sstevel@tonic-gate md_io_readerexit(ui); 12200Sstevel@tonic-gate 12210Sstevel@tonic-gate if (panicstr) { 12220Sstevel@tonic-gate ps->ps_flags |= MD_RPS_DONE; 12230Sstevel@tonic-gate return; 12240Sstevel@tonic-gate } 12250Sstevel@tonic-gate 12260Sstevel@tonic-gate if (ps->ps_flags & MD_RPS_HSREQ) 12270Sstevel@tonic-gate (void) raid_hotspares(); 12280Sstevel@tonic-gate 12290Sstevel@tonic-gate ASSERT(todo & RFP_RLS_LOCK); 12300Sstevel@tonic-gate ps->ps_flags &= ~MD_RPS_INUSE; 12310Sstevel@tonic-gate 12320Sstevel@tonic-gate md_dec_iocount(MD_MIN2SET(ps->ps_un->c.un_self_id)); 12330Sstevel@tonic-gate 12340Sstevel@tonic-gate kmem_cache_free(raid_parent_cache, ps); 12350Sstevel@tonic-gate } 12360Sstevel@tonic-gate 
12370Sstevel@tonic-gate /* 12380Sstevel@tonic-gate * NAMES: raid_free_child 12390Sstevel@tonic-gate * DESCRIPTION: free a parent structure 12400Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to child structure 12410Sstevel@tonic-gate * int drop_locks - 0 for no locks held 12420Sstevel@tonic-gate * NOTE: (TBR) - this routine currently is not in use. 12430Sstevel@tonic-gate */ 12440Sstevel@tonic-gate static void 12450Sstevel@tonic-gate raid_free_child(md_raidcs_t *cs, int drop_locks) 12460Sstevel@tonic-gate { 12470Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 12480Sstevel@tonic-gate md_raidcbuf_t *cbuf, *cbuf1; 12490Sstevel@tonic-gate 12500Sstevel@tonic-gate if (cs->cs_pw_inval_list) 12510Sstevel@tonic-gate raid_free_pwinvalidate(cs); 12520Sstevel@tonic-gate 12530Sstevel@tonic-gate if (drop_locks) { 12540Sstevel@tonic-gate ASSERT(cs->cs_flags & MD_RCS_LLOCKD && 12550Sstevel@tonic-gate (cs->cs_flags & (MD_RCS_READER | MD_RCS_WRITER))); 12560Sstevel@tonic-gate md_unit_readerexit(MDI_UNIT(MD_SID(un))); 12570Sstevel@tonic-gate raid_line_exit(cs); 12580Sstevel@tonic-gate } else { 12590Sstevel@tonic-gate ASSERT(!(cs->cs_flags & MD_RCS_LLOCKD)); 12600Sstevel@tonic-gate } 12610Sstevel@tonic-gate 12620Sstevel@tonic-gate freebuffers(cs); 12630Sstevel@tonic-gate cbuf = cs->cs_buflist; 12640Sstevel@tonic-gate while (cbuf) { 12650Sstevel@tonic-gate cbuf1 = cbuf->cbuf_next; 12660Sstevel@tonic-gate kmem_cache_free(raid_cbuf_cache, cbuf); 12670Sstevel@tonic-gate cbuf = cbuf1; 12680Sstevel@tonic-gate } 12690Sstevel@tonic-gate if (cs->cs_dbuf.b_flags & B_REMAPPED) 12700Sstevel@tonic-gate bp_mapout(&cs->cs_dbuf); 12710Sstevel@tonic-gate kmem_cache_free(raid_child_cache, cs); 12720Sstevel@tonic-gate } 12730Sstevel@tonic-gate 12740Sstevel@tonic-gate /* 12750Sstevel@tonic-gate * NAME: raid_regen_parity 12760Sstevel@tonic-gate * 12770Sstevel@tonic-gate * DESCRIPTION: This routine is used to regenerate the parity blocks 12780Sstevel@tonic-gate * for the entire raid device. 
It is called from 12790Sstevel@tonic-gate * both the regen thread and the IO path. 12800Sstevel@tonic-gate * 12810Sstevel@tonic-gate * On error the entire device is marked as in error by 12820Sstevel@tonic-gate * placing the erroring device in error and all other 12830Sstevel@tonic-gate * devices in last_errored. 12840Sstevel@tonic-gate * 12850Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs 12860Sstevel@tonic-gate */ 12870Sstevel@tonic-gate void 12880Sstevel@tonic-gate raid_regen_parity(md_raidcs_t *cs) 12890Sstevel@tonic-gate { 12900Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 12910Sstevel@tonic-gate mdi_unit_t *ui = MDI_UNIT(un->c.un_self_id); 12920Sstevel@tonic-gate caddr_t buffer; 12930Sstevel@tonic-gate caddr_t parity_buffer; 12940Sstevel@tonic-gate buf_t *bp; 12950Sstevel@tonic-gate uint_t *dbuf, *pbuf; 12960Sstevel@tonic-gate uint_t colcnt = un->un_totalcolumncnt; 12970Sstevel@tonic-gate int column; 12980Sstevel@tonic-gate int parity_column = cs->cs_pcolumn; 12990Sstevel@tonic-gate size_t bcount; 13000Sstevel@tonic-gate int j; 13010Sstevel@tonic-gate 13020Sstevel@tonic-gate /* 13030Sstevel@tonic-gate * This routine uses the data and parity buffers allocated to a 13040Sstevel@tonic-gate * write. In the case of a read the buffers are allocated and 13050Sstevel@tonic-gate * freed at the end. 
13060Sstevel@tonic-gate */ 13070Sstevel@tonic-gate 13080Sstevel@tonic-gate ASSERT(IO_READER_HELD(un)); 13090Sstevel@tonic-gate ASSERT(cs->cs_flags & MD_RCS_LLOCKD); 13100Sstevel@tonic-gate ASSERT(UNIT_READER_HELD(un)); 13110Sstevel@tonic-gate 13120Sstevel@tonic-gate if (raid_state_cnt(un, RCS_OKAY) != colcnt) 13130Sstevel@tonic-gate return; 13140Sstevel@tonic-gate 13150Sstevel@tonic-gate if (cs->cs_flags & MD_RCS_READER) { 13160Sstevel@tonic-gate getpbuffer(cs); 13170Sstevel@tonic-gate getdbuffer(cs); 13180Sstevel@tonic-gate } 13190Sstevel@tonic-gate ASSERT(cs->cs_dbuffer && cs->cs_pbuffer); 13200Sstevel@tonic-gate bcount = cs->cs_bcount; 13210Sstevel@tonic-gate buffer = cs->cs_dbuffer; 13220Sstevel@tonic-gate parity_buffer = cs->cs_pbuffer; 13230Sstevel@tonic-gate bzero(parity_buffer, bcount); 13240Sstevel@tonic-gate bp = &cs->cs_dbuf; 13250Sstevel@tonic-gate for (column = 0; column < colcnt; column++) { 13260Sstevel@tonic-gate if (column == parity_column) 13270Sstevel@tonic-gate continue; 13280Sstevel@tonic-gate reset_buf(bp, B_READ | B_BUSY, bcount); 13290Sstevel@tonic-gate bp->b_un.b_addr = buffer; 13300Sstevel@tonic-gate bp->b_edev = md_dev64_to_dev(un->un_column[column].un_dev); 13310Sstevel@tonic-gate bp->b_lblkno = cs->cs_blkno + un->un_column[column].un_devstart; 13320Sstevel@tonic-gate bp->b_bcount = bcount; 13330Sstevel@tonic-gate bp->b_bufsize = bcount; 13340Sstevel@tonic-gate (void) md_call_strategy(bp, MD_STR_NOTTOP, NULL); 13350Sstevel@tonic-gate if (biowait(bp)) 13360Sstevel@tonic-gate goto bail; 13370Sstevel@tonic-gate pbuf = (uint_t *)(void *)parity_buffer; 13380Sstevel@tonic-gate dbuf = (uint_t *)(void *)buffer; 13390Sstevel@tonic-gate for (j = 0; j < (bcount / (sizeof (uint_t))); j++) { 13400Sstevel@tonic-gate *pbuf = *pbuf ^ *dbuf; 13410Sstevel@tonic-gate pbuf++; 13420Sstevel@tonic-gate dbuf++; 13430Sstevel@tonic-gate } 13440Sstevel@tonic-gate } 13450Sstevel@tonic-gate 13460Sstevel@tonic-gate reset_buf(bp, B_WRITE | B_BUSY, cs->cs_bcount); 
13470Sstevel@tonic-gate bp->b_un.b_addr = parity_buffer; 13480Sstevel@tonic-gate bp->b_edev = md_dev64_to_dev(un->un_column[parity_column].un_dev); 13490Sstevel@tonic-gate bp->b_lblkno = cs->cs_blkno + un->un_column[parity_column].un_devstart; 13500Sstevel@tonic-gate bp->b_bcount = bcount; 13510Sstevel@tonic-gate bp->b_bufsize = bcount; 13520Sstevel@tonic-gate (void) md_call_strategy(bp, MD_STR_NOTTOP, NULL); 13530Sstevel@tonic-gate if (biowait(bp)) 13540Sstevel@tonic-gate goto bail; 13550Sstevel@tonic-gate 13560Sstevel@tonic-gate if (cs->cs_flags & MD_RCS_READER) { 13570Sstevel@tonic-gate freebuffers(cs); 13580Sstevel@tonic-gate cs->cs_pbuffer = NULL; 13590Sstevel@tonic-gate cs->cs_dbuffer = NULL; 13600Sstevel@tonic-gate } 13610Sstevel@tonic-gate bp->b_chain = (struct buf *)cs; 13620Sstevel@tonic-gate return; 13630Sstevel@tonic-gate bail: 13640Sstevel@tonic-gate if (cs->cs_flags & MD_RCS_READER) { 13650Sstevel@tonic-gate freebuffers(cs); 13660Sstevel@tonic-gate cs->cs_pbuffer = NULL; 13670Sstevel@tonic-gate cs->cs_dbuffer = NULL; 13680Sstevel@tonic-gate } 13690Sstevel@tonic-gate md_unit_readerexit(ui); 13700Sstevel@tonic-gate un = md_unit_writerlock(ui); 13710Sstevel@tonic-gate raid_set_state(un, column, RCS_ERRED, 0); 13720Sstevel@tonic-gate for (column = 0; column < colcnt; column++) 13730Sstevel@tonic-gate raid_set_state(un, column, RCS_ERRED, 0); 13740Sstevel@tonic-gate raid_commit(un, NULL); 13750Sstevel@tonic-gate md_unit_writerexit(ui); 13760Sstevel@tonic-gate un = md_unit_readerlock(ui); 13770Sstevel@tonic-gate bp->b_chain = (struct buf *)cs; 13780Sstevel@tonic-gate } 13790Sstevel@tonic-gate 13800Sstevel@tonic-gate /* 13810Sstevel@tonic-gate * NAMES: raid_error_state 13820Sstevel@tonic-gate * DESCRIPTION: check unit and column states' impact on I/O error 13830Sstevel@tonic-gate * NOTE: the state now may not be the state when the 13840Sstevel@tonic-gate * I/O completed due to race conditions. 
13850Sstevel@tonic-gate * PARAMETERS: mr_unit_t *un - pointer to raid unit structure 13860Sstevel@tonic-gate * md_raidcs_t *cs - pointer to child structure 13870Sstevel@tonic-gate * buf_t *bp - pointer to buffer structure 13880Sstevel@tonic-gate */ 13890Sstevel@tonic-gate static int 13900Sstevel@tonic-gate raid_error_state(mr_unit_t *un, buf_t *bp) 13910Sstevel@tonic-gate { 13920Sstevel@tonic-gate int column; 13930Sstevel@tonic-gate int i; 13940Sstevel@tonic-gate 13950Sstevel@tonic-gate ASSERT(IO_READER_HELD(un)); 13960Sstevel@tonic-gate ASSERT(UNIT_WRITER_HELD(un)); 13970Sstevel@tonic-gate 13980Sstevel@tonic-gate column = -1; 13990Sstevel@tonic-gate for (i = 0; i < un->un_totalcolumncnt; i++) { 14000Sstevel@tonic-gate if (un->un_column[i].un_dev == md_expldev(bp->b_edev)) { 14010Sstevel@tonic-gate column = i; 14020Sstevel@tonic-gate break; 14030Sstevel@tonic-gate } 14040Sstevel@tonic-gate if (un->un_column[i].un_alt_dev == md_expldev(bp->b_edev)) { 14050Sstevel@tonic-gate column = i; 14060Sstevel@tonic-gate break; 14070Sstevel@tonic-gate } 14080Sstevel@tonic-gate } 14090Sstevel@tonic-gate 14100Sstevel@tonic-gate /* in case a replace snuck in while waiting on unit writer lock */ 14110Sstevel@tonic-gate 14120Sstevel@tonic-gate if (column == -1) { 14130Sstevel@tonic-gate return (0); 14140Sstevel@tonic-gate } 14150Sstevel@tonic-gate 14160Sstevel@tonic-gate (void) raid_set_state(un, column, RCS_ERRED, 0); 14170Sstevel@tonic-gate ASSERT(un->un_state & (RUS_ERRED | RUS_LAST_ERRED)); 14180Sstevel@tonic-gate 14190Sstevel@tonic-gate raid_commit(un, NULL); 14200Sstevel@tonic-gate if (un->un_state & RUS_ERRED) { 14210Sstevel@tonic-gate SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, SVM_TAG_METADEVICE, 14220Sstevel@tonic-gate MD_UN2SET(un), MD_SID(un)); 14230Sstevel@tonic-gate } else if (un->un_state & RUS_LAST_ERRED) { 14240Sstevel@tonic-gate SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED, SVM_TAG_METADEVICE, 14250Sstevel@tonic-gate MD_UN2SET(un), MD_SID(un)); 14260Sstevel@tonic-gate } 
14270Sstevel@tonic-gate 14280Sstevel@tonic-gate return (EIO); 14290Sstevel@tonic-gate } 14300Sstevel@tonic-gate 14310Sstevel@tonic-gate /* 14320Sstevel@tonic-gate * NAME: raid_mapin_buf 14330Sstevel@tonic-gate * DESCRIPTION: wait for the input buffer header to be maped in 14340Sstevel@tonic-gate * PARAMETERS: md_raidps_t *ps 14350Sstevel@tonic-gate */ 14360Sstevel@tonic-gate static void 14370Sstevel@tonic-gate raid_mapin_buf(md_raidcs_t *cs) 14380Sstevel@tonic-gate { 14390Sstevel@tonic-gate md_raidps_t *ps = cs->cs_ps; 14400Sstevel@tonic-gate 14410Sstevel@tonic-gate /* 14420Sstevel@tonic-gate * check to see if the buffer is maped. If all is ok return the 14430Sstevel@tonic-gate * offset of the data and return. Since it is expensive to grab 14440Sstevel@tonic-gate * a mutex this is only done if the mapin is not complete. 14450Sstevel@tonic-gate * Once the mutex is aquired it is possible that the mapin was 14460Sstevel@tonic-gate * not done so recheck and if necessary do the mapin. 14470Sstevel@tonic-gate */ 14480Sstevel@tonic-gate if (ps->ps_mapin > 0) { 14490Sstevel@tonic-gate cs->cs_addr = ps->ps_addr + cs->cs_offset; 14500Sstevel@tonic-gate return; 14510Sstevel@tonic-gate } 14520Sstevel@tonic-gate mutex_enter(&ps->ps_mapin_mx); 14530Sstevel@tonic-gate if (ps->ps_mapin > 0) { 14540Sstevel@tonic-gate cs->cs_addr = ps->ps_addr + cs->cs_offset; 14550Sstevel@tonic-gate mutex_exit(&ps->ps_mapin_mx); 14560Sstevel@tonic-gate return; 14570Sstevel@tonic-gate } 14580Sstevel@tonic-gate bp_mapin(ps->ps_bp); 14590Sstevel@tonic-gate /* 14600Sstevel@tonic-gate * get the new b_addr out of the parent since bp_mapin just changed it 14610Sstevel@tonic-gate */ 14620Sstevel@tonic-gate ps->ps_addr = ps->ps_bp->b_un.b_addr; 14630Sstevel@tonic-gate cs->cs_addr = ps->ps_addr + cs->cs_offset; 14640Sstevel@tonic-gate ps->ps_mapin++; 14650Sstevel@tonic-gate mutex_exit(&ps->ps_mapin_mx); 14660Sstevel@tonic-gate } 14670Sstevel@tonic-gate 14680Sstevel@tonic-gate /* 14690Sstevel@tonic-gate * 
NAMES:	raid_read_no_retry
 * DESCRIPTION: terminal error path for a RAID metadevice read: the
 *		read failed while attempting to regenerate the data and
 *		no retry is possible (error occurred in
 *		raid_raidregenloop()).  Propagates EIO to the parent.
 * PARAMETERS:	mr_unit_t   *un - pointer to raid unit structure
 *		md_raidcs_t *cs - pointer to child structure
 */
/*ARGSUSED*/
static void
raid_read_no_retry(mr_unit_t *un, md_raidcs_t *cs)
{
	md_raidps_t	*ps = cs->cs_ps;

	/* fail the parent with EIO and release this child */
	raid_error_parent(ps, EIO);
	raid_free_child(cs, 1);

	/* decrement readfrags */
	raid_free_parent(ps, RFP_DECR_READFRAGS | RFP_RLS_LOCK);
}

/*
 * NAMES:	raid_read_retry
 * DESCRIPTION: I/O retry routine for a RAID metadevice read; rebuilds
 *		the child's data and parity buf_t headers and resubmits
 *		the read in recovery mode via raid_read_io().
 * PARAMETERS:	mr_unit_t   *un - pointer to raid unit structure
 *		md_raidcs_t *cs - pointer to child structure
 */
static void
raid_read_retry(mr_unit_t *un, md_raidcs_t *cs)
{
	/* re-initialize the data buf_t structure for raid_read() */
	cs->cs_dbuf.b_chain = (struct buf *)cs;
	cs->cs_dbuf.b_back = &cs->cs_dbuf;
	cs->cs_dbuf.b_forw = &cs->cs_dbuf;
	cs->cs_dbuf.b_flags = B_BUSY;	/* initialize flags */
	cs->cs_dbuf.b_error = 0;	/* initialize error */
	cs->cs_dbuf.b_offset = -1;
	/* Initialize semaphores */
	sema_init(&cs->cs_dbuf.b_io, 0, NULL,
	    SEMA_DEFAULT, NULL);
	sema_init(&cs->cs_dbuf.b_sem, 0, NULL,
	    SEMA_DEFAULT, NULL);

	/* and likewise the parity buf_t */
	cs->cs_pbuf.b_chain = (struct buf *)cs;
	cs->cs_pbuf.b_back = &cs->cs_pbuf;
	cs->cs_pbuf.b_forw = &cs->cs_pbuf;
	cs->cs_pbuf.b_flags = B_BUSY;	/* initialize flags */
	cs->cs_pbuf.b_error = 0;	/* initialize error */
	cs->cs_pbuf.b_offset = -1;
	sema_init(&cs->cs_pbuf.b_io, 0, NULL,
	    SEMA_DEFAULT, NULL);
	sema_init(&cs->cs_pbuf.b_sem, 0, NULL,
	    SEMA_DEFAULT, NULL);

	cs->cs_flags &= ~MD_RCS_ERROR;	/* reset child error flag */
	cs->cs_flags |= MD_RCS_RECOVERY;  /* set RECOVERY flag */

	/*
	 * Re-scheduling the I/O with raid_read_io() is simpler; basically,
	 * raid_read_io() is invoked again with the same child structure.
	 * (NOTE: we aren't supposed to do any error recovery when an I/O
	 * error occurred in raid_raidregenloop().)
	 */
	raid_mapin_buf(cs);
	raid_read_io(un, cs);
}

/*
 * NAMES:	raid_rderr
 * DESCRIPTION: I/O error handling routine for a RAID metadevice read
 * PARAMETERS:	md_raidcs_t *cs - pointer to child structure
 * LOCKS:	must obtain unit writer lock while calling raid_error_state
 *		since a unit or column state transition may take place.
 *		must obtain unit reader lock to retry I/O.
15410Sstevel@tonic-gate */ 15420Sstevel@tonic-gate /*ARGSUSED*/ 15430Sstevel@tonic-gate static void 15440Sstevel@tonic-gate raid_rderr(md_raidcs_t *cs) 15450Sstevel@tonic-gate { 15460Sstevel@tonic-gate md_raidps_t *ps; 15470Sstevel@tonic-gate mdi_unit_t *ui; 15480Sstevel@tonic-gate mr_unit_t *un; 15490Sstevel@tonic-gate int error = 0; 15500Sstevel@tonic-gate 15510Sstevel@tonic-gate ps = cs->cs_ps; 15520Sstevel@tonic-gate ui = ps->ps_ui; 15530Sstevel@tonic-gate un = (mr_unit_t *)md_unit_writerlock(ui); 15540Sstevel@tonic-gate ASSERT(un != 0); 15550Sstevel@tonic-gate 15560Sstevel@tonic-gate if (cs->cs_dbuf.b_flags & B_ERROR) 15570Sstevel@tonic-gate error = raid_error_state(un, &cs->cs_dbuf); 15580Sstevel@tonic-gate if (cs->cs_pbuf.b_flags & B_ERROR) 15590Sstevel@tonic-gate error |= raid_error_state(un, &cs->cs_pbuf); 15600Sstevel@tonic-gate 15610Sstevel@tonic-gate md_unit_writerexit(ui); 15620Sstevel@tonic-gate 15630Sstevel@tonic-gate ps->ps_flags |= MD_RPS_HSREQ; 15640Sstevel@tonic-gate 15650Sstevel@tonic-gate un = (mr_unit_t *)md_unit_readerlock(ui); 15660Sstevel@tonic-gate ASSERT(un != 0); 15670Sstevel@tonic-gate /* now attempt the appropriate retry routine */ 15680Sstevel@tonic-gate (*(cs->cs_retry_call))(un, cs); 15690Sstevel@tonic-gate } 15700Sstevel@tonic-gate 15710Sstevel@tonic-gate 15720Sstevel@tonic-gate /* 15730Sstevel@tonic-gate * NAMES: raid_read_error 15740Sstevel@tonic-gate * DESCRIPTION: I/O error handling routine for a RAID metadevice read 15750Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to child structure 15760Sstevel@tonic-gate */ 15770Sstevel@tonic-gate /*ARGSUSED*/ 15780Sstevel@tonic-gate static void 15790Sstevel@tonic-gate raid_read_error(md_raidcs_t *cs) 15800Sstevel@tonic-gate { 15810Sstevel@tonic-gate md_raidps_t *ps; 15820Sstevel@tonic-gate mdi_unit_t *ui; 15830Sstevel@tonic-gate mr_unit_t *un; 15840Sstevel@tonic-gate set_t setno; 15850Sstevel@tonic-gate 15860Sstevel@tonic-gate ps = cs->cs_ps; 15870Sstevel@tonic-gate ui = 
ps->ps_ui; 15880Sstevel@tonic-gate un = cs->cs_un; 15890Sstevel@tonic-gate 15900Sstevel@tonic-gate setno = MD_UN2SET(un); 15910Sstevel@tonic-gate 15920Sstevel@tonic-gate if ((cs->cs_dbuf.b_flags & B_ERROR) && 15930Sstevel@tonic-gate (COLUMN_STATE(un, cs->cs_dcolumn) != RCS_ERRED) && 15940Sstevel@tonic-gate (COLUMN_STATE(un, cs->cs_dcolumn) != RCS_LAST_ERRED)) 15950Sstevel@tonic-gate cmn_err(CE_WARN, "md %s: read error on %s", 15960Sstevel@tonic-gate md_shortname(MD_SID(un)), 15970Sstevel@tonic-gate md_devname(setno, md_expldev(cs->cs_dbuf.b_edev), NULL, 0)); 15980Sstevel@tonic-gate 15990Sstevel@tonic-gate if ((cs->cs_pbuf.b_flags & B_ERROR) && 16000Sstevel@tonic-gate (COLUMN_STATE(un, cs->cs_pcolumn) != RCS_ERRED) && 16010Sstevel@tonic-gate (COLUMN_STATE(un, cs->cs_pcolumn) != RCS_LAST_ERRED)) 16020Sstevel@tonic-gate cmn_err(CE_WARN, "md %s: read error on %s", 16030Sstevel@tonic-gate md_shortname(MD_SID(un)), 16040Sstevel@tonic-gate md_devname(setno, md_expldev(cs->cs_pbuf.b_edev), NULL, 0)); 16050Sstevel@tonic-gate 16060Sstevel@tonic-gate md_unit_readerexit(ui); 16070Sstevel@tonic-gate 16080Sstevel@tonic-gate ASSERT(cs->cs_frags == 0); 16090Sstevel@tonic-gate 16100Sstevel@tonic-gate /* now schedule processing for possible state change */ 16110Sstevel@tonic-gate daemon_request(&md_mstr_daemon, raid_rderr, 16120Sstevel@tonic-gate (daemon_queue_t *)cs, REQ_OLD); 16130Sstevel@tonic-gate 16140Sstevel@tonic-gate } 16150Sstevel@tonic-gate 16160Sstevel@tonic-gate /* 16170Sstevel@tonic-gate * NAMES: getdbuffer 16180Sstevel@tonic-gate * DESCRIPTION: data buffer allocation for a child structure 16190Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to child structure 16200Sstevel@tonic-gate * 16210Sstevel@tonic-gate * NOTE: always get dbuffer before pbuffer 16220Sstevel@tonic-gate * and get both buffers before pwslot 16230Sstevel@tonic-gate * otherwise a deadlock could be introduced. 
 */
static void
getdbuffer(md_raidcs_t *cs)
{
	mr_unit_t *un;

	/* try a fast non-sleeping allocation first */
	cs->cs_dbuffer = kmem_alloc(cs->cs_bcount + DEV_BSIZE, KM_NOSLEEP);
	if (cs->cs_dbuffer != NULL)
		return;
	/*
	 * Memory is tight: fall back to the unit's preallocated data
	 * buffer, sleeping until another child hands it back
	 * (freebuffers() broadcasts un_cv when it does).
	 */
	un = cs->cs_ps->ps_un;
	mutex_enter(&un->un_mx);
	while (un->un_dbuffer == NULL) {
		STAT_INC(data_buffer_waits);
		un->un_rflags |= MD_RFLAG_NEEDBUF;
		cv_wait(&un->un_cv, &un->un_mx);
	}
	cs->cs_dbuffer = un->un_dbuffer;
	cs->cs_flags |= MD_RCS_UNDBUF;	/* buffer must go back to the unit */
	un->un_dbuffer = NULL;
	mutex_exit(&un->un_mx);
}

/*
 * NAMES:	getpbuffer
 * DESCRIPTION: parity buffer allocation for a child structure
 * PARAMETERS:	md_raidcs_t *cs - pointer to child structure
 *
 * NOTE: always get dbuffer before pbuffer
 *	 and get both buffers before pwslot
 *	 otherwise a deadlock could be introduced.
 */
static void
getpbuffer(md_raidcs_t *cs)
{
	mr_unit_t *un;

	/* try a fast non-sleeping allocation first */
	cs->cs_pbuffer = kmem_alloc(cs->cs_bcount + DEV_BSIZE, KM_NOSLEEP);
	if (cs->cs_pbuffer != NULL)
		return;
	/*
	 * Fall back to the unit's preallocated parity buffer, sleeping
	 * until it becomes available (same protocol as getdbuffer()).
	 */
	un = cs->cs_ps->ps_un;
	mutex_enter(&un->un_mx);
	while (un->un_pbuffer == NULL) {
		STAT_INC(parity_buffer_waits);
		un->un_rflags |= MD_RFLAG_NEEDBUF;
		cv_wait(&un->un_cv, &un->un_mx);
	}
	cs->cs_pbuffer = un->un_pbuffer;
	cs->cs_flags |= MD_RCS_UNPBUF;	/* buffer must go back to the unit */
	un->un_pbuffer = NULL;
	mutex_exit(&un->un_mx);
}

/*
 * NAME:	getresources
 * DESCRIPTION: allocate every buffer a child structure needs
 * PARAMETERS:	md_raidcs_t *cs - pointer to child structure
 */
static void
getresources(md_raidcs_t *cs)
{
	md_raidcbuf_t	*cbuf;
	/*
	 * NOTE: always get dbuffer before pbuffer
	 *	 and get both buffers before pwslot
	 *	 otherwise a deadlock could be introduced.
 */
	getdbuffer(cs);
	getpbuffer(cs);
	/* allocate one staging buffer per cbuf for a full line write */
	for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next)
		cbuf->cbuf_buffer =
		    kmem_alloc(cs->cs_bcount + DEV_BSIZE, KM_SLEEP);
}
/*
 * NAMES:	freebuffers
 * DESCRIPTION: child structure buffer freeing routine; buffers that
 *		were borrowed from the unit (MD_RCS_UNDBUF/MD_RCS_UNPBUF)
 *		are handed back and waiters are woken, the rest are freed.
 * PARAMETERS:	md_raidcs_t *cs - pointer to child structure
 */
static void
freebuffers(md_raidcs_t *cs)
{
	mr_unit_t	*un;
	md_raidcbuf_t	*cbuf;

	/* free buffers used for full line write */
	for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) {
		if (cbuf->cbuf_buffer == NULL)
			continue;
		kmem_free(cbuf->cbuf_buffer, cbuf->cbuf_bcount + DEV_BSIZE);
		cbuf->cbuf_buffer = NULL;
		cbuf->cbuf_bcount = 0;
	}

	/* un_mx is needed only when a unit-owned buffer is being returned */
	if (cs->cs_flags & (MD_RCS_UNDBUF | MD_RCS_UNPBUF)) {
		un = cs->cs_un;
		mutex_enter(&un->un_mx);
	}
	if (cs->cs_dbuffer) {
		if (cs->cs_flags & MD_RCS_UNDBUF)
			un->un_dbuffer = cs->cs_dbuffer;
		else
			kmem_free(cs->cs_dbuffer, cs->cs_bcount + DEV_BSIZE);
	}
	if (cs->cs_pbuffer) {
		if (cs->cs_flags & MD_RCS_UNPBUF)
			un->un_pbuffer = cs->cs_pbuffer;
		else
			kmem_free(cs->cs_pbuffer, cs->cs_bcount + DEV_BSIZE);
	}
	if (cs->cs_flags & (MD_RCS_UNDBUF | MD_RCS_UNPBUF)) {
		/* wake any child sleeping in getdbuffer()/getpbuffer() */
		un->un_rflags &= ~MD_RFLAG_NEEDBUF;
		cv_broadcast(&un->un_cv);
		mutex_exit(&un->un_mx);
	}
}

/*
 * NAMES:	raid_line_reader_lock, raid_line_writer_lock
 * DESCRIPTION: RAID metadevice line reader and writer lock routines
 * PARAMETERS:	md_raidcs_t *cs - pointer to child structure
 */

void
raid_line_reader_lock(md_raidcs_t *cs, int resync_thread)
{
	mr_unit_t	*un;
	md_raidcs_t	*cs1;

	ASSERT(cs->cs_line != MD_DISKADDR_ERROR);
	un = cs->cs_un;
	cs->cs_flags |= MD_RCS_READER;
	STAT_CHECK(raid_line_lock_wait, MUTEX_HELD(&un->un_linlck_mx));
	if (!panicstr)
		mutex_enter(&un->un_linlck_mx);
	/*
	 * Readers may proceed concurrently; block only while some
	 * overlapping I/O on the line lock chain holds a writer lock.
	 */
	cs1 = un->un_linlck_chn;
	while (cs1 != NULL) {
		for (cs1 = un->un_linlck_chn; cs1; cs1 = cs1->cs_linlck_next)
			if (raid_io_overlaps(cs, cs1) == 1)
				if (cs1->cs_flags & MD_RCS_WRITER)
					break;

		if (cs1 != NULL) {
			if (panicstr)
				panic("md; raid line write lock held");
			un->un_linlck_flg = 1;
			cv_wait(&un->un_linlck_cv, &un->un_linlck_mx);
17640Sstevel@tonic-gate STAT_INC(raid_read_waits); 17650Sstevel@tonic-gate } 17660Sstevel@tonic-gate } 17670Sstevel@tonic-gate STAT_MAX(raid_max_reader_locks, raid_reader_locks_active); 17680Sstevel@tonic-gate STAT_INC(raid_reader_locks); 17690Sstevel@tonic-gate cs1 = un->un_linlck_chn; 17700Sstevel@tonic-gate if (cs1 != NULL) 17710Sstevel@tonic-gate cs1->cs_linlck_prev = cs; 17720Sstevel@tonic-gate cs->cs_linlck_next = cs1; 17730Sstevel@tonic-gate cs->cs_linlck_prev = NULL; 17740Sstevel@tonic-gate un->un_linlck_chn = cs; 17750Sstevel@tonic-gate cs->cs_flags |= MD_RCS_LLOCKD; 17760Sstevel@tonic-gate if (resync_thread) { 17770Sstevel@tonic-gate diskaddr_t lastblk = cs->cs_blkno + cs->cs_blkcnt - 1; 17780Sstevel@tonic-gate diskaddr_t line = (lastblk + 1) / un->un_segsize; 17790Sstevel@tonic-gate ASSERT(raid_state_cnt(un, RCS_RESYNC)); 17800Sstevel@tonic-gate mutex_enter(&un->un_mx); 17810Sstevel@tonic-gate un->un_resync_line_index = line; 17820Sstevel@tonic-gate mutex_exit(&un->un_mx); 17830Sstevel@tonic-gate } 17840Sstevel@tonic-gate if (!panicstr) 17850Sstevel@tonic-gate mutex_exit(&un->un_linlck_mx); 17860Sstevel@tonic-gate } 17870Sstevel@tonic-gate 17880Sstevel@tonic-gate int 17890Sstevel@tonic-gate raid_line_writer_lock(md_raidcs_t *cs, int lock) 17900Sstevel@tonic-gate { 17910Sstevel@tonic-gate mr_unit_t *un; 17920Sstevel@tonic-gate md_raidcs_t *cs1; 17930Sstevel@tonic-gate 17940Sstevel@tonic-gate ASSERT(cs->cs_line != MD_DISKADDR_ERROR); 17950Sstevel@tonic-gate cs->cs_flags |= MD_RCS_WRITER; 17960Sstevel@tonic-gate un = cs->cs_ps->ps_un; 17970Sstevel@tonic-gate 17980Sstevel@tonic-gate STAT_CHECK(raid_line_lock_wait, MUTEX_HELD(&un->un_linlck_mx)); 17990Sstevel@tonic-gate if (lock && !panicstr) 18000Sstevel@tonic-gate mutex_enter(&un->un_linlck_mx); 18010Sstevel@tonic-gate ASSERT(MUTEX_HELD(&un->un_linlck_mx)); 18020Sstevel@tonic-gate 18030Sstevel@tonic-gate cs1 = un->un_linlck_chn; 18040Sstevel@tonic-gate for (cs1 = un->un_linlck_chn; cs1; cs1 = 
cs1->cs_linlck_next) 18050Sstevel@tonic-gate if (raid_io_overlaps(cs, cs1)) 18060Sstevel@tonic-gate break; 18070Sstevel@tonic-gate 18080Sstevel@tonic-gate if (cs1 != NULL) { 18090Sstevel@tonic-gate if (panicstr) 18100Sstevel@tonic-gate panic("md: line writer lock inaccessible"); 18110Sstevel@tonic-gate goto no_lock_exit; 18120Sstevel@tonic-gate } 18130Sstevel@tonic-gate 18140Sstevel@tonic-gate if (raid_alloc_pwslot(cs)) { 18150Sstevel@tonic-gate if (panicstr) 18160Sstevel@tonic-gate panic("md: no prewrite slots"); 18170Sstevel@tonic-gate STAT_INC(raid_prewrite_waits); 18180Sstevel@tonic-gate goto no_lock_exit; 18190Sstevel@tonic-gate } 18200Sstevel@tonic-gate 18210Sstevel@tonic-gate cs1 = un->un_linlck_chn; 18220Sstevel@tonic-gate if (cs1 != NULL) 18230Sstevel@tonic-gate cs1->cs_linlck_prev = cs; 18240Sstevel@tonic-gate cs->cs_linlck_next = cs1; 18250Sstevel@tonic-gate cs->cs_linlck_prev = NULL; 18260Sstevel@tonic-gate un->un_linlck_chn = cs; 18270Sstevel@tonic-gate cs->cs_flags |= MD_RCS_LLOCKD; 18280Sstevel@tonic-gate cs->cs_flags &= ~MD_RCS_WAITING; 18290Sstevel@tonic-gate STAT_INC(raid_writer_locks); 18300Sstevel@tonic-gate STAT_MAX(raid_max_write_locks, raid_write_locks_active); 18310Sstevel@tonic-gate if (lock && !panicstr) 18320Sstevel@tonic-gate mutex_exit(&un->un_linlck_mx); 18330Sstevel@tonic-gate return (0); 18340Sstevel@tonic-gate 18350Sstevel@tonic-gate no_lock_exit: 18360Sstevel@tonic-gate /* if this is already queued then do not requeue it */ 18370Sstevel@tonic-gate ASSERT(! 
(cs->cs_flags & MD_RCS_LLOCKD)); 18380Sstevel@tonic-gate if (!lock || (cs->cs_flags & MD_RCS_WAITING)) 18390Sstevel@tonic-gate return (1); 18400Sstevel@tonic-gate cs->cs_flags |= MD_RCS_WAITING; 18410Sstevel@tonic-gate cs->cs_un = un; 18420Sstevel@tonic-gate raid_enqueue(cs); 18430Sstevel@tonic-gate if (lock && !panicstr) 18440Sstevel@tonic-gate mutex_exit(&un->un_linlck_mx); 18450Sstevel@tonic-gate return (1); 18460Sstevel@tonic-gate } 18470Sstevel@tonic-gate 18480Sstevel@tonic-gate static void 18490Sstevel@tonic-gate raid_startio(md_raidcs_t *cs) 18500Sstevel@tonic-gate { 18510Sstevel@tonic-gate mdi_unit_t *ui = cs->cs_ps->ps_ui; 18520Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 18530Sstevel@tonic-gate 18540Sstevel@tonic-gate un = md_unit_readerlock(ui); 18550Sstevel@tonic-gate raid_write_io(un, cs); 18560Sstevel@tonic-gate } 18570Sstevel@tonic-gate 18580Sstevel@tonic-gate void 18590Sstevel@tonic-gate raid_io_startup(mr_unit_t *un) 18600Sstevel@tonic-gate { 18610Sstevel@tonic-gate md_raidcs_t *waiting_list, *cs1; 18620Sstevel@tonic-gate md_raidcs_t *previous = NULL, *next = NULL; 18630Sstevel@tonic-gate mdi_unit_t *ui = MDI_UNIT(un->c.un_self_id); 18640Sstevel@tonic-gate kmutex_t *io_list_mutex = &ui->ui_io_lock->io_list_mutex; 18650Sstevel@tonic-gate 18660Sstevel@tonic-gate ASSERT(MUTEX_HELD(&un->un_linlck_mx)); 18670Sstevel@tonic-gate mutex_enter(io_list_mutex); 18680Sstevel@tonic-gate 18690Sstevel@tonic-gate /* 18700Sstevel@tonic-gate * check to be sure there are no reader locks outstanding. If 18710Sstevel@tonic-gate * there are not then pass on the writer lock. 18720Sstevel@tonic-gate */ 18730Sstevel@tonic-gate waiting_list = ui->ui_io_lock->io_list_front; 18740Sstevel@tonic-gate while (waiting_list) { 18750Sstevel@tonic-gate ASSERT(waiting_list->cs_flags & MD_RCS_WAITING); 18760Sstevel@tonic-gate ASSERT(! 
(waiting_list->cs_flags & MD_RCS_LLOCKD));
		/* does this request overlap any I/O holding a line lock? */
		for (cs1 = un->un_linlck_chn; cs1; cs1 = cs1->cs_linlck_next)
			if (raid_io_overlaps(waiting_list, cs1) == 1)
				break;
		/*
		 * there was an I/O that overlaps this io so go onto
		 * the next io in the waiting list
		 */
		if (cs1) {
			previous = waiting_list;
			waiting_list = waiting_list->cs_linlck_next;
			continue;
		}

		/*
		 * There are no IOs that overlap this, so remove it from
		 * the waiting queue, and start it
		 */

		/* but skip it for now if no prewrite slot is available */
		if (raid_check_pw(waiting_list)) {
			ASSERT(waiting_list->cs_flags & MD_RCS_WAITING);
			previous = waiting_list;
			waiting_list = waiting_list->cs_linlck_next;
			continue;
		}
		ASSERT(waiting_list->cs_flags & MD_RCS_WAITING);

		/* unlink the request from the waiting queue */
		next = waiting_list->cs_linlck_next;
		if (previous)
			previous->cs_linlck_next = next;
		else
			ui->ui_io_lock->io_list_front = next;

		if (ui->ui_io_lock->io_list_front == NULL)
			ui->ui_io_lock->io_list_back = NULL;

		if (ui->ui_io_lock->io_list_back == waiting_list)
			ui->ui_io_lock->io_list_back = previous;

		waiting_list->cs_linlck_next = NULL;
		waiting_list->cs_flags &= ~MD_RCS_WAITING;
		STAT_DEC(raid_write_queue_length);
		/* must succeed: we just verified no overlap and a pwslot */
		if (raid_line_writer_lock(waiting_list, 0))
			panic("region locking corrupted");

		ASSERT(waiting_list->cs_flags & MD_RCS_LLOCKD);
		/* hand the request to the daemon to issue the write */
		daemon_request(&md_mstr_daemon, raid_startio,
		    (daemon_queue_t *)waiting_list, REQ_OLD);
		waiting_list = next;

	}
	mutex_exit(io_list_mutex);
}

/*
 * NAME:	raid_line_exit
 * DESCRIPTION: drop a line reader/writer lock taken by
 *		raid_line_reader_lock()/raid_line_writer_lock(), wake
 *		any waiters, and start queued writes that can now run.
 * PARAMETERS:	md_raidcs_t *cs - pointer to child structure
 */
void
raid_line_exit(md_raidcs_t *cs)
{
	mr_unit_t	*un;

	un = cs->cs_ps->ps_un;
	STAT_CHECK(raid_line_lock_wait, MUTEX_HELD(&un->un_linlck_mx));
	mutex_enter(&un->un_linlck_mx);
	if (cs->cs_flags & MD_RCS_READER)
		STAT_DEC(raid_reader_locks_active);
	else
		STAT_DEC(raid_write_locks_active);

	/* unlink this child from the line lock chain */
	if (cs->cs_linlck_prev)
		cs->cs_linlck_prev->cs_linlck_next = cs->cs_linlck_next;
	else
		un->un_linlck_chn = cs->cs_linlck_next;
	if (cs->cs_linlck_next)
		cs->cs_linlck_next->cs_linlck_prev = cs->cs_linlck_prev;

	cs->cs_flags &= ~MD_RCS_LLOCKD;

	if (un->un_linlck_flg)
		cv_broadcast(&un->un_linlck_cv);

	un->un_linlck_flg = 0;
	cs->cs_line = MD_DISKADDR_ERROR;

	raid_cancel_pwslot(cs);
	/*
19600Sstevel@tonic-gate * now that the lock is droped go ahead and see if there are any 19610Sstevel@tonic-gate * other writes that can be started up 19620Sstevel@tonic-gate */ 19630Sstevel@tonic-gate raid_io_startup(un); 19640Sstevel@tonic-gate 19650Sstevel@tonic-gate mutex_exit(&un->un_linlck_mx); 19660Sstevel@tonic-gate } 19670Sstevel@tonic-gate 19680Sstevel@tonic-gate /* 19690Sstevel@tonic-gate * NAMES: raid_line, raid_pcolumn, raid_dcolumn 19700Sstevel@tonic-gate * DESCRIPTION: RAID metadevice APIs for mapping segment # to line #, 19710Sstevel@tonic-gate * data column # and parity column #. 19720Sstevel@tonic-gate * PARAMETERS: int segment - segment number 19730Sstevel@tonic-gate * mr_unit_t *un - pointer to an unit structure 19740Sstevel@tonic-gate * RETURNS: raid_line returns line # 19750Sstevel@tonic-gate * raid_dcolumn returns data column # 19760Sstevel@tonic-gate * raid_pcolumn returns parity column # 19770Sstevel@tonic-gate */ 19780Sstevel@tonic-gate static diskaddr_t 19790Sstevel@tonic-gate raid_line(diskaddr_t segment, mr_unit_t *un) 19800Sstevel@tonic-gate { 19810Sstevel@tonic-gate diskaddr_t adj_seg; 19820Sstevel@tonic-gate diskaddr_t line; 19830Sstevel@tonic-gate diskaddr_t max_orig_segment; 19840Sstevel@tonic-gate 19850Sstevel@tonic-gate max_orig_segment = (un->un_origcolumncnt - 1) * un->un_segsincolumn; 19860Sstevel@tonic-gate if (segment >= max_orig_segment) { 19870Sstevel@tonic-gate adj_seg = segment - max_orig_segment; 19880Sstevel@tonic-gate line = adj_seg % un->un_segsincolumn; 19890Sstevel@tonic-gate } else { 19900Sstevel@tonic-gate line = segment / (un->un_origcolumncnt - 1); 19910Sstevel@tonic-gate } 19920Sstevel@tonic-gate return (line); 19930Sstevel@tonic-gate } 19940Sstevel@tonic-gate 19950Sstevel@tonic-gate uint_t 19960Sstevel@tonic-gate raid_dcolumn(diskaddr_t segment, mr_unit_t *un) 19970Sstevel@tonic-gate { 19980Sstevel@tonic-gate diskaddr_t adj_seg; 19990Sstevel@tonic-gate diskaddr_t line; 20000Sstevel@tonic-gate diskaddr_t 
max_orig_segment; 20010Sstevel@tonic-gate uint_t column; 20020Sstevel@tonic-gate 20030Sstevel@tonic-gate max_orig_segment = (un->un_origcolumncnt - 1) * un->un_segsincolumn; 20040Sstevel@tonic-gate if (segment >= max_orig_segment) { 20050Sstevel@tonic-gate adj_seg = segment - max_orig_segment; 20060Sstevel@tonic-gate column = un->un_origcolumncnt + 20070Sstevel@tonic-gate (uint_t)(adj_seg / un->un_segsincolumn); 20080Sstevel@tonic-gate } else { 20090Sstevel@tonic-gate line = segment / (un->un_origcolumncnt - 1); 20100Sstevel@tonic-gate column = (uint_t)((segment % (un->un_origcolumncnt - 1) + line) 20110Sstevel@tonic-gate % un->un_origcolumncnt); 20120Sstevel@tonic-gate } 20130Sstevel@tonic-gate return (column); 20140Sstevel@tonic-gate } 20150Sstevel@tonic-gate 20160Sstevel@tonic-gate uint_t 20170Sstevel@tonic-gate raid_pcolumn(diskaddr_t segment, mr_unit_t *un) 20180Sstevel@tonic-gate { 20190Sstevel@tonic-gate diskaddr_t adj_seg; 20200Sstevel@tonic-gate diskaddr_t line; 20210Sstevel@tonic-gate diskaddr_t max_orig_segment; 20220Sstevel@tonic-gate uint_t column; 20230Sstevel@tonic-gate 20240Sstevel@tonic-gate max_orig_segment = (un->un_origcolumncnt - 1) * un->un_segsincolumn; 20250Sstevel@tonic-gate if (segment >= max_orig_segment) { 20260Sstevel@tonic-gate adj_seg = segment - max_orig_segment; 20270Sstevel@tonic-gate line = adj_seg % un->un_segsincolumn; 20280Sstevel@tonic-gate } else { 20290Sstevel@tonic-gate line = segment / (un->un_origcolumncnt - 1); 20300Sstevel@tonic-gate } 20310Sstevel@tonic-gate column = (uint_t)((line + (un->un_origcolumncnt - 1)) 20320Sstevel@tonic-gate % un->un_origcolumncnt); 20330Sstevel@tonic-gate return (column); 20340Sstevel@tonic-gate } 20350Sstevel@tonic-gate 20360Sstevel@tonic-gate 20370Sstevel@tonic-gate /* 20380Sstevel@tonic-gate * Is called in raid_iosetup to probe each column to insure 20390Sstevel@tonic-gate * that all the columns are in 'okay' state and meet the 20400Sstevel@tonic-gate * 'full line' requirement. 
If any column is in error,
 * we don't want to enable the 'full line' flag.  Previously,
 * we would do so and disable it only when an error was
 * detected after the first 'full line' I/O, which is too late
 * and leads to potential data corruption.
 */
static int
raid_check_cols(mr_unit_t *un)
{
	buf_t		bp;
	char		*buf;
	mr_column_t	*colptr;
	minor_t		mnum = MD_SID(un);
	int		i;
	int		err = 0;

	/* scratch sector shared by every column's probe read */
	buf = kmem_zalloc((uint_t)DEV_BSIZE, KM_SLEEP);

	for (i = 0; i < un->un_totalcolumncnt; i++) {
		md_dev64_t tmpdev;

		colptr = &un->un_column[i];

		tmpdev = colptr->un_dev;
		/*
		 * Open by device id.
		 * If this device is hotspared
		 * use the hotspare key.
		 */
		tmpdev = md_resolve_bydevid(mnum, tmpdev, HOTSPARED(un, i) ?
		    colptr->un_hs_key : colptr->un_orig_key);

		if (tmpdev == NODEV64) {
			err = 1;
			break;
		}

		colptr->un_dev = tmpdev;

		/*
		 * Issue a synchronous one-sector read at the column's
		 * prewrite area to verify the device is reachable.
		 * The buf is stack-local, so initialize it by hand.
		 */
		bzero((caddr_t)&bp, sizeof (buf_t));
		bp.b_back = &bp;
		bp.b_forw = &bp;
		bp.b_flags = (B_READ | B_BUSY);
		sema_init(&bp.b_io, 0, NULL,
		    SEMA_DEFAULT, NULL);
		sema_init(&bp.b_sem, 0, NULL,
		    SEMA_DEFAULT, NULL);
		bp.b_edev = md_dev64_to_dev(colptr->un_dev);
		bp.b_lblkno = colptr->un_pwstart;
		bp.b_bcount = DEV_BSIZE;
		bp.b_bufsize = DEV_BSIZE;
		bp.b_un.b_addr = (caddr_t)buf;
		(void) md_call_strategy(&bp, 0, NULL);
		if (biowait(&bp)) {
			err = 1;
			break;
		}
	}

	kmem_free(buf, DEV_BSIZE);
	/* 0 means every column answered the probe read */
	return (err);
}

/*
 * NAME: raid_iosetup
 * DESCRIPTION: RAID metadevice specific I/O set up routine which does
 *		all the necessary calculations to determine the location
 *		of the segment for the I/O.
 * PARAMETERS: mr_unit_t *un - unit number of RAID metadevice
 *	       diskaddr_t blkno - block number of the I/O attempt
 *	       size_t blkcnt - block count for this I/O
 *	       md_raidcs_t *cs - child structure for each segmented I/O
 *
 * NOTE: The following is an example of a raid disk layout:
 *
 *	Total Column = 5
 *	Original Column = 4
 *	Segment Per Column = 10
 *
 *	Col#0	Col#1	Col#2	Col#3	Col#4	Col#5	Col#6
 *	-------------------------------------------------------------
 *	line#0	Seg#0	Seg#1	Seg#2	Parity	Seg#30	Seg#40
 *	line#1	Parity	Seg#3	Seg#4	Seg#5		Seg#31
 *	line#2	Seg#8	Parity	Seg#6	Seg#7		Seg#32
 *	line#3	Seg#10	Seg#11	Parity	Seg#9		Seg#33
 *	line#4	Seg#12	Seg#13	Seg#14	Parity		Seg#34
 *	line#5	Parity	Seg#15	Seg#16	Seg#17		Seg#35
 *	line#6	Seg#20	Parity	Seg#18	Seg#19		Seg#36
 *	line#7	Seg#22	Seg#23	Parity	Seg#21		Seg#37
 *	line#8	Seg#24	Seg#25	Seg#26	Parity		Seg#38
 *	line#9	Parity	Seg#27	Seg#28	Seg#29		Seg#39
 */
static size_t
raid_iosetup(
	mr_unit_t	*un,
	diskaddr_t	blkno,
	size_t		blkcnt,
	md_raidcs_t	*cs
)
{
	diskaddr_t	segment;
	diskaddr_t	segstart;
	diskaddr_t	segoff;
	size_t		leftover;
	diskaddr_t	line;
	uint_t		iosize;
	uint_t		colcnt;

	/* calculate the segment# and offset for the block */
	segment = blkno / un->un_segsize;
	segstart = segment * un->un_segsize;
	segoff = blkno - segstart;
	iosize = un->un_iosize - 1;
	colcnt = un->un_totalcolumncnt - 1;
	line = raid_line(segment, un);
	cs->cs_dcolumn = raid_dcolumn(segment, un);
	cs->cs_pcolumn = raid_pcolumn(segment, un);
	cs->cs_dflags = un->un_column[cs->cs_dcolumn].un_devflags;
	cs->cs_pflags = un->un_column[cs->cs_pcolumn].un_devflags;
	cs->cs_line = line;

	/*
	 * Full-line write fast path: only taken when the write is
	 * segment-aligned at the start of a line, covers the whole
	 * line, the unit is not in reconfiguration, every column is
	 * okay and every column just answered a probe read
	 * (raid_check_cols).
	 */
	if ((cs->cs_ps->ps_flags & MD_RPS_WRITE) &&
	    (UNIT_STATE(un) & RCS_OKAY) &&
	    (segoff == 0) &&
	    (un->un_totalcolumncnt == un->un_origcolumncnt) &&
	    (un->un_segsize < un->un_iosize) &&
	    (un->un_iosize <= un->un_maxio) &&
	    (blkno == line * un->un_segsize * colcnt) &&
	    (blkcnt >= ((un->un_totalcolumncnt -1) * un->un_segsize)) &&
	    (raid_state_cnt(un, RCS_OKAY) == un->un_origcolumncnt) &&
	    (raid_check_cols(un) == 0)) {

		md_raidcbuf_t	**cbufp;
		md_raidcbuf_t	*cbuf;
		int		i, j;

		STAT_INC(raid_full_line_writes);
		leftover = blkcnt - (un->un_segsize * colcnt);
		ASSERT(blkcnt >= (un->un_segsize * colcnt));
		cs->cs_blkno = line * un->un_segsize;
		cs->cs_blkcnt = un->un_segsize;
		cs->cs_lastblk = cs->cs_blkno + cs->cs_blkcnt - 1;
		cs->cs_bcount = dbtob(cs->cs_blkcnt);
		cs->cs_flags |= MD_RCS_LINE;

		/*
		 * Allocate one cbuf per data column other than the
		 * child's own data and parity columns, walking the
		 * columns in rotated order starting at cs_dcolumn.
		 */
		cbufp = &cs->cs_buflist;
		for (i = 0; i < un->un_totalcolumncnt; i++) {
			j = cs->cs_dcolumn + i;
			j = j % un->un_totalcolumncnt;

			if ((j == cs->cs_dcolumn) || (j == cs->cs_pcolumn))
				continue;
			cbuf = kmem_cache_alloc(raid_cbuf_cache,
			    MD_ALLOCFLAGS);
			raid_cbuf_init(cbuf);
			cbuf->cbuf_un = cs->cs_un;
			cbuf->cbuf_ps = cs->cs_ps;
			cbuf->cbuf_column = j;
			cbuf->cbuf_bcount = dbtob(un->un_segsize);
			*cbufp = cbuf;
			cbufp = &cbuf->cbuf_next;
		}
		return (leftover);
	}

	/* clip the request to the remainder of this segment */
	leftover = blkcnt - (un->un_segsize - segoff);
	if (blkcnt > (un->un_segsize - segoff))
		blkcnt -= leftover;
	else
		leftover = 0;

	/* and to the unit's maximum transfer size */
	if (blkcnt > (size_t)iosize) {
		leftover += (blkcnt - iosize);
		blkcnt = iosize;
	}

	/* calculate the line# and column# for the segment */
	cs->cs_flags &= ~MD_RCS_LINE;
	cs->cs_blkno = line * un->un_segsize + segoff;
	cs->cs_blkcnt = (uint_t)blkcnt;
	cs->cs_lastblk = cs->cs_blkno + cs->cs_blkcnt - 1;
	cs->cs_bcount = dbtob((uint_t)blkcnt);
	/* blocks of the request not covered by this child I/O */
	return (leftover);
}

/*
 * NAME: raid_done
 * DESCRIPTION: RAID metadevice I/O done interrupt routine
 * PARAMETERS: struct buf *bp - pointer to a buffer structure
 */
static void
raid_done(struct buf *bp)
{
	md_raidcs_t	*cs;
	int		flags, frags;

	sema_v(&bp->b_io);
	cs = (md_raidcs_t *)bp->b_chain;

	ASSERT(cs != NULL);

	mutex_enter(&cs->cs_mx);
	if (bp->b_flags & B_ERROR) {
		cs->cs_flags |= MD_RCS_ERROR;
		cs->cs_flags &= ~(MD_RCS_ISCALL);
	}

	/* snapshot flags/frags under the mutex before deciding */
	flags = cs->cs_flags;
	frags = --cs->cs_frags;
	mutex_exit(&cs->cs_mx);
	if (frags != 0) {
		return;
	}

	/* last outstanding fragment: dispatch the completion path */
	if (flags & MD_RCS_ERROR) {
		if (cs->cs_error_call) {
			daemon_request(&md_done_daemon, cs->cs_error_call,
			    (daemon_queue_t *)cs, REQ_OLD);
		}
		return;
	}

	if (flags & MD_RCS_ISCALL) {
		/* call directly from interrupt context */
		cs->cs_flags &= ~(MD_RCS_ISCALL);
		(*(cs->cs_call))(cs);
		return;
	}
	daemon_request(&md_done_daemon, cs->cs_call,
22680Sstevel@tonic-gate (daemon_queue_t *)cs, REQ_OLD); 22690Sstevel@tonic-gate } 22700Sstevel@tonic-gate /* 22710Sstevel@tonic-gate * the flag RIO_EXTRA is used when dealing with a column in the process 22720Sstevel@tonic-gate * of being resynced. During the resync, writes may have to take place 22730Sstevel@tonic-gate * on both the original component and a hotspare component. 22740Sstevel@tonic-gate */ 22750Sstevel@tonic-gate #define RIO_DATA 0x00100 /* use data buffer & data column */ 22760Sstevel@tonic-gate #define RIO_PARITY 0x00200 /* use parity buffer & parity column */ 22770Sstevel@tonic-gate #define RIO_WRITE 0x00400 /* issue a write */ 22780Sstevel@tonic-gate #define RIO_READ 0x00800 /* issue a read */ 22790Sstevel@tonic-gate #define RIO_PWIO 0x01000 /* do the I/O to the prewrite entry */ 22800Sstevel@tonic-gate #define RIO_ALT 0x02000 /* do write to alternate device */ 22810Sstevel@tonic-gate #define RIO_EXTRA 0x04000 /* use extra buffer */ 22820Sstevel@tonic-gate 22830Sstevel@tonic-gate #define RIO_COLMASK 0x000ff 22840Sstevel@tonic-gate 22850Sstevel@tonic-gate #define RIO_PREWRITE RIO_WRITE | RIO_PWIO 22860Sstevel@tonic-gate 22870Sstevel@tonic-gate /* 22880Sstevel@tonic-gate * NAME: raidio 22890Sstevel@tonic-gate * DESCRIPTION: RAID metadevice write routine 22900Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to a child structure 22910Sstevel@tonic-gate */ 22920Sstevel@tonic-gate static void 22930Sstevel@tonic-gate raidio(md_raidcs_t *cs, int flags) 22940Sstevel@tonic-gate { 22950Sstevel@tonic-gate buf_t *bp; 22960Sstevel@tonic-gate int column; 22970Sstevel@tonic-gate int flag; 22980Sstevel@tonic-gate void *private; 22990Sstevel@tonic-gate mr_unit_t *un; 23000Sstevel@tonic-gate int iosize; 23010Sstevel@tonic-gate diskaddr_t pwstart; 23020Sstevel@tonic-gate diskaddr_t devstart; 23030Sstevel@tonic-gate md_dev64_t dev; 23040Sstevel@tonic-gate 23050Sstevel@tonic-gate un = cs->cs_un; 23060Sstevel@tonic-gate 23070Sstevel@tonic-gate 
ASSERT(IO_READER_HELD(un)); 23080Sstevel@tonic-gate ASSERT(UNIT_READER_HELD(un)); 23090Sstevel@tonic-gate 23100Sstevel@tonic-gate if (flags & RIO_DATA) { 23110Sstevel@tonic-gate if (flags & RIO_EXTRA) 23120Sstevel@tonic-gate bp = &cs->cs_hbuf; 23130Sstevel@tonic-gate else 23140Sstevel@tonic-gate bp = &cs->cs_dbuf; 23150Sstevel@tonic-gate bp->b_un.b_addr = cs->cs_dbuffer; 23160Sstevel@tonic-gate column = cs->cs_dcolumn; 23170Sstevel@tonic-gate } else { 23180Sstevel@tonic-gate if (flags & RIO_EXTRA) 23190Sstevel@tonic-gate bp = &cs->cs_hbuf; 23200Sstevel@tonic-gate else 23210Sstevel@tonic-gate bp = &cs->cs_pbuf; 23220Sstevel@tonic-gate bp->b_un.b_addr = cs->cs_pbuffer; 23230Sstevel@tonic-gate column = cs->cs_pcolumn; 23240Sstevel@tonic-gate } 23250Sstevel@tonic-gate if (flags & RIO_COLMASK) 23260Sstevel@tonic-gate column = (flags & RIO_COLMASK) - 1; 23270Sstevel@tonic-gate 23280Sstevel@tonic-gate bp->b_bcount = cs->cs_bcount; 23290Sstevel@tonic-gate bp->b_bufsize = cs->cs_bcount; 23300Sstevel@tonic-gate iosize = un->un_iosize; 23310Sstevel@tonic-gate 23320Sstevel@tonic-gate /* check if the hotspared device will be used */ 23330Sstevel@tonic-gate if (flags & RIO_ALT && (flags & RIO_WRITE)) { 23340Sstevel@tonic-gate pwstart = un->un_column[column].un_alt_pwstart; 23350Sstevel@tonic-gate devstart = un->un_column[column].un_alt_devstart; 23360Sstevel@tonic-gate dev = un->un_column[column].un_alt_dev; 23370Sstevel@tonic-gate } else { 23380Sstevel@tonic-gate pwstart = un->un_column[column].un_pwstart; 23390Sstevel@tonic-gate devstart = un->un_column[column].un_devstart; 23400Sstevel@tonic-gate dev = un->un_column[column].un_dev; 23410Sstevel@tonic-gate } 23420Sstevel@tonic-gate 23430Sstevel@tonic-gate /* if not writing to log skip log header */ 23440Sstevel@tonic-gate if ((flags & RIO_PWIO) == 0) { 23450Sstevel@tonic-gate bp->b_lblkno = devstart + cs->cs_blkno; 23460Sstevel@tonic-gate bp->b_un.b_addr += DEV_BSIZE; 23470Sstevel@tonic-gate } else { 23480Sstevel@tonic-gate 
bp->b_bcount += DEV_BSIZE; 23490Sstevel@tonic-gate bp->b_bufsize = bp->b_bcount; 23500Sstevel@tonic-gate if (flags & RIO_DATA) { 23510Sstevel@tonic-gate bp->b_lblkno = cs->cs_dpwslot * iosize + pwstart; 23520Sstevel@tonic-gate } else { /* not DATA -> PARITY */ 23530Sstevel@tonic-gate bp->b_lblkno = cs->cs_ppwslot * iosize + pwstart; 23540Sstevel@tonic-gate } 23550Sstevel@tonic-gate } 23560Sstevel@tonic-gate 23570Sstevel@tonic-gate bp->b_flags &= ~(B_READ | B_WRITE | B_ERROR | nv_available); 23580Sstevel@tonic-gate bp->b_flags |= B_BUSY; 23590Sstevel@tonic-gate if (flags & RIO_READ) { 23600Sstevel@tonic-gate bp->b_flags |= B_READ; 23610Sstevel@tonic-gate } else { 23620Sstevel@tonic-gate bp->b_flags |= B_WRITE; 23630Sstevel@tonic-gate if ((nv_available && nv_parity && (flags & RIO_PARITY)) || 23640Sstevel@tonic-gate (nv_available && nv_prewrite && (flags & RIO_PWIO))) 23650Sstevel@tonic-gate bp->b_flags |= nv_available; 23660Sstevel@tonic-gate } 23670Sstevel@tonic-gate bp->b_iodone = (int (*)())raid_done; 23680Sstevel@tonic-gate bp->b_edev = md_dev64_to_dev(dev); 23690Sstevel@tonic-gate 23700Sstevel@tonic-gate ASSERT((bp->b_edev != 0) && (bp->b_edev != NODEV)); 23710Sstevel@tonic-gate 23720Sstevel@tonic-gate private = cs->cs_strategy_private; 23730Sstevel@tonic-gate flag = cs->cs_strategy_flag; 23740Sstevel@tonic-gate 23750Sstevel@tonic-gate md_call_strategy(bp, flag, private); 23760Sstevel@tonic-gate } 23770Sstevel@tonic-gate 23780Sstevel@tonic-gate /* 23790Sstevel@tonic-gate * NAME: genstandardparity 23800Sstevel@tonic-gate * DESCRIPTION: This routine 23810Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to a child structure 23820Sstevel@tonic-gate */ 23830Sstevel@tonic-gate static void 23840Sstevel@tonic-gate genstandardparity(md_raidcs_t *cs) 23850Sstevel@tonic-gate { 23860Sstevel@tonic-gate uint_t *dbuf, *pbuf; 23870Sstevel@tonic-gate size_t wordcnt; 23880Sstevel@tonic-gate uint_t dsum = 0; 23890Sstevel@tonic-gate uint_t psum = 0; 
23900Sstevel@tonic-gate 23910Sstevel@tonic-gate ASSERT((cs->cs_bcount & 0x3) == 0); 23920Sstevel@tonic-gate 23930Sstevel@tonic-gate wordcnt = cs->cs_bcount / sizeof (uint_t); 23940Sstevel@tonic-gate 23950Sstevel@tonic-gate dbuf = (uint_t *)(void *)(cs->cs_dbuffer + DEV_BSIZE); 23960Sstevel@tonic-gate pbuf = (uint_t *)(void *)(cs->cs_pbuffer + DEV_BSIZE); 23970Sstevel@tonic-gate 23980Sstevel@tonic-gate /* Word aligned */ 23990Sstevel@tonic-gate if (((uintptr_t)cs->cs_addr & 0x3) == 0) { 24000Sstevel@tonic-gate uint_t *uwbuf = (uint_t *)(void *)(cs->cs_addr); 24010Sstevel@tonic-gate uint_t uval; 24020Sstevel@tonic-gate 24030Sstevel@tonic-gate while (wordcnt--) { 24040Sstevel@tonic-gate uval = *uwbuf++; 24050Sstevel@tonic-gate psum ^= (*pbuf = ((*pbuf ^ *dbuf) ^ uval)); 24060Sstevel@tonic-gate ++pbuf; 24070Sstevel@tonic-gate *dbuf = uval; 24080Sstevel@tonic-gate dsum ^= uval; 24090Sstevel@tonic-gate ++dbuf; 24100Sstevel@tonic-gate } 24110Sstevel@tonic-gate } else { 24120Sstevel@tonic-gate uchar_t *ubbuf = (uchar_t *)(cs->cs_addr); 24130Sstevel@tonic-gate union { 24140Sstevel@tonic-gate uint_t wb; 24150Sstevel@tonic-gate uchar_t bb[4]; 24160Sstevel@tonic-gate } cb; 24170Sstevel@tonic-gate 24180Sstevel@tonic-gate while (wordcnt--) { 24190Sstevel@tonic-gate cb.bb[0] = *ubbuf++; 24200Sstevel@tonic-gate cb.bb[1] = *ubbuf++; 24210Sstevel@tonic-gate cb.bb[2] = *ubbuf++; 24220Sstevel@tonic-gate cb.bb[3] = *ubbuf++; 24230Sstevel@tonic-gate psum ^= (*pbuf = ((*pbuf ^ *dbuf) ^ cb.wb)); 24240Sstevel@tonic-gate ++pbuf; 24250Sstevel@tonic-gate *dbuf = cb.wb; 24260Sstevel@tonic-gate dsum ^= cb.wb; 24270Sstevel@tonic-gate ++dbuf; 24280Sstevel@tonic-gate } 24290Sstevel@tonic-gate } 24300Sstevel@tonic-gate 24310Sstevel@tonic-gate RAID_FILLIN_RPW(cs->cs_dbuffer, cs->cs_un, dsum, cs->cs_pcolumn, 24320Sstevel@tonic-gate cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid, 24330Sstevel@tonic-gate 2, cs->cs_dcolumn, RAID_PWMAGIC); 24340Sstevel@tonic-gate 24350Sstevel@tonic-gate 
RAID_FILLIN_RPW(cs->cs_pbuffer, cs->cs_un, psum, cs->cs_dcolumn, 24360Sstevel@tonic-gate cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid, 24370Sstevel@tonic-gate 2, cs->cs_pcolumn, RAID_PWMAGIC); 24380Sstevel@tonic-gate } 24390Sstevel@tonic-gate 24400Sstevel@tonic-gate static void 24410Sstevel@tonic-gate genlineparity(md_raidcs_t *cs) 24420Sstevel@tonic-gate { 24430Sstevel@tonic-gate 24440Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 24450Sstevel@tonic-gate md_raidcbuf_t *cbuf; 24460Sstevel@tonic-gate uint_t *pbuf, *dbuf; 24470Sstevel@tonic-gate uint_t *uwbuf; 24480Sstevel@tonic-gate uchar_t *ubbuf; 24490Sstevel@tonic-gate size_t wordcnt; 24500Sstevel@tonic-gate uint_t psum = 0, dsum = 0; 24510Sstevel@tonic-gate size_t count = un->un_segsize * DEV_BSIZE; 24520Sstevel@tonic-gate uint_t col; 24530Sstevel@tonic-gate buf_t *bp; 24540Sstevel@tonic-gate 24550Sstevel@tonic-gate ASSERT((cs->cs_bcount & 0x3) == 0); 24560Sstevel@tonic-gate 24570Sstevel@tonic-gate pbuf = (uint_t *)(void *)(cs->cs_pbuffer + DEV_BSIZE); 24580Sstevel@tonic-gate dbuf = (uint_t *)(void *)(cs->cs_dbuffer + DEV_BSIZE); 24590Sstevel@tonic-gate uwbuf = (uint_t *)(void *)(cs->cs_addr); 24600Sstevel@tonic-gate ubbuf = (uchar_t *)(void *)(cs->cs_addr); 24610Sstevel@tonic-gate 24620Sstevel@tonic-gate wordcnt = count / sizeof (uint_t); 24630Sstevel@tonic-gate 24640Sstevel@tonic-gate /* Word aligned */ 24650Sstevel@tonic-gate if (((uintptr_t)cs->cs_addr & 0x3) == 0) { 24660Sstevel@tonic-gate uint_t uval; 24670Sstevel@tonic-gate 24680Sstevel@tonic-gate while (wordcnt--) { 24690Sstevel@tonic-gate uval = *uwbuf++; 24700Sstevel@tonic-gate *dbuf = uval; 24710Sstevel@tonic-gate *pbuf = uval; 24720Sstevel@tonic-gate dsum ^= uval; 24730Sstevel@tonic-gate ++pbuf; 24740Sstevel@tonic-gate ++dbuf; 24750Sstevel@tonic-gate } 24760Sstevel@tonic-gate } else { 24770Sstevel@tonic-gate union { 24780Sstevel@tonic-gate uint_t wb; 24790Sstevel@tonic-gate uchar_t bb[4]; 24800Sstevel@tonic-gate } cb; 24810Sstevel@tonic-gate 
		while (wordcnt--) {
			cb.bb[0] = *ubbuf++;
			cb.bb[1] = *ubbuf++;
			cb.bb[2] = *ubbuf++;
			cb.bb[3] = *ubbuf++;
			*dbuf = cb.wb;
			*pbuf = cb.wb;
			dsum ^= cb.wb;
			++pbuf;
			++dbuf;
		}
	}

	RAID_FILLIN_RPW(cs->cs_dbuffer, un, dsum, cs->cs_pcolumn,
	    cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid,
	    un->un_totalcolumncnt, cs->cs_dcolumn, RAID_PWMAGIC);

	/* first column's buffer is ready; start its prewrite now */
	raidio(cs, RIO_PREWRITE | RIO_DATA);

	/* fill and issue the remaining data columns' buffers */
	for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) {

		dsum = 0;
		pbuf = (uint_t *)(void *)(cs->cs_pbuffer + DEV_BSIZE);
		dbuf = (uint_t *)(void *)(cbuf->cbuf_buffer + DEV_BSIZE);

		wordcnt = count / sizeof (uint_t);

		col = cbuf->cbuf_column;

		/* Word aligned */
		if (((uintptr_t)cs->cs_addr & 0x3) == 0) {
			uint_t	uval;

			/*
			 * Only calculate psum when working on the last
			 * data buffer.
			 */
			if (cbuf->cbuf_next == NULL) {
				psum = 0;
				while (wordcnt--) {
					uval = *uwbuf++;
					*dbuf = uval;
					psum ^= (*pbuf ^= uval);
					dsum ^= uval;
					++dbuf;
					++pbuf;
				}
			} else {
				while (wordcnt--) {
					uval = *uwbuf++;
					*dbuf = uval;
					*pbuf ^= uval;
					dsum ^= uval;
					++dbuf;
					++pbuf;
				}
			}
		} else {
			/* unaligned source: assemble words bytewise */
			union {
				uint_t	wb;
				uchar_t	bb[4];
			} cb;

			/*
			 * Only calculate psum when working on the last
			 * data buffer.
			 */
			if (cbuf->cbuf_next == NULL) {
				psum = 0;
				while (wordcnt--) {
					cb.bb[0] = *ubbuf++;
					cb.bb[1] = *ubbuf++;
					cb.bb[2] = *ubbuf++;
					cb.bb[3] = *ubbuf++;
					*dbuf = cb.wb;
					psum ^= (*pbuf ^= cb.wb);
					dsum ^= cb.wb;
					++dbuf;
					++pbuf;
				}
			} else {
				while (wordcnt--) {
					cb.bb[0] = *ubbuf++;
					cb.bb[1] = *ubbuf++;
					cb.bb[2] = *ubbuf++;
					cb.bb[3] = *ubbuf++;
					*dbuf = cb.wb;
					*pbuf ^= cb.wb;
					dsum ^= cb.wb;
					++dbuf;
					++pbuf;
				}
			}
		}
		RAID_FILLIN_RPW(cbuf->cbuf_buffer, un, dsum, cs->cs_pcolumn,
		    cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid,
		    un->un_totalcolumncnt, col, RAID_PWMAGIC);

		/*
		 * fill in buffer for write to prewrite area
		 */
		bp = &cbuf->cbuf_bp;
		bp->b_un.b_addr = cbuf->cbuf_buffer;
		bp->b_bcount = cbuf->cbuf_bcount + DEV_BSIZE;
		bp->b_bufsize = bp->b_bcount;
		bp->b_lblkno = (cbuf->cbuf_pwslot * un->un_iosize) +
		    un->un_column[col].un_pwstart;
		bp->b_flags = B_WRITE | B_BUSY;
		if (nv_available && nv_prewrite)
			bp->b_flags |= nv_available;
		bp->b_iodone = (int (*)())raid_done;
		bp->b_edev = md_dev64_to_dev(un->un_column[col].un_dev);
		bp->b_chain = (struct buf *)cs;
		md_call_strategy(bp,
		    cs->cs_strategy_flag, cs->cs_strategy_private);
	}

	RAID_FILLIN_RPW(cs->cs_pbuffer, un, psum, cs->cs_dcolumn,
	    cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid,
	    un->un_totalcolumncnt, cs->cs_pcolumn, RAID_PWMAGIC);

	/* finally, the accumulated parity column's prewrite */
	raidio(cs, RIO_PREWRITE | RIO_PARITY);
}

/*
 * NAME: raid_readregenloop
 * DESCRIPTION: regenerate data for a read from an errored column by
 *		XORing together the remaining columns, one column per
 *		iteration (re-entered via cs_call after each read).
 * PARAMETERS: md_raidcs_t *cs - pointer to a child structure
 */
static void
raid_readregenloop(md_raidcs_t *cs)
{
	mr_unit_t	*un;
	md_raidps_t	*ps;
	uint_t	*dbuf;
	uint_t	*pbuf;
	size_t	wordcnt;

	un = cs->cs_un;

	/*
	 * XOR the parity with data bytes, must skip the
	 * pre-write entry header in all data/parity buffers
	 */
	wordcnt = cs->cs_bcount / sizeof (uint_t);
	dbuf = (uint_t *)(void *)(cs->cs_dbuffer + DEV_BSIZE);
	pbuf = (uint_t *)(void *)(cs->cs_pbuffer + DEV_BSIZE);
	while (wordcnt--)
		*dbuf++ ^= *pbuf++;

	/* bump up the loop count */
	cs->cs_loop++;

	/* skip the errored component */
	if (cs->cs_loop == cs->cs_dcolumn)
		cs->cs_loop++;

	if (cs->cs_loop != un->un_totalcolumncnt) {
		/* read the next column (RIO_COLMASK is 1-based) */
		cs->cs_frags = 1;
		raidio(cs, RIO_PARITY | RIO_READ | (cs->cs_loop + 1));
		return;
	}
	/* reaching the end of loop */
	ps = cs->cs_ps;
	bcopy(cs->cs_dbuffer + DEV_BSIZE, cs->cs_addr, cs->cs_bcount);
	raid_free_child(cs, 1);

	/* decrement readfrags */
	raid_free_parent(ps, RFP_DECR_READFRAGS | RFP_RLS_LOCK);
}

/*
 * NAME: raid_read_io
 * DESCRIPTION: RAID metadevice read I/O routine
 * PARAMETERS: mr_unit_t *un - pointer to a unit structure
 *	       md_raidcs_t *cs - pointer to a child structure
 */
static void
raid_read_io(mr_unit_t *un, md_raidcs_t *cs)
{
	int	flag;
	void	*private;
	buf_t	*bp;
	buf_t	*pb = cs->cs_ps->ps_bp;
	mr_column_t	*column;

	flag = cs->cs_strategy_flag;
	private = cs->cs_strategy_private;
	column = &un->un_column[cs->cs_dcolumn];

	/*
	 * The component to be read is good, simply set up bp structure
	 * and call low level md routine doing the read.
	 */

	if (COLUMN_ISOKAY(un, cs->cs_dcolumn) ||
	    (COLUMN_ISLASTERR(un, cs->cs_dcolumn) &&
	    (cs->cs_flags & MD_RCS_RECOVERY) == 0)) {
		dev_t ddi_dev; /* needed for bioclone, so not md_dev64_t */
		ddi_dev = md_dev64_to_dev(column->un_dev);

		bp = &cs->cs_dbuf;
		bp = md_bioclone(pb, cs->cs_offset, cs->cs_bcount, ddi_dev,
		    column->un_devstart + cs->cs_blkno,
		    (int (*)())raid_done, bp, KM_NOSLEEP);

		bp->b_chain = (buf_t *)cs;

		cs->cs_frags = 1;
		cs->cs_error_call = raid_read_error;
		cs->cs_retry_call = raid_read_retry;
		cs->cs_flags |= MD_RCS_ISCALL;
		cs->cs_stage = RAID_READ_DONE;
		cs->cs_call = raid_stage;

		ASSERT(bp->b_edev != 0);

		md_call_strategy(bp, flag, private);
		return;
	}

	/*
	 * The component to be read is bad, have to go through
	 * raid specific method to read data from other members.
	 */
	cs->cs_loop = 0;
	/*
	 * NOTE: always get dbuffer before pbuffer
	 *	 and get both buffers before pwslot
	 *	 otherwise a deadlock could be introduced.
	 */
	raid_mapin_buf(cs);
	getdbuffer(cs);
	getpbuffer(cs);
	if (cs->cs_loop == cs->cs_dcolumn)
		cs->cs_loop++;

	/* zero out data buffer for use as a data sink */
	bzero(cs->cs_dbuffer + DEV_BSIZE, cs->cs_bcount);
	cs->cs_stage = RAID_NONE;
	cs->cs_call = raid_readregenloop;
	cs->cs_error_call = raid_read_error;
	cs->cs_retry_call = raid_read_no_retry;
	cs->cs_frags = 1;

	/* use parity buffer to read other columns */
	raidio(cs, RIO_PARITY | RIO_READ | (cs->cs_loop + 1));
}

/*
 * NAME: raid_read
 * DESCRIPTION: RAID metadevice read routine
 * PARAMETERS: mr_unit_t *un - pointer to a unit structure
 *	       md_raidcs_t *cs - pointer to a child structure
 */
static int
raid_read(mr_unit_t *un, md_raidcs_t *cs)
{
	int	error = 0;
	md_raidps_t	*ps;
	mdi_unit_t	*ui;
	minor_t	mnum;

	ASSERT(IO_READER_HELD(un));
	ps = cs->cs_ps;
	ui = ps->ps_ui;
	raid_line_reader_lock(cs, 0);
	un = (mr_unit_t *)md_unit_readerlock(ui);
	ASSERT(UNIT_STATE(un) != RUS_INIT);
	mnum = MD_SID(un);
	cs->cs_un = un;

	/* make sure the read doesn't go beyond the end of the column */
	if (cs->cs_blkno + cs->cs_blkcnt >
	    un->un_segsize * un->un_segsincolumn) {
		error = ENXIO;
	}
	if (error)
		goto rerror;

	if (un->un_state & RUS_REGEN) {
		/* regen may swap in a new unit structure; reload it */
		raid_regen_parity(cs);
		un = MD_UNIT(mnum);
		cs->cs_un = un;
	}

	raid_read_io(un, cs);
	return (0);

rerror:
	raid_error_parent(ps, error);
	raid_free_child(cs, 1);
	/* decrement readfrags */
	raid_free_parent(ps, RFP_DECR_READFRAGS | RFP_RLS_LOCK);
	return (0);
}

/*
 * NAME: raid_write_err_retry
 * DESCRIPTION: RAID metadevice write retry routine
 *		write was for parity or data only;
 *		complete write with error, no recovery possible
 * PARAMETERS: mr_unit_t *un - pointer to a unit structure
 *	       md_raidcs_t *cs - pointer to a child structure
 */
/*ARGSUSED*/
static void
raid_write_err_retry(mr_unit_t *un, md_raidcs_t *cs)
{
	md_raidps_t	*ps = cs->cs_ps;
	int	flags = RFP_DECR_FRAGS | RFP_RLS_LOCK;

	/* decrement pwfrags if needed, and frags */
	if (!(cs->cs_flags & MD_RCS_PWDONE))
		flags |= RFP_DECR_PWFRAGS;
	raid_error_parent(ps, EIO);
	raid_free_child(cs, 1);
	raid_free_parent(ps, flags);
}

/*
 * NAME: raid_write_no_retry
 * DESCRIPTION: RAID metadevice write retry routine
 *		write is too far along to retry and parent
 *		has already been signaled with iodone.
 * PARAMETERS: mr_unit_t *un - pointer to a unit structure
 *	       md_raidcs_t *cs - pointer to a child structure
 */
/*ARGSUSED*/
static void
raid_write_no_retry(mr_unit_t *un, md_raidcs_t *cs)
{
	md_raidps_t	*ps = cs->cs_ps;
	int	flags = RFP_DECR_FRAGS | RFP_RLS_LOCK;

	/* decrement pwfrags if needed, and frags */
	if (!(cs->cs_flags & MD_RCS_PWDONE))
		flags |= RFP_DECR_PWFRAGS;
	raid_free_child(cs, 1);
	raid_free_parent(ps, flags);
}

/*
 * NAME: raid_write_retry
 * DESCRIPTION: RAID metadevice write retry routine
 * PARAMETERS: mr_unit_t *un - pointer to a unit structure
 *	       md_raidcs_t *cs - pointer to a child structure
 */
static void
raid_write_retry(mr_unit_t *un, md_raidcs_t *cs)
{
	md_raidps_t	*ps;

	ps = cs->cs_ps;

	/* re-initialize the buf_t
structure for raid_write() */ 28380Sstevel@tonic-gate cs->cs_dbuf.b_chain = (struct buf *)cs; 28390Sstevel@tonic-gate cs->cs_dbuf.b_back = &cs->cs_dbuf; 28400Sstevel@tonic-gate cs->cs_dbuf.b_forw = &cs->cs_dbuf; 28410Sstevel@tonic-gate cs->cs_dbuf.b_flags = B_BUSY; /* initialize flags */ 28420Sstevel@tonic-gate cs->cs_dbuf.b_error = 0; /* initialize error */ 28430Sstevel@tonic-gate cs->cs_dbuf.b_offset = -1; 28440Sstevel@tonic-gate /* Initialize semaphores */ 28450Sstevel@tonic-gate sema_init(&cs->cs_dbuf.b_io, 0, NULL, 28460Sstevel@tonic-gate SEMA_DEFAULT, NULL); 28470Sstevel@tonic-gate sema_init(&cs->cs_dbuf.b_sem, 0, NULL, 28480Sstevel@tonic-gate SEMA_DEFAULT, NULL); 28490Sstevel@tonic-gate 28500Sstevel@tonic-gate cs->cs_pbuf.b_chain = (struct buf *)cs; 28510Sstevel@tonic-gate cs->cs_pbuf.b_back = &cs->cs_pbuf; 28520Sstevel@tonic-gate cs->cs_pbuf.b_forw = &cs->cs_pbuf; 28530Sstevel@tonic-gate cs->cs_pbuf.b_flags = B_BUSY; /* initialize flags */ 28540Sstevel@tonic-gate cs->cs_pbuf.b_error = 0; /* initialize error */ 28550Sstevel@tonic-gate cs->cs_pbuf.b_offset = -1; 28560Sstevel@tonic-gate sema_init(&cs->cs_pbuf.b_io, 0, NULL, 28570Sstevel@tonic-gate SEMA_DEFAULT, NULL); 28580Sstevel@tonic-gate sema_init(&cs->cs_pbuf.b_sem, 0, NULL, 28590Sstevel@tonic-gate SEMA_DEFAULT, NULL); 28600Sstevel@tonic-gate 28610Sstevel@tonic-gate cs->cs_hbuf.b_chain = (struct buf *)cs; 28620Sstevel@tonic-gate cs->cs_hbuf.b_back = &cs->cs_hbuf; 28630Sstevel@tonic-gate cs->cs_hbuf.b_forw = &cs->cs_hbuf; 28640Sstevel@tonic-gate cs->cs_hbuf.b_flags = B_BUSY; /* initialize flags */ 28650Sstevel@tonic-gate cs->cs_hbuf.b_error = 0; /* initialize error */ 28660Sstevel@tonic-gate cs->cs_hbuf.b_offset = -1; 28670Sstevel@tonic-gate sema_init(&cs->cs_hbuf.b_io, 0, NULL, 28680Sstevel@tonic-gate SEMA_DEFAULT, NULL); 28690Sstevel@tonic-gate sema_init(&cs->cs_hbuf.b_sem, 0, NULL, 28700Sstevel@tonic-gate SEMA_DEFAULT, NULL); 28710Sstevel@tonic-gate 28720Sstevel@tonic-gate cs->cs_flags &= 
~(MD_RCS_ERROR); 28730Sstevel@tonic-gate /* 28740Sstevel@tonic-gate * If we have already done'ed the i/o but have done prewrite 28750Sstevel@tonic-gate * on this child, then reset PWDONE flag and bump pwfrags before 28760Sstevel@tonic-gate * restarting i/o. 28770Sstevel@tonic-gate * If pwfrags is zero, we have already 'iodone'd the i/o so 28780Sstevel@tonic-gate * leave things alone. We don't want to re-'done' it. 28790Sstevel@tonic-gate */ 28800Sstevel@tonic-gate mutex_enter(&ps->ps_mx); 28810Sstevel@tonic-gate if (cs->cs_flags & MD_RCS_PWDONE) { 28820Sstevel@tonic-gate cs->cs_flags &= ~MD_RCS_PWDONE; 28830Sstevel@tonic-gate ps->ps_pwfrags++; 28840Sstevel@tonic-gate } 28850Sstevel@tonic-gate mutex_exit(&ps->ps_mx); 28860Sstevel@tonic-gate raid_write_io(un, cs); 28870Sstevel@tonic-gate } 28880Sstevel@tonic-gate 28890Sstevel@tonic-gate /* 28900Sstevel@tonic-gate * NAME: raid_wrerr 28910Sstevel@tonic-gate * DESCRIPTION: RAID metadevice write routine 28920Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to a child structure 28930Sstevel@tonic-gate * LOCKS: must obtain unit writer lock while calling raid_error_state 28940Sstevel@tonic-gate * since a unit or column state transition may take place. 28950Sstevel@tonic-gate * must obtain unit reader lock to retry I/O. 
28960Sstevel@tonic-gate */ 28970Sstevel@tonic-gate static void 28980Sstevel@tonic-gate raid_wrerr(md_raidcs_t *cs) 28990Sstevel@tonic-gate { 29000Sstevel@tonic-gate md_raidps_t *ps; 29010Sstevel@tonic-gate mdi_unit_t *ui; 29020Sstevel@tonic-gate mr_unit_t *un; 29030Sstevel@tonic-gate md_raidcbuf_t *cbuf; 29040Sstevel@tonic-gate 29050Sstevel@tonic-gate ps = cs->cs_ps; 29060Sstevel@tonic-gate ui = ps->ps_ui; 29070Sstevel@tonic-gate 29080Sstevel@tonic-gate un = (mr_unit_t *)md_unit_writerlock(ui); 29090Sstevel@tonic-gate ASSERT(un != 0); 29100Sstevel@tonic-gate 29110Sstevel@tonic-gate if (cs->cs_dbuf.b_flags & B_ERROR) 29120Sstevel@tonic-gate (void) raid_error_state(un, &cs->cs_dbuf); 29130Sstevel@tonic-gate if (cs->cs_pbuf.b_flags & B_ERROR) 29140Sstevel@tonic-gate (void) raid_error_state(un, &cs->cs_pbuf); 29150Sstevel@tonic-gate if (cs->cs_hbuf.b_flags & B_ERROR) 29160Sstevel@tonic-gate (void) raid_error_state(un, &cs->cs_hbuf); 29170Sstevel@tonic-gate for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) 29180Sstevel@tonic-gate if (cbuf->cbuf_bp.b_flags & B_ERROR) 29190Sstevel@tonic-gate (void) raid_error_state(un, &cbuf->cbuf_bp); 29200Sstevel@tonic-gate 29210Sstevel@tonic-gate md_unit_writerexit(ui); 29220Sstevel@tonic-gate 29230Sstevel@tonic-gate ps->ps_flags |= MD_RPS_HSREQ; 29240Sstevel@tonic-gate 29250Sstevel@tonic-gate un = (mr_unit_t *)md_unit_readerlock(ui); 29260Sstevel@tonic-gate 29270Sstevel@tonic-gate /* now attempt the appropriate retry routine */ 29280Sstevel@tonic-gate (*(cs->cs_retry_call))(un, cs); 29290Sstevel@tonic-gate } 29300Sstevel@tonic-gate /* 29310Sstevel@tonic-gate * NAMES: raid_write_error 29320Sstevel@tonic-gate * DESCRIPTION: I/O error handling routine for a RAID metadevice write 29330Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to child structure 29340Sstevel@tonic-gate */ 29350Sstevel@tonic-gate /*ARGSUSED*/ 29360Sstevel@tonic-gate static void 29370Sstevel@tonic-gate raid_write_error(md_raidcs_t *cs) 
29380Sstevel@tonic-gate { 29390Sstevel@tonic-gate md_raidps_t *ps; 29400Sstevel@tonic-gate mdi_unit_t *ui; 29410Sstevel@tonic-gate mr_unit_t *un; 29420Sstevel@tonic-gate md_raidcbuf_t *cbuf; 29430Sstevel@tonic-gate set_t setno; 29440Sstevel@tonic-gate 29450Sstevel@tonic-gate ps = cs->cs_ps; 29460Sstevel@tonic-gate ui = ps->ps_ui; 29470Sstevel@tonic-gate un = cs->cs_un; 29480Sstevel@tonic-gate 29490Sstevel@tonic-gate setno = MD_UN2SET(un); 29500Sstevel@tonic-gate 29510Sstevel@tonic-gate /* 29520Sstevel@tonic-gate * locate each buf that is in error on this io and then 29530Sstevel@tonic-gate * output an error message 29540Sstevel@tonic-gate */ 29550Sstevel@tonic-gate if ((cs->cs_dbuf.b_flags & B_ERROR) && 29560Sstevel@tonic-gate (COLUMN_STATE(un, cs->cs_dcolumn) != RCS_ERRED) && 29570Sstevel@tonic-gate (COLUMN_STATE(un, cs->cs_dcolumn) != RCS_LAST_ERRED)) 29580Sstevel@tonic-gate cmn_err(CE_WARN, "md %s: write error on %s", 29590Sstevel@tonic-gate md_shortname(MD_SID(un)), 29600Sstevel@tonic-gate md_devname(setno, md_expldev(cs->cs_dbuf.b_edev), NULL, 0)); 29610Sstevel@tonic-gate 29620Sstevel@tonic-gate if ((cs->cs_pbuf.b_flags & B_ERROR) && 29630Sstevel@tonic-gate (COLUMN_STATE(un, cs->cs_pcolumn) != RCS_ERRED) && 29640Sstevel@tonic-gate (COLUMN_STATE(un, cs->cs_pcolumn) != RCS_LAST_ERRED)) 29650Sstevel@tonic-gate cmn_err(CE_WARN, "md %s: write error on %s", 29660Sstevel@tonic-gate md_shortname(MD_SID(un)), 29670Sstevel@tonic-gate md_devname(setno, md_expldev(cs->cs_pbuf.b_edev), NULL, 0)); 29680Sstevel@tonic-gate 29690Sstevel@tonic-gate for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) 29700Sstevel@tonic-gate if ((cbuf->cbuf_bp.b_flags & B_ERROR) && 29710Sstevel@tonic-gate (COLUMN_STATE(un, cbuf->cbuf_column) != RCS_ERRED) && 29720Sstevel@tonic-gate (COLUMN_STATE(un, cbuf->cbuf_column) != RCS_LAST_ERRED)) 29730Sstevel@tonic-gate cmn_err(CE_WARN, "md %s: write error on %s", 29740Sstevel@tonic-gate md_shortname(MD_SID(un)), 29750Sstevel@tonic-gate 
md_devname(setno, md_expldev(cbuf->cbuf_bp.b_edev), 29760Sstevel@tonic-gate NULL, 0)); 29770Sstevel@tonic-gate 29780Sstevel@tonic-gate md_unit_readerexit(ui); 29790Sstevel@tonic-gate 29800Sstevel@tonic-gate ASSERT(cs->cs_frags == 0); 29810Sstevel@tonic-gate 29820Sstevel@tonic-gate /* now schedule processing for possible state change */ 29830Sstevel@tonic-gate daemon_request(&md_mstr_daemon, raid_wrerr, 29840Sstevel@tonic-gate (daemon_queue_t *)cs, REQ_OLD); 29850Sstevel@tonic-gate 29860Sstevel@tonic-gate } 29870Sstevel@tonic-gate 29880Sstevel@tonic-gate /* 29890Sstevel@tonic-gate * NAME: raid_write_ponly 29900Sstevel@tonic-gate * DESCRIPTION: RAID metadevice write routine 29910Sstevel@tonic-gate * in the case where only the parity column can be written 29920Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to a child structure 29930Sstevel@tonic-gate */ 29940Sstevel@tonic-gate static void 29950Sstevel@tonic-gate raid_write_ponly(md_raidcs_t *cs) 29960Sstevel@tonic-gate { 29970Sstevel@tonic-gate md_raidps_t *ps; 29980Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 29990Sstevel@tonic-gate 30000Sstevel@tonic-gate ps = cs->cs_ps; 30010Sstevel@tonic-gate /* decrement pwfrags if needed, but not frags */ 30020Sstevel@tonic-gate ASSERT(!(cs->cs_flags & MD_RCS_PWDONE)); 30030Sstevel@tonic-gate raid_free_parent(ps, RFP_DECR_PWFRAGS); 30040Sstevel@tonic-gate cs->cs_flags |= MD_RCS_PWDONE; 30050Sstevel@tonic-gate cs->cs_frags = 1; 30060Sstevel@tonic-gate cs->cs_stage = RAID_WRITE_PONLY_DONE; 30070Sstevel@tonic-gate cs->cs_call = raid_stage; 30080Sstevel@tonic-gate cs->cs_error_call = raid_write_error; 30090Sstevel@tonic-gate cs->cs_retry_call = raid_write_no_retry; 30100Sstevel@tonic-gate if (WRITE_ALT(un, cs->cs_pcolumn)) { 30110Sstevel@tonic-gate cs->cs_frags++; 30120Sstevel@tonic-gate raidio(cs, RIO_ALT | RIO_EXTRA | RIO_PARITY | RIO_WRITE); 30130Sstevel@tonic-gate } 30140Sstevel@tonic-gate raidio(cs, RIO_PARITY | RIO_WRITE); 30150Sstevel@tonic-gate } 
30160Sstevel@tonic-gate 30170Sstevel@tonic-gate /* 30180Sstevel@tonic-gate * NAME: raid_write_ploop 30190Sstevel@tonic-gate * DESCRIPTION: RAID metadevice write routine, constructs parity from 30200Sstevel@tonic-gate * data in other columns. 30210Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to a child structure 30220Sstevel@tonic-gate */ 30230Sstevel@tonic-gate static void 30240Sstevel@tonic-gate raid_write_ploop(md_raidcs_t *cs) 30250Sstevel@tonic-gate { 30260Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 30270Sstevel@tonic-gate uint_t *dbuf; 30280Sstevel@tonic-gate uint_t *pbuf; 30290Sstevel@tonic-gate size_t wordcnt; 30300Sstevel@tonic-gate uint_t psum = 0; 30310Sstevel@tonic-gate 30320Sstevel@tonic-gate wordcnt = cs->cs_bcount / sizeof (uint_t); 30330Sstevel@tonic-gate dbuf = (uint_t *)(void *)(cs->cs_dbuffer + DEV_BSIZE); 30340Sstevel@tonic-gate pbuf = (uint_t *)(void *)(cs->cs_pbuffer + DEV_BSIZE); 30350Sstevel@tonic-gate while (wordcnt--) 30360Sstevel@tonic-gate *pbuf++ ^= *dbuf++; 30370Sstevel@tonic-gate cs->cs_loop++; 30380Sstevel@tonic-gate 30390Sstevel@tonic-gate /* 30400Sstevel@tonic-gate * build parity from scratch using new data, 30410Sstevel@tonic-gate * skip reading the data and parity columns. 
30420Sstevel@tonic-gate */ 30430Sstevel@tonic-gate while (cs->cs_loop == cs->cs_dcolumn || cs->cs_loop == cs->cs_pcolumn) 30440Sstevel@tonic-gate cs->cs_loop++; 30450Sstevel@tonic-gate 30460Sstevel@tonic-gate if (cs->cs_loop != un->un_totalcolumncnt) { 30470Sstevel@tonic-gate cs->cs_frags = 1; 30480Sstevel@tonic-gate raidio(cs, RIO_DATA | RIO_READ | (cs->cs_loop + 1)); 30490Sstevel@tonic-gate return; 30500Sstevel@tonic-gate } 30510Sstevel@tonic-gate 30520Sstevel@tonic-gate /* construct checksum for parity buffer */ 30530Sstevel@tonic-gate wordcnt = cs->cs_bcount / sizeof (uint_t); 30540Sstevel@tonic-gate pbuf = (uint_t *)(void *)(cs->cs_pbuffer + DEV_BSIZE); 30550Sstevel@tonic-gate while (wordcnt--) { 30560Sstevel@tonic-gate psum ^= *pbuf; 30570Sstevel@tonic-gate pbuf++; 30580Sstevel@tonic-gate } 30590Sstevel@tonic-gate RAID_FILLIN_RPW(cs->cs_pbuffer, un, psum, -1, 30600Sstevel@tonic-gate cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid, 30610Sstevel@tonic-gate 1, cs->cs_pcolumn, RAID_PWMAGIC); 30620Sstevel@tonic-gate 30630Sstevel@tonic-gate cs->cs_stage = RAID_NONE; 30640Sstevel@tonic-gate cs->cs_call = raid_write_ponly; 30650Sstevel@tonic-gate cs->cs_error_call = raid_write_error; 30660Sstevel@tonic-gate cs->cs_retry_call = raid_write_err_retry; 30670Sstevel@tonic-gate cs->cs_frags = 1; 30680Sstevel@tonic-gate if (WRITE_ALT(un, cs->cs_pcolumn)) { 30690Sstevel@tonic-gate cs->cs_frags++; 30700Sstevel@tonic-gate raidio(cs, RIO_ALT | RIO_EXTRA | RIO_PARITY | RIO_PREWRITE); 30710Sstevel@tonic-gate } 30720Sstevel@tonic-gate raidio(cs, RIO_PARITY | RIO_PREWRITE); 30730Sstevel@tonic-gate } 30740Sstevel@tonic-gate 30750Sstevel@tonic-gate /* 30760Sstevel@tonic-gate * NAME: raid_write_donly 30770Sstevel@tonic-gate * DESCRIPTION: RAID metadevice write routine 30780Sstevel@tonic-gate * Completed writing data to prewrite entry 30790Sstevel@tonic-gate * in the case where only the data column can be written 30800Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to a child 
structure 30810Sstevel@tonic-gate */ 30820Sstevel@tonic-gate static void 30830Sstevel@tonic-gate raid_write_donly(md_raidcs_t *cs) 30840Sstevel@tonic-gate { 30850Sstevel@tonic-gate md_raidps_t *ps; 30860Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 30870Sstevel@tonic-gate 30880Sstevel@tonic-gate ps = cs->cs_ps; 30890Sstevel@tonic-gate /* WARNING: don't release unit reader lock here... */ 30900Sstevel@tonic-gate /* decrement pwfrags if needed, but not frags */ 30910Sstevel@tonic-gate ASSERT(!(cs->cs_flags & MD_RCS_PWDONE)); 30920Sstevel@tonic-gate raid_free_parent(ps, RFP_DECR_PWFRAGS); 30930Sstevel@tonic-gate cs->cs_flags |= MD_RCS_PWDONE; 30940Sstevel@tonic-gate cs->cs_frags = 1; 30950Sstevel@tonic-gate cs->cs_stage = RAID_WRITE_DONLY_DONE; 30960Sstevel@tonic-gate cs->cs_call = raid_stage; 30970Sstevel@tonic-gate cs->cs_error_call = raid_write_error; 30980Sstevel@tonic-gate cs->cs_retry_call = raid_write_err_retry; 30990Sstevel@tonic-gate if (WRITE_ALT(un, cs->cs_dcolumn)) { 31000Sstevel@tonic-gate cs->cs_frags++; 31010Sstevel@tonic-gate raidio(cs, RIO_ALT | RIO_EXTRA | RIO_DATA | RIO_WRITE); 31020Sstevel@tonic-gate } 31030Sstevel@tonic-gate raidio(cs, RIO_DATA | RIO_WRITE); 31040Sstevel@tonic-gate } 31050Sstevel@tonic-gate 31060Sstevel@tonic-gate /* 31070Sstevel@tonic-gate * NAME: raid_write_got_old 31080Sstevel@tonic-gate * DESCRIPTION: RAID metadevice write routine 31090Sstevel@tonic-gate * completed read of old data and old parity 31100Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to a child structure 31110Sstevel@tonic-gate */ 31120Sstevel@tonic-gate static void 31130Sstevel@tonic-gate raid_write_got_old(md_raidcs_t *cs) 31140Sstevel@tonic-gate { 31150Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 31160Sstevel@tonic-gate 31170Sstevel@tonic-gate ASSERT(IO_READER_HELD(cs->cs_un)); 31180Sstevel@tonic-gate ASSERT(UNIT_READER_HELD(cs->cs_un)); 31190Sstevel@tonic-gate 31200Sstevel@tonic-gate raid_mapin_buf(cs); 31210Sstevel@tonic-gate 
genstandardparity(cs); 31220Sstevel@tonic-gate cs->cs_frags = 2; 31230Sstevel@tonic-gate cs->cs_call = raid_stage; 31240Sstevel@tonic-gate cs->cs_stage = RAID_PREWRITE_DONE; 31250Sstevel@tonic-gate cs->cs_error_call = raid_write_error; 31260Sstevel@tonic-gate cs->cs_retry_call = raid_write_retry; 31270Sstevel@tonic-gate 31280Sstevel@tonic-gate if (WRITE_ALT(un, cs->cs_dcolumn)) { 31290Sstevel@tonic-gate cs->cs_frags++; 31300Sstevel@tonic-gate raidio(cs, RIO_ALT | RIO_EXTRA | RIO_DATA | RIO_PREWRITE); 31310Sstevel@tonic-gate } 31320Sstevel@tonic-gate 31330Sstevel@tonic-gate if (WRITE_ALT(un, cs->cs_pcolumn)) { 31340Sstevel@tonic-gate cs->cs_frags++; 31350Sstevel@tonic-gate raidio(cs, RIO_ALT | RIO_EXTRA | RIO_PARITY | RIO_PREWRITE); 31360Sstevel@tonic-gate } 31370Sstevel@tonic-gate ASSERT(cs->cs_frags < 4); 31380Sstevel@tonic-gate raidio(cs, RIO_DATA | RIO_PREWRITE); 31390Sstevel@tonic-gate raidio(cs, RIO_PARITY | RIO_PREWRITE); 31400Sstevel@tonic-gate } 31410Sstevel@tonic-gate 31420Sstevel@tonic-gate /* 31430Sstevel@tonic-gate * NAME: raid_write_io 31440Sstevel@tonic-gate * DESCRIPTION: RAID metadevice write I/O routine 31450Sstevel@tonic-gate * PARAMETERS: mr_unit_t *un - pointer to a unit structure 31460Sstevel@tonic-gate * md_raidcs_t *cs - pointer to a child structure 31470Sstevel@tonic-gate */ 31480Sstevel@tonic-gate 31490Sstevel@tonic-gate /*ARGSUSED*/ 31500Sstevel@tonic-gate static void 31510Sstevel@tonic-gate raid_write_io(mr_unit_t *un, md_raidcs_t *cs) 31520Sstevel@tonic-gate { 31530Sstevel@tonic-gate md_raidps_t *ps = cs->cs_ps; 31540Sstevel@tonic-gate uint_t *dbuf; 31550Sstevel@tonic-gate uint_t *ubuf; 31560Sstevel@tonic-gate size_t wordcnt; 31570Sstevel@tonic-gate uint_t dsum = 0; 31580Sstevel@tonic-gate int pcheck; 31590Sstevel@tonic-gate int dcheck; 31600Sstevel@tonic-gate 31610Sstevel@tonic-gate ASSERT((un->un_column[cs->cs_pcolumn].un_devstate & 31620Sstevel@tonic-gate RCS_INIT) == 0); 31630Sstevel@tonic-gate 
ASSERT((un->un_column[cs->cs_dcolumn].un_devstate & 31640Sstevel@tonic-gate RCS_INIT) == 0); 31650Sstevel@tonic-gate ASSERT(IO_READER_HELD(un)); 31660Sstevel@tonic-gate ASSERT(UNIT_READER_HELD(un)); 31670Sstevel@tonic-gate ASSERT(cs->cs_flags & MD_RCS_HAVE_PW_SLOTS); 31680Sstevel@tonic-gate if (cs->cs_flags & MD_RCS_LINE) { 31690Sstevel@tonic-gate 31700Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 31710Sstevel@tonic-gate 31720Sstevel@tonic-gate ASSERT(un->un_origcolumncnt == un->un_totalcolumncnt); 31730Sstevel@tonic-gate raid_mapin_buf(cs); 31740Sstevel@tonic-gate cs->cs_frags = un->un_origcolumncnt; 31750Sstevel@tonic-gate cs->cs_call = raid_stage; 31760Sstevel@tonic-gate cs->cs_error_call = raid_write_error; 31770Sstevel@tonic-gate cs->cs_retry_call = raid_write_no_retry; 31780Sstevel@tonic-gate cs->cs_stage = RAID_LINE_PWDONE; 31790Sstevel@tonic-gate genlineparity(cs); 31800Sstevel@tonic-gate return; 31810Sstevel@tonic-gate } 31820Sstevel@tonic-gate 31830Sstevel@tonic-gate pcheck = erred_check_line(un, cs, &un->un_column[cs->cs_pcolumn]); 31840Sstevel@tonic-gate dcheck = erred_check_line(un, cs, &un->un_column[cs->cs_dcolumn]); 31850Sstevel@tonic-gate cs->cs_resync_check = pcheck << RCL_PARITY_OFFSET || dcheck; 31860Sstevel@tonic-gate 31870Sstevel@tonic-gate if (pcheck == RCL_ERRED && dcheck == RCL_ERRED) { 31880Sstevel@tonic-gate int err = EIO; 31890Sstevel@tonic-gate 31900Sstevel@tonic-gate if ((un->un_column[cs->cs_pcolumn].un_devstate == 31910Sstevel@tonic-gate RCS_LAST_ERRED) || 31920Sstevel@tonic-gate (un->un_column[cs->cs_dcolumn].un_devstate == 31930Sstevel@tonic-gate RCS_LAST_ERRED)) 31940Sstevel@tonic-gate err = ENXIO; 31950Sstevel@tonic-gate raid_error_parent(ps, err); 31960Sstevel@tonic-gate ASSERT(!(cs->cs_flags & MD_RCS_PWDONE)); 31970Sstevel@tonic-gate raid_free_child(cs, 1); 31980Sstevel@tonic-gate raid_free_parent(ps, RFP_DECR_FRAGS 31990Sstevel@tonic-gate | RFP_RLS_LOCK | RFP_DECR_PWFRAGS); 32000Sstevel@tonic-gate return; 
32010Sstevel@tonic-gate } 32020Sstevel@tonic-gate 32030Sstevel@tonic-gate if (pcheck & RCL_ERRED) { 32040Sstevel@tonic-gate /* 32050Sstevel@tonic-gate * handle case of only having data drive 32060Sstevel@tonic-gate */ 32070Sstevel@tonic-gate raid_mapin_buf(cs); 32080Sstevel@tonic-gate wordcnt = cs->cs_bcount / sizeof (uint_t); 32090Sstevel@tonic-gate 32100Sstevel@tonic-gate dbuf = (uint_t *)(void *)(cs->cs_dbuffer + DEV_BSIZE); 32110Sstevel@tonic-gate ubuf = (uint_t *)(void *)(cs->cs_addr); 32120Sstevel@tonic-gate 32130Sstevel@tonic-gate while (wordcnt--) { 32140Sstevel@tonic-gate *dbuf = *ubuf; 32150Sstevel@tonic-gate dsum ^= *ubuf; 32160Sstevel@tonic-gate dbuf++; 32170Sstevel@tonic-gate ubuf++; 32180Sstevel@tonic-gate } 32190Sstevel@tonic-gate RAID_FILLIN_RPW(cs->cs_dbuffer, un, dsum, -1, 32200Sstevel@tonic-gate cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid, 32210Sstevel@tonic-gate 1, cs->cs_dcolumn, RAID_PWMAGIC); 32220Sstevel@tonic-gate cs->cs_frags = 1; 32230Sstevel@tonic-gate cs->cs_stage = RAID_NONE; 32240Sstevel@tonic-gate cs->cs_call = raid_write_donly; 32250Sstevel@tonic-gate cs->cs_error_call = raid_write_error; 32260Sstevel@tonic-gate cs->cs_retry_call = raid_write_err_retry; 32270Sstevel@tonic-gate if (WRITE_ALT(un, cs->cs_dcolumn)) { 32280Sstevel@tonic-gate cs->cs_frags++; 32290Sstevel@tonic-gate raidio(cs, RIO_DATA | RIO_ALT | RIO_EXTRA | 32300Sstevel@tonic-gate RIO_PREWRITE); 32310Sstevel@tonic-gate } 32320Sstevel@tonic-gate raidio(cs, RIO_DATA | RIO_PREWRITE); 32330Sstevel@tonic-gate return; 32340Sstevel@tonic-gate } 32350Sstevel@tonic-gate 32360Sstevel@tonic-gate if (dcheck & RCL_ERRED) { 32370Sstevel@tonic-gate /* 32380Sstevel@tonic-gate * handle case of only having parity drive 32390Sstevel@tonic-gate * build parity from scratch using new data, 32400Sstevel@tonic-gate * skip reading the data and parity columns. 
32410Sstevel@tonic-gate */ 32420Sstevel@tonic-gate raid_mapin_buf(cs); 32430Sstevel@tonic-gate cs->cs_loop = 0; 32440Sstevel@tonic-gate while (cs->cs_loop == cs->cs_dcolumn || 32450Sstevel@tonic-gate cs->cs_loop == cs->cs_pcolumn) 32460Sstevel@tonic-gate cs->cs_loop++; 32470Sstevel@tonic-gate 32480Sstevel@tonic-gate /* copy new data in to begin building parity */ 32490Sstevel@tonic-gate bcopy(cs->cs_addr, cs->cs_pbuffer + DEV_BSIZE, cs->cs_bcount); 32500Sstevel@tonic-gate cs->cs_stage = RAID_NONE; 32510Sstevel@tonic-gate cs->cs_call = raid_write_ploop; 32520Sstevel@tonic-gate cs->cs_error_call = raid_write_error; 32530Sstevel@tonic-gate cs->cs_retry_call = raid_write_err_retry; 32540Sstevel@tonic-gate cs->cs_frags = 1; 32550Sstevel@tonic-gate raidio(cs, RIO_DATA | RIO_READ | (cs->cs_loop + 1)); 32560Sstevel@tonic-gate return; 32570Sstevel@tonic-gate } 32580Sstevel@tonic-gate /* 32590Sstevel@tonic-gate * handle normal cases 32600Sstevel@tonic-gate * read old data and old parity 32610Sstevel@tonic-gate */ 32620Sstevel@tonic-gate cs->cs_frags = 2; 32630Sstevel@tonic-gate cs->cs_stage = RAID_NONE; 32640Sstevel@tonic-gate cs->cs_call = raid_write_got_old; 32650Sstevel@tonic-gate cs->cs_error_call = raid_write_error; 32660Sstevel@tonic-gate cs->cs_retry_call = raid_write_retry; 32670Sstevel@tonic-gate ASSERT(ps->ps_magic == RAID_PSMAGIC); 32680Sstevel@tonic-gate raidio(cs, RIO_DATA | RIO_READ); 32690Sstevel@tonic-gate raidio(cs, RIO_PARITY | RIO_READ); 32700Sstevel@tonic-gate } 32710Sstevel@tonic-gate 32720Sstevel@tonic-gate static void 32730Sstevel@tonic-gate raid_enqueue(md_raidcs_t *cs) 32740Sstevel@tonic-gate { 32750Sstevel@tonic-gate mdi_unit_t *ui = cs->cs_ps->ps_ui; 32760Sstevel@tonic-gate kmutex_t *io_list_mutex = &ui->ui_io_lock->io_list_mutex; 32770Sstevel@tonic-gate md_raidcs_t *cs1; 32780Sstevel@tonic-gate 32790Sstevel@tonic-gate mutex_enter(io_list_mutex); 32800Sstevel@tonic-gate ASSERT(! 
(cs->cs_flags & MD_RCS_LLOCKD)); 32810Sstevel@tonic-gate if (ui->ui_io_lock->io_list_front == NULL) { 32820Sstevel@tonic-gate ui->ui_io_lock->io_list_front = cs; 32830Sstevel@tonic-gate ui->ui_io_lock->io_list_back = cs; 32840Sstevel@tonic-gate } else { 32850Sstevel@tonic-gate cs1 = ui->ui_io_lock->io_list_back; 32860Sstevel@tonic-gate cs1->cs_linlck_next = cs; 32870Sstevel@tonic-gate ui->ui_io_lock->io_list_back = cs; 32880Sstevel@tonic-gate } 32890Sstevel@tonic-gate STAT_INC(raid_write_waits); 32900Sstevel@tonic-gate STAT_MAX(raid_max_write_q_length, raid_write_queue_length); 32910Sstevel@tonic-gate cs->cs_linlck_next = NULL; 32920Sstevel@tonic-gate mutex_exit(io_list_mutex); 32930Sstevel@tonic-gate } 32940Sstevel@tonic-gate 32950Sstevel@tonic-gate /* 32960Sstevel@tonic-gate * NAME: raid_write 32970Sstevel@tonic-gate * DESCRIPTION: RAID metadevice write routine 32980Sstevel@tonic-gate * PARAMETERS: mr_unit_t *un - pointer to a unit structure 32990Sstevel@tonic-gate * md_raidcs_t *cs - pointer to a child structure 33000Sstevel@tonic-gate */ 33010Sstevel@tonic-gate 33020Sstevel@tonic-gate /*ARGSUSED*/ 33030Sstevel@tonic-gate static int 33040Sstevel@tonic-gate raid_write(mr_unit_t *un, md_raidcs_t *cs) 33050Sstevel@tonic-gate { 33060Sstevel@tonic-gate int error = 0; 33070Sstevel@tonic-gate md_raidps_t *ps; 33080Sstevel@tonic-gate mdi_unit_t *ui; 33090Sstevel@tonic-gate minor_t mnum; 33100Sstevel@tonic-gate clock_t timeout; 33110Sstevel@tonic-gate 33120Sstevel@tonic-gate ASSERT(IO_READER_HELD(un)); 33130Sstevel@tonic-gate ps = cs->cs_ps; 33140Sstevel@tonic-gate ui = ps->ps_ui; 33150Sstevel@tonic-gate 33160Sstevel@tonic-gate ASSERT(UNIT_STATE(un) != RUS_INIT); 33170Sstevel@tonic-gate if (UNIT_STATE(un) == RUS_LAST_ERRED) 33180Sstevel@tonic-gate error = EIO; 33190Sstevel@tonic-gate 33200Sstevel@tonic-gate /* make sure the write doesn't go beyond the column */ 33210Sstevel@tonic-gate if (cs->cs_blkno + cs->cs_blkcnt > un->un_segsize * un->un_segsincolumn) 
33220Sstevel@tonic-gate error = ENXIO; 33230Sstevel@tonic-gate if (error) 33240Sstevel@tonic-gate goto werror; 33250Sstevel@tonic-gate 33260Sstevel@tonic-gate getresources(cs); 33270Sstevel@tonic-gate 33280Sstevel@tonic-gate /* 33290Sstevel@tonic-gate * this is an advisory loop that keeps the waiting lists short 33300Sstevel@tonic-gate * to reduce cpu time. Since there is a race introduced by not 33310Sstevel@tonic-gate * aquiring all the correct mutexes, use a cv_timedwait to be 33320Sstevel@tonic-gate * sure the write always will wake up and start. 33330Sstevel@tonic-gate */ 33340Sstevel@tonic-gate while (raid_check_pw(cs)) { 33350Sstevel@tonic-gate mutex_enter(&un->un_mx); 33360Sstevel@tonic-gate (void) drv_getparm(LBOLT, &timeout); 33370Sstevel@tonic-gate timeout += md_wr_wait; 33380Sstevel@tonic-gate un->un_rflags |= MD_RFLAG_NEEDPW; 33390Sstevel@tonic-gate STAT_INC(raid_prewrite_waits); 33400Sstevel@tonic-gate (void) cv_timedwait(&un->un_cv, &un->un_mx, timeout); 33410Sstevel@tonic-gate un->un_rflags &= ~MD_RFLAG_NEEDPW; 33420Sstevel@tonic-gate mutex_exit(&un->un_mx); 33430Sstevel@tonic-gate } 33440Sstevel@tonic-gate 33450Sstevel@tonic-gate if (raid_line_writer_lock(cs, 1)) 33460Sstevel@tonic-gate return (0); 33470Sstevel@tonic-gate 33480Sstevel@tonic-gate un = (mr_unit_t *)md_unit_readerlock(ui); 33490Sstevel@tonic-gate cs->cs_un = un; 33500Sstevel@tonic-gate mnum = MD_SID(un); 33510Sstevel@tonic-gate 33520Sstevel@tonic-gate if (un->un_state & RUS_REGEN) { 33530Sstevel@tonic-gate raid_regen_parity(cs); 33540Sstevel@tonic-gate un = MD_UNIT(mnum); 33550Sstevel@tonic-gate cs->cs_un = un; 33560Sstevel@tonic-gate } 33570Sstevel@tonic-gate 33580Sstevel@tonic-gate raid_write_io(un, cs); 33590Sstevel@tonic-gate return (0); 33600Sstevel@tonic-gate werror: 33610Sstevel@tonic-gate /* aquire unit reader lock sinc raid_free_child always drops it */ 33620Sstevel@tonic-gate raid_error_parent(ps, error); 33630Sstevel@tonic-gate raid_free_child(cs, 0); 
	/* decrement both pwfrags and frags */
	raid_free_parent(ps, RFP_DECR_PWFRAGS | RFP_DECR_FRAGS | RFP_RLS_LOCK);
	return (0);
}


/*
 * NAMES:	raid_stage
 * DESCRIPTION: post-processing routine for a RAID metadevice.
 *		Driven by cs->cs_stage: each completed child I/O re-enters
 *		here and either finishes (READ_DONE / WRITE_DONE) or is
 *		re-armed for the next phase of the prewrite/write sequence.
 * PARAMETERS:	md_raidcs_t *cs - pointer to child structure
 */
static void
raid_stage(md_raidcs_t *cs)
{
	md_raidps_t	*ps = cs->cs_ps;
	mr_unit_t	*un = cs->cs_un;
	md_raidcbuf_t	*cbuf;
	buf_t		*bp;
	void		*private;
	int		flag;

	switch (cs->cs_stage) {
	case RAID_READ_DONE:
		raid_free_child(cs, 1);
		/* decrement readfrags */
		raid_free_parent(ps, RFP_DECR_READFRAGS | RFP_RLS_LOCK);
		return;

	case RAID_WRITE_DONE:
	case RAID_WRITE_PONLY_DONE:
	case RAID_WRITE_DONLY_DONE:
		/*
		 * Completed writing real parity and/or data.
		 * The prewrite phase must already have finished
		 * (MD_RCS_PWDONE was set when it did).
		 */
		ASSERT(cs->cs_flags & MD_RCS_PWDONE);
		raid_free_child(cs, 1);
		/* decrement frags but not pwfrags */
		raid_free_parent(ps, RFP_DECR_FRAGS | RFP_RLS_LOCK);
		return;

	case RAID_PREWRITE_DONE:
		/*
		 * completed writing data and parity to prewrite entries;
		 * now arm the child for the real data+parity writes.
		 */
		/*
		 * WARNING: don't release unit reader lock here..
		 * decrement pwfrags but not frags
		 */
		raid_free_parent(ps, RFP_DECR_PWFRAGS);
		cs->cs_flags |= MD_RCS_PWDONE;
		/* two mandatory I/Os below: the data and parity writes */
		cs->cs_frags = 2;
		cs->cs_stage = RAID_WRITE_DONE;
		cs->cs_call = raid_stage;
		cs->cs_error_call = raid_write_error;
		cs->cs_retry_call = raid_write_no_retry;
		/* extra writes go to the alternate (resync) columns if any */
		if (WRITE_ALT(un, cs->cs_pcolumn)) {
			cs->cs_frags++;
			raidio(cs, RIO_ALT | RIO_EXTRA | RIO_PARITY |
			    RIO_WRITE);
		}
		if (WRITE_ALT(un, cs->cs_dcolumn)) {
			cs->cs_frags++;
			raidio(cs, RIO_ALT | RIO_EXTRA | RIO_DATA | RIO_WRITE);
		}
		ASSERT(cs->cs_frags < 4);
		raidio(cs, RIO_DATA | RIO_WRITE);
		raidio(cs, RIO_PARITY | RIO_WRITE);
		if (cs->cs_pw_inval_list) {
			raid_free_pwinvalidate(cs);
		}
		return;

	case RAID_LINE_PWDONE:
		/*
		 * Full-line prewrite completed: issue the real write for
		 * every original column, then data and parity I/Os.
		 */
		ASSERT(cs->cs_frags == 0);
		raid_free_parent(ps, RFP_DECR_PWFRAGS);
		cs->cs_flags |= MD_RCS_PWDONE;
		cs->cs_frags = un->un_origcolumncnt;
		cs->cs_call = raid_stage;
		cs->cs_error_call = raid_write_error;
		cs->cs_retry_call = raid_write_no_retry;
		cs->cs_stage = RAID_WRITE_DONE;
		for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) {
			/*
			 * fill in buffer for write to prewrite area
			 */
			bp = &cbuf->cbuf_bp;
			bp->b_back = bp;
			bp->b_forw = bp;
			/* skip the prewrite header block in the buffer */
			bp->b_un.b_addr = cbuf->cbuf_buffer + DEV_BSIZE;
			bp->b_bcount = cbuf->cbuf_bcount;
			bp->b_bufsize = cbuf->cbuf_bcount;
			bp->b_lblkno =
			    un->un_column[cbuf->cbuf_column].un_devstart +
			    cs->cs_blkno;
			bp->b_flags &= ~(B_READ | B_WRITE | B_ERROR);
			bp->b_flags &= ~nv_available;
			bp->b_flags |= B_WRITE | B_BUSY;
			bp->b_iodone = (int (*)())raid_done;
			bp->b_edev = md_dev64_to_dev(
			    un->un_column[cbuf->cbuf_column].un_dev);
			/* b_chain carries the child back to raid_done() */
			bp->b_chain = (struct buf *)cs;
			private = cs->cs_strategy_private;
			flag = cs->cs_strategy_flag;
			md_call_strategy(bp, flag, private);
		}
		raidio(cs, RIO_DATA | RIO_WRITE);
		raidio(cs, RIO_PARITY | RIO_WRITE);
		if (cs->cs_pw_inval_list) {
			raid_free_pwinvalidate(cs);
		}
		return;

	default:
		ASSERT(0);
		break;
	}
}
/*
 * NAME:	md_raid_strategy
 * DESCRIPTION: RAID metadevice I/O operations entry point.
 *		Splits the user buffer into per-line child requests
 *		(raid_iosetup) and dispatches each via raid_read/raid_write.
 * PARAMETERS:	buf_t	  *pb - pointer to a user I/O buffer
 *		int	 flag - metadevice specific flag
 *		void *private - carry over flag ??
 *
 */

void
md_raid_strategy(buf_t *pb, int flag, void *private)
{
	md_raidps_t	*ps;
	md_raidcs_t	*cs;
	int		doing_writes;
	int		err;
	mr_unit_t	*un;
	mdi_unit_t	*ui;
	size_t		count;
	diskaddr_t	blkno;
	caddr_t		addr;
	off_t		offset;
	int		colcnt;
	minor_t		mnum;
	set_t		setno;

	ui = MDI_UNIT(getminor(pb->b_edev));
	md_kstat_waitq_enter(ui);
	un = (mr_unit_t *)md_io_readerlock(ui);
	setno = MD_MIN2SET(getminor(pb->b_edev));

	if ((flag & MD_NOBLOCK) == 0) {
		if (md_inc_iocount(setno) != 0) {
			/* set is going away; fail the I/O with ENXIO */
			pb->b_flags |= B_ERROR;
			pb->b_error = ENXIO;
			pb->b_resid = pb->b_bcount;
			md_io_readerexit(ui);
			biodone(pb);
			return;
		}
	} else {
		md_inc_iocount_noblock(setno);
	}

	mnum = MD_SID(un);
	colcnt = un->un_totalcolumncnt - 1;
	count = pb->b_bcount;

	STAT_CHECK(raid_512, count == 512);
	STAT_CHECK(raid_1024, count == 1024);
	STAT_CHECK(raid_1024_8192, count > 1024 && count < 8192);
	STAT_CHECK(raid_8192, count == 8192);
	STAT_CHECK(raid_8192_bigger, count > 8192);

	(void *) md_unit_readerlock(ui);
	if (!(flag & MD_STR_NOTTOP)) {
		err = md_checkbuf(ui, (md_unit_t *)un, pb); /* check and map */
		if (err != 0) {
			/* md_checkbuf has already completed the buf */
			md_kstat_waitq_exit(ui);
			md_io_readerexit(ui);
			return;
		}
	}
	md_unit_readerexit(ui);

	STAT_INC(raid_total_io);

	/* allocate a parent structure for the user I/O */
	ps = kmem_cache_alloc(raid_parent_cache, MD_ALLOCFLAGS);
	raid_parent_init(ps);

	/*
	 * Save essential information from the original buffhdr
	 * in the md_save structure.
	 */
	ps->ps_un = un;
	ps->ps_ui = ui;
	ps->ps_bp = pb;
	ps->ps_addr = pb->b_un.b_addr;

	if ((pb->b_flags & B_READ) == 0) {
		ps->ps_flags |= MD_RPS_WRITE;
		doing_writes = 1;
		STAT_INC(raid_writes);
	} else {
		ps->ps_flags |= MD_RPS_READ;
		doing_writes = 0;
		STAT_INC(raid_reads);
	}

	count = lbtodb(pb->b_bcount);	/* transfer count (in blocks) */
	blkno = pb->b_lblkno;		/* block number on device */
	addr = 0;
	offset = 0;
	/* frag counts start at 1 for the final child issued after the loop */
	ps->ps_pwfrags = 1;
	ps->ps_frags = 1;
	md_kstat_waitq_to_runq(ui);

	do {
		cs = kmem_cache_alloc(raid_child_cache, MD_ALLOCFLAGS);
		raid_child_init(cs);
		cs->cs_ps = ps;
		cs->cs_un = un;
		cs->cs_mdunit = mnum;
		cs->cs_strategy_flag = flag;
		cs->cs_strategy_private = private;
		cs->cs_addr = addr;
		cs->cs_offset = offset;
		/* carve the next chunk; returns the remaining block count */
		count = raid_iosetup(un, blkno, count, cs);
		if (cs->cs_flags & MD_RCS_LINE) {
			/* full-line child covers one stripe across columns */
			blkno += (cs->cs_blkcnt * colcnt);
			offset += (cs->cs_bcount * colcnt);
		} else {
			blkno += cs->cs_blkcnt;
			offset += cs->cs_bcount;
		}
		/* for each cs bump up the ps_pwfrags and ps_frags fields */
		if (count) {
			mutex_enter(&ps->ps_mx);
			ps->ps_pwfrags++;
			ps->ps_frags++;
			mutex_exit(&ps->ps_mx);
			if (doing_writes)
				(void) raid_write(un, cs);
			else
				(void) raid_read(un, cs);
		}
	} while (count);
	/* issue the last child (accounted for by the initial frag of 1) */
	if (doing_writes) {
		(void) raid_write(un, cs);
	} else
		(void) raid_read(un, cs);

	if (! (flag & MD_STR_NOTTOP) && panicstr) {
		/* at panic time, poll the done daemon until the I/O drains */
		while (! (ps->ps_flags & MD_RPS_DONE)) {
			md_daemon(1, &md_done_daemon);
			drv_usecwait(10);
		}
		kmem_cache_free(raid_parent_cache, ps);
	}
}

/*
 * NAMES:	raid_snarf
 * DESCRIPTION: RAID metadevice SNARF entry point.  Walks the metadb
 *		records for this set, converting old 32-bit records to the
 *		big in-core format and building in-core units.
 * PARAMETERS:	md_snarfcmd_t cmd,
 *		set_t setno
 * RETURNS:
 */
static int
raid_snarf(md_snarfcmd_t cmd, set_t setno)
{
	mr_unit_t	*un;
	mddb_recid_t	recid;
	int		gotsomething;
	int		all_raid_gotten;
	mddb_type_t	typ1;
	uint_t		ncol;
	mddb_de_ic_t	*dep;
	mddb_rb32_t	*rbp;
	size_t		newreqsize;
	mr_unit_t	*big_un;
	mr_unit32_od_t	*small_un;


	if (cmd == MD_SNARF_CLEANUP)
		return (0);

	all_raid_gotten = 1;
	gotsomething = 0;
	typ1 = (mddb_type_t)md_getshared_key(setno,
	    raid_md_ops.md_driver.md_drivername);
	recid = mddb_makerecid(setno, 0);

	while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) {
		if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) {
			continue;
		}

		dep = mddb_getrecdep(recid);
		dep->de_flags = MDDB_F_RAID;
		rbp = dep->de_rb;
		/*
		 * NOTE(review): no default case below — if an unknown
		 * rb_revision ever appears, 'un' is used uninitialized
		 * at MDDB_NOTE_FN.  Presumably revisions are exhaustive
		 * here; verify against the mddb revision definitions.
		 */
		switch (rbp->rb_revision) {
		case MDDB_REV_RB:
		case MDDB_REV_RBFN:
			if ((rbp->rb_private & MD_PRV_CONVD) == 0) {
				/*
				 * This means, we have an old and small record
				 * and this record hasn't already been
				 * converted.  Before we create an incore
				 * metadevice from this we have to convert it to
				 * a big record.
				 */
				small_un =
				    (mr_unit32_od_t *)mddb_getrecaddr(recid);
				ncol = small_un->un_totalcolumncnt;
				newreqsize = sizeof (mr_unit_t) +
				    ((ncol - 1) * sizeof (mr_column_t));
				big_un = (mr_unit_t *)kmem_zalloc(newreqsize,
				    KM_SLEEP);
				raid_convert((caddr_t)small_un, (caddr_t)big_un,
				    SMALL_2_BIG);
				kmem_free(small_un, dep->de_reqsize);
				dep->de_rb_userdata = big_un;
				dep->de_reqsize = newreqsize;
				un = big_un;
				rbp->rb_private |= MD_PRV_CONVD;
			} else {
				/*
				 * Record has already been converted.  Just
				 * get its address.
				 */
				un = (mr_unit_t *)mddb_getrecaddr(recid);
			}
			un->c.un_revision &= ~MD_64BIT_META_DEV;
			break;
		case MDDB_REV_RB64:
		case MDDB_REV_RB64FN:
			/* Big device */
			un = (mr_unit_t *)mddb_getrecaddr(recid);
			un->c.un_revision |= MD_64BIT_META_DEV;
			un->c.un_flag |= MD_EFILABEL;
			break;
		}
		MDDB_NOTE_FN(rbp->rb_revision, un->c.un_revision);

		/*
		 * Create minor device node for snarfed entry.
		 */
		(void) md_create_minor_node(MD_MIN2SET(MD_SID(un)), MD_SID(un));

		if (MD_UNIT(MD_SID(un)) != NULL) {
			/* unit already exists; mark the record for deletion */
			mddb_setrecprivate(recid, MD_PRV_PENDDEL);
			continue;
		}
		all_raid_gotten = 0;
		if (raid_build_incore((void *)un, 1) == 0) {
			mddb_setrecprivate(recid, MD_PRV_GOTIT);
			md_create_unit_incore(MD_SID(un), &raid_md_ops,
			    1);
			gotsomething = 1;
		} else if (un->mr_ic) {
			/* build failed: release the in-core allocations */
			kmem_free(un->un_column_ic, sizeof (mr_column_ic_t) *
			    un->un_totalcolumncnt);
			kmem_free(un->mr_ic, sizeof (*un->mr_ic));
		}
	}

	if (!all_raid_gotten) {
		return (gotsomething);
	}

	/* everything snarfed: mark any leftover records for deletion */
	recid = mddb_makerecid(setno, 0);
	while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0)
		if (!(mddb_getrecprivate(recid) & MD_PRV_GOTIT))
			mddb_setrecprivate(recid, MD_PRV_PENDDEL);

	return (0);
}

/*
 * NAMES:	raid_halt
 * DESCRIPTION: RAID metadevice HALT entry point.
 *		MD_HALT_CHECK reports whether any RAID unit in the set is
 *		open; MD_HALT_DOIT resets every RAID unit in the set.
 * PARAMETERS:	md_haltcmd_t cmd -
 *		set_t	setno -
 * RETURNS:
 */
static int
raid_halt(md_haltcmd_t cmd, set_t setno)
{
	set_t		i;
	mdi_unit_t	*ui;
	minor_t		mnum;

	if (cmd == MD_HALT_CLOSE)
		return (0);

	if (cmd == MD_HALT_OPEN)
		return (0);

	if (cmd == MD_HALT_UNLOAD)
		return (0);

	if (cmd == MD_HALT_CHECK) {
		for (i = 0; i < md_nunits; i++) {
			mnum = MD_MKMIN(setno, i);
			if ((ui = MDI_UNIT(mnum)) == NULL)
				continue;
			/* only consider units owned by this (RAID) driver */
			if (ui->ui_opsindex != raid_md_ops.md_selfindex)
				continue;
			if (md_unit_isopen(ui))
				return (1);
		}
		return (0);
	}

	if (cmd != MD_HALT_DOIT)
		return (1);

	for (i = 0; i < md_nunits; i++) {
		mnum = MD_MKMIN(setno, i);
		if ((ui = MDI_UNIT(mnum)) == NULL)
			continue;
		if (ui->ui_opsindex != raid_md_ops.md_selfindex)
			continue;
		reset_raid((mr_unit_t *)MD_UNIT(mnum), mnum, 0);
	}
	return (0);
}

/*
 * NAMES:	raid_close_all_devs
 * DESCRIPTION: Close all the devices of the unit.
37940Sstevel@tonic-gate * PARAMETERS: mr_unit_t *un - pointer to unit structure 37950Sstevel@tonic-gate * RETURNS: 37960Sstevel@tonic-gate */ 37970Sstevel@tonic-gate void 37980Sstevel@tonic-gate raid_close_all_devs(mr_unit_t *un, int init_pw, int md_cflags) 37990Sstevel@tonic-gate { 38000Sstevel@tonic-gate int i; 38010Sstevel@tonic-gate mr_column_t *device; 38020Sstevel@tonic-gate 38030Sstevel@tonic-gate for (i = 0; i < un->un_totalcolumncnt; i++) { 38040Sstevel@tonic-gate device = &un->un_column[i]; 38050Sstevel@tonic-gate if (device->un_devflags & MD_RAID_DEV_ISOPEN) { 38060Sstevel@tonic-gate ASSERT((device->un_dev != (md_dev64_t)0) && 38070Sstevel@tonic-gate (device->un_dev != NODEV64)); 38080Sstevel@tonic-gate if ((device->un_devstate & RCS_OKAY) && init_pw) 38090Sstevel@tonic-gate (void) init_pw_area(un, device->un_dev, 38100Sstevel@tonic-gate device->un_pwstart, i); 38110Sstevel@tonic-gate md_layered_close(device->un_dev, md_cflags); 38120Sstevel@tonic-gate device->un_devflags &= ~MD_RAID_DEV_ISOPEN; 38130Sstevel@tonic-gate } 38140Sstevel@tonic-gate } 38150Sstevel@tonic-gate } 38160Sstevel@tonic-gate 38170Sstevel@tonic-gate /* 38180Sstevel@tonic-gate * NAMES: raid_open_all_devs 38190Sstevel@tonic-gate * DESCRIPTION: Open all the components (columns) of the device unit. 
 * PARAMETERS:	mr_unit_t *un - pointer to unit structure
 * RETURNS:
 */
static int
raid_open_all_devs(mr_unit_t *un, int md_oflags)
{
	minor_t		mnum = MD_SID(un);
	int		i;
	int		not_opened = 0;
	int		commit = 0;
	int		col = -1;
	mr_column_t	*device;
	set_t		setno = MD_MIN2SET(MD_SID(un));
	side_t		side = mddb_getsidenum(setno);
	mdkey_t		key;
	mdi_unit_t	*ui = MDI_UNIT(mnum);

	ui->ui_tstate &= ~MD_INACCESSIBLE;

	for (i = 0; i < un->un_totalcolumncnt; i++) {
		md_dev64_t tmpdev;

		device = &un->un_column[i];

		/* a column already in error counts as not opened */
		if (COLUMN_STATE(un, i) & RCS_ERRED) {
			not_opened++;
			continue;
		}

		if (device->un_devflags & MD_RAID_DEV_ISOPEN)
			continue;

		tmpdev = device->un_dev;
		/*
		 * Open by device id
		 */
		key = HOTSPARED(un, i) ?
		    device->un_hs_key : device->un_orig_key;
		if ((md_getmajor(tmpdev) != md_major) &&
		    md_devid_found(setno, side, key) == 1) {
			tmpdev = md_resolve_bydevid(mnum, tmpdev, key);
		}
		if (md_layered_open(mnum, &tmpdev, md_oflags)) {
			/* open failed; remember the (possibly new) dev */
			device->un_dev = tmpdev;
			not_opened++;
			continue;
		}
		device->un_dev = tmpdev;
		device->un_devflags |= MD_RAID_DEV_ISOPEN;
	}

	/* if open errors and errored devices are 1 then device can run */
	if (not_opened > 1) {
		cmn_err(CE_WARN,
		    "md: %s failed to open. open error on %s\n",
		    md_shortname(MD_SID(un)),
		    md_devname(MD_UN2SET(un), device->un_orig_dev,
		    NULL, 0));

		ui->ui_tstate |= MD_INACCESSIBLE;

		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OPEN_FAIL, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));

		return (not_opened > 1);
	}

	for (i = 0; i < un->un_totalcolumncnt; i++) {
		device = &un->un_column[i];
		if (device->un_devflags & MD_RAID_DEV_ISOPEN) {
			if (device->un_devstate & RCS_LAST_ERRED) {
				/*
				 * At this point in time there is a possibility
				 * that errors were the result of a controller
				 * failure with more than a single column on it
				 * so clear out last errored columns and let
				 * errors re-occur if necessary.
				 */
				raid_set_state(un, i, RCS_OKAY, 0);
				commit++;
			}
			continue;
		}
		/* at most one column may remain unopened here */
		ASSERT(col == -1);
		col = i;
	}

	if (col != -1) {
		raid_set_state(un, col, RCS_ERRED, 0);
		commit++;
	}

	if (commit)
		raid_commit(un, NULL);

	if (col != -1) {
		if (COLUMN_STATE(un, col) & RCS_ERRED) {
			SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED,
			    SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
		} else if (COLUMN_STATE(un, col) & RCS_LAST_ERRED) {
			SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED,
			    SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
		}
	}

	return (0);
}

/*
 * NAMES:	raid_internal_open
 * DESCRIPTION: Do the actual RAID open
 * PARAMETERS:	minor_t mnum - minor number of the RAID device
 *		int flag -
 *		int otyp -
 *		int md_oflags - RAID open flags
 * RETURNS:	0 if successful, nonzero otherwise
 */
int
raid_internal_open(minor_t mnum, int flag, int otyp, int md_oflags)
{
	mr_unit_t	*un;
	mdi_unit_t	*ui;
	int		err = 0;
	int		replay_error = 0;

	ui = MDI_UNIT(mnum);
	ASSERT(ui != NULL);

	un = (mr_unit_t *)md_unit_openclose_enter(ui);
	/*
	 * this MUST be checked before md_unit_isopen is checked.
	 * raid_init_columns sets md_unit_isopen to block reset, halt.
	 */
	if ((UNIT_STATE(un) & (RUS_INIT | RUS_DOI)) &&
	    !(md_oflags & MD_OFLG_ISINIT)) {
		md_unit_openclose_exit(ui);
		return (EAGAIN);
	}

	/* already open (or init open): just bump the open count */
	if ((md_oflags & MD_OFLG_ISINIT) || md_unit_isopen(ui)) {
		err = md_unit_incopen(mnum, flag, otyp);
		goto out;
	}

	/* first open: upgrade to the unit writer lock */
	md_unit_readerexit(ui);

	un = (mr_unit_t *)md_unit_writerlock(ui);
	if (raid_open_all_devs(un, md_oflags) == 0) {
		if ((err = md_unit_incopen(mnum, flag, otyp)) != 0) {
			md_unit_writerexit(ui);
			un = (mr_unit_t *)md_unit_readerlock(ui);
			raid_close_all_devs(un, 0, md_oflags);
			goto out;
		}
	} else {
		/*
		 * if this unit contains more than two errored components
		 * should return error and close all opened devices
		 */

		md_unit_writerexit(ui);
		un = (mr_unit_t *)md_unit_readerlock(ui);
		raid_close_all_devs(un, 0, md_oflags);
		md_unit_openclose_exit(ui);
		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OPEN_FAIL, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));
		return (ENXIO);
	}

	/* replay the prewrite log once per unit lifetime */
	if (!(MD_STATUS(un) & MD_UN_REPLAYED)) {
		replay_error = raid_replay(un);
		MD_STATUS(un) |= MD_UN_REPLAYED;
	}

	md_unit_writerexit(ui);
	un = (mr_unit_t *)md_unit_readerlock(ui);

	/* a read-only replay failure is tolerable for a read-only open */
	if ((replay_error == RAID_RPLY_READONLY) &&
	    ((flag & (FREAD | FWRITE)) == FREAD)) {
		md_unit_openclose_exit(ui);
		return (0);
	}

	/* allocate hotspare if possible */
	(void) raid_hotspares();


out:
	md_unit_openclose_exit(ui);
	return (err);
}
/*
 * NAMES:	raid_open
 * DESCRIPTION: RAID metadevice OPEN entry point
 * PARAMETERS:	dev_t dev -
 *		int flag -
 *		int otyp -
 *		cred_t * cred_p -
 *		int md_oflags -
 * RETURNS:
 */
/*ARGSUSED1*/
static int
raid_open(dev_t *dev, int flag, int otyp, cred_t *cred_p, int md_oflags)
{
	int		error = 0;

	if (error = raid_internal_open(getminor(*dev), flag, otyp, md_oflags)) {
		return (error);
	}
	return (0);
}

/*
 * NAMES:	raid_internal_close
 * DESCRIPTION: RAID metadevice CLOSE actual implementation
 * PARAMETERS:	minor_t - minor number of the RAID device
 *		int otyp -
 *		int init_pw -
 *		int md_cflags - RAID close flags
 * RETURNS:	0 if successful, nonzero otherwise
 */
/*ARGSUSED*/
int
raid_internal_close(minor_t mnum, int otyp, int init_pw, int md_cflags)
{
	mdi_unit_t	*ui = MDI_UNIT(mnum);
	mr_unit_t	*un;
	int		err = 0;

	/* single thread */
	un = (mr_unit_t *)md_unit_openclose_enter(ui);

	/* count closed */
	if ((err = md_unit_decopen(mnum, otyp)) != 0)
		goto out;

	/* close devices, if necessary */
	if (! md_unit_isopen(ui) || (md_cflags & MD_OFLG_PROBEDEV)) {
		raid_close_all_devs(un, init_pw, md_cflags);
	}

	/* unlock, return success */
out:
	md_unit_openclose_exit(ui);
	return (err);
}

/*
 * NAMES:	raid_close
 * DESCRIPTION: RAID metadevice close entry point
 * PARAMETERS:	dev_t dev -
 *		int flag -
 *		int otyp -
 *		cred_t * cred_p -
 *		int md_oflags -
 * RETURNS:
 */
/*ARGSUSED1*/
static int
raid_close(dev_t dev, int flag, int otyp, cred_t *cred_p, int md_cflags)
{
	int	retval;

	/* hold off new I/O while the unit is being closed */
	(void) md_io_writerlock(MDI_UNIT(getminor(dev)));
	retval = raid_internal_close(getminor(dev), otyp, 1, md_cflags);
	(void) md_io_writerexit(MDI_UNIT(getminor(dev)));
	return (retval);
}

/*
 * raid_probe_close_all_devs
 */
void
raid_probe_close_all_devs(mr_unit_t *un)
{
	int		i;
	mr_column_t	*device;

	for (i = 0; i < un->un_totalcolumncnt; i++) {
		device = &un->un_column[i];

		if (device->un_devflags & MD_RAID_DEV_PROBEOPEN) {
md_layered_close(device->un_dev, 41030Sstevel@tonic-gate MD_OFLG_PROBEDEV); 41040Sstevel@tonic-gate device->un_devflags &= ~MD_RAID_DEV_PROBEOPEN; 41050Sstevel@tonic-gate } 41060Sstevel@tonic-gate } 41070Sstevel@tonic-gate } 41080Sstevel@tonic-gate /* 41090Sstevel@tonic-gate * Raid_probe_dev: 41100Sstevel@tonic-gate * 41110Sstevel@tonic-gate * On entry the unit writerlock is held 41120Sstevel@tonic-gate */ 41130Sstevel@tonic-gate static int 41140Sstevel@tonic-gate raid_probe_dev(mdi_unit_t *ui, minor_t mnum) 41150Sstevel@tonic-gate { 41160Sstevel@tonic-gate mr_unit_t *un; 41170Sstevel@tonic-gate int i; 41180Sstevel@tonic-gate int not_opened = 0; 41190Sstevel@tonic-gate int commit = 0; 41200Sstevel@tonic-gate int col = -1; 41210Sstevel@tonic-gate mr_column_t *device; 41220Sstevel@tonic-gate int md_devopen = 0; 41230Sstevel@tonic-gate 41240Sstevel@tonic-gate if (md_unit_isopen(ui)) 41250Sstevel@tonic-gate md_devopen++; 41260Sstevel@tonic-gate 41270Sstevel@tonic-gate un = MD_UNIT(mnum); 41280Sstevel@tonic-gate /* 41290Sstevel@tonic-gate * If the state has been set to LAST_ERRED because 41300Sstevel@tonic-gate * of an error when the raid device was open at some 41310Sstevel@tonic-gate * point in the past, don't probe. We really don't want 41320Sstevel@tonic-gate * to reset the state in this case. 
41330Sstevel@tonic-gate */ 41340Sstevel@tonic-gate if (UNIT_STATE(un) == RUS_LAST_ERRED) 41350Sstevel@tonic-gate return (0); 41360Sstevel@tonic-gate 41370Sstevel@tonic-gate ui->ui_tstate &= ~MD_INACCESSIBLE; 41380Sstevel@tonic-gate 41390Sstevel@tonic-gate for (i = 0; i < un->un_totalcolumncnt; i++) { 41400Sstevel@tonic-gate md_dev64_t tmpdev; 41410Sstevel@tonic-gate 41420Sstevel@tonic-gate device = &un->un_column[i]; 41430Sstevel@tonic-gate if (COLUMN_STATE(un, i) & RCS_ERRED) { 41440Sstevel@tonic-gate not_opened++; 41450Sstevel@tonic-gate continue; 41460Sstevel@tonic-gate } 41470Sstevel@tonic-gate 41480Sstevel@tonic-gate tmpdev = device->un_dev; 41490Sstevel@tonic-gate /* 41500Sstevel@tonic-gate * Currently the flags passed are not needed since 41510Sstevel@tonic-gate * there cannot be an underlying metadevice. However 41520Sstevel@tonic-gate * they are kept here for consistency. 41530Sstevel@tonic-gate * 41540Sstevel@tonic-gate * Open by device id 41550Sstevel@tonic-gate */ 41560Sstevel@tonic-gate tmpdev = md_resolve_bydevid(mnum, tmpdev, HOTSPARED(un, i)? 41570Sstevel@tonic-gate device->un_hs_key : device->un_orig_key); 41580Sstevel@tonic-gate if (md_layered_open(mnum, &tmpdev, 41590Sstevel@tonic-gate MD_OFLG_CONT_ERRS | MD_OFLG_PROBEDEV)) { 41600Sstevel@tonic-gate device->un_dev = tmpdev; 41610Sstevel@tonic-gate not_opened++; 41620Sstevel@tonic-gate continue; 41630Sstevel@tonic-gate } 41640Sstevel@tonic-gate device->un_dev = tmpdev; 41650Sstevel@tonic-gate 41660Sstevel@tonic-gate device->un_devflags |= MD_RAID_DEV_PROBEOPEN; 41670Sstevel@tonic-gate } 41680Sstevel@tonic-gate 41690Sstevel@tonic-gate /* 41700Sstevel@tonic-gate * The code below is careful on setting the LAST_ERRED state. 41710Sstevel@tonic-gate * 41720Sstevel@tonic-gate * If open errors and exactly one device has failed we can run. 41730Sstevel@tonic-gate * If more then one device fails we have to figure out when to set 41740Sstevel@tonic-gate * LAST_ERRED state. 
The rationale is to avoid unnecessary resyncs 41750Sstevel@tonic-gate * since they are painful and time consuming. 41760Sstevel@tonic-gate * 41770Sstevel@tonic-gate * When more than one component/column fails there are 2 scenerios. 41780Sstevel@tonic-gate * 41790Sstevel@tonic-gate * 1. Metadevice has NOT been opened: In this case, the behavior 41800Sstevel@tonic-gate * mimics the open symantics. ie. Only the first failed device 41810Sstevel@tonic-gate * is ERRED and LAST_ERRED is not set. 41820Sstevel@tonic-gate * 41830Sstevel@tonic-gate * 2. Metadevice has been opened: Here the read/write sematics are 41840Sstevel@tonic-gate * followed. The first failed devicce is ERRED and on the next 41850Sstevel@tonic-gate * failed device LAST_ERRED is set. 41860Sstevel@tonic-gate */ 41870Sstevel@tonic-gate 41880Sstevel@tonic-gate if (not_opened > 1 && !md_devopen) { 41890Sstevel@tonic-gate cmn_err(CE_WARN, 41900Sstevel@tonic-gate "md: %s failed to open. open error on %s\n", 41910Sstevel@tonic-gate md_shortname(MD_SID(un)), 41920Sstevel@tonic-gate md_devname(MD_UN2SET(un), device->un_orig_dev, 41930Sstevel@tonic-gate NULL, 0)); 41940Sstevel@tonic-gate SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OPEN_FAIL, SVM_TAG_METADEVICE, 41950Sstevel@tonic-gate MD_UN2SET(un), MD_SID(un)); 41960Sstevel@tonic-gate raid_probe_close_all_devs(un); 41970Sstevel@tonic-gate ui->ui_tstate |= MD_INACCESSIBLE; 41980Sstevel@tonic-gate return (not_opened > 1); 41990Sstevel@tonic-gate } 42000Sstevel@tonic-gate 42010Sstevel@tonic-gate if (!md_devopen) { 42020Sstevel@tonic-gate for (i = 0; i < un->un_totalcolumncnt; i++) { 42030Sstevel@tonic-gate device = &un->un_column[i]; 42040Sstevel@tonic-gate if (device->un_devflags & MD_RAID_DEV_PROBEOPEN) { 42050Sstevel@tonic-gate if (device->un_devstate & RCS_LAST_ERRED) { 42060Sstevel@tonic-gate /* 42070Sstevel@tonic-gate * At this point in time there is a 42080Sstevel@tonic-gate * possibility that errors were the 42090Sstevel@tonic-gate * result of a controller failure with 
42100Sstevel@tonic-gate * more than a single column on it so 42110Sstevel@tonic-gate * clear out last errored columns and 42120Sstevel@tonic-gate * let errors re-occur is necessary. 42130Sstevel@tonic-gate */ 42140Sstevel@tonic-gate raid_set_state(un, i, RCS_OKAY, 0); 42150Sstevel@tonic-gate commit++; 42160Sstevel@tonic-gate } 42170Sstevel@tonic-gate continue; 42180Sstevel@tonic-gate } 42190Sstevel@tonic-gate ASSERT(col == -1); 42200Sstevel@tonic-gate /* 42210Sstevel@tonic-gate * note if multiple devices are failing then only 42220Sstevel@tonic-gate * the last one is marked as error 42230Sstevel@tonic-gate */ 42240Sstevel@tonic-gate col = i; 42250Sstevel@tonic-gate } 42260Sstevel@tonic-gate 42270Sstevel@tonic-gate if (col != -1) { 42280Sstevel@tonic-gate raid_set_state(un, col, RCS_ERRED, 0); 42290Sstevel@tonic-gate commit++; 42300Sstevel@tonic-gate } 42310Sstevel@tonic-gate 42320Sstevel@tonic-gate } else { 42330Sstevel@tonic-gate for (i = 0; i < un->un_totalcolumncnt; i++) { 42340Sstevel@tonic-gate device = &un->un_column[i]; 42350Sstevel@tonic-gate 42360Sstevel@tonic-gate /* if we have LAST_ERRED go ahead and commit. 
*/ 42370Sstevel@tonic-gate if (un->un_state & RUS_LAST_ERRED) 42380Sstevel@tonic-gate break; 42390Sstevel@tonic-gate /* 42400Sstevel@tonic-gate * could not open the component 42410Sstevel@tonic-gate */ 42420Sstevel@tonic-gate 42430Sstevel@tonic-gate if (!(device->un_devflags & MD_RAID_DEV_PROBEOPEN)) { 42440Sstevel@tonic-gate col = i; 42450Sstevel@tonic-gate raid_set_state(un, col, RCS_ERRED, 0); 42460Sstevel@tonic-gate commit++; 42470Sstevel@tonic-gate } 42480Sstevel@tonic-gate } 42490Sstevel@tonic-gate } 42500Sstevel@tonic-gate 42510Sstevel@tonic-gate if (commit) 42520Sstevel@tonic-gate raid_commit(un, NULL); 42530Sstevel@tonic-gate 42540Sstevel@tonic-gate if (col != -1) { 42550Sstevel@tonic-gate if (COLUMN_STATE(un, col) & RCS_ERRED) { 42560Sstevel@tonic-gate SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, 42570Sstevel@tonic-gate SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un)); 42580Sstevel@tonic-gate } else if (COLUMN_STATE(un, col) & RCS_LAST_ERRED) { 42590Sstevel@tonic-gate SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED, 42600Sstevel@tonic-gate SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un)); 42610Sstevel@tonic-gate } 42620Sstevel@tonic-gate } 42630Sstevel@tonic-gate 42640Sstevel@tonic-gate raid_probe_close_all_devs(un); 42650Sstevel@tonic-gate return (0); 42660Sstevel@tonic-gate } 42670Sstevel@tonic-gate 42680Sstevel@tonic-gate static int 42690Sstevel@tonic-gate raid_imp_set( 42700Sstevel@tonic-gate set_t setno 42710Sstevel@tonic-gate ) 42720Sstevel@tonic-gate { 42730Sstevel@tonic-gate mddb_recid_t recid; 42740Sstevel@tonic-gate int i, gotsomething; 42750Sstevel@tonic-gate mddb_type_t typ1; 42760Sstevel@tonic-gate mddb_de_ic_t *dep; 42770Sstevel@tonic-gate mddb_rb32_t *rbp; 42780Sstevel@tonic-gate mr_unit_t *un64; 42790Sstevel@tonic-gate mr_unit32_od_t *un32; 42801623Stw21770 md_dev64_t self_devt; 42810Sstevel@tonic-gate minor_t *self_id; /* minor needs to be updated */ 42820Sstevel@tonic-gate md_parent_t *parent_id; /* parent needs to be updated */ 42830Sstevel@tonic-gate 
mddb_recid_t *record_id; /* record id needs to be updated */ 42840Sstevel@tonic-gate hsp_t *hsp_id; 42850Sstevel@tonic-gate 42860Sstevel@tonic-gate gotsomething = 0; 42870Sstevel@tonic-gate 42880Sstevel@tonic-gate typ1 = (mddb_type_t)md_getshared_key(setno, 42890Sstevel@tonic-gate raid_md_ops.md_driver.md_drivername); 42900Sstevel@tonic-gate recid = mddb_makerecid(setno, 0); 42910Sstevel@tonic-gate 42920Sstevel@tonic-gate while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) { 42930Sstevel@tonic-gate if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) 42940Sstevel@tonic-gate continue; 42950Sstevel@tonic-gate 42960Sstevel@tonic-gate dep = mddb_getrecdep(recid); 42970Sstevel@tonic-gate rbp = dep->de_rb; 42980Sstevel@tonic-gate 42991623Stw21770 switch (rbp->rb_revision) { 43001623Stw21770 case MDDB_REV_RB: 43011623Stw21770 case MDDB_REV_RBFN: 43020Sstevel@tonic-gate /* 43030Sstevel@tonic-gate * Small device 43040Sstevel@tonic-gate */ 43050Sstevel@tonic-gate un32 = (mr_unit32_od_t *)mddb_getrecaddr(recid); 43060Sstevel@tonic-gate self_id = &(un32->c.un_self_id); 43070Sstevel@tonic-gate parent_id = &(un32->c.un_parent); 43080Sstevel@tonic-gate record_id = &(un32->c.un_record_id); 43090Sstevel@tonic-gate hsp_id = &(un32->un_hsp_id); 43100Sstevel@tonic-gate 43110Sstevel@tonic-gate for (i = 0; i < un32->un_totalcolumncnt; i++) { 43120Sstevel@tonic-gate mr_column32_od_t *device; 43130Sstevel@tonic-gate 43140Sstevel@tonic-gate device = &un32->un_column[i]; 43150Sstevel@tonic-gate if (!md_update_minor(setno, mddb_getsidenum 43160Sstevel@tonic-gate (setno), device->un_orig_key)) 43170Sstevel@tonic-gate goto out; 43180Sstevel@tonic-gate 43190Sstevel@tonic-gate if (device->un_hs_id != 0) 43200Sstevel@tonic-gate device->un_hs_id = MAKERECID( 43210Sstevel@tonic-gate setno, device->un_hs_id); 43220Sstevel@tonic-gate } 43231623Stw21770 break; 43241623Stw21770 case MDDB_REV_RB64: 43251623Stw21770 case MDDB_REV_RB64FN: 43260Sstevel@tonic-gate un64 = (mr_unit_t *)mddb_getrecaddr(recid); 
43270Sstevel@tonic-gate self_id = &(un64->c.un_self_id); 43280Sstevel@tonic-gate parent_id = &(un64->c.un_parent); 43290Sstevel@tonic-gate record_id = &(un64->c.un_record_id); 43300Sstevel@tonic-gate hsp_id = &(un64->un_hsp_id); 43310Sstevel@tonic-gate 43320Sstevel@tonic-gate for (i = 0; i < un64->un_totalcolumncnt; i++) { 43330Sstevel@tonic-gate mr_column_t *device; 43340Sstevel@tonic-gate 43350Sstevel@tonic-gate device = &un64->un_column[i]; 43360Sstevel@tonic-gate if (!md_update_minor(setno, mddb_getsidenum 43370Sstevel@tonic-gate (setno), device->un_orig_key)) 43380Sstevel@tonic-gate goto out; 43390Sstevel@tonic-gate 43400Sstevel@tonic-gate if (device->un_hs_id != 0) 43410Sstevel@tonic-gate device->un_hs_id = MAKERECID( 43420Sstevel@tonic-gate setno, device->un_hs_id); 43430Sstevel@tonic-gate } 43441623Stw21770 break; 43451623Stw21770 } 43461623Stw21770 43471623Stw21770 /* 43481623Stw21770 * If this is a top level and a friendly name metadevice, 43491623Stw21770 * update its minor in the namespace. 
43501623Stw21770 */ 43511623Stw21770 if ((*parent_id == MD_NO_PARENT) && 43521623Stw21770 ((rbp->rb_revision == MDDB_REV_RBFN) || 43531623Stw21770 (rbp->rb_revision == MDDB_REV_RB64FN))) { 43541623Stw21770 43551623Stw21770 self_devt = md_makedevice(md_major, *self_id); 43561623Stw21770 if (!md_update_top_device_minor(setno, 43571623Stw21770 mddb_getsidenum(setno), self_devt)) 43581623Stw21770 goto out; 43590Sstevel@tonic-gate } 43600Sstevel@tonic-gate 43610Sstevel@tonic-gate /* 43620Sstevel@tonic-gate * Update unit with the imported setno 43630Sstevel@tonic-gate */ 43640Sstevel@tonic-gate mddb_setrecprivate(recid, MD_PRV_GOTIT); 43650Sstevel@tonic-gate 43660Sstevel@tonic-gate *self_id = MD_MKMIN(setno, MD_MIN2UNIT(*self_id)); 43670Sstevel@tonic-gate 43680Sstevel@tonic-gate if (*hsp_id != -1) 43690Sstevel@tonic-gate *hsp_id = MAKERECID(setno, DBID(*hsp_id)); 43700Sstevel@tonic-gate 43710Sstevel@tonic-gate if (*parent_id != MD_NO_PARENT) 43720Sstevel@tonic-gate *parent_id = MD_MKMIN(setno, MD_MIN2UNIT(*parent_id)); 43730Sstevel@tonic-gate *record_id = MAKERECID(setno, DBID(*record_id)); 43740Sstevel@tonic-gate gotsomething = 1; 43750Sstevel@tonic-gate } 43760Sstevel@tonic-gate 43770Sstevel@tonic-gate out: 43780Sstevel@tonic-gate return (gotsomething); 43790Sstevel@tonic-gate } 43800Sstevel@tonic-gate 43810Sstevel@tonic-gate static md_named_services_t raid_named_services[] = { 43820Sstevel@tonic-gate {raid_hotspares, "poke hotspares" }, 43830Sstevel@tonic-gate {raid_rename_check, MDRNM_CHECK }, 43840Sstevel@tonic-gate {raid_rename_lock, MDRNM_LOCK }, 43850Sstevel@tonic-gate {(intptr_t (*)()) raid_rename_unlock, MDRNM_UNLOCK }, 43860Sstevel@tonic-gate {(intptr_t (*)()) raid_probe_dev, "probe open test" }, 43870Sstevel@tonic-gate {NULL, 0 } 43880Sstevel@tonic-gate }; 43890Sstevel@tonic-gate 43900Sstevel@tonic-gate md_ops_t raid_md_ops = { 43910Sstevel@tonic-gate raid_open, /* open */ 43920Sstevel@tonic-gate raid_close, /* close */ 43930Sstevel@tonic-gate 
md_raid_strategy, /* strategy */ 43940Sstevel@tonic-gate NULL, /* print */ 43950Sstevel@tonic-gate NULL, /* dump */ 43960Sstevel@tonic-gate NULL, /* read */ 43970Sstevel@tonic-gate NULL, /* write */ 43980Sstevel@tonic-gate md_raid_ioctl, /* ioctl, */ 43990Sstevel@tonic-gate raid_snarf, /* raid_snarf */ 44000Sstevel@tonic-gate raid_halt, /* raid_halt */ 44010Sstevel@tonic-gate NULL, /* aread */ 44020Sstevel@tonic-gate NULL, /* awrite */ 44030Sstevel@tonic-gate raid_imp_set, /* import set */ 44040Sstevel@tonic-gate raid_named_services 44050Sstevel@tonic-gate }; 44060Sstevel@tonic-gate 44070Sstevel@tonic-gate static void 44080Sstevel@tonic-gate init_init() 44090Sstevel@tonic-gate { 44100Sstevel@tonic-gate /* default to a second */ 44110Sstevel@tonic-gate if (md_wr_wait == 0) 44120Sstevel@tonic-gate md_wr_wait = md_hz >> 1; 44130Sstevel@tonic-gate 44140Sstevel@tonic-gate raid_parent_cache = kmem_cache_create("md_raid_parent", 44150Sstevel@tonic-gate sizeof (md_raidps_t), 0, raid_parent_constructor, 44160Sstevel@tonic-gate raid_parent_destructor, raid_run_queue, NULL, NULL, 0); 44170Sstevel@tonic-gate raid_child_cache = kmem_cache_create("md_raid_child", 44180Sstevel@tonic-gate sizeof (md_raidcs_t) - sizeof (buf_t) + biosize(), 0, 44190Sstevel@tonic-gate raid_child_constructor, raid_child_destructor, 44200Sstevel@tonic-gate raid_run_queue, NULL, NULL, 0); 44210Sstevel@tonic-gate raid_cbuf_cache = kmem_cache_create("md_raid_cbufs", 44220Sstevel@tonic-gate sizeof (md_raidcbuf_t), 0, raid_cbuf_constructor, 44230Sstevel@tonic-gate raid_cbuf_destructor, raid_run_queue, NULL, NULL, 0); 44240Sstevel@tonic-gate } 44250Sstevel@tonic-gate 44260Sstevel@tonic-gate static void 44270Sstevel@tonic-gate fini_uninit() 44280Sstevel@tonic-gate { 44290Sstevel@tonic-gate kmem_cache_destroy(raid_parent_cache); 44300Sstevel@tonic-gate kmem_cache_destroy(raid_child_cache); 44310Sstevel@tonic-gate kmem_cache_destroy(raid_cbuf_cache); 44320Sstevel@tonic-gate raid_parent_cache = raid_child_cache 
= raid_cbuf_cache = NULL; 44330Sstevel@tonic-gate } 44340Sstevel@tonic-gate 44350Sstevel@tonic-gate /* define the module linkage */ 44360Sstevel@tonic-gate MD_PLUGIN_MISC_MODULE("raid module %I%", init_init(), fini_uninit()) 4437