10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 51366Spetede * Common Development and Distribution License (the "License"). 61366Spetede * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 217627SChris.Horne@Sun.COM 220Sstevel@tonic-gate /* 23*11066Srafael.vanoni@sun.com * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 240Sstevel@tonic-gate * Use is subject to license terms. 250Sstevel@tonic-gate */ 260Sstevel@tonic-gate 270Sstevel@tonic-gate /* 280Sstevel@tonic-gate * NAME: raid.c 290Sstevel@tonic-gate * 300Sstevel@tonic-gate * DESCRIPTION: Main RAID driver source file containing open, close and I/O 310Sstevel@tonic-gate * operations. 320Sstevel@tonic-gate * 330Sstevel@tonic-gate * ROUTINES PROVIDED FOR EXTERNAL USE: 340Sstevel@tonic-gate * raid_open() - open the RAID metadevice for access. 350Sstevel@tonic-gate * raid_internal_open() - internal open routine of RAID metdevice. 
360Sstevel@tonic-gate * md_raid_strategy() - perform normal I/O operations, 370Sstevel@tonic-gate * such as read and write. 380Sstevel@tonic-gate * raid_close() - close the RAID metadevice. 390Sstevel@tonic-gate * raid_internal_close() - internal close routine of RAID metadevice. 400Sstevel@tonic-gate * raid_snarf() - initialize and clean up MDD records. 410Sstevel@tonic-gate * raid_halt() - reset the RAID metadevice 420Sstevel@tonic-gate * raid_line() - return the line # of this segment 430Sstevel@tonic-gate * raid_dcolumn() - return the data column # of this segment 440Sstevel@tonic-gate * raid_pcolumn() - return the parity column # of this segment 450Sstevel@tonic-gate */ 460Sstevel@tonic-gate 470Sstevel@tonic-gate #include <sys/param.h> 480Sstevel@tonic-gate #include <sys/systm.h> 490Sstevel@tonic-gate #include <sys/conf.h> 500Sstevel@tonic-gate #include <sys/file.h> 510Sstevel@tonic-gate #include <sys/user.h> 520Sstevel@tonic-gate #include <sys/uio.h> 530Sstevel@tonic-gate #include <sys/t_lock.h> 540Sstevel@tonic-gate #include <sys/buf.h> 550Sstevel@tonic-gate #include <sys/dkio.h> 560Sstevel@tonic-gate #include <sys/vtoc.h> 570Sstevel@tonic-gate #include <sys/kmem.h> 580Sstevel@tonic-gate #include <vm/page.h> 590Sstevel@tonic-gate #include <sys/cmn_err.h> 600Sstevel@tonic-gate #include <sys/sysmacros.h> 610Sstevel@tonic-gate #include <sys/types.h> 620Sstevel@tonic-gate #include <sys/mkdev.h> 630Sstevel@tonic-gate #include <sys/stat.h> 640Sstevel@tonic-gate #include <sys/open.h> 650Sstevel@tonic-gate #include <sys/modctl.h> 660Sstevel@tonic-gate #include <sys/ddi.h> 670Sstevel@tonic-gate #include <sys/sunddi.h> 680Sstevel@tonic-gate #include <sys/debug.h> 690Sstevel@tonic-gate #include <sys/lvm/md_raid.h> 700Sstevel@tonic-gate #include <sys/lvm/mdvar.h> 710Sstevel@tonic-gate #include <sys/lvm/md_convert.h> 720Sstevel@tonic-gate 730Sstevel@tonic-gate #include <sys/sysevent/eventdefs.h> 740Sstevel@tonic-gate #include <sys/sysevent/svm.h> 750Sstevel@tonic-gate 
md_ops_t		raid_md_ops;
#ifndef	lint
char			_depends_on[] = "drv/md";
md_ops_t		*md_interface_ops = &raid_md_ops;
#endif	/* lint */

extern unit_t		md_nunits;
extern unit_t		md_nsets;
extern md_set_t		md_set[];
extern int		md_status;
extern major_t		md_major;
extern mdq_anchor_t	md_done_daemon;
extern mdq_anchor_t	md_mstr_daemon;
extern int		md_sleep_for_test;
extern clock_t		md_hz;

extern md_event_queue_t	*md_event_queue;


/*
 * Buffer pool tuning knobs (chunk count / high water / low water).
 * NOTE(review): presumably p* = parity buffers, c* = child structures,
 * b* = data buffers -- confirm against the pool setup code (not in view).
 */
int pchunks		= 16;
int phigh		= 1024;
int plow		= 128;
int cchunks		= 64;
int chigh		= 1024;
int clow		= 512;
int bchunks		= 32;
int bhigh		= 256;
int blow		= 128;

/* I/O statistics, updated by the I/O paths elsewhere in this file */
int raid_total_io		= 0;
int raid_reads			= 0;
int raid_writes			= 0;
int raid_no_bpmaps		= 0;
int raid_512			= 0;
int raid_1024			= 0;
int raid_1024_8192		= 0;
int raid_8192			= 0;
int raid_8192_bigger		= 0;
int raid_line_lock_wait		= 0;

int data_buffer_waits		= 0;
int parity_buffer_waits		= 0;

/* writer line locks */
int raid_writer_locks		= 0;	/* total writer locks */
int raid_write_waits		= 0;	/* total writer locks that waited */
int raid_full_line_writes	= 0;	/* total full line writes */
int raid_write_queue_length	= 0;	/* wait queue length */
int raid_max_write_q_length	= 0;	/* maximum queue length */
int raid_write_locks_active	= 0;	/* writer locks at any time */
int raid_max_write_locks	= 0;	/* maximum writer locks active */

/* read line locks */
int raid_reader_locks		= 0;	/* total reader locks held */
int raid_reader_locks_active	= 0;	/* reader locks held */
int raid_max_reader_locks	= 0;	/* maximum reader locks held in run */
int raid_read_overlaps		= 0;	/* number of times 2 reads hit same line */
int raid_read_waits		= 0;	/* times a reader waited on writer */

/* prewrite stats */
int raid_prewrite_waits		= 0;	/* number of waits for a pw slot */
int raid_pw			= 0;	/* number of pw slots in use */
int raid_prewrite_max		= 0;	/* maximum number of pw slots in use */
int raid_pw_invalidates		= 0;	/* pre-write invalidates (updated elsewhere) */

static clock_t md_wr_wait	= 0;

int nv_available	= 0;	/* presence of nv-ram support in device */
int nv_prewrite		= 1;	/* mark prewrites with nv_available */
int nv_parity		= 1;	/* mark parity with nv_available */

/* kmem caches for parent, child and cbuf structures; set up elsewhere */
kmem_cache_t *raid_parent_cache = NULL;
kmem_cache_t *raid_child_cache = NULL;
kmem_cache_t *raid_cbuf_cache = NULL;

int raid_internal_open(minor_t mnum, int flag, int otyp,
    int md_oflags);

static void	freebuffers(md_raidcs_t *cs);
static int	raid_read(mr_unit_t *un, md_raidcs_t *cs);
static void	raid_read_io(mr_unit_t *un, md_raidcs_t *cs);
static int	raid_write(mr_unit_t *un, md_raidcs_t *cs);
static void	raid_write_io(mr_unit_t *un, md_raidcs_t *cs);
static void	raid_stage(md_raidcs_t *cs);
static void	raid_enqueue(md_raidcs_t *cs);
static diskaddr_t	raid_line(diskaddr_t segment, mr_unit_t *un);
uint_t	raid_dcolumn(diskaddr_t segment, mr_unit_t *un);
static void	getpbuffer(md_raidcs_t *cs);
static void	getdbuffer(md_raidcs_t *cs);
static void	raid_done(buf_t *bp);
static void	raid_io_startup(mr_unit_t *un);

/*
 * NAME:	raid_col2unit
 *
 * DESCRIPTION: Map a column (device) state to the corresponding unit state.
 *		A resyncing column maps to RUS_LAST_ERRED if the unit is
 *		already last-erred, otherwise to RUS_ERRED; erred and
 *		last-erred columns both map to RUS_ERRED.  Panics on any
 *		column state not handled below.
 *
 * PARAMETERS:	rcs_state_t state - column state to translate
 *		rus_state_t unitstate - current unit state (consulted only
 *		    for the RCS_RESYNC case)
 */
static rus_state_t
raid_col2unit(rcs_state_t state, rus_state_t unitstate)
{
	switch (state) {
	case RCS_INIT:
		return (RUS_INIT);
	case RCS_OKAY:
		return (RUS_OKAY);
	case RCS_RESYNC:
		if (unitstate & RUS_LAST_ERRED)
			return (RUS_LAST_ERRED);
		else
			return (RUS_ERRED);
	case RCS_ERRED:
		return (RUS_ERRED);
	case RCS_LAST_ERRED:
		return (RUS_ERRED);
	default:
		break;
	}
	panic("raid_col2unit");
	/*NOTREACHED*/
}

void
/*
 * NAME:	raid_set_state
 *
 * DESCRIPTION: Transition one column of the RAID unit to a new state and
 *		recompute the overall unit state from the states of all
 *		columns.  Emits "needs maintenance" / "last erred" console
 *		warnings when the unit first enters an error state, and
 *		closes the underlying device of a column that goes into
 *		an error state while open.
 *
 * PARAMETERS:	mr_unit_t *un - RAID unit
 *		int col - index of the column whose state is changing
 *		rcs_state_t newstate - target column state
 *		int force - when non-zero, install newstate and the derived
 *		    unit state directly (via raid_col2unit()) with no
 *		    further processing
 *
 * LOCKS:	Unit writer lock must be held when an in-core unit exists
 *		(asserted below).
 */
raid_set_state(mr_unit_t *un, int col, rcs_state_t newstate, int force)
{

	rus_state_t	unitstate, origstate;
	rcs_state_t	colstate;
	rcs_state_t	orig_colstate;
	int		errcnt = 0, okaycnt = 0, resynccnt = 0;
	int		i;
	char		*devname;

	ASSERT(un);
	ASSERT(col < un->un_totalcolumncnt);
	ASSERT(newstate &
	    (RCS_INIT | RCS_INIT_ERRED | RCS_OKAY | RCS_RESYNC | RCS_ERRED |
	    RCS_LAST_ERRED | RCS_REGEN));
	ASSERT((newstate &
	    ~(RCS_INIT | RCS_INIT_ERRED | RCS_OKAY | RCS_RESYNC | RCS_ERRED |
	    RCS_LAST_ERRED | RCS_REGEN))
	    == 0);

	ASSERT(MDI_UNIT(MD_SID(un)) ? UNIT_WRITER_HELD(un) : 1);

	unitstate = un->un_state;
	origstate = unitstate;

	/* forced transition: no validation, no logging, no device close */
	if (force) {
		un->un_column[col].un_devstate = newstate;
		un->un_state = raid_col2unit(newstate, unitstate);
		uniqtime32(&un->un_column[col].un_devtimestamp);
		uniqtime32(&un->un_timestamp);
		return;
	}

	ASSERT(un->un_state &
	    (RUS_INIT | RUS_OKAY | RUS_ERRED | RUS_DOI | RUS_LAST_ERRED |
	    RUS_REGEN));
	ASSERT((un->un_state & ~(RUS_INIT |
	    RUS_OKAY | RUS_ERRED | RUS_DOI | RUS_LAST_ERRED | RUS_REGEN)) == 0);

	/* no-op if the column is already in the requested state */
	if (un->un_column[col].un_devstate == newstate)
		return;

	/* REGEN is a unit-level state; only honored when every column is OKAY */
	if (newstate == RCS_REGEN) {
		if (raid_state_cnt(un, RCS_OKAY) != un->un_totalcolumncnt)
			return;
		un->un_state = RUS_REGEN;
		return;
	}

	orig_colstate = un->un_column[col].un_devstate;

	/*
	 * if there is another column in the error state then this
	 * column should go to the last errored state
	 */
	for (i = 0; i < un->un_totalcolumncnt; i++) {
		if (i == col)
			colstate = newstate;
		else
			colstate = un->un_column[i].un_devstate;
		if (colstate & (RCS_ERRED | RCS_LAST_ERRED | RCS_INIT_ERRED))
			errcnt++;
		if (colstate & RCS_OKAY)
			okaycnt++;
		if (colstate & RCS_RESYNC)
			resynccnt++;
	}
	ASSERT(resynccnt < 2);

	if (okaycnt == un->un_totalcolumncnt)
		unitstate = RUS_OKAY;
	else if (errcnt > 1) {
		/* two or more broken columns: unit goes Last Erred */
		unitstate = RUS_LAST_ERRED;
		if (newstate & RCS_ERRED)
			newstate = RCS_LAST_ERRED;
	} else if (errcnt == 1)
		if (!(unitstate & RUS_LAST_ERRED))
			unitstate = RUS_ERRED;

	/* a unit in Device-Or-Init error stays there */
	if (un->un_state == RUS_DOI)
		unitstate = RUS_DOI;

	un->un_column[col].un_devstate = newstate;
	uniqtime32(&un->un_column[col].un_devtimestamp);
	/*
	 * if there is a last errored column being brought back online
	 * by open or snarf, then be sure to clear the RUS_LAST_ERRED
	 * bit to allow writes.  If there is a real error then the
	 * column will go back into last erred.
	 */
	if ((raid_state_cnt(un, RCS_LAST_ERRED) == 0) &&
	    (raid_state_cnt(un, RCS_ERRED) == 1))
		unitstate = RUS_ERRED;

	un->un_state = unitstate;
	uniqtime32(&un->un_timestamp);

	/* unit just crossed from healthy into an error state: log it */
	if ((! (origstate & (RUS_ERRED|RUS_LAST_ERRED|RUS_DOI))) &&
	    (unitstate & (RUS_ERRED|RUS_LAST_ERRED|RUS_DOI))) {
		devname = md_devname(MD_UN2SET(un),
		    un->un_column[col].un_dev, NULL, 0);

		cmn_err(CE_WARN, "md: %s: %s needs maintenance",
		    md_shortname(MD_SID(un)), devname);

		if (unitstate & RUS_LAST_ERRED) {
			cmn_err(CE_WARN, "md: %s: %s last erred",
			    md_shortname(MD_SID(un)), devname);

		} else if (un->un_column[col].un_devflags &
		    MD_RAID_DEV_ISOPEN) {
			/*
			 * Close the broken device and clear the open flag on
			 * it.  We have to check that the device is open,
			 * otherwise the first open on it has resulted in the
			 * error that is being processed and the actual un_dev
			 * will be NODEV64.
			 */
			md_layered_close(un->un_column[col].un_dev,
			    MD_OFLG_NULL);
			un->un_column[col].un_devflags &= ~MD_RAID_DEV_ISOPEN;
		}
	} else if (orig_colstate == RCS_LAST_ERRED && newstate == RCS_ERRED &&
	    un->un_column[col].un_devflags & MD_RAID_DEV_ISOPEN) {
		/*
		 * Similar to logic above except no log messages since we
		 * are just transitioning from Last Erred to Erred.
		 */
		md_layered_close(un->un_column[col].un_dev, MD_OFLG_NULL);
		un->un_column[col].un_devflags &= ~MD_RAID_DEV_ISOPEN;
	}

	/*
	 * If a resync has completed, see if there is a Last Erred
	 * component that we can change to the Erred state.
	 * (Recursive call; the inner call cannot recurse again because
	 * its newstate is RCS_ERRED, not RCS_OKAY.)
	 */
	if ((orig_colstate == RCS_RESYNC) && (newstate == RCS_OKAY)) {
		for (i = 0; i < un->un_totalcolumncnt; i++) {
			if (i != col &&
			    (un->un_column[i].un_devstate & RCS_LAST_ERRED)) {
				raid_set_state(un, i, RCS_ERRED, 0);
				break;
			}
		}
	}
}
3480Sstevel@tonic-gate * 3490Sstevel@tonic-gate * Column state may make it impossible to do the write 3500Sstevel@tonic-gate * in which case RCL_EIO or RCL_ENXIO is returned. 3510Sstevel@tonic-gate * 3520Sstevel@tonic-gate * If a column cannot be written directly, RCL_ERRED is 3530Sstevel@tonic-gate * returned and processing should proceed accordingly. 3540Sstevel@tonic-gate * 3550Sstevel@tonic-gate * PARAMETERS: minor_t mnum - minor number identity of metadevice 3560Sstevel@tonic-gate * md_raidcs_t *cs - child save structure 3570Sstevel@tonic-gate * mr_column_t *dcolumn - pointer to data column structure 3580Sstevel@tonic-gate * mr_column_t *pcolumn - pointer to parity column structure 3590Sstevel@tonic-gate * 3600Sstevel@tonic-gate * RETURNS: RCL_OKAY, RCL_ERRED 3610Sstevel@tonic-gate * 3620Sstevel@tonic-gate * LOCKS: Expects Line Writer Lock and Unit Resource Lock to be held 3630Sstevel@tonic-gate * across call. 3640Sstevel@tonic-gate */ 3650Sstevel@tonic-gate 3660Sstevel@tonic-gate static int 3670Sstevel@tonic-gate erred_check_line(mr_unit_t *un, md_raidcs_t *cs, mr_column_t *column) 3680Sstevel@tonic-gate { 3690Sstevel@tonic-gate 3700Sstevel@tonic-gate ASSERT(un != NULL); 3710Sstevel@tonic-gate ASSERT(cs->cs_flags & MD_RCS_LLOCKD); 3720Sstevel@tonic-gate 3730Sstevel@tonic-gate if (column->un_devstate & RCS_OKAY) 3740Sstevel@tonic-gate return (RCL_OKAY); 3750Sstevel@tonic-gate 3760Sstevel@tonic-gate if (column->un_devstate & RCS_ERRED) 3770Sstevel@tonic-gate return (RCL_ERRED); /* do not read from errored disk */ 3780Sstevel@tonic-gate 3790Sstevel@tonic-gate /* 3800Sstevel@tonic-gate * for the last errored case their are two considerations. 3810Sstevel@tonic-gate * When the last errored column is the only errored column then 3820Sstevel@tonic-gate * do treat it like a maintenance column, not doing I/O from 3830Sstevel@tonic-gate * it. When it there are other failures then just attempt 3840Sstevel@tonic-gate * to use it. 
3850Sstevel@tonic-gate */ 3860Sstevel@tonic-gate if (column->un_devstate & RCS_LAST_ERRED) 3870Sstevel@tonic-gate return (RCL_ERRED); 3880Sstevel@tonic-gate 3890Sstevel@tonic-gate ASSERT(column->un_devstate & RCS_RESYNC); 3900Sstevel@tonic-gate 3910Sstevel@tonic-gate /* 3920Sstevel@tonic-gate * When a resync from a hotspare is being done (copy resync) 3930Sstevel@tonic-gate * then always treat it as an OKAY column, since no regen 3940Sstevel@tonic-gate * is required. 3950Sstevel@tonic-gate */ 3960Sstevel@tonic-gate if (column->un_devflags & MD_RAID_COPY_RESYNC) { 3970Sstevel@tonic-gate return (RCL_OKAY); 3980Sstevel@tonic-gate } 3990Sstevel@tonic-gate 4000Sstevel@tonic-gate mutex_enter(&un->un_mx); 4010Sstevel@tonic-gate if (cs->cs_line < un->un_resync_line_index) { 4020Sstevel@tonic-gate mutex_exit(&un->un_mx); 4030Sstevel@tonic-gate return (RCL_OKAY); 4040Sstevel@tonic-gate } 4050Sstevel@tonic-gate mutex_exit(&un->un_mx); 4060Sstevel@tonic-gate return (RCL_ERRED); 4070Sstevel@tonic-gate 4080Sstevel@tonic-gate } 4090Sstevel@tonic-gate 4100Sstevel@tonic-gate /* 4110Sstevel@tonic-gate * NAMES: raid_state_cnt 4120Sstevel@tonic-gate * 4130Sstevel@tonic-gate * DESCRIPTION: counts number of column in a specific state 4140Sstevel@tonic-gate * 4150Sstevel@tonic-gate * PARAMETERS: md_raid_t *un 4160Sstevel@tonic-gate * rcs_state state 4170Sstevel@tonic-gate */ 4180Sstevel@tonic-gate int 4190Sstevel@tonic-gate raid_state_cnt(mr_unit_t *un, rcs_state_t state) 4200Sstevel@tonic-gate { 4210Sstevel@tonic-gate int i, retval = 0; 4220Sstevel@tonic-gate 4230Sstevel@tonic-gate for (i = 0; i < un->un_totalcolumncnt; i++) 4240Sstevel@tonic-gate if (un->un_column[i].un_devstate & state) 4250Sstevel@tonic-gate retval++; 4260Sstevel@tonic-gate return (retval); 4270Sstevel@tonic-gate } 4280Sstevel@tonic-gate 4290Sstevel@tonic-gate /* 4300Sstevel@tonic-gate * NAMES: raid_io_overlaps 4310Sstevel@tonic-gate * 4320Sstevel@tonic-gate * DESCRIPTION: checkst for overlap of 2 child save 
structures 4330Sstevel@tonic-gate * 4340Sstevel@tonic-gate * PARAMETERS: md_raidcs_t cs1 4350Sstevel@tonic-gate * md_raidcs_t cs2 4360Sstevel@tonic-gate * 4370Sstevel@tonic-gate * RETURNS: 0 - no overlap 4380Sstevel@tonic-gate * 1 - overlap 4390Sstevel@tonic-gate */ 4400Sstevel@tonic-gate int 4410Sstevel@tonic-gate raid_io_overlaps(md_raidcs_t *cs1, md_raidcs_t *cs2) 4420Sstevel@tonic-gate { 4430Sstevel@tonic-gate if (cs1->cs_blkno > cs2->cs_lastblk) 4440Sstevel@tonic-gate return (0); 4450Sstevel@tonic-gate if (cs1->cs_lastblk < cs2->cs_blkno) 4460Sstevel@tonic-gate return (0); 4470Sstevel@tonic-gate return (1); 4480Sstevel@tonic-gate } 4490Sstevel@tonic-gate 4500Sstevel@tonic-gate /* 4510Sstevel@tonic-gate * NAMES: raid_parent_constructor 4520Sstevel@tonic-gate * DESCRIPTION: parent structure constructor routine 4530Sstevel@tonic-gate * PARAMETERS: 4540Sstevel@tonic-gate */ 4550Sstevel@tonic-gate /*ARGSUSED1*/ 4560Sstevel@tonic-gate static int 4570Sstevel@tonic-gate raid_parent_constructor(void *p, void *d1, int d2) 4580Sstevel@tonic-gate { 4590Sstevel@tonic-gate mutex_init(&((md_raidps_t *)p)->ps_mx, 4600Sstevel@tonic-gate NULL, MUTEX_DEFAULT, NULL); 4610Sstevel@tonic-gate mutex_init(&((md_raidps_t *)p)->ps_mapin_mx, 4620Sstevel@tonic-gate NULL, MUTEX_DEFAULT, NULL); 4630Sstevel@tonic-gate return (0); 4640Sstevel@tonic-gate } 4650Sstevel@tonic-gate 4660Sstevel@tonic-gate void 4670Sstevel@tonic-gate raid_parent_init(md_raidps_t *ps) 4680Sstevel@tonic-gate { 4690Sstevel@tonic-gate bzero(ps, offsetof(md_raidps_t, ps_mx)); 4700Sstevel@tonic-gate ((md_raidps_t *)ps)->ps_flags = MD_RPS_INUSE; 4710Sstevel@tonic-gate ((md_raidps_t *)ps)->ps_magic = RAID_PSMAGIC; 4720Sstevel@tonic-gate } 4730Sstevel@tonic-gate 4740Sstevel@tonic-gate /*ARGSUSED1*/ 4750Sstevel@tonic-gate static void 4760Sstevel@tonic-gate raid_parent_destructor(void *p, void *d) 4770Sstevel@tonic-gate { 4780Sstevel@tonic-gate mutex_destroy(&((md_raidps_t *)p)->ps_mx); 4790Sstevel@tonic-gate 
mutex_destroy(&((md_raidps_t *)p)->ps_mapin_mx); 4800Sstevel@tonic-gate } 4810Sstevel@tonic-gate 4820Sstevel@tonic-gate /* 4830Sstevel@tonic-gate * NAMES: raid_child_constructor 4840Sstevel@tonic-gate * DESCRIPTION: child structure constructor routine 4850Sstevel@tonic-gate * PARAMETERS: 4860Sstevel@tonic-gate */ 4870Sstevel@tonic-gate /*ARGSUSED1*/ 4880Sstevel@tonic-gate static int 4890Sstevel@tonic-gate raid_child_constructor(void *p, void *d1, int d2) 4900Sstevel@tonic-gate { 4910Sstevel@tonic-gate md_raidcs_t *cs = (md_raidcs_t *)p; 4920Sstevel@tonic-gate mutex_init(&cs->cs_mx, NULL, MUTEX_DEFAULT, NULL); 4930Sstevel@tonic-gate bioinit(&cs->cs_dbuf); 4940Sstevel@tonic-gate bioinit(&cs->cs_pbuf); 4950Sstevel@tonic-gate bioinit(&cs->cs_hbuf); 4960Sstevel@tonic-gate return (0); 4970Sstevel@tonic-gate } 4980Sstevel@tonic-gate 4990Sstevel@tonic-gate void 5000Sstevel@tonic-gate raid_child_init(md_raidcs_t *cs) 5010Sstevel@tonic-gate { 5020Sstevel@tonic-gate bzero(cs, offsetof(md_raidcs_t, cs_mx)); 5030Sstevel@tonic-gate 5040Sstevel@tonic-gate md_bioreset(&cs->cs_dbuf); 5050Sstevel@tonic-gate md_bioreset(&cs->cs_pbuf); 5060Sstevel@tonic-gate md_bioreset(&cs->cs_hbuf); 5070Sstevel@tonic-gate 5080Sstevel@tonic-gate ((md_raidcs_t *)cs)->cs_dbuf.b_chain = 5090Sstevel@tonic-gate ((md_raidcs_t *)cs)->cs_pbuf.b_chain = 5100Sstevel@tonic-gate ((md_raidcs_t *)cs)->cs_hbuf.b_chain = 5110Sstevel@tonic-gate (struct buf *)(cs); 5120Sstevel@tonic-gate 5130Sstevel@tonic-gate cs->cs_magic = RAID_CSMAGIC; 5140Sstevel@tonic-gate cs->cs_line = MD_DISKADDR_ERROR; 5150Sstevel@tonic-gate cs->cs_dpwslot = -1; 5160Sstevel@tonic-gate cs->cs_ppwslot = -1; 5170Sstevel@tonic-gate } 5180Sstevel@tonic-gate 5190Sstevel@tonic-gate /*ARGSUSED1*/ 5200Sstevel@tonic-gate static void 5210Sstevel@tonic-gate raid_child_destructor(void *p, void *d) 5220Sstevel@tonic-gate { 5230Sstevel@tonic-gate biofini(&((md_raidcs_t *)p)->cs_dbuf); 5240Sstevel@tonic-gate biofini(&((md_raidcs_t *)p)->cs_hbuf); 
5250Sstevel@tonic-gate biofini(&((md_raidcs_t *)p)->cs_pbuf); 5260Sstevel@tonic-gate mutex_destroy(&((md_raidcs_t *)p)->cs_mx); 5270Sstevel@tonic-gate } 5280Sstevel@tonic-gate 5290Sstevel@tonic-gate /*ARGSUSED1*/ 5300Sstevel@tonic-gate static int 5310Sstevel@tonic-gate raid_cbuf_constructor(void *p, void *d1, int d2) 5320Sstevel@tonic-gate { 5330Sstevel@tonic-gate bioinit(&((md_raidcbuf_t *)p)->cbuf_bp); 5340Sstevel@tonic-gate return (0); 5350Sstevel@tonic-gate } 5360Sstevel@tonic-gate 5370Sstevel@tonic-gate static void 5380Sstevel@tonic-gate raid_cbuf_init(md_raidcbuf_t *cb) 5390Sstevel@tonic-gate { 5400Sstevel@tonic-gate bzero(cb, offsetof(md_raidcbuf_t, cbuf_bp)); 5410Sstevel@tonic-gate md_bioreset(&cb->cbuf_bp); 5420Sstevel@tonic-gate cb->cbuf_magic = RAID_BUFMAGIC; 5430Sstevel@tonic-gate cb->cbuf_pwslot = -1; 5440Sstevel@tonic-gate cb->cbuf_flags = CBUF_WRITE; 5450Sstevel@tonic-gate } 5460Sstevel@tonic-gate 5470Sstevel@tonic-gate /*ARGSUSED1*/ 5480Sstevel@tonic-gate static void 5490Sstevel@tonic-gate raid_cbuf_destructor(void *p, void *d) 5500Sstevel@tonic-gate { 5510Sstevel@tonic-gate biofini(&((md_raidcbuf_t *)p)->cbuf_bp); 5520Sstevel@tonic-gate } 5530Sstevel@tonic-gate 5540Sstevel@tonic-gate /* 5550Sstevel@tonic-gate * NAMES: raid_run_queue 5560Sstevel@tonic-gate * DESCRIPTION: spawn a backend processing daemon for RAID metadevice. 
5570Sstevel@tonic-gate * PARAMETERS: 5580Sstevel@tonic-gate */ 5590Sstevel@tonic-gate /*ARGSUSED*/ 5600Sstevel@tonic-gate static void 5610Sstevel@tonic-gate raid_run_queue(void *d) 5620Sstevel@tonic-gate { 5630Sstevel@tonic-gate if (!(md_status & MD_GBL_DAEMONS_LIVE)) 5640Sstevel@tonic-gate md_daemon(1, &md_done_daemon); 5650Sstevel@tonic-gate } 5660Sstevel@tonic-gate 5670Sstevel@tonic-gate /* 5680Sstevel@tonic-gate * NAME: raid_build_pwslot 5690Sstevel@tonic-gate * DESCRIPTION: builds mr_pw_reserve for the column 5700Sstevel@tonic-gate * PARAMETERS: un is the pointer to the unit structure 5710Sstevel@tonic-gate * colindex is the column to create the structure for 5720Sstevel@tonic-gate */ 5730Sstevel@tonic-gate int 5740Sstevel@tonic-gate raid_build_pw_reservation(mr_unit_t *un, int colindex) 5750Sstevel@tonic-gate { 5760Sstevel@tonic-gate mr_pw_reserve_t *pw; 5770Sstevel@tonic-gate mr_scoreboard_t *sb; 5780Sstevel@tonic-gate int i; 5790Sstevel@tonic-gate 5800Sstevel@tonic-gate pw = (mr_pw_reserve_t *) kmem_zalloc(sizeof (mr_pw_reserve_t) + 5810Sstevel@tonic-gate (sizeof (mr_scoreboard_t) * un->un_pwcnt), KM_SLEEP); 5820Sstevel@tonic-gate pw->pw_magic = RAID_PWMAGIC; 5830Sstevel@tonic-gate pw->pw_column = colindex; 5840Sstevel@tonic-gate pw->pw_free = un->un_pwcnt; 5850Sstevel@tonic-gate sb = &pw->pw_sb[0]; 5860Sstevel@tonic-gate for (i = 0; i < un->un_pwcnt; i++) { 5870Sstevel@tonic-gate sb[i].sb_column = colindex; 5880Sstevel@tonic-gate sb[i].sb_flags = SB_UNUSED; 5890Sstevel@tonic-gate sb[i].sb_start_blk = 0; 5900Sstevel@tonic-gate sb[i].sb_last_blk = 0; 5910Sstevel@tonic-gate sb[i].sb_cs = NULL; 5920Sstevel@tonic-gate } 5930Sstevel@tonic-gate un->un_column_ic[colindex].un_pw_reserve = pw; 5940Sstevel@tonic-gate return (0); 5950Sstevel@tonic-gate } 5960Sstevel@tonic-gate /* 5970Sstevel@tonic-gate * NAME: raid_free_pw_reservation 5980Sstevel@tonic-gate * DESCRIPTION: RAID metadevice pre-write slot structure destroy routine 5990Sstevel@tonic-gate * PARAMETERS: 
mr_unit_t *un - pointer to a unit structure 6000Sstevel@tonic-gate * int colindex - index of the column whose pre-write slot struct 6010Sstevel@tonic-gate * is to be destroyed. 6020Sstevel@tonic-gate */ 6030Sstevel@tonic-gate void 6040Sstevel@tonic-gate raid_free_pw_reservation(mr_unit_t *un, int colindex) 6050Sstevel@tonic-gate { 6060Sstevel@tonic-gate mr_pw_reserve_t *pw = un->un_column_ic[colindex].un_pw_reserve; 6070Sstevel@tonic-gate 6080Sstevel@tonic-gate kmem_free(pw, sizeof (mr_pw_reserve_t) + 6090Sstevel@tonic-gate (sizeof (mr_scoreboard_t) * un->un_pwcnt)); 6100Sstevel@tonic-gate } 6110Sstevel@tonic-gate 6120Sstevel@tonic-gate /* 6130Sstevel@tonic-gate * NAME: raid_cancel_pwslot 6140Sstevel@tonic-gate * DESCRIPTION: RAID metadevice write routine 6150Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to a child structure 6160Sstevel@tonic-gate */ 6170Sstevel@tonic-gate static void 6180Sstevel@tonic-gate raid_cancel_pwslot(md_raidcs_t *cs) 6190Sstevel@tonic-gate { 6200Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 6210Sstevel@tonic-gate mr_pw_reserve_t *pw; 6220Sstevel@tonic-gate mr_scoreboard_t *sb; 6230Sstevel@tonic-gate mr_column_ic_t *col; 6240Sstevel@tonic-gate md_raidcbuf_t *cbuf; 6250Sstevel@tonic-gate int broadcast = 0; 6260Sstevel@tonic-gate 6270Sstevel@tonic-gate if (cs->cs_ps->ps_flags & MD_RPS_READ) 6280Sstevel@tonic-gate return; 6290Sstevel@tonic-gate if (cs->cs_dpwslot != -1) { 6300Sstevel@tonic-gate col = &un->un_column_ic[cs->cs_dcolumn]; 6310Sstevel@tonic-gate pw = col->un_pw_reserve; 6320Sstevel@tonic-gate sb = &pw->pw_sb[cs->cs_dpwslot]; 6330Sstevel@tonic-gate sb->sb_flags = SB_AVAIL; 6340Sstevel@tonic-gate if ((pw->pw_free++ == 0) || (un->un_rflags & MD_RFLAG_NEEDPW)) 6350Sstevel@tonic-gate broadcast++; 6360Sstevel@tonic-gate sb->sb_cs = NULL; 6370Sstevel@tonic-gate } 6380Sstevel@tonic-gate 6390Sstevel@tonic-gate if (cs->cs_ppwslot != -1) { 6400Sstevel@tonic-gate col = &un->un_column_ic[cs->cs_pcolumn]; 6410Sstevel@tonic-gate pw = 
col->un_pw_reserve; 6420Sstevel@tonic-gate sb = &pw->pw_sb[cs->cs_ppwslot]; 6430Sstevel@tonic-gate sb->sb_flags = SB_AVAIL; 6440Sstevel@tonic-gate if ((pw->pw_free++ == 0) || (un->un_rflags & MD_RFLAG_NEEDPW)) 6450Sstevel@tonic-gate broadcast++; 6460Sstevel@tonic-gate sb->sb_cs = NULL; 6470Sstevel@tonic-gate } 6480Sstevel@tonic-gate 6490Sstevel@tonic-gate for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) { 6500Sstevel@tonic-gate if (cbuf->cbuf_pwslot == -1) 6510Sstevel@tonic-gate continue; 6520Sstevel@tonic-gate col = &un->un_column_ic[cbuf->cbuf_column]; 6530Sstevel@tonic-gate pw = col->un_pw_reserve; 6540Sstevel@tonic-gate sb = &pw->pw_sb[cbuf->cbuf_pwslot]; 6550Sstevel@tonic-gate sb->sb_flags = SB_AVAIL; 6560Sstevel@tonic-gate if ((pw->pw_free++ == 0) || (un->un_rflags & MD_RFLAG_NEEDPW)) 6570Sstevel@tonic-gate broadcast++; 6580Sstevel@tonic-gate sb->sb_cs = NULL; 6590Sstevel@tonic-gate } 6600Sstevel@tonic-gate if (broadcast) { 6610Sstevel@tonic-gate cv_broadcast(&un->un_cv); 6620Sstevel@tonic-gate return; 6630Sstevel@tonic-gate } 6640Sstevel@tonic-gate mutex_enter(&un->un_mx); 6650Sstevel@tonic-gate if (un->un_rflags & MD_RFLAG_NEEDPW) 6660Sstevel@tonic-gate cv_broadcast(&un->un_cv); 6670Sstevel@tonic-gate mutex_exit(&un->un_mx); 6680Sstevel@tonic-gate } 6690Sstevel@tonic-gate 6700Sstevel@tonic-gate static void 6710Sstevel@tonic-gate raid_free_pwinvalidate(md_raidcs_t *cs) 6720Sstevel@tonic-gate { 6730Sstevel@tonic-gate md_raidcbuf_t *cbuf; 6740Sstevel@tonic-gate md_raidcbuf_t *cbuf_to_free; 6750Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 6760Sstevel@tonic-gate mdi_unit_t *ui = MDI_UNIT(MD_SID(un)); 6770Sstevel@tonic-gate mr_pw_reserve_t *pw; 6780Sstevel@tonic-gate mr_scoreboard_t *sb; 6790Sstevel@tonic-gate int broadcast = 0; 6800Sstevel@tonic-gate 6810Sstevel@tonic-gate cbuf = cs->cs_pw_inval_list; 6820Sstevel@tonic-gate ASSERT(cbuf); 6830Sstevel@tonic-gate mutex_enter(&un->un_linlck_mx); 6840Sstevel@tonic-gate while (cbuf) { 6850Sstevel@tonic-gate 
pw = un->un_column_ic[cbuf->cbuf_column].un_pw_reserve; 6860Sstevel@tonic-gate sb = &pw->pw_sb[0]; 6870Sstevel@tonic-gate ASSERT(sb[cbuf->cbuf_pwslot].sb_flags & SB_INVAL_PEND); 6880Sstevel@tonic-gate sb[cbuf->cbuf_pwslot].sb_flags = SB_UNUSED; 6890Sstevel@tonic-gate sb[cbuf->cbuf_pwslot].sb_cs = NULL; 6900Sstevel@tonic-gate if ((pw->pw_free++ == 0) || (un->un_rflags & MD_RFLAG_NEEDPW)) 6910Sstevel@tonic-gate broadcast++; 6920Sstevel@tonic-gate cbuf_to_free = cbuf; 6930Sstevel@tonic-gate cbuf = cbuf->cbuf_next; 6940Sstevel@tonic-gate kmem_free(cbuf_to_free->cbuf_buffer, dbtob(un->un_iosize)); 6950Sstevel@tonic-gate kmem_cache_free(raid_cbuf_cache, cbuf_to_free); 6960Sstevel@tonic-gate } 6970Sstevel@tonic-gate cs->cs_pw_inval_list = (md_raidcbuf_t *)NULL; 6980Sstevel@tonic-gate /* 6990Sstevel@tonic-gate * now that there is a free prewrite slot, check to see if there 7000Sstevel@tonic-gate * are any io operations waiting first wake up the raid_io_startup 7010Sstevel@tonic-gate * then signal the the processes waiting in raid_write. 
7020Sstevel@tonic-gate */ 7030Sstevel@tonic-gate if (ui->ui_io_lock->io_list_front) 7040Sstevel@tonic-gate raid_io_startup(un); 7050Sstevel@tonic-gate mutex_exit(&un->un_linlck_mx); 7060Sstevel@tonic-gate if (broadcast) { 7070Sstevel@tonic-gate cv_broadcast(&un->un_cv); 7080Sstevel@tonic-gate return; 7090Sstevel@tonic-gate } 7100Sstevel@tonic-gate mutex_enter(&un->un_mx); 7110Sstevel@tonic-gate if (un->un_rflags & MD_RFLAG_NEEDPW) 7120Sstevel@tonic-gate cv_broadcast(&un->un_cv); 7130Sstevel@tonic-gate mutex_exit(&un->un_mx); 7140Sstevel@tonic-gate } 7150Sstevel@tonic-gate 7160Sstevel@tonic-gate 7170Sstevel@tonic-gate static int 7180Sstevel@tonic-gate raid_get_pwslot(md_raidcs_t *cs, int column) 7190Sstevel@tonic-gate { 7200Sstevel@tonic-gate mr_scoreboard_t *sb; 7210Sstevel@tonic-gate mr_pw_reserve_t *pw; 7220Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 7230Sstevel@tonic-gate diskaddr_t start_blk = cs->cs_blkno; 7240Sstevel@tonic-gate diskaddr_t last_blk = cs->cs_lastblk; 7250Sstevel@tonic-gate int i; 7260Sstevel@tonic-gate int pwcnt = un->un_pwcnt; 7270Sstevel@tonic-gate int avail = -1; 7280Sstevel@tonic-gate int use = -1; 7290Sstevel@tonic-gate int flags; 7300Sstevel@tonic-gate 7310Sstevel@tonic-gate 7320Sstevel@tonic-gate /* start with the data column */ 7330Sstevel@tonic-gate pw = cs->cs_un->un_column_ic[column].un_pw_reserve; 7340Sstevel@tonic-gate sb = &pw->pw_sb[0]; 7350Sstevel@tonic-gate ASSERT(pw->pw_free > 0); 7360Sstevel@tonic-gate for (i = 0; i < pwcnt; i++) { 7370Sstevel@tonic-gate flags = sb[i].sb_flags; 7380Sstevel@tonic-gate if (flags & SB_INVAL_PEND) 7390Sstevel@tonic-gate continue; 7400Sstevel@tonic-gate 7410Sstevel@tonic-gate if ((avail == -1) && (flags & (SB_AVAIL | SB_UNUSED))) 7420Sstevel@tonic-gate avail = i; 7430Sstevel@tonic-gate 7440Sstevel@tonic-gate if ((start_blk > sb[i].sb_last_blk) || 7450Sstevel@tonic-gate (last_blk < sb[i].sb_start_blk)) 7460Sstevel@tonic-gate continue; 7470Sstevel@tonic-gate 7480Sstevel@tonic-gate /* OVERLAP */ 
7490Sstevel@tonic-gate ASSERT(! (sb[i].sb_flags & SB_INUSE)); 7500Sstevel@tonic-gate 7510Sstevel@tonic-gate /* 7520Sstevel@tonic-gate * raid_invalidate_pwslot attempts to zero out prewrite entry 7530Sstevel@tonic-gate * in parallel with other disk reads/writes related to current 7540Sstevel@tonic-gate * transaction. however cs_frags accounting for this case is 7550Sstevel@tonic-gate * broken because raid_write_io resets cs_frags i.e. ignoring 7560Sstevel@tonic-gate * that it could have been been set to > 0 value by 7570Sstevel@tonic-gate * raid_invalidate_pwslot. While this can be fixed an 7580Sstevel@tonic-gate * additional problem is that we don't seem to handle 7590Sstevel@tonic-gate * correctly the case of getting a disk error for prewrite 7600Sstevel@tonic-gate * entry invalidation. 7610Sstevel@tonic-gate * It does not look like we really need 7620Sstevel@tonic-gate * to invalidate prewrite slots because raid_replay sorts 7630Sstevel@tonic-gate * prewrite id's in ascending order and during recovery the 7640Sstevel@tonic-gate * latest prewrite entry for the same block will be replay 7650Sstevel@tonic-gate * last. That's why i ifdef'd out the call to 7660Sstevel@tonic-gate * raid_invalidate_pwslot. --aguzovsk@east 7670Sstevel@tonic-gate */ 7680Sstevel@tonic-gate 7690Sstevel@tonic-gate if (use == -1) { 7700Sstevel@tonic-gate use = i; 7710Sstevel@tonic-gate } 7720Sstevel@tonic-gate } 7730Sstevel@tonic-gate 7740Sstevel@tonic-gate ASSERT(avail != -1); 7750Sstevel@tonic-gate pw->pw_free--; 7760Sstevel@tonic-gate if (use == -1) 7770Sstevel@tonic-gate use = avail; 7780Sstevel@tonic-gate 7790Sstevel@tonic-gate ASSERT(! 
(sb[use].sb_flags & SB_INUSE)); 7800Sstevel@tonic-gate sb[use].sb_flags = SB_INUSE; 7810Sstevel@tonic-gate sb[use].sb_cs = cs; 7820Sstevel@tonic-gate sb[use].sb_start_blk = start_blk; 7830Sstevel@tonic-gate sb[use].sb_last_blk = last_blk; 7840Sstevel@tonic-gate ASSERT((use >= 0) && (use < un->un_pwcnt)); 7850Sstevel@tonic-gate return (use); 7860Sstevel@tonic-gate } 7870Sstevel@tonic-gate 7880Sstevel@tonic-gate static int 7890Sstevel@tonic-gate raid_check_pw(md_raidcs_t *cs) 7900Sstevel@tonic-gate { 7910Sstevel@tonic-gate 7920Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 7930Sstevel@tonic-gate int i; 7940Sstevel@tonic-gate 7950Sstevel@tonic-gate ASSERT(! (cs->cs_flags & MD_RCS_HAVE_PW_SLOTS)); 7960Sstevel@tonic-gate /* 7970Sstevel@tonic-gate * check to be sure there is a prewrite slot available 7980Sstevel@tonic-gate * if not just return. 7990Sstevel@tonic-gate */ 8000Sstevel@tonic-gate if (cs->cs_flags & MD_RCS_LINE) { 8010Sstevel@tonic-gate for (i = 0; i < un->un_totalcolumncnt; i++) 8020Sstevel@tonic-gate if (un->un_column_ic[i].un_pw_reserve->pw_free <= 0) 8030Sstevel@tonic-gate return (1); 8040Sstevel@tonic-gate return (0); 8050Sstevel@tonic-gate } 8060Sstevel@tonic-gate 8070Sstevel@tonic-gate if (un->un_column_ic[cs->cs_dcolumn].un_pw_reserve->pw_free <= 0) 8080Sstevel@tonic-gate return (1); 8090Sstevel@tonic-gate if (un->un_column_ic[cs->cs_pcolumn].un_pw_reserve->pw_free <= 0) 8100Sstevel@tonic-gate return (1); 8110Sstevel@tonic-gate return (0); 8120Sstevel@tonic-gate } 8130Sstevel@tonic-gate static int 8140Sstevel@tonic-gate raid_alloc_pwslot(md_raidcs_t *cs) 8150Sstevel@tonic-gate { 8160Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 8170Sstevel@tonic-gate md_raidcbuf_t *cbuf; 8180Sstevel@tonic-gate 8190Sstevel@tonic-gate ASSERT(! 
(cs->cs_flags & MD_RCS_HAVE_PW_SLOTS)); 8200Sstevel@tonic-gate if (raid_check_pw(cs)) 8210Sstevel@tonic-gate return (1); 8220Sstevel@tonic-gate 8230Sstevel@tonic-gate mutex_enter(&un->un_mx); 8240Sstevel@tonic-gate un->un_pwid++; 8250Sstevel@tonic-gate cs->cs_pwid = un->un_pwid; 8260Sstevel@tonic-gate mutex_exit(&un->un_mx); 8270Sstevel@tonic-gate 8280Sstevel@tonic-gate cs->cs_dpwslot = raid_get_pwslot(cs, cs->cs_dcolumn); 8290Sstevel@tonic-gate for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) { 8300Sstevel@tonic-gate cbuf->cbuf_pwslot = raid_get_pwslot(cs, cbuf->cbuf_column); 8310Sstevel@tonic-gate } 8320Sstevel@tonic-gate cs->cs_ppwslot = raid_get_pwslot(cs, cs->cs_pcolumn); 8330Sstevel@tonic-gate 8340Sstevel@tonic-gate cs->cs_flags |= MD_RCS_HAVE_PW_SLOTS; 8350Sstevel@tonic-gate 8360Sstevel@tonic-gate return (0); 8370Sstevel@tonic-gate } 8380Sstevel@tonic-gate 8390Sstevel@tonic-gate /* 8400Sstevel@tonic-gate * NAMES: raid_build_incore 8410Sstevel@tonic-gate * DESCRIPTION: RAID metadevice incore structure building routine 8420Sstevel@tonic-gate * PARAMETERS: void *p - pointer to a unit structure 8430Sstevel@tonic-gate * int snarfing - a flag to indicate snarfing is required 8440Sstevel@tonic-gate */ 8450Sstevel@tonic-gate int 8460Sstevel@tonic-gate raid_build_incore(void *p, int snarfing) 8470Sstevel@tonic-gate { 8480Sstevel@tonic-gate mr_unit_t *un = (mr_unit_t *)p; 8490Sstevel@tonic-gate minor_t mnum = MD_SID(un); 8500Sstevel@tonic-gate mddb_recid_t hs_recid = 0; 8510Sstevel@tonic-gate int i; 8520Sstevel@tonic-gate int preserve_flags; 8530Sstevel@tonic-gate mr_column_t *column; 8540Sstevel@tonic-gate int iosize; 8550Sstevel@tonic-gate md_dev64_t hs, dev; 8567627SChris.Horne@Sun.COM int resync_cnt = 0, error_cnt = 0; 8570Sstevel@tonic-gate 8580Sstevel@tonic-gate hs = NODEV64; 8590Sstevel@tonic-gate dev = NODEV64; 8600Sstevel@tonic-gate 8610Sstevel@tonic-gate /* clear out bogus pointer incase we return(1) prior to alloc */ 8620Sstevel@tonic-gate 
un->mr_ic = NULL; 8630Sstevel@tonic-gate 8640Sstevel@tonic-gate if (MD_STATUS(un) & MD_UN_BEING_RESET) { 8650Sstevel@tonic-gate mddb_setrecprivate(un->c.un_record_id, MD_PRV_PENDCLEAN); 8660Sstevel@tonic-gate return (1); 8670Sstevel@tonic-gate } 8680Sstevel@tonic-gate 8690Sstevel@tonic-gate if (MD_UNIT(mnum) != NULL) 8700Sstevel@tonic-gate return (0); 8710Sstevel@tonic-gate 8720Sstevel@tonic-gate if (snarfing) 8730Sstevel@tonic-gate MD_STATUS(un) = 0; 8740Sstevel@tonic-gate 8750Sstevel@tonic-gate un->mr_ic = (mr_unit_ic_t *)kmem_zalloc(sizeof (*un->mr_ic), 8760Sstevel@tonic-gate KM_SLEEP); 8770Sstevel@tonic-gate 8780Sstevel@tonic-gate un->un_column_ic = (mr_column_ic_t *) 8790Sstevel@tonic-gate kmem_zalloc(sizeof (mr_column_ic_t) * 8807627SChris.Horne@Sun.COM un->un_totalcolumncnt, KM_SLEEP); 8810Sstevel@tonic-gate 8820Sstevel@tonic-gate for (i = 0; i < un->un_totalcolumncnt; i++) { 8830Sstevel@tonic-gate 8840Sstevel@tonic-gate column = &un->un_column[i]; 8850Sstevel@tonic-gate preserve_flags = column->un_devflags & 8860Sstevel@tonic-gate (MD_RAID_COPY_RESYNC | MD_RAID_REGEN_RESYNC); 8870Sstevel@tonic-gate column->un_devflags &= 8880Sstevel@tonic-gate ~(MD_RAID_ALT_ISOPEN | MD_RAID_DEV_ISOPEN | 8890Sstevel@tonic-gate MD_RAID_WRITE_ALT); 8900Sstevel@tonic-gate if (raid_build_pw_reservation(un, i) != 0) { 8910Sstevel@tonic-gate /* could not build pwslot */ 8920Sstevel@tonic-gate return (1); 8930Sstevel@tonic-gate } 8940Sstevel@tonic-gate 8950Sstevel@tonic-gate if (snarfing) { 8960Sstevel@tonic-gate set_t setno = MD_MIN2SET(mnum); 8970Sstevel@tonic-gate dev = md_getdevnum(setno, mddb_getsidenum(setno), 8980Sstevel@tonic-gate column->un_orig_key, MD_NOTRUST_DEVT); 8990Sstevel@tonic-gate /* 9000Sstevel@tonic-gate * Comment out instead of remove so we have history 9010Sstevel@tonic-gate * In the pre-SVM releases stored devt is used so 9020Sstevel@tonic-gate * as long as there is one snarf is always happy 9030Sstevel@tonic-gate * even the component is powered off. 
This is not 9040Sstevel@tonic-gate * the case in current SVM implementation. NODEV64 9050Sstevel@tonic-gate * can be returned and in this case since we resolve 9060Sstevel@tonic-gate * the devt at 'open' time (first use of metadevice) 9070Sstevel@tonic-gate * we will allow snarf continue. 9080Sstevel@tonic-gate * 9090Sstevel@tonic-gate * if (dev == NODEV64) 9100Sstevel@tonic-gate * return (1); 9110Sstevel@tonic-gate */ 9120Sstevel@tonic-gate 9130Sstevel@tonic-gate /* 9140Sstevel@tonic-gate * Setup un_orig_dev from device id info if the device 9150Sstevel@tonic-gate * is valid (not NODEV64). 9160Sstevel@tonic-gate */ 9170Sstevel@tonic-gate if (dev != NODEV64) 9180Sstevel@tonic-gate column->un_orig_dev = dev; 9190Sstevel@tonic-gate 9200Sstevel@tonic-gate if (column->un_devstate & RCS_RESYNC) 9210Sstevel@tonic-gate resync_cnt++; 9220Sstevel@tonic-gate if (column->un_devstate & (RCS_ERRED | RCS_LAST_ERRED)) 9230Sstevel@tonic-gate error_cnt++; 9240Sstevel@tonic-gate 9250Sstevel@tonic-gate if (HOTSPARED(un, i)) { 9260Sstevel@tonic-gate (void) md_hot_spare_ifc(HS_MKDEV, 9270Sstevel@tonic-gate 0, 0, 0, &column->un_hs_id, NULL, 9280Sstevel@tonic-gate &hs, NULL); 9290Sstevel@tonic-gate /* 9300Sstevel@tonic-gate * Same here 9310Sstevel@tonic-gate * 9320Sstevel@tonic-gate * if (hs == NODEV64) 9330Sstevel@tonic-gate * return (1); 9340Sstevel@tonic-gate */ 9350Sstevel@tonic-gate } 9360Sstevel@tonic-gate 9370Sstevel@tonic-gate if (HOTSPARED(un, i)) { 9380Sstevel@tonic-gate if (column->un_devstate & 9390Sstevel@tonic-gate (RCS_OKAY | RCS_LAST_ERRED)) { 9400Sstevel@tonic-gate column->un_dev = hs; 9410Sstevel@tonic-gate column->un_pwstart = 9420Sstevel@tonic-gate column->un_hs_pwstart; 9430Sstevel@tonic-gate column->un_devstart = 9440Sstevel@tonic-gate column->un_hs_devstart; 9450Sstevel@tonic-gate preserve_flags &= 9460Sstevel@tonic-gate ~(MD_RAID_COPY_RESYNC | 9470Sstevel@tonic-gate MD_RAID_REGEN_RESYNC); 9480Sstevel@tonic-gate } else if (column->un_devstate & RCS_RESYNC) { 
9490Sstevel@tonic-gate /* 9500Sstevel@tonic-gate * if previous system was 4.0 set 9510Sstevel@tonic-gate * the direction flags 9520Sstevel@tonic-gate */ 9530Sstevel@tonic-gate if ((preserve_flags & 9540Sstevel@tonic-gate (MD_RAID_COPY_RESYNC | 9550Sstevel@tonic-gate MD_RAID_REGEN_RESYNC)) == 0) { 9567627SChris.Horne@Sun.COM if (column->un_alt_dev != 9577627SChris.Horne@Sun.COM NODEV64) 9587627SChris.Horne@Sun.COM preserve_flags |= 9597627SChris.Horne@Sun.COM MD_RAID_COPY_RESYNC; 9607627SChris.Horne@Sun.COM else 9617627SChris.Horne@Sun.COM preserve_flags |= 962*11066Srafael.vanoni@sun.com /* CSTYLED */ 963*11066Srafael.vanoni@sun.com MD_RAID_REGEN_RESYNC; 9640Sstevel@tonic-gate } 9650Sstevel@tonic-gate } 9660Sstevel@tonic-gate } else { /* no hot spares */ 9670Sstevel@tonic-gate column->un_dev = dev; 9680Sstevel@tonic-gate column->un_pwstart = column->un_orig_pwstart; 9690Sstevel@tonic-gate column->un_devstart = column->un_orig_devstart; 9700Sstevel@tonic-gate if (column->un_devstate & RCS_RESYNC) { 9710Sstevel@tonic-gate preserve_flags |= MD_RAID_REGEN_RESYNC; 9720Sstevel@tonic-gate preserve_flags &= ~MD_RAID_COPY_RESYNC; 9730Sstevel@tonic-gate } 9740Sstevel@tonic-gate } 9750Sstevel@tonic-gate if (! 
(column->un_devstate & RCS_RESYNC)) { 9760Sstevel@tonic-gate preserve_flags &= 9770Sstevel@tonic-gate ~(MD_RAID_REGEN_RESYNC | 9780Sstevel@tonic-gate MD_RAID_COPY_RESYNC); 9790Sstevel@tonic-gate } 9800Sstevel@tonic-gate 9810Sstevel@tonic-gate column->un_devflags = preserve_flags; 9820Sstevel@tonic-gate column->un_alt_dev = NODEV64; 9830Sstevel@tonic-gate column->un_alt_pwstart = 0; 9840Sstevel@tonic-gate column->un_alt_devstart = 0; 9850Sstevel@tonic-gate un->un_resync_line_index = 0; 9860Sstevel@tonic-gate un->un_resync_index = 0; 9870Sstevel@tonic-gate un->un_percent_done = 0; 9880Sstevel@tonic-gate } 9890Sstevel@tonic-gate } 9900Sstevel@tonic-gate 9910Sstevel@tonic-gate if (resync_cnt && error_cnt) { 9920Sstevel@tonic-gate for (i = 0; i < un->un_totalcolumncnt; i++) { 9930Sstevel@tonic-gate column = &un->un_column[i]; 9940Sstevel@tonic-gate if (HOTSPARED(un, i) && 9950Sstevel@tonic-gate (column->un_devstate & RCS_RESYNC) && 9960Sstevel@tonic-gate (column->un_devflags & MD_RAID_COPY_RESYNC)) 9970Sstevel@tonic-gate /* hotspare has data */ 9980Sstevel@tonic-gate continue; 9990Sstevel@tonic-gate 10000Sstevel@tonic-gate if (HOTSPARED(un, i) && 10010Sstevel@tonic-gate (column->un_devstate & RCS_RESYNC)) { 10020Sstevel@tonic-gate /* hotspare does not have data */ 10030Sstevel@tonic-gate raid_hs_release(HS_FREE, un, &hs_recid, i); 10040Sstevel@tonic-gate column->un_dev = column->un_orig_dev; 10050Sstevel@tonic-gate column->un_pwstart = column->un_orig_pwstart; 10060Sstevel@tonic-gate column->un_devstart = column->un_orig_devstart; 10070Sstevel@tonic-gate mddb_setrecprivate(hs_recid, MD_PRV_PENDCOM); 10080Sstevel@tonic-gate } 10090Sstevel@tonic-gate 10100Sstevel@tonic-gate if (column->un_devstate & RCS_ERRED) 10110Sstevel@tonic-gate column->un_devstate = RCS_LAST_ERRED; 10120Sstevel@tonic-gate 10130Sstevel@tonic-gate if (column->un_devstate & RCS_RESYNC) 10140Sstevel@tonic-gate column->un_devstate = RCS_ERRED; 10150Sstevel@tonic-gate } 10160Sstevel@tonic-gate } 
10170Sstevel@tonic-gate mddb_setrecprivate(un->c.un_record_id, MD_PRV_PENDCOM); 10180Sstevel@tonic-gate 10190Sstevel@tonic-gate un->un_pwid = 1; /* or some other possible value */ 10200Sstevel@tonic-gate un->un_magic = RAID_UNMAGIC; 10210Sstevel@tonic-gate iosize = un->un_iosize; 10220Sstevel@tonic-gate un->un_pbuffer = kmem_alloc(dbtob(iosize), KM_SLEEP); 10230Sstevel@tonic-gate un->un_dbuffer = kmem_alloc(dbtob(iosize), KM_SLEEP); 10240Sstevel@tonic-gate mutex_init(&un->un_linlck_mx, NULL, MUTEX_DEFAULT, NULL); 10250Sstevel@tonic-gate cv_init(&un->un_linlck_cv, NULL, CV_DEFAULT, NULL); 10260Sstevel@tonic-gate un->un_linlck_chn = NULL; 10277627SChris.Horne@Sun.COM 10287627SChris.Horne@Sun.COM /* place various information in the in-core data structures */ 10297627SChris.Horne@Sun.COM md_nblocks_set(mnum, un->c.un_total_blocks); 10300Sstevel@tonic-gate MD_UNIT(mnum) = un; 10310Sstevel@tonic-gate 10320Sstevel@tonic-gate return (0); 10330Sstevel@tonic-gate } 10340Sstevel@tonic-gate 10350Sstevel@tonic-gate /* 10360Sstevel@tonic-gate * NAMES: reset_raid 10370Sstevel@tonic-gate * DESCRIPTION: RAID metadevice reset routine 10380Sstevel@tonic-gate * PARAMETERS: mr_unit_t *un - pointer to a unit structure 10390Sstevel@tonic-gate * minor_t mnum - RAID metadevice minor number 10400Sstevel@tonic-gate * int removing - a flag to imply removing device name from 10410Sstevel@tonic-gate * MDDB database. 
10420Sstevel@tonic-gate */ 10430Sstevel@tonic-gate void 10440Sstevel@tonic-gate reset_raid(mr_unit_t *un, minor_t mnum, int removing) 10450Sstevel@tonic-gate { 10460Sstevel@tonic-gate int i, n = 0; 10470Sstevel@tonic-gate sv_dev_t *sv; 10480Sstevel@tonic-gate mr_column_t *column; 10490Sstevel@tonic-gate int column_cnt = un->un_totalcolumncnt; 10500Sstevel@tonic-gate mddb_recid_t *recids, vtoc_id; 10510Sstevel@tonic-gate int hserr; 10520Sstevel@tonic-gate 10530Sstevel@tonic-gate ASSERT((MDI_UNIT(mnum)->ui_io_lock->io_list_front == NULL) && 10540Sstevel@tonic-gate (MDI_UNIT(mnum)->ui_io_lock->io_list_back == NULL)); 10550Sstevel@tonic-gate 10560Sstevel@tonic-gate md_destroy_unit_incore(mnum, &raid_md_ops); 10570Sstevel@tonic-gate 10587627SChris.Horne@Sun.COM md_nblocks_set(mnum, -1ULL); 10590Sstevel@tonic-gate MD_UNIT(mnum) = NULL; 10600Sstevel@tonic-gate 10610Sstevel@tonic-gate if (un->un_pbuffer) { 10620Sstevel@tonic-gate kmem_free(un->un_pbuffer, dbtob(un->un_iosize)); 10630Sstevel@tonic-gate un->un_pbuffer = NULL; 10640Sstevel@tonic-gate } 10650Sstevel@tonic-gate if (un->un_dbuffer) { 10660Sstevel@tonic-gate kmem_free(un->un_dbuffer, dbtob(un->un_iosize)); 10670Sstevel@tonic-gate un->un_dbuffer = NULL; 10680Sstevel@tonic-gate } 10690Sstevel@tonic-gate 10700Sstevel@tonic-gate /* free all pre-write slots created during build incore */ 10710Sstevel@tonic-gate for (i = 0; i < un->un_totalcolumncnt; i++) 10720Sstevel@tonic-gate raid_free_pw_reservation(un, i); 10730Sstevel@tonic-gate 10740Sstevel@tonic-gate kmem_free(un->un_column_ic, sizeof (mr_column_ic_t) * 10757627SChris.Horne@Sun.COM un->un_totalcolumncnt); 10760Sstevel@tonic-gate 10770Sstevel@tonic-gate kmem_free(un->mr_ic, sizeof (*un->mr_ic)); 10780Sstevel@tonic-gate 10791623Stw21770 /* 10801623Stw21770 * Attempt release of its minor node 10811623Stw21770 */ 10822077Stw21770 md_remove_minor_node(mnum); 10831623Stw21770 10840Sstevel@tonic-gate if (!removing) 10850Sstevel@tonic-gate return; 
10860Sstevel@tonic-gate 10870Sstevel@tonic-gate sv = (sv_dev_t *)kmem_zalloc((column_cnt + 1) * sizeof (sv_dev_t), 10880Sstevel@tonic-gate KM_SLEEP); 10890Sstevel@tonic-gate 10900Sstevel@tonic-gate recids = (mddb_recid_t *) 10910Sstevel@tonic-gate kmem_zalloc((column_cnt + 2) * sizeof (mddb_recid_t), KM_SLEEP); 10920Sstevel@tonic-gate 10930Sstevel@tonic-gate for (i = 0; i < column_cnt; i++) { 10940Sstevel@tonic-gate md_unit_t *comp_un; 10950Sstevel@tonic-gate md_dev64_t comp_dev; 10960Sstevel@tonic-gate 10970Sstevel@tonic-gate column = &un->un_column[i]; 10980Sstevel@tonic-gate sv[i].setno = MD_MIN2SET(mnum); 10990Sstevel@tonic-gate sv[i].key = column->un_orig_key; 11000Sstevel@tonic-gate if (HOTSPARED(un, i)) { 11010Sstevel@tonic-gate if (column->un_devstate & (RCS_ERRED | RCS_LAST_ERRED)) 11020Sstevel@tonic-gate hserr = HS_BAD; 11030Sstevel@tonic-gate else 11040Sstevel@tonic-gate hserr = HS_FREE; 11050Sstevel@tonic-gate raid_hs_release(hserr, un, &recids[n++], i); 11060Sstevel@tonic-gate } 11070Sstevel@tonic-gate /* 11080Sstevel@tonic-gate * deparent any metadevices. 11090Sstevel@tonic-gate * NOTE: currently soft partitions are the only metadevices 11100Sstevel@tonic-gate * allowed in RAID metadevices. 
11110Sstevel@tonic-gate */ 11120Sstevel@tonic-gate comp_dev = column->un_dev; 11130Sstevel@tonic-gate if (md_getmajor(comp_dev) == md_major) { 11140Sstevel@tonic-gate comp_un = MD_UNIT(md_getminor(comp_dev)); 11150Sstevel@tonic-gate recids[n++] = MD_RECID(comp_un); 11160Sstevel@tonic-gate md_reset_parent(comp_dev); 11170Sstevel@tonic-gate } 11180Sstevel@tonic-gate } 11190Sstevel@tonic-gate /* decrement the reference count of the old hsp */ 11200Sstevel@tonic-gate if (un->un_hsp_id != -1) 11210Sstevel@tonic-gate (void) md_hot_spare_ifc(HSP_DECREF, un->un_hsp_id, 0, 0, 11220Sstevel@tonic-gate &recids[n++], NULL, NULL, NULL); 11230Sstevel@tonic-gate recids[n] = 0; 11240Sstevel@tonic-gate MD_STATUS(un) |= MD_UN_BEING_RESET; 11250Sstevel@tonic-gate vtoc_id = un->c.un_vtoc_id; 11260Sstevel@tonic-gate 11270Sstevel@tonic-gate raid_commit(un, recids); 11280Sstevel@tonic-gate 11291623Stw21770 /* 11301623Stw21770 * Remove self from the namespace 11311623Stw21770 */ 11321623Stw21770 if (un->c.un_revision & MD_FN_META_DEV) { 11331623Stw21770 (void) md_rem_selfname(un->c.un_self_id); 11341623Stw21770 } 11350Sstevel@tonic-gate 11360Sstevel@tonic-gate /* Remove the unit structure */ 11370Sstevel@tonic-gate mddb_deleterec_wrapper(un->c.un_record_id); 11380Sstevel@tonic-gate 11390Sstevel@tonic-gate /* Remove the vtoc, if present */ 11400Sstevel@tonic-gate if (vtoc_id) 11410Sstevel@tonic-gate mddb_deleterec_wrapper(vtoc_id); 11420Sstevel@tonic-gate md_rem_names(sv, column_cnt); 11430Sstevel@tonic-gate kmem_free(sv, (column_cnt + 1) * sizeof (sv_dev_t)); 11440Sstevel@tonic-gate kmem_free(recids, (column_cnt + 2) * sizeof (mddb_recid_t)); 11450Sstevel@tonic-gate 11460Sstevel@tonic-gate SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, SVM_TAG_METADEVICE, 11470Sstevel@tonic-gate MD_MIN2SET(mnum), mnum); 11480Sstevel@tonic-gate } 11490Sstevel@tonic-gate 11500Sstevel@tonic-gate /* 11510Sstevel@tonic-gate * NAMES: raid_error_parent 11520Sstevel@tonic-gate * DESCRIPTION: mark a parent structure in 
error 11530Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to child structure 11540Sstevel@tonic-gate * int error - error value to set 11550Sstevel@tonic-gate * NOTE: (TBR) - this routine currently is not in use. 11560Sstevel@tonic-gate */ 11570Sstevel@tonic-gate static void 11580Sstevel@tonic-gate raid_error_parent(md_raidps_t *ps, int error) 11590Sstevel@tonic-gate { 11600Sstevel@tonic-gate mutex_enter(&ps->ps_mx); 11610Sstevel@tonic-gate ps->ps_flags |= MD_RPS_ERROR; 11620Sstevel@tonic-gate ps->ps_error = error; 11630Sstevel@tonic-gate mutex_exit(&ps->ps_mx); 11640Sstevel@tonic-gate } 11650Sstevel@tonic-gate 11660Sstevel@tonic-gate /* 11670Sstevel@tonic-gate * The following defines tell raid_free_parent 11680Sstevel@tonic-gate * RFP_RLS_LOCK release the unit reader lock when done. 11690Sstevel@tonic-gate * RFP_DECR_PWFRAGS decrement ps_pwfrags 11700Sstevel@tonic-gate * RFP_DECR_FRAGS decrement ps_frags 11710Sstevel@tonic-gate * RFP_DECR_READFRAGS read keeps FRAGS and PWFRAGS in lockstep 11720Sstevel@tonic-gate */ 11730Sstevel@tonic-gate #define RFP_RLS_LOCK 0x00001 11740Sstevel@tonic-gate #define RFP_DECR_PWFRAGS 0x00002 11750Sstevel@tonic-gate #define RFP_DECR_FRAGS 0x00004 11760Sstevel@tonic-gate #define RFP_DECR_READFRAGS (RFP_DECR_PWFRAGS | RFP_DECR_FRAGS) 11770Sstevel@tonic-gate 11780Sstevel@tonic-gate /* 11790Sstevel@tonic-gate * NAMES: raid_free_parent 11800Sstevel@tonic-gate * DESCRIPTION: free a parent structure 11810Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to child structure 11820Sstevel@tonic-gate * int todo - indicates what needs to be done 11830Sstevel@tonic-gate */ 11840Sstevel@tonic-gate static void 11850Sstevel@tonic-gate raid_free_parent(md_raidps_t *ps, int todo) 11860Sstevel@tonic-gate { 11870Sstevel@tonic-gate mdi_unit_t *ui = ps->ps_ui; 11880Sstevel@tonic-gate 11890Sstevel@tonic-gate ASSERT(ps->ps_magic == RAID_PSMAGIC); 11900Sstevel@tonic-gate ASSERT(ps->ps_flags & MD_RPS_INUSE); 11910Sstevel@tonic-gate 
mutex_enter(&ps->ps_mx); 11920Sstevel@tonic-gate if (todo & RFP_DECR_PWFRAGS) { 11930Sstevel@tonic-gate ASSERT(ps->ps_pwfrags); 11940Sstevel@tonic-gate ps->ps_pwfrags--; 11950Sstevel@tonic-gate if (ps->ps_pwfrags == 0 && (! (ps->ps_flags & MD_RPS_IODONE))) { 11960Sstevel@tonic-gate if (ps->ps_flags & MD_RPS_ERROR) { 11970Sstevel@tonic-gate ps->ps_bp->b_flags |= B_ERROR; 11980Sstevel@tonic-gate ps->ps_bp->b_error = ps->ps_error; 11990Sstevel@tonic-gate } 12000Sstevel@tonic-gate md_kstat_done(ui, ps->ps_bp, 0); 12010Sstevel@tonic-gate biodone(ps->ps_bp); 12020Sstevel@tonic-gate ps->ps_flags |= MD_RPS_IODONE; 12030Sstevel@tonic-gate } 12040Sstevel@tonic-gate } 12050Sstevel@tonic-gate 12060Sstevel@tonic-gate if (todo & RFP_DECR_FRAGS) { 12070Sstevel@tonic-gate ASSERT(ps->ps_frags); 12080Sstevel@tonic-gate ps->ps_frags--; 12090Sstevel@tonic-gate } 12100Sstevel@tonic-gate 12110Sstevel@tonic-gate if (ps->ps_frags != 0) { 12120Sstevel@tonic-gate mutex_exit(&ps->ps_mx); 12130Sstevel@tonic-gate return; 12140Sstevel@tonic-gate } 12150Sstevel@tonic-gate 12160Sstevel@tonic-gate ASSERT((ps->ps_frags == 0) && (ps->ps_pwfrags == 0)); 12170Sstevel@tonic-gate mutex_exit(&ps->ps_mx); 12180Sstevel@tonic-gate 12190Sstevel@tonic-gate if (todo & RFP_RLS_LOCK) 12200Sstevel@tonic-gate md_io_readerexit(ui); 12210Sstevel@tonic-gate 12220Sstevel@tonic-gate if (panicstr) { 12230Sstevel@tonic-gate ps->ps_flags |= MD_RPS_DONE; 12240Sstevel@tonic-gate return; 12250Sstevel@tonic-gate } 12260Sstevel@tonic-gate 12270Sstevel@tonic-gate if (ps->ps_flags & MD_RPS_HSREQ) 12280Sstevel@tonic-gate (void) raid_hotspares(); 12290Sstevel@tonic-gate 12300Sstevel@tonic-gate ASSERT(todo & RFP_RLS_LOCK); 12310Sstevel@tonic-gate ps->ps_flags &= ~MD_RPS_INUSE; 12320Sstevel@tonic-gate 12330Sstevel@tonic-gate md_dec_iocount(MD_MIN2SET(ps->ps_un->c.un_self_id)); 12340Sstevel@tonic-gate 12350Sstevel@tonic-gate kmem_cache_free(raid_parent_cache, ps); 12360Sstevel@tonic-gate } 12370Sstevel@tonic-gate 
12380Sstevel@tonic-gate /* 12390Sstevel@tonic-gate * NAMES: raid_free_child 12400Sstevel@tonic-gate * DESCRIPTION: free a parent structure 12410Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to child structure 12420Sstevel@tonic-gate * int drop_locks - 0 for no locks held 12430Sstevel@tonic-gate * NOTE: (TBR) - this routine currently is not in use. 12440Sstevel@tonic-gate */ 12450Sstevel@tonic-gate static void 12460Sstevel@tonic-gate raid_free_child(md_raidcs_t *cs, int drop_locks) 12470Sstevel@tonic-gate { 12480Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 12490Sstevel@tonic-gate md_raidcbuf_t *cbuf, *cbuf1; 12500Sstevel@tonic-gate 12510Sstevel@tonic-gate if (cs->cs_pw_inval_list) 12520Sstevel@tonic-gate raid_free_pwinvalidate(cs); 12530Sstevel@tonic-gate 12540Sstevel@tonic-gate if (drop_locks) { 12550Sstevel@tonic-gate ASSERT(cs->cs_flags & MD_RCS_LLOCKD && 12560Sstevel@tonic-gate (cs->cs_flags & (MD_RCS_READER | MD_RCS_WRITER))); 12570Sstevel@tonic-gate md_unit_readerexit(MDI_UNIT(MD_SID(un))); 12580Sstevel@tonic-gate raid_line_exit(cs); 12590Sstevel@tonic-gate } else { 12600Sstevel@tonic-gate ASSERT(!(cs->cs_flags & MD_RCS_LLOCKD)); 12610Sstevel@tonic-gate } 12620Sstevel@tonic-gate 12630Sstevel@tonic-gate freebuffers(cs); 12640Sstevel@tonic-gate cbuf = cs->cs_buflist; 12650Sstevel@tonic-gate while (cbuf) { 12660Sstevel@tonic-gate cbuf1 = cbuf->cbuf_next; 12670Sstevel@tonic-gate kmem_cache_free(raid_cbuf_cache, cbuf); 12680Sstevel@tonic-gate cbuf = cbuf1; 12690Sstevel@tonic-gate } 12700Sstevel@tonic-gate if (cs->cs_dbuf.b_flags & B_REMAPPED) 12710Sstevel@tonic-gate bp_mapout(&cs->cs_dbuf); 12720Sstevel@tonic-gate kmem_cache_free(raid_child_cache, cs); 12730Sstevel@tonic-gate } 12740Sstevel@tonic-gate 12750Sstevel@tonic-gate /* 12760Sstevel@tonic-gate * NAME: raid_regen_parity 12770Sstevel@tonic-gate * 12780Sstevel@tonic-gate * DESCRIPTION: This routine is used to regenerate the parity blocks 12790Sstevel@tonic-gate * for the entire raid device. 
It is called from 12800Sstevel@tonic-gate * both the regen thread and the IO path. 12810Sstevel@tonic-gate * 12820Sstevel@tonic-gate * On error the entire device is marked as in error by 12830Sstevel@tonic-gate * placing the erroring device in error and all other 12840Sstevel@tonic-gate * devices in last_errored. 12850Sstevel@tonic-gate * 12860Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs 12870Sstevel@tonic-gate */ 12880Sstevel@tonic-gate void 12890Sstevel@tonic-gate raid_regen_parity(md_raidcs_t *cs) 12900Sstevel@tonic-gate { 12910Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 12920Sstevel@tonic-gate mdi_unit_t *ui = MDI_UNIT(un->c.un_self_id); 12930Sstevel@tonic-gate caddr_t buffer; 12940Sstevel@tonic-gate caddr_t parity_buffer; 12950Sstevel@tonic-gate buf_t *bp; 12960Sstevel@tonic-gate uint_t *dbuf, *pbuf; 12970Sstevel@tonic-gate uint_t colcnt = un->un_totalcolumncnt; 12980Sstevel@tonic-gate int column; 12990Sstevel@tonic-gate int parity_column = cs->cs_pcolumn; 13000Sstevel@tonic-gate size_t bcount; 13010Sstevel@tonic-gate int j; 13020Sstevel@tonic-gate 13030Sstevel@tonic-gate /* 13040Sstevel@tonic-gate * This routine uses the data and parity buffers allocated to a 13050Sstevel@tonic-gate * write. In the case of a read the buffers are allocated and 13060Sstevel@tonic-gate * freed at the end. 
13070Sstevel@tonic-gate */ 13080Sstevel@tonic-gate 13090Sstevel@tonic-gate ASSERT(IO_READER_HELD(un)); 13100Sstevel@tonic-gate ASSERT(cs->cs_flags & MD_RCS_LLOCKD); 13110Sstevel@tonic-gate ASSERT(UNIT_READER_HELD(un)); 13120Sstevel@tonic-gate 13130Sstevel@tonic-gate if (raid_state_cnt(un, RCS_OKAY) != colcnt) 13140Sstevel@tonic-gate return; 13150Sstevel@tonic-gate 13160Sstevel@tonic-gate if (cs->cs_flags & MD_RCS_READER) { 13170Sstevel@tonic-gate getpbuffer(cs); 13180Sstevel@tonic-gate getdbuffer(cs); 13190Sstevel@tonic-gate } 13200Sstevel@tonic-gate ASSERT(cs->cs_dbuffer && cs->cs_pbuffer); 13210Sstevel@tonic-gate bcount = cs->cs_bcount; 13220Sstevel@tonic-gate buffer = cs->cs_dbuffer; 13230Sstevel@tonic-gate parity_buffer = cs->cs_pbuffer; 13240Sstevel@tonic-gate bzero(parity_buffer, bcount); 13250Sstevel@tonic-gate bp = &cs->cs_dbuf; 13260Sstevel@tonic-gate for (column = 0; column < colcnt; column++) { 13270Sstevel@tonic-gate if (column == parity_column) 13280Sstevel@tonic-gate continue; 13290Sstevel@tonic-gate reset_buf(bp, B_READ | B_BUSY, bcount); 13300Sstevel@tonic-gate bp->b_un.b_addr = buffer; 13310Sstevel@tonic-gate bp->b_edev = md_dev64_to_dev(un->un_column[column].un_dev); 13320Sstevel@tonic-gate bp->b_lblkno = cs->cs_blkno + un->un_column[column].un_devstart; 13330Sstevel@tonic-gate bp->b_bcount = bcount; 13340Sstevel@tonic-gate bp->b_bufsize = bcount; 13350Sstevel@tonic-gate (void) md_call_strategy(bp, MD_STR_NOTTOP, NULL); 13360Sstevel@tonic-gate if (biowait(bp)) 13370Sstevel@tonic-gate goto bail; 13380Sstevel@tonic-gate pbuf = (uint_t *)(void *)parity_buffer; 13390Sstevel@tonic-gate dbuf = (uint_t *)(void *)buffer; 13400Sstevel@tonic-gate for (j = 0; j < (bcount / (sizeof (uint_t))); j++) { 13410Sstevel@tonic-gate *pbuf = *pbuf ^ *dbuf; 13420Sstevel@tonic-gate pbuf++; 13430Sstevel@tonic-gate dbuf++; 13440Sstevel@tonic-gate } 13450Sstevel@tonic-gate } 13460Sstevel@tonic-gate 13470Sstevel@tonic-gate reset_buf(bp, B_WRITE | B_BUSY, cs->cs_bcount); 
13480Sstevel@tonic-gate bp->b_un.b_addr = parity_buffer; 13490Sstevel@tonic-gate bp->b_edev = md_dev64_to_dev(un->un_column[parity_column].un_dev); 13500Sstevel@tonic-gate bp->b_lblkno = cs->cs_blkno + un->un_column[parity_column].un_devstart; 13510Sstevel@tonic-gate bp->b_bcount = bcount; 13520Sstevel@tonic-gate bp->b_bufsize = bcount; 13530Sstevel@tonic-gate (void) md_call_strategy(bp, MD_STR_NOTTOP, NULL); 13540Sstevel@tonic-gate if (biowait(bp)) 13550Sstevel@tonic-gate goto bail; 13560Sstevel@tonic-gate 13570Sstevel@tonic-gate if (cs->cs_flags & MD_RCS_READER) { 13580Sstevel@tonic-gate freebuffers(cs); 13590Sstevel@tonic-gate cs->cs_pbuffer = NULL; 13600Sstevel@tonic-gate cs->cs_dbuffer = NULL; 13610Sstevel@tonic-gate } 13620Sstevel@tonic-gate bp->b_chain = (struct buf *)cs; 13630Sstevel@tonic-gate return; 13640Sstevel@tonic-gate bail: 13650Sstevel@tonic-gate if (cs->cs_flags & MD_RCS_READER) { 13660Sstevel@tonic-gate freebuffers(cs); 13670Sstevel@tonic-gate cs->cs_pbuffer = NULL; 13680Sstevel@tonic-gate cs->cs_dbuffer = NULL; 13690Sstevel@tonic-gate } 13700Sstevel@tonic-gate md_unit_readerexit(ui); 13710Sstevel@tonic-gate un = md_unit_writerlock(ui); 13720Sstevel@tonic-gate raid_set_state(un, column, RCS_ERRED, 0); 13730Sstevel@tonic-gate for (column = 0; column < colcnt; column++) 13740Sstevel@tonic-gate raid_set_state(un, column, RCS_ERRED, 0); 13750Sstevel@tonic-gate raid_commit(un, NULL); 13760Sstevel@tonic-gate md_unit_writerexit(ui); 13770Sstevel@tonic-gate un = md_unit_readerlock(ui); 13780Sstevel@tonic-gate bp->b_chain = (struct buf *)cs; 13790Sstevel@tonic-gate } 13800Sstevel@tonic-gate 13810Sstevel@tonic-gate /* 13820Sstevel@tonic-gate * NAMES: raid_error_state 13830Sstevel@tonic-gate * DESCRIPTION: check unit and column states' impact on I/O error 13840Sstevel@tonic-gate * NOTE: the state now may not be the state when the 13850Sstevel@tonic-gate * I/O completed due to race conditions. 
13860Sstevel@tonic-gate * PARAMETERS: mr_unit_t *un - pointer to raid unit structure 13870Sstevel@tonic-gate * md_raidcs_t *cs - pointer to child structure 13880Sstevel@tonic-gate * buf_t *bp - pointer to buffer structure 13890Sstevel@tonic-gate */ 13900Sstevel@tonic-gate static int 13910Sstevel@tonic-gate raid_error_state(mr_unit_t *un, buf_t *bp) 13920Sstevel@tonic-gate { 13930Sstevel@tonic-gate int column; 13940Sstevel@tonic-gate int i; 13950Sstevel@tonic-gate 13960Sstevel@tonic-gate ASSERT(IO_READER_HELD(un)); 13970Sstevel@tonic-gate ASSERT(UNIT_WRITER_HELD(un)); 13980Sstevel@tonic-gate 13990Sstevel@tonic-gate column = -1; 14000Sstevel@tonic-gate for (i = 0; i < un->un_totalcolumncnt; i++) { 14010Sstevel@tonic-gate if (un->un_column[i].un_dev == md_expldev(bp->b_edev)) { 14020Sstevel@tonic-gate column = i; 14030Sstevel@tonic-gate break; 14040Sstevel@tonic-gate } 14050Sstevel@tonic-gate if (un->un_column[i].un_alt_dev == md_expldev(bp->b_edev)) { 14060Sstevel@tonic-gate column = i; 14070Sstevel@tonic-gate break; 14080Sstevel@tonic-gate } 14090Sstevel@tonic-gate } 14100Sstevel@tonic-gate 14110Sstevel@tonic-gate /* in case a replace snuck in while waiting on unit writer lock */ 14120Sstevel@tonic-gate 14130Sstevel@tonic-gate if (column == -1) { 14140Sstevel@tonic-gate return (0); 14150Sstevel@tonic-gate } 14160Sstevel@tonic-gate 14170Sstevel@tonic-gate (void) raid_set_state(un, column, RCS_ERRED, 0); 14180Sstevel@tonic-gate ASSERT(un->un_state & (RUS_ERRED | RUS_LAST_ERRED)); 14190Sstevel@tonic-gate 14200Sstevel@tonic-gate raid_commit(un, NULL); 14210Sstevel@tonic-gate if (un->un_state & RUS_ERRED) { 14220Sstevel@tonic-gate SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, SVM_TAG_METADEVICE, 14230Sstevel@tonic-gate MD_UN2SET(un), MD_SID(un)); 14240Sstevel@tonic-gate } else if (un->un_state & RUS_LAST_ERRED) { 14250Sstevel@tonic-gate SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED, SVM_TAG_METADEVICE, 14260Sstevel@tonic-gate MD_UN2SET(un), MD_SID(un)); 14270Sstevel@tonic-gate } 
14280Sstevel@tonic-gate 14290Sstevel@tonic-gate return (EIO); 14300Sstevel@tonic-gate } 14310Sstevel@tonic-gate 14320Sstevel@tonic-gate /* 14330Sstevel@tonic-gate * NAME: raid_mapin_buf 14340Sstevel@tonic-gate * DESCRIPTION: wait for the input buffer header to be maped in 14350Sstevel@tonic-gate * PARAMETERS: md_raidps_t *ps 14360Sstevel@tonic-gate */ 14370Sstevel@tonic-gate static void 14380Sstevel@tonic-gate raid_mapin_buf(md_raidcs_t *cs) 14390Sstevel@tonic-gate { 14400Sstevel@tonic-gate md_raidps_t *ps = cs->cs_ps; 14410Sstevel@tonic-gate 14420Sstevel@tonic-gate /* 14430Sstevel@tonic-gate * check to see if the buffer is maped. If all is ok return the 14440Sstevel@tonic-gate * offset of the data and return. Since it is expensive to grab 14450Sstevel@tonic-gate * a mutex this is only done if the mapin is not complete. 14460Sstevel@tonic-gate * Once the mutex is aquired it is possible that the mapin was 14470Sstevel@tonic-gate * not done so recheck and if necessary do the mapin. 14480Sstevel@tonic-gate */ 14490Sstevel@tonic-gate if (ps->ps_mapin > 0) { 14500Sstevel@tonic-gate cs->cs_addr = ps->ps_addr + cs->cs_offset; 14510Sstevel@tonic-gate return; 14520Sstevel@tonic-gate } 14530Sstevel@tonic-gate mutex_enter(&ps->ps_mapin_mx); 14540Sstevel@tonic-gate if (ps->ps_mapin > 0) { 14550Sstevel@tonic-gate cs->cs_addr = ps->ps_addr + cs->cs_offset; 14560Sstevel@tonic-gate mutex_exit(&ps->ps_mapin_mx); 14570Sstevel@tonic-gate return; 14580Sstevel@tonic-gate } 14590Sstevel@tonic-gate bp_mapin(ps->ps_bp); 14600Sstevel@tonic-gate /* 14610Sstevel@tonic-gate * get the new b_addr out of the parent since bp_mapin just changed it 14620Sstevel@tonic-gate */ 14630Sstevel@tonic-gate ps->ps_addr = ps->ps_bp->b_un.b_addr; 14640Sstevel@tonic-gate cs->cs_addr = ps->ps_addr + cs->cs_offset; 14650Sstevel@tonic-gate ps->ps_mapin++; 14660Sstevel@tonic-gate mutex_exit(&ps->ps_mapin_mx); 14670Sstevel@tonic-gate } 14680Sstevel@tonic-gate 14690Sstevel@tonic-gate /* 14700Sstevel@tonic-gate * 
NAMES: raid_read_no_retry 14710Sstevel@tonic-gate * DESCRIPTION: I/O retry routine for a RAID metadevice read 14720Sstevel@tonic-gate * read failed attempting to regenerate the data, 14730Sstevel@tonic-gate * no retry possible, error occured in raid_raidregenloop(). 14740Sstevel@tonic-gate * PARAMETERS: mr_unit_t *un - pointer to raid unit structure 14750Sstevel@tonic-gate * md_raidcs_t *cs - pointer to child structure 14760Sstevel@tonic-gate */ 14770Sstevel@tonic-gate /*ARGSUSED*/ 14780Sstevel@tonic-gate static void 14790Sstevel@tonic-gate raid_read_no_retry(mr_unit_t *un, md_raidcs_t *cs) 14800Sstevel@tonic-gate { 14810Sstevel@tonic-gate md_raidps_t *ps = cs->cs_ps; 14820Sstevel@tonic-gate 14830Sstevel@tonic-gate raid_error_parent(ps, EIO); 14840Sstevel@tonic-gate raid_free_child(cs, 1); 14850Sstevel@tonic-gate 14860Sstevel@tonic-gate /* decrement readfrags */ 14870Sstevel@tonic-gate raid_free_parent(ps, RFP_DECR_READFRAGS | RFP_RLS_LOCK); 14880Sstevel@tonic-gate } 14890Sstevel@tonic-gate 14900Sstevel@tonic-gate /* 14910Sstevel@tonic-gate * NAMES: raid_read_retry 14920Sstevel@tonic-gate * DESCRIPTION: I/O retry routine for a RAID metadevice read 14930Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to child structure 14940Sstevel@tonic-gate */ 14950Sstevel@tonic-gate static void 14960Sstevel@tonic-gate raid_read_retry(mr_unit_t *un, md_raidcs_t *cs) 14970Sstevel@tonic-gate { 14980Sstevel@tonic-gate /* re-initialize the buf_t structure for raid_read() */ 14990Sstevel@tonic-gate cs->cs_dbuf.b_chain = (struct buf *)cs; 15000Sstevel@tonic-gate cs->cs_dbuf.b_back = &cs->cs_dbuf; 15010Sstevel@tonic-gate cs->cs_dbuf.b_forw = &cs->cs_dbuf; 15020Sstevel@tonic-gate cs->cs_dbuf.b_flags = B_BUSY; /* initialize flags */ 15030Sstevel@tonic-gate cs->cs_dbuf.b_error = 0; /* initialize error */ 15040Sstevel@tonic-gate cs->cs_dbuf.b_offset = -1; 15050Sstevel@tonic-gate /* Initialize semaphores */ 15060Sstevel@tonic-gate sema_init(&cs->cs_dbuf.b_io, 0, NULL, 
15070Sstevel@tonic-gate SEMA_DEFAULT, NULL); 15080Sstevel@tonic-gate sema_init(&cs->cs_dbuf.b_sem, 0, NULL, 15090Sstevel@tonic-gate SEMA_DEFAULT, NULL); 15100Sstevel@tonic-gate 15110Sstevel@tonic-gate cs->cs_pbuf.b_chain = (struct buf *)cs; 15120Sstevel@tonic-gate cs->cs_pbuf.b_back = &cs->cs_pbuf; 15130Sstevel@tonic-gate cs->cs_pbuf.b_forw = &cs->cs_pbuf; 15140Sstevel@tonic-gate cs->cs_pbuf.b_flags = B_BUSY; /* initialize flags */ 15150Sstevel@tonic-gate cs->cs_pbuf.b_error = 0; /* initialize error */ 15160Sstevel@tonic-gate cs->cs_pbuf.b_offset = -1; 15170Sstevel@tonic-gate sema_init(&cs->cs_pbuf.b_io, 0, NULL, 15180Sstevel@tonic-gate SEMA_DEFAULT, NULL); 15190Sstevel@tonic-gate sema_init(&cs->cs_pbuf.b_sem, 0, NULL, 15200Sstevel@tonic-gate SEMA_DEFAULT, NULL); 15210Sstevel@tonic-gate 15220Sstevel@tonic-gate cs->cs_flags &= ~MD_RCS_ERROR; /* reset child error flag */ 15230Sstevel@tonic-gate cs->cs_flags |= MD_RCS_RECOVERY; /* set RECOVERY flag */ 15240Sstevel@tonic-gate 15250Sstevel@tonic-gate /* 15260Sstevel@tonic-gate * re-scheduling I/O with raid_read_io() is simpler. basically, 15270Sstevel@tonic-gate * raid_read_io() is invoked again with same child structure. 15280Sstevel@tonic-gate * (NOTE: we aren`t supposed to do any error recovery when an I/O 15290Sstevel@tonic-gate * error occured in raid_raidregenloop(). 15300Sstevel@tonic-gate */ 15310Sstevel@tonic-gate raid_mapin_buf(cs); 15320Sstevel@tonic-gate raid_read_io(un, cs); 15330Sstevel@tonic-gate } 15340Sstevel@tonic-gate 15350Sstevel@tonic-gate /* 15360Sstevel@tonic-gate * NAMES: raid_rderr 15370Sstevel@tonic-gate * DESCRIPTION: I/O error handling routine for a RAID metadevice read 15380Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to child structure 15390Sstevel@tonic-gate * LOCKS: must obtain unit writer lock while calling raid_error_state 15400Sstevel@tonic-gate * since a unit or column state transition may take place. 15410Sstevel@tonic-gate * must obtain unit reader lock to retry I/O. 
15420Sstevel@tonic-gate */ 15430Sstevel@tonic-gate /*ARGSUSED*/ 15440Sstevel@tonic-gate static void 15450Sstevel@tonic-gate raid_rderr(md_raidcs_t *cs) 15460Sstevel@tonic-gate { 15470Sstevel@tonic-gate md_raidps_t *ps; 15480Sstevel@tonic-gate mdi_unit_t *ui; 15490Sstevel@tonic-gate mr_unit_t *un; 15500Sstevel@tonic-gate int error = 0; 15510Sstevel@tonic-gate 15520Sstevel@tonic-gate ps = cs->cs_ps; 15530Sstevel@tonic-gate ui = ps->ps_ui; 15540Sstevel@tonic-gate un = (mr_unit_t *)md_unit_writerlock(ui); 15550Sstevel@tonic-gate ASSERT(un != 0); 15560Sstevel@tonic-gate 15570Sstevel@tonic-gate if (cs->cs_dbuf.b_flags & B_ERROR) 15580Sstevel@tonic-gate error = raid_error_state(un, &cs->cs_dbuf); 15590Sstevel@tonic-gate if (cs->cs_pbuf.b_flags & B_ERROR) 15600Sstevel@tonic-gate error |= raid_error_state(un, &cs->cs_pbuf); 15610Sstevel@tonic-gate 15620Sstevel@tonic-gate md_unit_writerexit(ui); 15630Sstevel@tonic-gate 15640Sstevel@tonic-gate ps->ps_flags |= MD_RPS_HSREQ; 15650Sstevel@tonic-gate 15660Sstevel@tonic-gate un = (mr_unit_t *)md_unit_readerlock(ui); 15670Sstevel@tonic-gate ASSERT(un != 0); 15680Sstevel@tonic-gate /* now attempt the appropriate retry routine */ 15690Sstevel@tonic-gate (*(cs->cs_retry_call))(un, cs); 15700Sstevel@tonic-gate } 15710Sstevel@tonic-gate 15720Sstevel@tonic-gate 15730Sstevel@tonic-gate /* 15740Sstevel@tonic-gate * NAMES: raid_read_error 15750Sstevel@tonic-gate * DESCRIPTION: I/O error handling routine for a RAID metadevice read 15760Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to child structure 15770Sstevel@tonic-gate */ 15780Sstevel@tonic-gate /*ARGSUSED*/ 15790Sstevel@tonic-gate static void 15800Sstevel@tonic-gate raid_read_error(md_raidcs_t *cs) 15810Sstevel@tonic-gate { 15820Sstevel@tonic-gate md_raidps_t *ps; 15830Sstevel@tonic-gate mdi_unit_t *ui; 15840Sstevel@tonic-gate mr_unit_t *un; 15850Sstevel@tonic-gate set_t setno; 15860Sstevel@tonic-gate 15870Sstevel@tonic-gate ps = cs->cs_ps; 15880Sstevel@tonic-gate ui = 
ps->ps_ui; 15890Sstevel@tonic-gate un = cs->cs_un; 15900Sstevel@tonic-gate 15910Sstevel@tonic-gate setno = MD_UN2SET(un); 15920Sstevel@tonic-gate 15930Sstevel@tonic-gate if ((cs->cs_dbuf.b_flags & B_ERROR) && 15940Sstevel@tonic-gate (COLUMN_STATE(un, cs->cs_dcolumn) != RCS_ERRED) && 15950Sstevel@tonic-gate (COLUMN_STATE(un, cs->cs_dcolumn) != RCS_LAST_ERRED)) 15960Sstevel@tonic-gate cmn_err(CE_WARN, "md %s: read error on %s", 15970Sstevel@tonic-gate md_shortname(MD_SID(un)), 15980Sstevel@tonic-gate md_devname(setno, md_expldev(cs->cs_dbuf.b_edev), NULL, 0)); 15990Sstevel@tonic-gate 16000Sstevel@tonic-gate if ((cs->cs_pbuf.b_flags & B_ERROR) && 16010Sstevel@tonic-gate (COLUMN_STATE(un, cs->cs_pcolumn) != RCS_ERRED) && 16020Sstevel@tonic-gate (COLUMN_STATE(un, cs->cs_pcolumn) != RCS_LAST_ERRED)) 16030Sstevel@tonic-gate cmn_err(CE_WARN, "md %s: read error on %s", 16040Sstevel@tonic-gate md_shortname(MD_SID(un)), 16050Sstevel@tonic-gate md_devname(setno, md_expldev(cs->cs_pbuf.b_edev), NULL, 0)); 16060Sstevel@tonic-gate 16070Sstevel@tonic-gate md_unit_readerexit(ui); 16080Sstevel@tonic-gate 16090Sstevel@tonic-gate ASSERT(cs->cs_frags == 0); 16100Sstevel@tonic-gate 16110Sstevel@tonic-gate /* now schedule processing for possible state change */ 16120Sstevel@tonic-gate daemon_request(&md_mstr_daemon, raid_rderr, 16137627SChris.Horne@Sun.COM (daemon_queue_t *)cs, REQ_OLD); 16140Sstevel@tonic-gate 16150Sstevel@tonic-gate } 16160Sstevel@tonic-gate 16170Sstevel@tonic-gate /* 16180Sstevel@tonic-gate * NAMES: getdbuffer 16190Sstevel@tonic-gate * DESCRIPTION: data buffer allocation for a child structure 16200Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to child structure 16210Sstevel@tonic-gate * 16220Sstevel@tonic-gate * NOTE: always get dbuffer before pbuffer 16230Sstevel@tonic-gate * and get both buffers before pwslot 16240Sstevel@tonic-gate * otherwise a deadlock could be introduced. 
16250Sstevel@tonic-gate */ 16260Sstevel@tonic-gate static void 16270Sstevel@tonic-gate getdbuffer(md_raidcs_t *cs) 16280Sstevel@tonic-gate { 16290Sstevel@tonic-gate mr_unit_t *un; 16300Sstevel@tonic-gate 16310Sstevel@tonic-gate cs->cs_dbuffer = kmem_alloc(cs->cs_bcount + DEV_BSIZE, KM_NOSLEEP); 16320Sstevel@tonic-gate if (cs->cs_dbuffer != NULL) 16330Sstevel@tonic-gate return; 16340Sstevel@tonic-gate un = cs->cs_ps->ps_un; 16350Sstevel@tonic-gate mutex_enter(&un->un_mx); 16360Sstevel@tonic-gate while (un->un_dbuffer == NULL) { 16370Sstevel@tonic-gate STAT_INC(data_buffer_waits); 16380Sstevel@tonic-gate un->un_rflags |= MD_RFLAG_NEEDBUF; 16390Sstevel@tonic-gate cv_wait(&un->un_cv, &un->un_mx); 16400Sstevel@tonic-gate } 16410Sstevel@tonic-gate cs->cs_dbuffer = un->un_dbuffer; 16420Sstevel@tonic-gate cs->cs_flags |= MD_RCS_UNDBUF; 16430Sstevel@tonic-gate un->un_dbuffer = NULL; 16440Sstevel@tonic-gate mutex_exit(&un->un_mx); 16450Sstevel@tonic-gate } 16460Sstevel@tonic-gate 16470Sstevel@tonic-gate /* 16480Sstevel@tonic-gate * NAMES: getpbuffer 16490Sstevel@tonic-gate * DESCRIPTION: parity buffer allocation for a child structure 16500Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to child structure 16510Sstevel@tonic-gate * 16520Sstevel@tonic-gate * NOTE: always get dbuffer before pbuffer 16530Sstevel@tonic-gate * and get both buffers before pwslot 16540Sstevel@tonic-gate * otherwise a deadlock could be introduced. 
16550Sstevel@tonic-gate */ 16560Sstevel@tonic-gate static void 16570Sstevel@tonic-gate getpbuffer(md_raidcs_t *cs) 16580Sstevel@tonic-gate { 16590Sstevel@tonic-gate mr_unit_t *un; 16600Sstevel@tonic-gate 16610Sstevel@tonic-gate cs->cs_pbuffer = kmem_alloc(cs->cs_bcount + DEV_BSIZE, KM_NOSLEEP); 16620Sstevel@tonic-gate if (cs->cs_pbuffer != NULL) 16630Sstevel@tonic-gate return; 16640Sstevel@tonic-gate un = cs->cs_ps->ps_un; 16650Sstevel@tonic-gate mutex_enter(&un->un_mx); 16660Sstevel@tonic-gate while (un->un_pbuffer == NULL) { 16670Sstevel@tonic-gate STAT_INC(parity_buffer_waits); 16680Sstevel@tonic-gate un->un_rflags |= MD_RFLAG_NEEDBUF; 16690Sstevel@tonic-gate cv_wait(&un->un_cv, &un->un_mx); 16700Sstevel@tonic-gate } 16710Sstevel@tonic-gate cs->cs_pbuffer = un->un_pbuffer; 16720Sstevel@tonic-gate cs->cs_flags |= MD_RCS_UNPBUF; 16730Sstevel@tonic-gate un->un_pbuffer = NULL; 16740Sstevel@tonic-gate mutex_exit(&un->un_mx); 16750Sstevel@tonic-gate } 16760Sstevel@tonic-gate static void 16770Sstevel@tonic-gate getresources(md_raidcs_t *cs) 16780Sstevel@tonic-gate { 16790Sstevel@tonic-gate md_raidcbuf_t *cbuf; 16800Sstevel@tonic-gate /* 16810Sstevel@tonic-gate * NOTE: always get dbuffer before pbuffer 16820Sstevel@tonic-gate * and get both buffers before pwslot 16830Sstevel@tonic-gate * otherwise a deadlock could be introduced. 
16840Sstevel@tonic-gate */ 16850Sstevel@tonic-gate getdbuffer(cs); 16860Sstevel@tonic-gate getpbuffer(cs); 16870Sstevel@tonic-gate for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) 16880Sstevel@tonic-gate cbuf->cbuf_buffer = 16890Sstevel@tonic-gate kmem_alloc(cs->cs_bcount + DEV_BSIZE, KM_SLEEP); 16900Sstevel@tonic-gate } 16910Sstevel@tonic-gate /* 16920Sstevel@tonic-gate * NAMES: freebuffers 16930Sstevel@tonic-gate * DESCRIPTION: child structure buffer freeing routine 16940Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to child structure 16950Sstevel@tonic-gate */ 16960Sstevel@tonic-gate static void 16970Sstevel@tonic-gate freebuffers(md_raidcs_t *cs) 16980Sstevel@tonic-gate { 16990Sstevel@tonic-gate mr_unit_t *un; 17000Sstevel@tonic-gate md_raidcbuf_t *cbuf; 17010Sstevel@tonic-gate 17020Sstevel@tonic-gate /* free buffers used for full line write */ 17030Sstevel@tonic-gate for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) { 17040Sstevel@tonic-gate if (cbuf->cbuf_buffer == NULL) 17050Sstevel@tonic-gate continue; 17060Sstevel@tonic-gate kmem_free(cbuf->cbuf_buffer, cbuf->cbuf_bcount + DEV_BSIZE); 17070Sstevel@tonic-gate cbuf->cbuf_buffer = NULL; 17080Sstevel@tonic-gate cbuf->cbuf_bcount = 0; 17090Sstevel@tonic-gate } 17100Sstevel@tonic-gate 17110Sstevel@tonic-gate if (cs->cs_flags & (MD_RCS_UNDBUF | MD_RCS_UNPBUF)) { 17120Sstevel@tonic-gate un = cs->cs_un; 17130Sstevel@tonic-gate mutex_enter(&un->un_mx); 17140Sstevel@tonic-gate } 17150Sstevel@tonic-gate if (cs->cs_dbuffer) { 17160Sstevel@tonic-gate if (cs->cs_flags & MD_RCS_UNDBUF) 17170Sstevel@tonic-gate un->un_dbuffer = cs->cs_dbuffer; 17180Sstevel@tonic-gate else 17190Sstevel@tonic-gate kmem_free(cs->cs_dbuffer, cs->cs_bcount + DEV_BSIZE); 17200Sstevel@tonic-gate } 17210Sstevel@tonic-gate if (cs->cs_pbuffer) { 17220Sstevel@tonic-gate if (cs->cs_flags & MD_RCS_UNPBUF) 17230Sstevel@tonic-gate un->un_pbuffer = cs->cs_pbuffer; 17240Sstevel@tonic-gate else 17250Sstevel@tonic-gate 
kmem_free(cs->cs_pbuffer, cs->cs_bcount + DEV_BSIZE); 17260Sstevel@tonic-gate } 17270Sstevel@tonic-gate if (cs->cs_flags & (MD_RCS_UNDBUF | MD_RCS_UNPBUF)) { 17280Sstevel@tonic-gate un->un_rflags &= ~MD_RFLAG_NEEDBUF; 17290Sstevel@tonic-gate cv_broadcast(&un->un_cv); 17300Sstevel@tonic-gate mutex_exit(&un->un_mx); 17310Sstevel@tonic-gate } 17320Sstevel@tonic-gate } 17330Sstevel@tonic-gate 17340Sstevel@tonic-gate /* 17350Sstevel@tonic-gate * NAMES: raid_line_reader_lock, raid_line_writer_lock 17360Sstevel@tonic-gate * DESCRIPTION: RAID metadevice line reader and writer lock routines 17370Sstevel@tonic-gate * data column # and parity column #. 17380Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to child structure 17390Sstevel@tonic-gate */ 17400Sstevel@tonic-gate 17410Sstevel@tonic-gate void 17420Sstevel@tonic-gate raid_line_reader_lock(md_raidcs_t *cs, int resync_thread) 17430Sstevel@tonic-gate { 17440Sstevel@tonic-gate mr_unit_t *un; 17450Sstevel@tonic-gate md_raidcs_t *cs1; 17460Sstevel@tonic-gate 17470Sstevel@tonic-gate ASSERT(cs->cs_line != MD_DISKADDR_ERROR); 17480Sstevel@tonic-gate un = cs->cs_un; 17490Sstevel@tonic-gate cs->cs_flags |= MD_RCS_READER; 17500Sstevel@tonic-gate STAT_CHECK(raid_line_lock_wait, MUTEX_HELD(&un->un_linlck_mx)); 17510Sstevel@tonic-gate if (!panicstr) 17520Sstevel@tonic-gate mutex_enter(&un->un_linlck_mx); 17530Sstevel@tonic-gate cs1 = un->un_linlck_chn; 17540Sstevel@tonic-gate while (cs1 != NULL) { 17550Sstevel@tonic-gate for (cs1 = un->un_linlck_chn; cs1; cs1 = cs1->cs_linlck_next) 17560Sstevel@tonic-gate if (raid_io_overlaps(cs, cs1) == 1) 17570Sstevel@tonic-gate if (cs1->cs_flags & MD_RCS_WRITER) 17580Sstevel@tonic-gate break; 17590Sstevel@tonic-gate 17600Sstevel@tonic-gate if (cs1 != NULL) { 17610Sstevel@tonic-gate if (panicstr) 17620Sstevel@tonic-gate panic("md; raid line write lock held"); 17630Sstevel@tonic-gate un->un_linlck_flg = 1; 17640Sstevel@tonic-gate cv_wait(&un->un_linlck_cv, &un->un_linlck_mx); 
17650Sstevel@tonic-gate STAT_INC(raid_read_waits); 17660Sstevel@tonic-gate } 17670Sstevel@tonic-gate } 17680Sstevel@tonic-gate STAT_MAX(raid_max_reader_locks, raid_reader_locks_active); 17690Sstevel@tonic-gate STAT_INC(raid_reader_locks); 17700Sstevel@tonic-gate cs1 = un->un_linlck_chn; 17710Sstevel@tonic-gate if (cs1 != NULL) 17720Sstevel@tonic-gate cs1->cs_linlck_prev = cs; 17730Sstevel@tonic-gate cs->cs_linlck_next = cs1; 17740Sstevel@tonic-gate cs->cs_linlck_prev = NULL; 17750Sstevel@tonic-gate un->un_linlck_chn = cs; 17760Sstevel@tonic-gate cs->cs_flags |= MD_RCS_LLOCKD; 17770Sstevel@tonic-gate if (resync_thread) { 17780Sstevel@tonic-gate diskaddr_t lastblk = cs->cs_blkno + cs->cs_blkcnt - 1; 17790Sstevel@tonic-gate diskaddr_t line = (lastblk + 1) / un->un_segsize; 17800Sstevel@tonic-gate ASSERT(raid_state_cnt(un, RCS_RESYNC)); 17810Sstevel@tonic-gate mutex_enter(&un->un_mx); 17820Sstevel@tonic-gate un->un_resync_line_index = line; 17830Sstevel@tonic-gate mutex_exit(&un->un_mx); 17840Sstevel@tonic-gate } 17850Sstevel@tonic-gate if (!panicstr) 17860Sstevel@tonic-gate mutex_exit(&un->un_linlck_mx); 17870Sstevel@tonic-gate } 17880Sstevel@tonic-gate 17890Sstevel@tonic-gate int 17900Sstevel@tonic-gate raid_line_writer_lock(md_raidcs_t *cs, int lock) 17910Sstevel@tonic-gate { 17920Sstevel@tonic-gate mr_unit_t *un; 17930Sstevel@tonic-gate md_raidcs_t *cs1; 17940Sstevel@tonic-gate 17950Sstevel@tonic-gate ASSERT(cs->cs_line != MD_DISKADDR_ERROR); 17960Sstevel@tonic-gate cs->cs_flags |= MD_RCS_WRITER; 17970Sstevel@tonic-gate un = cs->cs_ps->ps_un; 17980Sstevel@tonic-gate 17990Sstevel@tonic-gate STAT_CHECK(raid_line_lock_wait, MUTEX_HELD(&un->un_linlck_mx)); 18000Sstevel@tonic-gate if (lock && !panicstr) 18010Sstevel@tonic-gate mutex_enter(&un->un_linlck_mx); 18020Sstevel@tonic-gate ASSERT(MUTEX_HELD(&un->un_linlck_mx)); 18030Sstevel@tonic-gate 18040Sstevel@tonic-gate cs1 = un->un_linlck_chn; 18050Sstevel@tonic-gate for (cs1 = un->un_linlck_chn; cs1; cs1 = 
cs1->cs_linlck_next) 18060Sstevel@tonic-gate if (raid_io_overlaps(cs, cs1)) 18070Sstevel@tonic-gate break; 18080Sstevel@tonic-gate 18090Sstevel@tonic-gate if (cs1 != NULL) { 18100Sstevel@tonic-gate if (panicstr) 18110Sstevel@tonic-gate panic("md: line writer lock inaccessible"); 18120Sstevel@tonic-gate goto no_lock_exit; 18130Sstevel@tonic-gate } 18140Sstevel@tonic-gate 18150Sstevel@tonic-gate if (raid_alloc_pwslot(cs)) { 18160Sstevel@tonic-gate if (panicstr) 18170Sstevel@tonic-gate panic("md: no prewrite slots"); 18180Sstevel@tonic-gate STAT_INC(raid_prewrite_waits); 18190Sstevel@tonic-gate goto no_lock_exit; 18200Sstevel@tonic-gate } 18210Sstevel@tonic-gate 18220Sstevel@tonic-gate cs1 = un->un_linlck_chn; 18230Sstevel@tonic-gate if (cs1 != NULL) 18240Sstevel@tonic-gate cs1->cs_linlck_prev = cs; 18250Sstevel@tonic-gate cs->cs_linlck_next = cs1; 18260Sstevel@tonic-gate cs->cs_linlck_prev = NULL; 18270Sstevel@tonic-gate un->un_linlck_chn = cs; 18280Sstevel@tonic-gate cs->cs_flags |= MD_RCS_LLOCKD; 18290Sstevel@tonic-gate cs->cs_flags &= ~MD_RCS_WAITING; 18300Sstevel@tonic-gate STAT_INC(raid_writer_locks); 18310Sstevel@tonic-gate STAT_MAX(raid_max_write_locks, raid_write_locks_active); 18320Sstevel@tonic-gate if (lock && !panicstr) 18330Sstevel@tonic-gate mutex_exit(&un->un_linlck_mx); 18340Sstevel@tonic-gate return (0); 18350Sstevel@tonic-gate 18360Sstevel@tonic-gate no_lock_exit: 18370Sstevel@tonic-gate /* if this is already queued then do not requeue it */ 18380Sstevel@tonic-gate ASSERT(! 
(cs->cs_flags & MD_RCS_LLOCKD)); 18390Sstevel@tonic-gate if (!lock || (cs->cs_flags & MD_RCS_WAITING)) 18400Sstevel@tonic-gate return (1); 18410Sstevel@tonic-gate cs->cs_flags |= MD_RCS_WAITING; 18420Sstevel@tonic-gate cs->cs_un = un; 18430Sstevel@tonic-gate raid_enqueue(cs); 18440Sstevel@tonic-gate if (lock && !panicstr) 18450Sstevel@tonic-gate mutex_exit(&un->un_linlck_mx); 18460Sstevel@tonic-gate return (1); 18470Sstevel@tonic-gate } 18480Sstevel@tonic-gate 18490Sstevel@tonic-gate static void 18500Sstevel@tonic-gate raid_startio(md_raidcs_t *cs) 18510Sstevel@tonic-gate { 18520Sstevel@tonic-gate mdi_unit_t *ui = cs->cs_ps->ps_ui; 18530Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 18540Sstevel@tonic-gate 18550Sstevel@tonic-gate un = md_unit_readerlock(ui); 18560Sstevel@tonic-gate raid_write_io(un, cs); 18570Sstevel@tonic-gate } 18580Sstevel@tonic-gate 18590Sstevel@tonic-gate void 18600Sstevel@tonic-gate raid_io_startup(mr_unit_t *un) 18610Sstevel@tonic-gate { 18620Sstevel@tonic-gate md_raidcs_t *waiting_list, *cs1; 18630Sstevel@tonic-gate md_raidcs_t *previous = NULL, *next = NULL; 18640Sstevel@tonic-gate mdi_unit_t *ui = MDI_UNIT(un->c.un_self_id); 18650Sstevel@tonic-gate kmutex_t *io_list_mutex = &ui->ui_io_lock->io_list_mutex; 18660Sstevel@tonic-gate 18670Sstevel@tonic-gate ASSERT(MUTEX_HELD(&un->un_linlck_mx)); 18680Sstevel@tonic-gate mutex_enter(io_list_mutex); 18690Sstevel@tonic-gate 18700Sstevel@tonic-gate /* 18710Sstevel@tonic-gate * check to be sure there are no reader locks outstanding. If 18720Sstevel@tonic-gate * there are not then pass on the writer lock. 18730Sstevel@tonic-gate */ 18740Sstevel@tonic-gate waiting_list = ui->ui_io_lock->io_list_front; 18750Sstevel@tonic-gate while (waiting_list) { 18760Sstevel@tonic-gate ASSERT(waiting_list->cs_flags & MD_RCS_WAITING); 18770Sstevel@tonic-gate ASSERT(! 
(waiting_list->cs_flags & MD_RCS_LLOCKD)); 18780Sstevel@tonic-gate for (cs1 = un->un_linlck_chn; cs1; cs1 = cs1->cs_linlck_next) 18790Sstevel@tonic-gate if (raid_io_overlaps(waiting_list, cs1) == 1) 18800Sstevel@tonic-gate break; 18810Sstevel@tonic-gate /* 18820Sstevel@tonic-gate * there was an IOs that overlaps this io so go onto 18830Sstevel@tonic-gate * the next io in the waiting list 18840Sstevel@tonic-gate */ 18850Sstevel@tonic-gate if (cs1) { 18860Sstevel@tonic-gate previous = waiting_list; 18870Sstevel@tonic-gate waiting_list = waiting_list->cs_linlck_next; 18880Sstevel@tonic-gate continue; 18890Sstevel@tonic-gate } 18900Sstevel@tonic-gate 18910Sstevel@tonic-gate /* 18920Sstevel@tonic-gate * There are no IOs that overlap this, so remove it from 18930Sstevel@tonic-gate * the waiting queue, and start it 18940Sstevel@tonic-gate */ 18950Sstevel@tonic-gate 18960Sstevel@tonic-gate if (raid_check_pw(waiting_list)) { 18970Sstevel@tonic-gate ASSERT(waiting_list->cs_flags & MD_RCS_WAITING); 18980Sstevel@tonic-gate previous = waiting_list; 18990Sstevel@tonic-gate waiting_list = waiting_list->cs_linlck_next; 19000Sstevel@tonic-gate continue; 19010Sstevel@tonic-gate } 19020Sstevel@tonic-gate ASSERT(waiting_list->cs_flags & MD_RCS_WAITING); 19030Sstevel@tonic-gate 19040Sstevel@tonic-gate next = waiting_list->cs_linlck_next; 19050Sstevel@tonic-gate if (previous) 19060Sstevel@tonic-gate previous->cs_linlck_next = next; 19070Sstevel@tonic-gate else 19080Sstevel@tonic-gate ui->ui_io_lock->io_list_front = next; 19090Sstevel@tonic-gate 19100Sstevel@tonic-gate if (ui->ui_io_lock->io_list_front == NULL) 19110Sstevel@tonic-gate ui->ui_io_lock->io_list_back = NULL; 19120Sstevel@tonic-gate 19130Sstevel@tonic-gate if (ui->ui_io_lock->io_list_back == waiting_list) 19140Sstevel@tonic-gate ui->ui_io_lock->io_list_back = previous; 19150Sstevel@tonic-gate 19160Sstevel@tonic-gate waiting_list->cs_linlck_next = NULL; 19170Sstevel@tonic-gate waiting_list->cs_flags &= ~MD_RCS_WAITING; 
19180Sstevel@tonic-gate STAT_DEC(raid_write_queue_length); 19190Sstevel@tonic-gate if (raid_line_writer_lock(waiting_list, 0)) 19200Sstevel@tonic-gate panic("region locking corrupted"); 19210Sstevel@tonic-gate 19220Sstevel@tonic-gate ASSERT(waiting_list->cs_flags & MD_RCS_LLOCKD); 19230Sstevel@tonic-gate daemon_request(&md_mstr_daemon, raid_startio, 19240Sstevel@tonic-gate (daemon_queue_t *)waiting_list, REQ_OLD); 19250Sstevel@tonic-gate waiting_list = next; 19260Sstevel@tonic-gate 19270Sstevel@tonic-gate } 19280Sstevel@tonic-gate mutex_exit(io_list_mutex); 19290Sstevel@tonic-gate } 19300Sstevel@tonic-gate 19310Sstevel@tonic-gate void 19320Sstevel@tonic-gate raid_line_exit(md_raidcs_t *cs) 19330Sstevel@tonic-gate { 19340Sstevel@tonic-gate mr_unit_t *un; 19350Sstevel@tonic-gate 19360Sstevel@tonic-gate un = cs->cs_ps->ps_un; 19370Sstevel@tonic-gate STAT_CHECK(raid_line_lock_wait, MUTEX_HELD(&un->un_linlck_mx)); 19380Sstevel@tonic-gate mutex_enter(&un->un_linlck_mx); 19390Sstevel@tonic-gate if (cs->cs_flags & MD_RCS_READER) 19400Sstevel@tonic-gate STAT_DEC(raid_reader_locks_active); 19410Sstevel@tonic-gate else 19420Sstevel@tonic-gate STAT_DEC(raid_write_locks_active); 19430Sstevel@tonic-gate 19440Sstevel@tonic-gate if (cs->cs_linlck_prev) 19450Sstevel@tonic-gate cs->cs_linlck_prev->cs_linlck_next = cs->cs_linlck_next; 19460Sstevel@tonic-gate else 19470Sstevel@tonic-gate un->un_linlck_chn = cs->cs_linlck_next; 19480Sstevel@tonic-gate if (cs->cs_linlck_next) 19490Sstevel@tonic-gate cs->cs_linlck_next->cs_linlck_prev = cs->cs_linlck_prev; 19500Sstevel@tonic-gate 19510Sstevel@tonic-gate cs->cs_flags &= ~MD_RCS_LLOCKD; 19520Sstevel@tonic-gate 19530Sstevel@tonic-gate if (un->un_linlck_flg) 19540Sstevel@tonic-gate cv_broadcast(&un->un_linlck_cv); 19550Sstevel@tonic-gate 19560Sstevel@tonic-gate un->un_linlck_flg = 0; 19570Sstevel@tonic-gate cs->cs_line = MD_DISKADDR_ERROR; 19580Sstevel@tonic-gate 19590Sstevel@tonic-gate raid_cancel_pwslot(cs); 19600Sstevel@tonic-gate /* 
19610Sstevel@tonic-gate * now that the lock is droped go ahead and see if there are any 19620Sstevel@tonic-gate * other writes that can be started up 19630Sstevel@tonic-gate */ 19640Sstevel@tonic-gate raid_io_startup(un); 19650Sstevel@tonic-gate 19660Sstevel@tonic-gate mutex_exit(&un->un_linlck_mx); 19670Sstevel@tonic-gate } 19680Sstevel@tonic-gate 19690Sstevel@tonic-gate /* 19700Sstevel@tonic-gate * NAMES: raid_line, raid_pcolumn, raid_dcolumn 19710Sstevel@tonic-gate * DESCRIPTION: RAID metadevice APIs for mapping segment # to line #, 19720Sstevel@tonic-gate * data column # and parity column #. 19730Sstevel@tonic-gate * PARAMETERS: int segment - segment number 19740Sstevel@tonic-gate * mr_unit_t *un - pointer to an unit structure 19750Sstevel@tonic-gate * RETURNS: raid_line returns line # 19760Sstevel@tonic-gate * raid_dcolumn returns data column # 19770Sstevel@tonic-gate * raid_pcolumn returns parity column # 19780Sstevel@tonic-gate */ 19790Sstevel@tonic-gate static diskaddr_t 19800Sstevel@tonic-gate raid_line(diskaddr_t segment, mr_unit_t *un) 19810Sstevel@tonic-gate { 19820Sstevel@tonic-gate diskaddr_t adj_seg; 19830Sstevel@tonic-gate diskaddr_t line; 19840Sstevel@tonic-gate diskaddr_t max_orig_segment; 19850Sstevel@tonic-gate 19860Sstevel@tonic-gate max_orig_segment = (un->un_origcolumncnt - 1) * un->un_segsincolumn; 19870Sstevel@tonic-gate if (segment >= max_orig_segment) { 19880Sstevel@tonic-gate adj_seg = segment - max_orig_segment; 19890Sstevel@tonic-gate line = adj_seg % un->un_segsincolumn; 19900Sstevel@tonic-gate } else { 19910Sstevel@tonic-gate line = segment / (un->un_origcolumncnt - 1); 19920Sstevel@tonic-gate } 19930Sstevel@tonic-gate return (line); 19940Sstevel@tonic-gate } 19950Sstevel@tonic-gate 19960Sstevel@tonic-gate uint_t 19970Sstevel@tonic-gate raid_dcolumn(diskaddr_t segment, mr_unit_t *un) 19980Sstevel@tonic-gate { 19990Sstevel@tonic-gate diskaddr_t adj_seg; 20000Sstevel@tonic-gate diskaddr_t line; 20010Sstevel@tonic-gate diskaddr_t 
max_orig_segment; 20020Sstevel@tonic-gate uint_t column; 20030Sstevel@tonic-gate 20040Sstevel@tonic-gate max_orig_segment = (un->un_origcolumncnt - 1) * un->un_segsincolumn; 20050Sstevel@tonic-gate if (segment >= max_orig_segment) { 20060Sstevel@tonic-gate adj_seg = segment - max_orig_segment; 20070Sstevel@tonic-gate column = un->un_origcolumncnt + 20087627SChris.Horne@Sun.COM (uint_t)(adj_seg / un->un_segsincolumn); 20090Sstevel@tonic-gate } else { 20100Sstevel@tonic-gate line = segment / (un->un_origcolumncnt - 1); 20117627SChris.Horne@Sun.COM column = (uint_t)((segment % 20127627SChris.Horne@Sun.COM (un->un_origcolumncnt - 1) + line) % un->un_origcolumncnt); 20130Sstevel@tonic-gate } 20140Sstevel@tonic-gate return (column); 20150Sstevel@tonic-gate } 20160Sstevel@tonic-gate 20170Sstevel@tonic-gate uint_t 20180Sstevel@tonic-gate raid_pcolumn(diskaddr_t segment, mr_unit_t *un) 20190Sstevel@tonic-gate { 20200Sstevel@tonic-gate diskaddr_t adj_seg; 20210Sstevel@tonic-gate diskaddr_t line; 20220Sstevel@tonic-gate diskaddr_t max_orig_segment; 20230Sstevel@tonic-gate uint_t column; 20240Sstevel@tonic-gate 20250Sstevel@tonic-gate max_orig_segment = (un->un_origcolumncnt - 1) * un->un_segsincolumn; 20260Sstevel@tonic-gate if (segment >= max_orig_segment) { 20270Sstevel@tonic-gate adj_seg = segment - max_orig_segment; 20280Sstevel@tonic-gate line = adj_seg % un->un_segsincolumn; 20290Sstevel@tonic-gate } else { 20300Sstevel@tonic-gate line = segment / (un->un_origcolumncnt - 1); 20310Sstevel@tonic-gate } 20327627SChris.Horne@Sun.COM column = (uint_t)((line + (un->un_origcolumncnt - 1)) % 20337627SChris.Horne@Sun.COM un->un_origcolumncnt); 20340Sstevel@tonic-gate return (column); 20350Sstevel@tonic-gate } 20360Sstevel@tonic-gate 20370Sstevel@tonic-gate 20380Sstevel@tonic-gate /* 20390Sstevel@tonic-gate * Is called in raid_iosetup to probe each column to insure 20400Sstevel@tonic-gate * that all the columns are in 'okay' state and meet the 20410Sstevel@tonic-gate * 'full 
line' requirement. If any column is in error,
 * we don't want to enable the 'full line' flag. Previously,
 * we would do so and disable it only when an error is
 * detected after the first 'full line' io which is too late
 * and leads to the potential data corruption.
 *
 * Returns 0 when every column could be resolved and a one-sector
 * probe read of its pre-write region succeeded; 1 on the first failure.
 */
static int
raid_check_cols(mr_unit_t *un)
{
	buf_t		bp;
	char		*buf;
	mr_column_t	*colptr;
	minor_t		mnum = MD_SID(un);
	int		i;
	int		err = 0;

	/* one-sector scratch buffer shared by all probe reads */
	buf = kmem_zalloc((uint_t)DEV_BSIZE, KM_SLEEP);

	for (i = 0; i < un->un_totalcolumncnt; i++) {
		md_dev64_t tmpdev;

		colptr = &un->un_column[i];

		tmpdev = colptr->un_dev;
		/*
		 * Open by device id
		 * If this device is hotspared
		 * use the hotspare key
		 */
		tmpdev = md_resolve_bydevid(mnum, tmpdev, HOTSPARED(un, i) ?
		    colptr->un_hs_key : colptr->un_orig_key);

		if (tmpdev == NODEV64) {
			err = 1;
			break;
		}

		colptr->un_dev = tmpdev;

		/*
		 * Build a private buf on the stack and synchronously read
		 * the first sector of the column's pre-write area.
		 */
		bzero((caddr_t)&bp, sizeof (buf_t));
		bp.b_back = &bp;
		bp.b_forw = &bp;
		bp.b_flags = (B_READ | B_BUSY);
		sema_init(&bp.b_io, 0, NULL,
		    SEMA_DEFAULT, NULL);
		sema_init(&bp.b_sem, 0, NULL,
		    SEMA_DEFAULT, NULL);
		bp.b_edev = md_dev64_to_dev(colptr->un_dev);
		bp.b_lblkno = colptr->un_pwstart;
		bp.b_bcount = DEV_BSIZE;
		bp.b_bufsize = DEV_BSIZE;
		bp.b_un.b_addr = (caddr_t)buf;
		(void) md_call_strategy(&bp, 0, NULL);
		if (biowait(&bp)) {
			err = 1;
			break;
		}
	}

	kmem_free(buf, DEV_BSIZE);
	return (err);
}

/*
 * NAME:	raid_iosetup
 * DESCRIPTION: RAID metadevice specific I/O set up routine which does
 *		all the necessary calculations to determine the location
 *		of the segment for the I/O.
 * PARAMETERS:	mr_unit_t *un - unit number of RAID metadevice
 *		diskaddr_t blkno - block number of the I/O attempt
 *		size_t blkcnt - block count for this I/O
 *		md_raidcs_t *cs - child structure for each segmented I/O
 *
 * RETURNS:	the number of blocks NOT covered by this child (leftover),
 *		which the caller must issue as further child I/Os.
 *
 * NOTE: The following is an example of a raid disk layout:
 *
 *	Total Column = 5
 *	Original Column = 4
 *	Segment Per Column = 10
 *
 *		Col#0	Col#1	Col#2	Col#3	Col#4	Col#5	Col#6
 *	-------------------------------------------------------------
 *	line#0	Seg#0	Seg#1	Seg#2	Parity	Seg#30	Seg#40
 *	line#1	Parity	Seg#3	Seg#4	Seg#5	Seg#31
 *	line#2	Seg#8	Parity	Seg#6	Seg#7	Seg#32
 *	line#3	Seg#10	Seg#11	Parity	Seg#9	Seg#33
 *	line#4	Seg#12	Seg#13	Seg#14	Parity	Seg#34
 *	line#5	Parity	Seg#15	Seg#16	Seg#17	Seg#35
 *	line#6	Seg#20	Parity	Seg#18	Seg#19	Seg#36
 *	line#7	Seg#22	Seg#23	Parity	Seg#21	Seg#37
 *	line#8	Seg#24	Seg#25	Seg#26	Parity	Seg#38
 *	line#9	Parity	Seg#27	Seg#28	Seg#29	Seg#39
 */
static size_t
raid_iosetup(
	mr_unit_t	*un,
	diskaddr_t	blkno,
	size_t		blkcnt,
	md_raidcs_t	*cs
)
{
	diskaddr_t	segment;
	diskaddr_t	segstart;
	diskaddr_t	segoff;
	size_t		leftover;
	diskaddr_t	line;
	uint_t		iosize;
	uint_t		colcnt;

	/* calculate the segment# and offset for the block */
	segment = blkno / un->un_segsize;
	segstart = segment * un->un_segsize;
	segoff = blkno - segstart;
	iosize = un->un_iosize - 1;	/* un_iosize includes the pw header */
	colcnt = un->un_totalcolumncnt - 1;
	line = raid_line(segment, un);
	cs->cs_dcolumn = raid_dcolumn(segment, un);
	cs->cs_pcolumn = raid_pcolumn(segment, un);
	cs->cs_dflags = un->un_column[cs->cs_dcolumn].un_devflags;
	cs->cs_pflags = un->un_column[cs->cs_pcolumn].un_devflags;
	cs->cs_line = line;

	/*
	 * Full-line write fast path: only taken for a segment-aligned
	 * write that starts at a line boundary and covers the whole line,
	 * on an un-grown unit whose columns are all verified okay
	 * (raid_check_cols probes each column so an errored column can't
	 * corrupt parity after the fact).
	 */
	if ((cs->cs_ps->ps_flags & MD_RPS_WRITE) &&
	    (UNIT_STATE(un) & RCS_OKAY) &&
	    (segoff == 0) &&
	    (un->un_totalcolumncnt == un->un_origcolumncnt) &&
	    (un->un_segsize < un->un_iosize) &&
	    (un->un_iosize <= un->un_maxio) &&
	    (blkno == line * un->un_segsize * colcnt) &&
	    (blkcnt >= ((un->un_totalcolumncnt - 1) * un->un_segsize)) &&
	    (raid_state_cnt(un, RCS_OKAY) == un->un_origcolumncnt) &&
	    (raid_check_cols(un) == 0)) {

		md_raidcbuf_t	**cbufp;
		md_raidcbuf_t	*cbuf;
		int		i, j;

		STAT_INC(raid_full_line_writes);
		leftover = blkcnt - (un->un_segsize * colcnt);
		ASSERT(blkcnt >= (un->un_segsize * colcnt));
		cs->cs_blkno = line * un->un_segsize;
		cs->cs_blkcnt = un->un_segsize;
		cs->cs_lastblk = cs->cs_blkno + cs->cs_blkcnt - 1;
		cs->cs_bcount = dbtob(cs->cs_blkcnt);
		cs->cs_flags |= MD_RCS_LINE;

		/*
		 * Allocate a cbuf for every column in the line other than
		 * the primary data and parity columns, walking the columns
		 * in rotation order starting at the data column.
		 */
		cbufp = &cs->cs_buflist;
		for (i = 0; i < un->un_totalcolumncnt; i++) {
			j = cs->cs_dcolumn + i;
			j = j % un->un_totalcolumncnt;

			if ((j == cs->cs_dcolumn) || (j == cs->cs_pcolumn))
				continue;
			cbuf = kmem_cache_alloc(raid_cbuf_cache,
			    MD_ALLOCFLAGS);
			raid_cbuf_init(cbuf);
			cbuf->cbuf_un = cs->cs_un;
			cbuf->cbuf_ps = cs->cs_ps;
			cbuf->cbuf_column = j;
			cbuf->cbuf_bcount = dbtob(un->un_segsize);
			*cbufp = cbuf;
			cbufp = &cbuf->cbuf_next;
		}
		return (leftover);
	}

	/* clip the request to the remainder of the current segment */
	leftover = blkcnt - (un->un_segsize - segoff);
	if (blkcnt > (un->un_segsize - segoff))
		blkcnt -= leftover;
	else
		leftover = 0;

	if (blkcnt > (size_t)iosize) {
		leftover += (blkcnt - iosize);
		blkcnt = iosize;
	}

	/* calculate the line# and column# for the segment */
	cs->cs_flags &= ~MD_RCS_LINE;
	cs->cs_blkno = line * un->un_segsize + segoff;
	cs->cs_blkcnt = (uint_t)blkcnt;
	cs->cs_lastblk = cs->cs_blkno + cs->cs_blkcnt - 1;
	cs->cs_bcount = dbtob((uint_t)blkcnt);
	return (leftover);
}

/*
 * NAME:	raid_done
 * DESCRIPTION: RAID metadevice I/O done interrupt routine; decrements
 *		the child's fragment count and, when the last fragment
 *		completes, dispatches the error or completion callback.
 * PARAMETERS:	struct buf *bp - pointer to a buffer structure
 */
static void
raid_done(struct buf *bp)
{
	md_raidcs_t	*cs;
	int		flags, frags;

	sema_v(&bp->b_io);
	cs = (md_raidcs_t *)bp->b_chain;

	ASSERT(cs != NULL);

	mutex_enter(&cs->cs_mx);
	if (bp->b_flags & B_ERROR) {
		cs->cs_flags |= MD_RCS_ERROR;
		cs->cs_flags &= ~(MD_RCS_ISCALL);
	}

	/* snapshot state under the lock; last fragment finishes the child */
	flags = cs->cs_flags;
	frags = --cs->cs_frags;
	mutex_exit(&cs->cs_mx);
	if (frags != 0) {
		return;
	}

	if (flags & MD_RCS_ERROR) {
		if (cs->cs_error_call) {
			daemon_request(&md_done_daemon, cs->cs_error_call,
			    (daemon_queue_t *)cs, REQ_OLD);
		}
		return;
	}

	if (flags & MD_RCS_ISCALL) {
		/* call directly from interrupt context */
		cs->cs_flags &= ~(MD_RCS_ISCALL);
		(*(cs->cs_call))(cs);
		return;
	}
	daemon_request(&md_done_daemon, cs->cs_call,
22697627SChris.Horne@Sun.COM (daemon_queue_t *)cs, REQ_OLD); 22700Sstevel@tonic-gate } 22710Sstevel@tonic-gate /* 22720Sstevel@tonic-gate * the flag RIO_EXTRA is used when dealing with a column in the process 22730Sstevel@tonic-gate * of being resynced. During the resync, writes may have to take place 22740Sstevel@tonic-gate * on both the original component and a hotspare component. 22750Sstevel@tonic-gate */ 22760Sstevel@tonic-gate #define RIO_DATA 0x00100 /* use data buffer & data column */ 22770Sstevel@tonic-gate #define RIO_PARITY 0x00200 /* use parity buffer & parity column */ 22780Sstevel@tonic-gate #define RIO_WRITE 0x00400 /* issue a write */ 22790Sstevel@tonic-gate #define RIO_READ 0x00800 /* issue a read */ 22800Sstevel@tonic-gate #define RIO_PWIO 0x01000 /* do the I/O to the prewrite entry */ 22810Sstevel@tonic-gate #define RIO_ALT 0x02000 /* do write to alternate device */ 22820Sstevel@tonic-gate #define RIO_EXTRA 0x04000 /* use extra buffer */ 22830Sstevel@tonic-gate 22840Sstevel@tonic-gate #define RIO_COLMASK 0x000ff 22850Sstevel@tonic-gate 22860Sstevel@tonic-gate #define RIO_PREWRITE RIO_WRITE | RIO_PWIO 22870Sstevel@tonic-gate 22880Sstevel@tonic-gate /* 22890Sstevel@tonic-gate * NAME: raidio 22900Sstevel@tonic-gate * DESCRIPTION: RAID metadevice write routine 22910Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to a child structure 22920Sstevel@tonic-gate */ 22930Sstevel@tonic-gate static void 22940Sstevel@tonic-gate raidio(md_raidcs_t *cs, int flags) 22950Sstevel@tonic-gate { 22960Sstevel@tonic-gate buf_t *bp; 22970Sstevel@tonic-gate int column; 22980Sstevel@tonic-gate int flag; 22990Sstevel@tonic-gate void *private; 23000Sstevel@tonic-gate mr_unit_t *un; 23010Sstevel@tonic-gate int iosize; 23020Sstevel@tonic-gate diskaddr_t pwstart; 23030Sstevel@tonic-gate diskaddr_t devstart; 23040Sstevel@tonic-gate md_dev64_t dev; 23050Sstevel@tonic-gate 23060Sstevel@tonic-gate un = cs->cs_un; 23070Sstevel@tonic-gate 23080Sstevel@tonic-gate 
ASSERT(IO_READER_HELD(un)); 23090Sstevel@tonic-gate ASSERT(UNIT_READER_HELD(un)); 23100Sstevel@tonic-gate 23110Sstevel@tonic-gate if (flags & RIO_DATA) { 23120Sstevel@tonic-gate if (flags & RIO_EXTRA) 23130Sstevel@tonic-gate bp = &cs->cs_hbuf; 23140Sstevel@tonic-gate else 23150Sstevel@tonic-gate bp = &cs->cs_dbuf; 23160Sstevel@tonic-gate bp->b_un.b_addr = cs->cs_dbuffer; 23170Sstevel@tonic-gate column = cs->cs_dcolumn; 23180Sstevel@tonic-gate } else { 23190Sstevel@tonic-gate if (flags & RIO_EXTRA) 23200Sstevel@tonic-gate bp = &cs->cs_hbuf; 23210Sstevel@tonic-gate else 23220Sstevel@tonic-gate bp = &cs->cs_pbuf; 23230Sstevel@tonic-gate bp->b_un.b_addr = cs->cs_pbuffer; 23240Sstevel@tonic-gate column = cs->cs_pcolumn; 23250Sstevel@tonic-gate } 23260Sstevel@tonic-gate if (flags & RIO_COLMASK) 23270Sstevel@tonic-gate column = (flags & RIO_COLMASK) - 1; 23280Sstevel@tonic-gate 23290Sstevel@tonic-gate bp->b_bcount = cs->cs_bcount; 23300Sstevel@tonic-gate bp->b_bufsize = cs->cs_bcount; 23310Sstevel@tonic-gate iosize = un->un_iosize; 23320Sstevel@tonic-gate 23330Sstevel@tonic-gate /* check if the hotspared device will be used */ 23340Sstevel@tonic-gate if (flags & RIO_ALT && (flags & RIO_WRITE)) { 23350Sstevel@tonic-gate pwstart = un->un_column[column].un_alt_pwstart; 23360Sstevel@tonic-gate devstart = un->un_column[column].un_alt_devstart; 23370Sstevel@tonic-gate dev = un->un_column[column].un_alt_dev; 23380Sstevel@tonic-gate } else { 23390Sstevel@tonic-gate pwstart = un->un_column[column].un_pwstart; 23400Sstevel@tonic-gate devstart = un->un_column[column].un_devstart; 23410Sstevel@tonic-gate dev = un->un_column[column].un_dev; 23420Sstevel@tonic-gate } 23430Sstevel@tonic-gate 23440Sstevel@tonic-gate /* if not writing to log skip log header */ 23450Sstevel@tonic-gate if ((flags & RIO_PWIO) == 0) { 23460Sstevel@tonic-gate bp->b_lblkno = devstart + cs->cs_blkno; 23470Sstevel@tonic-gate bp->b_un.b_addr += DEV_BSIZE; 23480Sstevel@tonic-gate } else { 23490Sstevel@tonic-gate 
		/* pre-write I/O: header sector included, slot-addressed */
		bp->b_bcount += DEV_BSIZE;
		bp->b_bufsize = bp->b_bcount;
		if (flags & RIO_DATA) {
			bp->b_lblkno = cs->cs_dpwslot * iosize + pwstart;
		} else { /* not DATA -> PARITY */
			bp->b_lblkno = cs->cs_ppwslot * iosize + pwstart;
		}
	}

	bp->b_flags &= ~(B_READ | B_WRITE | B_ERROR | nv_available);
	bp->b_flags |= B_BUSY;
	if (flags & RIO_READ) {
		bp->b_flags |= B_READ;
	} else {
		bp->b_flags |= B_WRITE;
		/* tag NVRAM-cacheable writes when the policy allows it */
		if ((nv_available && nv_parity && (flags & RIO_PARITY)) ||
		    (nv_available && nv_prewrite && (flags & RIO_PWIO)))
			bp->b_flags |= nv_available;
	}
	bp->b_iodone = (int (*)())raid_done;
	bp->b_edev = md_dev64_to_dev(dev);

	ASSERT((bp->b_edev != 0) && (bp->b_edev != NODEV));

	private = cs->cs_strategy_private;
	flag = cs->cs_strategy_flag;

	md_call_strategy(bp, flag, private);
}

/*
 * NAME:	genstandardparity
 * DESCRIPTION: This routine computes the parity for a single-segment
 *		write: the parity buffer becomes
 *		(old parity ^ old data ^ new data), the data buffer is
 *		replaced with the new user data, and the XOR checksums
 *		(dsum/psum) are recorded in the pre-write headers of both
 *		buffers.
 * PARAMETERS:	md_raidcs_t *cs - pointer to a child structure
 */
static void
genstandardparity(md_raidcs_t *cs)
{
	uint_t	*dbuf, *pbuf;
	size_t	wordcnt;
	uint_t	dsum = 0;
	uint_t	psum = 0;

	/* byte count must be word-sized for the word-at-a-time XOR */
	ASSERT((cs->cs_bcount & 0x3) == 0);

	wordcnt = cs->cs_bcount / sizeof (uint_t);

	/* skip the pre-write header sector in both buffers */
	dbuf = (uint_t *)(void *)(cs->cs_dbuffer + DEV_BSIZE);
	pbuf = (uint_t *)(void *)(cs->cs_pbuffer + DEV_BSIZE);

	/* Word aligned */
	if (((uintptr_t)cs->cs_addr & 0x3) == 0) {
		uint_t	*uwbuf = (uint_t *)(void *)(cs->cs_addr);
		uint_t	uval;

		while (wordcnt--) {
			uval = *uwbuf++;
			psum ^= (*pbuf = ((*pbuf ^ *dbuf) ^ uval));
			++pbuf;
			*dbuf = uval;
			dsum ^= uval;
			++dbuf;
		}
	} else {
		/*
		 * Unaligned user buffer: assemble each word a byte at a
		 * time through the union to avoid misaligned word loads.
		 */
		uchar_t	*ubbuf = (uchar_t *)(cs->cs_addr);
		union {
			uint_t	wb;
			uchar_t	bb[4];
		} cb;

		while (wordcnt--) {
			cb.bb[0] = *ubbuf++;
			cb.bb[1] = *ubbuf++;
			cb.bb[2] = *ubbuf++;
			cb.bb[3] = *ubbuf++;
			psum ^= (*pbuf = ((*pbuf ^ *dbuf) ^ cb.wb));
			++pbuf;
			*dbuf = cb.wb;
			dsum ^= cb.wb;
			++dbuf;
		}
	}

	RAID_FILLIN_RPW(cs->cs_dbuffer, cs->cs_un, dsum, cs->cs_pcolumn,
	    cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid,
	    2, cs->cs_dcolumn, RAID_PWMAGIC);

	RAID_FILLIN_RPW(cs->cs_pbuffer, cs->cs_un, psum, cs->cs_dcolumn,
	    cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid,
	    2, cs->cs_pcolumn, RAID_PWMAGIC);
}

/*
 * NAME:	genlineparity
 * DESCRIPTION: generate parity for a full-line write: copies the user
 *		data into the per-column buffers, accumulates the running
 *		XOR parity across all data columns, fills in the pre-write
 *		headers, and issues the pre-write I/Os as each column
 *		buffer is completed.
 * PARAMETERS:	md_raidcs_t *cs - pointer to a child structure
 */
static void
genlineparity(md_raidcs_t *cs)
{

	mr_unit_t	*un = cs->cs_un;
	md_raidcbuf_t	*cbuf;
	uint_t		*pbuf, *dbuf;
	uint_t		*uwbuf;
	uchar_t		*ubbuf;
	size_t		wordcnt;
	uint_t		psum = 0, dsum = 0;
	size_t		count = un->un_segsize * DEV_BSIZE;
	uint_t		col;
	buf_t		*bp;

	/* byte count must be word-sized for the word-at-a-time XOR */
	ASSERT((cs->cs_bcount & 0x3) == 0);

	/* skip the pre-write header sector in each buffer */
	pbuf = (uint_t *)(void *)(cs->cs_pbuffer + DEV_BSIZE);
	dbuf = (uint_t *)(void *)(cs->cs_dbuffer + DEV_BSIZE);
	uwbuf = (uint_t *)(void *)(cs->cs_addr);
	ubbuf = (uchar_t *)(void *)(cs->cs_addr);

	wordcnt = count / sizeof (uint_t);

	/*
	 * First segment: copy into the data buffer and seed the parity
	 * buffer with the same words (parity of one segment is itself).
	 */
	/* Word aligned */
	if (((uintptr_t)cs->cs_addr & 0x3) == 0) {
		uint_t	uval;

		while (wordcnt--) {
			uval = *uwbuf++;
			*dbuf = uval;
			*pbuf = uval;
			dsum ^= uval;
			++pbuf;
			++dbuf;
		}
	} else {
		union {
			uint_t	wb;
			uchar_t	bb[4];
		} cb;

		while (wordcnt--) {
			cb.bb[0] = *ubbuf++;
			cb.bb[1] = *ubbuf++;
			cb.bb[2] = *ubbuf++;
			cb.bb[3] = *ubbuf++;
			*dbuf = cb.wb;
			*pbuf = cb.wb;
			dsum ^= cb.wb;
			++pbuf;
			++dbuf;
		}
	}

	RAID_FILLIN_RPW(cs->cs_dbuffer, un, dsum, cs->cs_pcolumn,
	    cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid,
	    un->un_totalcolumncnt, cs->cs_dcolumn, RAID_PWMAGIC);

	raidio(cs, RIO_PREWRITE | RIO_DATA);

	/* remaining data columns: copy data and fold into the parity */
	for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) {

		dsum = 0;
		pbuf = (uint_t *)(void *)(cs->cs_pbuffer + DEV_BSIZE);
		dbuf = (uint_t *)(void *)(cbuf->cbuf_buffer + DEV_BSIZE);

		wordcnt = count / sizeof (uint_t);

		col = cbuf->cbuf_column;

		/* Word aligned */
		if (((uintptr_t)cs->cs_addr & 0x3) == 0) {
			uint_t	uval;

			/*
			 * Only calculate psum when working on the last
			 * data buffer.
			 */
			if (cbuf->cbuf_next == NULL) {
				psum = 0;
				while (wordcnt--) {
					uval = *uwbuf++;
					*dbuf = uval;
					psum ^= (*pbuf ^= uval);
					dsum ^= uval;
					++dbuf;
					++pbuf;
				}
			} else {
				while (wordcnt--) {
					uval = *uwbuf++;
					*dbuf = uval;
					*pbuf ^= uval;
					dsum ^= uval;
					++dbuf;
					++pbuf;
				}
			}
		} else {
			union {
				uint_t	wb;
				uchar_t	bb[4];
			} cb;

			/*
			 * Only calculate psum when working on the last
			 * data buffer.
			 */
			if (cbuf->cbuf_next == NULL) {
				psum = 0;
				while (wordcnt--) {
					cb.bb[0] = *ubbuf++;
					cb.bb[1] = *ubbuf++;
					cb.bb[2] = *ubbuf++;
					cb.bb[3] = *ubbuf++;
					*dbuf = cb.wb;
					psum ^= (*pbuf ^= cb.wb);
					dsum ^= cb.wb;
					++dbuf;
					++pbuf;
				}
			} else {
				while (wordcnt--) {
					cb.bb[0] = *ubbuf++;
					cb.bb[1] = *ubbuf++;
					cb.bb[2] = *ubbuf++;
					cb.bb[3] = *ubbuf++;
					*dbuf = cb.wb;
					*pbuf ^= cb.wb;
					dsum ^= cb.wb;
					++dbuf;
					++pbuf;
				}
			}
		}
		RAID_FILLIN_RPW(cbuf->cbuf_buffer, un, dsum, cs->cs_pcolumn,
		    cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid,
		    un->un_totalcolumncnt, col, RAID_PWMAGIC);

		/*
		 * fill in buffer for write to prewrite area
		 */
		bp = &cbuf->cbuf_bp;
		bp->b_un.b_addr = cbuf->cbuf_buffer;
		bp->b_bcount = cbuf->cbuf_bcount + DEV_BSIZE;
		bp->b_bufsize = bp->b_bcount;
		bp->b_lblkno = (cbuf->cbuf_pwslot * un->un_iosize) +
		    un->un_column[col].un_pwstart;
		bp->b_flags = B_WRITE | B_BUSY;
		if (nv_available && nv_prewrite)
			bp->b_flags |= nv_available;
		bp->b_iodone = (int (*)())raid_done;
		bp->b_edev = md_dev64_to_dev(un->un_column[col].un_dev);
		bp->b_chain = (struct buf *)cs;
		md_call_strategy(bp,
		    cs->cs_strategy_flag, cs->cs_strategy_private);
	}

	/* finally, the completed parity buffer for the whole line */
	RAID_FILLIN_RPW(cs->cs_pbuffer, un, psum, cs->cs_dcolumn,
	    cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid,
	    un->un_totalcolumncnt, cs->cs_pcolumn, RAID_PWMAGIC);

	raidio(cs, RIO_PREWRITE | RIO_PARITY);
}

/*
 * NAME:	raid_readregenloop
 * DESCRIPTION: RAID metadevice read-regeneration loop: called once per
 *		surviving column, XORs the just-read column (in the parity
 *		buffer) into the accumulating data buffer; when all columns
 *		other than the errored one have been folded in, copies the
 *		regenerated data to the user buffer and completes the read.
 * PARAMETERS:	md_raidcs_t *cs - pointer to a child structure
 */
static void
raid_readregenloop(md_raidcs_t *cs)
{
	mr_unit_t	*un;
	md_raidps_t	*ps;
	uint_t		*dbuf;
	uint_t		*pbuf;
	size_t		wordcnt;

	un = cs->cs_un;

	/*
	 * XOR the parity with data bytes, must skip the
	 * pre-write entry header in all data/parity buffers
	 */
	wordcnt = cs->cs_bcount / sizeof (uint_t);
	dbuf = (uint_t *)(void *)(cs->cs_dbuffer + DEV_BSIZE);
	pbuf = (uint_t *)(void *)(cs->cs_pbuffer + DEV_BSIZE);
	while (wordcnt--)
		*dbuf++ ^= *pbuf++;

	/* bump up the loop count */
	cs->cs_loop++;

	/* skip the errored component */
	if (cs->cs_loop == cs->cs_dcolumn)
		cs->cs_loop++;

	if (cs->cs_loop != un->un_totalcolumncnt) {
		/* more columns to read: column# is biased by 1 in flags */
		cs->cs_frags = 1;
		raidio(cs, RIO_PARITY | RIO_READ | (cs->cs_loop + 1));
		return;
	}
	/* reaching the end of loop */
	ps = cs->cs_ps;
	bcopy(cs->cs_dbuffer + DEV_BSIZE, cs->cs_addr, cs->cs_bcount);
	raid_free_child(cs, 1);

	/* decrement readfrags */
	raid_free_parent(ps, RFP_DECR_READFRAGS | RFP_RLS_LOCK);
}

/*
 * NAME:	raid_read_io
 * DESCRIPTION: RAID metadevice read I/O routine
 * PARAMETERS:	mr_unit_t *un - pointer to a unit structure
 *		md_raidcs_t *cs - pointer to a child structure
 */
static void
raid_read_io(mr_unit_t *un, md_raidcs_t *cs)
{
	int	flag;
	void	*private;
	buf_t	*bp;
	buf_t	*pb = cs->cs_ps->ps_bp;
	mr_column_t	*column;

	flag = cs->cs_strategy_flag;
	private = cs->cs_strategy_private;
	column = &un->un_column[cs->cs_dcolumn];

	/*
	 * The component to be read is good, simply set up bp structure
	 * and call low level md routine doing the read.
	 */

	if (COLUMN_ISOKAY(un, cs->cs_dcolumn) ||
	    (COLUMN_ISLASTERR(un, cs->cs_dcolumn) &&
	    (cs->cs_flags & MD_RCS_RECOVERY) == 0)) {
		dev_t ddi_dev; /* needed for bioclone, so not md_dev64_t */
		ddi_dev = md_dev64_to_dev(column->un_dev);

		bp = &cs->cs_dbuf;
		bp = md_bioclone(pb, cs->cs_offset, cs->cs_bcount, ddi_dev,
		    column->un_devstart + cs->cs_blkno,
		    (int (*)())raid_done, bp, KM_NOSLEEP);

		bp->b_chain = (buf_t *)cs;

		cs->cs_frags = 1;
		cs->cs_error_call = raid_read_error;
		cs->cs_retry_call = raid_read_retry;
		cs->cs_flags |= MD_RCS_ISCALL;
		cs->cs_stage = RAID_READ_DONE;
		cs->cs_call = raid_stage;

		ASSERT(bp->b_edev != 0);

		md_call_strategy(bp, flag, private);
		return;
	}

	/*
	 * The component to be read is bad, have to go through
	 * raid specific method to read data from other members.
	 */
	cs->cs_loop = 0;
	/*
	 * NOTE: always get dbuffer before pbuffer
	 *	 and get both buffers before pwslot
	 *	 otherwise a deadlock could be introduced.
	 */
	raid_mapin_buf(cs);
	getdbuffer(cs);
	getpbuffer(cs);
	if (cs->cs_loop == cs->cs_dcolumn)
		cs->cs_loop++;

	/* zero out data buffer for use as a data sink */
	bzero(cs->cs_dbuffer + DEV_BSIZE, cs->cs_bcount);
	cs->cs_stage = RAID_NONE;
	cs->cs_call = raid_readregenloop;
	cs->cs_error_call = raid_read_error;
	cs->cs_retry_call = raid_read_no_retry;
	cs->cs_frags = 1;

	/* use parity buffer to read other columns */
	raidio(cs, RIO_PARITY | RIO_READ | (cs->cs_loop + 1));
}

/*
 * NAME:	raid_read
 * DESCRIPTION: RAID metadevice read routine: validates the request,
 *		takes the line reader lock, and hands off to raid_read_io.
 * PARAMETERS:	mr_unit_t *un - pointer to a unit structure
 *		md_raidcs_t *cs - pointer to a child structure
 */
static int
raid_read(mr_unit_t *un, md_raidcs_t *cs)
{
	int		error = 0;
	md_raidps_t	*ps;
	mdi_unit_t	*ui;
	minor_t		mnum;

	ASSERT(IO_READER_HELD(un));
	ps = cs->cs_ps;
	ui = ps->ps_ui;
	raid_line_reader_lock(cs, 0);
	un = (mr_unit_t *)md_unit_readerlock(ui);
	ASSERT(UNIT_STATE(un) != RUS_INIT);
	mnum = MD_SID(un);
	cs->cs_un = un;

	/* make
sure the read doesn't go beyond the end of the column */ 27560Sstevel@tonic-gate if (cs->cs_blkno + cs->cs_blkcnt > 27570Sstevel@tonic-gate un->un_segsize * un->un_segsincolumn) { 27580Sstevel@tonic-gate error = ENXIO; 27590Sstevel@tonic-gate } 27600Sstevel@tonic-gate if (error) 27610Sstevel@tonic-gate goto rerror; 27620Sstevel@tonic-gate 27630Sstevel@tonic-gate if (un->un_state & RUS_REGEN) { 27640Sstevel@tonic-gate raid_regen_parity(cs); 27650Sstevel@tonic-gate un = MD_UNIT(mnum); 27660Sstevel@tonic-gate cs->cs_un = un; 27670Sstevel@tonic-gate } 27680Sstevel@tonic-gate 27690Sstevel@tonic-gate raid_read_io(un, cs); 27700Sstevel@tonic-gate return (0); 27710Sstevel@tonic-gate 27720Sstevel@tonic-gate rerror: 27730Sstevel@tonic-gate raid_error_parent(ps, error); 27740Sstevel@tonic-gate raid_free_child(cs, 1); 27750Sstevel@tonic-gate /* decrement readfrags */ 27760Sstevel@tonic-gate raid_free_parent(ps, RFP_DECR_READFRAGS | RFP_RLS_LOCK); 27770Sstevel@tonic-gate return (0); 27780Sstevel@tonic-gate } 27790Sstevel@tonic-gate 27800Sstevel@tonic-gate /* 27810Sstevel@tonic-gate * NAME: raid_write_err_retry 27820Sstevel@tonic-gate * DESCRIPTION: RAID metadevice write retry routine 27830Sstevel@tonic-gate * write was for parity or data only; 27840Sstevel@tonic-gate * complete write with error, no recovery possible 27850Sstevel@tonic-gate * PARAMETERS: mr_unit_t *un - pointer to a unit structure 27860Sstevel@tonic-gate * md_raidcs_t *cs - pointer to a child structure 27870Sstevel@tonic-gate */ 27880Sstevel@tonic-gate /*ARGSUSED*/ 27890Sstevel@tonic-gate static void 27900Sstevel@tonic-gate raid_write_err_retry(mr_unit_t *un, md_raidcs_t *cs) 27910Sstevel@tonic-gate { 27920Sstevel@tonic-gate md_raidps_t *ps = cs->cs_ps; 27930Sstevel@tonic-gate int flags = RFP_DECR_FRAGS | RFP_RLS_LOCK; 27940Sstevel@tonic-gate 27950Sstevel@tonic-gate /* decrement pwfrags if needed, and frags */ 27960Sstevel@tonic-gate if (!(cs->cs_flags & MD_RCS_PWDONE)) 27970Sstevel@tonic-gate flags |= 
		    RFP_DECR_PWFRAGS;
	raid_error_parent(ps, EIO);
	raid_free_child(cs, 1);
	raid_free_parent(ps, flags);
}

/*
 * NAME:	raid_write_no_retry
 * DESCRIPTION: RAID metadevice write retry routine.
 *		The write is too far along to retry and the parent has
 *		already been signaled with iodone, so just release the
 *		child and parent resources without reporting an error.
 * PARAMETERS:	mr_unit_t *un - pointer to a unit structure
 *		md_raidcs_t *cs - pointer to a child structure
 */
/*ARGSUSED*/
static void
raid_write_no_retry(mr_unit_t *un, md_raidcs_t *cs)
{
	md_raidps_t	*ps = cs->cs_ps;
	int		flags = RFP_DECR_FRAGS | RFP_RLS_LOCK;

	/* decrement pwfrags if needed, and frags */
	if (!(cs->cs_flags & MD_RCS_PWDONE))
		flags |= RFP_DECR_PWFRAGS;
	raid_free_child(cs, 1);
	raid_free_parent(ps, flags);
}

/*
 * NAME:	raid_write_retry
 * DESCRIPTION: RAID metadevice write retry routine.
 *		Re-initializes the child's buf_t structures and
 *		semaphores so the whole write can be re-driven through
 *		raid_write_io().
 * PARAMETERS:	mr_unit_t *un - pointer to a unit structure
 *		md_raidcs_t *cs - pointer to a child structure
 */
static void
raid_write_retry(mr_unit_t *un, md_raidcs_t *cs)
{
	md_raidps_t	*ps;

	ps = cs->cs_ps;

	/* re-initialize the buf_t structure for raid_write() */
	cs->cs_dbuf.b_chain = (struct buf *)cs;
	cs->cs_dbuf.b_back = &cs->cs_dbuf;
	cs->cs_dbuf.b_forw = &cs->cs_dbuf;
	cs->cs_dbuf.b_flags = B_BUSY;	/* initialize flags */
	cs->cs_dbuf.b_error = 0;	/* initialize error */
	cs->cs_dbuf.b_offset = -1;
	/* Initialize semaphores */
	sema_init(&cs->cs_dbuf.b_io, 0, NULL,
	    SEMA_DEFAULT, NULL);
	sema_init(&cs->cs_dbuf.b_sem, 0, NULL,
	    SEMA_DEFAULT, NULL);

	cs->cs_pbuf.b_chain = (struct buf *)cs;
	cs->cs_pbuf.b_back = &cs->cs_pbuf;
	cs->cs_pbuf.b_forw = &cs->cs_pbuf;
	cs->cs_pbuf.b_flags = B_BUSY;	/* initialize flags */
	cs->cs_pbuf.b_error = 0;	/* initialize error */
	cs->cs_pbuf.b_offset = -1;
	sema_init(&cs->cs_pbuf.b_io, 0, NULL,
	    SEMA_DEFAULT, NULL);
	sema_init(&cs->cs_pbuf.b_sem, 0, NULL,
	    SEMA_DEFAULT, NULL);

	cs->cs_hbuf.b_chain = (struct buf *)cs;
	cs->cs_hbuf.b_back = &cs->cs_hbuf;
	cs->cs_hbuf.b_forw = &cs->cs_hbuf;
	cs->cs_hbuf.b_flags = B_BUSY;	/* initialize flags */
	cs->cs_hbuf.b_error = 0;	/* initialize error */
	cs->cs_hbuf.b_offset = -1;
	sema_init(&cs->cs_hbuf.b_io, 0, NULL,
	    SEMA_DEFAULT, NULL);
	sema_init(&cs->cs_hbuf.b_sem, 0, NULL,
	    SEMA_DEFAULT, NULL);

	cs->cs_flags &= ~(MD_RCS_ERROR);
	/*
	 * If we have already done'ed the i/o but have done prewrite
	 * on this child, then reset PWDONE flag and bump pwfrags before
	 * restarting i/o.
	 * If pwfrags is zero, we have already 'iodone'd the i/o so
	 * leave things alone.  We don't want to re-'done' it.
	 */
	mutex_enter(&ps->ps_mx);
	if (cs->cs_flags & MD_RCS_PWDONE) {
		cs->cs_flags &= ~MD_RCS_PWDONE;
		ps->ps_pwfrags++;
	}
	mutex_exit(&ps->ps_mx);
	raid_write_io(un, cs);
}

/*
 * NAME:	raid_wrerr
 * DESCRIPTION: RAID metadevice write error handling routine.
 * PARAMETERS:	md_raidcs_t *cs - pointer to a child structure
 * LOCKS:	must obtain unit writer lock while calling raid_error_state
 *		since a unit or column state transition may take place.
 *		must obtain unit reader lock to retry I/O.
 */
static void
raid_wrerr(md_raidcs_t *cs)
{
	md_raidps_t	*ps;
	mdi_unit_t	*ui;
	mr_unit_t	*un;
	md_raidcbuf_t	*cbuf;

	ps = cs->cs_ps;
	ui = ps->ps_ui;

	/* writer lock: raid_error_state may transition unit/column state */
	un = (mr_unit_t *)md_unit_writerlock(ui);
	ASSERT(un != 0);

	/* run error-state processing for every buf that reported B_ERROR */
	if (cs->cs_dbuf.b_flags & B_ERROR)
		(void) raid_error_state(un, &cs->cs_dbuf);
	if (cs->cs_pbuf.b_flags & B_ERROR)
		(void) raid_error_state(un, &cs->cs_pbuf);
	if (cs->cs_hbuf.b_flags & B_ERROR)
		(void) raid_error_state(un, &cs->cs_hbuf);
	for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next)
		if (cbuf->cbuf_bp.b_flags & B_ERROR)
			(void) raid_error_state(un, &cbuf->cbuf_bp);

	md_unit_writerexit(ui);

	/* request hotspare processing for the parent */
	ps->ps_flags |= MD_RPS_HSREQ;

	/* downgrade to reader lock before re-driving the I/O */
	un = (mr_unit_t *)md_unit_readerlock(ui);

	/* now attempt the appropriate retry routine */
	(*(cs->cs_retry_call))(un, cs);
}
/*
 * NAMES:	raid_write_error
 * DESCRIPTION: I/O error handling routine for a RAID metadevice write.
 *		Logs a warning for each erred buf whose column has not
 *		already been marked erred, then hands the child to the
 *		master daemon for state-change processing (raid_wrerr).
 * PARAMETERS:	md_raidcs_t *cs - pointer to child structure
 */
/*ARGSUSED*/
static void
raid_write_error(md_raidcs_t *cs)
{
	md_raidps_t	*ps;
	mdi_unit_t	*ui;
	mr_unit_t	*un;
	md_raidcbuf_t	*cbuf;
	set_t		setno;

	ps = cs->cs_ps;
	ui = ps->ps_ui;
	un = cs->cs_un;

	setno = MD_UN2SET(un);

	/*
	 * locate each buf that is in error on this io and then
	 * output an error message
	 */
	if ((cs->cs_dbuf.b_flags & B_ERROR) &&
	    (COLUMN_STATE(un, cs->cs_dcolumn) != RCS_ERRED) &&
	    (COLUMN_STATE(un, cs->cs_dcolumn) != RCS_LAST_ERRED))
		cmn_err(CE_WARN, "md %s: write error on %s",
		    md_shortname(MD_SID(un)),
		    md_devname(setno, md_expldev(cs->cs_dbuf.b_edev), NULL, 0));

	if ((cs->cs_pbuf.b_flags & B_ERROR) &&
	    (COLUMN_STATE(un, cs->cs_pcolumn) != RCS_ERRED) &&
	    (COLUMN_STATE(un, cs->cs_pcolumn) != RCS_LAST_ERRED))
		cmn_err(CE_WARN, "md %s: write error on %s",
		    md_shortname(MD_SID(un)),
		    md_devname(setno, md_expldev(cs->cs_pbuf.b_edev), NULL, 0));

	for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next)
		if ((cbuf->cbuf_bp.b_flags & B_ERROR) &&
		    (COLUMN_STATE(un, cbuf->cbuf_column) != RCS_ERRED) &&
		    (COLUMN_STATE(un, cbuf->cbuf_column) != RCS_LAST_ERRED))
			cmn_err(CE_WARN, "md %s: write error on %s",
			    md_shortname(MD_SID(un)),
			    md_devname(setno, md_expldev(cbuf->cbuf_bp.b_edev),
			    NULL, 0));

	md_unit_readerexit(ui);

	ASSERT(cs->cs_frags == 0);

	/* now schedule processing for possible state change */
	daemon_request(&md_mstr_daemon, raid_wrerr,
	    (daemon_queue_t *)cs, REQ_OLD);

}

/*
 * NAME:	raid_write_ponly
 * DESCRIPTION: RAID metadevice write routine for the case where only
 *		the parity column can be written (data column erred).
 * PARAMETERS:	md_raidcs_t *cs - pointer to a child structure
 */
static void
raid_write_ponly(md_raidcs_t *cs)
{
	md_raidps_t	*ps;
	mr_unit_t	*un = cs->cs_un;

	ps = cs->cs_ps;
	/* decrement pwfrags if needed, but not frags */
	ASSERT(!(cs->cs_flags & MD_RCS_PWDONE));
	raid_free_parent(ps, RFP_DECR_PWFRAGS);
	cs->cs_flags |= MD_RCS_PWDONE;
	cs->cs_frags = 1;
	cs->cs_stage = RAID_WRITE_PONLY_DONE;
	cs->cs_call = raid_stage;
	cs->cs_error_call = raid_write_error;
	cs->cs_retry_call = raid_write_no_retry;
	if (WRITE_ALT(un, cs->cs_pcolumn)) {
		cs->cs_frags++;
		raidio(cs, RIO_ALT | RIO_EXTRA | RIO_PARITY | RIO_WRITE);
	}
	raidio(cs, RIO_PARITY | RIO_WRITE);
}

/*
 * NAME:	raid_write_ploop
 * DESCRIPTION: RAID metadevice write routine; constructs parity from
 *		data in the other columns.  Called back once per column
 *		read; when all columns have been folded in, checksums the
 *		parity buffer and prewrites it.
 * PARAMETERS:	md_raidcs_t *cs - pointer to a child structure
 */
static void
raid_write_ploop(md_raidcs_t *cs)
{
	mr_unit_t *un = cs->cs_un;
	uint_t *dbuf;
	uint_t *pbuf;
	size_t wordcnt;
	uint_t psum = 0;

	/* fold the column just read into the parity accumulator */
	wordcnt = cs->cs_bcount / sizeof (uint_t);
	dbuf = (uint_t *)(void *)(cs->cs_dbuffer + DEV_BSIZE);
	pbuf = (uint_t *)(void *)(cs->cs_pbuffer + DEV_BSIZE);
	while (wordcnt--)
		*pbuf++ ^= *dbuf++;
	cs->cs_loop++;

	/*
	 * build parity from scratch using new data,
	 * skip reading the data and parity columns.
	 */
	while (cs->cs_loop == cs->cs_dcolumn || cs->cs_loop == cs->cs_pcolumn)
		cs->cs_loop++;

	/* more columns remain: issue the next read and return */
	if (cs->cs_loop != un->un_totalcolumncnt) {
		cs->cs_frags = 1;
		raidio(cs, RIO_DATA | RIO_READ | (cs->cs_loop + 1));
		return;
	}

	/* construct checksum for parity buffer */
	wordcnt = cs->cs_bcount / sizeof (uint_t);
	pbuf = (uint_t *)(void *)(cs->cs_pbuffer + DEV_BSIZE);
	while (wordcnt--) {
		psum ^= *pbuf;
		pbuf++;
	}
	RAID_FILLIN_RPW(cs->cs_pbuffer, un, psum, -1,
	    cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid,
	    1, cs->cs_pcolumn, RAID_PWMAGIC);

	cs->cs_stage = RAID_NONE;
	cs->cs_call = raid_write_ponly;
	cs->cs_error_call = raid_write_error;
	cs->cs_retry_call = raid_write_err_retry;
	cs->cs_frags = 1;
	if (WRITE_ALT(un, cs->cs_pcolumn)) {
		cs->cs_frags++;
		raidio(cs, RIO_ALT | RIO_EXTRA | RIO_PARITY | RIO_PREWRITE);
	}
	raidio(cs, RIO_PARITY | RIO_PREWRITE);
}

/*
 * NAME:	raid_write_donly
 * DESCRIPTION: RAID metadevice write routine.
 *		Completed writing data to the prewrite entry in the case
 *		where only the data column can be written (parity erred).
 * PARAMETERS:	md_raidcs_t *cs - pointer to a child structure
 */
static void
raid_write_donly(md_raidcs_t *cs)
{
	md_raidps_t	*ps;
	mr_unit_t	*un = cs->cs_un;

	ps = cs->cs_ps;
	/* WARNING: don't release unit reader lock here... */
	/* decrement pwfrags if needed, but not frags */
	ASSERT(!(cs->cs_flags & MD_RCS_PWDONE));
	raid_free_parent(ps, RFP_DECR_PWFRAGS);
	cs->cs_flags |= MD_RCS_PWDONE;
	cs->cs_frags = 1;
	cs->cs_stage = RAID_WRITE_DONLY_DONE;
	cs->cs_call = raid_stage;
	cs->cs_error_call = raid_write_error;
	cs->cs_retry_call = raid_write_err_retry;
	if (WRITE_ALT(un, cs->cs_dcolumn)) {
		cs->cs_frags++;
		raidio(cs, RIO_ALT | RIO_EXTRA | RIO_DATA | RIO_WRITE);
	}
	raidio(cs, RIO_DATA | RIO_WRITE);
}

/*
 * NAME:	raid_write_got_old
 * DESCRIPTION: RAID metadevice write routine.
 *		Completed read of old data and old parity; generate the
 *		new parity and prewrite data and parity.
 * PARAMETERS:	md_raidcs_t *cs - pointer to a child structure
 */
static void
raid_write_got_old(md_raidcs_t *cs)
{
	mr_unit_t *un = cs->cs_un;

	ASSERT(IO_READER_HELD(cs->cs_un));
	ASSERT(UNIT_READER_HELD(cs->cs_un));

	raid_mapin_buf(cs);
genstandardparity(cs); 31230Sstevel@tonic-gate cs->cs_frags = 2; 31240Sstevel@tonic-gate cs->cs_call = raid_stage; 31250Sstevel@tonic-gate cs->cs_stage = RAID_PREWRITE_DONE; 31260Sstevel@tonic-gate cs->cs_error_call = raid_write_error; 31270Sstevel@tonic-gate cs->cs_retry_call = raid_write_retry; 31280Sstevel@tonic-gate 31290Sstevel@tonic-gate if (WRITE_ALT(un, cs->cs_dcolumn)) { 31300Sstevel@tonic-gate cs->cs_frags++; 31310Sstevel@tonic-gate raidio(cs, RIO_ALT | RIO_EXTRA | RIO_DATA | RIO_PREWRITE); 31320Sstevel@tonic-gate } 31330Sstevel@tonic-gate 31340Sstevel@tonic-gate if (WRITE_ALT(un, cs->cs_pcolumn)) { 31350Sstevel@tonic-gate cs->cs_frags++; 31360Sstevel@tonic-gate raidio(cs, RIO_ALT | RIO_EXTRA | RIO_PARITY | RIO_PREWRITE); 31370Sstevel@tonic-gate } 31380Sstevel@tonic-gate ASSERT(cs->cs_frags < 4); 31390Sstevel@tonic-gate raidio(cs, RIO_DATA | RIO_PREWRITE); 31400Sstevel@tonic-gate raidio(cs, RIO_PARITY | RIO_PREWRITE); 31410Sstevel@tonic-gate } 31420Sstevel@tonic-gate 31430Sstevel@tonic-gate /* 31440Sstevel@tonic-gate * NAME: raid_write_io 31450Sstevel@tonic-gate * DESCRIPTION: RAID metadevice write I/O routine 31460Sstevel@tonic-gate * PARAMETERS: mr_unit_t *un - pointer to a unit structure 31470Sstevel@tonic-gate * md_raidcs_t *cs - pointer to a child structure 31480Sstevel@tonic-gate */ 31490Sstevel@tonic-gate 31500Sstevel@tonic-gate /*ARGSUSED*/ 31510Sstevel@tonic-gate static void 31520Sstevel@tonic-gate raid_write_io(mr_unit_t *un, md_raidcs_t *cs) 31530Sstevel@tonic-gate { 31540Sstevel@tonic-gate md_raidps_t *ps = cs->cs_ps; 31550Sstevel@tonic-gate uint_t *dbuf; 31560Sstevel@tonic-gate uint_t *ubuf; 31570Sstevel@tonic-gate size_t wordcnt; 31580Sstevel@tonic-gate uint_t dsum = 0; 31590Sstevel@tonic-gate int pcheck; 31600Sstevel@tonic-gate int dcheck; 31610Sstevel@tonic-gate 31620Sstevel@tonic-gate ASSERT((un->un_column[cs->cs_pcolumn].un_devstate & 31630Sstevel@tonic-gate RCS_INIT) == 0); 31640Sstevel@tonic-gate 
ASSERT((un->un_column[cs->cs_dcolumn].un_devstate & 31650Sstevel@tonic-gate RCS_INIT) == 0); 31660Sstevel@tonic-gate ASSERT(IO_READER_HELD(un)); 31670Sstevel@tonic-gate ASSERT(UNIT_READER_HELD(un)); 31680Sstevel@tonic-gate ASSERT(cs->cs_flags & MD_RCS_HAVE_PW_SLOTS); 31690Sstevel@tonic-gate if (cs->cs_flags & MD_RCS_LINE) { 31700Sstevel@tonic-gate 31710Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 31720Sstevel@tonic-gate 31730Sstevel@tonic-gate ASSERT(un->un_origcolumncnt == un->un_totalcolumncnt); 31740Sstevel@tonic-gate raid_mapin_buf(cs); 31750Sstevel@tonic-gate cs->cs_frags = un->un_origcolumncnt; 31760Sstevel@tonic-gate cs->cs_call = raid_stage; 31770Sstevel@tonic-gate cs->cs_error_call = raid_write_error; 31780Sstevel@tonic-gate cs->cs_retry_call = raid_write_no_retry; 31790Sstevel@tonic-gate cs->cs_stage = RAID_LINE_PWDONE; 31800Sstevel@tonic-gate genlineparity(cs); 31810Sstevel@tonic-gate return; 31820Sstevel@tonic-gate } 31830Sstevel@tonic-gate 31840Sstevel@tonic-gate pcheck = erred_check_line(un, cs, &un->un_column[cs->cs_pcolumn]); 31850Sstevel@tonic-gate dcheck = erred_check_line(un, cs, &un->un_column[cs->cs_dcolumn]); 31860Sstevel@tonic-gate cs->cs_resync_check = pcheck << RCL_PARITY_OFFSET || dcheck; 31870Sstevel@tonic-gate 31880Sstevel@tonic-gate if (pcheck == RCL_ERRED && dcheck == RCL_ERRED) { 31890Sstevel@tonic-gate int err = EIO; 31900Sstevel@tonic-gate 31910Sstevel@tonic-gate if ((un->un_column[cs->cs_pcolumn].un_devstate == 31920Sstevel@tonic-gate RCS_LAST_ERRED) || 31930Sstevel@tonic-gate (un->un_column[cs->cs_dcolumn].un_devstate == 31940Sstevel@tonic-gate RCS_LAST_ERRED)) 31950Sstevel@tonic-gate err = ENXIO; 31960Sstevel@tonic-gate raid_error_parent(ps, err); 31970Sstevel@tonic-gate ASSERT(!(cs->cs_flags & MD_RCS_PWDONE)); 31980Sstevel@tonic-gate raid_free_child(cs, 1); 31990Sstevel@tonic-gate raid_free_parent(ps, RFP_DECR_FRAGS 32000Sstevel@tonic-gate | RFP_RLS_LOCK | RFP_DECR_PWFRAGS); 32010Sstevel@tonic-gate return; 
32020Sstevel@tonic-gate } 32030Sstevel@tonic-gate 32040Sstevel@tonic-gate if (pcheck & RCL_ERRED) { 32050Sstevel@tonic-gate /* 32060Sstevel@tonic-gate * handle case of only having data drive 32070Sstevel@tonic-gate */ 32080Sstevel@tonic-gate raid_mapin_buf(cs); 32090Sstevel@tonic-gate wordcnt = cs->cs_bcount / sizeof (uint_t); 32100Sstevel@tonic-gate 32110Sstevel@tonic-gate dbuf = (uint_t *)(void *)(cs->cs_dbuffer + DEV_BSIZE); 32120Sstevel@tonic-gate ubuf = (uint_t *)(void *)(cs->cs_addr); 32130Sstevel@tonic-gate 32140Sstevel@tonic-gate while (wordcnt--) { 32150Sstevel@tonic-gate *dbuf = *ubuf; 32160Sstevel@tonic-gate dsum ^= *ubuf; 32170Sstevel@tonic-gate dbuf++; 32180Sstevel@tonic-gate ubuf++; 32190Sstevel@tonic-gate } 32200Sstevel@tonic-gate RAID_FILLIN_RPW(cs->cs_dbuffer, un, dsum, -1, 32217627SChris.Horne@Sun.COM cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid, 32227627SChris.Horne@Sun.COM 1, cs->cs_dcolumn, RAID_PWMAGIC); 32230Sstevel@tonic-gate cs->cs_frags = 1; 32240Sstevel@tonic-gate cs->cs_stage = RAID_NONE; 32250Sstevel@tonic-gate cs->cs_call = raid_write_donly; 32260Sstevel@tonic-gate cs->cs_error_call = raid_write_error; 32270Sstevel@tonic-gate cs->cs_retry_call = raid_write_err_retry; 32280Sstevel@tonic-gate if (WRITE_ALT(un, cs->cs_dcolumn)) { 32290Sstevel@tonic-gate cs->cs_frags++; 32300Sstevel@tonic-gate raidio(cs, RIO_DATA | RIO_ALT | RIO_EXTRA | 32310Sstevel@tonic-gate RIO_PREWRITE); 32320Sstevel@tonic-gate } 32330Sstevel@tonic-gate raidio(cs, RIO_DATA | RIO_PREWRITE); 32340Sstevel@tonic-gate return; 32350Sstevel@tonic-gate } 32360Sstevel@tonic-gate 32370Sstevel@tonic-gate if (dcheck & RCL_ERRED) { 32380Sstevel@tonic-gate /* 32390Sstevel@tonic-gate * handle case of only having parity drive 32400Sstevel@tonic-gate * build parity from scratch using new data, 32410Sstevel@tonic-gate * skip reading the data and parity columns. 
32420Sstevel@tonic-gate */ 32430Sstevel@tonic-gate raid_mapin_buf(cs); 32440Sstevel@tonic-gate cs->cs_loop = 0; 32450Sstevel@tonic-gate while (cs->cs_loop == cs->cs_dcolumn || 32460Sstevel@tonic-gate cs->cs_loop == cs->cs_pcolumn) 32470Sstevel@tonic-gate cs->cs_loop++; 32480Sstevel@tonic-gate 32490Sstevel@tonic-gate /* copy new data in to begin building parity */ 32500Sstevel@tonic-gate bcopy(cs->cs_addr, cs->cs_pbuffer + DEV_BSIZE, cs->cs_bcount); 32510Sstevel@tonic-gate cs->cs_stage = RAID_NONE; 32520Sstevel@tonic-gate cs->cs_call = raid_write_ploop; 32530Sstevel@tonic-gate cs->cs_error_call = raid_write_error; 32540Sstevel@tonic-gate cs->cs_retry_call = raid_write_err_retry; 32550Sstevel@tonic-gate cs->cs_frags = 1; 32560Sstevel@tonic-gate raidio(cs, RIO_DATA | RIO_READ | (cs->cs_loop + 1)); 32570Sstevel@tonic-gate return; 32580Sstevel@tonic-gate } 32590Sstevel@tonic-gate /* 32600Sstevel@tonic-gate * handle normal cases 32610Sstevel@tonic-gate * read old data and old parity 32620Sstevel@tonic-gate */ 32630Sstevel@tonic-gate cs->cs_frags = 2; 32640Sstevel@tonic-gate cs->cs_stage = RAID_NONE; 32650Sstevel@tonic-gate cs->cs_call = raid_write_got_old; 32660Sstevel@tonic-gate cs->cs_error_call = raid_write_error; 32670Sstevel@tonic-gate cs->cs_retry_call = raid_write_retry; 32680Sstevel@tonic-gate ASSERT(ps->ps_magic == RAID_PSMAGIC); 32690Sstevel@tonic-gate raidio(cs, RIO_DATA | RIO_READ); 32700Sstevel@tonic-gate raidio(cs, RIO_PARITY | RIO_READ); 32710Sstevel@tonic-gate } 32720Sstevel@tonic-gate 32730Sstevel@tonic-gate static void 32740Sstevel@tonic-gate raid_enqueue(md_raidcs_t *cs) 32750Sstevel@tonic-gate { 32760Sstevel@tonic-gate mdi_unit_t *ui = cs->cs_ps->ps_ui; 32770Sstevel@tonic-gate kmutex_t *io_list_mutex = &ui->ui_io_lock->io_list_mutex; 32780Sstevel@tonic-gate md_raidcs_t *cs1; 32790Sstevel@tonic-gate 32800Sstevel@tonic-gate mutex_enter(io_list_mutex); 32810Sstevel@tonic-gate ASSERT(! 
	    (cs->cs_flags & MD_RCS_LLOCKD));
	if (ui->ui_io_lock->io_list_front == NULL) {
		ui->ui_io_lock->io_list_front = cs;
		ui->ui_io_lock->io_list_back = cs;
	} else {
		cs1 = ui->ui_io_lock->io_list_back;
		cs1->cs_linlck_next = cs;
		ui->ui_io_lock->io_list_back = cs;
	}
	STAT_INC(raid_write_waits);
	STAT_MAX(raid_max_write_q_length, raid_write_queue_length);
	cs->cs_linlck_next = NULL;
	mutex_exit(io_list_mutex);
}

/*
 * NAME:	raid_write
 * DESCRIPTION: RAID metadevice write routine.
 *		Range-checks the request, acquires prewrite resources
 *		(throttling on prewrite-slot availability), takes the
 *		line writer lock and unit reader lock, then issues the
 *		write via raid_write_io().
 * PARAMETERS:	mr_unit_t *un - pointer to a unit structure
 *		md_raidcs_t *cs - pointer to a child structure
 * RETURNS:	always 0; errors are reported to the parent structure.
 */

/*ARGSUSED*/
static int
raid_write(mr_unit_t *un, md_raidcs_t *cs)
{
	int		error = 0;
	md_raidps_t	*ps;
	mdi_unit_t	*ui;
	minor_t		mnum;

	ASSERT(IO_READER_HELD(un));
	ps = cs->cs_ps;
	ui = ps->ps_ui;

	ASSERT(UNIT_STATE(un) != RUS_INIT);
	if (UNIT_STATE(un) == RUS_LAST_ERRED)
		error = EIO;

	/* make sure the write doesn't go beyond the column */
	if (cs->cs_blkno + cs->cs_blkcnt > un->un_segsize * un->un_segsincolumn)
		error = ENXIO;
	if (error)
		goto werror;

	getresources(cs);

	/*
	 * this is an advisory loop that keeps the waiting lists short
	 * to reduce cpu time.  Since there is a race introduced by not
	 * acquiring all the correct mutexes, use a cv_timedwait to be
	 * sure the write always will wake up and start.
	 */
	while (raid_check_pw(cs)) {
		mutex_enter(&un->un_mx);
		un->un_rflags |= MD_RFLAG_NEEDPW;
		STAT_INC(raid_prewrite_waits);
		(void) cv_reltimedwait(&un->un_cv, &un->un_mx, md_wr_wait,
		    TR_CLOCK_TICK);
		un->un_rflags &= ~MD_RFLAG_NEEDPW;
		mutex_exit(&un->un_mx);
	}

	/* non-zero means the write was deferred onto the io list */
	if (raid_line_writer_lock(cs, 1))
		return (0);

	un = (mr_unit_t *)md_unit_readerlock(ui);
	cs->cs_un = un;
	mnum = MD_SID(un);

	if (un->un_state & RUS_REGEN) {
		/* regen may swap in a new unit structure; reload it */
		raid_regen_parity(cs);
		un = MD_UNIT(mnum);
		cs->cs_un = un;
	}

	raid_write_io(un, cs);
	return (0);
werror:
	/* acquire unit reader lock since raid_free_child always drops it */
	raid_error_parent(ps, error);
	raid_free_child(cs, 0);
	/* decrement both pwfrags and frags */
	raid_free_parent(ps, RFP_DECR_PWFRAGS | RFP_DECR_FRAGS | RFP_RLS_LOCK);
	return (0);
}


/*
 * NAMES:	raid_stage
 * DESCRIPTION: post-processing routine for a RAID metadevice.
 *		State machine driven by cs->cs_stage: each I/O phase
 *		completion re-enters here to launch the next phase or
 *		release the child/parent resources.
 * PARAMETERS:	md_raidcs_t *cs - pointer to child structure
 */
static void
raid_stage(md_raidcs_t *cs)
{
	md_raidps_t	*ps = cs->cs_ps;
	mr_unit_t	*un = cs->cs_un;
	md_raidcbuf_t	*cbuf;
	buf_t		*bp;
	void		*private;
	int		flag;

	switch (cs->cs_stage) {
	case RAID_READ_DONE:
		raid_free_child(cs, 1);
		/* decrement readfrags */
		raid_free_parent(ps, RFP_DECR_READFRAGS | RFP_RLS_LOCK);
		return;

	case RAID_WRITE_DONE:
	case RAID_WRITE_PONLY_DONE:
	case RAID_WRITE_DONLY_DONE:
		/*
		 * Completed writing real parity and/or data.
		 */
		ASSERT(cs->cs_flags & MD_RCS_PWDONE);
		raid_free_child(cs, 1);
		/* decrement frags but not pwfrags */
		raid_free_parent(ps, RFP_DECR_FRAGS | RFP_RLS_LOCK);
		return;

	case RAID_PREWRITE_DONE:
		/*
		 * completed writing data and parity to prewrite entries
		 */
		/*
		 * WARNING: don't release unit reader lock here..
		 * decrement pwfrags but not frags
		 */
		raid_free_parent(ps, RFP_DECR_PWFRAGS);
		cs->cs_flags |= MD_RCS_PWDONE;
		cs->cs_frags = 2;
		cs->cs_stage = RAID_WRITE_DONE;
		cs->cs_call = raid_stage;
		cs->cs_error_call = raid_write_error;
		cs->cs_retry_call = raid_write_no_retry;
		if (WRITE_ALT(un, cs->cs_pcolumn)) {
			cs->cs_frags++;
			raidio(cs, RIO_ALT | RIO_EXTRA | RIO_PARITY |
			    RIO_WRITE);
		}
		if (WRITE_ALT(un, cs->cs_dcolumn)) {
			cs->cs_frags++;
			raidio(cs, RIO_ALT | RIO_EXTRA | RIO_DATA | RIO_WRITE);
		}
		ASSERT(cs->cs_frags < 4);
		raidio(cs, RIO_DATA | RIO_WRITE);
		raidio(cs, RIO_PARITY | RIO_WRITE);
		if (cs->cs_pw_inval_list) {
			raid_free_pwinvalidate(cs);
		}
		return;

	case RAID_LINE_PWDONE:
		ASSERT(cs->cs_frags == 0);
		raid_free_parent(ps, RFP_DECR_PWFRAGS);
		cs->cs_flags |= MD_RCS_PWDONE;
		cs->cs_frags = un->un_origcolumncnt;
		cs->cs_call = raid_stage;
		cs->cs_error_call = raid_write_error;
		cs->cs_retry_call = raid_write_no_retry;
		cs->cs_stage = RAID_WRITE_DONE;
		for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) {
			/*
			 * fill in buffer for write to prewrite area
			 */
			bp = &cbuf->cbuf_bp;
			bp->b_back = bp;
			bp->b_forw = bp;
			bp->b_un.b_addr = cbuf->cbuf_buffer + DEV_BSIZE;
			bp->b_bcount = cbuf->cbuf_bcount;
			bp->b_bufsize = cbuf->cbuf_bcount;
			bp->b_lblkno =
			    un->un_column[cbuf->cbuf_column].un_devstart +
			    cs->cs_blkno;
			bp->b_flags &= ~(B_READ | B_WRITE | B_ERROR);
			bp->b_flags &= ~nv_available;
			bp->b_flags |= B_WRITE | B_BUSY;
			bp->b_iodone = (int (*)())raid_done;
			bp->b_edev = md_dev64_to_dev(
			    un->un_column[cbuf->cbuf_column].un_dev);
			bp->b_chain = (struct buf *)cs;
			private = cs->cs_strategy_private;
			flag = cs->cs_strategy_flag;
			md_call_strategy(bp, flag, private);
		}
		raidio(cs, RIO_DATA | RIO_WRITE);
		raidio(cs, RIO_PARITY | RIO_WRITE);
		if (cs->cs_pw_inval_list) {
			raid_free_pwinvalidate(cs);
		}
		return;

	default:
		ASSERT(0);
		break;
	}
}
/*
 * NAME:	md_raid_strategy
 * DESCRIPTION: RAID metadevice I/O operations entry point.
 * PARAMETERS:	buf_t *pb - pointer to a user I/O buffer
 *		int flag - metadevice specific flag
 *		void *private - carry over flag ??
 *
 */

void
md_raid_strategy(buf_t *pb, int flag, void *private)
{
	md_raidps_t	*ps;
	md_raidcs_t	*cs;
	int		doing_writes;
	int		err;
	mr_unit_t	*un;
	mdi_unit_t	*ui;
	size_t		count;
	diskaddr_t	blkno;
	caddr_t		addr;
	off_t		offset;
	int		colcnt;
	minor_t		mnum;
	set_t		setno;

	ui = MDI_UNIT(getminor(pb->b_edev));
	md_kstat_waitq_enter(ui);
	un = (mr_unit_t *)md_io_readerlock(ui);
	setno = MD_MIN2SET(getminor(pb->b_edev));

	if ((flag & MD_NOBLOCK) == 0) {
		/* fail the I/O if the set's iocount cannot be bumped */
		if (md_inc_iocount(setno) != 0) {
			pb->b_flags |= B_ERROR;
			pb->b_error = ENXIO;
			pb->b_resid = pb->b_bcount;
			md_kstat_waitq_exit(ui);
			md_io_readerexit(ui);
			biodone(pb);
			return;
		}
35210Sstevel@tonic-gate } else { 35220Sstevel@tonic-gate md_inc_iocount_noblock(setno); 35230Sstevel@tonic-gate } 35240Sstevel@tonic-gate 35250Sstevel@tonic-gate mnum = MD_SID(un); 35260Sstevel@tonic-gate colcnt = un->un_totalcolumncnt - 1; 35270Sstevel@tonic-gate count = pb->b_bcount; 35280Sstevel@tonic-gate 35290Sstevel@tonic-gate STAT_CHECK(raid_512, count == 512); 35300Sstevel@tonic-gate STAT_CHECK(raid_1024, count == 1024); 35310Sstevel@tonic-gate STAT_CHECK(raid_1024_8192, count > 1024 && count < 8192); 35320Sstevel@tonic-gate STAT_CHECK(raid_8192, count == 8192); 35330Sstevel@tonic-gate STAT_CHECK(raid_8192_bigger, count > 8192); 35340Sstevel@tonic-gate 35350Sstevel@tonic-gate (void *) md_unit_readerlock(ui); 35360Sstevel@tonic-gate if (!(flag & MD_STR_NOTTOP)) { 35370Sstevel@tonic-gate err = md_checkbuf(ui, (md_unit_t *)un, pb); /* check and map */ 35380Sstevel@tonic-gate if (err != 0) { 35390Sstevel@tonic-gate md_kstat_waitq_exit(ui); 35400Sstevel@tonic-gate md_io_readerexit(ui); 35410Sstevel@tonic-gate return; 35420Sstevel@tonic-gate } 35430Sstevel@tonic-gate } 35440Sstevel@tonic-gate md_unit_readerexit(ui); 35450Sstevel@tonic-gate 35460Sstevel@tonic-gate STAT_INC(raid_total_io); 35470Sstevel@tonic-gate 35480Sstevel@tonic-gate /* allocate a parent structure for the user I/O */ 35490Sstevel@tonic-gate ps = kmem_cache_alloc(raid_parent_cache, MD_ALLOCFLAGS); 35500Sstevel@tonic-gate raid_parent_init(ps); 35510Sstevel@tonic-gate 35520Sstevel@tonic-gate /* 35530Sstevel@tonic-gate * Save essential information from the original buffhdr 35540Sstevel@tonic-gate * in the md_save structure. 
35550Sstevel@tonic-gate */ 35560Sstevel@tonic-gate ps->ps_un = un; 35570Sstevel@tonic-gate ps->ps_ui = ui; 35580Sstevel@tonic-gate ps->ps_bp = pb; 35590Sstevel@tonic-gate ps->ps_addr = pb->b_un.b_addr; 35600Sstevel@tonic-gate 35610Sstevel@tonic-gate if ((pb->b_flags & B_READ) == 0) { 35620Sstevel@tonic-gate ps->ps_flags |= MD_RPS_WRITE; 35630Sstevel@tonic-gate doing_writes = 1; 35640Sstevel@tonic-gate STAT_INC(raid_writes); 35650Sstevel@tonic-gate } else { 35660Sstevel@tonic-gate ps->ps_flags |= MD_RPS_READ; 35670Sstevel@tonic-gate doing_writes = 0; 35680Sstevel@tonic-gate STAT_INC(raid_reads); 35690Sstevel@tonic-gate } 35700Sstevel@tonic-gate 35710Sstevel@tonic-gate count = lbtodb(pb->b_bcount); /* transfer count (in blocks) */ 35720Sstevel@tonic-gate blkno = pb->b_lblkno; /* block number on device */ 35730Sstevel@tonic-gate addr = 0; 35740Sstevel@tonic-gate offset = 0; 35750Sstevel@tonic-gate ps->ps_pwfrags = 1; 35760Sstevel@tonic-gate ps->ps_frags = 1; 35770Sstevel@tonic-gate md_kstat_waitq_to_runq(ui); 35780Sstevel@tonic-gate 35790Sstevel@tonic-gate do { 35800Sstevel@tonic-gate cs = kmem_cache_alloc(raid_child_cache, MD_ALLOCFLAGS); 35810Sstevel@tonic-gate raid_child_init(cs); 35820Sstevel@tonic-gate cs->cs_ps = ps; 35830Sstevel@tonic-gate cs->cs_un = un; 35840Sstevel@tonic-gate cs->cs_mdunit = mnum; 35850Sstevel@tonic-gate cs->cs_strategy_flag = flag; 35860Sstevel@tonic-gate cs->cs_strategy_private = private; 35870Sstevel@tonic-gate cs->cs_addr = addr; 35880Sstevel@tonic-gate cs->cs_offset = offset; 35890Sstevel@tonic-gate count = raid_iosetup(un, blkno, count, cs); 35900Sstevel@tonic-gate if (cs->cs_flags & MD_RCS_LINE) { 35910Sstevel@tonic-gate blkno += (cs->cs_blkcnt * colcnt); 35920Sstevel@tonic-gate offset += (cs->cs_bcount * colcnt); 35930Sstevel@tonic-gate } else { 35940Sstevel@tonic-gate blkno += cs->cs_blkcnt; 35950Sstevel@tonic-gate offset += cs->cs_bcount; 35960Sstevel@tonic-gate } 35970Sstevel@tonic-gate /* for each cs bump up the ps_pwfrags and 
ps_frags fields */ 35980Sstevel@tonic-gate if (count) { 35990Sstevel@tonic-gate mutex_enter(&ps->ps_mx); 36000Sstevel@tonic-gate ps->ps_pwfrags++; 36010Sstevel@tonic-gate ps->ps_frags++; 36020Sstevel@tonic-gate mutex_exit(&ps->ps_mx); 36030Sstevel@tonic-gate if (doing_writes) 36040Sstevel@tonic-gate (void) raid_write(un, cs); 36050Sstevel@tonic-gate else 36060Sstevel@tonic-gate (void) raid_read(un, cs); 36070Sstevel@tonic-gate } 36080Sstevel@tonic-gate } while (count); 36090Sstevel@tonic-gate if (doing_writes) { 36100Sstevel@tonic-gate (void) raid_write(un, cs); 36110Sstevel@tonic-gate } else 36120Sstevel@tonic-gate (void) raid_read(un, cs); 36130Sstevel@tonic-gate 36140Sstevel@tonic-gate if (! (flag & MD_STR_NOTTOP) && panicstr) { 36150Sstevel@tonic-gate while (! (ps->ps_flags & MD_RPS_DONE)) { 36160Sstevel@tonic-gate md_daemon(1, &md_done_daemon); 36170Sstevel@tonic-gate drv_usecwait(10); 36180Sstevel@tonic-gate } 36190Sstevel@tonic-gate kmem_cache_free(raid_parent_cache, ps); 36200Sstevel@tonic-gate } 36210Sstevel@tonic-gate } 36220Sstevel@tonic-gate 36230Sstevel@tonic-gate /* 36240Sstevel@tonic-gate * NAMES: raid_snarf 36250Sstevel@tonic-gate * DESCRIPTION: RAID metadevice SNARF entry point 36260Sstevel@tonic-gate * PARAMETERS: md_snarfcmd_t cmd, 36270Sstevel@tonic-gate * set_t setno 36280Sstevel@tonic-gate * RETURNS: 36290Sstevel@tonic-gate */ 36300Sstevel@tonic-gate static int 36310Sstevel@tonic-gate raid_snarf(md_snarfcmd_t cmd, set_t setno) 36320Sstevel@tonic-gate { 36330Sstevel@tonic-gate mr_unit_t *un; 36340Sstevel@tonic-gate mddb_recid_t recid; 36350Sstevel@tonic-gate int gotsomething; 36360Sstevel@tonic-gate int all_raid_gotten; 36370Sstevel@tonic-gate mddb_type_t typ1; 36380Sstevel@tonic-gate uint_t ncol; 36390Sstevel@tonic-gate mddb_de_ic_t *dep; 36400Sstevel@tonic-gate mddb_rb32_t *rbp; 36410Sstevel@tonic-gate size_t newreqsize; 36420Sstevel@tonic-gate mr_unit_t *big_un; 36430Sstevel@tonic-gate mr_unit32_od_t *small_un; 36440Sstevel@tonic-gate 
36450Sstevel@tonic-gate 36460Sstevel@tonic-gate if (cmd == MD_SNARF_CLEANUP) 36470Sstevel@tonic-gate return (0); 36480Sstevel@tonic-gate 36490Sstevel@tonic-gate all_raid_gotten = 1; 36500Sstevel@tonic-gate gotsomething = 0; 36510Sstevel@tonic-gate typ1 = (mddb_type_t)md_getshared_key(setno, 36520Sstevel@tonic-gate raid_md_ops.md_driver.md_drivername); 36530Sstevel@tonic-gate recid = mddb_makerecid(setno, 0); 36540Sstevel@tonic-gate 36550Sstevel@tonic-gate while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) { 36560Sstevel@tonic-gate if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) { 36570Sstevel@tonic-gate continue; 36580Sstevel@tonic-gate } 36590Sstevel@tonic-gate 36600Sstevel@tonic-gate dep = mddb_getrecdep(recid); 36610Sstevel@tonic-gate dep->de_flags = MDDB_F_RAID; 36620Sstevel@tonic-gate rbp = dep->de_rb; 36631623Stw21770 switch (rbp->rb_revision) { 36641623Stw21770 case MDDB_REV_RB: 36651623Stw21770 case MDDB_REV_RBFN: 36661623Stw21770 if ((rbp->rb_private & MD_PRV_CONVD) == 0) { 36671623Stw21770 /* 36681623Stw21770 * This means, we have an old and small record 36691623Stw21770 * and this record hasn't already been 36701623Stw21770 * converted. Before we create an incore 36711623Stw21770 * metadevice from this we have to convert it to 36721623Stw21770 * a big record. 
36731623Stw21770 */ 36741623Stw21770 small_un = 36751623Stw21770 (mr_unit32_od_t *)mddb_getrecaddr(recid); 36761623Stw21770 ncol = small_un->un_totalcolumncnt; 36771623Stw21770 newreqsize = sizeof (mr_unit_t) + 36787627SChris.Horne@Sun.COM ((ncol - 1) * sizeof (mr_column_t)); 36791623Stw21770 big_un = (mr_unit_t *)kmem_zalloc(newreqsize, 36807627SChris.Horne@Sun.COM KM_SLEEP); 36811623Stw21770 raid_convert((caddr_t)small_un, (caddr_t)big_un, 36827627SChris.Horne@Sun.COM SMALL_2_BIG); 36831623Stw21770 kmem_free(small_un, dep->de_reqsize); 36841623Stw21770 dep->de_rb_userdata = big_un; 36851623Stw21770 dep->de_reqsize = newreqsize; 36861623Stw21770 un = big_un; 36871623Stw21770 rbp->rb_private |= MD_PRV_CONVD; 36881623Stw21770 } else { 36891623Stw21770 /* 36901623Stw21770 * Record has already been converted. Just 36911623Stw21770 * get its address. 36921623Stw21770 */ 36931623Stw21770 un = (mr_unit_t *)mddb_getrecaddr(recid); 36941623Stw21770 } 36951623Stw21770 un->c.un_revision &= ~MD_64BIT_META_DEV; 36961623Stw21770 break; 36971623Stw21770 case MDDB_REV_RB64: 36981623Stw21770 case MDDB_REV_RB64FN: 36990Sstevel@tonic-gate /* Big device */ 37000Sstevel@tonic-gate un = (mr_unit_t *)mddb_getrecaddr(recid); 37011623Stw21770 un->c.un_revision |= MD_64BIT_META_DEV; 37021623Stw21770 un->c.un_flag |= MD_EFILABEL; 37031623Stw21770 break; 37040Sstevel@tonic-gate } 37052077Stw21770 MDDB_NOTE_FN(rbp->rb_revision, un->c.un_revision); 37060Sstevel@tonic-gate 37070Sstevel@tonic-gate /* 37080Sstevel@tonic-gate * Create minor device node for snarfed entry. 
37090Sstevel@tonic-gate */ 37100Sstevel@tonic-gate (void) md_create_minor_node(MD_MIN2SET(MD_SID(un)), MD_SID(un)); 37110Sstevel@tonic-gate 37120Sstevel@tonic-gate if (MD_UNIT(MD_SID(un)) != NULL) { 37130Sstevel@tonic-gate mddb_setrecprivate(recid, MD_PRV_PENDDEL); 37140Sstevel@tonic-gate continue; 37150Sstevel@tonic-gate } 37160Sstevel@tonic-gate all_raid_gotten = 0; 37170Sstevel@tonic-gate if (raid_build_incore((void *)un, 1) == 0) { 37180Sstevel@tonic-gate mddb_setrecprivate(recid, MD_PRV_GOTIT); 37197627SChris.Horne@Sun.COM md_create_unit_incore(MD_SID(un), &raid_md_ops, 1); 37200Sstevel@tonic-gate gotsomething = 1; 37210Sstevel@tonic-gate } else if (un->mr_ic) { 37220Sstevel@tonic-gate kmem_free(un->un_column_ic, sizeof (mr_column_ic_t) * 37237627SChris.Horne@Sun.COM un->un_totalcolumncnt); 37240Sstevel@tonic-gate kmem_free(un->mr_ic, sizeof (*un->mr_ic)); 37250Sstevel@tonic-gate } 37260Sstevel@tonic-gate } 37270Sstevel@tonic-gate 37280Sstevel@tonic-gate if (!all_raid_gotten) { 37290Sstevel@tonic-gate return (gotsomething); 37300Sstevel@tonic-gate } 37310Sstevel@tonic-gate 37320Sstevel@tonic-gate recid = mddb_makerecid(setno, 0); 37330Sstevel@tonic-gate while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) 37340Sstevel@tonic-gate if (!(mddb_getrecprivate(recid) & MD_PRV_GOTIT)) 37350Sstevel@tonic-gate mddb_setrecprivate(recid, MD_PRV_PENDDEL); 37360Sstevel@tonic-gate 37370Sstevel@tonic-gate return (0); 37380Sstevel@tonic-gate } 37390Sstevel@tonic-gate 37400Sstevel@tonic-gate /* 37410Sstevel@tonic-gate * NAMES: raid_halt 37420Sstevel@tonic-gate * DESCRIPTION: RAID metadevice HALT entry point 37430Sstevel@tonic-gate * PARAMETERS: md_haltcmd_t cmd - 37440Sstevel@tonic-gate * set_t setno - 37450Sstevel@tonic-gate * RETURNS: 37460Sstevel@tonic-gate */ 37470Sstevel@tonic-gate static int 37480Sstevel@tonic-gate raid_halt(md_haltcmd_t cmd, set_t setno) 37490Sstevel@tonic-gate { 37500Sstevel@tonic-gate set_t i; 37510Sstevel@tonic-gate mdi_unit_t *ui; 
37520Sstevel@tonic-gate minor_t mnum; 37530Sstevel@tonic-gate 37540Sstevel@tonic-gate if (cmd == MD_HALT_CLOSE) 37550Sstevel@tonic-gate return (0); 37560Sstevel@tonic-gate 37570Sstevel@tonic-gate if (cmd == MD_HALT_OPEN) 37580Sstevel@tonic-gate return (0); 37590Sstevel@tonic-gate 37600Sstevel@tonic-gate if (cmd == MD_HALT_UNLOAD) 37610Sstevel@tonic-gate return (0); 37620Sstevel@tonic-gate 37630Sstevel@tonic-gate if (cmd == MD_HALT_CHECK) { 37640Sstevel@tonic-gate for (i = 0; i < md_nunits; i++) { 37650Sstevel@tonic-gate mnum = MD_MKMIN(setno, i); 37660Sstevel@tonic-gate if ((ui = MDI_UNIT(mnum)) == NULL) 37670Sstevel@tonic-gate continue; 37680Sstevel@tonic-gate if (ui->ui_opsindex != raid_md_ops.md_selfindex) 37690Sstevel@tonic-gate continue; 37700Sstevel@tonic-gate if (md_unit_isopen(ui)) 37710Sstevel@tonic-gate return (1); 37720Sstevel@tonic-gate } 37730Sstevel@tonic-gate return (0); 37740Sstevel@tonic-gate } 37750Sstevel@tonic-gate 37760Sstevel@tonic-gate if (cmd != MD_HALT_DOIT) 37770Sstevel@tonic-gate return (1); 37780Sstevel@tonic-gate 37790Sstevel@tonic-gate for (i = 0; i < md_nunits; i++) { 37800Sstevel@tonic-gate mnum = MD_MKMIN(setno, i); 37810Sstevel@tonic-gate if ((ui = MDI_UNIT(mnum)) == NULL) 37820Sstevel@tonic-gate continue; 37830Sstevel@tonic-gate if (ui->ui_opsindex != raid_md_ops.md_selfindex) 37840Sstevel@tonic-gate continue; 37850Sstevel@tonic-gate reset_raid((mr_unit_t *)MD_UNIT(mnum), mnum, 0); 37860Sstevel@tonic-gate } 37870Sstevel@tonic-gate return (0); 37880Sstevel@tonic-gate } 37890Sstevel@tonic-gate 37900Sstevel@tonic-gate /* 37910Sstevel@tonic-gate * NAMES: raid_close_all_devs 37920Sstevel@tonic-gate * DESCRIPTION: Close all the devices of the unit. 
37930Sstevel@tonic-gate * PARAMETERS: mr_unit_t *un - pointer to unit structure 37940Sstevel@tonic-gate * RETURNS: 37950Sstevel@tonic-gate */ 37960Sstevel@tonic-gate void 37970Sstevel@tonic-gate raid_close_all_devs(mr_unit_t *un, int init_pw, int md_cflags) 37980Sstevel@tonic-gate { 37990Sstevel@tonic-gate int i; 38000Sstevel@tonic-gate mr_column_t *device; 38010Sstevel@tonic-gate 38020Sstevel@tonic-gate for (i = 0; i < un->un_totalcolumncnt; i++) { 38030Sstevel@tonic-gate device = &un->un_column[i]; 38040Sstevel@tonic-gate if (device->un_devflags & MD_RAID_DEV_ISOPEN) { 38050Sstevel@tonic-gate ASSERT((device->un_dev != (md_dev64_t)0) && 38060Sstevel@tonic-gate (device->un_dev != NODEV64)); 38070Sstevel@tonic-gate if ((device->un_devstate & RCS_OKAY) && init_pw) 38080Sstevel@tonic-gate (void) init_pw_area(un, device->un_dev, 38097627SChris.Horne@Sun.COM device->un_pwstart, i); 38100Sstevel@tonic-gate md_layered_close(device->un_dev, md_cflags); 38110Sstevel@tonic-gate device->un_devflags &= ~MD_RAID_DEV_ISOPEN; 38120Sstevel@tonic-gate } 38130Sstevel@tonic-gate } 38140Sstevel@tonic-gate } 38150Sstevel@tonic-gate 38160Sstevel@tonic-gate /* 38170Sstevel@tonic-gate * NAMES: raid_open_all_devs 38180Sstevel@tonic-gate * DESCRIPTION: Open all the components (columns) of the device unit. 
38190Sstevel@tonic-gate * PARAMETERS: mr_unit_t *un - pointer to unit structure 38200Sstevel@tonic-gate * RETURNS: 38210Sstevel@tonic-gate */ 38220Sstevel@tonic-gate static int 38230Sstevel@tonic-gate raid_open_all_devs(mr_unit_t *un, int md_oflags) 38240Sstevel@tonic-gate { 38250Sstevel@tonic-gate minor_t mnum = MD_SID(un); 38260Sstevel@tonic-gate int i; 38270Sstevel@tonic-gate int not_opened = 0; 38280Sstevel@tonic-gate int commit = 0; 38290Sstevel@tonic-gate int col = -1; 38300Sstevel@tonic-gate mr_column_t *device; 38310Sstevel@tonic-gate set_t setno = MD_MIN2SET(MD_SID(un)); 38320Sstevel@tonic-gate side_t side = mddb_getsidenum(setno); 38330Sstevel@tonic-gate mdkey_t key; 38340Sstevel@tonic-gate mdi_unit_t *ui = MDI_UNIT(mnum); 38350Sstevel@tonic-gate 38360Sstevel@tonic-gate ui->ui_tstate &= ~MD_INACCESSIBLE; 38370Sstevel@tonic-gate 38380Sstevel@tonic-gate for (i = 0; i < un->un_totalcolumncnt; i++) { 38390Sstevel@tonic-gate md_dev64_t tmpdev; 38400Sstevel@tonic-gate 38410Sstevel@tonic-gate device = &un->un_column[i]; 38420Sstevel@tonic-gate 38430Sstevel@tonic-gate if (COLUMN_STATE(un, i) & RCS_ERRED) { 38440Sstevel@tonic-gate not_opened++; 38450Sstevel@tonic-gate continue; 38460Sstevel@tonic-gate } 38470Sstevel@tonic-gate 38480Sstevel@tonic-gate if (device->un_devflags & MD_RAID_DEV_ISOPEN) 38490Sstevel@tonic-gate continue; 38500Sstevel@tonic-gate 38510Sstevel@tonic-gate tmpdev = device->un_dev; 38520Sstevel@tonic-gate /* 38530Sstevel@tonic-gate * Open by device id 38540Sstevel@tonic-gate */ 38550Sstevel@tonic-gate key = HOTSPARED(un, i) ? 
38567627SChris.Horne@Sun.COM device->un_hs_key : device->un_orig_key; 38570Sstevel@tonic-gate if ((md_getmajor(tmpdev) != md_major) && 38587627SChris.Horne@Sun.COM md_devid_found(setno, side, key) == 1) { 38590Sstevel@tonic-gate tmpdev = md_resolve_bydevid(mnum, tmpdev, key); 38600Sstevel@tonic-gate } 38610Sstevel@tonic-gate if (md_layered_open(mnum, &tmpdev, md_oflags)) { 38620Sstevel@tonic-gate device->un_dev = tmpdev; 38630Sstevel@tonic-gate not_opened++; 38640Sstevel@tonic-gate continue; 38650Sstevel@tonic-gate } 38660Sstevel@tonic-gate device->un_dev = tmpdev; 38670Sstevel@tonic-gate device->un_devflags |= MD_RAID_DEV_ISOPEN; 38680Sstevel@tonic-gate } 38690Sstevel@tonic-gate 38700Sstevel@tonic-gate /* if open errors and errored devices are 1 then device can run */ 38710Sstevel@tonic-gate if (not_opened > 1) { 38720Sstevel@tonic-gate cmn_err(CE_WARN, 38737627SChris.Horne@Sun.COM "md: %s failed to open. open error on %s\n", 38747627SChris.Horne@Sun.COM md_shortname(MD_SID(un)), 38757627SChris.Horne@Sun.COM md_devname(MD_UN2SET(un), device->un_orig_dev, NULL, 0)); 38760Sstevel@tonic-gate 38770Sstevel@tonic-gate ui->ui_tstate |= MD_INACCESSIBLE; 38780Sstevel@tonic-gate 38790Sstevel@tonic-gate SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OPEN_FAIL, SVM_TAG_METADEVICE, 38800Sstevel@tonic-gate MD_UN2SET(un), MD_SID(un)); 38810Sstevel@tonic-gate 38820Sstevel@tonic-gate return (not_opened > 1); 38830Sstevel@tonic-gate } 38840Sstevel@tonic-gate 38850Sstevel@tonic-gate for (i = 0; i < un->un_totalcolumncnt; i++) { 38860Sstevel@tonic-gate device = &un->un_column[i]; 38870Sstevel@tonic-gate if (device->un_devflags & MD_RAID_DEV_ISOPEN) { 38880Sstevel@tonic-gate if (device->un_devstate & RCS_LAST_ERRED) { 38890Sstevel@tonic-gate /* 38900Sstevel@tonic-gate * At this point in time there is a possibility 38910Sstevel@tonic-gate * that errors were the result of a controller 38920Sstevel@tonic-gate * failure with more than a single column on it 38930Sstevel@tonic-gate * so clear out last 
errored columns and let errors 38940Sstevel@tonic-gate * re-occur is necessary. 38950Sstevel@tonic-gate */ 38960Sstevel@tonic-gate raid_set_state(un, i, RCS_OKAY, 0); 38970Sstevel@tonic-gate commit++; 38980Sstevel@tonic-gate } 38990Sstevel@tonic-gate continue; 39000Sstevel@tonic-gate } 39010Sstevel@tonic-gate ASSERT(col == -1); 39020Sstevel@tonic-gate col = i; 39030Sstevel@tonic-gate } 39040Sstevel@tonic-gate 39050Sstevel@tonic-gate if (col != -1) { 39060Sstevel@tonic-gate raid_set_state(un, col, RCS_ERRED, 0); 39070Sstevel@tonic-gate commit++; 39080Sstevel@tonic-gate } 39090Sstevel@tonic-gate 39100Sstevel@tonic-gate if (commit) 39110Sstevel@tonic-gate raid_commit(un, NULL); 39120Sstevel@tonic-gate 39130Sstevel@tonic-gate if (col != -1) { 39140Sstevel@tonic-gate if (COLUMN_STATE(un, col) & RCS_ERRED) { 39150Sstevel@tonic-gate SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, 39160Sstevel@tonic-gate SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un)); 39170Sstevel@tonic-gate } else if (COLUMN_STATE(un, col) & RCS_LAST_ERRED) { 39180Sstevel@tonic-gate SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED, 39190Sstevel@tonic-gate SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un)); 39200Sstevel@tonic-gate } 39210Sstevel@tonic-gate } 39220Sstevel@tonic-gate 39230Sstevel@tonic-gate return (0); 39240Sstevel@tonic-gate } 39250Sstevel@tonic-gate 39260Sstevel@tonic-gate /* 39270Sstevel@tonic-gate * NAMES: raid_internal_open 39280Sstevel@tonic-gate * DESCRIPTION: Do the actual RAID open 39290Sstevel@tonic-gate * PARAMETERS: minor_t mnum - minor number of the RAID device 39300Sstevel@tonic-gate * int flag - 39310Sstevel@tonic-gate * int otyp - 39320Sstevel@tonic-gate * int md_oflags - RAID open flags 39330Sstevel@tonic-gate * RETURNS: 0 if successful, nonzero otherwise 39340Sstevel@tonic-gate */ 39350Sstevel@tonic-gate int 39360Sstevel@tonic-gate raid_internal_open(minor_t mnum, int flag, int otyp, int md_oflags) 39370Sstevel@tonic-gate { 39380Sstevel@tonic-gate mr_unit_t *un; 39390Sstevel@tonic-gate 
mdi_unit_t *ui; 39400Sstevel@tonic-gate int err = 0; 39410Sstevel@tonic-gate int replay_error = 0; 39420Sstevel@tonic-gate 39430Sstevel@tonic-gate ui = MDI_UNIT(mnum); 39440Sstevel@tonic-gate ASSERT(ui != NULL); 39450Sstevel@tonic-gate 39460Sstevel@tonic-gate un = (mr_unit_t *)md_unit_openclose_enter(ui); 39470Sstevel@tonic-gate /* 39480Sstevel@tonic-gate * this MUST be checked before md_unit_isopen is checked. 39490Sstevel@tonic-gate * raid_init_columns sets md_unit_isopen to block reset, halt. 39500Sstevel@tonic-gate */ 39510Sstevel@tonic-gate if ((UNIT_STATE(un) & (RUS_INIT | RUS_DOI)) && 39527627SChris.Horne@Sun.COM !(md_oflags & MD_OFLG_ISINIT)) { 39530Sstevel@tonic-gate md_unit_openclose_exit(ui); 39540Sstevel@tonic-gate return (EAGAIN); 39550Sstevel@tonic-gate } 39560Sstevel@tonic-gate 39570Sstevel@tonic-gate if ((md_oflags & MD_OFLG_ISINIT) || md_unit_isopen(ui)) { 39580Sstevel@tonic-gate err = md_unit_incopen(mnum, flag, otyp); 39590Sstevel@tonic-gate goto out; 39600Sstevel@tonic-gate } 39610Sstevel@tonic-gate 39620Sstevel@tonic-gate md_unit_readerexit(ui); 39630Sstevel@tonic-gate 39640Sstevel@tonic-gate un = (mr_unit_t *)md_unit_writerlock(ui); 39650Sstevel@tonic-gate if (raid_open_all_devs(un, md_oflags) == 0) { 39660Sstevel@tonic-gate if ((err = md_unit_incopen(mnum, flag, otyp)) != 0) { 39670Sstevel@tonic-gate md_unit_writerexit(ui); 39680Sstevel@tonic-gate un = (mr_unit_t *)md_unit_readerlock(ui); 39690Sstevel@tonic-gate raid_close_all_devs(un, 0, md_oflags); 39700Sstevel@tonic-gate goto out; 39710Sstevel@tonic-gate } 39720Sstevel@tonic-gate } else { 39730Sstevel@tonic-gate /* 39740Sstevel@tonic-gate * if this unit contains more than two errored components 39750Sstevel@tonic-gate * should return error and close all opened devices 39760Sstevel@tonic-gate */ 39770Sstevel@tonic-gate 39780Sstevel@tonic-gate md_unit_writerexit(ui); 39790Sstevel@tonic-gate un = (mr_unit_t *)md_unit_readerlock(ui); 39800Sstevel@tonic-gate raid_close_all_devs(un, 0, 
md_oflags); 39810Sstevel@tonic-gate md_unit_openclose_exit(ui); 39820Sstevel@tonic-gate SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OPEN_FAIL, SVM_TAG_METADEVICE, 39830Sstevel@tonic-gate MD_UN2SET(un), MD_SID(un)); 39840Sstevel@tonic-gate return (ENXIO); 39850Sstevel@tonic-gate } 39860Sstevel@tonic-gate 39870Sstevel@tonic-gate if (!(MD_STATUS(un) & MD_UN_REPLAYED)) { 39880Sstevel@tonic-gate replay_error = raid_replay(un); 39890Sstevel@tonic-gate MD_STATUS(un) |= MD_UN_REPLAYED; 39900Sstevel@tonic-gate } 39910Sstevel@tonic-gate 39920Sstevel@tonic-gate md_unit_writerexit(ui); 39930Sstevel@tonic-gate un = (mr_unit_t *)md_unit_readerlock(ui); 39940Sstevel@tonic-gate 39950Sstevel@tonic-gate if ((replay_error == RAID_RPLY_READONLY) && 39960Sstevel@tonic-gate ((flag & (FREAD | FWRITE)) == FREAD)) { 39970Sstevel@tonic-gate md_unit_openclose_exit(ui); 39980Sstevel@tonic-gate return (0); 39990Sstevel@tonic-gate } 40000Sstevel@tonic-gate 40010Sstevel@tonic-gate /* allocate hotspare if possible */ 40020Sstevel@tonic-gate (void) raid_hotspares(); 40030Sstevel@tonic-gate 40040Sstevel@tonic-gate 40050Sstevel@tonic-gate out: 40060Sstevel@tonic-gate md_unit_openclose_exit(ui); 40070Sstevel@tonic-gate return (err); 40080Sstevel@tonic-gate } 40090Sstevel@tonic-gate /* 40100Sstevel@tonic-gate * NAMES: raid_open 40110Sstevel@tonic-gate * DESCRIPTION: RAID metadevice OPEN entry point 40120Sstevel@tonic-gate * PARAMETERS: dev_t dev - 40130Sstevel@tonic-gate * int flag - 40140Sstevel@tonic-gate * int otyp - 40150Sstevel@tonic-gate * cred_t * cred_p - 40160Sstevel@tonic-gate * int md_oflags - 40170Sstevel@tonic-gate * RETURNS: 40180Sstevel@tonic-gate */ 40190Sstevel@tonic-gate /*ARGSUSED1*/ 40200Sstevel@tonic-gate static int 40210Sstevel@tonic-gate raid_open(dev_t *dev, int flag, int otyp, cred_t *cred_p, int md_oflags) 40220Sstevel@tonic-gate { 40230Sstevel@tonic-gate int error = 0; 40240Sstevel@tonic-gate 40250Sstevel@tonic-gate if (error = raid_internal_open(getminor(*dev), flag, otyp, md_oflags)) 
{ 40260Sstevel@tonic-gate return (error); 40270Sstevel@tonic-gate } 40280Sstevel@tonic-gate return (0); 40290Sstevel@tonic-gate } 40300Sstevel@tonic-gate 40310Sstevel@tonic-gate /* 40320Sstevel@tonic-gate * NAMES: raid_internal_close 40330Sstevel@tonic-gate * DESCRIPTION: RAID metadevice CLOSE actual implementation 40340Sstevel@tonic-gate * PARAMETERS: minor_t - minor number of the RAID device 40350Sstevel@tonic-gate * int otyp - 40360Sstevel@tonic-gate * int init_pw - 40370Sstevel@tonic-gate * int md_cflags - RAID close flags 40380Sstevel@tonic-gate * RETURNS: 0 if successful, nonzero otherwise 40390Sstevel@tonic-gate */ 40400Sstevel@tonic-gate /*ARGSUSED*/ 40410Sstevel@tonic-gate int 40420Sstevel@tonic-gate raid_internal_close(minor_t mnum, int otyp, int init_pw, int md_cflags) 40430Sstevel@tonic-gate { 40440Sstevel@tonic-gate mdi_unit_t *ui = MDI_UNIT(mnum); 40450Sstevel@tonic-gate mr_unit_t *un; 40460Sstevel@tonic-gate int err = 0; 40470Sstevel@tonic-gate 40480Sstevel@tonic-gate /* single thread */ 40490Sstevel@tonic-gate un = (mr_unit_t *)md_unit_openclose_enter(ui); 40500Sstevel@tonic-gate 40510Sstevel@tonic-gate /* count closed */ 40520Sstevel@tonic-gate if ((err = md_unit_decopen(mnum, otyp)) != 0) 40530Sstevel@tonic-gate goto out; 40540Sstevel@tonic-gate /* close devices, if necessary */ 40550Sstevel@tonic-gate if (! 
md_unit_isopen(ui) || (md_cflags & MD_OFLG_PROBEDEV)) { 40560Sstevel@tonic-gate raid_close_all_devs(un, init_pw, md_cflags); 40570Sstevel@tonic-gate } 40580Sstevel@tonic-gate 40590Sstevel@tonic-gate /* unlock, return success */ 40600Sstevel@tonic-gate out: 40610Sstevel@tonic-gate md_unit_openclose_exit(ui); 40620Sstevel@tonic-gate return (err); 40630Sstevel@tonic-gate } 40640Sstevel@tonic-gate 40650Sstevel@tonic-gate /* 40660Sstevel@tonic-gate * NAMES: raid_close 40670Sstevel@tonic-gate * DESCRIPTION: RAID metadevice close entry point 40680Sstevel@tonic-gate * PARAMETERS: dev_t dev - 40690Sstevel@tonic-gate * int flag - 40700Sstevel@tonic-gate * int otyp - 40710Sstevel@tonic-gate * cred_t * cred_p - 40720Sstevel@tonic-gate * int md_oflags - 40730Sstevel@tonic-gate * RETURNS: 40740Sstevel@tonic-gate */ 40750Sstevel@tonic-gate /*ARGSUSED1*/ 40760Sstevel@tonic-gate static int 40770Sstevel@tonic-gate raid_close(dev_t dev, int flag, int otyp, cred_t *cred_p, int md_cflags) 40780Sstevel@tonic-gate { 40790Sstevel@tonic-gate int retval; 40800Sstevel@tonic-gate 40810Sstevel@tonic-gate (void) md_io_writerlock(MDI_UNIT(getminor(dev))); 40820Sstevel@tonic-gate retval = raid_internal_close(getminor(dev), otyp, 1, md_cflags); 40830Sstevel@tonic-gate (void) md_io_writerexit(MDI_UNIT(getminor(dev))); 40840Sstevel@tonic-gate return (retval); 40850Sstevel@tonic-gate } 40860Sstevel@tonic-gate 40870Sstevel@tonic-gate /* 40880Sstevel@tonic-gate * raid_probe_close_all_devs 40890Sstevel@tonic-gate */ 40900Sstevel@tonic-gate void 40910Sstevel@tonic-gate raid_probe_close_all_devs(mr_unit_t *un) 40920Sstevel@tonic-gate { 40930Sstevel@tonic-gate int i; 40940Sstevel@tonic-gate mr_column_t *device; 40950Sstevel@tonic-gate 40960Sstevel@tonic-gate for (i = 0; i < un->un_totalcolumncnt; i++) { 40970Sstevel@tonic-gate device = &un->un_column[i]; 40980Sstevel@tonic-gate 40990Sstevel@tonic-gate if (device->un_devflags & MD_RAID_DEV_PROBEOPEN) { 41000Sstevel@tonic-gate 
md_layered_close(device->un_dev, 41017627SChris.Horne@Sun.COM MD_OFLG_PROBEDEV); 41020Sstevel@tonic-gate device->un_devflags &= ~MD_RAID_DEV_PROBEOPEN; 41030Sstevel@tonic-gate } 41040Sstevel@tonic-gate } 41050Sstevel@tonic-gate } 41060Sstevel@tonic-gate /* 41070Sstevel@tonic-gate * Raid_probe_dev: 41080Sstevel@tonic-gate * 41090Sstevel@tonic-gate * On entry the unit writerlock is held 41100Sstevel@tonic-gate */ 41110Sstevel@tonic-gate static int 41120Sstevel@tonic-gate raid_probe_dev(mdi_unit_t *ui, minor_t mnum) 41130Sstevel@tonic-gate { 41140Sstevel@tonic-gate mr_unit_t *un; 41150Sstevel@tonic-gate int i; 41160Sstevel@tonic-gate int not_opened = 0; 41170Sstevel@tonic-gate int commit = 0; 41180Sstevel@tonic-gate int col = -1; 41190Sstevel@tonic-gate mr_column_t *device; 41200Sstevel@tonic-gate int md_devopen = 0; 41210Sstevel@tonic-gate 41220Sstevel@tonic-gate if (md_unit_isopen(ui)) 41230Sstevel@tonic-gate md_devopen++; 41240Sstevel@tonic-gate 41250Sstevel@tonic-gate un = MD_UNIT(mnum); 41260Sstevel@tonic-gate /* 41270Sstevel@tonic-gate * If the state has been set to LAST_ERRED because 41280Sstevel@tonic-gate * of an error when the raid device was open at some 41290Sstevel@tonic-gate * point in the past, don't probe. We really don't want 41300Sstevel@tonic-gate * to reset the state in this case. 
41310Sstevel@tonic-gate */ 41320Sstevel@tonic-gate if (UNIT_STATE(un) == RUS_LAST_ERRED) 41330Sstevel@tonic-gate return (0); 41340Sstevel@tonic-gate 41350Sstevel@tonic-gate ui->ui_tstate &= ~MD_INACCESSIBLE; 41360Sstevel@tonic-gate 41370Sstevel@tonic-gate for (i = 0; i < un->un_totalcolumncnt; i++) { 41380Sstevel@tonic-gate md_dev64_t tmpdev; 41390Sstevel@tonic-gate 41400Sstevel@tonic-gate device = &un->un_column[i]; 41410Sstevel@tonic-gate if (COLUMN_STATE(un, i) & RCS_ERRED) { 41420Sstevel@tonic-gate not_opened++; 41430Sstevel@tonic-gate continue; 41440Sstevel@tonic-gate } 41450Sstevel@tonic-gate 41460Sstevel@tonic-gate tmpdev = device->un_dev; 41470Sstevel@tonic-gate /* 41480Sstevel@tonic-gate * Currently the flags passed are not needed since 41490Sstevel@tonic-gate * there cannot be an underlying metadevice. However 41500Sstevel@tonic-gate * they are kept here for consistency. 41510Sstevel@tonic-gate * 41520Sstevel@tonic-gate * Open by device id 41530Sstevel@tonic-gate */ 41540Sstevel@tonic-gate tmpdev = md_resolve_bydevid(mnum, tmpdev, HOTSPARED(un, i)? 41557627SChris.Horne@Sun.COM device->un_hs_key : device->un_orig_key); 41560Sstevel@tonic-gate if (md_layered_open(mnum, &tmpdev, 41577627SChris.Horne@Sun.COM MD_OFLG_CONT_ERRS | MD_OFLG_PROBEDEV)) { 41580Sstevel@tonic-gate device->un_dev = tmpdev; 41590Sstevel@tonic-gate not_opened++; 41600Sstevel@tonic-gate continue; 41610Sstevel@tonic-gate } 41620Sstevel@tonic-gate device->un_dev = tmpdev; 41630Sstevel@tonic-gate 41640Sstevel@tonic-gate device->un_devflags |= MD_RAID_DEV_PROBEOPEN; 41650Sstevel@tonic-gate } 41660Sstevel@tonic-gate 41670Sstevel@tonic-gate /* 41680Sstevel@tonic-gate * The code below is careful on setting the LAST_ERRED state. 41690Sstevel@tonic-gate * 41700Sstevel@tonic-gate * If open errors and exactly one device has failed we can run. 41710Sstevel@tonic-gate * If more then one device fails we have to figure out when to set 41720Sstevel@tonic-gate * LAST_ERRED state. 
The rationale is to avoid unnecessary resyncs 41730Sstevel@tonic-gate * since they are painful and time consuming. 41740Sstevel@tonic-gate * 41750Sstevel@tonic-gate * When more than one component/column fails there are 2 scenerios. 41760Sstevel@tonic-gate * 41770Sstevel@tonic-gate * 1. Metadevice has NOT been opened: In this case, the behavior 41780Sstevel@tonic-gate * mimics the open symantics. ie. Only the first failed device 41790Sstevel@tonic-gate * is ERRED and LAST_ERRED is not set. 41800Sstevel@tonic-gate * 41810Sstevel@tonic-gate * 2. Metadevice has been opened: Here the read/write sematics are 41820Sstevel@tonic-gate * followed. The first failed devicce is ERRED and on the next 41830Sstevel@tonic-gate * failed device LAST_ERRED is set. 41840Sstevel@tonic-gate */ 41850Sstevel@tonic-gate 41860Sstevel@tonic-gate if (not_opened > 1 && !md_devopen) { 41870Sstevel@tonic-gate cmn_err(CE_WARN, 41887627SChris.Horne@Sun.COM "md: %s failed to open. open error on %s\n", 41897627SChris.Horne@Sun.COM md_shortname(MD_SID(un)), 41907627SChris.Horne@Sun.COM md_devname(MD_UN2SET(un), device->un_orig_dev, NULL, 0)); 41910Sstevel@tonic-gate SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OPEN_FAIL, SVM_TAG_METADEVICE, 41920Sstevel@tonic-gate MD_UN2SET(un), MD_SID(un)); 41930Sstevel@tonic-gate raid_probe_close_all_devs(un); 41940Sstevel@tonic-gate ui->ui_tstate |= MD_INACCESSIBLE; 41950Sstevel@tonic-gate return (not_opened > 1); 41960Sstevel@tonic-gate } 41970Sstevel@tonic-gate 41980Sstevel@tonic-gate if (!md_devopen) { 41990Sstevel@tonic-gate for (i = 0; i < un->un_totalcolumncnt; i++) { 42000Sstevel@tonic-gate device = &un->un_column[i]; 42010Sstevel@tonic-gate if (device->un_devflags & MD_RAID_DEV_PROBEOPEN) { 42020Sstevel@tonic-gate if (device->un_devstate & RCS_LAST_ERRED) { 42030Sstevel@tonic-gate /* 42040Sstevel@tonic-gate * At this point in time there is a 42050Sstevel@tonic-gate * possibility that errors were the 42060Sstevel@tonic-gate * result of a controller failure with 
42070Sstevel@tonic-gate * more than a single column on it so 42080Sstevel@tonic-gate * clear out last errored columns and 42090Sstevel@tonic-gate * let errors re-occur is necessary. 42100Sstevel@tonic-gate */ 42110Sstevel@tonic-gate raid_set_state(un, i, RCS_OKAY, 0); 42120Sstevel@tonic-gate commit++; 42130Sstevel@tonic-gate } 42140Sstevel@tonic-gate continue; 42150Sstevel@tonic-gate } 42160Sstevel@tonic-gate ASSERT(col == -1); 42170Sstevel@tonic-gate /* 42180Sstevel@tonic-gate * note if multiple devices are failing then only 42190Sstevel@tonic-gate * the last one is marked as error 42200Sstevel@tonic-gate */ 42210Sstevel@tonic-gate col = i; 42220Sstevel@tonic-gate } 42230Sstevel@tonic-gate 42240Sstevel@tonic-gate if (col != -1) { 42250Sstevel@tonic-gate raid_set_state(un, col, RCS_ERRED, 0); 42260Sstevel@tonic-gate commit++; 42270Sstevel@tonic-gate } 42280Sstevel@tonic-gate 42290Sstevel@tonic-gate } else { 42300Sstevel@tonic-gate for (i = 0; i < un->un_totalcolumncnt; i++) { 42310Sstevel@tonic-gate device = &un->un_column[i]; 42320Sstevel@tonic-gate 42330Sstevel@tonic-gate /* if we have LAST_ERRED go ahead and commit. 
*/ 42340Sstevel@tonic-gate if (un->un_state & RUS_LAST_ERRED) 42350Sstevel@tonic-gate break; 42360Sstevel@tonic-gate /* 42370Sstevel@tonic-gate * could not open the component 42380Sstevel@tonic-gate */ 42390Sstevel@tonic-gate 42400Sstevel@tonic-gate if (!(device->un_devflags & MD_RAID_DEV_PROBEOPEN)) { 42410Sstevel@tonic-gate col = i; 42420Sstevel@tonic-gate raid_set_state(un, col, RCS_ERRED, 0); 42430Sstevel@tonic-gate commit++; 42440Sstevel@tonic-gate } 42450Sstevel@tonic-gate } 42460Sstevel@tonic-gate } 42470Sstevel@tonic-gate 42480Sstevel@tonic-gate if (commit) 42490Sstevel@tonic-gate raid_commit(un, NULL); 42500Sstevel@tonic-gate 42510Sstevel@tonic-gate if (col != -1) { 42520Sstevel@tonic-gate if (COLUMN_STATE(un, col) & RCS_ERRED) { 42530Sstevel@tonic-gate SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, 42540Sstevel@tonic-gate SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un)); 42550Sstevel@tonic-gate } else if (COLUMN_STATE(un, col) & RCS_LAST_ERRED) { 42560Sstevel@tonic-gate SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED, 42570Sstevel@tonic-gate SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un)); 42580Sstevel@tonic-gate } 42590Sstevel@tonic-gate } 42600Sstevel@tonic-gate 42610Sstevel@tonic-gate raid_probe_close_all_devs(un); 42620Sstevel@tonic-gate return (0); 42630Sstevel@tonic-gate } 42640Sstevel@tonic-gate 42650Sstevel@tonic-gate static int 42660Sstevel@tonic-gate raid_imp_set( 42670Sstevel@tonic-gate set_t setno 42680Sstevel@tonic-gate ) 42690Sstevel@tonic-gate { 42700Sstevel@tonic-gate mddb_recid_t recid; 42710Sstevel@tonic-gate int i, gotsomething; 42720Sstevel@tonic-gate mddb_type_t typ1; 42730Sstevel@tonic-gate mddb_de_ic_t *dep; 42740Sstevel@tonic-gate mddb_rb32_t *rbp; 42750Sstevel@tonic-gate mr_unit_t *un64; 42760Sstevel@tonic-gate mr_unit32_od_t *un32; 42771623Stw21770 md_dev64_t self_devt; 42780Sstevel@tonic-gate minor_t *self_id; /* minor needs to be updated */ 42790Sstevel@tonic-gate md_parent_t *parent_id; /* parent needs to be updated */ 42800Sstevel@tonic-gate 
mddb_recid_t *record_id; /* record id needs to be updated */ 42810Sstevel@tonic-gate hsp_t *hsp_id; 42820Sstevel@tonic-gate 42830Sstevel@tonic-gate gotsomething = 0; 42840Sstevel@tonic-gate 42850Sstevel@tonic-gate typ1 = (mddb_type_t)md_getshared_key(setno, 42860Sstevel@tonic-gate raid_md_ops.md_driver.md_drivername); 42870Sstevel@tonic-gate recid = mddb_makerecid(setno, 0); 42880Sstevel@tonic-gate 42890Sstevel@tonic-gate while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) { 42900Sstevel@tonic-gate if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) 42910Sstevel@tonic-gate continue; 42920Sstevel@tonic-gate 42930Sstevel@tonic-gate dep = mddb_getrecdep(recid); 42940Sstevel@tonic-gate rbp = dep->de_rb; 42950Sstevel@tonic-gate 42961623Stw21770 switch (rbp->rb_revision) { 42971623Stw21770 case MDDB_REV_RB: 42981623Stw21770 case MDDB_REV_RBFN: 42990Sstevel@tonic-gate /* 43000Sstevel@tonic-gate * Small device 43010Sstevel@tonic-gate */ 43020Sstevel@tonic-gate un32 = (mr_unit32_od_t *)mddb_getrecaddr(recid); 43030Sstevel@tonic-gate self_id = &(un32->c.un_self_id); 43040Sstevel@tonic-gate parent_id = &(un32->c.un_parent); 43050Sstevel@tonic-gate record_id = &(un32->c.un_record_id); 43060Sstevel@tonic-gate hsp_id = &(un32->un_hsp_id); 43070Sstevel@tonic-gate 43080Sstevel@tonic-gate for (i = 0; i < un32->un_totalcolumncnt; i++) { 43097627SChris.Horne@Sun.COM mr_column32_od_t *device; 43107627SChris.Horne@Sun.COM 43117627SChris.Horne@Sun.COM device = &un32->un_column[i]; 43127627SChris.Horne@Sun.COM if (!md_update_minor(setno, mddb_getsidenum 43137627SChris.Horne@Sun.COM (setno), device->un_orig_key)) 43147627SChris.Horne@Sun.COM goto out; 43157627SChris.Horne@Sun.COM 43167627SChris.Horne@Sun.COM if (device->un_hs_id != 0) 43177627SChris.Horne@Sun.COM device->un_hs_id = 43187627SChris.Horne@Sun.COM MAKERECID(setno, device->un_hs_id); 43190Sstevel@tonic-gate } 43201623Stw21770 break; 43211623Stw21770 case MDDB_REV_RB64: 43221623Stw21770 case MDDB_REV_RB64FN: 43230Sstevel@tonic-gate 
un64 = (mr_unit_t *)mddb_getrecaddr(recid); 43240Sstevel@tonic-gate self_id = &(un64->c.un_self_id); 43250Sstevel@tonic-gate parent_id = &(un64->c.un_parent); 43260Sstevel@tonic-gate record_id = &(un64->c.un_record_id); 43270Sstevel@tonic-gate hsp_id = &(un64->un_hsp_id); 43280Sstevel@tonic-gate 43290Sstevel@tonic-gate for (i = 0; i < un64->un_totalcolumncnt; i++) { 43307627SChris.Horne@Sun.COM mr_column_t *device; 43317627SChris.Horne@Sun.COM 43327627SChris.Horne@Sun.COM device = &un64->un_column[i]; 43337627SChris.Horne@Sun.COM if (!md_update_minor(setno, mddb_getsidenum 43347627SChris.Horne@Sun.COM (setno), device->un_orig_key)) 43357627SChris.Horne@Sun.COM goto out; 43367627SChris.Horne@Sun.COM 43377627SChris.Horne@Sun.COM if (device->un_hs_id != 0) 43387627SChris.Horne@Sun.COM device->un_hs_id = 43397627SChris.Horne@Sun.COM MAKERECID(setno, device->un_hs_id); 43400Sstevel@tonic-gate } 43411623Stw21770 break; 43421623Stw21770 } 43431623Stw21770 43441623Stw21770 /* 43451623Stw21770 * If this is a top level and a friendly name metadevice, 43461623Stw21770 * update its minor in the namespace. 
43471623Stw21770 */ 43481623Stw21770 if ((*parent_id == MD_NO_PARENT) && 43491623Stw21770 ((rbp->rb_revision == MDDB_REV_RBFN) || 43501623Stw21770 (rbp->rb_revision == MDDB_REV_RB64FN))) { 43511623Stw21770 43521623Stw21770 self_devt = md_makedevice(md_major, *self_id); 43531623Stw21770 if (!md_update_top_device_minor(setno, 43541623Stw21770 mddb_getsidenum(setno), self_devt)) 43551623Stw21770 goto out; 43560Sstevel@tonic-gate } 43570Sstevel@tonic-gate 43580Sstevel@tonic-gate /* 43590Sstevel@tonic-gate * Update unit with the imported setno 43600Sstevel@tonic-gate */ 43610Sstevel@tonic-gate mddb_setrecprivate(recid, MD_PRV_GOTIT); 43620Sstevel@tonic-gate 43630Sstevel@tonic-gate *self_id = MD_MKMIN(setno, MD_MIN2UNIT(*self_id)); 43640Sstevel@tonic-gate 43650Sstevel@tonic-gate if (*hsp_id != -1) 43660Sstevel@tonic-gate *hsp_id = MAKERECID(setno, DBID(*hsp_id)); 43670Sstevel@tonic-gate 43680Sstevel@tonic-gate if (*parent_id != MD_NO_PARENT) 43690Sstevel@tonic-gate *parent_id = MD_MKMIN(setno, MD_MIN2UNIT(*parent_id)); 43700Sstevel@tonic-gate *record_id = MAKERECID(setno, DBID(*record_id)); 43710Sstevel@tonic-gate gotsomething = 1; 43720Sstevel@tonic-gate } 43730Sstevel@tonic-gate 43740Sstevel@tonic-gate out: 43750Sstevel@tonic-gate return (gotsomething); 43760Sstevel@tonic-gate } 43770Sstevel@tonic-gate 43780Sstevel@tonic-gate static md_named_services_t raid_named_services[] = { 43790Sstevel@tonic-gate {raid_hotspares, "poke hotspares" }, 43800Sstevel@tonic-gate {raid_rename_check, MDRNM_CHECK }, 43810Sstevel@tonic-gate {raid_rename_lock, MDRNM_LOCK }, 43820Sstevel@tonic-gate {(intptr_t (*)()) raid_rename_unlock, MDRNM_UNLOCK }, 43830Sstevel@tonic-gate {(intptr_t (*)()) raid_probe_dev, "probe open test" }, 43840Sstevel@tonic-gate {NULL, 0 } 43850Sstevel@tonic-gate }; 43860Sstevel@tonic-gate 43870Sstevel@tonic-gate md_ops_t raid_md_ops = { 43880Sstevel@tonic-gate raid_open, /* open */ 43890Sstevel@tonic-gate raid_close, /* close */ 43900Sstevel@tonic-gate 
md_raid_strategy, /* strategy */ 43910Sstevel@tonic-gate NULL, /* print */ 43920Sstevel@tonic-gate NULL, /* dump */ 43930Sstevel@tonic-gate NULL, /* read */ 43940Sstevel@tonic-gate NULL, /* write */ 43950Sstevel@tonic-gate md_raid_ioctl, /* ioctl, */ 43960Sstevel@tonic-gate raid_snarf, /* raid_snarf */ 43970Sstevel@tonic-gate raid_halt, /* raid_halt */ 43980Sstevel@tonic-gate NULL, /* aread */ 43990Sstevel@tonic-gate NULL, /* awrite */ 44000Sstevel@tonic-gate raid_imp_set, /* import set */ 44010Sstevel@tonic-gate raid_named_services 44020Sstevel@tonic-gate }; 44030Sstevel@tonic-gate 44040Sstevel@tonic-gate static void 44050Sstevel@tonic-gate init_init() 44060Sstevel@tonic-gate { 44070Sstevel@tonic-gate /* default to a second */ 44080Sstevel@tonic-gate if (md_wr_wait == 0) 44090Sstevel@tonic-gate md_wr_wait = md_hz >> 1; 44100Sstevel@tonic-gate 44110Sstevel@tonic-gate raid_parent_cache = kmem_cache_create("md_raid_parent", 44120Sstevel@tonic-gate sizeof (md_raidps_t), 0, raid_parent_constructor, 44130Sstevel@tonic-gate raid_parent_destructor, raid_run_queue, NULL, NULL, 0); 44140Sstevel@tonic-gate raid_child_cache = kmem_cache_create("md_raid_child", 44150Sstevel@tonic-gate sizeof (md_raidcs_t) - sizeof (buf_t) + biosize(), 0, 44160Sstevel@tonic-gate raid_child_constructor, raid_child_destructor, 44170Sstevel@tonic-gate raid_run_queue, NULL, NULL, 0); 44180Sstevel@tonic-gate raid_cbuf_cache = kmem_cache_create("md_raid_cbufs", 44190Sstevel@tonic-gate sizeof (md_raidcbuf_t), 0, raid_cbuf_constructor, 44200Sstevel@tonic-gate raid_cbuf_destructor, raid_run_queue, NULL, NULL, 0); 44210Sstevel@tonic-gate } 44220Sstevel@tonic-gate 44230Sstevel@tonic-gate static void 44240Sstevel@tonic-gate fini_uninit() 44250Sstevel@tonic-gate { 44260Sstevel@tonic-gate kmem_cache_destroy(raid_parent_cache); 44270Sstevel@tonic-gate kmem_cache_destroy(raid_child_cache); 44280Sstevel@tonic-gate kmem_cache_destroy(raid_cbuf_cache); 44290Sstevel@tonic-gate raid_parent_cache = raid_child_cache 
= raid_cbuf_cache = NULL; 44300Sstevel@tonic-gate } 44310Sstevel@tonic-gate 44320Sstevel@tonic-gate /* define the module linkage */ 44334932Spetede MD_PLUGIN_MISC_MODULE("raid module", init_init(), fini_uninit()) 4434