/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * NAME:	raid.c
 *
 * DESCRIPTION: Main RAID driver source file containing open, close and I/O
 *		operations.
 *
 * ROUTINES PROVIDED FOR EXTERNAL USE:
 *  raid_open()			- open the RAID metadevice for access.
 *  raid_internal_open()	- internal open routine of RAID metadevice.
 *  md_raid_strategy()		- perform normal I/O operations,
 *				    such as read and write.
 *  raid_close()		- close the RAID metadevice.
 *  raid_internal_close()	- internal close routine of RAID metadevice.
 *  raid_snarf()		- initialize and clean up MDD records.
 *  raid_halt()			- reset the RAID metadevice
 *  raid_line()			- return the line # of this segment
 *  raid_dcolumn()		- return the data column # of this segment
 *  raid_pcolumn()		- return the parity column # of this segment
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/user.h>
#include <sys/uio.h>
#include <sys/t_lock.h>
#include <sys/buf.h>
#include <sys/dkio.h>
#include <sys/vtoc.h>
#include <sys/kmem.h>
#include <vm/page.h>
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/mkdev.h>
#include <sys/stat.h>
#include <sys/open.h>
#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/debug.h>
#include <sys/lvm/md_raid.h>
#include <sys/lvm/mdvar.h>
#include <sys/lvm/md_convert.h>

#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/svm.h>

md_ops_t raid_md_ops;
#ifndef lint
static char _depends_on[] = "drv/md";
md_ops_t *md_interface_ops = &raid_md_ops;
#endif	/* lint */

extern unit_t md_nunits;
extern unit_t md_nsets;
extern md_set_t md_set[];
extern int md_status;
extern major_t md_major;
extern mdq_anchor_t md_done_daemon;
extern mdq_anchor_t md_mstr_daemon;
extern int md_sleep_for_test;
extern clock_t md_hz;

extern md_event_queue_t	*md_event_queue;


int pchunks		= 16;
int phigh		= 1024;
int plow		= 128;
int cchunks		= 64;
int chigh		= 1024;
int clow		= 512;
int bchunks		= 32;
int bhigh		= 256;
int blow		= 128;

int raid_total_io		= 0;
int raid_reads			= 0;
int raid_writes			= 0;
int raid_no_bpmaps		= 0;
int raid_512			= 0;
int raid_1024			= 0;
int raid_1024_8192		= 0;
int raid_8192			= 0;
int raid_8192_bigger		= 0;
int raid_line_lock_wait		= 0;

int data_buffer_waits		= 0;
int parity_buffer_waits		= 0;
/* writer line locks */
int raid_writer_locks		= 0; /* total writer locks */
int raid_write_waits		= 0; /* total writer locks that waited */
int raid_full_line_writes	= 0; /* total full line writes */
int raid_write_queue_length	= 0; /* wait queue length */
int raid_max_write_q_length	= 0; /* maximum queue length */
int raid_write_locks_active	= 0; /* writer locks at any time */
int raid_max_write_locks	= 0; /* maximum writer locks active */

/* read line locks */
int raid_reader_locks		= 0; /* total reader locks held */
int raid_reader_locks_active	= 0; /* reader locks held */
int raid_max_reader_locks	= 0; /* maximum reader locks held in run */
int raid_read_overlaps		= 0; /* number of times 2 reads hit same line */
int raid_read_waits		= 0; /* times a reader waited on writer */

/* prewrite stats */
int raid_prewrite_waits		= 0; /* number of waits for a pw slot */
int raid_pw			= 0; /* number of pw slots in use */
int raid_prewrite_max		= 0; /* maximum number of pw slots in use */
int raid_pw_invalidates		= 0;

static clock_t md_wr_wait	= 0;

int nv_available	= 0; /* presence of nv-ram support in device */
int nv_prewrite		= 1; /* mark prewrites with nv_available */
int nv_parity		= 1; /* mark parity with nv_available */

kmem_cache_t	*raid_parent_cache = NULL;
kmem_cache_t	*raid_child_cache = NULL;
kmem_cache_t	*raid_cbuf_cache = NULL;

int			raid_internal_open(minor_t mnum, int flag, int otyp,
			    int md_oflags);

static void		freebuffers(md_raidcs_t *cs);
static int		raid_read(mr_unit_t *un, md_raidcs_t *cs);
static void		raid_read_io(mr_unit_t *un, md_raidcs_t *cs);
static int		raid_write(mr_unit_t *un, md_raidcs_t *cs);
static void		raid_write_io(mr_unit_t *un, md_raidcs_t *cs);
static void		raid_stage(md_raidcs_t *cs);
static void		raid_enqueue(md_raidcs_t *cs);
static diskaddr_t	raid_line(diskaddr_t segment, mr_unit_t *un);
uint_t			raid_dcolumn(diskaddr_t segment, mr_unit_t *un);
static void		getpbuffer(md_raidcs_t *cs);
static void		getdbuffer(md_raidcs_t *cs);
static void		raid_done(buf_t *bp);
static void		raid_io_startup(mr_unit_t *un);

static rus_state_t
raid_col2unit(rcs_state_t state, rus_state_t unitstate)
{
	switch (state) {
	case RCS_INIT:
		return (RUS_INIT);
	case RCS_OKAY:
		return (RUS_OKAY);
	case RCS_RESYNC:
		if (unitstate & RUS_LAST_ERRED)
			return (RUS_LAST_ERRED);
		else
			return (RUS_ERRED);
	case RCS_ERRED:
		return (RUS_ERRED);
	case RCS_LAST_ERRED:
		return (RUS_ERRED);
	default:
		break;
	}
	panic("raid_col2unit");
	/*NOTREACHED*/
}
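
/*
 * NAME:	raid_set_state
 * DESCRIPTION: set the state of a column and recompute the resulting unit
 *		state from the states of all columns.  With force set, the
 *		column and unit states are set directly and no other error
 *		processing is done.  (Header added for documentation; the
 *		routine itself is unchanged.)
 * PARAMETERS:	mr_unit_t *un - pointer to a unit structure
 *		int col - column whose state is to be changed
 *		rcs_state_t newstate - the state to set the column to
 *		int force - set the state without checks or error processing
 */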
void
raid_set_state(mr_unit_t *un, int col, rcs_state_t newstate, int force)
{
	rus_state_t	unitstate, origstate;
	rcs_state_t	colstate;
	rcs_state_t	orig_colstate;
	int		errcnt = 0,
			okaycnt = 0,
			resynccnt = 0;
	int		i;
	char		*devname;

	ASSERT(un);
	ASSERT(col < un->un_totalcolumncnt);
	ASSERT(newstate &
	    (RCS_INIT | RCS_INIT_ERRED | RCS_OKAY | RCS_RESYNC | RCS_ERRED |
	    RCS_LAST_ERRED | RCS_REGEN));
	ASSERT((newstate &
	    ~(RCS_INIT | RCS_INIT_ERRED | RCS_OKAY | RCS_RESYNC | RCS_ERRED |
	    RCS_LAST_ERRED | RCS_REGEN)) == 0);

	ASSERT(MDI_UNIT(MD_SID(un)) ? UNIT_WRITER_HELD(un) : 1);

	unitstate = un->un_state;
	origstate = unitstate;

	if (force) {
		un->un_column[col].un_devstate = newstate;
		un->un_state = raid_col2unit(newstate, unitstate);
		uniqtime32(&un->un_column[col].un_devtimestamp);
		uniqtime32(&un->un_timestamp);
		return;
	}

	ASSERT(un->un_state &
	    (RUS_INIT | RUS_OKAY | RUS_ERRED | RUS_DOI | RUS_LAST_ERRED |
	    RUS_REGEN));
	ASSERT((un->un_state & ~(RUS_INIT |
	    RUS_OKAY | RUS_ERRED | RUS_DOI | RUS_LAST_ERRED | RUS_REGEN)) == 0);

	if (un->un_column[col].un_devstate == newstate)
		return;

	if (newstate == RCS_REGEN) {
		if (raid_state_cnt(un, RCS_OKAY) != un->un_totalcolumncnt)
			return;
		un->un_state = RUS_REGEN;
		return;
	}

	orig_colstate = un->un_column[col].un_devstate;

	/*
	 * if there is another column in the error state then this
	 * column should go to the last errored state
	 */
	for (i = 0; i < un->un_totalcolumncnt; i++) {
		if (i == col)
			colstate = newstate;
		else
			colstate = un->un_column[i].un_devstate;
		if (colstate & (RCS_ERRED | RCS_LAST_ERRED | RCS_INIT_ERRED))
			errcnt++;
		if (colstate & RCS_OKAY)
			okaycnt++;
		if (colstate & RCS_RESYNC)
			resynccnt++;
	}
	ASSERT(resynccnt < 2);

	if (okaycnt == un->un_totalcolumncnt)
		unitstate = RUS_OKAY;
	else if (errcnt > 1) {
		unitstate = RUS_LAST_ERRED;
		if (newstate & RCS_ERRED)
			newstate = RCS_LAST_ERRED;
	} else if (errcnt == 1)
		if (!(unitstate & RUS_LAST_ERRED))
			unitstate = RUS_ERRED;

	if (un->un_state == RUS_DOI)
		unitstate = RUS_DOI;

	un->un_column[col].un_devstate = newstate;
	uniqtime32(&un->un_column[col].un_devtimestamp);
	/*
	 * if there is a last errored column being brought back online
	 * by open or snarf, then be sure to clear the RUS_LAST_ERRED
	 * bit to allow writes.  If there is a real error then the
	 * column will go back into last erred.
	 */
	if ((raid_state_cnt(un, RCS_LAST_ERRED) == 0) &&
	    (raid_state_cnt(un, RCS_ERRED) == 1))
		unitstate = RUS_ERRED;

	un->un_state = unitstate;
	uniqtime32(&un->un_timestamp);

	if ((! (origstate & (RUS_ERRED|RUS_LAST_ERRED|RUS_DOI))) &&
	    (unitstate & (RUS_ERRED|RUS_LAST_ERRED|RUS_DOI))) {
		devname = md_devname(MD_UN2SET(un),
		    un->un_column[col].un_dev, NULL, 0);

		cmn_err(CE_WARN, "md: %s: %s needs maintenance",
		    md_shortname(MD_SID(un)), devname);

		if (unitstate & RUS_LAST_ERRED) {
			cmn_err(CE_WARN, "md: %s: %s last erred",
			    md_shortname(MD_SID(un)), devname);

		} else if (un->un_column[col].un_devflags &
		    MD_RAID_DEV_ISOPEN) {
			/*
			 * Close the broken device and clear the open flag on
			 * it.  We have to check that the device is open,
			 * otherwise the first open on it has resulted in the
			 * error that is being processed and the actual un_dev
			 * will be NODEV64.
			 */
			md_layered_close(un->un_column[col].un_dev,
			    MD_OFLG_NULL);
			un->un_column[col].un_devflags &= ~MD_RAID_DEV_ISOPEN;
		}
	} else if (orig_colstate == RCS_LAST_ERRED && newstate == RCS_ERRED &&
	    un->un_column[col].un_devflags & MD_RAID_DEV_ISOPEN) {
		/*
		 * Similar to logic above except no log messages since we
		 * are just transitioning from Last Erred to Erred.
		 */
		md_layered_close(un->un_column[col].un_dev, MD_OFLG_NULL);
		un->un_column[col].un_devflags &= ~MD_RAID_DEV_ISOPEN;
	}

	/*
	 * If a resync has completed, see if there is a Last Erred
	 * component that we can change to the Erred state.
	 */
	if ((orig_colstate == RCS_RESYNC) && (newstate == RCS_OKAY)) {
		for (i = 0; i < un->un_totalcolumncnt; i++) {
			if (i != col &&
			    (un->un_column[i].un_devstate & RCS_LAST_ERRED)) {
				raid_set_state(un, i, RCS_ERRED, 0);
				break;
			}
		}
	}
}

/*
 * NAME:	erred_check_line
 *
 * DESCRIPTION: Return the type of write to perform on an erred column based
 *		upon any resync activity.
 *
 *		If a column is being resynced and the write is above the
 *		resync point, the write may have to go to the target being
 *		resynced.
 *
 *		Column state may make it impossible to do the write
 *		in which case RCL_EIO or RCL_ENXIO is returned.
 *
 *		If a column cannot be written directly, RCL_ERRED is
 *		returned and processing should proceed accordingly.
 *
 * PARAMETERS:	minor_t mnum - minor number identity of metadevice
 *		md_raidcs_t *cs - child save structure
 *		mr_column_t *dcolumn - pointer to data column structure
 *		mr_column_t *pcolumn - pointer to parity column structure
 *
 * RETURNS:	RCL_OKAY, RCL_ERRED
 *
 * LOCKS:	Expects Line Writer Lock and Unit Resource Lock to be held
 *		across call.
 */

static int
erred_check_line(mr_unit_t *un, md_raidcs_t *cs, mr_column_t *column)
{
	ASSERT(un != NULL);
	ASSERT(cs->cs_flags & MD_RCS_LLOCKD);

	if (column->un_devstate & RCS_OKAY)
		return (RCL_OKAY);

	if (column->un_devstate & RCS_ERRED)
		return (RCL_ERRED); /* do not read from errored disk */

	/*
	 * for the last errored case there are two considerations.
	 * When the last errored column is the only errored column then
	 * treat it like a maintenance column, not doing I/O from it.
	 * When there are other failures then just attempt to use it.
	 */
	if (column->un_devstate & RCS_LAST_ERRED)
		return (RCL_ERRED);

	ASSERT(column->un_devstate & RCS_RESYNC);

	/*
	 * When a resync from a hotspare is being done (copy resync)
	 * then always treat it as an OKAY column, since no regen
	 * is required.
	 */
	if (column->un_devflags & MD_RAID_COPY_RESYNC) {
		return (RCL_OKAY);
	}

	mutex_enter(&un->un_mx);
	if (cs->cs_line < un->un_resync_line_index) {
		mutex_exit(&un->un_mx);
		return (RCL_OKAY);
	}
	mutex_exit(&un->un_mx);
	return (RCL_ERRED);
}

/*
 * NAMES:	raid_state_cnt
 *
 * DESCRIPTION: counts the number of columns in a specific state
 *
 * PARAMETERS:	md_raid_t *un
 *		rcs_state state
 */
int
raid_state_cnt(mr_unit_t *un, rcs_state_t state)
{
	int	i, retval = 0;

	for (i = 0; i < un->un_totalcolumncnt; i++)
		if (un->un_column[i].un_devstate & state)
			retval++;
	return (retval);
}

/*
 * NAMES:	raid_io_overlaps
 *
 * DESCRIPTION: checks for overlap of 2 child save structures
 *
 * PARAMETERS:	md_raidcs_t cs1
 *		md_raidcs_t cs2
 *
 * RETURNS:	0 - no overlap
 *		1 - overlap
 */
int
raid_io_overlaps(md_raidcs_t *cs1, md_raidcs_t *cs2)
{
	if (cs1->cs_blkno > cs2->cs_lastblk)
		return (0);
	if (cs1->cs_lastblk < cs2->cs_blkno)
		return (0);
	return (1);
}

/*
 * NAMES:	raid_parent_constructor
 * DESCRIPTION: parent structure constructor routine
 * PARAMETERS:
 */
/*ARGSUSED1*/
static int
raid_parent_constructor(void *p, void *d1, int d2)
{
	mutex_init(&((md_raidps_t *)p)->ps_mx,
	    NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&((md_raidps_t *)p)->ps_mapin_mx,
	    NULL, MUTEX_DEFAULT, NULL);
	return (0);
}
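
/*
 * NAMES:	raid_parent_init
 * DESCRIPTION: initialize a parent structure obtained from the parent
 *		cache, zeroing everything ahead of the mutexes that the
 *		constructor set up.  (Header added for documentation; the
 *		routine itself is unchanged.)
 * PARAMETERS:	md_raidps_t *ps - pointer to a parent structure
 */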
void
raid_parent_init(md_raidps_t *ps)
{
	bzero(ps, offsetof(md_raidps_t, ps_mx));
	((md_raidps_t *)ps)->ps_flags = MD_RPS_INUSE;
	((md_raidps_t *)ps)->ps_magic = RAID_PSMAGIC;
}

/*ARGSUSED1*/
static void
raid_parent_destructor(void *p, void *d)
{
	mutex_destroy(&((md_raidps_t *)p)->ps_mx);
	mutex_destroy(&((md_raidps_t *)p)->ps_mapin_mx);
}

/*
 * NAMES:	raid_child_constructor
 * DESCRIPTION: child structure constructor routine
 * PARAMETERS:
 */
/*ARGSUSED1*/
static int
raid_child_constructor(void *p, void *d1, int d2)
{
	md_raidcs_t	*cs = (md_raidcs_t *)p;

	mutex_init(&cs->cs_mx, NULL, MUTEX_DEFAULT, NULL);
	bioinit(&cs->cs_dbuf);
	bioinit(&cs->cs_pbuf);
	bioinit(&cs->cs_hbuf);
	return (0);
}

void
raid_child_init(md_raidcs_t *cs)
{
	bzero(cs, offsetof(md_raidcs_t, cs_mx));

	md_bioreset(&cs->cs_dbuf);
	md_bioreset(&cs->cs_pbuf);
	md_bioreset(&cs->cs_hbuf);

	((md_raidcs_t *)cs)->cs_dbuf.b_chain =
	    ((md_raidcs_t *)cs)->cs_pbuf.b_chain =
	    ((md_raidcs_t *)cs)->cs_hbuf.b_chain =
	    (struct buf *)(cs);

	cs->cs_magic = RAID_CSMAGIC;
	cs->cs_line = MD_DISKADDR_ERROR;
	cs->cs_dpwslot = -1;
	cs->cs_ppwslot = -1;
}

/*ARGSUSED1*/
static void
raid_child_destructor(void *p, void *d)
{
	biofini(&((md_raidcs_t *)p)->cs_dbuf);
	biofini(&((md_raidcs_t *)p)->cs_hbuf);
	biofini(&((md_raidcs_t *)p)->cs_pbuf);
	mutex_destroy(&((md_raidcs_t *)p)->cs_mx);
}

/*ARGSUSED1*/
static int
raid_cbuf_constructor(void *p, void *d1, int d2)
{
	bioinit(&((md_raidcbuf_t *)p)->cbuf_bp);
	return (0);
}

static void
raid_cbuf_init(md_raidcbuf_t *cb)
{
	bzero(cb, offsetof(md_raidcbuf_t, cbuf_bp));
	md_bioreset(&cb->cbuf_bp);
	cb->cbuf_magic = RAID_BUFMAGIC;
	cb->cbuf_pwslot = -1;
	cb->cbuf_flags = CBUF_WRITE;
}

/*ARGSUSED1*/
static void
raid_cbuf_destructor(void *p, void *d)
{
	biofini(&((md_raidcbuf_t *)p)->cbuf_bp);
}
/*
 * NAMES:	raid_run_queue
 * DESCRIPTION: spawn a backend processing daemon for RAID metadevice.
 * PARAMETERS:
 */
/*ARGSUSED*/
static void
raid_run_queue(void *d)
{
	if (!(md_status & MD_GBL_DAEMONS_LIVE))
		md_daemon(1, &md_done_daemon);
}

/*
 * NAME:	raid_build_pw_reservation
 * DESCRIPTION: builds mr_pw_reserve for the column
 * PARAMETERS:	un is the pointer to the unit structure
 *		colindex is the column to create the structure for
 */
int
raid_build_pw_reservation(mr_unit_t *un, int colindex)
{
	mr_pw_reserve_t	*pw;
	mr_scoreboard_t	*sb;
	int		i;

	pw = (mr_pw_reserve_t *)kmem_zalloc(sizeof (mr_pw_reserve_t) +
	    (sizeof (mr_scoreboard_t) * un->un_pwcnt), KM_SLEEP);
	pw->pw_magic = RAID_PWMAGIC;
	pw->pw_column = colindex;
	pw->pw_free = un->un_pwcnt;
	sb = &pw->pw_sb[0];
	for (i = 0; i < un->un_pwcnt; i++) {
		sb[i].sb_column = colindex;
		sb[i].sb_flags = SB_UNUSED;
		sb[i].sb_start_blk = 0;
		sb[i].sb_last_blk = 0;
		sb[i].sb_cs = NULL;
	}
	un->un_column_ic[colindex].un_pw_reserve = pw;
	return (0);
}

/*
 * NAME:	raid_free_pw_reservation
 * DESCRIPTION: RAID metadevice pre-write slot structure destroy routine
 * PARAMETERS:	mr_unit_t *un - pointer to a unit structure
 *		int colindex - index of the column whose pre-write slot struct
 *		is to be destroyed.
 */
void
raid_free_pw_reservation(mr_unit_t *un, int colindex)
{
	mr_pw_reserve_t	*pw = un->un_column_ic[colindex].un_pw_reserve;

	kmem_free(pw, sizeof (mr_pw_reserve_t) +
	    (sizeof (mr_scoreboard_t) * un->un_pwcnt));
}

/*
 * NAME:	raid_cancel_pwslot
 * DESCRIPTION: release the pre-write slots reserved by a child structure
 *		and wake up any waiters once slots become free.
 * PARAMETERS:	md_raidcs_t *cs - pointer to a child structure
 */
static void
raid_cancel_pwslot(md_raidcs_t *cs)
{
	mr_unit_t		*un = cs->cs_un;
	mr_pw_reserve_t		*pw;
	mr_scoreboard_t		*sb;
	mr_column_ic_t		*col;
	md_raidcbuf_t		*cbuf;
	int			broadcast = 0;

	if (cs->cs_ps->ps_flags & MD_RPS_READ)
		return;
	if (cs->cs_dpwslot != -1) {
		col = &un->un_column_ic[cs->cs_dcolumn];
		pw = col->un_pw_reserve;
		sb = &pw->pw_sb[cs->cs_dpwslot];
		sb->sb_flags = SB_AVAIL;
		if ((pw->pw_free++ == 0) || (un->un_rflags & MD_RFLAG_NEEDPW))
			broadcast++;
		sb->sb_cs = NULL;
	}

	if (cs->cs_ppwslot != -1) {
		col = &un->un_column_ic[cs->cs_pcolumn];
		pw = col->un_pw_reserve;
		sb = &pw->pw_sb[cs->cs_ppwslot];
		sb->sb_flags = SB_AVAIL;
		if ((pw->pw_free++ == 0) || (un->un_rflags & MD_RFLAG_NEEDPW))
			broadcast++;
		sb->sb_cs = NULL;
	}

	for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) {
		if (cbuf->cbuf_pwslot == -1)
			continue;
		col = &un->un_column_ic[cbuf->cbuf_column];
		pw = col->un_pw_reserve;
		sb = &pw->pw_sb[cbuf->cbuf_pwslot];
		sb->sb_flags = SB_AVAIL;
		if ((pw->pw_free++ == 0) || (un->un_rflags & MD_RFLAG_NEEDPW))
			broadcast++;
		sb->sb_cs = NULL;
	}
	if (broadcast) {
		cv_broadcast(&un->un_cv);
		return;
	}
	mutex_enter(&un->un_mx);
	if (un->un_rflags & MD_RFLAG_NEEDPW)
		cv_broadcast(&un->un_cv);
	mutex_exit(&un->un_mx);
}
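
/*
 * NAMES:	raid_free_pwinvalidate
 * DESCRIPTION: free the pre-write invalidation list of a child structure,
 *		marking each scoreboard slot unused and restarting any I/O
 *		that was waiting for a free pre-write slot.  (Header added
 *		for documentation; the routine itself is unchanged.)
 * PARAMETERS:	md_raidcs_t *cs - pointer to a child structure
 */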
static void
raid_free_pwinvalidate(md_raidcs_t *cs)
{
	md_raidcbuf_t		*cbuf;
	md_raidcbuf_t		*cbuf_to_free;
	mr_unit_t		*un = cs->cs_un;
	mdi_unit_t		*ui = MDI_UNIT(MD_SID(un));
	mr_pw_reserve_t		*pw;
	mr_scoreboard_t		*sb;
	int			broadcast = 0;

	cbuf = cs->cs_pw_inval_list;
	ASSERT(cbuf);
	mutex_enter(&un->un_linlck_mx);
	while (cbuf) {
		pw = un->un_column_ic[cbuf->cbuf_column].un_pw_reserve;
		sb = &pw->pw_sb[0];
		ASSERT(sb[cbuf->cbuf_pwslot].sb_flags & SB_INVAL_PEND);
		sb[cbuf->cbuf_pwslot].sb_flags = SB_UNUSED;
		sb[cbuf->cbuf_pwslot].sb_cs = NULL;
		if ((pw->pw_free++ == 0) || (un->un_rflags & MD_RFLAG_NEEDPW))
			broadcast++;
		cbuf_to_free = cbuf;
		cbuf = cbuf->cbuf_next;
		kmem_free(cbuf_to_free->cbuf_buffer, dbtob(un->un_iosize));
		kmem_cache_free(raid_cbuf_cache, cbuf_to_free);
	}
	cs->cs_pw_inval_list = (md_raidcbuf_t *)NULL;
	/*
	 * now that there is a free prewrite slot, check to see if there
	 * are any io operations waiting.  First wake up raid_io_startup,
	 * then signal the processes waiting in raid_write.
	 */
	if (ui->ui_io_lock->io_list_front)
		raid_io_startup(un);
	mutex_exit(&un->un_linlck_mx);
	if (broadcast) {
		cv_broadcast(&un->un_cv);
		return;
	}
	mutex_enter(&un->un_mx);
	if (un->un_rflags & MD_RFLAG_NEEDPW)
		cv_broadcast(&un->un_cv);
	mutex_exit(&un->un_mx);
}
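
/*
 * NAMES:	raid_get_pwslot
 * DESCRIPTION: find a pre-write scoreboard slot for the given column,
 *		reusing a slot whose outstanding block range overlaps this
 *		I/O when one exists, and mark the slot in use.  (Header
 *		added for documentation; the routine itself is unchanged.)
 * PARAMETERS:	md_raidcs_t *cs - pointer to a child structure
 *		int column - column to reserve a pre-write slot on
 */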
static int
raid_get_pwslot(md_raidcs_t *cs, int column)
{
	mr_scoreboard_t	*sb;
	mr_pw_reserve_t	*pw;
	mr_unit_t	*un = cs->cs_un;
	diskaddr_t	start_blk = cs->cs_blkno;
	diskaddr_t	last_blk = cs->cs_lastblk;
	int		i;
	int		pwcnt = un->un_pwcnt;
	int		avail = -1;
	int		use = -1;
	int		flags;

	/* start with the data column */
	pw = cs->cs_un->un_column_ic[column].un_pw_reserve;
	sb = &pw->pw_sb[0];
	ASSERT(pw->pw_free > 0);
	for (i = 0; i < pwcnt; i++) {
		flags = sb[i].sb_flags;
		if (flags & SB_INVAL_PEND)
			continue;

		if ((avail == -1) && (flags & (SB_AVAIL | SB_UNUSED)))
			avail = i;

		if ((start_blk > sb[i].sb_last_blk) ||
		    (last_blk < sb[i].sb_start_blk))
			continue;

		/* OVERLAP */
		ASSERT(! (sb[i].sb_flags & SB_INUSE));

		/*
		 * raid_invalidate_pwslot attempts to zero out a prewrite
		 * entry in parallel with other disk reads/writes related
		 * to the current transaction.  However, cs_frags accounting
		 * for this case is broken because raid_write_io resets
		 * cs_frags, ignoring that it could have been set to a > 0
		 * value by raid_invalidate_pwslot.  While this can be
		 * fixed, an additional problem is that we don't seem to
		 * handle correctly the case of getting a disk error for
		 * prewrite entry invalidation.
		 * It does not look like we really need to invalidate
		 * prewrite slots, because raid_replay sorts prewrite id's
		 * in ascending order and during recovery the latest
		 * prewrite entry for the same block will be replayed last.
		 * That's why I ifdef'd out the call to
		 * raid_invalidate_pwslot. --aguzovsk@east
		 */

		if (use == -1) {
			use = i;
		}
	}

	ASSERT(avail != -1);
	pw->pw_free--;
	if (use == -1)
		use = avail;

	ASSERT(! (sb[use].sb_flags & SB_INUSE));
	sb[use].sb_flags = SB_INUSE;
	sb[use].sb_cs = cs;
	sb[use].sb_start_blk = start_blk;
	sb[use].sb_last_blk = last_blk;
	ASSERT((use >= 0) && (use < un->un_pwcnt));
	return (use);
}

static int
raid_check_pw(md_raidcs_t *cs)
{
	mr_unit_t	*un = cs->cs_un;
	int		i;

	ASSERT(! (cs->cs_flags & MD_RCS_HAVE_PW_SLOTS));
	/*
	 * check to be sure there is a prewrite slot available;
	 * if not, just return.
	 */
	if (cs->cs_flags & MD_RCS_LINE) {
		for (i = 0; i < un->un_totalcolumncnt; i++)
			if (un->un_column_ic[i].un_pw_reserve->pw_free <= 0)
				return (1);
		return (0);
	}

	if (un->un_column_ic[cs->cs_dcolumn].un_pw_reserve->pw_free <= 0)
		return (1);
	if (un->un_column_ic[cs->cs_pcolumn].un_pw_reserve->pw_free <= 0)
		return (1);
	return (0);
}
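
/*
 * NAMES:	raid_alloc_pwslot
 * DESCRIPTION: allocate a pre-write id and pre-write slots for the data
 *		column, the parity column and any chained buffers of this
 *		I/O.  (Header added for documentation; the routine itself
 *		is unchanged.)
 * PARAMETERS:	md_raidcs_t *cs - pointer to a child structure
 * RETURNS:	0 - slots allocated
 *		1 - no slot currently available on a needed column
 */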
static int
raid_alloc_pwslot(md_raidcs_t *cs)
{
	mr_unit_t	*un = cs->cs_un;
	md_raidcbuf_t	*cbuf;

	ASSERT(! (cs->cs_flags & MD_RCS_HAVE_PW_SLOTS));
	if (raid_check_pw(cs))
		return (1);

	mutex_enter(&un->un_mx);
	un->un_pwid++;
	cs->cs_pwid = un->un_pwid;
	mutex_exit(&un->un_mx);

	cs->cs_dpwslot = raid_get_pwslot(cs, cs->cs_dcolumn);
	for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) {
		cbuf->cbuf_pwslot = raid_get_pwslot(cs, cbuf->cbuf_column);
	}
	cs->cs_ppwslot = raid_get_pwslot(cs, cs->cs_pcolumn);

	cs->cs_flags |= MD_RCS_HAVE_PW_SLOTS;

	return (0);
}

/*
 * NAMES:	raid_build_incore
 * DESCRIPTION: RAID metadevice incore structure building routine
 * PARAMETERS:	void *p - pointer to a unit structure
 *		int snarfing - a flag to indicate snarfing is required
 */
int
raid_build_incore(void *p, int snarfing)
{
	mr_unit_t	*un = (mr_unit_t *)p;
	minor_t		mnum = MD_SID(un);
	mddb_recid_t	hs_recid = 0;
	int		i;
	int		preserve_flags;
	mr_column_t	*column;
	int		iosize;
	md_dev64_t	hs, dev;
	int		resync_cnt = 0,
			error_cnt = 0;

	hs = NODEV64;
	dev = NODEV64;

	/* clear out bogus pointer in case we return (1) prior to alloc */
	un->mr_ic = NULL;

	if (MD_STATUS(un) & MD_UN_BEING_RESET) {
		mddb_setrecprivate(un->c.un_record_id, MD_PRV_PENDCLEAN);
		return (1);
	}

	if (MD_UNIT(mnum) != NULL)
		return (0);

	if (snarfing)
		MD_STATUS(un) = 0;

	un->mr_ic = (mr_unit_ic_t *)kmem_zalloc(sizeof (*un->mr_ic),
	    KM_SLEEP);

	un->un_column_ic = (mr_column_ic_t *)
	    kmem_zalloc(sizeof (mr_column_ic_t) *
	    un->un_totalcolumncnt, KM_SLEEP);

	for (i = 0; i < un->un_totalcolumncnt; i++) {

		column = &un->un_column[i];
		preserve_flags = column->un_devflags &
		    (MD_RAID_COPY_RESYNC | MD_RAID_REGEN_RESYNC);
		column->un_devflags &=
		    ~(MD_RAID_ALT_ISOPEN | MD_RAID_DEV_ISOPEN |
		    MD_RAID_WRITE_ALT);
		if (raid_build_pw_reservation(un, i) != 0) {
			/* could not build pwslot */
			return (1);
		}

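		/*
		 * For snarf, resolve the component devt from its stored
		 * key and pick up the hotspare and resync bookkeeping for
		 * this column.  (Comment added for documentation.)
		 */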
		if (snarfing) {
			set_t	setno = MD_MIN2SET(mnum);

			dev = md_getdevnum(setno, mddb_getsidenum(setno),
			    column->un_orig_key, MD_NOTRUST_DEVT);
			/*
			 * Comment out instead of remove so we have history.
			 * In the pre-SVM releases the stored devt is used,
			 * so as long as there is one, snarf is always happy
			 * even if the component is powered off.  This is
			 * not the case in the current SVM implementation.
			 * NODEV64 can be returned, and in this case, since
			 * we resolve the devt at 'open' time (first use of
			 * the metadevice), we allow snarf to continue.
			 *
			 * if (dev == NODEV64)
			 *	return (1);
			 */

			/*
			 * Setup un_orig_dev from device id info if the device
			 * is valid (not NODEV64).
			 */
			if (dev != NODEV64)
				column->un_orig_dev = dev;

			if (column->un_devstate & RCS_RESYNC)
				resync_cnt++;
			if (column->un_devstate & (RCS_ERRED | RCS_LAST_ERRED))
				error_cnt++;

			if (HOTSPARED(un, i)) {
				(void) md_hot_spare_ifc(HS_MKDEV,
				    0, 0, 0, &column->un_hs_id, NULL,
				    &hs, NULL);
				/*
				 * Same here
				 *
				 * if (hs == NODEV64)
				 *	return (1);
				 */
			}

			if (HOTSPARED(un, i)) {
				if (column->un_devstate &
				    (RCS_OKAY | RCS_LAST_ERRED)) {
					column->un_dev = hs;
					column->un_pwstart =
					    column->un_hs_pwstart;
					column->un_devstart =
					    column->un_hs_devstart;
					preserve_flags &=
					    ~(MD_RAID_COPY_RESYNC |
					    MD_RAID_REGEN_RESYNC);
				} else if (column->un_devstate & RCS_RESYNC) {
					/*
					 * if previous system was 4.0 set
					 * the direction flags
					 */
					if ((preserve_flags &
					    (MD_RAID_COPY_RESYNC |
					    MD_RAID_REGEN_RESYNC)) == 0) {
						if (column->un_alt_dev !=
						    NODEV64)
							preserve_flags |=
							    MD_RAID_COPY_RESYNC;
						else
							preserve_flags |=
							    MD_RAID_REGEN_RESYNC;
					}
				}
			} else { /* no hot spares */
				column->un_dev = dev;
				column->un_pwstart = column->un_orig_pwstart;
				column->un_devstart = column->un_orig_devstart;
				if (column->un_devstate & RCS_RESYNC) {
					preserve_flags |=
					    MD_RAID_REGEN_RESYNC;
					preserve_flags &=
					    ~MD_RAID_COPY_RESYNC;
				}
			}
			if (! (column->un_devstate & RCS_RESYNC)) {
				preserve_flags &=
				    ~(MD_RAID_REGEN_RESYNC |
				    MD_RAID_COPY_RESYNC);
			}

			column->un_devflags = preserve_flags;
			column->un_alt_dev = NODEV64;
			column->un_alt_pwstart = 0;
			column->un_alt_devstart = 0;
			un->un_resync_line_index = 0;
			un->un_resync_index = 0;
			un->un_percent_done = 0;
		}
	}

	if (resync_cnt && error_cnt) {
		for (i = 0; i < un->un_totalcolumncnt; i++) {
			column = &un->un_column[i];
			if (HOTSPARED(un, i) &&
			    (column->un_devstate & RCS_RESYNC) &&
			    (column->un_devflags & MD_RAID_COPY_RESYNC))
				/* hotspare has data */
				continue;

			if (HOTSPARED(un, i) &&
			    (column->un_devstate & RCS_RESYNC)) {
				/* hotspare does not have data */
				raid_hs_release(HS_FREE, un, &hs_recid, i);
				column->un_dev = column->un_orig_dev;
				column->un_pwstart = column->un_orig_pwstart;
				column->un_devstart = column->un_orig_devstart;
				mddb_setrecprivate(hs_recid, MD_PRV_PENDCOM);
			}

			if (column->un_devstate & RCS_ERRED)
				column->un_devstate = RCS_LAST_ERRED;

			if (column->un_devstate & RCS_RESYNC)
				column->un_devstate = RCS_ERRED;
		}
	}
	mddb_setrecprivate(un->c.un_record_id, MD_PRV_PENDCOM);

	un->un_pwid = 1; /* or some other possible value */
	un->un_magic = RAID_UNMAGIC;
	iosize = un->un_iosize;
	un->un_pbuffer = kmem_alloc(dbtob(iosize), KM_SLEEP);
	un->un_dbuffer = kmem_alloc(dbtob(iosize), KM_SLEEP);
	mutex_init(&un->un_linlck_mx, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&un->un_linlck_cv, NULL, CV_DEFAULT, NULL);
	un->un_linlck_chn = NULL;
	MD_UNIT(mnum) = un;

	return (0);
}

/*
 * NAMES:	reset_raid
 * DESCRIPTION: RAID metadevice reset routine
 * PARAMETERS:	mr_unit_t *un - pointer to a unit structure
 *		minor_t mnum - RAID metadevice minor number
 *		int removing - a flag to indicate that the device name is
 *		being removed from the MDDB database.
 */
void
reset_raid(mr_unit_t *un, minor_t mnum, int removing)
{
	int		i, n = 0;
	sv_dev_t	*sv;
	mr_column_t	*column;
	int		column_cnt = un->un_totalcolumncnt;
	mddb_recid_t	*recids, vtoc_id;
	int		hserr;

	ASSERT((MDI_UNIT(mnum)->ui_io_lock->io_list_front == NULL) &&
	    (MDI_UNIT(mnum)->ui_io_lock->io_list_back == NULL));

	md_destroy_unit_incore(mnum, &raid_md_ops);

	MD_UNIT(mnum) = NULL;

	if (un->un_pbuffer) {
		kmem_free(un->un_pbuffer, dbtob(un->un_iosize));
		un->un_pbuffer = NULL;
	}
	if (un->un_dbuffer) {
		kmem_free(un->un_dbuffer, dbtob(un->un_iosize));
		un->un_dbuffer = NULL;
	}

	/* free all pre-write slots created during build incore */
	for (i = 0; i < un->un_totalcolumncnt; i++)
		raid_free_pw_reservation(un, i);

	kmem_free(un->un_column_ic, sizeof (mr_column_ic_t) *
	    un->un_totalcolumncnt);

	kmem_free(un->mr_ic, sizeof (*un->mr_ic));

	if (!removing)
		return;

	sv = (sv_dev_t *)kmem_zalloc((column_cnt + 1) * sizeof (sv_dev_t),
	    KM_SLEEP);

	recids = (mddb_recid_t *)
	    kmem_zalloc((column_cnt + 2) * sizeof (mddb_recid_t), KM_SLEEP);

	for (i = 0; i < column_cnt; i++) {
		md_unit_t	*comp_un;
		md_dev64_t	comp_dev;

		column = &un->un_column[i];
		sv[i].setno = MD_MIN2SET(mnum);
		sv[i].key = column->un_orig_key;
		if (HOTSPARED(un, i)) {
			if (column->un_devstate & (RCS_ERRED | RCS_LAST_ERRED))
				hserr = HS_BAD;
			else
				hserr = HS_FREE;
			raid_hs_release(hserr, un, &recids[n++], i);
		}
		/*
		 * deparent any metadevices.
		 * NOTE: currently soft partitions are the only metadevices
		 * allowed in RAID metadevices.
		 */
		comp_dev = column->un_dev;
		if (md_getmajor(comp_dev) == md_major) {
			comp_un = MD_UNIT(md_getminor(comp_dev));
			recids[n++] = MD_RECID(comp_un);
			md_reset_parent(comp_dev);
		}
	}
	/* decrement the reference count of the old hsp */
	if (un->un_hsp_id != -1)
		(void) md_hot_spare_ifc(HSP_DECREF, un->un_hsp_id, 0, 0,
		    &recids[n++], NULL, NULL, NULL);
	recids[n] = 0;
	MD_STATUS(un) |= MD_UN_BEING_RESET;
	vtoc_id = un->c.un_vtoc_id;

	raid_commit(un, recids);

	/* Remove the unit structure */
	mddb_deleterec_wrapper(un->c.un_record_id);

	/* Remove the vtoc, if present */
	if (vtoc_id)
		mddb_deleterec_wrapper(vtoc_id);
	md_rem_names(sv, column_cnt);
	kmem_free(sv, (column_cnt + 1) * sizeof (sv_dev_t));
	kmem_free(recids, (column_cnt + 2) * sizeof (mddb_recid_t));

	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, SVM_TAG_METADEVICE,
	    MD_MIN2SET(mnum), mnum);
}

/*
 * NAMES:	raid_error_parent
 * DESCRIPTION: mark a parent structure in error
 * PARAMETERS:	md_raidps_t *ps - pointer to parent structure
 *		int error - error value to set
 * NOTE: (TBR) - this routine currently is not in use.
 */
static void
raid_error_parent(md_raidps_t *ps, int error)
{
	mutex_enter(&ps->ps_mx);
	ps->ps_flags |= MD_RPS_ERROR;
	ps->ps_error = error;
	mutex_exit(&ps->ps_mx);
}

/*
 * The following defines tell raid_free_parent
 *	RFP_RLS_LOCK		release the unit reader lock when done.
 *	RFP_DECR_PWFRAGS	decrement ps_pwfrags
 *	RFP_DECR_FRAGS		decrement ps_frags
 *	RFP_DECR_READFRAGS	read keeps FRAGS and PWFRAGS in lockstep
 */
#define	RFP_RLS_LOCK		0x00001
#define	RFP_DECR_PWFRAGS	0x00002
#define	RFP_DECR_FRAGS		0x00004
#define	RFP_DECR_READFRAGS	(RFP_DECR_PWFRAGS | RFP_DECR_FRAGS)

/*
 * NAMES:	raid_free_parent
 * DESCRIPTION: free a parent structure
 * PARAMETERS:	md_raidps_t *ps - pointer to parent structure
 *		int todo - indicates what needs to be done
 */
static void
raid_free_parent(md_raidps_t *ps, int todo)
{
	mdi_unit_t	*ui = ps->ps_ui;

	ASSERT(ps->ps_magic == RAID_PSMAGIC);
	ASSERT(ps->ps_flags & MD_RPS_INUSE);
	mutex_enter(&ps->ps_mx);
	if (todo & RFP_DECR_PWFRAGS) {
		ASSERT(ps->ps_pwfrags);
		ps->ps_pwfrags--;
		if (ps->ps_pwfrags == 0 &&
		    (! (ps->ps_flags & MD_RPS_IODONE))) {
			if (ps->ps_flags & MD_RPS_ERROR) {
				ps->ps_bp->b_flags |= B_ERROR;
				ps->ps_bp->b_error = ps->ps_error;
			}
			md_kstat_done(ui, ps->ps_bp, 0);
			biodone(ps->ps_bp);
			ps->ps_flags |= MD_RPS_IODONE;
		}
	}

	if (todo & RFP_DECR_FRAGS) {
		ASSERT(ps->ps_frags);
		ps->ps_frags--;
	}

	if (ps->ps_frags != 0) {
		mutex_exit(&ps->ps_mx);
		return;
	}

	ASSERT((ps->ps_frags == 0) && (ps->ps_pwfrags == 0));
	mutex_exit(&ps->ps_mx);

	if (todo & RFP_RLS_LOCK)
		md_io_readerexit(ui);

	if (panicstr) {
		ps->ps_flags |= MD_RPS_DONE;
		return;
	}

	if (ps->ps_flags & MD_RPS_HSREQ)
		(void) raid_hotspares();

	ASSERT(todo & RFP_RLS_LOCK);
	ps->ps_flags &= ~MD_RPS_INUSE;

	md_dec_iocount(MD_MIN2SET(ps->ps_un->c.un_self_id));

	kmem_cache_free(raid_parent_cache, ps);
}

/*
 * NAMES:	raid_free_child
 * DESCRIPTION: free a child structure
 * PARAMETERS:	md_raidcs_t *cs - pointer to child structure
 *		int drop_locks - 0 for no locks held
/*
 * NAMES:	raid_free_child
 * DESCRIPTION: free a child structure
 * PARAMETERS:	md_raidcs_t *cs - pointer to child structure
 *		int  drop_locks - 0 for no locks held
 */
static void
raid_free_child(md_raidcs_t *cs, int drop_locks)
{
	mr_unit_t	*un = cs->cs_un;
	md_raidcbuf_t	*cbuf, *cbuf1;

	if (cs->cs_pw_inval_list)
		raid_free_pwinvalidate(cs);

	if (drop_locks) {
		ASSERT(cs->cs_flags & MD_RCS_LLOCKD &&
		    (cs->cs_flags & (MD_RCS_READER | MD_RCS_WRITER)));
		md_unit_readerexit(MDI_UNIT(MD_SID(un)));
		raid_line_exit(cs);
	} else {
		ASSERT(!(cs->cs_flags & MD_RCS_LLOCKD));
	}

	freebuffers(cs);
	cbuf = cs->cs_buflist;
	while (cbuf) {
		cbuf1 = cbuf->cbuf_next;
		kmem_cache_free(raid_cbuf_cache, cbuf);
		cbuf = cbuf1;
	}
	if (cs->cs_dbuf.b_flags & B_REMAPPED)
		bp_mapout(&cs->cs_dbuf);
	kmem_cache_free(raid_child_cache, cs);
}
/*
 * NAME:	raid_regen_parity
 *
 * DESCRIPTION: This routine is used to regenerate the parity blocks
 *		for the entire raid device.  It is called from
 *		both the regen thread and the I/O path.
 *
 *		On error the entire device is marked as in error by
 *		placing the erroring device in error and all other
 *		devices in last_errored.
 *
 * PARAMETERS:	md_raidcs_t *cs
 */
void
raid_regen_parity(md_raidcs_t *cs)
{
	mr_unit_t	*un = cs->cs_un;
	mdi_unit_t	*ui = MDI_UNIT(un->c.un_self_id);
	caddr_t		buffer;
	caddr_t		parity_buffer;
	buf_t		*bp;
	uint_t		*dbuf, *pbuf;
	uint_t		colcnt = un->un_totalcolumncnt;
	int		column;
	int		parity_column = cs->cs_pcolumn;
	size_t		bcount;
	int		j;

	/*
	 * This routine uses the data and parity buffers allocated to a
	 * write.  In the case of a read the buffers are allocated and
	 * freed at the end.
	 */

	ASSERT(IO_READER_HELD(un));
	ASSERT(cs->cs_flags & MD_RCS_LLOCKD);
	ASSERT(UNIT_READER_HELD(un));

	if (raid_state_cnt(un, RCS_OKAY) != colcnt)
		return;

	if (cs->cs_flags & MD_RCS_READER) {
		getpbuffer(cs);
		getdbuffer(cs);
	}
	ASSERT(cs->cs_dbuffer && cs->cs_pbuffer);
	bcount = cs->cs_bcount;
	buffer = cs->cs_dbuffer;
	parity_buffer = cs->cs_pbuffer;
	bzero(parity_buffer, bcount);
	bp = &cs->cs_dbuf;
	for (column = 0; column < colcnt; column++) {
		if (column == parity_column)
			continue;
		reset_buf(bp, B_READ | B_BUSY, bcount);
		bp->b_un.b_addr = buffer;
		bp->b_edev = md_dev64_to_dev(un->un_column[column].un_dev);
		bp->b_lblkno = cs->cs_blkno + un->un_column[column].un_devstart;
		bp->b_bcount = bcount;
		bp->b_bufsize = bcount;
		(void) md_call_strategy(bp, MD_STR_NOTTOP, NULL);
		if (biowait(bp))
			goto bail;
		pbuf = (uint_t *)(void *)parity_buffer;
		dbuf = (uint_t *)(void *)buffer;
		for (j = 0; j < (bcount / (sizeof (uint_t))); j++) {
			*pbuf = *pbuf ^ *dbuf;
			pbuf++;
			dbuf++;
		}
	}

	reset_buf(bp, B_WRITE | B_BUSY, cs->cs_bcount);
	bp->b_un.b_addr = parity_buffer;
	bp->b_edev = md_dev64_to_dev(un->un_column[parity_column].un_dev);
	bp->b_lblkno = cs->cs_blkno + un->un_column[parity_column].un_devstart;
	bp->b_bcount = bcount;
	bp->b_bufsize = bcount;
	(void) md_call_strategy(bp, MD_STR_NOTTOP, NULL);
	if (biowait(bp))
		goto bail;

	if (cs->cs_flags & MD_RCS_READER) {
		freebuffers(cs);
		cs->cs_pbuffer = NULL;
		cs->cs_dbuffer = NULL;
	}
	bp->b_chain = (struct buf *)cs;
	return;
bail:
	if (cs->cs_flags & MD_RCS_READER) {
		freebuffers(cs);
		cs->cs_pbuffer = NULL;
		cs->cs_dbuffer = NULL;
	}
	md_unit_readerexit(ui);
	un = md_unit_writerlock(ui);
	raid_set_state(un, column, RCS_ERRED, 0);
	for (column = 0; column < colcnt; column++)
		raid_set_state(un, column, RCS_ERRED, 0);
	raid_commit(un, NULL);
	md_unit_writerexit(ui);
	un = md_unit_readerlock(ui);
	bp->b_chain = (struct buf *)cs;
}
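/*
 * Worked example (illustrative only): parity is the XOR of all data
 * columns, so with three data columns holding the 32-bit words
 *
 *	d0 = 0x00ff00ff, d1 = 0x0f0f0f0f, d2 = 0x12345678
 *
 * the regenerated parity word is
 *
 *	p = d0 ^ d1 ^ d2 = 0x1dc45988
 *
 * and any single lost data word can be rebuilt by XORing the parity
 * with the surviving data words, e.g. d1 == p ^ d0 ^ d2.  This is why
 * the loop above may simply accumulate each column into the zeroed
 * parity buffer in turn.
 */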
/*
 * NAMES:	raid_error_state
 * DESCRIPTION: check the unit and column states' impact on an I/O error
 * NOTE:	the state now may not be the state when the
 *		I/O completed, due to race conditions.
 * PARAMETERS:	mr_unit_t *un - pointer to raid unit structure
 *		buf_t	  *bp - pointer to buffer structure
 */
static int
raid_error_state(mr_unit_t *un, buf_t *bp)
{
	int		column;
	int		i;

	ASSERT(IO_READER_HELD(un));
	ASSERT(UNIT_WRITER_HELD(un));

	column = -1;
	for (i = 0; i < un->un_totalcolumncnt; i++) {
		if (un->un_column[i].un_dev == md_expldev(bp->b_edev)) {
			column = i;
			break;
		}
		if (un->un_column[i].un_alt_dev == md_expldev(bp->b_edev)) {
			column = i;
			break;
		}
	}

	/* in case a replace snuck in while waiting on the unit writer lock */
	if (column == -1) {
		return (0);
	}

	(void) raid_set_state(un, column, RCS_ERRED, 0);
	ASSERT(un->un_state & (RUS_ERRED | RUS_LAST_ERRED));

	raid_commit(un, NULL);
	if (un->un_state & RUS_ERRED) {
		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));
	} else if (un->un_state & RUS_LAST_ERRED) {
		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));
	}

	return (EIO);
}
/*
 * NAME:	raid_mapin_buf
 * DESCRIPTION: map in the parent's data buffer if it has not already
 *		been mapped in
 * PARAMETERS:	md_raidcs_t *cs - pointer to child structure
 */
static void
raid_mapin_buf(md_raidcs_t *cs)
{
	md_raidps_t	*ps = cs->cs_ps;

	/*
	 * Check whether the buffer is already mapped in.  If it is,
	 * record the address of this child's data and return.  Since it
	 * is expensive to grab a mutex, this is only done when the mapin
	 * is not complete.  Once the mutex is acquired it is possible
	 * that the mapin was completed by someone else, so recheck and,
	 * if necessary, do the mapin.
	 */
	if (ps->ps_mapin > 0) {
		cs->cs_addr = ps->ps_addr + cs->cs_offset;
		return;
	}
	mutex_enter(&ps->ps_mapin_mx);
	if (ps->ps_mapin > 0) {
		cs->cs_addr = ps->ps_addr + cs->cs_offset;
		mutex_exit(&ps->ps_mapin_mx);
		return;
	}
	bp_mapin(ps->ps_bp);
	/*
	 * get the new b_addr out of the parent since bp_mapin just changed it
	 */
	ps->ps_addr = ps->ps_bp->b_un.b_addr;
	cs->cs_addr = ps->ps_addr + cs->cs_offset;
	ps->ps_mapin++;
	mutex_exit(&ps->ps_mapin_mx);
}
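/*
 * Illustrative note: raid_mapin_buf() above is an instance of the
 * double-checked locking pattern.  A hypothetical minimal skeleton:
 *
 *	if (done)			cheap, lock-free check
 *		return;
 *	mutex_enter(&mx);
 *	if (!done) {			recheck under the mutex
 *		do_work();
 *		done = 1;
 *	}
 *	mutex_exit(&mx);
 *
 * The unlocked test short-circuits the common case; the recheck under
 * ps_mapin_mx guarantees bp_mapin() runs exactly once even when
 * several children race here.
 */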
/*
 * NAMES:	raid_read_no_retry
 * DESCRIPTION: error routine for a RAID metadevice read.  The read
 *		failed while attempting to regenerate the data and no
 *		retry is possible; the error occurred in
 *		raid_raidregenloop().
 * PARAMETERS:	mr_unit_t   *un - pointer to raid unit structure
 *		md_raidcs_t *cs - pointer to child structure
 */
/*ARGSUSED*/
static void
raid_read_no_retry(mr_unit_t *un, md_raidcs_t *cs)
{
	md_raidps_t	*ps = cs->cs_ps;

	raid_error_parent(ps, EIO);
	raid_free_child(cs, 1);

	/* decrement readfrags */
	raid_free_parent(ps, RFP_DECR_READFRAGS | RFP_RLS_LOCK);
}

/*
 * NAMES:	raid_read_retry
 * DESCRIPTION: I/O retry routine for a RAID metadevice read
 * PARAMETERS:	md_raidcs_t *cs - pointer to child structure
 */
static void
raid_read_retry(mr_unit_t *un, md_raidcs_t *cs)
{
	/* re-initialize the buf_t structure for raid_read() */
	cs->cs_dbuf.b_chain = (struct buf *)cs;
	cs->cs_dbuf.b_back = &cs->cs_dbuf;
	cs->cs_dbuf.b_forw = &cs->cs_dbuf;
	cs->cs_dbuf.b_flags = B_BUSY;	/* initialize flags */
	cs->cs_dbuf.b_error = 0;	/* initialize error */
	cs->cs_dbuf.b_offset = -1;
	/* Initialize semaphores */
	sema_init(&cs->cs_dbuf.b_io, 0, NULL,
	    SEMA_DEFAULT, NULL);
	sema_init(&cs->cs_dbuf.b_sem, 0, NULL,
	    SEMA_DEFAULT, NULL);

	cs->cs_pbuf.b_chain = (struct buf *)cs;
	cs->cs_pbuf.b_back = &cs->cs_pbuf;
	cs->cs_pbuf.b_forw = &cs->cs_pbuf;
	cs->cs_pbuf.b_flags = B_BUSY;	/* initialize flags */
	cs->cs_pbuf.b_error = 0;	/* initialize error */
	cs->cs_pbuf.b_offset = -1;
	sema_init(&cs->cs_pbuf.b_io, 0, NULL,
	    SEMA_DEFAULT, NULL);
	sema_init(&cs->cs_pbuf.b_sem, 0, NULL,
	    SEMA_DEFAULT, NULL);

	cs->cs_flags &= ~MD_RCS_ERROR;	/* reset child error flag */
	cs->cs_flags |= MD_RCS_RECOVERY;	/* set RECOVERY flag */

	/*
	 * Re-scheduling the I/O with raid_read_io() is simpler; basically,
	 * raid_read_io() is invoked again with the same child structure.
	 * (NOTE: we aren't supposed to do any error recovery when an I/O
	 * error occurred in raid_raidregenloop().)
	 */
	raid_mapin_buf(cs);
	raid_read_io(un, cs);
}

/*
 * NAMES:	raid_rderr
 * DESCRIPTION: I/O error handling routine for a RAID metadevice read
 * PARAMETERS:	md_raidcs_t *cs - pointer to child structure
 * LOCKS:	must obtain unit writer lock while calling raid_error_state
 *		since a unit or column state transition may take place.
 *		must obtain unit reader lock to retry I/O.
 */
/*ARGSUSED*/
static void
raid_rderr(md_raidcs_t *cs)
{
	md_raidps_t	*ps;
	mdi_unit_t	*ui;
	mr_unit_t	*un;
	int		error = 0;

	ps = cs->cs_ps;
	ui = ps->ps_ui;
	un = (mr_unit_t *)md_unit_writerlock(ui);
	ASSERT(un != 0);

	if (cs->cs_dbuf.b_flags & B_ERROR)
		error = raid_error_state(un, &cs->cs_dbuf);
	if (cs->cs_pbuf.b_flags & B_ERROR)
		error |= raid_error_state(un, &cs->cs_pbuf);

	md_unit_writerexit(ui);

	ps->ps_flags |= MD_RPS_HSREQ;

	un = (mr_unit_t *)md_unit_readerlock(ui);
	ASSERT(un != 0);
	/* now attempt the appropriate retry routine */
	(*(cs->cs_retry_call))(un, cs);
}
/*
 * NAMES:	raid_read_error
 * DESCRIPTION: I/O error handling routine for a RAID metadevice read
 * PARAMETERS:	md_raidcs_t *cs - pointer to child structure
 */
/*ARGSUSED*/
static void
raid_read_error(md_raidcs_t *cs)
{
	md_raidps_t	*ps;
	mdi_unit_t	*ui;
	mr_unit_t	*un;
	set_t		setno;

	ps = cs->cs_ps;
	ui = ps->ps_ui;
	un = cs->cs_un;

	setno = MD_UN2SET(un);

	if ((cs->cs_dbuf.b_flags & B_ERROR) &&
	    (COLUMN_STATE(un, cs->cs_dcolumn) != RCS_ERRED) &&
	    (COLUMN_STATE(un, cs->cs_dcolumn) != RCS_LAST_ERRED))
		cmn_err(CE_WARN, "md %s: read error on %s",
		    md_shortname(MD_SID(un)),
		    md_devname(setno, md_expldev(cs->cs_dbuf.b_edev), NULL, 0));

	if ((cs->cs_pbuf.b_flags & B_ERROR) &&
	    (COLUMN_STATE(un, cs->cs_pcolumn) != RCS_ERRED) &&
	    (COLUMN_STATE(un, cs->cs_pcolumn) != RCS_LAST_ERRED))
		cmn_err(CE_WARN, "md %s: read error on %s",
		    md_shortname(MD_SID(un)),
		    md_devname(setno, md_expldev(cs->cs_pbuf.b_edev), NULL, 0));

	md_unit_readerexit(ui);

	ASSERT(cs->cs_frags == 0);

	/* now schedule processing for possible state change */
	daemon_request(&md_mstr_daemon, raid_rderr,
	    (daemon_queue_t *)cs, REQ_OLD);
}

/*
 * NAMES:	getdbuffer
 * DESCRIPTION: data buffer allocation for a child structure
 * PARAMETERS:	md_raidcs_t *cs - pointer to child structure
 *
 * NOTE: always get dbuffer before pbuffer
 *	 and get both buffers before pwslot
 *	 otherwise a deadlock could be introduced.
 */
static void
getdbuffer(md_raidcs_t *cs)
{
	mr_unit_t	*un;

	cs->cs_dbuffer = kmem_alloc(cs->cs_bcount + DEV_BSIZE, KM_NOSLEEP);
	if (cs->cs_dbuffer != NULL)
		return;
	un = cs->cs_ps->ps_un;
	mutex_enter(&un->un_mx);
	while (un->un_dbuffer == NULL) {
		STAT_INC(data_buffer_waits);
		un->un_rflags |= MD_RFLAG_NEEDBUF;
		cv_wait(&un->un_cv, &un->un_mx);
	}
	cs->cs_dbuffer = un->un_dbuffer;
	cs->cs_flags |= MD_RCS_UNDBUF;
	un->un_dbuffer = NULL;
	mutex_exit(&un->un_mx);
}
/*
 * NAMES:	getpbuffer
 * DESCRIPTION: parity buffer allocation for a child structure
 * PARAMETERS:	md_raidcs_t *cs - pointer to child structure
 *
 * NOTE: always get dbuffer before pbuffer
 *	 and get both buffers before pwslot
 *	 otherwise a deadlock could be introduced.
 */
static void
getpbuffer(md_raidcs_t *cs)
{
	mr_unit_t	*un;

	cs->cs_pbuffer = kmem_alloc(cs->cs_bcount + DEV_BSIZE, KM_NOSLEEP);
	if (cs->cs_pbuffer != NULL)
		return;
	un = cs->cs_ps->ps_un;
	mutex_enter(&un->un_mx);
	while (un->un_pbuffer == NULL) {
		STAT_INC(parity_buffer_waits);
		un->un_rflags |= MD_RFLAG_NEEDBUF;
		cv_wait(&un->un_cv, &un->un_mx);
	}
	cs->cs_pbuffer = un->un_pbuffer;
	cs->cs_flags |= MD_RCS_UNPBUF;
	un->un_pbuffer = NULL;
	mutex_exit(&un->un_mx);
}

static void
getresources(md_raidcs_t *cs)
{
	md_raidcbuf_t	*cbuf;
	/*
	 * NOTE: always get dbuffer before pbuffer
	 *	 and get both buffers before pwslot
	 *	 otherwise a deadlock could be introduced.
	 */
	getdbuffer(cs);
	getpbuffer(cs);
	for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next)
		cbuf->cbuf_buffer =
		    kmem_alloc(cs->cs_bcount + DEV_BSIZE, KM_SLEEP);
}

/*
 * NAMES:	freebuffers
 * DESCRIPTION: child structure buffer freeing routine
 * PARAMETERS:	md_raidcs_t *cs - pointer to child structure
 */
static void
freebuffers(md_raidcs_t *cs)
{
	mr_unit_t	*un;
	md_raidcbuf_t	*cbuf;

	/* free buffers used for full line write */
	for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) {
		if (cbuf->cbuf_buffer == NULL)
			continue;
		kmem_free(cbuf->cbuf_buffer, cbuf->cbuf_bcount + DEV_BSIZE);
		cbuf->cbuf_buffer = NULL;
		cbuf->cbuf_bcount = 0;
	}

	if (cs->cs_flags & (MD_RCS_UNDBUF | MD_RCS_UNPBUF)) {
		un = cs->cs_un;
		mutex_enter(&un->un_mx);
	}
	if (cs->cs_dbuffer) {
		if (cs->cs_flags & MD_RCS_UNDBUF)
			un->un_dbuffer = cs->cs_dbuffer;
		else
			kmem_free(cs->cs_dbuffer, cs->cs_bcount + DEV_BSIZE);
	}
	if (cs->cs_pbuffer) {
		if (cs->cs_flags & MD_RCS_UNPBUF)
			un->un_pbuffer = cs->cs_pbuffer;
		else
			kmem_free(cs->cs_pbuffer, cs->cs_bcount + DEV_BSIZE);
	}
	if (cs->cs_flags & (MD_RCS_UNDBUF | MD_RCS_UNPBUF)) {
		un->un_rflags &= ~MD_RFLAG_NEEDBUF;
		cv_broadcast(&un->un_cv);
		mutex_exit(&un->un_mx);
	}
}
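/*
 * Illustrative note on the allocation strategy above (a sketch, not
 * driver code): getdbuffer() and getpbuffer() first try a non-blocking
 * allocation and only fall back to the unit's single preallocated
 * buffer when memory is tight:
 *
 *	buf = kmem_alloc(size, KM_NOSLEEP);	fast path, may fail
 *	if (buf == NULL) {
 *		wait for un_dbuffer/un_pbuffer to be returned via
 *		un_cv, then take ownership of it
 *	}
 *
 * This keeps I/O moving under memory pressure at the cost of
 * serializing the children that lose the race, which is what the
 * data_buffer_waits and parity_buffer_waits counters measure.
 */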
/*
 * NAMES:	raid_line_reader_lock, raid_line_writer_lock
 * DESCRIPTION: RAID metadevice line reader and writer lock routines
 * PARAMETERS:	md_raidcs_t *cs - pointer to child structure
 *		int resync_thread - non-zero when called from the resync
 *		thread (raid_line_reader_lock only)
 */
void
raid_line_reader_lock(md_raidcs_t *cs, int resync_thread)
{
	mr_unit_t	*un;
	md_raidcs_t	*cs1;

	ASSERT(cs->cs_line != MD_DISKADDR_ERROR);
	un = cs->cs_un;
	cs->cs_flags |= MD_RCS_READER;
	STAT_CHECK(raid_line_lock_wait, MUTEX_HELD(&un->un_linlck_mx));
	if (!panicstr)
		mutex_enter(&un->un_linlck_mx);
	cs1 = un->un_linlck_chn;
	while (cs1 != NULL) {
		for (cs1 = un->un_linlck_chn; cs1; cs1 = cs1->cs_linlck_next)
			if (raid_io_overlaps(cs, cs1) == 1)
				if (cs1->cs_flags & MD_RCS_WRITER)
					break;

		if (cs1 != NULL) {
			if (panicstr)
				panic("md: raid line write lock held");
			un->un_linlck_flg = 1;
			cv_wait(&un->un_linlck_cv, &un->un_linlck_mx);
			STAT_INC(raid_read_waits);
		}
	}
	STAT_MAX(raid_max_reader_locks, raid_reader_locks_active);
	STAT_INC(raid_reader_locks);
	cs1 = un->un_linlck_chn;
	if (cs1 != NULL)
		cs1->cs_linlck_prev = cs;
	cs->cs_linlck_next = cs1;
	cs->cs_linlck_prev = NULL;
	un->un_linlck_chn = cs;
	cs->cs_flags |= MD_RCS_LLOCKD;
	if (resync_thread) {
		diskaddr_t lastblk = cs->cs_blkno + cs->cs_blkcnt - 1;
		diskaddr_t line = (lastblk + 1) / un->un_segsize;
		ASSERT(raid_state_cnt(un, RCS_RESYNC));
		mutex_enter(&un->un_mx);
		un->un_resync_line_index = line;
		mutex_exit(&un->un_mx);
	}
	if (!panicstr)
		mutex_exit(&un->un_linlck_mx);
}
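/*
 * Illustrative sketch (assumption: raid_io_overlaps() tests whether the
 * cs_blkno..cs_lastblk ranges of two children intersect): readers may
 * share a line, but a reader must wait for any overlapping writer.
 *
 *	writer W holds blocks  100..131
 *	reader R wants blocks  120..127	-> ranges intersect, R cv_waits
 *	reader R2 wants blocks 200..215	-> no overlap, proceeds at once
 *
 * When W calls raid_line_exit() it broadcasts un_linlck_cv, and R
 * re-scans the lock chain before linking itself in.
 */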
int
raid_line_writer_lock(md_raidcs_t *cs, int lock)
{
	mr_unit_t	*un;
	md_raidcs_t	*cs1;

	ASSERT(cs->cs_line != MD_DISKADDR_ERROR);
	cs->cs_flags |= MD_RCS_WRITER;
	un = cs->cs_ps->ps_un;

	STAT_CHECK(raid_line_lock_wait, MUTEX_HELD(&un->un_linlck_mx));
	if (lock && !panicstr)
		mutex_enter(&un->un_linlck_mx);
	ASSERT(MUTEX_HELD(&un->un_linlck_mx));

	cs1 = un->un_linlck_chn;
	for (cs1 = un->un_linlck_chn; cs1; cs1 = cs1->cs_linlck_next)
		if (raid_io_overlaps(cs, cs1))
			break;

	if (cs1 != NULL) {
		if (panicstr)
			panic("md: line writer lock inaccessible");
		goto no_lock_exit;
	}

	if (raid_alloc_pwslot(cs)) {
		if (panicstr)
			panic("md: no prewrite slots");
		STAT_INC(raid_prewrite_waits);
		goto no_lock_exit;
	}

	cs1 = un->un_linlck_chn;
	if (cs1 != NULL)
		cs1->cs_linlck_prev = cs;
	cs->cs_linlck_next = cs1;
	cs->cs_linlck_prev = NULL;
	un->un_linlck_chn = cs;
	cs->cs_flags |= MD_RCS_LLOCKD;
	cs->cs_flags &= ~MD_RCS_WAITING;
	STAT_INC(raid_writer_locks);
	STAT_MAX(raid_max_write_locks, raid_write_locks_active);
	if (lock && !panicstr)
		mutex_exit(&un->un_linlck_mx);
	return (0);

no_lock_exit:
	/* if this is already queued then do not requeue it */
	ASSERT(!(cs->cs_flags & MD_RCS_LLOCKD));
	if (!lock || (cs->cs_flags & MD_RCS_WAITING))
		return (1);
	cs->cs_flags |= MD_RCS_WAITING;
	cs->cs_un = un;
	raid_enqueue(cs);
	if (lock && !panicstr)
		mutex_exit(&un->un_linlck_mx);
	return (1);
}

static void
raid_startio(md_raidcs_t *cs)
{
	mdi_unit_t	*ui = cs->cs_ps->ps_ui;
	mr_unit_t	*un = cs->cs_un;

	un = md_unit_readerlock(ui);
	raid_write_io(un, cs);
}
void
raid_io_startup(mr_unit_t *un)
{
	md_raidcs_t	*waiting_list, *cs1;
	md_raidcs_t	*previous = NULL, *next = NULL;
	mdi_unit_t	*ui = MDI_UNIT(un->c.un_self_id);
	kmutex_t	*io_list_mutex = &ui->ui_io_lock->io_list_mutex;

	ASSERT(MUTEX_HELD(&un->un_linlck_mx));
	mutex_enter(io_list_mutex);

	/*
	 * Check to be sure there are no reader locks outstanding.  If
	 * there are none, pass on the writer lock.
	 */
	waiting_list = ui->ui_io_lock->io_list_front;
	while (waiting_list) {
		ASSERT(waiting_list->cs_flags & MD_RCS_WAITING);
		ASSERT(!(waiting_list->cs_flags & MD_RCS_LLOCKD));
		for (cs1 = un->un_linlck_chn; cs1; cs1 = cs1->cs_linlck_next)
			if (raid_io_overlaps(waiting_list, cs1) == 1)
				break;
		/*
		 * There is an I/O that overlaps this one, so go on to
		 * the next I/O in the waiting list.
		 */
		if (cs1) {
			previous = waiting_list;
			waiting_list = waiting_list->cs_linlck_next;
			continue;
		}

		/*
		 * There are no I/Os that overlap this one, so remove it
		 * from the waiting queue and start it.
		 */
		if (raid_check_pw(waiting_list)) {
			ASSERT(waiting_list->cs_flags & MD_RCS_WAITING);
			previous = waiting_list;
			waiting_list = waiting_list->cs_linlck_next;
			continue;
		}
		ASSERT(waiting_list->cs_flags & MD_RCS_WAITING);

		next = waiting_list->cs_linlck_next;
		if (previous)
			previous->cs_linlck_next = next;
		else
			ui->ui_io_lock->io_list_front = next;

		if (ui->ui_io_lock->io_list_front == NULL)
			ui->ui_io_lock->io_list_back = NULL;

		if (ui->ui_io_lock->io_list_back == waiting_list)
			ui->ui_io_lock->io_list_back = previous;

		waiting_list->cs_linlck_next = NULL;
		waiting_list->cs_flags &= ~MD_RCS_WAITING;
		STAT_DEC(raid_write_queue_length);
		if (raid_line_writer_lock(waiting_list, 0))
			panic("region locking corrupted");

		ASSERT(waiting_list->cs_flags & MD_RCS_LLOCKD);
		daemon_request(&md_mstr_daemon, raid_startio,
		    (daemon_queue_t *)waiting_list, REQ_OLD);
		waiting_list = next;
	}
	mutex_exit(io_list_mutex);
}
void
raid_line_exit(md_raidcs_t *cs)
{
	mr_unit_t	*un;

	un = cs->cs_ps->ps_un;
	STAT_CHECK(raid_line_lock_wait, MUTEX_HELD(&un->un_linlck_mx));
	mutex_enter(&un->un_linlck_mx);
	if (cs->cs_flags & MD_RCS_READER)
		STAT_DEC(raid_reader_locks_active);
	else
		STAT_DEC(raid_write_locks_active);

	if (cs->cs_linlck_prev)
		cs->cs_linlck_prev->cs_linlck_next = cs->cs_linlck_next;
	else
		un->un_linlck_chn = cs->cs_linlck_next;
	if (cs->cs_linlck_next)
		cs->cs_linlck_next->cs_linlck_prev = cs->cs_linlck_prev;

	cs->cs_flags &= ~MD_RCS_LLOCKD;

	if (un->un_linlck_flg)
		cv_broadcast(&un->un_linlck_cv);

	un->un_linlck_flg = 0;
	cs->cs_line = MD_DISKADDR_ERROR;

	raid_cancel_pwslot(cs);
	/*
	 * Now that the lock is dropped, go ahead and see if there are
	 * any other writes that can be started up.
	 */
	raid_io_startup(un);

	mutex_exit(&un->un_linlck_mx);
}

/*
 * NAMES:	raid_line, raid_dcolumn, raid_pcolumn
 * DESCRIPTION: RAID metadevice APIs for mapping a segment # to its line #,
 *		data column # and parity column #.
 * PARAMETERS:	diskaddr_t segment - segment number
 *		mr_unit_t  *un	   - pointer to a unit structure
 * RETURNS:	raid_line returns the line #
 *		raid_dcolumn returns the data column #
 *		raid_pcolumn returns the parity column #
 */
static diskaddr_t
raid_line(diskaddr_t segment, mr_unit_t *un)
{
	diskaddr_t	adj_seg;
	diskaddr_t	line;
	diskaddr_t	max_orig_segment;

	max_orig_segment = (un->un_origcolumncnt - 1) * un->un_segsincolumn;
	if (segment >= max_orig_segment) {
		adj_seg = segment - max_orig_segment;
		line = adj_seg % un->un_segsincolumn;
	} else {
		line = segment / (un->un_origcolumncnt - 1);
	}
	return (line);
}

uint_t
raid_dcolumn(diskaddr_t segment, mr_unit_t *un)
{
	diskaddr_t	adj_seg;
	diskaddr_t	line;
	diskaddr_t	max_orig_segment;
	uint_t		column;

	max_orig_segment = (un->un_origcolumncnt - 1) * un->un_segsincolumn;
	if (segment >= max_orig_segment) {
		adj_seg = segment - max_orig_segment;
		column = un->un_origcolumncnt +
		    (uint_t)(adj_seg / un->un_segsincolumn);
	} else {
		line = segment / (un->un_origcolumncnt - 1);
		column = (uint_t)((segment % (un->un_origcolumncnt - 1) + line)
		    % un->un_origcolumncnt);
	}
	return (column);
}
uint_t
raid_pcolumn(diskaddr_t segment, mr_unit_t *un)
{
	diskaddr_t	adj_seg;
	diskaddr_t	line;
	diskaddr_t	max_orig_segment;
	uint_t		column;

	max_orig_segment = (un->un_origcolumncnt - 1) * un->un_segsincolumn;
	if (segment >= max_orig_segment) {
		adj_seg = segment - max_orig_segment;
		line = adj_seg % un->un_segsincolumn;
	} else {
		line = segment / (un->un_origcolumncnt - 1);
	}
	column = (uint_t)((line + (un->un_origcolumncnt - 1))
	    % un->un_origcolumncnt);
	return (column);
}
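/*
 * Worked example (using the layout sketched in the raid_iosetup()
 * comment below: un_origcolumncnt = 4, un_segsincolumn = 10).  For
 * segment #5:
 *
 *	line    = 5 / (4 - 1)		= 1
 *	dcolumn = ((5 % 3) + 1) % 4	= 3
 *	pcolumn = (1 + (4 - 1)) % 4	= 0
 *
 * i.e. segment #5 lives in line #1, column #3, with that line's parity
 * in column #0 -- matching row line#1 of the table below.  Segments at
 * or beyond (origcolumncnt - 1) * segsincolumn map onto concatenated
 * (expansion) columns instead.
 */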
/*
 * Called from raid_iosetup() to probe each column and ensure that all
 * the columns are in the 'okay' state and meet the 'full line'
 * requirement.  If any column is in error, we don't want to enable the
 * 'full line' flag.  Previously, we would enable it and disable it only
 * when an error was detected after the first 'full line' I/O, which is
 * too late and can lead to data corruption.
 */
static int
raid_check_cols(mr_unit_t *un)
{
	buf_t		bp;
	char		*buf;
	mr_column_t	*colptr;
	minor_t		mnum = MD_SID(un);
	int		i;
	int		err = 0;

	buf = kmem_zalloc((uint_t)DEV_BSIZE, KM_SLEEP);

	for (i = 0; i < un->un_totalcolumncnt; i++) {
		md_dev64_t tmpdev;

		colptr = &un->un_column[i];

		tmpdev = colptr->un_dev;
		/*
		 * Open by device id; if this device is hotspared,
		 * use the hotspare key.
		 */
		tmpdev = md_resolve_bydevid(mnum, tmpdev, HOTSPARED(un, i) ?
		    colptr->un_hs_key : colptr->un_orig_key);

		if (tmpdev == NODEV64) {
			err = 1;
			break;
		}

		colptr->un_dev = tmpdev;

		bzero((caddr_t)&bp, sizeof (buf_t));
		bp.b_back = &bp;
		bp.b_forw = &bp;
		bp.b_flags = (B_READ | B_BUSY);
		sema_init(&bp.b_io, 0, NULL,
		    SEMA_DEFAULT, NULL);
		sema_init(&bp.b_sem, 0, NULL,
		    SEMA_DEFAULT, NULL);
		bp.b_edev = md_dev64_to_dev(colptr->un_dev);
		bp.b_lblkno = colptr->un_pwstart;
		bp.b_bcount = DEV_BSIZE;
		bp.b_bufsize = DEV_BSIZE;
		bp.b_un.b_addr = (caddr_t)buf;
		(void) md_call_strategy(&bp, 0, NULL);
		if (biowait(&bp)) {
			err = 1;
			break;
		}
	}

	kmem_free(buf, DEV_BSIZE);
	return (err);
}

/*
 * NAME:	raid_iosetup
 * DESCRIPTION: RAID metadevice specific I/O setup routine which does
 *		all the necessary calculations to determine the location
 *		of the segment for the I/O.
 * PARAMETERS:	mr_unit_t   *un	   - pointer to RAID unit structure
 *		diskaddr_t  blkno  - block number of the I/O attempt
 *		size_t	    blkcnt - block count for this I/O
 *		md_raidcs_t *cs	   - child structure for each segmented I/O
 *
 * NOTE: The following is an example of a RAID disk layout:
 *
 *	Total Column = 5
 *	Original Column = 4
 *	Segment Per Column = 10
 *
 *		Col#0	Col#1	Col#2	Col#3	Col#4	Col#5	Col#6
 *	-------------------------------------------------------------
 *	line#0	Seg#0	Seg#1	Seg#2	Parity	Seg#30	Seg#40
 *	line#1	Parity	Seg#3	Seg#4	Seg#5	Seg#31
 *	line#2	Seg#8	Parity	Seg#6	Seg#7	Seg#32
 *	line#3	Seg#10	Seg#11	Parity	Seg#9	Seg#33
 *	line#4	Seg#12	Seg#13	Seg#14	Parity	Seg#34
 *	line#5	Parity	Seg#15	Seg#16	Seg#17	Seg#35
 *	line#6	Seg#20	Parity	Seg#18	Seg#19	Seg#36
 *	line#7	Seg#22	Seg#23	Parity	Seg#21	Seg#37
 *	line#8	Seg#24	Seg#25	Seg#26	Parity	Seg#38
 *	line#9	Parity	Seg#27	Seg#28	Seg#29	Seg#39
 */
static size_t
raid_iosetup(
	mr_unit_t	*un,
	diskaddr_t	blkno,
	size_t		blkcnt,
	md_raidcs_t	*cs
)
{
	diskaddr_t	segment;
	diskaddr_t	segstart;
	diskaddr_t	segoff;
	size_t		leftover;
	diskaddr_t	line;
	uint_t		iosize;
	uint_t		colcnt;

	/* calculate the segment# and offset for the block */
	segment = blkno / un->un_segsize;
	segstart = segment * un->un_segsize;
	segoff = blkno - segstart;
	iosize = un->un_iosize - 1;
	colcnt = un->un_totalcolumncnt - 1;
	line = raid_line(segment, un);
	cs->cs_dcolumn = raid_dcolumn(segment, un);
	cs->cs_pcolumn = raid_pcolumn(segment, un);
	cs->cs_dflags = un->un_column[cs->cs_dcolumn].un_devflags;
	cs->cs_pflags = un->un_column[cs->cs_pcolumn].un_devflags;
	cs->cs_line = line;

	if ((cs->cs_ps->ps_flags & MD_RPS_WRITE) &&
	    (UNIT_STATE(un) & RCS_OKAY) &&
	    (segoff == 0) &&
	    (un->un_totalcolumncnt == un->un_origcolumncnt) &&
	    (un->un_segsize < un->un_iosize) &&
	    (un->un_iosize <= un->un_maxio) &&
	    (blkno == line * un->un_segsize * colcnt) &&
	    (blkcnt >= ((un->un_totalcolumncnt - 1) * un->un_segsize)) &&
	    (raid_state_cnt(un, RCS_OKAY) == un->un_origcolumncnt) &&
	    (raid_check_cols(un) == 0)) {

		md_raidcbuf_t	**cbufp;
		md_raidcbuf_t	*cbuf;
		int		i, j;

		STAT_INC(raid_full_line_writes);
		leftover = blkcnt - (un->un_segsize * colcnt);
		ASSERT(blkcnt >= (un->un_segsize * colcnt));
		cs->cs_blkno = line * un->un_segsize;
		cs->cs_blkcnt = un->un_segsize;
		cs->cs_lastblk = cs->cs_blkno + cs->cs_blkcnt - 1;
		cs->cs_bcount = dbtob(cs->cs_blkcnt);
		cs->cs_flags |= MD_RCS_LINE;

		cbufp = &cs->cs_buflist;
		for (i = 0; i < un->un_totalcolumncnt; i++) {
			j = cs->cs_dcolumn + i;
			j = j % un->un_totalcolumncnt;

			if ((j == cs->cs_dcolumn) || (j == cs->cs_pcolumn))
				continue;
			cbuf = kmem_cache_alloc(raid_cbuf_cache,
			    MD_ALLOCFLAGS);
			raid_cbuf_init(cbuf);
			cbuf->cbuf_un = cs->cs_un;
			cbuf->cbuf_ps = cs->cs_ps;
			cbuf->cbuf_column = j;
			cbuf->cbuf_bcount = dbtob(un->un_segsize);
			*cbufp = cbuf;
			cbufp = &cbuf->cbuf_next;
		}
		return (leftover);
	}

	leftover = blkcnt - (un->un_segsize - segoff);
	if (blkcnt > (un->un_segsize - segoff))
		blkcnt -= leftover;
	else
		leftover = 0;

	if (blkcnt > (size_t)iosize) {
		leftover += (blkcnt - iosize);
		blkcnt = iosize;
	}

	/* calculate the line# and column# for the segment */
	cs->cs_flags &= ~MD_RCS_LINE;
	cs->cs_blkno = line * un->un_segsize + segoff;
	cs->cs_blkcnt = (uint_t)blkcnt;
	cs->cs_lastblk = cs->cs_blkno + cs->cs_blkcnt - 1;
	cs->cs_bcount = dbtob((uint_t)blkcnt);
	return (leftover);
}
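/*
 * Worked example for the partial-write path above (illustrative only,
 * assuming un_iosize is large enough that the iosize clamp does not
 * apply): with un_segsize = 32 blocks, a request at blkno = 40 for
 * blkcnt = 100 blocks gives
 *
 *	segment  = 40 / 32 = 1
 *	segoff   = 40 - 32 = 8
 *	leftover = 100 - (32 - 8) = 76
 *	blkcnt   = 24	(clamped to the end of segment #1)
 *
 * so this child covers blocks 40..63 and raid_iosetup() returns 76,
 * telling the caller how many blocks still need further children.
 */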
/*
 * NAME:	raid_done
 * DESCRIPTION: RAID metadevice I/O done interrupt routine
 * PARAMETERS:	struct buf *bp - pointer to a buffer structure
 */
static void
raid_done(struct buf *bp)
{
	md_raidcs_t	*cs;
	int		flags, frags;

	sema_v(&bp->b_io);
	cs = (md_raidcs_t *)bp->b_chain;

	ASSERT(cs != NULL);

	mutex_enter(&cs->cs_mx);
	if (bp->b_flags & B_ERROR) {
		cs->cs_flags |= MD_RCS_ERROR;
		cs->cs_flags &= ~(MD_RCS_ISCALL);
	}

	flags = cs->cs_flags;
	frags = --cs->cs_frags;
	mutex_exit(&cs->cs_mx);
	if (frags != 0) {
		return;
	}

	if (flags & MD_RCS_ERROR) {
		if (cs->cs_error_call) {
			daemon_request(&md_done_daemon, cs->cs_error_call,
			    (daemon_queue_t *)cs, REQ_OLD);
		}
		return;
	}

	if (flags & MD_RCS_ISCALL) {
		cs->cs_flags &= ~(MD_RCS_ISCALL);
		(*(cs->cs_call))(cs);
		return;
	}
	daemon_request(&md_done_daemon, cs->cs_call,
	    (daemon_queue_t *)cs, REQ_OLD);
}
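/*
 * Illustrative completion flow (a sketch with hypothetical values, not
 * driver code): a child that issues both a data and a parity I/O via
 * raidio() below sets cs_frags = 2 first.  Each biodone() fires
 * raid_done(), and only the final fragment dispatches the next stage:
 *
 *	cs->cs_frags = 2;
 *	cs->cs_call = next_stage;	hypothetical continuation
 *	raidio(cs, RIO_READ | RIO_DATA);
 *	raidio(cs, RIO_READ | RIO_PARITY);
 *
 * If either buf completes with B_ERROR, cs_error_call is scheduled on
 * md_done_daemon instead of cs_call.
 */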
/*
 * The flag RIO_EXTRA is used when dealing with a column in the process
 * of being resynced.  During the resync, writes may have to take place
 * on both the original component and a hotspare component.
 */
#define	RIO_DATA	0x00100	/* use data buffer & data column */
#define	RIO_PARITY	0x00200	/* use parity buffer & parity column */
#define	RIO_WRITE	0x00400	/* issue a write */
#define	RIO_READ	0x00800	/* issue a read */
#define	RIO_PWIO	0x01000	/* do the I/O to the prewrite entry */
#define	RIO_ALT		0x02000	/* do write to alternate device */
#define	RIO_EXTRA	0x04000	/* use extra buffer */

#define	RIO_COLMASK	0x000ff

#define	RIO_PREWRITE	(RIO_WRITE | RIO_PWIO)

/*
 * NAME:	raidio
 * DESCRIPTION: issue a RAID metadevice I/O to a data, parity or
 *		prewrite location, as directed by the flags
 * PARAMETERS:	md_raidcs_t *cs - pointer to a child structure
 *		int	  flags - RIO_* flags selecting buffer and target
 */
static void
raidio(md_raidcs_t *cs, int flags)
{
	buf_t		*bp;
	int		column;
	int		flag;
	void		*private;
	mr_unit_t	*un;
	int		iosize;
	diskaddr_t	pwstart;
	diskaddr_t	devstart;
	md_dev64_t	dev;

	un = cs->cs_un;

	ASSERT(IO_READER_HELD(un));
	ASSERT(UNIT_READER_HELD(un));

	if (flags & RIO_DATA) {
		if (flags & RIO_EXTRA)
			bp = &cs->cs_hbuf;
		else
			bp = &cs->cs_dbuf;
		bp->b_un.b_addr = cs->cs_dbuffer;
		column = cs->cs_dcolumn;
	} else {
		if (flags & RIO_EXTRA)
			bp = &cs->cs_hbuf;
		else
			bp = &cs->cs_pbuf;
		bp->b_un.b_addr = cs->cs_pbuffer;
		column = cs->cs_pcolumn;
	}
	if (flags & RIO_COLMASK)
		column = (flags & RIO_COLMASK) - 1;

	bp->b_bcount = cs->cs_bcount;
	bp->b_bufsize = cs->cs_bcount;
	iosize = un->un_iosize;

	/* check if the hotspared device will be used */
	if (flags & RIO_ALT && (flags & RIO_WRITE)) {
		pwstart = un->un_column[column].un_alt_pwstart;
		devstart = un->un_column[column].un_alt_devstart;
		dev = un->un_column[column].un_alt_dev;
	} else {
		pwstart = un->un_column[column].un_pwstart;
		devstart = un->un_column[column].un_devstart;
		dev = un->un_column[column].un_dev;
	}

	/* if not writing to the prewrite log, skip the log header */
	if ((flags & RIO_PWIO) == 0) {
		bp->b_lblkno = devstart + cs->cs_blkno;
		bp->b_un.b_addr += DEV_BSIZE;
	} else {
		bp->b_bcount += DEV_BSIZE;
		bp->b_bufsize = bp->b_bcount;
		if (flags & RIO_DATA) {
			bp->b_lblkno = cs->cs_dpwslot * iosize + pwstart;
		} else { /* not DATA -> PARITY */
			bp->b_lblkno = cs->cs_ppwslot * iosize + pwstart;
		}
	}

	bp->b_flags &= ~(B_READ | B_WRITE | B_ERROR | nv_available);
	bp->b_flags |= B_BUSY;
	if (flags & RIO_READ) {
		bp->b_flags |= B_READ;
	} else {
		bp->b_flags |= B_WRITE;
		if ((nv_available && nv_parity && (flags & RIO_PARITY)) ||
		    (nv_available && nv_prewrite && (flags & RIO_PWIO)))
			bp->b_flags |= nv_available;
	}
	bp->b_iodone = (int (*)())raid_done;
	bp->b_edev = md_dev64_to_dev(dev);

	ASSERT((bp->b_edev != 0) && (bp->b_edev != NODEV));

	private = cs->cs_strategy_private;
	flag = cs->cs_strategy_flag;

	md_call_strategy(bp, flag, private);
}
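/*
 * Illustrative flag combinations (the last one appears verbatim in
 * genlineparity() below; the first two are hypothetical):
 *
 *	raidio(cs, RIO_READ | RIO_DATA);	read the data column
 *	raidio(cs, RIO_READ | RIO_PARITY);	read the parity column
 *	raidio(cs, RIO_PREWRITE | RIO_DATA);	log data to prewrite slot
 *
 * RIO_PWIO steers the I/O at the prewrite region (pwstart-relative,
 * including the DEV_BSIZE header block); without it the I/O goes to
 * the devstart-relative data area and the header block is skipped.
 */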
/*
 * NAME:	genstandardparity
 * DESCRIPTION: compute the new data and parity prewrite buffers (and
 *		their checksums) for a single-segment write by folding
 *		the user data into the old data and old parity
 * PARAMETERS:	md_raidcs_t *cs - pointer to a child structure
 */
static void
genstandardparity(md_raidcs_t *cs)
{
	uint_t	*dbuf, *pbuf;
	size_t	wordcnt;
	uint_t	dsum = 0;
	uint_t	psum = 0;

	ASSERT((cs->cs_bcount & 0x3) == 0);

	wordcnt = cs->cs_bcount / sizeof (uint_t);

	dbuf = (uint_t *)(void *)(cs->cs_dbuffer + DEV_BSIZE);
	pbuf = (uint_t *)(void *)(cs->cs_pbuffer + DEV_BSIZE);

	/* Word aligned */
	if (((uintptr_t)cs->cs_addr & 0x3) == 0) {
		uint_t	*uwbuf = (uint_t *)(void *)(cs->cs_addr);
		uint_t	uval;

		while (wordcnt--) {
			uval = *uwbuf++;
			psum ^= (*pbuf = ((*pbuf ^ *dbuf) ^ uval));
			++pbuf;
			*dbuf = uval;
			dsum ^= uval;
			++dbuf;
		}
	} else {
		uchar_t	*ubbuf = (uchar_t *)(cs->cs_addr);
		union {
			uint_t	wb;
			uchar_t	bb[4];
		} cb;

		while (wordcnt--) {
			cb.bb[0] = *ubbuf++;
			cb.bb[1] = *ubbuf++;
			cb.bb[2] = *ubbuf++;
			cb.bb[3] = *ubbuf++;
			psum ^= (*pbuf = ((*pbuf ^ *dbuf) ^ cb.wb));
			++pbuf;
			*dbuf = cb.wb;
			dsum ^= cb.wb;
			++dbuf;
		}
	}

	RAID_FILLIN_RPW(cs->cs_dbuffer, cs->cs_un, dsum, cs->cs_pcolumn,
	    cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid,
	    2, cs->cs_dcolumn, RAID_PWMAGIC);

	RAID_FILLIN_RPW(cs->cs_pbuffer, cs->cs_un, psum, cs->cs_dcolumn,
	    cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid,
	    2, cs->cs_pcolumn, RAID_PWMAGIC);
}
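/*
 * Worked example of the read-modify-write parity update above: with an
 * old data word of 0x1111, an old parity word of 0xffff and a new data
 * word of 0x2222,
 *
 *	new parity = old parity ^ old data ^ new data
 *		   = 0xffff ^ 0x1111 ^ 0x2222 = 0xcccc
 *
 * dsum and psum accumulate the XOR of every word written; they are
 * recorded in the prewrite headers via RAID_FILLIN_RPW as checksums.
 */

/*
 * NAME:	genlineparity
 * DESCRIPTION: compute the data and parity prewrite buffers for a
 *		full-line write: the first data column is copied into
 *		the buffers, then each remaining cbuf column is folded
 *		into the shared parity buffer (header description
 *		inferred from the code below)
 * PARAMETERS:	md_raidcs_t *cs - pointer to a child structure
 */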
2429*0Sstevel@tonic-gate 
2430*0Sstevel@tonic-gate static void
2431*0Sstevel@tonic-gate genlineparity(md_raidcs_t *cs)
2432*0Sstevel@tonic-gate {
2433*0Sstevel@tonic-gate 
2434*0Sstevel@tonic-gate 	mr_unit_t	*un = cs->cs_un;
2435*0Sstevel@tonic-gate 	md_raidcbuf_t	*cbuf;
2436*0Sstevel@tonic-gate 	uint_t	*pbuf, *dbuf;
2437*0Sstevel@tonic-gate 	uint_t	*uwbuf;
2438*0Sstevel@tonic-gate 	uchar_t	*ubbuf;
2439*0Sstevel@tonic-gate 	size_t	wordcnt;
2440*0Sstevel@tonic-gate 	uint_t	psum = 0, dsum = 0;
2441*0Sstevel@tonic-gate 	size_t	count = un->un_segsize * DEV_BSIZE;
2442*0Sstevel@tonic-gate 	uint_t	col;
2443*0Sstevel@tonic-gate 	buf_t	*bp;
2444*0Sstevel@tonic-gate 
2445*0Sstevel@tonic-gate 	ASSERT((cs->cs_bcount & 0x3) == 0);
2446*0Sstevel@tonic-gate 
2447*0Sstevel@tonic-gate 	pbuf = (uint_t *)(void *)(cs->cs_pbuffer + DEV_BSIZE);
2448*0Sstevel@tonic-gate 	dbuf = (uint_t *)(void *)(cs->cs_dbuffer + DEV_BSIZE);
2449*0Sstevel@tonic-gate 	uwbuf = (uint_t *)(void *)(cs->cs_addr);
2450*0Sstevel@tonic-gate 	ubbuf = (uchar_t *)(void *)(cs->cs_addr);
2451*0Sstevel@tonic-gate 
2452*0Sstevel@tonic-gate 	wordcnt = count / sizeof (uint_t);
2453*0Sstevel@tonic-gate 
2454*0Sstevel@tonic-gate 	/* Word aligned */
2455*0Sstevel@tonic-gate 	if (((uintptr_t)cs->cs_addr & 0x3) == 0) {
2456*0Sstevel@tonic-gate 		uint_t	uval;
2457*0Sstevel@tonic-gate 
2458*0Sstevel@tonic-gate 		while (wordcnt--) {
2459*0Sstevel@tonic-gate 			uval = *uwbuf++;
2460*0Sstevel@tonic-gate 			*dbuf = uval;
2461*0Sstevel@tonic-gate 			*pbuf = uval;
2462*0Sstevel@tonic-gate 			dsum ^= uval;
2463*0Sstevel@tonic-gate 			++pbuf;
2464*0Sstevel@tonic-gate 			++dbuf;
2465*0Sstevel@tonic-gate 		}
2466*0Sstevel@tonic-gate 	} else {
2467*0Sstevel@tonic-gate 		union {
2468*0Sstevel@tonic-gate 			uint_t	wb;
2469*0Sstevel@tonic-gate 			uchar_t	bb[4];
2470*0Sstevel@tonic-gate 		} cb;
2471*0Sstevel@tonic-gate 
2472*0Sstevel@tonic-gate 		while (wordcnt--) {
2473*0Sstevel@tonic-gate 			cb.bb[0] = *ubbuf++;
2474*0Sstevel@tonic-gate 			cb.bb[1] = *ubbuf++;
2475*0Sstevel@tonic-gate 			cb.bb[2] = *ubbuf++;
2476*0Sstevel@tonic-gate 			cb.bb[3] = *ubbuf++;
2477*0Sstevel@tonic-gate 			*dbuf = cb.wb;
2478*0Sstevel@tonic-gate 			*pbuf = cb.wb;
2479*0Sstevel@tonic-gate 			dsum ^= cb.wb;
2480*0Sstevel@tonic-gate 			++pbuf;
2481*0Sstevel@tonic-gate 			++dbuf;
2482*0Sstevel@tonic-gate 		}
2483*0Sstevel@tonic-gate 	}
2484*0Sstevel@tonic-gate 
2485*0Sstevel@tonic-gate 	RAID_FILLIN_RPW(cs->cs_dbuffer, un, dsum, cs->cs_pcolumn,
2486*0Sstevel@tonic-gate 	    cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid,
2487*0Sstevel@tonic-gate 	    un->un_totalcolumncnt, cs->cs_dcolumn, RAID_PWMAGIC);
2488*0Sstevel@tonic-gate 
2489*0Sstevel@tonic-gate 	raidio(cs, RIO_PREWRITE | RIO_DATA);
2490*0Sstevel@tonic-gate 
2491*0Sstevel@tonic-gate 	for (cbuf = cs->cs_buflist; cbuf; cbuf =
cbuf->cbuf_next) { 2492*0Sstevel@tonic-gate 2493*0Sstevel@tonic-gate dsum = 0; 2494*0Sstevel@tonic-gate pbuf = (uint_t *)(void *)(cs->cs_pbuffer + DEV_BSIZE); 2495*0Sstevel@tonic-gate dbuf = (uint_t *)(void *)(cbuf->cbuf_buffer + DEV_BSIZE); 2496*0Sstevel@tonic-gate 2497*0Sstevel@tonic-gate wordcnt = count / sizeof (uint_t); 2498*0Sstevel@tonic-gate 2499*0Sstevel@tonic-gate col = cbuf->cbuf_column; 2500*0Sstevel@tonic-gate 2501*0Sstevel@tonic-gate /* Word aligned */ 2502*0Sstevel@tonic-gate if (((uintptr_t)cs->cs_addr & 0x3) == 0) { 2503*0Sstevel@tonic-gate uint_t uval; 2504*0Sstevel@tonic-gate 2505*0Sstevel@tonic-gate /* 2506*0Sstevel@tonic-gate * Only calculate psum when working on the last 2507*0Sstevel@tonic-gate * data buffer. 2508*0Sstevel@tonic-gate */ 2509*0Sstevel@tonic-gate if (cbuf->cbuf_next == NULL) { 2510*0Sstevel@tonic-gate psum = 0; 2511*0Sstevel@tonic-gate while (wordcnt--) { 2512*0Sstevel@tonic-gate uval = *uwbuf++; 2513*0Sstevel@tonic-gate *dbuf = uval; 2514*0Sstevel@tonic-gate psum ^= (*pbuf ^= uval); 2515*0Sstevel@tonic-gate dsum ^= uval; 2516*0Sstevel@tonic-gate ++dbuf; 2517*0Sstevel@tonic-gate ++pbuf; 2518*0Sstevel@tonic-gate } 2519*0Sstevel@tonic-gate } else { 2520*0Sstevel@tonic-gate while (wordcnt--) { 2521*0Sstevel@tonic-gate uval = *uwbuf++; 2522*0Sstevel@tonic-gate *dbuf = uval; 2523*0Sstevel@tonic-gate *pbuf ^= uval; 2524*0Sstevel@tonic-gate dsum ^= uval; 2525*0Sstevel@tonic-gate ++dbuf; 2526*0Sstevel@tonic-gate ++pbuf; 2527*0Sstevel@tonic-gate } 2528*0Sstevel@tonic-gate } 2529*0Sstevel@tonic-gate } else { 2530*0Sstevel@tonic-gate union { 2531*0Sstevel@tonic-gate uint_t wb; 2532*0Sstevel@tonic-gate uchar_t bb[4]; 2533*0Sstevel@tonic-gate } cb; 2534*0Sstevel@tonic-gate 2535*0Sstevel@tonic-gate /* 2536*0Sstevel@tonic-gate * Only calculate psum when working on the last 2537*0Sstevel@tonic-gate * data buffer. 
2538*0Sstevel@tonic-gate 			 */
2539*0Sstevel@tonic-gate 			if (cbuf->cbuf_next == NULL) {
2540*0Sstevel@tonic-gate 				psum = 0;
2541*0Sstevel@tonic-gate 				while (wordcnt--) {
2542*0Sstevel@tonic-gate 					cb.bb[0] = *ubbuf++;
2543*0Sstevel@tonic-gate 					cb.bb[1] = *ubbuf++;
2544*0Sstevel@tonic-gate 					cb.bb[2] = *ubbuf++;
2545*0Sstevel@tonic-gate 					cb.bb[3] = *ubbuf++;
2546*0Sstevel@tonic-gate 					*dbuf = cb.wb;
2547*0Sstevel@tonic-gate 					psum ^= (*pbuf ^= cb.wb);
2548*0Sstevel@tonic-gate 					dsum ^= cb.wb;
2549*0Sstevel@tonic-gate 					++dbuf;
2550*0Sstevel@tonic-gate 					++pbuf;
2551*0Sstevel@tonic-gate 				}
2552*0Sstevel@tonic-gate 			} else {
2553*0Sstevel@tonic-gate 				while (wordcnt--) {
2554*0Sstevel@tonic-gate 					cb.bb[0] = *ubbuf++;
2555*0Sstevel@tonic-gate 					cb.bb[1] = *ubbuf++;
2556*0Sstevel@tonic-gate 					cb.bb[2] = *ubbuf++;
2557*0Sstevel@tonic-gate 					cb.bb[3] = *ubbuf++;
2558*0Sstevel@tonic-gate 					*dbuf = cb.wb;
2559*0Sstevel@tonic-gate 					*pbuf ^= cb.wb;
2560*0Sstevel@tonic-gate 					dsum ^= cb.wb;
2561*0Sstevel@tonic-gate 					++dbuf;
2562*0Sstevel@tonic-gate 					++pbuf;
2563*0Sstevel@tonic-gate 				}
2564*0Sstevel@tonic-gate 			}
2565*0Sstevel@tonic-gate 		}
2566*0Sstevel@tonic-gate 		RAID_FILLIN_RPW(cbuf->cbuf_buffer, un, dsum, cs->cs_pcolumn,
2567*0Sstevel@tonic-gate 		    cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid,
2568*0Sstevel@tonic-gate 		    un->un_totalcolumncnt, col, RAID_PWMAGIC);
2569*0Sstevel@tonic-gate 
2570*0Sstevel@tonic-gate 		/*
2571*0Sstevel@tonic-gate 		 * fill in buffer for write to prewrite area
2572*0Sstevel@tonic-gate 		 */
2573*0Sstevel@tonic-gate 		bp = &cbuf->cbuf_bp;
2574*0Sstevel@tonic-gate 		bp->b_un.b_addr = cbuf->cbuf_buffer;
2575*0Sstevel@tonic-gate 		bp->b_bcount = cbuf->cbuf_bcount + DEV_BSIZE;
2576*0Sstevel@tonic-gate 		bp->b_bufsize = bp->b_bcount;
2577*0Sstevel@tonic-gate 		bp->b_lblkno = (cbuf->cbuf_pwslot * un->un_iosize) +
2578*0Sstevel@tonic-gate 		    un->un_column[col].un_pwstart;
2579*0Sstevel@tonic-gate 		bp->b_flags = B_WRITE | B_BUSY;
2580*0Sstevel@tonic-gate 		if (nv_available && nv_prewrite)
2581*0Sstevel@tonic-gate 			bp->b_flags |= nv_available;
2582*0Sstevel@tonic-gate 		bp->b_iodone = (int (*)())raid_done;
2583*0Sstevel@tonic-gate 		bp->b_edev = md_dev64_to_dev(un->un_column[col].un_dev);
2584*0Sstevel@tonic-gate 		bp->b_chain = (struct buf *)cs;
2585*0Sstevel@tonic-gate 		md_call_strategy(bp,
2586*0Sstevel@tonic-gate 		    cs->cs_strategy_flag, cs->cs_strategy_private);
2587*0Sstevel@tonic-gate 	}
2588*0Sstevel@tonic-gate 
2589*0Sstevel@tonic-gate 	RAID_FILLIN_RPW(cs->cs_pbuffer, un, psum, cs->cs_dcolumn,
2590*0Sstevel@tonic-gate 	    cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid,
2591*0Sstevel@tonic-gate 	    un->un_totalcolumncnt, cs->cs_pcolumn, RAID_PWMAGIC);
2592*0Sstevel@tonic-gate 
2593*0Sstevel@tonic-gate 	raidio(cs, RIO_PREWRITE | RIO_PARITY);
2594*0Sstevel@tonic-gate }
2595*0Sstevel@tonic-gate 
2596*0Sstevel@tonic-gate /*
2597*0Sstevel@tonic-gate  * NAME:	raid_readregenloop
2598*0Sstevel@tonic-gate  * DESCRIPTION:	regenerate a failed column's data for a read by XORing the parity with the data of every other column
2599*0Sstevel@tonic-gate  * PARAMETERS:	md_raidcs_t *cs - pointer to a child structure
2600*0Sstevel@tonic-gate  */
2601*0Sstevel@tonic-gate static void
2602*0Sstevel@tonic-gate raid_readregenloop(md_raidcs_t *cs)
2603*0Sstevel@tonic-gate {
2604*0Sstevel@tonic-gate 	mr_unit_t	*un;
2605*0Sstevel@tonic-gate 	md_raidps_t	*ps;
2606*0Sstevel@tonic-gate 	uint_t	*dbuf;
2607*0Sstevel@tonic-gate 	uint_t	*pbuf;
2608*0Sstevel@tonic-gate 	size_t	wordcnt;
2609*0Sstevel@tonic-gate 
2610*0Sstevel@tonic-gate 	un = cs->cs_un;
2611*0Sstevel@tonic-gate 
2612*0Sstevel@tonic-gate 	/*
2613*0Sstevel@tonic-gate 	 * XOR the parity with data bytes, must skip the
2614*0Sstevel@tonic-gate 	 * pre-write entry header in all data/parity buffers
2615*0Sstevel@tonic-gate 	 */
2616*0Sstevel@tonic-gate 	wordcnt = cs->cs_bcount / sizeof (uint_t);
2617*0Sstevel@tonic-gate 	dbuf = (uint_t *)(void *)(cs->cs_dbuffer + DEV_BSIZE);
2618*0Sstevel@tonic-gate 	pbuf = (uint_t *)(void *)(cs->cs_pbuffer + DEV_BSIZE);
2619*0Sstevel@tonic-gate 	while (wordcnt--)
2620*0Sstevel@tonic-gate 		*dbuf++ ^= *pbuf++;
2621*0Sstevel@tonic-gate 
2622*0Sstevel@tonic-gate 	/* bump up the loop count */
2623*0Sstevel@tonic-gate 	cs->cs_loop++;
2624*0Sstevel@tonic-gate 
2625*0Sstevel@tonic-gate 	/* skip the errored component */
2626*0Sstevel@tonic-gate 	if (cs->cs_loop == cs->cs_dcolumn)
2627*0Sstevel@tonic-gate 		cs->cs_loop++;
2628*0Sstevel@tonic-gate 
2629*0Sstevel@tonic-gate 	if (cs->cs_loop != un->un_totalcolumncnt) {
2630*0Sstevel@tonic-gate 		cs->cs_frags = 1;
2631*0Sstevel@tonic-gate 		raidio(cs, RIO_PARITY | RIO_READ | (cs->cs_loop + 1));
2632*0Sstevel@tonic-gate 		return;
2633*0Sstevel@tonic-gate 	}
2634*0Sstevel@tonic-gate 	/* reached the end of the loop */
2635*0Sstevel@tonic-gate 	ps = cs->cs_ps;
2636*0Sstevel@tonic-gate 	bcopy(cs->cs_dbuffer + DEV_BSIZE, cs->cs_addr, cs->cs_bcount);
2637*0Sstevel@tonic-gate 	raid_free_child(cs, 1);
2638*0Sstevel@tonic-gate 
2639*0Sstevel@tonic-gate 	/* decrement readfrags */
2640*0Sstevel@tonic-gate 	raid_free_parent(ps, RFP_DECR_READFRAGS | RFP_RLS_LOCK);
2641*0Sstevel@tonic-gate }
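/*
 * raid_readregenloop() above recovers an unreadable block by XOR: the
 * data buffer starts zeroed, and each pass folds in one more column
 * (parity plus every healthy data column) until only the failed
 * column's contents remain.  A standalone sketch of the same idea
 * (not driver code):
 */
#if 0
static void
xor_regen(unsigned int *accum, unsigned int *const *col, int ncols,
    int failed, unsigned long words)
{
	int c;
	unsigned long w;

	for (c = 0; c < ncols; c++) {
		if (c == failed)
			continue;	/* skip the errored column */
		for (w = 0; w < words; w++)
			accum[w] ^= col[c][w];
	}
}
#endif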
2642*0Sstevel@tonic-gate 
2643*0Sstevel@tonic-gate /*
2644*0Sstevel@tonic-gate  * NAME:	raid_read_io
2645*0Sstevel@tonic-gate  * DESCRIPTION:	RAID metadevice read I/O routine
2646*0Sstevel@tonic-gate  * PARAMETERS:	mr_unit_t *un - pointer to a unit structure
2647*0Sstevel@tonic-gate  *		md_raidcs_t *cs - pointer to a child structure
2648*0Sstevel@tonic-gate  */
2649*0Sstevel@tonic-gate static void
2650*0Sstevel@tonic-gate raid_read_io(mr_unit_t *un, md_raidcs_t *cs)
2651*0Sstevel@tonic-gate {
2652*0Sstevel@tonic-gate 	int	flag;
2653*0Sstevel@tonic-gate 	void	*private;
2654*0Sstevel@tonic-gate 	buf_t	*bp;
2655*0Sstevel@tonic-gate 	buf_t	*pb = cs->cs_ps->ps_bp;
2656*0Sstevel@tonic-gate 	mr_column_t	*column;
2657*0Sstevel@tonic-gate 
2658*0Sstevel@tonic-gate 	flag = cs->cs_strategy_flag;
2659*0Sstevel@tonic-gate 	private = cs->cs_strategy_private;
2660*0Sstevel@tonic-gate 	column = &un->un_column[cs->cs_dcolumn];
2661*0Sstevel@tonic-gate 
2662*0Sstevel@tonic-gate 	/*
2663*0Sstevel@tonic-gate 	 * The component to be read is good, simply set up bp structure
2664*0Sstevel@tonic-gate 	 * and call low level md routine doing the read.
2665*0Sstevel@tonic-gate */ 2666*0Sstevel@tonic-gate 2667*0Sstevel@tonic-gate if (COLUMN_ISOKAY(un, cs->cs_dcolumn) || 2668*0Sstevel@tonic-gate (COLUMN_ISLASTERR(un, cs->cs_dcolumn) && 2669*0Sstevel@tonic-gate (cs->cs_flags & MD_RCS_RECOVERY) == 0)) { 2670*0Sstevel@tonic-gate dev_t ddi_dev; /* needed for bioclone, so not md_dev64_t */ 2671*0Sstevel@tonic-gate ddi_dev = md_dev64_to_dev(column->un_dev); 2672*0Sstevel@tonic-gate 2673*0Sstevel@tonic-gate bp = &cs->cs_dbuf; 2674*0Sstevel@tonic-gate bp = md_bioclone(pb, cs->cs_offset, cs->cs_bcount, ddi_dev, 2675*0Sstevel@tonic-gate column->un_devstart + cs->cs_blkno, 2676*0Sstevel@tonic-gate (int (*)())raid_done, bp, KM_NOSLEEP); 2677*0Sstevel@tonic-gate 2678*0Sstevel@tonic-gate bp->b_chain = (buf_t *)cs; 2679*0Sstevel@tonic-gate 2680*0Sstevel@tonic-gate cs->cs_frags = 1; 2681*0Sstevel@tonic-gate cs->cs_error_call = raid_read_error; 2682*0Sstevel@tonic-gate cs->cs_retry_call = raid_read_retry; 2683*0Sstevel@tonic-gate cs->cs_flags |= MD_RCS_ISCALL; 2684*0Sstevel@tonic-gate cs->cs_stage = RAID_READ_DONE; 2685*0Sstevel@tonic-gate cs->cs_call = raid_stage; 2686*0Sstevel@tonic-gate 2687*0Sstevel@tonic-gate ASSERT(bp->b_edev != 0); 2688*0Sstevel@tonic-gate 2689*0Sstevel@tonic-gate md_call_strategy(bp, flag, private); 2690*0Sstevel@tonic-gate return; 2691*0Sstevel@tonic-gate } 2692*0Sstevel@tonic-gate 2693*0Sstevel@tonic-gate /* 2694*0Sstevel@tonic-gate * The component to be read is bad, have to go through 2695*0Sstevel@tonic-gate * raid specific method to read data from other members. 2696*0Sstevel@tonic-gate */ 2697*0Sstevel@tonic-gate cs->cs_loop = 0; 2698*0Sstevel@tonic-gate /* 2699*0Sstevel@tonic-gate * NOTE: always get dbuffer before pbuffer 2700*0Sstevel@tonic-gate * and get both buffers before pwslot 2701*0Sstevel@tonic-gate * otherwise a deadlock could be introduced. 
2702*0Sstevel@tonic-gate 	 */
2703*0Sstevel@tonic-gate 	raid_mapin_buf(cs);
2704*0Sstevel@tonic-gate 	getdbuffer(cs);
2705*0Sstevel@tonic-gate 	getpbuffer(cs);
2706*0Sstevel@tonic-gate 	if (cs->cs_loop == cs->cs_dcolumn)
2707*0Sstevel@tonic-gate 		cs->cs_loop++;
2708*0Sstevel@tonic-gate 
2709*0Sstevel@tonic-gate 	/* zero out data buffer for use as a data sink */
2710*0Sstevel@tonic-gate 	bzero(cs->cs_dbuffer + DEV_BSIZE, cs->cs_bcount);
2711*0Sstevel@tonic-gate 	cs->cs_stage = RAID_NONE;
2712*0Sstevel@tonic-gate 	cs->cs_call = raid_readregenloop;
2713*0Sstevel@tonic-gate 	cs->cs_error_call = raid_read_error;
2714*0Sstevel@tonic-gate 	cs->cs_retry_call = raid_read_no_retry;
2715*0Sstevel@tonic-gate 	cs->cs_frags = 1;
2716*0Sstevel@tonic-gate 
2717*0Sstevel@tonic-gate 	/* use parity buffer to read other columns */
2718*0Sstevel@tonic-gate 	raidio(cs, RIO_PARITY | RIO_READ | (cs->cs_loop + 1));
2719*0Sstevel@tonic-gate }
2720*0Sstevel@tonic-gate 
2721*0Sstevel@tonic-gate /*
2722*0Sstevel@tonic-gate  * NAME:	raid_read
2723*0Sstevel@tonic-gate  * DESCRIPTION:	RAID metadevice read routine
2724*0Sstevel@tonic-gate  * PARAMETERS:	mr_unit_t *un - pointer to a unit structure
2725*0Sstevel@tonic-gate  *		md_raidcs_t *cs - pointer to a child structure
2726*0Sstevel@tonic-gate  */
2727*0Sstevel@tonic-gate static int
2728*0Sstevel@tonic-gate raid_read(mr_unit_t *un, md_raidcs_t *cs)
2729*0Sstevel@tonic-gate {
2730*0Sstevel@tonic-gate 	int	error = 0;
2731*0Sstevel@tonic-gate 	md_raidps_t	*ps;
2732*0Sstevel@tonic-gate 	mdi_unit_t	*ui;
2733*0Sstevel@tonic-gate 	minor_t	mnum;
2734*0Sstevel@tonic-gate 
2735*0Sstevel@tonic-gate 	ASSERT(IO_READER_HELD(un));
2736*0Sstevel@tonic-gate 	ps = cs->cs_ps;
2737*0Sstevel@tonic-gate 	ui = ps->ps_ui;
2738*0Sstevel@tonic-gate 	raid_line_reader_lock(cs, 0);
2739*0Sstevel@tonic-gate 	un = (mr_unit_t *)md_unit_readerlock(ui);
2740*0Sstevel@tonic-gate 	ASSERT(UNIT_STATE(un) != RUS_INIT);
2741*0Sstevel@tonic-gate 	mnum = MD_SID(un);
2742*0Sstevel@tonic-gate 	cs->cs_un = un;
2743*0Sstevel@tonic-gate 
2744*0Sstevel@tonic-gate 	/* make sure the read doesn't go beyond the end of the column */
2745*0Sstevel@tonic-gate 	if (cs->cs_blkno + cs->cs_blkcnt >
2746*0Sstevel@tonic-gate 	    un->un_segsize * un->un_segsincolumn) {
2747*0Sstevel@tonic-gate 		error = ENXIO;
2748*0Sstevel@tonic-gate 	}
2749*0Sstevel@tonic-gate 	if (error)
2750*0Sstevel@tonic-gate 		goto rerror;
2751*0Sstevel@tonic-gate 
2752*0Sstevel@tonic-gate 	if (un->un_state & RUS_REGEN) {
2753*0Sstevel@tonic-gate 		raid_regen_parity(cs);
2754*0Sstevel@tonic-gate 		un = MD_UNIT(mnum);
2755*0Sstevel@tonic-gate 		cs->cs_un = un;
2756*0Sstevel@tonic-gate 	}
2757*0Sstevel@tonic-gate 
2758*0Sstevel@tonic-gate 	raid_read_io(un, cs);
2759*0Sstevel@tonic-gate 	return (0);
2760*0Sstevel@tonic-gate 
2761*0Sstevel@tonic-gate rerror:
2762*0Sstevel@tonic-gate 	raid_error_parent(ps, error);
2763*0Sstevel@tonic-gate 	raid_free_child(cs, 1);
2764*0Sstevel@tonic-gate 	/* decrement readfrags */
2765*0Sstevel@tonic-gate 	raid_free_parent(ps, RFP_DECR_READFRAGS | RFP_RLS_LOCK);
2766*0Sstevel@tonic-gate 	return (0);
2767*0Sstevel@tonic-gate }
2768*0Sstevel@tonic-gate 
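/*
 * The NOTE in raid_read_io() above (data buffer, then parity buffer,
 * then pre-write slot) is ordered resource acquisition: when every
 * thread takes shared resources in one global order, no cycle of
 * waiters can form, so no deadlock is possible.  The degraded read
 * path follows the order literally:
 *
 *	getdbuffer(cs);		1st: data buffer
 *	getpbuffer(cs);		2nd: parity buffer
 *	(pre-write slots)	3rd: only taken after both buffers,
 *				     in the write path
 */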
2769*0Sstevel@tonic-gate /*
2770*0Sstevel@tonic-gate  * NAME:	raid_write_err_retry
2771*0Sstevel@tonic-gate  * DESCRIPTION:	RAID metadevice write retry routine
2772*0Sstevel@tonic-gate  *		write was for parity or data only;
2773*0Sstevel@tonic-gate  *		complete write with error, no recovery possible
2774*0Sstevel@tonic-gate  * PARAMETERS:	mr_unit_t *un - pointer to a unit structure
2775*0Sstevel@tonic-gate  *		md_raidcs_t *cs - pointer to a child structure
2776*0Sstevel@tonic-gate  */
2777*0Sstevel@tonic-gate /*ARGSUSED*/
2778*0Sstevel@tonic-gate static void
2779*0Sstevel@tonic-gate raid_write_err_retry(mr_unit_t *un, md_raidcs_t *cs)
2780*0Sstevel@tonic-gate {
2781*0Sstevel@tonic-gate 	md_raidps_t	*ps = cs->cs_ps;
2782*0Sstevel@tonic-gate 	int	flags = RFP_DECR_FRAGS | RFP_RLS_LOCK;
2783*0Sstevel@tonic-gate 
2784*0Sstevel@tonic-gate 	/* decrement pwfrags if needed, and frags */
2785*0Sstevel@tonic-gate 	if (!(cs->cs_flags & MD_RCS_PWDONE))
2786*0Sstevel@tonic-gate 		flags |= RFP_DECR_PWFRAGS;
2787*0Sstevel@tonic-gate 	raid_error_parent(ps, EIO);
2788*0Sstevel@tonic-gate 	raid_free_child(cs, 1);
2789*0Sstevel@tonic-gate 	raid_free_parent(ps, flags);
2790*0Sstevel@tonic-gate }
2791*0Sstevel@tonic-gate 
2792*0Sstevel@tonic-gate /*
2793*0Sstevel@tonic-gate  * NAME:	raid_write_no_retry
2794*0Sstevel@tonic-gate  * DESCRIPTION:	RAID metadevice write retry routine
2795*0Sstevel@tonic-gate  *		write is too far along to retry and parent
2796*0Sstevel@tonic-gate  *		has already been signaled with iodone.
2797*0Sstevel@tonic-gate  * PARAMETERS:	mr_unit_t *un - pointer to a unit structure
2798*0Sstevel@tonic-gate  *		md_raidcs_t *cs - pointer to a child structure
2799*0Sstevel@tonic-gate  */
2800*0Sstevel@tonic-gate /*ARGSUSED*/
2801*0Sstevel@tonic-gate static void
2802*0Sstevel@tonic-gate raid_write_no_retry(mr_unit_t *un, md_raidcs_t *cs)
2803*0Sstevel@tonic-gate {
2804*0Sstevel@tonic-gate 	md_raidps_t	*ps = cs->cs_ps;
2805*0Sstevel@tonic-gate 	int	flags = RFP_DECR_FRAGS | RFP_RLS_LOCK;
2806*0Sstevel@tonic-gate 
2807*0Sstevel@tonic-gate 	/* decrement pwfrags if needed, and frags */
2808*0Sstevel@tonic-gate 	if (!(cs->cs_flags & MD_RCS_PWDONE))
2809*0Sstevel@tonic-gate 		flags |= RFP_DECR_PWFRAGS;
2810*0Sstevel@tonic-gate 	raid_free_child(cs, 1);
2811*0Sstevel@tonic-gate 	raid_free_parent(ps, flags);
2812*0Sstevel@tonic-gate }
2813*0Sstevel@tonic-gate 
2814*0Sstevel@tonic-gate /*
2815*0Sstevel@tonic-gate  * NAME:	raid_write_retry
2816*0Sstevel@tonic-gate  * DESCRIPTION:	RAID metadevice write retry routine
2817*0Sstevel@tonic-gate  * PARAMETERS:	mr_unit_t *un - pointer to a unit structure
2818*0Sstevel@tonic-gate  *		md_raidcs_t *cs - pointer to a child structure
2819*0Sstevel@tonic-gate  */
2820*0Sstevel@tonic-gate static void
2821*0Sstevel@tonic-gate raid_write_retry(mr_unit_t *un, md_raidcs_t *cs)
2822*0Sstevel@tonic-gate {
2823*0Sstevel@tonic-gate 	md_raidps_t	*ps;
2824*0Sstevel@tonic-gate 
2825*0Sstevel@tonic-gate 	ps = cs->cs_ps;
2826*0Sstevel@tonic-gate 
2827*0Sstevel@tonic-gate 	/* re-initialize the buf_t structure for raid_write() */
2828*0Sstevel@tonic-gate 	cs->cs_dbuf.b_chain = (struct buf *)cs;
2829*0Sstevel@tonic-gate 	cs->cs_dbuf.b_back = &cs->cs_dbuf;
2830*0Sstevel@tonic-gate 	cs->cs_dbuf.b_forw = &cs->cs_dbuf;
2831*0Sstevel@tonic-gate 	cs->cs_dbuf.b_flags = B_BUSY;	/* initialize flags */
2832*0Sstevel@tonic-gate 	cs->cs_dbuf.b_error = 0;	/* initialize error */
2833*0Sstevel@tonic-gate 	cs->cs_dbuf.b_offset = -1;
2834*0Sstevel@tonic-gate 	/* Initialize semaphores */
2835*0Sstevel@tonic-gate 	sema_init(&cs->cs_dbuf.b_io, 0, NULL,
2836*0Sstevel@tonic-gate 	    SEMA_DEFAULT, NULL);
2837*0Sstevel@tonic-gate 	sema_init(&cs->cs_dbuf.b_sem, 0, NULL,
2838*0Sstevel@tonic-gate 	    SEMA_DEFAULT, NULL);
2839*0Sstevel@tonic-gate 
2840*0Sstevel@tonic-gate 	cs->cs_pbuf.b_chain = (struct buf *)cs;
2841*0Sstevel@tonic-gate 	cs->cs_pbuf.b_back = &cs->cs_pbuf;
2842*0Sstevel@tonic-gate 	cs->cs_pbuf.b_forw = &cs->cs_pbuf;
2843*0Sstevel@tonic-gate 	cs->cs_pbuf.b_flags = B_BUSY;	/* initialize flags */
2844*0Sstevel@tonic-gate 	cs->cs_pbuf.b_error = 0;	/* initialize error */
2845*0Sstevel@tonic-gate 	cs->cs_pbuf.b_offset = -1;
2846*0Sstevel@tonic-gate 	sema_init(&cs->cs_pbuf.b_io, 0, NULL,
2847*0Sstevel@tonic-gate 	    SEMA_DEFAULT, NULL);
2848*0Sstevel@tonic-gate 	sema_init(&cs->cs_pbuf.b_sem, 0, NULL,
2849*0Sstevel@tonic-gate 	    SEMA_DEFAULT, NULL);
2850*0Sstevel@tonic-gate 
2851*0Sstevel@tonic-gate 	cs->cs_hbuf.b_chain = (struct buf *)cs;
2852*0Sstevel@tonic-gate 	cs->cs_hbuf.b_back = &cs->cs_hbuf;
2853*0Sstevel@tonic-gate 	cs->cs_hbuf.b_forw = &cs->cs_hbuf;
2854*0Sstevel@tonic-gate 	cs->cs_hbuf.b_flags = B_BUSY;	/* initialize flags */
2855*0Sstevel@tonic-gate 	cs->cs_hbuf.b_error = 0;	/* initialize error */
2856*0Sstevel@tonic-gate 	cs->cs_hbuf.b_offset = -1;
2857*0Sstevel@tonic-gate 	sema_init(&cs->cs_hbuf.b_io, 0, NULL,
2858*0Sstevel@tonic-gate 	    SEMA_DEFAULT, NULL);
2859*0Sstevel@tonic-gate 	sema_init(&cs->cs_hbuf.b_sem, 0, NULL,
2860*0Sstevel@tonic-gate 	    SEMA_DEFAULT, NULL);
2861*0Sstevel@tonic-gate 
2862*0Sstevel@tonic-gate 	cs->cs_flags &= ~(MD_RCS_ERROR);
2863*0Sstevel@tonic-gate 	/*
2864*0Sstevel@tonic-gate 	 * If the prewrite has already completed on this child
2865*0Sstevel@tonic-gate 	 * (MD_RCS_PWDONE is set), clear the flag and bump pwfrags
2866*0Sstevel@tonic-gate 	 * before restarting the i/o, since the retry redoes the prewrite.
2867*0Sstevel@tonic-gate 	 * If pwfrags is zero, we have already 'iodone'd the i/o so
2868*0Sstevel@tonic-gate 	 * leave things alone. We don't want to re-'done' it.
2869*0Sstevel@tonic-gate 	 */
2870*0Sstevel@tonic-gate 	mutex_enter(&ps->ps_mx);
2871*0Sstevel@tonic-gate 	if (cs->cs_flags & MD_RCS_PWDONE) {
2872*0Sstevel@tonic-gate 		cs->cs_flags &= ~MD_RCS_PWDONE;
2873*0Sstevel@tonic-gate 		ps->ps_pwfrags++;
2874*0Sstevel@tonic-gate 	}
2875*0Sstevel@tonic-gate 	mutex_exit(&ps->ps_mx);
2876*0Sstevel@tonic-gate 	raid_write_io(un, cs);
2877*0Sstevel@tonic-gate }
2878*0Sstevel@tonic-gate 
2879*0Sstevel@tonic-gate /*
2880*0Sstevel@tonic-gate  * NAME:	raid_wrerr
2881*0Sstevel@tonic-gate  * DESCRIPTION:	RAID metadevice write error recovery routine
2882*0Sstevel@tonic-gate  * PARAMETERS:	md_raidcs_t *cs - pointer to a child structure
2883*0Sstevel@tonic-gate  * LOCKS:	must obtain unit writer lock while calling raid_error_state
2884*0Sstevel@tonic-gate  *		since a unit or column state transition may take place.
2885*0Sstevel@tonic-gate  *		must obtain unit reader lock to retry I/O.
2886*0Sstevel@tonic-gate */ 2887*0Sstevel@tonic-gate static void 2888*0Sstevel@tonic-gate raid_wrerr(md_raidcs_t *cs) 2889*0Sstevel@tonic-gate { 2890*0Sstevel@tonic-gate md_raidps_t *ps; 2891*0Sstevel@tonic-gate mdi_unit_t *ui; 2892*0Sstevel@tonic-gate mr_unit_t *un; 2893*0Sstevel@tonic-gate md_raidcbuf_t *cbuf; 2894*0Sstevel@tonic-gate 2895*0Sstevel@tonic-gate ps = cs->cs_ps; 2896*0Sstevel@tonic-gate ui = ps->ps_ui; 2897*0Sstevel@tonic-gate 2898*0Sstevel@tonic-gate un = (mr_unit_t *)md_unit_writerlock(ui); 2899*0Sstevel@tonic-gate ASSERT(un != 0); 2900*0Sstevel@tonic-gate 2901*0Sstevel@tonic-gate if (cs->cs_dbuf.b_flags & B_ERROR) 2902*0Sstevel@tonic-gate (void) raid_error_state(un, &cs->cs_dbuf); 2903*0Sstevel@tonic-gate if (cs->cs_pbuf.b_flags & B_ERROR) 2904*0Sstevel@tonic-gate (void) raid_error_state(un, &cs->cs_pbuf); 2905*0Sstevel@tonic-gate if (cs->cs_hbuf.b_flags & B_ERROR) 2906*0Sstevel@tonic-gate (void) raid_error_state(un, &cs->cs_hbuf); 2907*0Sstevel@tonic-gate for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) 2908*0Sstevel@tonic-gate if (cbuf->cbuf_bp.b_flags & B_ERROR) 2909*0Sstevel@tonic-gate (void) raid_error_state(un, &cbuf->cbuf_bp); 2910*0Sstevel@tonic-gate 2911*0Sstevel@tonic-gate md_unit_writerexit(ui); 2912*0Sstevel@tonic-gate 2913*0Sstevel@tonic-gate ps->ps_flags |= MD_RPS_HSREQ; 2914*0Sstevel@tonic-gate 2915*0Sstevel@tonic-gate un = (mr_unit_t *)md_unit_readerlock(ui); 2916*0Sstevel@tonic-gate 2917*0Sstevel@tonic-gate /* now attempt the appropriate retry routine */ 2918*0Sstevel@tonic-gate (*(cs->cs_retry_call))(un, cs); 2919*0Sstevel@tonic-gate } 2920*0Sstevel@tonic-gate /* 2921*0Sstevel@tonic-gate * NAMES: raid_write_error 2922*0Sstevel@tonic-gate * DESCRIPTION: I/O error handling routine for a RAID metadevice write 2923*0Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to child structure 2924*0Sstevel@tonic-gate */ 2925*0Sstevel@tonic-gate /*ARGSUSED*/ 2926*0Sstevel@tonic-gate static void 2927*0Sstevel@tonic-gate raid_write_error(md_raidcs_t *cs) 2928*0Sstevel@tonic-gate { 2929*0Sstevel@tonic-gate md_raidps_t *ps; 2930*0Sstevel@tonic-gate mdi_unit_t *ui; 2931*0Sstevel@tonic-gate mr_unit_t *un; 2932*0Sstevel@tonic-gate md_raidcbuf_t *cbuf; 2933*0Sstevel@tonic-gate set_t setno; 2934*0Sstevel@tonic-gate 2935*0Sstevel@tonic-gate ps = cs->cs_ps; 2936*0Sstevel@tonic-gate ui = ps->ps_ui; 2937*0Sstevel@tonic-gate un = cs->cs_un; 2938*0Sstevel@tonic-gate 2939*0Sstevel@tonic-gate setno = MD_UN2SET(un); 2940*0Sstevel@tonic-gate 2941*0Sstevel@tonic-gate /* 2942*0Sstevel@tonic-gate * locate each buf that is in error on this io and then 2943*0Sstevel@tonic-gate * output an error message 2944*0Sstevel@tonic-gate */ 2945*0Sstevel@tonic-gate if ((cs->cs_dbuf.b_flags & B_ERROR) && 2946*0Sstevel@tonic-gate (COLUMN_STATE(un, cs->cs_dcolumn) != RCS_ERRED) && 2947*0Sstevel@tonic-gate (COLUMN_STATE(un, cs->cs_dcolumn) != RCS_LAST_ERRED)) 2948*0Sstevel@tonic-gate cmn_err(CE_WARN, "md %s: write error on %s", 2949*0Sstevel@tonic-gate md_shortname(MD_SID(un)), 2950*0Sstevel@tonic-gate md_devname(setno, md_expldev(cs->cs_dbuf.b_edev), NULL, 0)); 2951*0Sstevel@tonic-gate 2952*0Sstevel@tonic-gate if ((cs->cs_pbuf.b_flags & B_ERROR) && 2953*0Sstevel@tonic-gate (COLUMN_STATE(un, cs->cs_pcolumn) != RCS_ERRED) && 2954*0Sstevel@tonic-gate (COLUMN_STATE(un, cs->cs_pcolumn) != RCS_LAST_ERRED)) 2955*0Sstevel@tonic-gate cmn_err(CE_WARN, "md %s: write error on %s", 2956*0Sstevel@tonic-gate md_shortname(MD_SID(un)), 2957*0Sstevel@tonic-gate md_devname(setno, 
md_expldev(cs->cs_pbuf.b_edev), NULL, 0)); 2958*0Sstevel@tonic-gate 2959*0Sstevel@tonic-gate for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) 2960*0Sstevel@tonic-gate if ((cbuf->cbuf_bp.b_flags & B_ERROR) && 2961*0Sstevel@tonic-gate (COLUMN_STATE(un, cbuf->cbuf_column) != RCS_ERRED) && 2962*0Sstevel@tonic-gate (COLUMN_STATE(un, cbuf->cbuf_column) != RCS_LAST_ERRED)) 2963*0Sstevel@tonic-gate cmn_err(CE_WARN, "md %s: write error on %s", 2964*0Sstevel@tonic-gate md_shortname(MD_SID(un)), 2965*0Sstevel@tonic-gate md_devname(setno, md_expldev(cbuf->cbuf_bp.b_edev), 2966*0Sstevel@tonic-gate NULL, 0)); 2967*0Sstevel@tonic-gate 2968*0Sstevel@tonic-gate md_unit_readerexit(ui); 2969*0Sstevel@tonic-gate 2970*0Sstevel@tonic-gate ASSERT(cs->cs_frags == 0); 2971*0Sstevel@tonic-gate 2972*0Sstevel@tonic-gate /* now schedule processing for possible state change */ 2973*0Sstevel@tonic-gate daemon_request(&md_mstr_daemon, raid_wrerr, 2974*0Sstevel@tonic-gate (daemon_queue_t *)cs, REQ_OLD); 2975*0Sstevel@tonic-gate 2976*0Sstevel@tonic-gate } 2977*0Sstevel@tonic-gate 2978*0Sstevel@tonic-gate /* 2979*0Sstevel@tonic-gate * NAME: raid_write_ponly 2980*0Sstevel@tonic-gate * DESCRIPTION: RAID metadevice write routine 2981*0Sstevel@tonic-gate * in the case where only the parity column can be written 2982*0Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to a child structure 2983*0Sstevel@tonic-gate */ 2984*0Sstevel@tonic-gate static void 2985*0Sstevel@tonic-gate raid_write_ponly(md_raidcs_t *cs) 2986*0Sstevel@tonic-gate { 2987*0Sstevel@tonic-gate md_raidps_t *ps; 2988*0Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 2989*0Sstevel@tonic-gate 2990*0Sstevel@tonic-gate ps = cs->cs_ps; 2991*0Sstevel@tonic-gate /* decrement pwfrags if needed, but not frags */ 2992*0Sstevel@tonic-gate ASSERT(!(cs->cs_flags & MD_RCS_PWDONE)); 2993*0Sstevel@tonic-gate raid_free_parent(ps, RFP_DECR_PWFRAGS); 2994*0Sstevel@tonic-gate cs->cs_flags |= MD_RCS_PWDONE; 2995*0Sstevel@tonic-gate cs->cs_frags = 1; 2996*0Sstevel@tonic-gate cs->cs_stage = RAID_WRITE_PONLY_DONE; 2997*0Sstevel@tonic-gate cs->cs_call = raid_stage; 2998*0Sstevel@tonic-gate cs->cs_error_call = raid_write_error; 2999*0Sstevel@tonic-gate cs->cs_retry_call = raid_write_no_retry; 3000*0Sstevel@tonic-gate if (WRITE_ALT(un, cs->cs_pcolumn)) { 3001*0Sstevel@tonic-gate cs->cs_frags++; 3002*0Sstevel@tonic-gate raidio(cs, RIO_ALT | RIO_EXTRA | RIO_PARITY | RIO_WRITE); 3003*0Sstevel@tonic-gate } 3004*0Sstevel@tonic-gate raidio(cs, RIO_PARITY | RIO_WRITE); 3005*0Sstevel@tonic-gate } 3006*0Sstevel@tonic-gate 3007*0Sstevel@tonic-gate /* 3008*0Sstevel@tonic-gate * NAME: raid_write_ploop 3009*0Sstevel@tonic-gate * DESCRIPTION: RAID metadevice write routine, constructs parity from 3010*0Sstevel@tonic-gate * data in other columns. 
3011*0Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to a child structure 3012*0Sstevel@tonic-gate */ 3013*0Sstevel@tonic-gate static void 3014*0Sstevel@tonic-gate raid_write_ploop(md_raidcs_t *cs) 3015*0Sstevel@tonic-gate { 3016*0Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 3017*0Sstevel@tonic-gate uint_t *dbuf; 3018*0Sstevel@tonic-gate uint_t *pbuf; 3019*0Sstevel@tonic-gate size_t wordcnt; 3020*0Sstevel@tonic-gate uint_t psum = 0; 3021*0Sstevel@tonic-gate 3022*0Sstevel@tonic-gate wordcnt = cs->cs_bcount / sizeof (uint_t); 3023*0Sstevel@tonic-gate dbuf = (uint_t *)(void *)(cs->cs_dbuffer + DEV_BSIZE); 3024*0Sstevel@tonic-gate pbuf = (uint_t *)(void *)(cs->cs_pbuffer + DEV_BSIZE); 3025*0Sstevel@tonic-gate while (wordcnt--) 3026*0Sstevel@tonic-gate *pbuf++ ^= *dbuf++; 3027*0Sstevel@tonic-gate cs->cs_loop++; 3028*0Sstevel@tonic-gate 3029*0Sstevel@tonic-gate /* 3030*0Sstevel@tonic-gate * build parity from scratch using new data, 3031*0Sstevel@tonic-gate * skip reading the data and parity columns. 3032*0Sstevel@tonic-gate */ 3033*0Sstevel@tonic-gate while (cs->cs_loop == cs->cs_dcolumn || cs->cs_loop == cs->cs_pcolumn) 3034*0Sstevel@tonic-gate cs->cs_loop++; 3035*0Sstevel@tonic-gate 3036*0Sstevel@tonic-gate if (cs->cs_loop != un->un_totalcolumncnt) { 3037*0Sstevel@tonic-gate cs->cs_frags = 1; 3038*0Sstevel@tonic-gate raidio(cs, RIO_DATA | RIO_READ | (cs->cs_loop + 1)); 3039*0Sstevel@tonic-gate return; 3040*0Sstevel@tonic-gate } 3041*0Sstevel@tonic-gate 3042*0Sstevel@tonic-gate /* construct checksum for parity buffer */ 3043*0Sstevel@tonic-gate wordcnt = cs->cs_bcount / sizeof (uint_t); 3044*0Sstevel@tonic-gate pbuf = (uint_t *)(void *)(cs->cs_pbuffer + DEV_BSIZE); 3045*0Sstevel@tonic-gate while (wordcnt--) { 3046*0Sstevel@tonic-gate psum ^= *pbuf; 3047*0Sstevel@tonic-gate pbuf++; 3048*0Sstevel@tonic-gate } 3049*0Sstevel@tonic-gate RAID_FILLIN_RPW(cs->cs_pbuffer, un, psum, -1, 3050*0Sstevel@tonic-gate cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid, 3051*0Sstevel@tonic-gate 1, cs->cs_pcolumn, RAID_PWMAGIC); 3052*0Sstevel@tonic-gate 3053*0Sstevel@tonic-gate cs->cs_stage = RAID_NONE; 3054*0Sstevel@tonic-gate cs->cs_call = raid_write_ponly; 3055*0Sstevel@tonic-gate cs->cs_error_call = raid_write_error; 3056*0Sstevel@tonic-gate cs->cs_retry_call = raid_write_err_retry; 3057*0Sstevel@tonic-gate cs->cs_frags = 1; 3058*0Sstevel@tonic-gate if (WRITE_ALT(un, cs->cs_pcolumn)) { 3059*0Sstevel@tonic-gate cs->cs_frags++; 3060*0Sstevel@tonic-gate raidio(cs, RIO_ALT | RIO_EXTRA | RIO_PARITY | RIO_PREWRITE); 3061*0Sstevel@tonic-gate } 3062*0Sstevel@tonic-gate raidio(cs, RIO_PARITY | RIO_PREWRITE); 3063*0Sstevel@tonic-gate } 3064*0Sstevel@tonic-gate 3065*0Sstevel@tonic-gate /* 3066*0Sstevel@tonic-gate * NAME: raid_write_donly 3067*0Sstevel@tonic-gate * DESCRIPTION: RAID metadevice write routine 3068*0Sstevel@tonic-gate * Completed writing data to prewrite entry 3069*0Sstevel@tonic-gate * in the case where only the data column can be written 3070*0Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to a child structure 3071*0Sstevel@tonic-gate */ 3072*0Sstevel@tonic-gate static void 3073*0Sstevel@tonic-gate raid_write_donly(md_raidcs_t *cs) 3074*0Sstevel@tonic-gate { 3075*0Sstevel@tonic-gate md_raidps_t *ps; 3076*0Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 3077*0Sstevel@tonic-gate 3078*0Sstevel@tonic-gate ps = cs->cs_ps; 3079*0Sstevel@tonic-gate /* WARNING: don't release unit reader lock here... 
*/ 3080*0Sstevel@tonic-gate /* decrement pwfrags if needed, but not frags */ 3081*0Sstevel@tonic-gate ASSERT(!(cs->cs_flags & MD_RCS_PWDONE)); 3082*0Sstevel@tonic-gate raid_free_parent(ps, RFP_DECR_PWFRAGS); 3083*0Sstevel@tonic-gate cs->cs_flags |= MD_RCS_PWDONE; 3084*0Sstevel@tonic-gate cs->cs_frags = 1; 3085*0Sstevel@tonic-gate cs->cs_stage = RAID_WRITE_DONLY_DONE; 3086*0Sstevel@tonic-gate cs->cs_call = raid_stage; 3087*0Sstevel@tonic-gate cs->cs_error_call = raid_write_error; 3088*0Sstevel@tonic-gate cs->cs_retry_call = raid_write_err_retry; 3089*0Sstevel@tonic-gate if (WRITE_ALT(un, cs->cs_dcolumn)) { 3090*0Sstevel@tonic-gate cs->cs_frags++; 3091*0Sstevel@tonic-gate raidio(cs, RIO_ALT | RIO_EXTRA | RIO_DATA | RIO_WRITE); 3092*0Sstevel@tonic-gate } 3093*0Sstevel@tonic-gate raidio(cs, RIO_DATA | RIO_WRITE); 3094*0Sstevel@tonic-gate } 3095*0Sstevel@tonic-gate 3096*0Sstevel@tonic-gate /* 3097*0Sstevel@tonic-gate * NAME: raid_write_got_old 3098*0Sstevel@tonic-gate * DESCRIPTION: RAID metadevice write routine 3099*0Sstevel@tonic-gate * completed read of old data and old parity 3100*0Sstevel@tonic-gate * PARAMETERS: md_raidcs_t *cs - pointer to a child structure 3101*0Sstevel@tonic-gate */ 3102*0Sstevel@tonic-gate static void 3103*0Sstevel@tonic-gate raid_write_got_old(md_raidcs_t *cs) 3104*0Sstevel@tonic-gate { 3105*0Sstevel@tonic-gate mr_unit_t *un = cs->cs_un; 3106*0Sstevel@tonic-gate 3107*0Sstevel@tonic-gate ASSERT(IO_READER_HELD(cs->cs_un)); 3108*0Sstevel@tonic-gate ASSERT(UNIT_READER_HELD(cs->cs_un)); 3109*0Sstevel@tonic-gate 3110*0Sstevel@tonic-gate raid_mapin_buf(cs); 3111*0Sstevel@tonic-gate genstandardparity(cs); 3112*0Sstevel@tonic-gate cs->cs_frags = 2; 3113*0Sstevel@tonic-gate cs->cs_call = raid_stage; 3114*0Sstevel@tonic-gate cs->cs_stage = RAID_PREWRITE_DONE; 3115*0Sstevel@tonic-gate cs->cs_error_call = raid_write_error; 3116*0Sstevel@tonic-gate cs->cs_retry_call = raid_write_retry; 3117*0Sstevel@tonic-gate 3118*0Sstevel@tonic-gate if (WRITE_ALT(un, cs->cs_dcolumn)) { 3119*0Sstevel@tonic-gate cs->cs_frags++; 3120*0Sstevel@tonic-gate raidio(cs, RIO_ALT | RIO_EXTRA | RIO_DATA | RIO_PREWRITE); 3121*0Sstevel@tonic-gate } 3122*0Sstevel@tonic-gate 3123*0Sstevel@tonic-gate if (WRITE_ALT(un, cs->cs_pcolumn)) { 3124*0Sstevel@tonic-gate cs->cs_frags++; 3125*0Sstevel@tonic-gate raidio(cs, RIO_ALT | RIO_EXTRA | RIO_PARITY | RIO_PREWRITE); 3126*0Sstevel@tonic-gate } 3127*0Sstevel@tonic-gate ASSERT(cs->cs_frags < 4); 3128*0Sstevel@tonic-gate raidio(cs, RIO_DATA | RIO_PREWRITE); 3129*0Sstevel@tonic-gate raidio(cs, RIO_PARITY | RIO_PREWRITE); 3130*0Sstevel@tonic-gate } 3131*0Sstevel@tonic-gate 3132*0Sstevel@tonic-gate /* 3133*0Sstevel@tonic-gate * NAME: raid_write_io 3134*0Sstevel@tonic-gate * DESCRIPTION: RAID metadevice write I/O routine 3135*0Sstevel@tonic-gate * PARAMETERS: mr_unit_t *un - pointer to a unit structure 3136*0Sstevel@tonic-gate * md_raidcs_t *cs - pointer to a child structure 3137*0Sstevel@tonic-gate */ 3138*0Sstevel@tonic-gate 3139*0Sstevel@tonic-gate /*ARGSUSED*/ 3140*0Sstevel@tonic-gate static void 3141*0Sstevel@tonic-gate raid_write_io(mr_unit_t *un, md_raidcs_t *cs) 3142*0Sstevel@tonic-gate { 3143*0Sstevel@tonic-gate md_raidps_t *ps = cs->cs_ps; 3144*0Sstevel@tonic-gate uint_t *dbuf; 3145*0Sstevel@tonic-gate uint_t *ubuf; 3146*0Sstevel@tonic-gate size_t wordcnt; 3147*0Sstevel@tonic-gate uint_t dsum = 0; 3148*0Sstevel@tonic-gate int pcheck; 3149*0Sstevel@tonic-gate int dcheck; 3150*0Sstevel@tonic-gate 3151*0Sstevel@tonic-gate 
	ASSERT((un->un_column[cs->cs_pcolumn].un_devstate &
3152*0Sstevel@tonic-gate 	    RCS_INIT) == 0);
3153*0Sstevel@tonic-gate 	ASSERT((un->un_column[cs->cs_dcolumn].un_devstate &
3154*0Sstevel@tonic-gate 	    RCS_INIT) == 0);
3155*0Sstevel@tonic-gate 	ASSERT(IO_READER_HELD(un));
3156*0Sstevel@tonic-gate 	ASSERT(UNIT_READER_HELD(un));
3157*0Sstevel@tonic-gate 	ASSERT(cs->cs_flags & MD_RCS_HAVE_PW_SLOTS);
3158*0Sstevel@tonic-gate 	if (cs->cs_flags & MD_RCS_LINE) {
3159*0Sstevel@tonic-gate 
3160*0Sstevel@tonic-gate 		mr_unit_t	*un = cs->cs_un;
3161*0Sstevel@tonic-gate 
3162*0Sstevel@tonic-gate 		ASSERT(un->un_origcolumncnt == un->un_totalcolumncnt);
3163*0Sstevel@tonic-gate 		raid_mapin_buf(cs);
3164*0Sstevel@tonic-gate 		cs->cs_frags = un->un_origcolumncnt;
3165*0Sstevel@tonic-gate 		cs->cs_call = raid_stage;
3166*0Sstevel@tonic-gate 		cs->cs_error_call = raid_write_error;
3167*0Sstevel@tonic-gate 		cs->cs_retry_call = raid_write_no_retry;
3168*0Sstevel@tonic-gate 		cs->cs_stage = RAID_LINE_PWDONE;
3169*0Sstevel@tonic-gate 		genlineparity(cs);
3170*0Sstevel@tonic-gate 		return;
3171*0Sstevel@tonic-gate 	}
3172*0Sstevel@tonic-gate 
3173*0Sstevel@tonic-gate 	pcheck = erred_check_line(un, cs, &un->un_column[cs->cs_pcolumn]);
3174*0Sstevel@tonic-gate 	dcheck = erred_check_line(un, cs, &un->un_column[cs->cs_dcolumn]);
3175*0Sstevel@tonic-gate 	cs->cs_resync_check = (pcheck << RCL_PARITY_OFFSET) | dcheck;
3176*0Sstevel@tonic-gate 
3177*0Sstevel@tonic-gate 	if (pcheck == RCL_ERRED && dcheck == RCL_ERRED) {
3178*0Sstevel@tonic-gate 		int err = EIO;
3179*0Sstevel@tonic-gate 
3180*0Sstevel@tonic-gate 		if ((un->un_column[cs->cs_pcolumn].un_devstate ==
3181*0Sstevel@tonic-gate 		    RCS_LAST_ERRED) ||
3182*0Sstevel@tonic-gate 		    (un->un_column[cs->cs_dcolumn].un_devstate ==
3183*0Sstevel@tonic-gate 		    RCS_LAST_ERRED))
3184*0Sstevel@tonic-gate 			err = ENXIO;
3185*0Sstevel@tonic-gate 		raid_error_parent(ps, err);
3186*0Sstevel@tonic-gate 		ASSERT(!(cs->cs_flags & MD_RCS_PWDONE));
3187*0Sstevel@tonic-gate 		raid_free_child(cs, 1);
3188*0Sstevel@tonic-gate 		raid_free_parent(ps, RFP_DECR_FRAGS
3189*0Sstevel@tonic-gate 		    | RFP_RLS_LOCK | RFP_DECR_PWFRAGS);
3190*0Sstevel@tonic-gate 		return;
3191*0Sstevel@tonic-gate 	}
3192*0Sstevel@tonic-gate 
3193*0Sstevel@tonic-gate 	if (pcheck & RCL_ERRED) {
3194*0Sstevel@tonic-gate 		/*
3195*0Sstevel@tonic-gate 		 * handle case of only having data drive
3196*0Sstevel@tonic-gate 		 */
3197*0Sstevel@tonic-gate 		raid_mapin_buf(cs);
3198*0Sstevel@tonic-gate 		wordcnt = cs->cs_bcount / sizeof (uint_t);
3199*0Sstevel@tonic-gate 
3200*0Sstevel@tonic-gate 		dbuf = (uint_t *)(void *)(cs->cs_dbuffer + DEV_BSIZE);
3201*0Sstevel@tonic-gate 		ubuf = (uint_t *)(void *)(cs->cs_addr);
3202*0Sstevel@tonic-gate 
3203*0Sstevel@tonic-gate 		while (wordcnt--) {
3204*0Sstevel@tonic-gate 			*dbuf = *ubuf;
3205*0Sstevel@tonic-gate 			dsum ^= *ubuf;
3206*0Sstevel@tonic-gate 			dbuf++;
3207*0Sstevel@tonic-gate 			ubuf++;
3208*0Sstevel@tonic-gate 		}
3209*0Sstevel@tonic-gate 		RAID_FILLIN_RPW(cs->cs_dbuffer, un, dsum, -1,
3210*0Sstevel@tonic-gate 		    cs->cs_blkno, cs->cs_blkcnt, cs->cs_pwid,
3211*0Sstevel@tonic-gate 		    1, cs->cs_dcolumn, RAID_PWMAGIC);
3212*0Sstevel@tonic-gate 		cs->cs_frags = 1;
3213*0Sstevel@tonic-gate 		cs->cs_stage = RAID_NONE;
3214*0Sstevel@tonic-gate 		cs->cs_call = raid_write_donly;
3215*0Sstevel@tonic-gate 		cs->cs_error_call = raid_write_error;
3216*0Sstevel@tonic-gate 		cs->cs_retry_call = raid_write_err_retry;
3217*0Sstevel@tonic-gate 		if (WRITE_ALT(un, cs->cs_dcolumn)) {
3218*0Sstevel@tonic-gate 			cs->cs_frags++;
3219*0Sstevel@tonic-gate 			raidio(cs, RIO_DATA | RIO_ALT | RIO_EXTRA |
3220*0Sstevel@tonic-gate 			    RIO_PREWRITE);
3221*0Sstevel@tonic-gate 		}
3222*0Sstevel@tonic-gate 		raidio(cs, RIO_DATA | RIO_PREWRITE);
3223*0Sstevel@tonic-gate 		return;
3224*0Sstevel@tonic-gate 	}
3225*0Sstevel@tonic-gate 
3226*0Sstevel@tonic-gate 	if (dcheck & RCL_ERRED) {
3227*0Sstevel@tonic-gate 		/*
3228*0Sstevel@tonic-gate 		 * handle case of only having parity drive
3229*0Sstevel@tonic-gate 		 * build parity from scratch using new data,
3230*0Sstevel@tonic-gate 		 * skip reading the data and parity columns.
3231*0Sstevel@tonic-gate 		 */
3232*0Sstevel@tonic-gate 		raid_mapin_buf(cs);
3233*0Sstevel@tonic-gate 		cs->cs_loop = 0;
3234*0Sstevel@tonic-gate 		while (cs->cs_loop == cs->cs_dcolumn ||
3235*0Sstevel@tonic-gate 		    cs->cs_loop == cs->cs_pcolumn)
3236*0Sstevel@tonic-gate 			cs->cs_loop++;
3237*0Sstevel@tonic-gate 
3238*0Sstevel@tonic-gate 		/* copy new data in to begin building parity */
3239*0Sstevel@tonic-gate 		bcopy(cs->cs_addr, cs->cs_pbuffer + DEV_BSIZE, cs->cs_bcount);
3240*0Sstevel@tonic-gate 		cs->cs_stage = RAID_NONE;
3241*0Sstevel@tonic-gate 		cs->cs_call = raid_write_ploop;
3242*0Sstevel@tonic-gate 		cs->cs_error_call = raid_write_error;
3243*0Sstevel@tonic-gate 		cs->cs_retry_call = raid_write_err_retry;
3244*0Sstevel@tonic-gate 		cs->cs_frags = 1;
3245*0Sstevel@tonic-gate 		raidio(cs, RIO_DATA | RIO_READ | (cs->cs_loop + 1));
3246*0Sstevel@tonic-gate 		return;
3247*0Sstevel@tonic-gate 	}
3248*0Sstevel@tonic-gate 	/*
3249*0Sstevel@tonic-gate 	 * handle normal cases
3250*0Sstevel@tonic-gate 	 * read old data and old parity
3251*0Sstevel@tonic-gate 	 */
3252*0Sstevel@tonic-gate 	cs->cs_frags = 2;
3253*0Sstevel@tonic-gate 	cs->cs_stage = RAID_NONE;
3254*0Sstevel@tonic-gate 	cs->cs_call = raid_write_got_old;
3255*0Sstevel@tonic-gate 	cs->cs_error_call = raid_write_error;
3256*0Sstevel@tonic-gate 	cs->cs_retry_call = raid_write_retry;
3257*0Sstevel@tonic-gate 	ASSERT(ps->ps_magic == RAID_PSMAGIC);
3258*0Sstevel@tonic-gate 	raidio(cs, RIO_DATA | RIO_READ);
3259*0Sstevel@tonic-gate 	raidio(cs, RIO_PARITY | RIO_READ);
3260*0Sstevel@tonic-gate }
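/*
 * cs_resync_check above packs two independent results into one word:
 * the parity-column check shifted up by RCL_PARITY_OFFSET and the
 * data-column check in the low bits.  The bitwise "|" is essential; a
 * logical "||" would collapse the packed word to 0 or 1.  Sketch
 * (not driver code), assuming RCL_PARITY_OFFSET as used above:
 */
#if 0
static int
pack_resync_check(int pcheck, int dcheck)
{
	return ((pcheck << RCL_PARITY_OFFSET) | dcheck);
}
#endif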
3261*0Sstevel@tonic-gate 
3262*0Sstevel@tonic-gate static void
3263*0Sstevel@tonic-gate raid_enqueue(md_raidcs_t *cs)
3264*0Sstevel@tonic-gate {
3265*0Sstevel@tonic-gate 	mdi_unit_t	*ui = cs->cs_ps->ps_ui;
3266*0Sstevel@tonic-gate 	kmutex_t	*io_list_mutex = &ui->ui_io_lock->io_list_mutex;
3267*0Sstevel@tonic-gate 	md_raidcs_t	*cs1;
3268*0Sstevel@tonic-gate 
3269*0Sstevel@tonic-gate 	mutex_enter(io_list_mutex);
3270*0Sstevel@tonic-gate 	ASSERT(!(cs->cs_flags & MD_RCS_LLOCKD));
3271*0Sstevel@tonic-gate 	if (ui->ui_io_lock->io_list_front == NULL) {
3272*0Sstevel@tonic-gate 		ui->ui_io_lock->io_list_front = cs;
3273*0Sstevel@tonic-gate 		ui->ui_io_lock->io_list_back = cs;
3274*0Sstevel@tonic-gate 	} else {
3275*0Sstevel@tonic-gate 		cs1 = ui->ui_io_lock->io_list_back;
3276*0Sstevel@tonic-gate 		cs1->cs_linlck_next = cs;
3277*0Sstevel@tonic-gate 		ui->ui_io_lock->io_list_back = cs;
3278*0Sstevel@tonic-gate 	}
3279*0Sstevel@tonic-gate 	STAT_INC(raid_write_waits);
3280*0Sstevel@tonic-gate 	STAT_MAX(raid_max_write_q_length, raid_write_queue_length);
3281*0Sstevel@tonic-gate 	cs->cs_linlck_next = NULL;
3282*0Sstevel@tonic-gate 	mutex_exit(io_list_mutex);
3283*0Sstevel@tonic-gate }
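/*
 * raid_enqueue() above is a plain singly linked FIFO append under
 * io_list_mutex.  The same shape with hypothetical names (not driver
 * code):
 */
#if 0
struct node { struct node *next; };
struct fifo { struct node *front, *back; };

static void
fifo_append(struct fifo *q, struct node *n)
{
	n->next = NULL;
	if (q->front == NULL)
		q->front = q->back = n;	/* queue was empty */
	else {
		q->back->next = n;	/* link after current tail */
		q->back = n;
	}
}
#endif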
3284*0Sstevel@tonic-gate 
3285*0Sstevel@tonic-gate /*
3286*0Sstevel@tonic-gate  * NAME:	raid_write
3287*0Sstevel@tonic-gate  * DESCRIPTION:	RAID metadevice write routine
3288*0Sstevel@tonic-gate  * PARAMETERS:	mr_unit_t *un - pointer to a unit structure
3289*0Sstevel@tonic-gate  *		md_raidcs_t *cs - pointer to a child structure
3290*0Sstevel@tonic-gate  */
3291*0Sstevel@tonic-gate 
3292*0Sstevel@tonic-gate /*ARGSUSED*/
3293*0Sstevel@tonic-gate static int
3294*0Sstevel@tonic-gate raid_write(mr_unit_t *un, md_raidcs_t *cs)
3295*0Sstevel@tonic-gate {
3296*0Sstevel@tonic-gate 	int	error = 0;
3297*0Sstevel@tonic-gate 	md_raidps_t	*ps;
3298*0Sstevel@tonic-gate 	mdi_unit_t	*ui;
3299*0Sstevel@tonic-gate 	minor_t	mnum;
3300*0Sstevel@tonic-gate 	clock_t	timeout;
3301*0Sstevel@tonic-gate 
3302*0Sstevel@tonic-gate 	ASSERT(IO_READER_HELD(un));
3303*0Sstevel@tonic-gate 	ps = cs->cs_ps;
3304*0Sstevel@tonic-gate 	ui = ps->ps_ui;
3305*0Sstevel@tonic-gate 
3306*0Sstevel@tonic-gate 	ASSERT(UNIT_STATE(un) != RUS_INIT);
3307*0Sstevel@tonic-gate 	if (UNIT_STATE(un) == RUS_LAST_ERRED)
3308*0Sstevel@tonic-gate 		error = EIO;
3309*0Sstevel@tonic-gate 
3310*0Sstevel@tonic-gate 	/* make sure the write doesn't go beyond the column */
3311*0Sstevel@tonic-gate 	if (cs->cs_blkno + cs->cs_blkcnt > un->un_segsize * un->un_segsincolumn)
3312*0Sstevel@tonic-gate 		error = ENXIO;
3313*0Sstevel@tonic-gate 	if (error)
3314*0Sstevel@tonic-gate 		goto werror;
3315*0Sstevel@tonic-gate 
3316*0Sstevel@tonic-gate 	getresources(cs);
3317*0Sstevel@tonic-gate 
3318*0Sstevel@tonic-gate 	/*
3319*0Sstevel@tonic-gate 	 * this is an advisory loop that keeps the waiting lists short
3320*0Sstevel@tonic-gate 	 * to reduce cpu time.  Since there is a race introduced by not
3321*0Sstevel@tonic-gate 	 * acquiring all the correct mutexes, use a cv_timedwait to be
3322*0Sstevel@tonic-gate 	 * sure the write will always wake up and start.
3323*0Sstevel@tonic-gate 	 */
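	/*
	 * The loop below is deliberately advisory: raid_check_pw() is
	 * evaluated without holding every mutex that guards pre-write
	 * reuse, so a wakeup can be missed.  Bounding the sleep with
	 * cv_timedwait() (md_wr_wait ticks) turns a missed
	 * cv_broadcast() into a short delay instead of a hung write.
	 */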
3324*0Sstevel@tonic-gate 	while (raid_check_pw(cs)) {
3325*0Sstevel@tonic-gate 		mutex_enter(&un->un_mx);
3326*0Sstevel@tonic-gate 		(void) drv_getparm(LBOLT, &timeout);
3327*0Sstevel@tonic-gate 		timeout += md_wr_wait;
3328*0Sstevel@tonic-gate 		un->un_rflags |= MD_RFLAG_NEEDPW;
3329*0Sstevel@tonic-gate 		STAT_INC(raid_prewrite_waits);
3330*0Sstevel@tonic-gate 		(void) cv_timedwait(&un->un_cv, &un->un_mx, timeout);
3331*0Sstevel@tonic-gate 		un->un_rflags &= ~MD_RFLAG_NEEDPW;
3332*0Sstevel@tonic-gate 		mutex_exit(&un->un_mx);
3333*0Sstevel@tonic-gate 	}
3334*0Sstevel@tonic-gate 
3335*0Sstevel@tonic-gate 	if (raid_line_writer_lock(cs, 1))
3336*0Sstevel@tonic-gate 		return (0);
3337*0Sstevel@tonic-gate 
3338*0Sstevel@tonic-gate 	un = (mr_unit_t *)md_unit_readerlock(ui);
3339*0Sstevel@tonic-gate 	cs->cs_un = un;
3340*0Sstevel@tonic-gate 	mnum = MD_SID(un);
3341*0Sstevel@tonic-gate 
3342*0Sstevel@tonic-gate 	if (un->un_state & RUS_REGEN) {
3343*0Sstevel@tonic-gate 		raid_regen_parity(cs);
3344*0Sstevel@tonic-gate 		un = MD_UNIT(mnum);
3345*0Sstevel@tonic-gate 		cs->cs_un = un;
3346*0Sstevel@tonic-gate 	}
3347*0Sstevel@tonic-gate 
3348*0Sstevel@tonic-gate 	raid_write_io(un, cs);
3349*0Sstevel@tonic-gate 	return (0);
3350*0Sstevel@tonic-gate werror:
3351*0Sstevel@tonic-gate 	/* acquire unit reader lock since raid_free_child always drops it */
3352*0Sstevel@tonic-gate 	raid_error_parent(ps, error);
3353*0Sstevel@tonic-gate 	raid_free_child(cs, 0);
3354*0Sstevel@tonic-gate 	/* decrement both pwfrags and frags */
3355*0Sstevel@tonic-gate 	raid_free_parent(ps, RFP_DECR_PWFRAGS | RFP_DECR_FRAGS | RFP_RLS_LOCK);
3356*0Sstevel@tonic-gate 	return (0);
3357*0Sstevel@tonic-gate }
3358*0Sstevel@tonic-gate 
3359*0Sstevel@tonic-gate 
3360*0Sstevel@tonic-gate /*
3361*0Sstevel@tonic-gate  * NAMES:	raid_stage
3362*0Sstevel@tonic-gate  * DESCRIPTION:	post-processing routine for a RAID metadevice
3363*0Sstevel@tonic-gate  * PARAMETERS:	md_raidcs_t *cs - pointer to child structure
3364*0Sstevel@tonic-gate  */
3365*0Sstevel@tonic-gate static void
3366*0Sstevel@tonic-gate raid_stage(md_raidcs_t *cs)
3367*0Sstevel@tonic-gate {
3368*0Sstevel@tonic-gate 	md_raidps_t	*ps = cs->cs_ps;
3369*0Sstevel@tonic-gate 	mr_unit_t	*un = cs->cs_un;
3370*0Sstevel@tonic-gate 	md_raidcbuf_t	*cbuf;
3371*0Sstevel@tonic-gate 	buf_t	*bp;
3372*0Sstevel@tonic-gate 	void	*private;
3373*0Sstevel@tonic-gate 	int	flag;
3374*0Sstevel@tonic-gate 
3375*0Sstevel@tonic-gate 	switch (cs->cs_stage) {
3376*0Sstevel@tonic-gate 	case RAID_READ_DONE:
3377*0Sstevel@tonic-gate 		raid_free_child(cs, 1);
3378*0Sstevel@tonic-gate 		/* decrement readfrags */
3379*0Sstevel@tonic-gate 		raid_free_parent(ps, RFP_DECR_READFRAGS | RFP_RLS_LOCK);
3380*0Sstevel@tonic-gate 		return;
3381*0Sstevel@tonic-gate 
3382*0Sstevel@tonic-gate 	case RAID_WRITE_DONE:
3383*0Sstevel@tonic-gate 	case RAID_WRITE_PONLY_DONE:
3384*0Sstevel@tonic-gate 	case RAID_WRITE_DONLY_DONE:
3385*0Sstevel@tonic-gate 		/*
3386*0Sstevel@tonic-gate 		 * Completed writing real parity and/or data.
3387*0Sstevel@tonic-gate */ 3388*0Sstevel@tonic-gate ASSERT(cs->cs_flags & MD_RCS_PWDONE); 3389*0Sstevel@tonic-gate raid_free_child(cs, 1); 3390*0Sstevel@tonic-gate /* decrement frags but not pwfrags */ 3391*0Sstevel@tonic-gate raid_free_parent(ps, RFP_DECR_FRAGS | RFP_RLS_LOCK); 3392*0Sstevel@tonic-gate return; 3393*0Sstevel@tonic-gate 3394*0Sstevel@tonic-gate case RAID_PREWRITE_DONE: 3395*0Sstevel@tonic-gate /* 3396*0Sstevel@tonic-gate * completed writing data and parity to prewrite entries 3397*0Sstevel@tonic-gate */ 3398*0Sstevel@tonic-gate /* 3399*0Sstevel@tonic-gate * WARNING: don't release unit reader lock here.. 3400*0Sstevel@tonic-gate * decrement pwfrags but not frags 3401*0Sstevel@tonic-gate */ 3402*0Sstevel@tonic-gate raid_free_parent(ps, RFP_DECR_PWFRAGS); 3403*0Sstevel@tonic-gate cs->cs_flags |= MD_RCS_PWDONE; 3404*0Sstevel@tonic-gate cs->cs_frags = 2; 3405*0Sstevel@tonic-gate cs->cs_stage = RAID_WRITE_DONE; 3406*0Sstevel@tonic-gate cs->cs_call = raid_stage; 3407*0Sstevel@tonic-gate cs->cs_error_call = raid_write_error; 3408*0Sstevel@tonic-gate cs->cs_retry_call = raid_write_no_retry; 3409*0Sstevel@tonic-gate if (WRITE_ALT(un, cs->cs_pcolumn)) { 3410*0Sstevel@tonic-gate cs->cs_frags++; 3411*0Sstevel@tonic-gate raidio(cs, RIO_ALT | RIO_EXTRA | RIO_PARITY | 3412*0Sstevel@tonic-gate RIO_WRITE); 3413*0Sstevel@tonic-gate } 3414*0Sstevel@tonic-gate if (WRITE_ALT(un, cs->cs_dcolumn)) { 3415*0Sstevel@tonic-gate cs->cs_frags++; 3416*0Sstevel@tonic-gate raidio(cs, RIO_ALT | RIO_EXTRA | RIO_DATA | RIO_WRITE); 3417*0Sstevel@tonic-gate } 3418*0Sstevel@tonic-gate ASSERT(cs->cs_frags < 4); 3419*0Sstevel@tonic-gate raidio(cs, RIO_DATA | RIO_WRITE); 3420*0Sstevel@tonic-gate raidio(cs, RIO_PARITY | RIO_WRITE); 3421*0Sstevel@tonic-gate if (cs->cs_pw_inval_list) { 3422*0Sstevel@tonic-gate raid_free_pwinvalidate(cs); 3423*0Sstevel@tonic-gate } 3424*0Sstevel@tonic-gate return; 3425*0Sstevel@tonic-gate 3426*0Sstevel@tonic-gate case RAID_LINE_PWDONE: 3427*0Sstevel@tonic-gate ASSERT(cs->cs_frags == 0); 3428*0Sstevel@tonic-gate raid_free_parent(ps, RFP_DECR_PWFRAGS); 3429*0Sstevel@tonic-gate cs->cs_flags |= MD_RCS_PWDONE; 3430*0Sstevel@tonic-gate cs->cs_frags = un->un_origcolumncnt; 3431*0Sstevel@tonic-gate cs->cs_call = raid_stage; 3432*0Sstevel@tonic-gate cs->cs_error_call = raid_write_error; 3433*0Sstevel@tonic-gate cs->cs_retry_call = raid_write_no_retry; 3434*0Sstevel@tonic-gate cs->cs_stage = RAID_WRITE_DONE; 3435*0Sstevel@tonic-gate for (cbuf = cs->cs_buflist; cbuf; cbuf = cbuf->cbuf_next) { 3436*0Sstevel@tonic-gate /* 3437*0Sstevel@tonic-gate * fill in buffer for write to prewrite area 3438*0Sstevel@tonic-gate */ 3439*0Sstevel@tonic-gate bp = &cbuf->cbuf_bp; 3440*0Sstevel@tonic-gate bp->b_back = bp; 3441*0Sstevel@tonic-gate bp->b_forw = bp; 3442*0Sstevel@tonic-gate bp->b_un.b_addr = cbuf->cbuf_buffer + DEV_BSIZE; 3443*0Sstevel@tonic-gate bp->b_bcount = cbuf->cbuf_bcount; 3444*0Sstevel@tonic-gate bp->b_bufsize = cbuf->cbuf_bcount; 3445*0Sstevel@tonic-gate bp->b_lblkno = 3446*0Sstevel@tonic-gate un->un_column[cbuf->cbuf_column].un_devstart + 3447*0Sstevel@tonic-gate cs->cs_blkno; 3448*0Sstevel@tonic-gate bp->b_flags &= ~(B_READ | B_WRITE | B_ERROR); 3449*0Sstevel@tonic-gate bp->b_flags &= ~nv_available; 3450*0Sstevel@tonic-gate bp->b_flags |= B_WRITE | B_BUSY; 3451*0Sstevel@tonic-gate bp->b_iodone = (int (*)())raid_done; 3452*0Sstevel@tonic-gate bp->b_edev = md_dev64_to_dev( 3453*0Sstevel@tonic-gate un->un_column[cbuf->cbuf_column].un_dev); 3454*0Sstevel@tonic-gate bp->b_chain = (struct buf 
*)cs;
3455*0Sstevel@tonic-gate 			private = cs->cs_strategy_private;
3456*0Sstevel@tonic-gate 			flag = cs->cs_strategy_flag;
3457*0Sstevel@tonic-gate 			md_call_strategy(bp, flag, private);
3458*0Sstevel@tonic-gate 		}
3459*0Sstevel@tonic-gate 		raidio(cs, RIO_DATA | RIO_WRITE);
3460*0Sstevel@tonic-gate 		raidio(cs, RIO_PARITY | RIO_WRITE);
3461*0Sstevel@tonic-gate 		if (cs->cs_pw_inval_list) {
3462*0Sstevel@tonic-gate 			raid_free_pwinvalidate(cs);
3463*0Sstevel@tonic-gate 		}
3464*0Sstevel@tonic-gate 		return;
3465*0Sstevel@tonic-gate 
3466*0Sstevel@tonic-gate 	default:
3467*0Sstevel@tonic-gate 		ASSERT(0);
3468*0Sstevel@tonic-gate 		break;
3469*0Sstevel@tonic-gate 	}
3470*0Sstevel@tonic-gate }
3471*0Sstevel@tonic-gate /*
3472*0Sstevel@tonic-gate  * NAME:	md_raid_strategy
3473*0Sstevel@tonic-gate  * DESCRIPTION:	RAID metadevice I/O operations entry point.
3474*0Sstevel@tonic-gate  * PARAMETERS:	buf_t *pb - pointer to a user I/O buffer
3475*0Sstevel@tonic-gate  *		int flag - metadevice specific flag
3476*0Sstevel@tonic-gate  *		void *private - opaque strategy data, carried through to md_call_strategy()
3477*0Sstevel@tonic-gate  *
3478*0Sstevel@tonic-gate  */
3479*0Sstevel@tonic-gate 
3480*0Sstevel@tonic-gate void
3481*0Sstevel@tonic-gate md_raid_strategy(buf_t *pb, int flag, void *private)
3482*0Sstevel@tonic-gate {
3483*0Sstevel@tonic-gate 	md_raidps_t	*ps;
3484*0Sstevel@tonic-gate 	md_raidcs_t	*cs;
3485*0Sstevel@tonic-gate 	int	doing_writes;
3486*0Sstevel@tonic-gate 	int	err;
3487*0Sstevel@tonic-gate 	mr_unit_t	*un;
3488*0Sstevel@tonic-gate 	mdi_unit_t	*ui;
3489*0Sstevel@tonic-gate 	size_t	count;
3490*0Sstevel@tonic-gate 	diskaddr_t	blkno;
3491*0Sstevel@tonic-gate 	caddr_t	addr;
3492*0Sstevel@tonic-gate 	off_t	offset;
3493*0Sstevel@tonic-gate 	int	colcnt;
3494*0Sstevel@tonic-gate 	minor_t	mnum;
3495*0Sstevel@tonic-gate 	set_t	setno;
3496*0Sstevel@tonic-gate 
3497*0Sstevel@tonic-gate 	ui = MDI_UNIT(getminor(pb->b_edev));
3498*0Sstevel@tonic-gate 	md_kstat_waitq_enter(ui);
3499*0Sstevel@tonic-gate 	un = (mr_unit_t *)md_io_readerlock(ui);
3500*0Sstevel@tonic-gate 	setno = MD_MIN2SET(getminor(pb->b_edev));
3501*0Sstevel@tonic-gate 
3502*0Sstevel@tonic-gate 	if ((flag & MD_NOBLOCK) == 0) {
3503*0Sstevel@tonic-gate 		if (md_inc_iocount(setno) != 0) {
3504*0Sstevel@tonic-gate 			pb->b_flags |= B_ERROR;
3505*0Sstevel@tonic-gate 			pb->b_error = ENXIO;
3506*0Sstevel@tonic-gate 			pb->b_resid = pb->b_bcount;
3507*0Sstevel@tonic-gate 			md_io_readerexit(ui);
3508*0Sstevel@tonic-gate 			biodone(pb);
3509*0Sstevel@tonic-gate 			return;
3510*0Sstevel@tonic-gate 		}
3511*0Sstevel@tonic-gate 	} else {
3512*0Sstevel@tonic-gate 		md_inc_iocount_noblock(setno);
3513*0Sstevel@tonic-gate 	}
3514*0Sstevel@tonic-gate 
3515*0Sstevel@tonic-gate 	mnum = MD_SID(un);
3516*0Sstevel@tonic-gate 	colcnt = un->un_totalcolumncnt - 1;
3517*0Sstevel@tonic-gate 	count = pb->b_bcount;
3518*0Sstevel@tonic-gate 
3519*0Sstevel@tonic-gate 	STAT_CHECK(raid_512, count == 512);
3520*0Sstevel@tonic-gate 	STAT_CHECK(raid_1024, count == 1024);
3521*0Sstevel@tonic-gate 	STAT_CHECK(raid_1024_8192, count > 1024 && count < 8192);
3522*0Sstevel@tonic-gate 	STAT_CHECK(raid_8192, count == 8192);
3523*0Sstevel@tonic-gate 	STAT_CHECK(raid_8192_bigger, count > 8192);
3524*0Sstevel@tonic-gate 
3525*0Sstevel@tonic-gate 	(void *) md_unit_readerlock(ui);
3526*0Sstevel@tonic-gate 	if (!(flag & MD_STR_NOTTOP)) {
3527*0Sstevel@tonic-gate 		err = md_checkbuf(ui, (md_unit_t *)un, pb); /* check and map */
3528*0Sstevel@tonic-gate 		if (err != 0) {
3529*0Sstevel@tonic-gate 			md_kstat_waitq_exit(ui);
3530*0Sstevel@tonic-gate 			md_io_readerexit(ui);
3531*0Sstevel@tonic-gate 			return;
3532*0Sstevel@tonic-gate 		}
3533*0Sstevel@tonic-gate 	}
3534*0Sstevel@tonic-gate 	md_unit_readerexit(ui);
3535*0Sstevel@tonic-gate 
3536*0Sstevel@tonic-gate 	STAT_INC(raid_total_io);
3537*0Sstevel@tonic-gate 
3538*0Sstevel@tonic-gate 	/* allocate a parent structure for the user I/O */
3539*0Sstevel@tonic-gate 	ps = kmem_cache_alloc(raid_parent_cache, MD_ALLOCFLAGS);
3540*0Sstevel@tonic-gate 	raid_parent_init(ps);
3541*0Sstevel@tonic-gate 
3542*0Sstevel@tonic-gate 	/*
3543*0Sstevel@tonic-gate 	 * Save essential information from the original buf header
3544*0Sstevel@tonic-gate 	 * in the md_save structure.
3545*0Sstevel@tonic-gate 	 */
3546*0Sstevel@tonic-gate 	ps->ps_un = un;
3547*0Sstevel@tonic-gate 	ps->ps_ui = ui;
3548*0Sstevel@tonic-gate 	ps->ps_bp = pb;
3549*0Sstevel@tonic-gate 	ps->ps_addr = pb->b_un.b_addr;
3550*0Sstevel@tonic-gate 
3551*0Sstevel@tonic-gate 	if ((pb->b_flags & B_READ) == 0) {
3552*0Sstevel@tonic-gate 		ps->ps_flags |= MD_RPS_WRITE;
3553*0Sstevel@tonic-gate 		doing_writes = 1;
3554*0Sstevel@tonic-gate 		STAT_INC(raid_writes);
3555*0Sstevel@tonic-gate 	} else {
3556*0Sstevel@tonic-gate 		ps->ps_flags |= MD_RPS_READ;
3557*0Sstevel@tonic-gate 		doing_writes = 0;
3558*0Sstevel@tonic-gate 		STAT_INC(raid_reads);
3559*0Sstevel@tonic-gate 	}
3560*0Sstevel@tonic-gate 
3561*0Sstevel@tonic-gate 	count = lbtodb(pb->b_bcount);	/* transfer count (in blocks) */
3562*0Sstevel@tonic-gate 	blkno = pb->b_lblkno;		/* block number on device */
3563*0Sstevel@tonic-gate 	addr = 0;
3564*0Sstevel@tonic-gate 	offset = 0;
3565*0Sstevel@tonic-gate 	ps->ps_pwfrags = 1;
3566*0Sstevel@tonic-gate 	ps->ps_frags = 1;
3567*0Sstevel@tonic-gate 	md_kstat_waitq_to_runq(ui);
3568*0Sstevel@tonic-gate 
3569*0Sstevel@tonic-gate 	do {
3570*0Sstevel@tonic-gate 		cs = kmem_cache_alloc(raid_child_cache, MD_ALLOCFLAGS);
3571*0Sstevel@tonic-gate 		raid_child_init(cs);
3572*0Sstevel@tonic-gate 		cs->cs_ps = ps;
3573*0Sstevel@tonic-gate 		cs->cs_un = un;
3574*0Sstevel@tonic-gate 		cs->cs_mdunit = mnum;
3575*0Sstevel@tonic-gate 		cs->cs_strategy_flag = flag;
3576*0Sstevel@tonic-gate 		cs->cs_strategy_private = private;
3577*0Sstevel@tonic-gate 		cs->cs_addr = addr;
3578*0Sstevel@tonic-gate 		cs->cs_offset = offset;
3579*0Sstevel@tonic-gate 		count = raid_iosetup(un, blkno, count, cs);
3580*0Sstevel@tonic-gate 		if (cs->cs_flags & MD_RCS_LINE) {
3581*0Sstevel@tonic-gate 			blkno += (cs->cs_blkcnt * colcnt);
3582*0Sstevel@tonic-gate 			offset += (cs->cs_bcount * colcnt);
3583*0Sstevel@tonic-gate 		} else {
3584*0Sstevel@tonic-gate 			blkno += cs->cs_blkcnt;
3585*0Sstevel@tonic-gate 			offset += cs->cs_bcount;
3586*0Sstevel@tonic-gate 		}
3587*0Sstevel@tonic-gate 		/* for each cs bump up the ps_pwfrags and ps_frags fields */
3588*0Sstevel@tonic-gate 		if (count) {
3589*0Sstevel@tonic-gate 			mutex_enter(&ps->ps_mx);
3590*0Sstevel@tonic-gate 			ps->ps_pwfrags++;
3591*0Sstevel@tonic-gate 			ps->ps_frags++;
3592*0Sstevel@tonic-gate 			mutex_exit(&ps->ps_mx);
3593*0Sstevel@tonic-gate 			if (doing_writes)
3594*0Sstevel@tonic-gate 				(void) raid_write(un, cs);
3595*0Sstevel@tonic-gate 			else
3596*0Sstevel@tonic-gate 				(void) raid_read(un, cs);
3597*0Sstevel@tonic-gate 		}
3598*0Sstevel@tonic-gate 	} while (count);
3599*0Sstevel@tonic-gate 	if (doing_writes) {
3600*0Sstevel@tonic-gate 		(void) raid_write(un, cs);
3601*0Sstevel@tonic-gate 	} else
3602*0Sstevel@tonic-gate 		(void) raid_read(un, cs);
3603*0Sstevel@tonic-gate 
3604*0Sstevel@tonic-gate 	if (!(flag & MD_STR_NOTTOP) && panicstr) {
3605*0Sstevel@tonic-gate 		while (!(ps->ps_flags & MD_RPS_DONE)) {
3606*0Sstevel@tonic-gate 			md_daemon(1, &md_done_daemon);
3607*0Sstevel@tonic-gate 			drv_usecwait(10);
3608*0Sstevel@tonic-gate 		}
3609*0Sstevel@tonic-gate 		kmem_cache_free(raid_parent_cache, ps);
3610*0Sstevel@tonic-gate 	}
3611*0Sstevel@tonic-gate }
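/*
 * The do/while loop in md_raid_strategy() above slices one user buffer
 * into child requests: raid_iosetup() maps as much as fits in one
 * line/segment and returns the count still remaining.  Children are
 * issued inside the loop only while more work remains; the final child
 * is issued after the loop, so the parent's frag counts are bumped
 * exactly once per extra child.  Shape of the loop (helper names
 * hypothetical, not driver code):
 */
#if 0
do {
	cs = alloc_and_init_child(ps);			/* hypothetical */
	count = raid_iosetup(un, blkno, count, cs);	/* 0 => last child */
	if (count) {
		bump_parent_frags(ps);	/* another child will follow */
		issue(cs);		/* hypothetical */
	}
} while (count);
issue(cs);	/* the last child completes the parent */
#endif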
/*
 * NAMES: raid_snarf
 * DESCRIPTION: RAID metadevice SNARF entry point
 * PARAMETERS: md_snarfcmd_t cmd,
 *		set_t setno
 * RETURNS:
 */
static int
raid_snarf(md_snarfcmd_t cmd, set_t setno)
{
	mr_unit_t	*un;
	mddb_recid_t	recid;
	int		gotsomething;
	int		all_raid_gotten;
	mddb_type_t	typ1;
	uint_t		ncol;
	mddb_de_ic_t	*dep;
	mddb_rb32_t	*rbp;
	size_t		newreqsize;
	mr_unit_t	*big_un;
	mr_unit32_od_t	*small_un;

	if (cmd == MD_SNARF_CLEANUP)
		return (0);

	all_raid_gotten = 1;
	gotsomething = 0;
	typ1 = (mddb_type_t)md_getshared_key(setno,
	    raid_md_ops.md_driver.md_drivername);
	recid = mddb_makerecid(setno, 0);

	while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) {
		if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) {
			continue;
		}

		dep = mddb_getrecdep(recid);
		dep->de_flags = MDDB_F_RAID;
		rbp = dep->de_rb;
		if ((rbp->rb_revision == MDDB_REV_RB) &&
		    ((rbp->rb_private & MD_PRV_CONVD) == 0)) {
			/*
			 * We have an old, small record that has not yet
			 * been converted.  Before we create an incore
			 * metadevice from it we have to convert it to a
			 * big record.
			 */
			small_un = (mr_unit32_od_t *)mddb_getrecaddr(recid);
			ncol = small_un->un_totalcolumncnt;
			newreqsize = sizeof (mr_unit_t) +
			    ((ncol - 1) * sizeof (mr_column_t));
			big_un = (mr_unit_t *)kmem_zalloc(newreqsize, KM_SLEEP);
			raid_convert((caddr_t)small_un, (caddr_t)big_un,
			    SMALL_2_BIG);
			kmem_free(small_un, dep->de_reqsize);
			dep->de_rb_userdata = big_un;
			dep->de_reqsize = newreqsize;
			un = big_un;
			rbp->rb_private |= MD_PRV_CONVD;
		} else {
			/* Big device */
			un = (mr_unit_t *)mddb_getrecaddr(recid);
		}
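
		/*
		 * Editorial sketch (hypothetical values): for a
		 * three-column unit the conversion above allocates
		 *
		 *	newreqsize = sizeof (mr_unit_t) +
		 *	    (3 - 1) * sizeof (mr_column_t);
		 *
		 * presumably because mr_unit_t already embeds the first
		 * mr_column_t of the column array in its own size.
		 */
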
		/* Set revision and flag accordingly */
		if (rbp->rb_revision == MDDB_REV_RB) {
			un->c.un_revision = MD_32BIT_META_DEV;
		} else {
			un->c.un_revision = MD_64BIT_META_DEV;
			un->c.un_flag |= MD_EFILABEL;
		}

		/*
		 * Create minor device node for snarfed entry.
		 */
		(void) md_create_minor_node(MD_MIN2SET(MD_SID(un)), MD_SID(un));

		if (MD_UNIT(MD_SID(un)) != NULL) {
			mddb_setrecprivate(recid, MD_PRV_PENDDEL);
			continue;
		}
		all_raid_gotten = 0;
		if (raid_build_incore((void *)un, 1) == 0) {
			mddb_setrecprivate(recid, MD_PRV_GOTIT);
			md_create_unit_incore(MD_SID(un), &raid_md_ops, 1);
			gotsomething = 1;
		} else if (un->mr_ic) {
			kmem_free(un->un_column_ic, sizeof (mr_column_ic_t) *
			    un->un_totalcolumncnt);
			kmem_free(un->mr_ic, sizeof (*un->mr_ic));
		}
	}

	if (!all_raid_gotten) {
		return (gotsomething);
	}

	recid = mddb_makerecid(setno, 0);
	while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0)
		if (!(mddb_getrecprivate(recid) & MD_PRV_GOTIT))
			mddb_setrecprivate(recid, MD_PRV_PENDDEL);

	return (0);
}
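
/*
 * Usage sketch (editorial, hypothetical): the md framework normally
 * drives snarfing through the md_ops vector rather than directly, but
 * the calling convention implied above would look like
 *
 *	while (raid_snarf(MD_SNARF_DOIT, setno) != 0)
 *		;	(keep snarfing until every record is claimed)
 *
 * where MD_SNARF_DOIT is assumed to be the counterpart of the
 * MD_SNARF_CLEANUP command handled at the top of the function.
 */
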
/*
 * NAMES: raid_halt
 * DESCRIPTION: RAID metadevice HALT entry point
 * PARAMETERS: md_haltcmd_t cmd -
 *		set_t setno -
 * RETURNS:
 */
static int
raid_halt(md_haltcmd_t cmd, set_t setno)
{
	set_t		i;
	mdi_unit_t	*ui;
	minor_t		mnum;

	if (cmd == MD_HALT_CLOSE)
		return (0);

	if (cmd == MD_HALT_OPEN)
		return (0);

	if (cmd == MD_HALT_UNLOAD)
		return (0);

	if (cmd == MD_HALT_CHECK) {
		for (i = 0; i < md_nunits; i++) {
			mnum = MD_MKMIN(setno, i);
			if ((ui = MDI_UNIT(mnum)) == NULL)
				continue;
			if (ui->ui_opsindex != raid_md_ops.md_selfindex)
				continue;
			if (md_unit_isopen(ui))
				return (1);
		}
		return (0);
	}

	if (cmd != MD_HALT_DOIT)
		return (1);

	for (i = 0; i < md_nunits; i++) {
		mnum = MD_MKMIN(setno, i);
		if ((ui = MDI_UNIT(mnum)) == NULL)
			continue;
		if (ui->ui_opsindex != raid_md_ops.md_selfindex)
			continue;
		reset_raid((mr_unit_t *)MD_UNIT(mnum), mnum, 0);
	}
	return (0);
}

/*
 * NAMES: raid_close_all_devs
 * DESCRIPTION: Close all the devices of the unit.
 * PARAMETERS: mr_unit_t *un - pointer to unit structure
 *		int init_pw - re-initialize pre-write areas when set
 *		int md_cflags - RAID close flags
 * RETURNS:
 */
void
raid_close_all_devs(mr_unit_t *un, int init_pw, int md_cflags)
{
	int		i;
	mr_column_t	*device;

	for (i = 0; i < un->un_totalcolumncnt; i++) {
		device = &un->un_column[i];
		if (device->un_devflags & MD_RAID_DEV_ISOPEN) {
			ASSERT((device->un_dev != (md_dev64_t)0) &&
			    (device->un_dev != NODEV64));
			if ((device->un_devstate & RCS_OKAY) && init_pw)
				(void) init_pw_area(un, device->un_dev,
				    device->un_pwstart, i);
			md_layered_close(device->un_dev, md_cflags);
			device->un_devflags &= ~MD_RAID_DEV_ISOPEN;
		}
	}
}
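
/*
 * Usage sketch (editorial, hypothetical): a final close that also
 * re-initializes the pre-write areas of every healthy (RCS_OKAY)
 * column would be
 *
 *	raid_close_all_devs(un, 1, 0);
 *
 * while a probe-style close that must leave the pre-write areas
 * untouched would pass init_pw == 0.
 */
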
/*
 * NAMES: raid_open_all_devs
 * DESCRIPTION: Open all the components (columns) of the device unit.
 * PARAMETERS: mr_unit_t *un - pointer to unit structure
 *		int md_oflags - RAID open flags
 * RETURNS:
 */
static int
raid_open_all_devs(mr_unit_t *un, int md_oflags)
{
	minor_t		mnum = MD_SID(un);
	int		i;
	int		not_opened = 0;
	int		commit = 0;
	int		col = -1;
	mr_column_t	*device;
	set_t		setno = MD_MIN2SET(MD_SID(un));
	side_t		side = mddb_getsidenum(setno);
	mdkey_t		key;
	mdi_unit_t	*ui = MDI_UNIT(mnum);

	ui->ui_tstate &= ~MD_INACCESSIBLE;

	for (i = 0; i < un->un_totalcolumncnt; i++) {
		md_dev64_t tmpdev;

		device = &un->un_column[i];

		if (COLUMN_STATE(un, i) & RCS_ERRED) {
			not_opened++;
			continue;
		}

		if (device->un_devflags & MD_RAID_DEV_ISOPEN)
			continue;

		tmpdev = device->un_dev;
		/*
		 * Open by device id
		 */
		key = HOTSPARED(un, i) ?
		    device->un_hs_key : device->un_orig_key;
		if ((md_getmajor(tmpdev) != md_major) &&
		    md_devid_found(setno, side, key) == 1) {
			tmpdev = md_resolve_bydevid(mnum, tmpdev, key);
		}
		if (md_layered_open(mnum, &tmpdev, md_oflags)) {
			device->un_dev = tmpdev;
			not_opened++;
			continue;
		}
		device->un_dev = tmpdev;
		device->un_devflags |= MD_RAID_DEV_ISOPEN;
	}

	/*
	 * The unit can still run if at most one column is errored or
	 * failed to open; with two or more failures the open fails.
	 */
	if (not_opened > 1) {
		cmn_err(CE_WARN,
		    "md: %s failed to open. open error on %s\n",
		    md_shortname(MD_SID(un)),
		    md_devname(MD_UN2SET(un), device->un_orig_dev,
		    NULL, 0));

		ui->ui_tstate |= MD_INACCESSIBLE;

		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OPEN_FAIL, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));

		return (not_opened > 1);
	}

	for (i = 0; i < un->un_totalcolumncnt; i++) {
		device = &un->un_column[i];
		if (device->un_devflags & MD_RAID_DEV_ISOPEN) {
			if (device->un_devstate & RCS_LAST_ERRED) {
				/*
				 * At this point in time there is a
				 * possibility that errors were the result
				 * of a controller failure with more than a
				 * single column on it, so clear out the
				 * last-errored columns and let errors
				 * re-occur if necessary.
				 */
				raid_set_state(un, i, RCS_OKAY, 0);
				commit++;
			}
			continue;
		}
		ASSERT(col == -1);
		col = i;
	}

	if (col != -1) {
		raid_set_state(un, col, RCS_ERRED, 0);
		commit++;
	}

	if (commit)
		raid_commit(un, NULL);

	if (col != -1) {
		if (COLUMN_STATE(un, col) & RCS_ERRED) {
			SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED,
			    SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
		} else if (COLUMN_STATE(un, col) & RCS_LAST_ERRED) {
			SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED,
			    SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
		}
	}

	return (0);
}
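
/*
 * Illustrative example (editorial, hypothetical states): on a
 * five-column unit where exactly one column fails md_layered_open(),
 * that column is marked RCS_ERRED and the open succeeds, letting the
 * unit run in degraded mode.  With two failures, not_opened == 2, the
 * unit is flagged MD_INACCESSIBLE and the open fails.
 */
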
/*
 * NAMES: raid_internal_open
 * DESCRIPTION: Do the actual RAID open
 * PARAMETERS: minor_t mnum - minor number of the RAID device
 *		int flag -
 *		int otyp -
 *		int md_oflags - RAID open flags
 * RETURNS: 0 if successful, nonzero otherwise
 */
int
raid_internal_open(minor_t mnum, int flag, int otyp, int md_oflags)
{
	mr_unit_t	*un;
	mdi_unit_t	*ui;
	int		err = 0;
	int		replay_error = 0;

	ui = MDI_UNIT(mnum);
	ASSERT(ui != NULL);

	un = (mr_unit_t *)md_unit_openclose_enter(ui);
	/*
	 * This MUST be checked before md_unit_isopen is checked.
	 * raid_init_columns sets md_unit_isopen to block reset, halt.
	 */
	if ((UNIT_STATE(un) & (RUS_INIT | RUS_DOI)) &&
	    !(md_oflags & MD_OFLG_ISINIT)) {
		md_unit_openclose_exit(ui);
		return (EAGAIN);
	}

	if ((md_oflags & MD_OFLG_ISINIT) || md_unit_isopen(ui)) {
		err = md_unit_incopen(mnum, flag, otyp);
		goto out;
	}

	md_unit_readerexit(ui);

	un = (mr_unit_t *)md_unit_writerlock(ui);
	if (raid_open_all_devs(un, md_oflags) == 0) {
		if ((err = md_unit_incopen(mnum, flag, otyp)) != 0) {
			md_unit_writerexit(ui);
			un = (mr_unit_t *)md_unit_readerlock(ui);
			raid_close_all_devs(un, 0, md_oflags);
			goto out;
		}
	} else {
		/*
		 * If this unit contains more than one errored component,
		 * return an error and close all opened devices.
		 */
		md_unit_writerexit(ui);
		un = (mr_unit_t *)md_unit_readerlock(ui);
		raid_close_all_devs(un, 0, md_oflags);
		md_unit_openclose_exit(ui);
		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OPEN_FAIL, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));
		return (ENXIO);
	}

	if (!(MD_STATUS(un) & MD_UN_REPLAYED)) {
		replay_error = raid_replay(un);
		MD_STATUS(un) |= MD_UN_REPLAYED;
	}

	md_unit_writerexit(ui);
	un = (mr_unit_t *)md_unit_readerlock(ui);

	if ((replay_error == RAID_RPLY_READONLY) &&
	    ((flag & (FREAD | FWRITE)) == FREAD)) {
		md_unit_openclose_exit(ui);
		return (0);
	}

	/* allocate hotspare if possible */
	(void) raid_hotspares();

out:
	md_unit_openclose_exit(ui);
	return (err);
}
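
/*
 * Illustrative caller sketch (editorial, hypothetical): a layered
 * consumer opening a RAID unit by minor number and pairing it with the
 * matching close.
 *
 *	if (raid_internal_open(mnum, FREAD | FWRITE, OTYP_LYR, 0) == 0) {
 *		... use the unit ...
 *		(void) raid_internal_close(mnum, OTYP_LYR, 1, 0);
 *	}
 */
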
/*
 * NAMES: raid_open
 * DESCRIPTION: RAID metadevice OPEN entry point
 * PARAMETERS: dev_t *dev -
 *		int flag -
 *		int otyp -
 *		cred_t *cred_p -
 *		int md_oflags -
 * RETURNS:
 */
/*ARGSUSED1*/
static int
raid_open(dev_t *dev, int flag, int otyp, cred_t *cred_p, int md_oflags)
{
	int	error = 0;

	if (error = raid_internal_open(getminor(*dev), flag, otyp,
	    md_oflags)) {
		return (error);
	}
	return (0);
}

/*
 * NAMES: raid_internal_close
 * DESCRIPTION: RAID metadevice CLOSE actual implementation
 * PARAMETERS: minor_t mnum - minor number of the RAID device
 *		int otyp -
 *		int init_pw -
 *		int md_cflags - RAID close flags
 * RETURNS: 0 if successful, nonzero otherwise
 */
/*ARGSUSED*/
int
raid_internal_close(minor_t mnum, int otyp, int init_pw, int md_cflags)
{
	mdi_unit_t	*ui = MDI_UNIT(mnum);
	mr_unit_t	*un;
	int		err = 0;

	/* single thread */
	un = (mr_unit_t *)md_unit_openclose_enter(ui);

	/* count closed */
	if ((err = md_unit_decopen(mnum, otyp)) != 0)
		goto out;

	/* close devices, if necessary */
	if (!md_unit_isopen(ui) || (md_cflags & MD_OFLG_PROBEDEV)) {
		raid_close_all_devs(un, init_pw, md_cflags);
	}

	/* unlock, return success */
out:
	md_unit_openclose_exit(ui);
	return (err);
}

/*
 * NAMES: raid_close
 * DESCRIPTION: RAID metadevice close entry point
 * PARAMETERS: dev_t dev -
 *		int flag -
 *		int otyp -
 *		cred_t *cred_p -
 *		int md_cflags -
 * RETURNS:
 */
/*ARGSUSED1*/
static int
raid_close(dev_t dev, int flag, int otyp, cred_t *cred_p, int md_cflags)
{
	int	retval;

	(void) md_io_writerlock(MDI_UNIT(getminor(dev)));
	retval = raid_internal_close(getminor(dev), otyp, 1, md_cflags);
	(void) md_io_writerexit(MDI_UNIT(getminor(dev)));
	return (retval);
}

/*
 * raid_probe_close_all_devs
 */
void
raid_probe_close_all_devs(mr_unit_t *un)
{
	int		i;
	mr_column_t	*device;

	for (i = 0; i < un->un_totalcolumncnt; i++) {
		device = &un->un_column[i];

		if (device->un_devflags & MD_RAID_DEV_PROBEOPEN) {
			md_layered_close(device->un_dev,
			    MD_OFLG_PROBEDEV);
			device->un_devflags &= ~MD_RAID_DEV_PROBEOPEN;
		}
	}
}
/*
 * Raid_probe_dev:
 *
 * On entry the unit writerlock is held
 */
static int
raid_probe_dev(mdi_unit_t *ui, minor_t mnum)
{
	mr_unit_t	*un;
	int		i;
	int		not_opened = 0;
	int		commit = 0;
	int		col = -1;
	mr_column_t	*device;
	int		md_devopen = 0;

	if (md_unit_isopen(ui))
		md_devopen++;

	un = MD_UNIT(mnum);
	/*
	 * If the state has been set to LAST_ERRED because
	 * of an error when the raid device was open at some
	 * point in the past, don't probe. We really don't want
	 * to reset the state in this case.
	 */
	if (UNIT_STATE(un) == RUS_LAST_ERRED)
		return (0);

	ui->ui_tstate &= ~MD_INACCESSIBLE;

	for (i = 0; i < un->un_totalcolumncnt; i++) {
		md_dev64_t tmpdev;

		device = &un->un_column[i];
		if (COLUMN_STATE(un, i) & RCS_ERRED) {
			not_opened++;
			continue;
		}

		tmpdev = device->un_dev;
		/*
		 * Currently the flags passed are not needed since
		 * there cannot be an underlying metadevice. However
		 * they are kept here for consistency.
		 *
		 * Open by device id
		 */
		tmpdev = md_resolve_bydevid(mnum, tmpdev, HOTSPARED(un, i) ?
		    device->un_hs_key : device->un_orig_key);
		if (md_layered_open(mnum, &tmpdev,
		    MD_OFLG_CONT_ERRS | MD_OFLG_PROBEDEV)) {
			device->un_dev = tmpdev;
			not_opened++;
			continue;
		}
		device->un_dev = tmpdev;

		device->un_devflags |= MD_RAID_DEV_PROBEOPEN;
	}

	/*
	 * The code below is careful on setting the LAST_ERRED state.
	 *
	 * If open errors and exactly one device has failed we can run.
	 * If more than one device fails we have to figure out when to set
	 * LAST_ERRED state.  The rationale is to avoid unnecessary resyncs
	 * since they are painful and time consuming.
	 *
	 * When more than one component/column fails there are two scenarios.
	 *
	 * 1. Metadevice has NOT been opened: In this case, the behavior
	 *    mimics the open semantics, i.e. only the first failed device
	 *    is ERRED and LAST_ERRED is not set.
	 *
	 * 2. Metadevice has been opened: Here the read/write semantics are
	 *    followed.  The first failed device is ERRED and on the next
	 *    failed device LAST_ERRED is set.
	 */

	if (not_opened > 1 && !md_devopen) {
		cmn_err(CE_WARN,
		    "md: %s failed to open. open error on %s\n",
		    md_shortname(MD_SID(un)),
		    md_devname(MD_UN2SET(un), device->un_orig_dev,
		    NULL, 0));
		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OPEN_FAIL, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));
		raid_probe_close_all_devs(un);
		ui->ui_tstate |= MD_INACCESSIBLE;
		return (not_opened > 1);
	}

	if (!md_devopen) {
		for (i = 0; i < un->un_totalcolumncnt; i++) {
			device = &un->un_column[i];
			if (device->un_devflags & MD_RAID_DEV_PROBEOPEN) {
				if (device->un_devstate & RCS_LAST_ERRED) {
					/*
					 * At this point in time there is a
					 * possibility that errors were the
					 * result of a controller failure with
					 * more than a single column on it, so
					 * clear out the last-errored columns
					 * and let errors re-occur if
					 * necessary.
					 */
					raid_set_state(un, i, RCS_OKAY, 0);
					commit++;
				}
				continue;
			}
			ASSERT(col == -1);
			/*
			 * Note: if multiple devices are failing then only
			 * the last one is marked as errored.
			 */
			col = i;
		}

		if (col != -1) {
			raid_set_state(un, col, RCS_ERRED, 0);
			commit++;
		}

	} else {
		for (i = 0; i < un->un_totalcolumncnt; i++) {
			device = &un->un_column[i];

			/* if we have LAST_ERRED go ahead and commit */
			if (un->un_state & RUS_LAST_ERRED)
				break;
			/*
			 * could not open the component
			 */
			if (!(device->un_devflags & MD_RAID_DEV_PROBEOPEN)) {
				col = i;
				raid_set_state(un, col, RCS_ERRED, 0);
				commit++;
			}
		}
	}

	if (commit)
		raid_commit(un, NULL);

	if (col != -1) {
		if (COLUMN_STATE(un, col) & RCS_ERRED) {
			SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED,
			    SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
		} else if (COLUMN_STATE(un, col) & RCS_LAST_ERRED) {
			SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED,
			    SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
		}
	}

	raid_probe_close_all_devs(un);
	return (0);
}
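
/*
 * Illustrative example (editorial, hypothetical states): probing a
 * five-column unit that is not open and has two failing columns, only
 * the last-scanned failed column is marked RCS_ERRED, mirroring the
 * open semantics above.  If the unit is open instead, each unopenable
 * column is marked RCS_ERRED in turn, which presumably drives the unit
 * to LAST_ERRED on the second failure.
 */
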
static int
raid_imp_set(
	set_t	setno
)
{
	mddb_recid_t	recid;
	int		i, gotsomething;
	mddb_type_t	typ1;
	mddb_de_ic_t	*dep;
	mddb_rb32_t	*rbp;
	mr_unit_t	*un64;
	mr_unit32_od_t	*un32;
	minor_t		*self_id;	/* minor needs to be updated */
	md_parent_t	*parent_id;	/* parent needs to be updated */
	mddb_recid_t	*record_id;	/* record id needs to be updated */
	hsp_t		*hsp_id;

	gotsomething = 0;

	typ1 = (mddb_type_t)md_getshared_key(setno,
	    raid_md_ops.md_driver.md_drivername);
	recid = mddb_makerecid(setno, 0);

	while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) {
		if (mddb_getrecprivate(recid) & MD_PRV_GOTIT)
			continue;

		dep = mddb_getrecdep(recid);
		rbp = dep->de_rb;

		if (rbp->rb_revision == MDDB_REV_RB) {
			/*
			 * Small device
			 */
			un32 = (mr_unit32_od_t *)mddb_getrecaddr(recid);
			self_id = &(un32->c.un_self_id);
			parent_id = &(un32->c.un_parent);
			record_id = &(un32->c.un_record_id);
			hsp_id = &(un32->un_hsp_id);

			for (i = 0; i < un32->un_totalcolumncnt; i++) {
				mr_column32_od_t *device;

				device = &un32->un_column[i];
				if (!md_update_minor(setno,
				    mddb_getsidenum(setno),
				    device->un_orig_key))
					goto out;

				if (device->un_hs_id != 0)
					device->un_hs_id = MAKERECID(
					    setno, device->un_hs_id);
			}
		} else {
			un64 = (mr_unit_t *)mddb_getrecaddr(recid);
			self_id = &(un64->c.un_self_id);
			parent_id = &(un64->c.un_parent);
			record_id = &(un64->c.un_record_id);
			hsp_id = &(un64->un_hsp_id);

			for (i = 0; i < un64->un_totalcolumncnt; i++) {
				mr_column_t *device;

				device = &un64->un_column[i];
				if (!md_update_minor(setno,
				    mddb_getsidenum(setno),
				    device->un_orig_key))
					goto out;

				if (device->un_hs_id != 0)
					device->un_hs_id = MAKERECID(
					    setno, device->un_hs_id);
			}
		}

		/*
		 * Update unit with the imported setno
		 */
		mddb_setrecprivate(recid, MD_PRV_GOTIT);

		*self_id = MD_MKMIN(setno, MD_MIN2UNIT(*self_id));

		if (*hsp_id != -1)
			*hsp_id = MAKERECID(setno, DBID(*hsp_id));

		if (*parent_id != MD_NO_PARENT)
			*parent_id = MD_MKMIN(setno, MD_MIN2UNIT(*parent_id));
		*record_id = MAKERECID(setno, DBID(*record_id));
		gotsomething = 1;
	}

out:
	return (gotsomething);
}
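
/*
 * Illustrative example (editorial, hypothetical values): importing into
 * set 2, a unit whose old self id named unit 7 is re-keyed as
 *
 *	*self_id = MD_MKMIN(2, MD_MIN2UNIT(*self_id));
 *
 * while its database record keeps the same DBID under the new set:
 *
 *	*record_id = MAKERECID(2, DBID(*record_id));
 */
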
static md_named_services_t raid_named_services[] = {
	{raid_hotspares,			"poke hotspares"	},
	{raid_rename_check,			MDRNM_CHECK		},
	{raid_rename_lock,			MDRNM_LOCK		},
	{(intptr_t (*)()) raid_rename_unlock,	MDRNM_UNLOCK		},
	{(intptr_t (*)()) raid_probe_dev,	"probe open test"	},
	{NULL,					0			}
};

md_ops_t raid_md_ops = {
	raid_open,		/* open */
	raid_close,		/* close */
	md_raid_strategy,	/* strategy */
	NULL,			/* print */
	NULL,			/* dump */
	NULL,			/* read */
	NULL,			/* write */
	md_raid_ioctl,		/* ioctl */
	raid_snarf,		/* raid_snarf */
	raid_halt,		/* raid_halt */
	NULL,			/* aread */
	NULL,			/* awrite */
	raid_imp_set,		/* import set */
	raid_named_services
};

static void
init_init()
{
	/* default to half a second (md_hz is ticks per second) */
	if (md_wr_wait == 0)
		md_wr_wait = md_hz >> 1;

	raid_parent_cache = kmem_cache_create("md_raid_parent",
	    sizeof (md_raidps_t), 0, raid_parent_constructor,
	    raid_parent_destructor, raid_run_queue, NULL, NULL, 0);
	/*
	 * The child structure embeds a buf_t; the cache is sized with
	 * biosize() so the embedded buf has room for any driver
	 * extensions to the buf structure.
	 */
	raid_child_cache = kmem_cache_create("md_raid_child",
	    sizeof (md_raidcs_t) - sizeof (buf_t) + biosize(), 0,
	    raid_child_constructor, raid_child_destructor,
	    raid_run_queue, NULL, NULL, 0);
	raid_cbuf_cache = kmem_cache_create("md_raid_cbufs",
	    sizeof (md_raidcbuf_t), 0, raid_cbuf_constructor,
	    raid_cbuf_destructor, raid_run_queue, NULL, NULL, 0);
}

static void
fini_uninit()
{
	kmem_cache_destroy(raid_parent_cache);
	kmem_cache_destroy(raid_child_cache);
	kmem_cache_destroy(raid_cbuf_cache);
	raid_parent_cache = raid_child_cache = raid_cbuf_cache = NULL;
}

/* define the module linkage */
MD_PLUGIN_MISC_MODULE("raid module %I%", init_init(), fini_uninit())
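
/*
 * Editorial note: MD_PLUGIN_MISC_MODULE presumably expands to the
 * standard _init/_fini/_info entry points for an md plug-in module.
 * As a hypothetical tuning sketch, the write-wait default set in
 * init_init() could be overridden from /etc/system before the module
 * loads:
 *
 *	set md:md_wr_wait = 50
 *
 * (assuming md_wr_wait is a global exported by the md module, as its
 * use above suggests).
 */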