/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * NAME:	raid_resync.c
 * DESCRIPTION: RAID driver source file containing routines related to resync
 *		operation.
 * ROUTINES PROVIDED FOR EXTERNAL USE:
 *	resync_request() - get resync lock if available
 *	release_resync_request() - relinquish resync lock
 *	erred_check_line() - provide write instruction for erred column
 *	init_pw_area() - initialize pre-write area
 *	copy_pw_area() - copy pre-write area from one device to another
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/user.h>
#include <sys/uio.h>
#include <sys/t_lock.h>
#include <sys/buf.h>
#include <sys/dkio.h>
#include <sys/vtoc.h>
#include <sys/kmem.h>
#include <vm/page.h>
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/mkdev.h>
#include <sys/stat.h>
#include <sys/open.h>
#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/lvm/md_raid.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/svm.h>

#define	NOCOLUMN	(-1)

extern md_set_t		md_set[];
extern kmem_cache_t	*raid_child_cache;
extern kmem_cache_t	*raid_parent_cache;
extern md_resync_t	md_cpr_resync;
extern major_t		md_major;

extern void	raid_parent_init(md_raidps_t *ps);
extern void	raid_child_init(md_raidcs_t *cs);

/*
 * NAMES:	xor
 * DESCRIPTION: Xor two chunks of data together.  The data referenced by
 *		addr1 and addr2 are xor'd together for size bytes and written
 *		into addr1.
 * PARAMETERS:	caddr_t addr1 - address of first chunk of data and destination
 *		caddr_t addr2 - address of second chunk of data
 *		size_t size - number of bytes to xor
 */
static void
xor(caddr_t addr1, caddr_t addr2, size_t size)
{
    while (size--) {
        *addr1++ ^= *addr2++;
    }
}

/*
 * NAME:	release_resync_request
 *
 * DESCRIPTION: Release resync active flag and reset unit values accordingly.
 *
 * PARAMETERS:	minor_t mnum - minor number identity of metadevice
 *
 * LOCKS:	Expects Unit Writer Lock to be held across call.
 */
void
release_resync_request(
    minor_t	mnum
)
{
    mr_unit_t	*un;

    un = MD_UNIT(mnum);
    ASSERT(un != NULL);

    un->c.un_status &= ~MD_UN_RESYNC_ACTIVE;

    un->un_column[un->un_resync_index].un_devflags &= ~MD_RAID_RESYNC;
    un->un_column[un->un_resync_index].un_devflags &= ~MD_RAID_RESYNC_ERRED;
    un->un_column[un->un_resync_index].un_devflags &=
        ~(MD_RAID_COPY_RESYNC | MD_RAID_REGEN_RESYNC);

    un->un_resync_line_index = 0;
    un->un_resync_index = NOCOLUMN;
}
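/*
 * Background note (illustrative, not part of the original driver logic):
 * resync relies on the RAID-5 parity identity maintained with xor() above.
 * For data columns D0..Dn-1 and parity column P,
 *
 *	P = D0 ^ D1 ^ ... ^ Dn-1
 *
 * so any single missing column can be regenerated by xor'ing together all
 * of the surviving columns, e.g. D1 = P ^ D0 ^ D2 ^ ... ^ Dn-1.  This is
 * the accumulation that raid_resync_region() below performs whenever it
 * cannot simply copy the column's contents from a good hotspare.
 */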
/*
 * NAME:	resync_request
 *
 * DESCRIPTION: Request resync.  If resync is available (no current active
 *		resync), mark unit as resync active and initialize.
 *
 * PARAMETERS:	minor_t mnum - minor number identity of metadevice
 *		int column_index - index of column to resync
 *		int copysize - copysize of ioctl request
 *		md_error_t *ep - error output parameter
 *
 * RETURN:	0 if resync is available, 1 otherwise.
 *
 * LOCKS:	Expects Unit Writer Lock to be held across call.
 *
 * NOTE:	Sets un_resync_copysize to the input value in copysize, the
 *		existing value from an incomplete previous resync with an
 *		input value in copysize, or the lesser of the unit segment
 *		size or maxio.
 */
/* ARGSUSED */
int
resync_request(
    minor_t	mnum,
    int		column_index,
    size_t	copysize,
    md_error_t	*mde
)
{
    mr_unit_t	*un;

    un = MD_UNIT(mnum);
    ASSERT(un != NULL);

    /* if resync or grow not already active, set resync active for unit */
    if (! (un->un_column[column_index].un_devflags & MD_RAID_RESYNC) &&
        ((un->c.un_status & MD_UN_RESYNC_ACTIVE) ||
        (un->c.un_status & MD_UN_GROW_PENDING) ||
        (un->un_column[column_index].un_devstate & RCS_RESYNC))) {
        if (mde)
            return (mdmderror(mde, MDE_GROW_DELAYED, mnum));
        return (1);
    }

    if (un->un_column[column_index].un_devstate &
        (RCS_ERRED | RCS_LAST_ERRED))
        un->un_column[column_index].un_devflags |= MD_RAID_DEV_ERRED;
    else
        un->un_column[column_index].un_devflags &= ~MD_RAID_DEV_ERRED;
    un->c.un_status |= MD_UN_RESYNC_ACTIVE;
    un->un_resync_index = column_index;
    un->un_resync_line_index = 0;
    raid_set_state(un, column_index, RCS_RESYNC, 0);

    return (0);
}

/*
 * NAME:	alloc_bufs
 *
 * DESCRIPTION: Allocate the resync data and parity buffers.
 *
 * PARAMETERS:	md_raidcs_t *cs - child save structure
 *		size_t bsize - size of each buffer
 */
static void
alloc_bufs(md_raidcs_t *cs, size_t bsize)
{
    /* allocate the data and parity buffers; the write path reuses these */
    cs->cs_dbuffer = kmem_zalloc(bsize, KM_SLEEP);
    cs->cs_pbuffer = kmem_zalloc(bsize, KM_SLEEP);
}

void
init_buf(buf_t *bp, int flags, size_t size)
{
    /* zero buf */
    bzero((caddr_t)bp, sizeof (buf_t));

    /* set b_back and b_forw to point back to buf */
    bp->b_back = bp;
    bp->b_forw = bp;

    /* set flags and size */
    bp->b_flags = flags;
    bp->b_bufsize = size;
    bp->b_offset = -1;

    /* setup semaphores */
    sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
    sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL);
}

void
destroy_buf(buf_t *bp)
{
    sema_destroy(&bp->b_io);
    sema_destroy(&bp->b_sem);
}

void
reset_buf(buf_t *bp, int flags, size_t size)
{
    destroy_buf(bp);
    init_buf(bp, flags, size);
}

/*
 * NAME:	free_bufs
 *
 * DESCRIPTION: Free up the resync data and parity buffers.
 *
 * PARAMETERS:	size_t bsize - size of each buffer
 *		md_raidcs_t *cs - child save structure
 */
static void
free_bufs(size_t bsize, md_raidcs_t *cs)
{
    kmem_free(cs->cs_dbuffer, bsize);
    kmem_free(cs->cs_pbuffer, bsize);
}
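/*
 * Background note (illustrative): the pre-write area is a small region at
 * the start of each column device where the RAID driver logs data and
 * parity before writing them in place, so that a write interrupted by a
 * crash can be recovered.  A resync target starts out with stale pre-write
 * contents, which is why init_pw_area() below zero-fills every pre-write
 * slot before the column is brought into service.
 */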
/*
 * NAME:	init_pw_area
 *
 * DESCRIPTION: Initialize pre-write area to all zeros.
 *
 * PARAMETERS:	mr_unit_t *un - raid unit structure
 *		md_dev64_t dev_to_write - device whose pre-write area is
 *			initialized
 *		diskaddr_t pwstart - block offset of the pre-write area
 *		uint_t col - index of column being resynced
 *
 * RETURN:	1 if write error on resync device, otherwise 0
 *
 * LOCKS:	Expects Unit Reader Lock to be held across call.
 */
int
init_pw_area(
    mr_unit_t	*un,
    md_dev64_t	dev_to_write,
    diskaddr_t	pwstart,
    uint_t	col
)
{
    buf_t	buf;
    caddr_t	databuffer;
    size_t	copysize;
    size_t	bsize;
    int		error = 0;
    int		i;

    ASSERT(un != NULL);
    ASSERT(un->un_column[col].un_devflags & MD_RAID_DEV_ISOPEN);

    bsize = un->un_iosize;
    copysize = dbtob(bsize);
    databuffer = kmem_zalloc(copysize, KM_SLEEP);
    init_buf(&buf, (B_BUSY | B_WRITE), copysize);

    for (i = 0; i < un->un_pwcnt; i++) {
        /* magic field is 0 for 4.0 compatibility */
        RAID_FILLIN_RPW(databuffer, un, 0, 0, 0, 0, 0, 0, col, 0);
        buf.b_un.b_addr = (caddr_t)databuffer;
        buf.b_edev = md_dev64_to_dev(dev_to_write);
        buf.b_bcount = dbtob(bsize);
        buf.b_lblkno = pwstart + (i * un->un_iosize);

        /* write buf */
        (void) md_call_strategy(&buf, MD_STR_NOTTOP, NULL);

        if (biowait(&buf)) {
            error = 1;
            break;
        }

        reset_buf(&buf, (B_BUSY | B_WRITE), copysize);
    } /* for */

    destroy_buf(&buf);
    kmem_free(databuffer, copysize);

    return (error);
}

/*
 * NAME:	raid_open_alt
 *
 * DESCRIPTION: opens the alt device used during resync.
 *
 * PARAMETERS:	un - raid unit structure
 *		index - raid column
 *
 * RETURN:	0 - successful
 *		1 - failed
 *
 * LOCKS:	requires unit writer lock
 */
static int
raid_open_alt(mr_unit_t *un, int index)
{
    mr_column_t	*column = &un->un_column[index];
    set_t	setno = MD_MIN2SET(MD_SID(un));
    side_t	side = mddb_getsidenum(setno);
    md_dev64_t	tmpdev = column->un_alt_dev;

    /* correct locks */
    ASSERT(UNIT_WRITER_HELD(un));
    /* not already writing to */
    ASSERT(! (column->un_devflags & MD_RAID_WRITE_ALT));
    /* not already open */
    ASSERT(! (column->un_devflags & MD_RAID_ALT_ISOPEN));

    if (tmpdev != NODEV64) {
        /*
         * Open by device id.  We use orig_key since alt_dev
         * has been set by the caller to be the same as orig_dev.
         */
        if ((md_getmajor(tmpdev) != md_major) &&
            md_devid_found(setno, side, column->un_orig_key) == 1) {
            tmpdev = md_resolve_bydevid(MD_SID(un), tmpdev,
                column->un_orig_key);
        }
        if (md_layered_open(MD_SID(un), &tmpdev, MD_OFLG_NULL)) {
            /* open failed */
            column->un_alt_dev = tmpdev;
            return (1);
        } else {
            /* open succeeded */
            column->un_alt_dev = tmpdev;
            column->un_devflags |= MD_RAID_ALT_ISOPEN;
            return (0);
        }
    } else
        /* no alt device to open */
        return (1);
}
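/*
 * Background note (illustrative): raid_open_alt() and raid_close_alt()
 * bracket the life of the resync target.  While MD_RAID_WRITE_ALT is set
 * on a column, regular writes to that column are also directed at the alt
 * device so that data written during the resync is not lost; the flag is
 * cleared before the alt device is either closed or promoted to become
 * the column's main device.
 */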
/*
 * NAME:	raid_close_alt
 *
 * DESCRIPTION: closes the alt device used during resync.
 *
 * PARAMETERS:	un - raid unit structure
 *		index - raid column
 *
 * RETURN:	none
 *
 * LOCKS:	requires unit writer lock
 */
static void
raid_close_alt(mr_unit_t *un, int index)
{
    mr_column_t	*column = &un->un_column[index];
    md_dev64_t	tmpdev = column->un_alt_dev;

    ASSERT(UNIT_WRITER_HELD(un));			/* correct locks */
    ASSERT(! (column->un_devflags & MD_RAID_WRITE_ALT)); /* not writing */
    ASSERT(column->un_devflags & MD_RAID_ALT_ISOPEN);	/* already open */
    ASSERT(tmpdev != NODEV64);				/* is a device */

    md_layered_close(column->un_alt_dev, MD_OFLG_NULL);
    column->un_devflags &= ~MD_RAID_ALT_ISOPEN;
    column->un_alt_dev = NODEV64;
}

static diskaddr_t
raid_resync_fillin_cs(
    diskaddr_t	line,
    uint_t	line_count,
    md_raidcs_t	*cs
)
{
    mr_unit_t	*un = cs->cs_un;

    ASSERT(line < un->un_segsincolumn);

    cs->cs_line = line;
    cs->cs_blkno = line * un->un_segsize;
    cs->cs_blkcnt = un->un_segsize * line_count;
    cs->cs_lastblk = cs->cs_blkno + cs->cs_blkcnt - 1;
    raid_line_reader_lock(cs, 1);

    return (line + line_count);
}

/* states returned by raid_resync_line */
#define	RAID_RESYNC_OKAY	0
#define	RAID_RESYNC_RDERROR	2
#define	RAID_RESYNC_WRERROR	3
#define	RAID_RESYNC_STATE	4
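/*
 * Overview of raid_resync_region() (illustrative summary): each call
 * resyncs line_count lines.  In the copy case (*single_read != 0) the
 * contents of the good hotspare are read and written verbatim to the
 * resync target.  In the regeneration case the buffer is zeroed and every
 * surviving column is read and xor'd in, in essence:
 *
 *	bzero(buf, len);
 *	for (each column i except the resync target)
 *		buf ^= read(column i);
 *	write(resync target, buf);
 *
 * A read error during regeneration is fatal to the resync, while a read
 * error from the hotspare merely downgrades a copy resync to a
 * regeneration resync.
 */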
int
raid_resync_region(
    md_raidcs_t	*cs,
    diskaddr_t	line,
    uint_t	line_count,
    int		*single_read,
    hs_cmds_t	*hs_state,
    int		*err_col,
    md_dev64_t	dev_to_write,
    diskaddr_t	write_dev_start)
{
    mr_unit_t	*un = cs->cs_un;
    buf_t	*readb1 = &cs->cs_pbuf;
    buf_t	*readb2 = &cs->cs_dbuf;
    buf_t	*writeb = &cs->cs_hbuf;
    diskaddr_t	off;
    size_t	tcopysize;
    size_t	copysize;
    int		resync;
    int		quit = 0;
    size_t	leftinseg;
    int		i;

    resync = un->un_resync_index;
    off = line * un->un_segsize;
    copysize = un->un_resync_copysize;

    /* find first column to read, skip resync column */

    leftinseg = un->un_segsize * line_count;
    while (leftinseg) {

        /* truncate last chunk to end if needed */
        if (copysize > leftinseg)
            tcopysize = leftinseg;
        else
            tcopysize = copysize;
        leftinseg -= tcopysize;

        /*
         * One of two scenarios:
         * 1) resync device with hotspare ok.  This implies that
         *    we are copying from a good hotspare to a new good original
         *    device.  In this case readb1 is used as the buf for
         *    the read from the hotspare device.
         * 2) For all other cases, including when in case 1) and an
         *    error is detected on the (formerly good) hotspare device,
         *    readb1 is used for the initial read.  readb2 is used for
         *    all other reads.  Each readb2 buffer is xor'd into the
         *    readb1 buffer.
         *
         * In both cases, writeb is used for the write, using readb1's
         * buffer.
         *
         * For case 2, we could alternatively perform the read for all
         * devices concurrently to improve performance.  However,
         * this could diminish performance for concurrent reads and
         * writes if low on memory.
         */

        /* read first buffer */

        /* switch to read from good columns if single_read */
        if (*single_read) {
            if (un->un_column[resync].un_dev == NODEV64)
                return (RAID_RESYNC_RDERROR);

            reset_buf(readb1, B_READ | B_BUSY, dbtob(copysize));
            readb1->b_bcount = dbtob(tcopysize);
            readb1->b_un.b_addr = cs->cs_pbuffer;
            readb1->b_edev = md_dev64_to_dev(
                un->un_column[resync].un_dev);
            readb1->b_lblkno = un->un_column[resync].un_devstart + off;
            (void) md_call_strategy(readb1, MD_STR_NOTTOP, NULL);
            if (biowait(readb1)) {
                /*
                 * at this point just start rebuilding the
                 * data and go on, since the other columns
                 * are okay.
                 */
                *single_read = 0;
                *hs_state = HS_BAD;
                un->un_column[resync].un_devflags &=
                    ~MD_RAID_COPY_RESYNC;
                un->un_column[resync].un_devflags |=
                    MD_RAID_REGEN_RESYNC;
            }
        }

        /* if reading from all non-resync columns */
        if (!*single_read) {
            /* for each column, read line and xor into write buf */
            bzero(cs->cs_pbuffer, dbtob(tcopysize));
            for (i = 0; i < un->un_totalcolumncnt; i++) {

                if (un->un_column[i].un_dev == NODEV64)
                    return (RAID_RESYNC_RDERROR);

                /* skip column getting resync'ed */
                if (i == resync) {
                    continue;
                }
                reset_buf(readb1, B_READ | B_BUSY, dbtob(copysize));
                readb1->b_bcount = dbtob(tcopysize);
                readb1->b_un.b_addr = cs->cs_dbuffer;
                readb1->b_edev = md_dev64_to_dev(
                    un->un_column[i].un_dev);
                readb1->b_lblkno = un->un_column[i].un_devstart + off;

                (void) md_call_strategy(readb1, MD_STR_NOTTOP, NULL);
                if (biowait(readb1)) {
                    *err_col = i;
                    quit = RAID_RESYNC_RDERROR;
                }

                if (quit)
                    return (quit);

                /* xor this column's data into the accumulation buffer */
                xor(cs->cs_pbuffer, readb1->b_un.b_addr,
                    dbtob(tcopysize));
            } /* for */
        }

        reset_buf(writeb, B_WRITE | B_BUSY, dbtob(copysize));
        writeb->b_bcount = dbtob(tcopysize);
        writeb->b_un.b_addr = cs->cs_pbuffer;
        writeb->b_lblkno = off + write_dev_start;
        writeb->b_edev = md_dev64_to_dev(dev_to_write);

        /* set write block number and perform the write */
        (void) md_call_strategy(writeb, MD_STR_NOTTOP, NULL);
        if (biowait(writeb)) {
            if (*single_read == 0) {
                *hs_state = HS_BAD;
            }
            return (RAID_RESYNC_WRERROR);
        }
        writeb->b_blkno += tcopysize;
        off += tcopysize;
    } /* while */

    sema_destroy(&readb1->b_io);
    sema_destroy(&readb1->b_sem);
    sema_destroy(&readb2->b_io);
    sema_destroy(&readb2->b_sem);
    sema_destroy(&writeb->b_io);
    sema_destroy(&writeb->b_sem);

    return (RAID_RESYNC_OKAY);
}
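/*
 * Background note (illustrative): resync_comp() below follows the
 * driver's lock ordering of taking the unit's io lock before the unit
 * lock (md_io_writerlock() before md_unit_writerlock()), and drops them
 * in the reverse order.  The per-line work inside the main loop takes
 * only the reader variants plus the line reader lock acquired in
 * raid_resync_fillin_cs(), so regular I/O to other lines of the unit can
 * proceed while a line is being rebuilt.
 */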
/*
 * NAME:	resync_comp
 *
 * DESCRIPTION: Resync the component.  Iterate through the raid unit a line at
 *		a time, read from the good device(s) and write the resync
 *		device.
 *
 * PARAMETERS:	minor_t mnum - minor number identity of metadevice
 *		md_raidcs_t *cs - child save struct
 *
 * RETURN:	none
 *
 * LOCKS:	Expects Unit Reader Lock to be held across call.  Acquires and
 *		releases Line Reader Lock for per-line I/O.
 */
static void
resync_comp(
    minor_t	mnum,
    md_raidcs_t	*cs
)
{
    mdi_unit_t	*ui;
    mr_unit_t	*un;
    mddb_recid_t	recids[2];
    rcs_state_t	state;
    md_dev64_t	dev_to_write;
    diskaddr_t	write_pwstart;
    diskaddr_t	write_devstart;
    md_dev64_t	dev;
    int		resync;
    int		i;
    int		single_read = 0;
    int		err;
    int		err_cnt;
    int		last_err;
    diskaddr_t	line;
    diskaddr_t	segsincolumn;
    size_t	bsize;
    uint_t	line_count;

    /*
     * hs_state is the state of the hotspare on the column being resynced
     * dev_state is the state of the resync target
     */
    hs_cmds_t	hs_state;
    int		err_col = -1;
    diskaddr_t	resync_end_pos;

    ui = MDI_UNIT(mnum);
    ASSERT(ui != NULL);

    un = cs->cs_un;

    md_unit_readerexit(ui);
    un = (mr_unit_t *)md_io_writerlock(ui);
    un = (mr_unit_t *)md_unit_writerlock(ui);
    resync = un->un_resync_index;
    state = un->un_column[resync].un_devstate;
    line_count = un->un_maxio / un->un_segsize;
    if (line_count == 0) { /* handle the case of segsize > maxio */
        line_count = 1;
        bsize = un->un_maxio;
    } else
        bsize = line_count * un->un_segsize;

    un->un_resync_copysize = (uint_t)bsize;

    ASSERT(un->c.un_status & MD_UN_RESYNC_ACTIVE);
    ASSERT(un->un_column[resync].un_devflags &
        (MD_RAID_COPY_RESYNC | MD_RAID_REGEN_RESYNC));

    /*
     * if the column is not in resync then just bail out.
     */
    if (! (un->un_column[resync].un_devstate & RCS_RESYNC)) {
        md_unit_writerexit(ui);
        md_io_writerexit(ui);
        un = (mr_unit_t *)md_unit_readerlock(ui);
        return;
    }
    SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_START, SVM_TAG_METADEVICE,
        MD_UN2SET(un), MD_SID(un));

    /* identify device to write and its start block */

    if (un->un_column[resync].un_alt_dev != NODEV64) {
        if (raid_open_alt(un, resync)) {
            raid_set_state(un, resync, state, 0);
            md_unit_writerexit(ui);
            md_io_writerexit(ui);
            un = (mr_unit_t *)md_unit_readerlock(ui);
            cmn_err(CE_WARN, "md: %s: %s open failed replace "
                "terminated", md_shortname(MD_SID(un)),
                md_devname(MD_UN2SET(un),
                un->un_column[resync].un_alt_dev, NULL, 0));
            SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_FAILED,
                SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
            return;
        }
        ASSERT(un->un_column[resync].un_devflags & MD_RAID_COPY_RESYNC);
        dev_to_write = un->un_column[resync].un_alt_dev;
        write_devstart = un->un_column[resync].un_alt_devstart;
        write_pwstart = un->un_column[resync].un_alt_pwstart;
        if (un->un_column[resync].un_devflags & MD_RAID_DEV_ERRED) {
            single_read = 0;
            hs_state = HS_BAD;
        } else {
            hs_state = HS_FREE;
            single_read = 1;
        }
        un->un_column[resync].un_devflags |= MD_RAID_WRITE_ALT;
    } else {
        dev_to_write = un->un_column[resync].un_dev;
        write_devstart = un->un_column[resync].un_devstart;
        write_pwstart = un->un_column[resync].un_pwstart;
        single_read = 0;
        hs_state = HS_FREE;
        ASSERT(un->un_column[resync].un_devflags &
            MD_RAID_REGEN_RESYNC);
    }

    alloc_bufs(cs, dbtob(bsize));
    /* initialize pre-write area */
    if (init_pw_area(un, dev_to_write, write_pwstart, resync)) {
        un->un_column[resync].un_devflags &= ~MD_RAID_WRITE_ALT;
        if (un->un_column[resync].un_alt_dev != NODEV64) {
            raid_close_alt(un, resync);
        }
        md_unit_writerexit(ui);
        md_io_writerexit(ui);
        if (dev_to_write == un->un_column[resync].un_dev)
            hs_state = HS_BAD;
        err = RAID_RESYNC_WRERROR;
        goto resync_comp_error;
    }

    un->c.un_status &= ~MD_UN_RESYNC_CANCEL;
    segsincolumn = un->un_segsincolumn;
    err_cnt = raid_state_cnt(un, RCS_ERRED | RCS_LAST_ERRED);

    /* commit the record */

    md_unit_writerexit(ui);
    md_io_writerexit(ui);

    /* resync each line of the unit */
    for (line = 0; line < segsincolumn; line += line_count) {
        /*
         * Update address range in child struct and lock the line.
         *
         * The reader version of the line lock is used since only
         * resync will use data beyond un_resync_line_index on the
         * resync device.
         */
        un = (mr_unit_t *)md_io_readerlock(ui);
        if (line + line_count > segsincolumn)
            line_count = segsincolumn - line;
        resync_end_pos = raid_resync_fillin_cs(line, line_count, cs);
        (void) md_unit_readerlock(ui);
        ASSERT(un->un_resync_line_index == resync_end_pos);
        err = raid_resync_region(cs, line, (int)line_count,
            &single_read, &hs_state, &err_col, dev_to_write,
            write_devstart);

        /*
         * if the column failed to resync then stop writing directly
         * to the column.
         */
        if (err)
            un->un_resync_line_index = 0;
        md_unit_readerexit(ui);
        raid_line_exit(cs);
        md_io_readerexit(ui);

        if (err)
            break;

        un = (mr_unit_t *)md_unit_writerlock(ui);

        if (raid_state_cnt(un, RCS_ERRED | RCS_LAST_ERRED) != err_cnt) {
            err = RAID_RESYNC_STATE;
            md_unit_writerexit(ui);
            break;
        }
        md_unit_writerexit(ui);
    } /* for */

resync_comp_error:
    un = (mr_unit_t *)md_io_writerlock(ui);
    (void) md_unit_writerlock(ui);
    un->un_column[resync].un_devflags &= ~MD_RAID_WRITE_ALT;

    recids[0] = 0;
    recids[1] = 0;

    switch (err) {
        /*
         * successful resync
         */
        case RAID_RESYNC_OKAY:
        if ((un->un_column[resync].un_orig_dev != NODEV64) &&
            (un->un_column[resync].un_orig_dev ==
            un->un_column[resync].un_alt_dev)) {
            /*
             * replacing a hot spare
             * release the hot spare, which will close the hotspare
             * and mark it closed.
             */
            raid_hs_release(hs_state, un, &recids[0], resync);
            /*
             * make the resync target the main device and
             * mark open
             */
            un->un_column[resync].un_hs_id = 0;
            un->un_column[resync].un_dev =
                un->un_column[resync].un_orig_dev;
            un->un_column[resync].un_devstart =
                un->un_column[resync].un_orig_devstart;
            un->un_column[resync].un_pwstart =
                un->un_column[resync].un_orig_pwstart;
            un->un_column[resync].un_devflags |= MD_RAID_DEV_ISOPEN;
            /* alt becomes the device so don't close it */
            un->un_column[resync].un_devflags &= ~MD_RAID_WRITE_ALT;
            un->un_column[resync].un_devflags &= ~MD_RAID_ALT_ISOPEN;
            un->un_column[resync].un_alt_dev = NODEV64;
        }
        raid_set_state(un, resync, RCS_OKAY, 0);
        break;

        case RAID_RESYNC_WRERROR:
        if (HOTSPARED(un, resync) && single_read &&
            (un->un_column[resync].un_devflags & MD_RAID_COPY_RESYNC)) {
            /*
             * this is the case where the resync target is
             * bad but there is a good hotspare.  In this
             * case keep the hotspare, and go back to okay.
             */
            raid_set_state(un, resync, RCS_OKAY, 0);
            cmn_err(CE_WARN, "md: %s: %s write error, replace "
                "terminated", md_shortname(MD_SID(un)),
                md_devname(MD_UN2SET(un),
                un->un_column[resync].un_orig_dev, NULL, 0));
            break;
        }
        if (HOTSPARED(un, resync)) {
            raid_hs_release(hs_state, un, &recids[0], resync);
            un->un_column[resync].un_dev =
                un->un_column[resync].un_orig_dev;
            un->un_column[resync].un_devstart =
                un->un_column[resync].un_orig_devstart;
            un->un_column[resync].un_pwstart =
                un->un_column[resync].un_orig_pwstart;
        }
        raid_set_state(un, resync, RCS_ERRED, 0);
        if (un->un_column[resync].un_devflags & MD_RAID_REGEN_RESYNC)
            dev = un->un_column[resync].un_dev;
        else
            dev = un->un_column[resync].un_alt_dev;
        cmn_err(CE_WARN, "md: %s: %s write error replace terminated",
            md_shortname(MD_SID(un)), md_devname(MD_UN2SET(un), dev,
            NULL, 0));
        break;

        case RAID_RESYNC_STATE:
        if (HOTSPARED(un, resync) && single_read &&
            (un->un_column[resync].un_devflags & MD_RAID_COPY_RESYNC)) {
            /*
             * this is the case where the resync target is
             * bad but there is a good hotspare.  In this
             * case keep the hotspare, and go back to okay.
             */
            raid_set_state(un, resync, RCS_OKAY, 0);
            cmn_err(CE_WARN, "md: %s: needs maintenance, replace "
                "terminated", md_shortname(MD_SID(un)));
            break;
        }
        if (HOTSPARED(un, resync)) {
            raid_hs_release(hs_state, un, &recids[0], resync);
            un->un_column[resync].un_dev =
                un->un_column[resync].un_orig_dev;
            un->un_column[resync].un_devstart =
                un->un_column[resync].un_orig_devstart;
            un->un_column[resync].un_pwstart =
                un->un_column[resync].un_orig_pwstart;
        }
        break;

        case RAID_RESYNC_RDERROR:
        if (HOTSPARED(un, resync)) {
            raid_hs_release(hs_state, un, &recids[0], resync);
            un->un_column[resync].un_dev =
                un->un_column[resync].un_orig_dev;
            un->un_column[resync].un_devstart =
                un->un_column[resync].un_orig_devstart;
            un->un_column[resync].un_pwstart =
                un->un_column[resync].un_orig_pwstart;
        }

        if ((resync != err_col) && (err_col != NOCOLUMN))
            raid_set_state(un, err_col, RCS_ERRED, 0);
        break;

        default:
        ASSERT(0);
    }
    if (un->un_column[resync].un_alt_dev != NODEV64) {
        raid_close_alt(un, resync);
    }

    /*
     * an io operation may have gotten an error and placed a
     * column in erred state.  This will abort the resync, which
     * will end up in last erred.  This is ugly so go through
     * the columns and do cleanup
     */
    err_cnt = 0;
    last_err = 0;
    for (i = 0; i < un->un_totalcolumncnt; i++) {
        if (un->un_column[i].un_devstate & RCS_OKAY)
            continue;
        if (i == resync) {
            raid_set_state(un, i, RCS_ERRED, 1);
            err_cnt++;
        } else if (err == RAID_RESYNC_OKAY) {
            err_cnt++;
        } else {
            raid_set_state(un, i, RCS_LAST_ERRED, 1);
            last_err++;
        }
    }
    if ((err_cnt == 0) && (last_err == 0))
        un->un_state = RUS_OKAY;
    else if (last_err == 0) {
        un->un_state = RUS_ERRED;
        ASSERT(err_cnt == 1);
    } else if (last_err > 0) {
        un->un_state = RUS_LAST_ERRED;
    }

    uniqtime32(&un->un_column[resync].un_devtimestamp);
    un->un_resync_copysize = 0;
    un->un_column[resync].un_devflags &=
        ~(MD_RAID_REGEN_RESYNC | MD_RAID_COPY_RESYNC);
    raid_commit(un, recids);
    /* release unit writer lock and acquire unit reader lock */
    md_unit_writerexit(ui);
    md_io_writerexit(ui);
    (void) md_unit_readerlock(ui);

    if (err == RAID_RESYNC_OKAY) {
        SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_DONE,
            SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
    } else {
        SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_FAILED,
            SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
        if (raid_state_cnt(un, RCS_ERRED | RCS_LAST_ERRED) > 1) {
            SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED,
                SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
        } else {
            SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED,
                SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
        }
    }

    free_bufs(dbtob(bsize), cs);
}
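/*
 * Background note (illustrative): resync_unit() below is the entry point
 * of a kernel thread created by raid_resync_unit(), so it never returns
 * to a caller; it must finish with thread_exit().  The unit reader lock
 * is held for the duration of the thread to keep the unit from being
 * deleted out from under the resync, and the cpr (suspend/resume) resync
 * count brackets the work so that a checkpoint can wait for resyncs to
 * settle.
 */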
/*
 * NAME:	resync_unit
 *
 * DESCRIPTION: Start of RAID resync thread.  Perform up front allocations,
 *		initializations and consistency checking, then call
 *		resync_comp to resync the component.
 *
 * PARAMETERS:	minor_t mnum - minor number identity of metadevice
 *
 * LOCKS:	Acquires and releases Unit Reader Lock to maintain unit
 *		existence during resync.
 *		Acquires and releases the resync count lock for cpr.
 */
static void
resync_unit(
    minor_t	mnum
)
{
    mdi_unit_t	*ui;
    mr_unit_t	*un;
    md_raidps_t	*ps = NULL;
    md_raidcs_t	*cs = NULL;
    int		resync;

    /*
     * Increment the raid resync count for cpr
     */
    mutex_enter(&md_cpr_resync.md_resync_mutex);
    md_cpr_resync.md_raid_resync++;
    mutex_exit(&md_cpr_resync.md_resync_mutex);

    ui = MDI_UNIT(mnum);
    ASSERT(ui != NULL);

    un = (mr_unit_t *)md_unit_readerlock(ui);

    /*
     * Allocate parent and child memory pool structures.  These are
     * only needed to lock raid lines, so only the minimal
     * required fields for this purpose are initialized.
     *
     * Do not use the reserve pool for resync.
     */
    ps = kmem_cache_alloc(raid_parent_cache, MD_ALLOCFLAGS);
    raid_parent_init(ps);
    cs = kmem_cache_alloc(raid_child_cache, MD_ALLOCFLAGS);
    raid_child_init(cs);
    resync = un->un_resync_index;
    ps->ps_un = un;
    ps->ps_ui = ui;
    ps->ps_flags = MD_RPS_INUSE;
    cs->cs_ps = ps;
    cs->cs_un = un;

    ASSERT(!(un->un_column[resync].un_devflags & MD_RAID_WRITE_ALT));

    resync_comp(mnum, cs);
    release_resync_request(mnum);

    kmem_cache_free(raid_child_cache, cs);
    kmem_cache_free(raid_parent_cache, ps);

    md_unit_readerexit(ui);

    /* close raid unit */
    (void) raid_internal_close(mnum, OTYP_LYR, 0, 0);

    /* poke hot spare daemon */
    (void) raid_hotspares();

    /*
     * Decrement the raid resync count for cpr
     */
    mutex_enter(&md_cpr_resync.md_resync_mutex);
    md_cpr_resync.md_raid_resync--;
    mutex_exit(&md_cpr_resync.md_resync_mutex);

    thread_exit();
}

/*
 * NAME:	raid_resync_unit
 *
 * DESCRIPTION: RAID metadevice specific resync routine.
 *		Open the unit and start resync_unit as a separate thread.
 *
 * PARAMETERS:	minor_t mnum - minor number identity of metadevice
 *		md_error_t *ep - output error parameter
 *
 * RETURN:	On error return 1 or set ep to nonzero, otherwise return 0.
 *
 * LOCKS:	Acquires and releases Unit Writer Lock.
 */
int
raid_resync_unit(
    minor_t	mnum,
    md_error_t	*ep
)
{
    mdi_unit_t	*ui;
    set_t	setno = MD_MIN2SET(mnum);
    mr_unit_t	*un;

    ui = MDI_UNIT(mnum);
    un = MD_UNIT(mnum);

    if (md_get_setstatus(setno) & MD_SET_STALE)
        return (mdmddberror(ep, MDE_DB_STALE, mnum, setno));

    ASSERT(un->un_column[un->un_resync_index].un_devflags &
        (MD_RAID_COPY_RESYNC | MD_RAID_REGEN_RESYNC));

    /* Don't start a resync if the device is not available */
    if ((ui == NULL) || (ui->ui_tstate & MD_DEV_ERRORED)) {
        return (mdmderror(ep, MDE_RAID_OPEN_FAILURE, mnum));
    }

    if (raid_internal_open(mnum, (FREAD | FWRITE), OTYP_LYR, 0)) {
        (void) md_unit_writerlock(ui);
        release_resync_request(mnum);
        md_unit_writerexit(ui);
        SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OPEN_FAIL, SVM_TAG_METADEVICE,
            setno, MD_SID(un));
        return (mdmderror(ep, MDE_RAID_OPEN_FAILURE, mnum));
    }

    /* start resync_unit thread */
    (void) thread_create(NULL, 0, resync_unit, (void *)(uintptr_t)mnum,
        0, &p0, TS_RUN, minclsyspri);

    return (0);
}
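/*
 * Usage note (illustrative, hypothetical caller): raid_resync_unit() is
 * the metadevice-specific hook invoked once a resync has been requested,
 * typically after resync_request() has reserved the resync slot:
 *
 *	md_error_t	mde;
 *
 *	mdclrerror(&mde);
 *	if (resync_request(mnum, col, 0, &mde) == 0 &&
 *	    raid_resync_unit(mnum, &mde) == 0) {
 *		/\* resync thread is now running *\/
 *	}
 *
 * The thread itself releases the resync request and closes the unit when
 * it finishes, so a successful caller has nothing further to clean up.
 */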