1 /* $OpenBSD: softraid_raid1.c,v 1.59 2014/11/18 02:37:30 tedu Exp $ */ 2 /* 3 * Copyright (c) 2007 Marco Peereboom <marco@peereboom.us> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 18 #include "bio.h" 19 20 #include <sys/param.h> 21 #include <sys/systm.h> 22 #include <sys/buf.h> 23 #include <sys/device.h> 24 #include <sys/ioctl.h> 25 #include <sys/malloc.h> 26 #include <sys/kernel.h> 27 #include <sys/disk.h> 28 #include <sys/rwlock.h> 29 #include <sys/queue.h> 30 #include <sys/fcntl.h> 31 #include <sys/disklabel.h> 32 #include <sys/mount.h> 33 #include <sys/sensors.h> 34 #include <sys/stat.h> 35 #include <sys/task.h> 36 #include <sys/workq.h> 37 #include <sys/conf.h> 38 #include <sys/uio.h> 39 40 #include <scsi/scsi_all.h> 41 #include <scsi/scsiconf.h> 42 #include <scsi/scsi_disk.h> 43 44 #include <dev/softraidvar.h> 45 46 /* RAID 1 functions. */ 47 int sr_raid1_create(struct sr_discipline *, struct bioc_createraid *, 48 int, int64_t); 49 int sr_raid1_assemble(struct sr_discipline *, struct bioc_createraid *, 50 int, void *); 51 int sr_raid1_init(struct sr_discipline *sd); 52 int sr_raid1_rw(struct sr_workunit *); 53 int sr_raid1_wu_done(struct sr_workunit *); 54 void sr_raid1_set_chunk_state(struct sr_discipline *, int, int); 55 void sr_raid1_set_vol_state(struct sr_discipline *); 56 57 /* Discipline initialisation. */ 58 void 59 sr_raid1_discipline_init(struct sr_discipline *sd) 60 { 61 /* Fill out discipline members. */ 62 sd->sd_type = SR_MD_RAID1; 63 strlcpy(sd->sd_name, "RAID 1", sizeof(sd->sd_name)); 64 sd->sd_capabilities = SR_CAP_SYSTEM_DISK | SR_CAP_AUTO_ASSEMBLE | 65 SR_CAP_REBUILD | SR_CAP_REDUNDANT; 66 sd->sd_max_wu = SR_RAID1_NOWU; 67 68 /* Setup discipline specific function pointers. */ 69 sd->sd_assemble = sr_raid1_assemble; 70 sd->sd_create = sr_raid1_create; 71 sd->sd_scsi_rw = sr_raid1_rw; 72 sd->sd_scsi_wu_done = sr_raid1_wu_done; 73 sd->sd_set_chunk_state = sr_raid1_set_chunk_state; 74 sd->sd_set_vol_state = sr_raid1_set_vol_state; 75 } 76 77 int 78 sr_raid1_create(struct sr_discipline *sd, struct bioc_createraid *bc, 79 int no_chunk, int64_t coerced_size) 80 { 81 if (no_chunk < 2) { 82 sr_error(sd->sd_sc, "%s requires two or more chunks", 83 sd->sd_name); 84 return EINVAL; 85 } 86 87 sd->sd_meta->ssdi.ssd_size = coerced_size; 88 89 return sr_raid1_init(sd); 90 } 91 92 int 93 sr_raid1_assemble(struct sr_discipline *sd, struct bioc_createraid *bc, 94 int no_chunk, void *data) 95 { 96 return sr_raid1_init(sd); 97 } 98 99 int 100 sr_raid1_init(struct sr_discipline *sd) 101 { 102 sd->sd_max_ccb_per_wu = sd->sd_meta->ssdi.ssd_chunk_no; 103 104 return 0; 105 } 106 107 void 108 sr_raid1_set_chunk_state(struct sr_discipline *sd, int c, int new_state) 109 { 110 int old_state, s; 111 112 DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n", 113 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 114 sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state); 115 116 /* ok to go to splbio since this only happens in error path */ 117 s = splbio(); 118 old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status; 119 120 /* multiple IOs to the same chunk that fail will come through here */ 121 if (old_state == new_state) 122 goto done; 123 124 switch (old_state) { 125 case BIOC_SDONLINE: 126 switch (new_state) { 127 case BIOC_SDOFFLINE: 128 case BIOC_SDSCRUB: 129 break; 130 default: 131 goto die; 132 } 133 break; 134 135 case BIOC_SDOFFLINE: 136 switch (new_state) { 137 case BIOC_SDREBUILD: 138 case BIOC_SDHOTSPARE: 139 break; 140 default: 141 goto die; 142 } 143 break; 144 145 case BIOC_SDSCRUB: 146 if (new_state == BIOC_SDONLINE) { 147 ; 148 } else 149 goto die; 150 break; 151 152 case BIOC_SDREBUILD: 153 switch (new_state) { 154 case BIOC_SDONLINE: 155 break; 156 case BIOC_SDOFFLINE: 157 /* Abort rebuild since the rebuild chunk disappeared. */ 158 sd->sd_reb_abort = 1; 159 break; 160 default: 161 goto die; 162 } 163 break; 164 165 case BIOC_SDHOTSPARE: 166 switch (new_state) { 167 case BIOC_SDOFFLINE: 168 case BIOC_SDREBUILD: 169 break; 170 default: 171 goto die; 172 } 173 break; 174 175 default: 176 die: 177 splx(s); /* XXX */ 178 panic("%s: %s: %s: invalid chunk state transition " 179 "%d -> %d\n", DEVNAME(sd->sd_sc), 180 sd->sd_meta->ssd_devname, 181 sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, 182 old_state, new_state); 183 /* NOTREACHED */ 184 } 185 186 sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state; 187 sd->sd_set_vol_state(sd); 188 189 sd->sd_must_flush = 1; 190 task_add(systq, &sd->sd_meta_save_task); 191 done: 192 splx(s); 193 } 194 195 void 196 sr_raid1_set_vol_state(struct sr_discipline *sd) 197 { 198 int states[SR_MAX_STATES]; 199 int new_state, i, s, nd; 200 int old_state = sd->sd_vol_status; 201 202 DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n", 203 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname); 204 205 nd = sd->sd_meta->ssdi.ssd_chunk_no; 206 207 #ifdef SR_DEBUG 208 for (i = 0; i < nd; i++) 209 DNPRINTF(SR_D_STATE, "%s: chunk %d status = %u\n", 210 DEVNAME(sd->sd_sc), i, 211 sd->sd_vol.sv_chunks[i]->src_meta.scm_status); 212 #endif 213 214 for (i = 0; i < SR_MAX_STATES; i++) 215 states[i] = 0; 216 217 for (i = 0; i < nd; i++) { 218 s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status; 219 if (s >= SR_MAX_STATES) 220 panic("%s: %s: %s: invalid chunk state", 221 DEVNAME(sd->sd_sc), 222 sd->sd_meta->ssd_devname, 223 sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname); 224 states[s]++; 225 } 226 227 if (states[BIOC_SDONLINE] == nd) 228 new_state = BIOC_SVONLINE; 229 else if (states[BIOC_SDONLINE] == 0) 230 new_state = BIOC_SVOFFLINE; 231 else if (states[BIOC_SDSCRUB] != 0) 232 new_state = BIOC_SVSCRUB; 233 else if (states[BIOC_SDREBUILD] != 0) 234 new_state = BIOC_SVREBUILD; 235 else if (states[BIOC_SDOFFLINE] != 0) 236 new_state = BIOC_SVDEGRADED; 237 else { 238 DNPRINTF(SR_D_STATE, "%s: invalid volume state, old state " 239 "was %d\n", DEVNAME(sd->sd_sc), old_state); 240 panic("invalid volume state"); 241 } 242 243 DNPRINTF(SR_D_STATE, "%s: %s: sr_raid1_set_vol_state %d -> %d\n", 244 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 245 old_state, new_state); 246 247 switch (old_state) { 248 case BIOC_SVONLINE: 249 switch (new_state) { 250 case BIOC_SVONLINE: /* can go to same state */ 251 case BIOC_SVOFFLINE: 252 case BIOC_SVDEGRADED: 253 case BIOC_SVREBUILD: /* happens on boot */ 254 break; 255 default: 256 goto die; 257 } 258 break; 259 260 case BIOC_SVOFFLINE: 261 /* XXX this might be a little too much */ 262 goto die; 263 264 case BIOC_SVDEGRADED: 265 switch (new_state) { 266 case BIOC_SVOFFLINE: 267 case BIOC_SVREBUILD: 268 case BIOC_SVDEGRADED: /* can go to the same state */ 269 break; 270 default: 271 goto die; 272 } 273 break; 274 275 case BIOC_SVBUILDING: 276 switch (new_state) { 277 case BIOC_SVONLINE: 278 case BIOC_SVOFFLINE: 279 case BIOC_SVBUILDING: /* can go to the same state */ 280 break; 281 default: 282 goto die; 283 } 284 break; 285 286 case BIOC_SVSCRUB: 287 switch (new_state) { 288 case BIOC_SVONLINE: 289 case BIOC_SVOFFLINE: 290 case BIOC_SVDEGRADED: 291 case BIOC_SVSCRUB: /* can go to same state */ 292 break; 293 default: 294 goto die; 295 } 296 break; 297 298 case BIOC_SVREBUILD: 299 switch (new_state) { 300 case BIOC_SVONLINE: 301 case BIOC_SVOFFLINE: 302 case BIOC_SVDEGRADED: 303 case BIOC_SVREBUILD: /* can go to the same state */ 304 break; 305 default: 306 goto die; 307 } 308 break; 309 310 default: 311 die: 312 panic("%s: %s: invalid volume state transition " 313 "%d -> %d\n", DEVNAME(sd->sd_sc), 314 sd->sd_meta->ssd_devname, 315 old_state, new_state); 316 /* NOTREACHED */ 317 } 318 319 sd->sd_vol_status = new_state; 320 321 /* If we have just become degraded, look for a hotspare. */ 322 if (new_state == BIOC_SVDEGRADED) 323 task_add(systq, &sd->sd_hotspare_rebuild_task); 324 } 325 326 int 327 sr_raid1_rw(struct sr_workunit *wu) 328 { 329 struct sr_discipline *sd = wu->swu_dis; 330 struct scsi_xfer *xs = wu->swu_xs; 331 struct sr_ccb *ccb; 332 struct sr_chunk *scp; 333 int ios, chunk, i, rt; 334 daddr_t blk; 335 336 /* blk and scsi error will be handled by sr_validate_io */ 337 if (sr_validate_io(wu, &blk, "sr_raid1_rw")) 338 goto bad; 339 340 /* calculate physical block */ 341 blk += sd->sd_meta->ssd_data_offset; 342 343 if (xs->flags & SCSI_DATA_IN) 344 ios = 1; 345 else 346 ios = sd->sd_meta->ssdi.ssd_chunk_no; 347 348 for (i = 0; i < ios; i++) { 349 if (xs->flags & SCSI_DATA_IN) { 350 rt = 0; 351 ragain: 352 /* interleave reads */ 353 chunk = sd->mds.mdd_raid1.sr1_counter++ % 354 sd->sd_meta->ssdi.ssd_chunk_no; 355 scp = sd->sd_vol.sv_chunks[chunk]; 356 switch (scp->src_meta.scm_status) { 357 case BIOC_SDONLINE: 358 case BIOC_SDSCRUB: 359 break; 360 361 case BIOC_SDOFFLINE: 362 case BIOC_SDREBUILD: 363 case BIOC_SDHOTSPARE: 364 if (rt++ < sd->sd_meta->ssdi.ssd_chunk_no) 365 goto ragain; 366 367 /* FALLTHROUGH */ 368 default: 369 /* volume offline */ 370 printf("%s: is offline, cannot read\n", 371 DEVNAME(sd->sd_sc)); 372 goto bad; 373 } 374 } else { 375 /* writes go on all working disks */ 376 chunk = i; 377 scp = sd->sd_vol.sv_chunks[chunk]; 378 switch (scp->src_meta.scm_status) { 379 case BIOC_SDONLINE: 380 case BIOC_SDSCRUB: 381 case BIOC_SDREBUILD: 382 break; 383 384 case BIOC_SDHOTSPARE: /* should never happen */ 385 case BIOC_SDOFFLINE: 386 continue; 387 388 default: 389 goto bad; 390 } 391 } 392 393 ccb = sr_ccb_rw(sd, chunk, blk, xs->datalen, xs->data, 394 xs->flags, 0); 395 if (!ccb) { 396 /* should never happen but handle more gracefully */ 397 printf("%s: %s: too many ccbs queued\n", 398 DEVNAME(sd->sd_sc), 399 sd->sd_meta->ssd_devname); 400 goto bad; 401 } 402 sr_wu_enqueue_ccb(wu, ccb); 403 } 404 405 sr_schedule_wu(wu); 406 407 return (0); 408 409 bad: 410 /* wu is unwound by sr_wu_put */ 411 return (1); 412 } 413 414 int 415 sr_raid1_wu_done(struct sr_workunit *wu) 416 { 417 struct sr_discipline *sd = wu->swu_dis; 418 struct scsi_xfer *xs = wu->swu_xs; 419 420 /* If at least one I/O succeeded, we are okay. */ 421 if (wu->swu_ios_succeeded > 0) { 422 xs->error = XS_NOERROR; 423 return SR_WU_OK; 424 } 425 426 /* If all I/O failed, retry reads and give up on writes. */ 427 if (xs->flags & SCSI_DATA_IN) { 428 printf("%s: retrying read on block %lld\n", 429 sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start); 430 if (wu->swu_cb_active == 1) 431 panic("%s: sr_raid1_intr_cb", 432 DEVNAME(sd->sd_sc)); 433 sr_wu_release_ccbs(wu); 434 wu->swu_state = SR_WU_RESTART; 435 if (sd->sd_scsi_rw(wu) == 0) 436 return SR_WU_RESTART; 437 } else { 438 printf("%s: permanently failing write on block %lld\n", 439 sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start); 440 } 441 442 wu->swu_state = SR_WU_FAILED; 443 xs->error = XS_DRIVER_STUFFUP; 444 445 return SR_WU_FAILED; 446 } 447