/* $OpenBSD: softraid.c,v 1.177 2009/10/29 15:21:31 jsing Exp $ */
/*
 * Copyright (c) 2007, 2008, 2009 Marco Peereboom <marco@peereboom.us>
 * Copyright (c) 2008 Chris Kuethe <ckuethe@openbsd.org>
 * Copyright (c) 2009 Joel Sing <jsing@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bio.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/device.h>
#include <sys/ioctl.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/kernel.h>
#include <sys/disk.h>
#include <sys/rwlock.h>
#include <sys/queue.h>
#include <sys/fcntl.h>
#include <sys/disklabel.h>
#include <sys/mount.h>
#include <sys/sensors.h>
#include <sys/stat.h>
#include <sys/conf.h>
#include <sys/uio.h>
#include <sys/workq.h>
#include <sys/kthread.h>

#ifdef AOE
#include <sys/mbuf.h>
#include <net/if_aoe.h>
#endif /* AOE */

#include <crypto/cryptodev.h>

#include <scsi/scsi_all.h>
#include <scsi/scsiconf.h>
#include <scsi/scsi_disk.h>

#include <dev/softraidvar.h>
#include <dev/rndvar.h>

/* #define SR_FANCY_STATS */

#ifdef SR_DEBUG
#define SR_FANCY_STATS
uint32_t	sr_debug = 0
		/* | SR_D_CMD */
		/* | SR_D_MISC */
		/* | SR_D_INTR */
		/* | SR_D_IOCTL */
		/* | SR_D_CCB */
		/* | SR_D_WU */
		/* | SR_D_META */
		/* | SR_D_DIS */
		/* | SR_D_STATE */
		;
#endif

int		sr_match(struct device *, void *, void *);
void		sr_attach(struct device *, struct device *, void *);
int		sr_detach(struct device *, int);
int		sr_activate(struct device *, int);

struct cfattach softraid_ca = {
	sizeof(struct sr_softc), sr_match, sr_attach, sr_detach,
	sr_activate
};

struct cfdriver softraid_cd = {
	NULL, "softraid", DV_DULL
};

/* scsi & discipline */
int		sr_scsi_cmd(struct scsi_xfer *);
void		sr_minphys(struct buf *bp, struct scsi_link *sl);
void		sr_copy_internal_data(struct scsi_xfer *,
		    void *, size_t);
int		sr_scsi_ioctl(struct scsi_link *, u_long,
		    caddr_t, int, struct proc *);
int		sr_ioctl(struct device *, u_long, caddr_t);
int		sr_ioctl_inq(struct sr_softc *, struct bioc_inq *);
int		sr_ioctl_vol(struct sr_softc *, struct bioc_vol *);
int		sr_ioctl_disk(struct sr_softc *, struct bioc_disk *);
int		sr_ioctl_setstate(struct sr_softc *,
		    struct bioc_setstate *);
int		sr_ioctl_createraid(struct sr_softc *,
		    struct bioc_createraid *, int);
int		sr_ioctl_deleteraid(struct sr_softc *,
		    struct bioc_deleteraid *);
void		sr_chunks_unwind(struct sr_softc *,
		    struct sr_chunk_head *);
void		sr_discipline_free(struct sr_discipline *);
void		sr_discipline_shutdown(struct sr_discipline *);
int		sr_discipline_init(struct sr_discipline *, int);

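/*
 * Editor's note on the native on-disk metadata layout, summarized from
 * sr_meta_save() and sr_meta_read() below: each chunk carries, at
 * SR_META_OFFSET and within SR_META_SIZE * 512 bytes, a struct
 * sr_metadata header immediately followed by ssd_chunk_no struct
 * sr_meta_chunk entries and then ssd_opt_no struct sr_meta_opt entries.
 * The per-chunk copies differ only in ssd_chunk_id and the resulting
 * checksum.
 */
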
/* utility functions */
void		sr_shutdown(void *);
void		sr_uuid_get(struct sr_uuid *);
void		sr_uuid_print(struct sr_uuid *, int);
void		sr_checksum_print(u_int8_t *);
void		sr_checksum(struct sr_softc *, void *, void *,
		    u_int32_t);
int		sr_boot_assembly(struct sr_softc *);
int		sr_already_assembled(struct sr_discipline *);
int		sr_hotspare(struct sr_softc *, dev_t);
void		sr_hotspare_rebuild(struct sr_discipline *);
int		sr_rebuild_init(struct sr_discipline *, dev_t);
void		sr_rebuild(void *);
void		sr_rebuild_thread(void *);
void		sr_roam_chunks(struct sr_discipline *);
int		sr_chunk_in_use(struct sr_softc *, dev_t);

/* don't include these on RAMDISK */
#ifndef SMALL_KERNEL
void		sr_sensors_refresh(void *);
int		sr_sensors_create(struct sr_discipline *);
void		sr_sensors_delete(struct sr_discipline *);
#endif

/* metadata */
int		sr_meta_probe(struct sr_discipline *, dev_t *, int);
int		sr_meta_attach(struct sr_discipline *, int);
void		sr_meta_getdevname(struct sr_softc *, dev_t, char *,
		    int);
int		sr_meta_rw(struct sr_discipline *, dev_t, void *,
		    size_t, daddr64_t, long);
int		sr_meta_clear(struct sr_discipline *);
int		sr_meta_read(struct sr_discipline *);
int		sr_meta_save(struct sr_discipline *, u_int32_t);
int		sr_meta_validate(struct sr_discipline *, dev_t,
		    struct sr_metadata *, void *);
void		sr_meta_chunks_create(struct sr_softc *,
		    struct sr_chunk_head *);
void		sr_meta_init(struct sr_discipline *,
		    struct sr_chunk_head *);

/* hotplug magic */
void		sr_disk_attach(struct disk *, int);

struct sr_hotplug_list {
	void			(*sh_hotplug)(struct sr_discipline *,
				    struct disk *, int);
	struct sr_discipline	*sh_sd;

	SLIST_ENTRY(sr_hotplug_list) shl_link;
};
SLIST_HEAD(sr_hotplug_list_head, sr_hotplug_list);

struct sr_hotplug_list_head	sr_hotplug_callbacks;
extern void			(*softraid_disk_attach)(struct disk *, int);

/* scsi glue */
struct scsi_adapter sr_switch = {
	sr_scsi_cmd, sr_minphys, NULL, NULL, sr_scsi_ioctl
};

struct scsi_device sr_dev = {
	NULL, NULL, NULL, NULL
};

/* native metadata format */
int		sr_meta_native_bootprobe(struct sr_softc *,
		    struct device *, struct sr_metadata_list_head *);
#define SR_META_NOTCLAIMED	(0)
#define SR_META_CLAIMED		(1)
int		sr_meta_native_probe(struct sr_softc *,
		    struct sr_chunk *);
int		sr_meta_native_attach(struct sr_discipline *, int);
int		sr_meta_native_read(struct sr_discipline *, dev_t,
		    struct sr_metadata *, void *);
int		sr_meta_native_write(struct sr_discipline *, dev_t,
		    struct sr_metadata *,void *);

#ifdef SR_DEBUG
void		sr_meta_print(struct sr_metadata *);
#else
#define sr_meta_print(m)
#endif

/* the metadata driver should remain stateless */
struct sr_meta_driver {
	daddr64_t		smd_offset;	/* metadata location */
	u_int32_t		smd_size;	/* size of metadata */

	int			(*smd_probe)(struct sr_softc *,
				    struct sr_chunk *);
	int			(*smd_attach)(struct sr_discipline *, int);
	int			(*smd_detach)(struct sr_discipline *);
	int			(*smd_read)(struct sr_discipline *, dev_t,
				    struct sr_metadata *, void *);
	int			(*smd_write)(struct sr_discipline *, dev_t,
				    struct sr_metadata *, void *);
	int			(*smd_validate)(struct sr_discipline *,
				    struct sr_metadata *, void *);
} smd[] = {
	{ SR_META_OFFSET, SR_META_SIZE * 512,
	  sr_meta_native_probe, sr_meta_native_attach, NULL,
	  sr_meta_native_read, sr_meta_native_write, NULL },
#define SR_META_F_NATIVE	0
	{ 0, 0, NULL, NULL, NULL, NULL }
#define SR_META_F_INVALID	-1
};

int
sr_meta_attach(struct sr_discipline *sd, int force)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct sr_chunk_head	*cl;
	struct sr_chunk		*ch_entry;
	int			rv = 1, i = 0;

	DNPRINTF(SR_D_META, "%s: sr_meta_attach(%d)\n", DEVNAME(sc), force);

	/* in memory copy of metadata */
	sd->sd_meta = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO);
	if (!sd->sd_meta) {
		printf("%s: could not allocate memory for metadata\n",
		    DEVNAME(sc));
		goto bad;
	}

	if (sd->sd_meta_type != SR_META_F_NATIVE) {
		/* in memory copy of foreign metadata */
		sd->sd_meta_foreign = malloc(smd[sd->sd_meta_type].smd_size,
		    M_DEVBUF, M_ZERO);
		if (!sd->sd_meta_foreign) {
			/* unwind frees sd_meta */
			printf("%s: could not allocate memory for foreign "
			    "metadata\n", DEVNAME(sc));
			goto bad;
		}
	}

	/* we have a valid list, now create an array index */
	cl = &sd->sd_vol.sv_chunk_list;
	SLIST_FOREACH(ch_entry, cl, src_link) {
		i++;
	}
	sd->sd_vol.sv_chunks = malloc(sizeof(struct sr_chunk *) * i,
	    M_DEVBUF, M_WAITOK | M_ZERO);

	/* fill out chunk array */
	i = 0;
	SLIST_FOREACH(ch_entry, cl, src_link)
		sd->sd_vol.sv_chunks[i++] = ch_entry;

	/* attach metadata */
	if (smd[sd->sd_meta_type].smd_attach(sd, force))
		goto bad;

	rv = 0;
bad:
	return (rv);
}

int
sr_meta_probe(struct sr_discipline *sd, dev_t *dt, int no_chunk)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct vnode		*vn;
	struct sr_chunk		*ch_entry, *ch_prev = NULL;
	struct sr_chunk_head	*cl;
	char			devname[32];
	int			i, d, type, found, prevf, error;
	dev_t			dev;

	DNPRINTF(SR_D_META, "%s: sr_meta_probe(%d)\n", DEVNAME(sc), no_chunk);

	if (no_chunk == 0)
		goto unwind;


	cl = &sd->sd_vol.sv_chunk_list;

	for (d = 0, prevf = SR_META_F_INVALID; d < no_chunk; d++) {
		ch_entry = malloc(sizeof(struct sr_chunk), M_DEVBUF,
		    M_WAITOK | M_ZERO);
		/* keep disks in user supplied order */
		if (ch_prev)
			SLIST_INSERT_AFTER(ch_prev, ch_entry, src_link);
		else
			SLIST_INSERT_HEAD(cl, ch_entry, src_link);
		ch_prev = ch_entry;
		dev = dt[d];
		ch_entry->src_dev_mm = dev;

		if (dev == NODEV) {
			ch_entry->src_meta.scm_status = BIOC_SDOFFLINE;
			continue;
		} else {
			sr_meta_getdevname(sc, dev, devname, sizeof(devname));
			if (bdevvp(dev, &vn)) {
				printf("%s: sr_meta_probe: can't allocate "
				    "vnode\n", DEVNAME(sc));
				goto unwind;
			}

			/*
			 * XXX leaving dev open for now; move this to attach
			 * and figure out the open/close dance for unwind.
317 */ 318 error = VOP_OPEN(vn, FREAD | FWRITE, NOCRED, 0); 319 if (error) { 320 DNPRINTF(SR_D_META,"%s: sr_meta_probe can't " 321 "open %s\n", DEVNAME(sc), devname); 322 vput(vn); 323 goto unwind; 324 } 325 326 strlcpy(ch_entry->src_devname, devname, 327 sizeof(ch_entry->src_devname)); 328 ch_entry->src_vn = vn; 329 } 330 331 /* determine if this is a device we understand */ 332 for (i = 0, found = SR_META_F_INVALID; smd[i].smd_probe; i++) { 333 type = smd[i].smd_probe(sc, ch_entry); 334 if (type == SR_META_F_INVALID) 335 continue; 336 else { 337 found = type; 338 break; 339 } 340 } 341 342 if (found == SR_META_F_INVALID) 343 goto unwind; 344 if (prevf == SR_META_F_INVALID) 345 prevf = found; 346 if (prevf != found) { 347 DNPRINTF(SR_D_META, "%s: prevf != found\n", 348 DEVNAME(sc)); 349 goto unwind; 350 } 351 } 352 353 return (prevf); 354 unwind: 355 return (SR_META_F_INVALID); 356 } 357 358 void 359 sr_meta_getdevname(struct sr_softc *sc, dev_t dev, char *buf, int size) 360 { 361 int maj, unit, part; 362 char *name; 363 364 DNPRINTF(SR_D_META, "%s: sr_meta_getdevname(%p, %d)\n", 365 DEVNAME(sc), buf, size); 366 367 if (!buf) 368 return; 369 370 maj = major(dev); 371 part = DISKPART(dev); 372 unit = DISKUNIT(dev); 373 374 name = findblkname(maj); 375 if (name == NULL) 376 return; 377 378 snprintf(buf, size, "%s%d%c", name, unit, part + 'a'); 379 } 380 381 int 382 sr_meta_rw(struct sr_discipline *sd, dev_t dev, void *md, size_t sz, 383 daddr64_t ofs, long flags) 384 { 385 struct sr_softc *sc = sd->sd_sc; 386 struct buf b; 387 int rv = 1; 388 389 DNPRINTF(SR_D_META, "%s: sr_meta_rw(0x%x, %p, %d, %llu 0x%x)\n", 390 DEVNAME(sc), dev, md, sz, ofs, flags); 391 392 bzero(&b, sizeof(b)); 393 394 if (md == NULL) { 395 printf("%s: read invalid metadata pointer\n", DEVNAME(sc)); 396 goto done; 397 } 398 b.b_flags = flags | B_PHYS; 399 b.b_blkno = ofs; 400 b.b_bcount = sz; 401 b.b_bufsize = sz; 402 b.b_resid = sz; 403 b.b_data = md; 404 b.b_error = 0; 405 b.b_proc = curproc; 406 b.b_dev = dev; 407 b.b_iodone = NULL; 408 if (bdevvp(dev, &b.b_vp)) { 409 printf("%s: sr_meta_rw: can't allocate vnode\n", DEVNAME(sc)); 410 goto done; 411 } 412 if ((b.b_flags & B_READ) == 0) 413 b.b_vp->v_numoutput++; 414 415 LIST_INIT(&b.b_dep); 416 VOP_STRATEGY(&b); 417 biowait(&b); 418 419 if (b.b_flags & B_ERROR) { 420 printf("%s: 0x%x i/o error on block %llu while reading " 421 "metadata %d\n", DEVNAME(sc), dev, b.b_blkno, b.b_error); 422 goto done; 423 } 424 rv = 0; 425 done: 426 if (b.b_vp) 427 vput(b.b_vp); 428 429 return (rv); 430 } 431 432 int 433 sr_meta_clear(struct sr_discipline *sd) 434 { 435 struct sr_softc *sc = sd->sd_sc; 436 struct sr_chunk_head *cl = &sd->sd_vol.sv_chunk_list; 437 struct sr_chunk *ch_entry; 438 void *m; 439 int rv = 1; 440 441 DNPRINTF(SR_D_META, "%s: sr_meta_clear\n", DEVNAME(sc)); 442 443 if (sd->sd_meta_type != SR_META_F_NATIVE) { 444 printf("%s: sr_meta_clear can not clear foreign metadata\n", 445 DEVNAME(sc)); 446 goto done; 447 } 448 449 m = malloc(SR_META_SIZE * 512, M_DEVBUF, M_WAITOK | M_ZERO); 450 SLIST_FOREACH(ch_entry, cl, src_link) { 451 if (sr_meta_native_write(sd, ch_entry->src_dev_mm, m, NULL)) { 452 /* XXX mark disk offline */ 453 DNPRINTF(SR_D_META, "%s: sr_meta_clear failed to " 454 "clear %s\n", ch_entry->src_devname); 455 rv++; 456 continue; 457 } 458 bzero(&ch_entry->src_meta, sizeof(ch_entry->src_meta)); 459 bzero(&ch_entry->src_opt, sizeof(ch_entry->src_opt)); 460 } 461 462 bzero(sd->sd_meta, SR_META_SIZE * 512); 463 464 free(m, M_DEVBUF); 465 rv = 0; 466 done: 467 
return (rv); 468 } 469 470 void 471 sr_meta_chunks_create(struct sr_softc *sc, struct sr_chunk_head *cl) 472 { 473 struct sr_chunk *ch_entry; 474 struct sr_uuid uuid; 475 int cid = 0; 476 char *name; 477 u_int64_t max_chunk_sz = 0, min_chunk_sz; 478 479 DNPRINTF(SR_D_META, "%s: sr_meta_chunks_create\n", DEVNAME(sc)); 480 481 sr_uuid_get(&uuid); 482 483 /* fill out stuff and get largest chunk size while looping */ 484 SLIST_FOREACH(ch_entry, cl, src_link) { 485 name = ch_entry->src_devname; 486 ch_entry->src_meta.scmi.scm_size = ch_entry->src_size; 487 ch_entry->src_meta.scmi.scm_chunk_id = cid++; 488 ch_entry->src_meta.scm_status = BIOC_SDONLINE; 489 strlcpy(ch_entry->src_meta.scmi.scm_devname, name, 490 sizeof(ch_entry->src_meta.scmi.scm_devname)); 491 bcopy(&uuid, &ch_entry->src_meta.scmi.scm_uuid, 492 sizeof(ch_entry->src_meta.scmi.scm_uuid)); 493 494 if (ch_entry->src_meta.scmi.scm_size > max_chunk_sz) 495 max_chunk_sz = ch_entry->src_meta.scmi.scm_size; 496 } 497 498 /* get smallest chunk size */ 499 min_chunk_sz = max_chunk_sz; 500 SLIST_FOREACH(ch_entry, cl, src_link) 501 if (ch_entry->src_meta.scmi.scm_size < min_chunk_sz) 502 min_chunk_sz = ch_entry->src_meta.scmi.scm_size; 503 504 /* equalize all sizes */ 505 SLIST_FOREACH(ch_entry, cl, src_link) 506 ch_entry->src_meta.scmi.scm_coerced_size = min_chunk_sz; 507 508 /* whine if chunks are not the same size */ 509 if (min_chunk_sz != max_chunk_sz) 510 printf("%s: chunk sizes are not equal; up to %llu blocks " 511 "wasted per chunk\n", 512 DEVNAME(sc), max_chunk_sz - min_chunk_sz); 513 } 514 515 void 516 sr_meta_init(struct sr_discipline *sd, struct sr_chunk_head *cl) 517 { 518 struct sr_softc *sc = sd->sd_sc; 519 struct sr_metadata *sm = sd->sd_meta; 520 struct sr_meta_chunk *im_sc; 521 struct sr_meta_opt *im_so; 522 int i, chunk_no; 523 524 DNPRINTF(SR_D_META, "%s: sr_meta_init\n", DEVNAME(sc)); 525 526 if (!sm) 527 return; 528 529 /* initial metadata */ 530 sm->ssdi.ssd_magic = SR_MAGIC; 531 sm->ssdi.ssd_version = SR_META_VERSION; 532 sm->ssd_ondisk = 0; 533 sm->ssdi.ssd_flags = sd->sd_meta_flags; 534 /* get uuid from chunk 0 */ 535 bcopy(&sd->sd_vol.sv_chunks[0]->src_meta.scmi.scm_uuid, 536 &sm->ssdi.ssd_uuid, 537 sizeof(struct sr_uuid)); 538 539 /* volume is filled in createraid */ 540 541 /* add missing chunk bits */ 542 chunk_no = sm->ssdi.ssd_chunk_no; 543 for (i = 0; i < chunk_no; i++) { 544 im_sc = &sd->sd_vol.sv_chunks[i]->src_meta; 545 im_sc->scmi.scm_volid = sm->ssdi.ssd_volid; 546 sr_checksum(sc, im_sc, &im_sc->scm_checksum, 547 sizeof(struct sr_meta_chunk_invariant)); 548 549 /* carry optional meta also in chunk area */ 550 im_so = &sd->sd_vol.sv_chunks[i]->src_opt; 551 bzero(im_so, sizeof(*im_so)); 552 if (sd->sd_type == SR_MD_CRYPTO) { 553 sm->ssdi.ssd_opt_no = 1; 554 im_so->somi.som_type = SR_OPT_CRYPTO; 555 556 /* 557 * copy encrypted key / passphrase into optional 558 * metadata area 559 */ 560 bcopy(&sd->mds.mdd_crypto.scr_meta, 561 &im_so->somi.som_meta.smm_crypto, 562 sizeof(im_so->somi.som_meta.smm_crypto)); 563 564 sr_checksum(sc, im_so, im_so->som_checksum, 565 sizeof(struct sr_meta_opt_invariant)); 566 } 567 } 568 } 569 570 void 571 sr_meta_save_callback(void *arg1, void *arg2) 572 { 573 struct sr_discipline *sd = arg1; 574 int s; 575 576 s = splbio(); 577 578 if (sr_meta_save(arg1, SR_META_DIRTY)) 579 printf("%s: save metadata failed\n", 580 DEVNAME(sd->sd_sc)); 581 582 sd->sd_must_flush = 0; 583 splx(s); 584 } 585 586 int 587 sr_meta_save(struct sr_discipline *sd, u_int32_t flags) 588 { 589 struct 
sr_softc *sc = sd->sd_sc; 590 struct sr_metadata *sm = sd->sd_meta, *m; 591 struct sr_meta_driver *s; 592 struct sr_chunk *src; 593 struct sr_meta_chunk *cm; 594 struct sr_workunit wu; 595 struct sr_meta_opt *om; 596 int i; 597 598 DNPRINTF(SR_D_META, "%s: sr_meta_save %s\n", 599 DEVNAME(sc), sd->sd_meta->ssd_devname); 600 601 if (!sm) { 602 printf("%s: no in memory copy of metadata\n", DEVNAME(sc)); 603 goto bad; 604 } 605 606 /* meta scratchpad */ 607 s = &smd[sd->sd_meta_type]; 608 m = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO); 609 if (!m) { 610 printf("%s: could not allocate metadata scratch area\n", 611 DEVNAME(sc)); 612 goto bad; 613 } 614 615 if (sm->ssdi.ssd_opt_no > 1) 616 panic("not yet save > 1 optional metadata members"); 617 618 /* from here on out metadata is updated */ 619 restart: 620 sm->ssd_ondisk++; 621 sm->ssd_meta_flags = flags; 622 bcopy(sm, m, sizeof(*m)); 623 624 for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) { 625 src = sd->sd_vol.sv_chunks[i]; 626 cm = (struct sr_meta_chunk *)(m + 1); 627 bcopy(&src->src_meta, cm + i, sizeof(*cm)); 628 } 629 630 /* optional metadata */ 631 om = (struct sr_meta_opt *)(cm + i); 632 for (i = 0; i < sm->ssdi.ssd_opt_no; i++) { 633 bcopy(&src->src_opt, om + i, sizeof(*om)); 634 sr_checksum(sc, om, &om->som_checksum, 635 sizeof(struct sr_meta_opt_invariant)); 636 } 637 638 for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) { 639 src = sd->sd_vol.sv_chunks[i]; 640 641 /* skip disks that are offline */ 642 if (src->src_meta.scm_status == BIOC_SDOFFLINE) 643 continue; 644 645 /* calculate metadata checksum for correct chunk */ 646 m->ssdi.ssd_chunk_id = i; 647 sr_checksum(sc, m, &m->ssd_checksum, 648 sizeof(struct sr_meta_invariant)); 649 650 #ifdef SR_DEBUG 651 DNPRINTF(SR_D_META, "%s: sr_meta_save %s: volid: %d " 652 "chunkid: %d checksum: ", 653 DEVNAME(sc), src->src_meta.scmi.scm_devname, 654 m->ssdi.ssd_volid, m->ssdi.ssd_chunk_id); 655 656 if (sr_debug & SR_D_META) 657 sr_checksum_print((u_int8_t *)&m->ssd_checksum); 658 DNPRINTF(SR_D_META, "\n"); 659 sr_meta_print(m); 660 #endif 661 662 /* translate and write to disk */ 663 if (s->smd_write(sd, src->src_dev_mm, m, NULL /* XXX */)) { 664 printf("%s: could not write metadata to %s\n", 665 DEVNAME(sc), src->src_devname); 666 /* restart the meta write */ 667 src->src_meta.scm_status = BIOC_SDOFFLINE; 668 /* XXX recalculate volume status */ 669 goto restart; 670 } 671 } 672 673 /* not all disciplines have sync */ 674 if (sd->sd_scsi_sync) { 675 bzero(&wu, sizeof(wu)); 676 wu.swu_fake = 1; 677 wu.swu_dis = sd; 678 sd->sd_scsi_sync(&wu); 679 } 680 free(m, M_DEVBUF); 681 return (0); 682 bad: 683 return (1); 684 } 685 686 int 687 sr_meta_read(struct sr_discipline *sd) 688 { 689 #ifdef SR_DEBUG 690 struct sr_softc *sc = sd->sd_sc; 691 #endif 692 struct sr_chunk_head *cl = &sd->sd_vol.sv_chunk_list; 693 struct sr_metadata *sm; 694 struct sr_chunk *ch_entry; 695 struct sr_meta_chunk *cp; 696 struct sr_meta_driver *s; 697 struct sr_meta_opt *om; 698 void *fm = NULL; 699 int no_disk = 0, got_meta = 0; 700 701 DNPRINTF(SR_D_META, "%s: sr_meta_read\n", DEVNAME(sc)); 702 703 sm = malloc(SR_META_SIZE * 512, M_DEVBUF, M_WAITOK | M_ZERO); 704 s = &smd[sd->sd_meta_type]; 705 if (sd->sd_meta_type != SR_META_F_NATIVE) 706 fm = malloc(s->smd_size, M_DEVBUF, M_WAITOK | M_ZERO); 707 708 cp = (struct sr_meta_chunk *)(sm + 1); 709 SLIST_FOREACH(ch_entry, cl, src_link) { 710 /* skip disks that are offline */ 711 if (ch_entry->src_meta.scm_status == BIOC_SDOFFLINE) { 712 DNPRINTF(SR_D_META, 713 "%s: %s chunk marked 
offline, spoofing status\n", 714 DEVNAME(sc), ch_entry->src_devname); 715 cp++; /* adjust chunk pointer to match failure */ 716 continue; 717 } else if (s->smd_read(sd, ch_entry->src_dev_mm, sm, fm)) { 718 /* read and translate */ 719 /* XXX mark chunk offline, elsewhere!! */ 720 ch_entry->src_meta.scm_status = BIOC_SDOFFLINE; 721 cp++; /* adjust chunk pointer to match failure */ 722 DNPRINTF(SR_D_META, "%s: sr_meta_read failed\n", 723 DEVNAME(sc)); 724 continue; 725 } 726 727 if (sm->ssdi.ssd_magic != SR_MAGIC) { 728 DNPRINTF(SR_D_META, "%s: sr_meta_read !SR_MAGIC\n", 729 DEVNAME(sc)); 730 continue; 731 } 732 733 /* validate metadata */ 734 if (sr_meta_validate(sd, ch_entry->src_dev_mm, sm, fm)) { 735 DNPRINTF(SR_D_META, "%s: invalid metadata\n", 736 DEVNAME(sc)); 737 no_disk = -1; 738 goto done; 739 } 740 741 /* assume first chunk contains metadata */ 742 if (got_meta == 0) { 743 bcopy(sm, sd->sd_meta, sizeof(*sd->sd_meta)); 744 got_meta = 1; 745 } 746 747 bcopy(cp, &ch_entry->src_meta, sizeof(ch_entry->src_meta)); 748 749 if (sm->ssdi.ssd_opt_no > 1) 750 panic("not yet read > 1 optional metadata members"); 751 752 if (sm->ssdi.ssd_opt_no) { 753 om = (struct sr_meta_opt *) ((u_int8_t *)(sm + 1) + 754 sizeof(struct sr_meta_chunk) * 755 sm->ssdi.ssd_chunk_no); 756 bcopy(om, &ch_entry->src_opt, 757 sizeof(ch_entry->src_opt)); 758 759 if (om->somi.som_type == SR_OPT_CRYPTO) { 760 bcopy( 761 &ch_entry->src_opt.somi.som_meta.smm_crypto, 762 &sd->mds.mdd_crypto.scr_meta, 763 sizeof(sd->mds.mdd_crypto.scr_meta)); 764 } 765 } 766 767 cp++; 768 no_disk++; 769 } 770 771 free(sm, M_DEVBUF); 772 if (fm) 773 free(fm, M_DEVBUF); 774 775 done: 776 DNPRINTF(SR_D_META, "%s: sr_meta_read found %d parts\n", DEVNAME(sc), 777 no_disk); 778 return (no_disk); 779 } 780 781 int 782 sr_meta_validate(struct sr_discipline *sd, dev_t dev, struct sr_metadata *sm, 783 void *fm) 784 { 785 struct sr_softc *sc = sd->sd_sc; 786 struct sr_meta_driver *s; 787 #ifdef SR_DEBUG 788 struct sr_meta_chunk *mc; 789 #endif 790 char devname[32]; 791 int rv = 1; 792 u_int8_t checksum[MD5_DIGEST_LENGTH]; 793 794 DNPRINTF(SR_D_META, "%s: sr_meta_validate(%p)\n", DEVNAME(sc), sm); 795 796 sr_meta_getdevname(sc, dev, devname, sizeof(devname)); 797 798 s = &smd[sd->sd_meta_type]; 799 if (sd->sd_meta_type != SR_META_F_NATIVE) 800 if (s->smd_validate(sd, sm, fm)) { 801 printf("%s: invalid foreign metadata\n", DEVNAME(sc)); 802 goto done; 803 } 804 805 /* 806 * at this point all foreign metadata has been translated to the native 807 * format and will be treated just like the native format 808 */ 809 810 if (sm->ssdi.ssd_magic != SR_MAGIC) { 811 printf("%s: not valid softraid metadata\n", DEVNAME(sc)); 812 goto done; 813 } 814 815 if (sm->ssdi.ssd_version != SR_META_VERSION) { 816 printf("%s: %s can not read metadata version %u, expected %u\n", 817 DEVNAME(sc), devname, sm->ssdi.ssd_version, 818 SR_META_VERSION); 819 goto done; 820 } 821 822 sr_checksum(sc, sm, &checksum, sizeof(struct sr_meta_invariant)); 823 if (bcmp(&checksum, &sm->ssd_checksum, sizeof(checksum))) { 824 printf("%s: invalid metadata checksum\n", DEVNAME(sc)); 825 goto done; 826 } 827 828 /* XXX do other checksums */ 829 830 #ifdef SR_DEBUG 831 /* warn if disk changed order */ 832 mc = (struct sr_meta_chunk *)(sm + 1); 833 if (strncmp(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname, devname, 834 sizeof(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname))) 835 DNPRINTF(SR_D_META, "%s: roaming device %s -> %s\n", 836 DEVNAME(sc), mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname, 837 devname); 
838 #endif 839 840 /* we have meta data on disk */ 841 DNPRINTF(SR_D_META, "%s: sr_meta_validate valid metadata %s\n", 842 DEVNAME(sc), devname); 843 844 rv = 0; 845 done: 846 return (rv); 847 } 848 849 int 850 sr_meta_native_bootprobe(struct sr_softc *sc, struct device *dv, 851 struct sr_metadata_list_head *mlh) 852 { 853 struct vnode *vn; 854 struct disklabel label; 855 struct sr_metadata *md; 856 struct sr_discipline *fake_sd; 857 struct sr_metadata_list *mle; 858 char devname[32]; 859 dev_t dev, devr; 860 int error, i, majdev; 861 int rv = SR_META_NOTCLAIMED; 862 863 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe\n", DEVNAME(sc)); 864 865 majdev = findblkmajor(dv); 866 if (majdev == -1) 867 goto done; 868 dev = MAKEDISKDEV(majdev, dv->dv_unit, RAW_PART); 869 if (bdevvp(dev, &vn)) { 870 printf("%s:, sr_meta_native_bootprobe: can't allocate vnode\n", 871 DEVNAME(sc)); 872 goto done; 873 } 874 875 /* open device */ 876 error = VOP_OPEN(vn, FREAD, NOCRED, 0); 877 if (error) { 878 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe open " 879 "failed\n", DEVNAME(sc)); 880 vput(vn); 881 goto done; 882 } 883 884 /* get disklabel */ 885 error = VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD, NOCRED, 0); 886 if (error) { 887 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe ioctl " 888 "failed\n", DEVNAME(sc)); 889 VOP_CLOSE(vn, FREAD, NOCRED, 0); 890 vput(vn); 891 goto done; 892 } 893 894 /* we are done, close device */ 895 error = VOP_CLOSE(vn, FREAD, NOCRED, 0); 896 if (error) { 897 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe close " 898 "failed\n", DEVNAME(sc)); 899 vput(vn); 900 goto done; 901 } 902 vput(vn); 903 904 md = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO); 905 if (md == NULL) { 906 printf("%s: not enough memory for metadata buffer\n", 907 DEVNAME(sc)); 908 goto done; 909 } 910 911 /* create fake sd to use utility functions */ 912 fake_sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_ZERO); 913 if (fake_sd == NULL) { 914 printf("%s: not enough memory for fake discipline\n", 915 DEVNAME(sc)); 916 goto nosd; 917 } 918 fake_sd->sd_sc = sc; 919 fake_sd->sd_meta_type = SR_META_F_NATIVE; 920 921 for (i = 0; i < MAXPARTITIONS; i++) { 922 if (label.d_partitions[i].p_fstype != FS_RAID) 923 continue; 924 925 /* open partition */ 926 devr = MAKEDISKDEV(majdev, dv->dv_unit, i); 927 if (bdevvp(devr, &vn)) { 928 printf("%s:, sr_meta_native_bootprobe: can't allocate " 929 "vnode for partition\n", DEVNAME(sc)); 930 goto done; 931 } 932 error = VOP_OPEN(vn, FREAD, NOCRED, 0); 933 if (error) { 934 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe " 935 "open failed, partition %d\n", 936 DEVNAME(sc), i); 937 vput(vn); 938 continue; 939 } 940 941 if (sr_meta_native_read(fake_sd, devr, md, NULL)) { 942 printf("%s: native bootprobe could not read native " 943 "metadata\n", DEVNAME(sc)); 944 VOP_CLOSE(vn, FREAD, NOCRED, 0); 945 vput(vn); 946 continue; 947 } 948 949 /* are we a softraid partition? 
*/ 950 if (md->ssdi.ssd_magic != SR_MAGIC) { 951 VOP_CLOSE(vn, FREAD, NOCRED, 0); 952 vput(vn); 953 continue; 954 } 955 956 sr_meta_getdevname(sc, devr, devname, sizeof(devname)); 957 if (sr_meta_validate(fake_sd, devr, md, NULL) == 0) { 958 if (md->ssdi.ssd_flags & BIOC_SCNOAUTOASSEMBLE) { 959 DNPRINTF(SR_D_META, "%s: don't save %s\n", 960 DEVNAME(sc), devname); 961 } else { 962 /* XXX fix M_WAITOK, this is boot time */ 963 mle = malloc(sizeof(*mle), M_DEVBUF, 964 M_WAITOK | M_ZERO); 965 bcopy(md, &mle->sml_metadata, 966 SR_META_SIZE * 512); 967 mle->sml_mm = devr; 968 mle->sml_vn = vn; 969 SLIST_INSERT_HEAD(mlh, mle, sml_link); 970 rv = SR_META_CLAIMED; 971 } 972 } 973 974 /* we are done, close partition */ 975 VOP_CLOSE(vn, FREAD, NOCRED, 0); 976 vput(vn); 977 } 978 979 free(fake_sd, M_DEVBUF); 980 nosd: 981 free(md, M_DEVBUF); 982 done: 983 return (rv); 984 } 985 986 int 987 sr_boot_assembly(struct sr_softc *sc) 988 { 989 struct device *dv; 990 struct bioc_createraid bc; 991 struct sr_metadata_list_head mlh; 992 struct sr_metadata_list *mle, *mlenext, *mle1, *mle2; 993 struct sr_metadata *metadata; 994 struct sr_boot_volume_head bvh; 995 struct sr_boot_volume *vol, *vp1, *vp2; 996 struct sr_meta_chunk *hm; 997 struct sr_chunk_head *cl; 998 struct sr_chunk *hotspare, *chunk, *last; 999 u_int32_t chunk_id; 1000 u_int64_t *ondisk = NULL; 1001 dev_t *devs = NULL; 1002 char devname[32]; 1003 int rv = 0, i; 1004 1005 DNPRINTF(SR_D_META, "%s: sr_boot_assembly\n", DEVNAME(sc)); 1006 1007 SLIST_INIT(&mlh); 1008 1009 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1010 if (dv->dv_class != DV_DISK) 1011 continue; 1012 1013 /* Only check sd(4) and wd(4) devices. */ 1014 if (strcmp(dv->dv_cfdata->cf_driver->cd_name, "sd") && 1015 strcmp(dv->dv_cfdata->cf_driver->cd_name, "wd")) 1016 continue; 1017 1018 /* native softraid uses partitions */ 1019 if (sr_meta_native_bootprobe(sc, dv, &mlh) == SR_META_CLAIMED) 1020 continue; 1021 1022 /* probe non-native disks */ 1023 } 1024 1025 /* 1026 * Create a list of volumes and associate chunks with each volume. 1027 */ 1028 SLIST_INIT(&bvh); 1029 for (mle = SLIST_FIRST(&mlh); mle != SLIST_END(&mlh); mle = mlenext) { 1030 1031 mlenext = SLIST_NEXT(mle, sml_link); 1032 SLIST_REMOVE(&mlh, mle, sr_metadata_list, sml_link); 1033 1034 metadata = (struct sr_metadata *)&mle->sml_metadata; 1035 mle->sml_chunk_id = metadata->ssdi.ssd_chunk_id; 1036 1037 SLIST_FOREACH(vol, &bvh, sbv_link) { 1038 if (bcmp(&metadata->ssdi.ssd_uuid, &vol->sbv_uuid, 1039 sizeof(metadata->ssdi.ssd_uuid)) == 0) 1040 break; 1041 } 1042 1043 if (vol == NULL) { 1044 vol = malloc(sizeof(struct sr_boot_volume), 1045 M_DEVBUF, M_NOWAIT | M_CANFAIL | M_ZERO); 1046 if (vol == NULL) { 1047 printf("%s: failed to allocate boot volume!\n", 1048 DEVNAME(sc)); 1049 goto unwind; 1050 } 1051 1052 vol->sbv_level = metadata->ssdi.ssd_level; 1053 vol->sbv_volid = metadata->ssdi.ssd_volid; 1054 vol->sbv_chunk_no = metadata->ssdi.ssd_chunk_no; 1055 bcopy(&metadata->ssdi.ssd_uuid, &vol->sbv_uuid, 1056 sizeof(metadata->ssdi.ssd_uuid)); 1057 SLIST_INIT(&vol->sml); 1058 1059 /* Maintain volume order. 
*/ 1060 vp2 = NULL; 1061 SLIST_FOREACH(vp1, &bvh, sbv_link) { 1062 if (vp1->sbv_volid > vol->sbv_volid) 1063 break; 1064 vp2 = vp1; 1065 } 1066 if (vp2 == NULL) { 1067 DNPRINTF(SR_D_META, "%s: insert volume %u " 1068 "at head\n", DEVNAME(sc), vol->sbv_volid); 1069 SLIST_INSERT_HEAD(&bvh, vol, sbv_link); 1070 } else { 1071 DNPRINTF(SR_D_META, "%s: insert volume %u " 1072 "after %u\n", DEVNAME(sc), vol->sbv_volid, 1073 vp2->sbv_volid); 1074 SLIST_INSERT_AFTER(vp2, vol, sbv_link); 1075 } 1076 } 1077 1078 /* Maintain chunk order. */ 1079 mle2 = NULL; 1080 SLIST_FOREACH(mle1, &vol->sml, sml_link) { 1081 if (mle1->sml_chunk_id > mle->sml_chunk_id) 1082 break; 1083 mle2 = mle1; 1084 } 1085 if (mle2 == NULL) { 1086 DNPRINTF(SR_D_META, "%s: volume %u insert chunk %u " 1087 "at head\n", DEVNAME(sc), vol->sbv_volid, 1088 mle->sml_chunk_id); 1089 SLIST_INSERT_HEAD(&vol->sml, mle, sml_link); 1090 } else { 1091 DNPRINTF(SR_D_META, "%s: volume %u insert chunk %u " 1092 "after %u\n", DEVNAME(sc), vol->sbv_volid, 1093 mle->sml_chunk_id, mle2->sml_chunk_id); 1094 SLIST_INSERT_AFTER(mle2, mle, sml_link); 1095 } 1096 1097 vol->sbv_dev_no++; 1098 } 1099 1100 /* Allocate memory for device and ondisk version arrays. */ 1101 devs = malloc(BIOC_CRMAXLEN * sizeof(dev_t), M_DEVBUF, 1102 M_NOWAIT | M_CANFAIL); 1103 if (devs == NULL) { 1104 printf("%s: failed to allocate device array\n", DEVNAME(sc)); 1105 goto unwind; 1106 } 1107 ondisk = malloc(BIOC_CRMAXLEN * sizeof(u_int64_t), M_DEVBUF, 1108 M_NOWAIT | M_CANFAIL); 1109 if (ondisk == NULL) { 1110 printf("%s: failed to allocate ondisk array\n", DEVNAME(sc)); 1111 goto unwind; 1112 } 1113 1114 /* 1115 * Assemble hotspare "volumes". 1116 */ 1117 SLIST_FOREACH(vol, &bvh, sbv_link) { 1118 1119 /* Check if this is a hotspare "volume". */ 1120 if (vol->sbv_level != SR_HOTSPARE_LEVEL || 1121 vol->sbv_chunk_no != 1) 1122 continue; 1123 1124 #ifdef SR_DEBUG 1125 DNPRINTF(SR_D_META, "%s: assembling hotspare volume ", 1126 DEVNAME(sc)); 1127 if (sr_debug & SR_D_META) 1128 sr_uuid_print(&vol->sbv_uuid, 0); 1129 DNPRINTF(SR_D_META, " volid %u with %u chunks\n", 1130 vol->sbv_volid, vol->sbv_chunk_no); 1131 #endif 1132 1133 /* Create hotspare chunk metadata. */ 1134 hotspare = malloc(sizeof(struct sr_chunk), M_DEVBUF, 1135 M_NOWAIT | M_CANFAIL | M_ZERO); 1136 if (hotspare == NULL) { 1137 printf("%s: failed to allocate hotspare\n", 1138 DEVNAME(sc)); 1139 goto unwind; 1140 } 1141 1142 mle = SLIST_FIRST(&vol->sml); 1143 sr_meta_getdevname(sc, mle->sml_mm, devname, sizeof(devname)); 1144 hotspare->src_dev_mm = mle->sml_mm; 1145 hotspare->src_vn = mle->sml_vn; 1146 strlcpy(hotspare->src_devname, devname, 1147 sizeof(hotspare->src_devname)); 1148 hotspare->src_size = metadata->ssdi.ssd_size; 1149 1150 hm = &hotspare->src_meta; 1151 hm->scmi.scm_volid = SR_HOTSPARE_VOLID; 1152 hm->scmi.scm_chunk_id = 0; 1153 hm->scmi.scm_size = metadata->ssdi.ssd_size; 1154 hm->scmi.scm_coerced_size = metadata->ssdi.ssd_size; 1155 strlcpy(hm->scmi.scm_devname, devname, 1156 sizeof(hm->scmi.scm_devname)); 1157 bcopy(&metadata->ssdi.ssd_uuid, &hm->scmi.scm_uuid, 1158 sizeof(struct sr_uuid)); 1159 1160 sr_checksum(sc, hm, &hm->scm_checksum, 1161 sizeof(struct sr_meta_chunk_invariant)); 1162 1163 hm->scm_status = BIOC_SDHOTSPARE; 1164 1165 /* Add chunk to hotspare list. 
*/ 1166 rw_enter_write(&sc->sc_hs_lock); 1167 cl = &sc->sc_hotspare_list; 1168 if (SLIST_EMPTY(cl)) 1169 SLIST_INSERT_HEAD(cl, hotspare, src_link); 1170 else { 1171 SLIST_FOREACH(chunk, cl, src_link) 1172 last = chunk; 1173 SLIST_INSERT_AFTER(last, hotspare, src_link); 1174 } 1175 sc->sc_hotspare_no++; 1176 rw_exit_write(&sc->sc_hs_lock); 1177 1178 } 1179 1180 /* 1181 * Assemble RAID volumes. 1182 */ 1183 SLIST_FOREACH(vol, &bvh, sbv_link) { 1184 1185 /* Check if this is a hotspare "volume". */ 1186 if (vol->sbv_level == SR_HOTSPARE_LEVEL && 1187 vol->sbv_chunk_no == 1) 1188 continue; 1189 1190 #ifdef SR_DEBUG 1191 DNPRINTF(SR_D_META, "%s: assembling volume ", DEVNAME(sc)); 1192 if (sr_debug & SR_D_META) 1193 sr_uuid_print(&vol->sbv_uuid, 0); 1194 DNPRINTF(SR_D_META, " volid %u with %u chunks\n", 1195 vol->sbv_volid, vol->sbv_chunk_no); 1196 #endif 1197 1198 for (i = 0; i < BIOC_CRMAXLEN; i++) { 1199 devs[i] = NODEV; /* mark device as illegal */ 1200 ondisk[i] = 0; 1201 } 1202 1203 SLIST_FOREACH(mle, &vol->sml, sml_link) { 1204 metadata = (struct sr_metadata *)&mle->sml_metadata; 1205 chunk_id = metadata->ssdi.ssd_chunk_id; 1206 1207 if (devs[chunk_id] != NODEV) { 1208 vol->sbv_dev_no--; 1209 sr_meta_getdevname(sc, mle->sml_mm, devname, 1210 sizeof(devname)); 1211 printf("%s: found duplicate chunk %u for " 1212 "volume %u on device %s\n", DEVNAME(sc), 1213 chunk_id, vol->sbv_volid, devname); 1214 } 1215 1216 if (devs[chunk_id] == NODEV || 1217 metadata->ssd_ondisk > ondisk[chunk_id]) { 1218 devs[chunk_id] = mle->sml_mm; 1219 ondisk[chunk_id] = metadata->ssd_ondisk; 1220 DNPRINTF(SR_D_META, "%s: using ondisk " 1221 "metadata version %llu for chunk %u\n", 1222 DEVNAME(sc), ondisk[chunk_id], chunk_id); 1223 } 1224 } 1225 1226 if (vol->sbv_chunk_no != vol->sbv_dev_no) { 1227 printf("%s: not all chunks were provided; " 1228 "attempting to bring volume %d online\n", 1229 DEVNAME(sc), vol->sbv_volid); 1230 } 1231 1232 bzero(&bc, sizeof(bc)); 1233 bc.bc_level = vol->sbv_level; 1234 bc.bc_dev_list_len = vol->sbv_chunk_no * sizeof(dev_t); 1235 bc.bc_dev_list = devs; 1236 bc.bc_flags = BIOC_SCDEVT; 1237 1238 rw_enter_write(&sc->sc_lock); 1239 sr_ioctl_createraid(sc, &bc, 0); 1240 rw_exit_write(&sc->sc_lock); 1241 1242 rv++; 1243 } 1244 1245 /* done with metadata */ 1246 unwind: 1247 for (vp1 = SLIST_FIRST(&bvh); vp1 != SLIST_END(&bvh); vp1 = vp2) { 1248 vp2 = SLIST_NEXT(vp1, sbv_link); 1249 for (mle1 = SLIST_FIRST(&vp1->sml); 1250 mle1 != SLIST_END(&vp1->sml); mle1 = mle2) { 1251 mle2 = SLIST_NEXT(mle1, sml_link); 1252 free(mle1, M_DEVBUF); 1253 } 1254 free(vp1, M_DEVBUF); 1255 } 1256 for (mle = SLIST_FIRST(&mlh); mle != SLIST_END(&mlh); mle = mle2) { 1257 mle2 = SLIST_NEXT(mle, sml_link); 1258 free(mle, M_DEVBUF); 1259 } 1260 SLIST_INIT(&mlh); 1261 1262 if (devs) 1263 free(devs, M_DEVBUF); 1264 if (ondisk) 1265 free(ondisk, M_DEVBUF); 1266 1267 return (rv); 1268 } 1269 1270 int 1271 sr_meta_native_probe(struct sr_softc *sc, struct sr_chunk *ch_entry) 1272 { 1273 struct disklabel label; 1274 char *devname; 1275 int error, part; 1276 daddr64_t size; 1277 1278 DNPRINTF(SR_D_META, "%s: sr_meta_native_probe(%s)\n", 1279 DEVNAME(sc), ch_entry->src_devname); 1280 1281 devname = ch_entry->src_devname; 1282 part = DISKPART(ch_entry->src_dev_mm); 1283 1284 /* get disklabel */ 1285 error = VOP_IOCTL(ch_entry->src_vn, DIOCGDINFO, (caddr_t)&label, FREAD, 1286 NOCRED, 0); 1287 if (error) { 1288 DNPRINTF(SR_D_META, "%s: %s can't obtain disklabel\n", 1289 DEVNAME(sc), devname); 1290 goto unwind; 1291 } 1292 1293 /* 
make sure the partition is of the right type */ 1294 if (label.d_partitions[part].p_fstype != FS_RAID) { 1295 DNPRINTF(SR_D_META, 1296 "%s: %s partition not of type RAID (%d)\n", DEVNAME(sc), 1297 devname, 1298 label.d_partitions[part].p_fstype); 1299 goto unwind; 1300 } 1301 1302 size = DL_GETPSIZE(&label.d_partitions[part]) - 1303 SR_META_SIZE - SR_META_OFFSET; 1304 if (size <= 0) { 1305 DNPRINTF(SR_D_META, "%s: %s partition too small\n", DEVNAME(sc), 1306 devname); 1307 goto unwind; 1308 } 1309 ch_entry->src_size = size; 1310 1311 DNPRINTF(SR_D_META, "%s: probe found %s size %d\n", DEVNAME(sc), 1312 devname, size); 1313 1314 return (SR_META_F_NATIVE); 1315 unwind: 1316 DNPRINTF(SR_D_META, "%s: invalid device: %s\n", DEVNAME(sc), 1317 devname ? devname : "nodev"); 1318 return (SR_META_F_INVALID); 1319 } 1320 1321 int 1322 sr_meta_native_attach(struct sr_discipline *sd, int force) 1323 { 1324 struct sr_softc *sc = sd->sd_sc; 1325 struct sr_chunk_head *cl = &sd->sd_vol.sv_chunk_list; 1326 struct sr_metadata *md = NULL; 1327 struct sr_chunk *ch_entry, *ch_next; 1328 struct sr_uuid uuid; 1329 u_int64_t version = 0; 1330 int sr, not_sr, rv = 1, d, expected = -1, old_meta = 0; 1331 1332 DNPRINTF(SR_D_META, "%s: sr_meta_native_attach\n", DEVNAME(sc)); 1333 1334 md = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO); 1335 if (md == NULL) { 1336 printf("%s: not enough memory for metadata buffer\n", 1337 DEVNAME(sc)); 1338 goto bad; 1339 } 1340 1341 bzero(&uuid, sizeof uuid); 1342 1343 sr = not_sr = d = 0; 1344 SLIST_FOREACH(ch_entry, cl, src_link) { 1345 if (ch_entry->src_dev_mm == NODEV) 1346 continue; 1347 1348 if (sr_meta_native_read(sd, ch_entry->src_dev_mm, md, NULL)) { 1349 printf("%s: could not read native metadata\n", 1350 DEVNAME(sc)); 1351 goto bad; 1352 } 1353 1354 if (md->ssdi.ssd_magic == SR_MAGIC) { 1355 sr++; 1356 if (d == 0) { 1357 bcopy(&md->ssdi.ssd_uuid, &uuid, sizeof uuid); 1358 expected = md->ssdi.ssd_chunk_no; 1359 version = md->ssd_ondisk; 1360 d++; 1361 continue; 1362 } else if (bcmp(&md->ssdi.ssd_uuid, &uuid, 1363 sizeof uuid)) { 1364 printf("%s: not part of the same volume\n", 1365 DEVNAME(sc)); 1366 goto bad; 1367 } 1368 if (md->ssd_ondisk != version) { 1369 old_meta++; 1370 version = MAX(md->ssd_ondisk, version); 1371 } 1372 } else 1373 not_sr++; 1374 } 1375 1376 if (sr && not_sr) { 1377 printf("%s: not all chunks are of the native metadata format\n", 1378 DEVNAME(sc)); 1379 goto bad; 1380 } 1381 1382 /* mixed metadata versions; mark bad disks offline */ 1383 if (old_meta) { 1384 d = 0; 1385 for (ch_entry = SLIST_FIRST(cl); ch_entry != SLIST_END(cl); 1386 ch_entry = ch_next, d++) { 1387 ch_next = SLIST_NEXT(ch_entry, src_link); 1388 1389 /* XXX do we want to read this again? 
*/ 1390 if (ch_entry->src_dev_mm == NODEV) 1391 panic("src_dev_mm == NODEV"); 1392 if (sr_meta_native_read(sd, ch_entry->src_dev_mm, md, 1393 NULL)) 1394 printf("%s: could not read native metadata\n", 1395 DEVNAME(sc)); 1396 if (md->ssd_ondisk != version) 1397 sd->sd_vol.sv_chunks[d]->src_meta.scm_status = 1398 BIOC_SDOFFLINE; 1399 } 1400 } 1401 1402 if (expected != sr && !force && expected != -1) { 1403 DNPRINTF(SR_D_META, "%s: not all chunks were provided, trying " 1404 "anyway\n", DEVNAME(sc)); 1405 } 1406 1407 rv = 0; 1408 bad: 1409 if (md) 1410 free(md, M_DEVBUF); 1411 return (rv); 1412 } 1413 1414 int 1415 sr_meta_native_read(struct sr_discipline *sd, dev_t dev, 1416 struct sr_metadata *md, void *fm) 1417 { 1418 #ifdef SR_DEBUG 1419 struct sr_softc *sc = sd->sd_sc; 1420 #endif 1421 DNPRINTF(SR_D_META, "%s: sr_meta_native_read(0x%x, %p)\n", 1422 DEVNAME(sc), dev, md); 1423 1424 return (sr_meta_rw(sd, dev, md, SR_META_SIZE * 512, SR_META_OFFSET, 1425 B_READ)); 1426 } 1427 1428 int 1429 sr_meta_native_write(struct sr_discipline *sd, dev_t dev, 1430 struct sr_metadata *md, void *fm) 1431 { 1432 #ifdef SR_DEBUG 1433 struct sr_softc *sc = sd->sd_sc; 1434 #endif 1435 DNPRINTF(SR_D_META, "%s: sr_meta_native_write(0x%x, %p)\n", 1436 DEVNAME(sc), dev, md); 1437 1438 return (sr_meta_rw(sd, dev, md, SR_META_SIZE * 512, SR_META_OFFSET, 1439 B_WRITE)); 1440 } 1441 1442 void 1443 sr_hotplug_register(struct sr_discipline *sd, void *func) 1444 { 1445 struct sr_hotplug_list *mhe; 1446 1447 DNPRINTF(SR_D_MISC, "%s: sr_hotplug_register: %p\n", 1448 DEVNAME(sd->sd_sc), func); 1449 1450 /* make sure we aren't on the list yet */ 1451 SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link) 1452 if (mhe->sh_hotplug == func) 1453 return; 1454 1455 mhe = malloc(sizeof(struct sr_hotplug_list), M_DEVBUF, 1456 M_WAITOK | M_ZERO); 1457 mhe->sh_hotplug = func; 1458 mhe->sh_sd = sd; 1459 SLIST_INSERT_HEAD(&sr_hotplug_callbacks, mhe, shl_link); 1460 } 1461 1462 void 1463 sr_hotplug_unregister(struct sr_discipline *sd, void *func) 1464 { 1465 struct sr_hotplug_list *mhe; 1466 1467 DNPRINTF(SR_D_MISC, "%s: sr_hotplug_unregister: %s %p\n", 1468 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, func); 1469 1470 /* make sure we are on the list yet */ 1471 SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link) 1472 if (mhe->sh_hotplug == func) { 1473 SLIST_REMOVE(&sr_hotplug_callbacks, mhe, 1474 sr_hotplug_list, shl_link); 1475 free(mhe, M_DEVBUF); 1476 if (SLIST_EMPTY(&sr_hotplug_callbacks)) 1477 SLIST_INIT(&sr_hotplug_callbacks); 1478 return; 1479 } 1480 } 1481 1482 void 1483 sr_disk_attach(struct disk *diskp, int action) 1484 { 1485 struct sr_hotplug_list *mhe; 1486 1487 SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link) 1488 if (mhe->sh_sd->sd_ready) 1489 mhe->sh_hotplug(mhe->sh_sd, diskp, action); 1490 } 1491 1492 int 1493 sr_match(struct device *parent, void *match, void *aux) 1494 { 1495 return (1); 1496 } 1497 1498 void 1499 sr_attach(struct device *parent, struct device *self, void *aux) 1500 { 1501 struct sr_softc *sc = (void *)self; 1502 1503 DNPRINTF(SR_D_MISC, "\n%s: sr_attach", DEVNAME(sc)); 1504 1505 rw_init(&sc->sc_lock, "sr_lock"); 1506 rw_init(&sc->sc_hs_lock, "sr_hs_lock"); 1507 1508 SLIST_INIT(&sr_hotplug_callbacks); 1509 SLIST_INIT(&sc->sc_hotspare_list); 1510 1511 if (bio_register(&sc->sc_dev, sr_ioctl) != 0) 1512 printf("%s: controller registration failed", DEVNAME(sc)); 1513 else 1514 sc->sc_ioctl = sr_ioctl; 1515 1516 printf("\n"); 1517 1518 softraid_disk_attach = sr_disk_attach; 1519 1520 
sr_boot_assembly(sc); 1521 } 1522 1523 int 1524 sr_detach(struct device *self, int flags) 1525 { 1526 return (0); 1527 } 1528 1529 int 1530 sr_activate(struct device *self, int act) 1531 { 1532 return (1); 1533 } 1534 1535 void 1536 sr_minphys(struct buf *bp, struct scsi_link *sl) 1537 { 1538 DNPRINTF(SR_D_MISC, "sr_minphys: %d\n", bp->b_bcount); 1539 1540 /* XXX currently using SR_MAXFER = MAXPHYS */ 1541 if (bp->b_bcount > SR_MAXFER) 1542 bp->b_bcount = SR_MAXFER; 1543 minphys(bp); 1544 } 1545 1546 void 1547 sr_copy_internal_data(struct scsi_xfer *xs, void *v, size_t size) 1548 { 1549 size_t copy_cnt; 1550 1551 DNPRINTF(SR_D_MISC, "sr_copy_internal_data xs: %p size: %d\n", 1552 xs, size); 1553 1554 if (xs->datalen) { 1555 copy_cnt = MIN(size, xs->datalen); 1556 bcopy(v, xs->data, copy_cnt); 1557 } 1558 } 1559 1560 int 1561 sr_ccb_alloc(struct sr_discipline *sd) 1562 { 1563 struct sr_ccb *ccb; 1564 int i; 1565 1566 if (!sd) 1567 return (1); 1568 1569 DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc\n", DEVNAME(sd->sd_sc)); 1570 1571 if (sd->sd_ccb) 1572 return (1); 1573 1574 sd->sd_ccb = malloc(sizeof(struct sr_ccb) * 1575 sd->sd_max_wu * sd->sd_max_ccb_per_wu, M_DEVBUF, M_WAITOK | M_ZERO); 1576 TAILQ_INIT(&sd->sd_ccb_freeq); 1577 for (i = 0; i < sd->sd_max_wu * sd->sd_max_ccb_per_wu; i++) { 1578 ccb = &sd->sd_ccb[i]; 1579 ccb->ccb_dis = sd; 1580 sr_ccb_put(ccb); 1581 } 1582 1583 DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc ccb: %d\n", 1584 DEVNAME(sd->sd_sc), sd->sd_max_wu * sd->sd_max_ccb_per_wu); 1585 1586 return (0); 1587 } 1588 1589 void 1590 sr_ccb_free(struct sr_discipline *sd) 1591 { 1592 struct sr_ccb *ccb; 1593 1594 if (!sd) 1595 return; 1596 1597 DNPRINTF(SR_D_CCB, "%s: sr_ccb_free %p\n", DEVNAME(sd->sd_sc), sd); 1598 1599 while ((ccb = TAILQ_FIRST(&sd->sd_ccb_freeq)) != NULL) 1600 TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link); 1601 1602 if (sd->sd_ccb) 1603 free(sd->sd_ccb, M_DEVBUF); 1604 } 1605 1606 struct sr_ccb * 1607 sr_ccb_get(struct sr_discipline *sd) 1608 { 1609 struct sr_ccb *ccb; 1610 int s; 1611 1612 s = splbio(); 1613 1614 ccb = TAILQ_FIRST(&sd->sd_ccb_freeq); 1615 if (ccb) { 1616 TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link); 1617 ccb->ccb_state = SR_CCB_INPROGRESS; 1618 } 1619 1620 splx(s); 1621 1622 DNPRINTF(SR_D_CCB, "%s: sr_ccb_get: %p\n", DEVNAME(sd->sd_sc), 1623 ccb); 1624 1625 return (ccb); 1626 } 1627 1628 void 1629 sr_ccb_put(struct sr_ccb *ccb) 1630 { 1631 struct sr_discipline *sd = ccb->ccb_dis; 1632 int s; 1633 1634 DNPRINTF(SR_D_CCB, "%s: sr_ccb_put: %p\n", DEVNAME(sd->sd_sc), 1635 ccb); 1636 1637 s = splbio(); 1638 1639 ccb->ccb_wu = NULL; 1640 ccb->ccb_state = SR_CCB_FREE; 1641 ccb->ccb_target = -1; 1642 ccb->ccb_opaque = NULL; 1643 1644 TAILQ_INSERT_TAIL(&sd->sd_ccb_freeq, ccb, ccb_link); 1645 1646 splx(s); 1647 } 1648 1649 int 1650 sr_wu_alloc(struct sr_discipline *sd) 1651 { 1652 struct sr_workunit *wu; 1653 int i, no_wu; 1654 1655 if (!sd) 1656 return (1); 1657 1658 DNPRINTF(SR_D_WU, "%s: sr_wu_alloc %p %d\n", DEVNAME(sd->sd_sc), 1659 sd, sd->sd_max_wu); 1660 1661 if (sd->sd_wu) 1662 return (1); 1663 1664 no_wu = sd->sd_max_wu; 1665 sd->sd_wu_pending = no_wu; 1666 1667 sd->sd_wu = malloc(sizeof(struct sr_workunit) * no_wu, 1668 M_DEVBUF, M_WAITOK | M_ZERO); 1669 TAILQ_INIT(&sd->sd_wu_freeq); 1670 TAILQ_INIT(&sd->sd_wu_pendq); 1671 TAILQ_INIT(&sd->sd_wu_defq); 1672 for (i = 0; i < no_wu; i++) { 1673 wu = &sd->sd_wu[i]; 1674 wu->swu_dis = sd; 1675 sr_wu_put(wu); 1676 } 1677 1678 return (0); 1679 } 1680 1681 void 1682 sr_wu_free(struct sr_discipline *sd) 1683 { 1684 
struct sr_workunit *wu; 1685 1686 if (!sd) 1687 return; 1688 1689 DNPRINTF(SR_D_WU, "%s: sr_wu_free %p\n", DEVNAME(sd->sd_sc), sd); 1690 1691 while ((wu = TAILQ_FIRST(&sd->sd_wu_freeq)) != NULL) 1692 TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link); 1693 while ((wu = TAILQ_FIRST(&sd->sd_wu_pendq)) != NULL) 1694 TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link); 1695 while ((wu = TAILQ_FIRST(&sd->sd_wu_defq)) != NULL) 1696 TAILQ_REMOVE(&sd->sd_wu_defq, wu, swu_link); 1697 1698 if (sd->sd_wu) 1699 free(sd->sd_wu, M_DEVBUF); 1700 } 1701 1702 void 1703 sr_wu_put(struct sr_workunit *wu) 1704 { 1705 struct sr_discipline *sd = wu->swu_dis; 1706 struct sr_ccb *ccb; 1707 1708 int s; 1709 1710 DNPRINTF(SR_D_WU, "%s: sr_wu_put: %p\n", DEVNAME(sd->sd_sc), wu); 1711 1712 s = splbio(); 1713 1714 wu->swu_xs = NULL; 1715 wu->swu_state = SR_WU_FREE; 1716 wu->swu_ios_complete = 0; 1717 wu->swu_ios_failed = 0; 1718 wu->swu_ios_succeeded = 0; 1719 wu->swu_io_count = 0; 1720 wu->swu_blk_start = 0; 1721 wu->swu_blk_end = 0; 1722 wu->swu_collider = NULL; 1723 wu->swu_fake = 0; 1724 wu->swu_flags = 0; 1725 1726 while ((ccb = TAILQ_FIRST(&wu->swu_ccb)) != NULL) { 1727 TAILQ_REMOVE(&wu->swu_ccb, ccb, ccb_link); 1728 sr_ccb_put(ccb); 1729 } 1730 TAILQ_INIT(&wu->swu_ccb); 1731 1732 TAILQ_INSERT_TAIL(&sd->sd_wu_freeq, wu, swu_link); 1733 sd->sd_wu_pending--; 1734 1735 /* wake up sleepers */ 1736 #ifdef DIAGNOSTIC 1737 if (sd->sd_wu_sleep < 0) 1738 panic("negative wu sleepers"); 1739 #endif /* DIAGNOSTIC */ 1740 if (sd->sd_wu_sleep) 1741 wakeup(&sd->sd_wu_sleep); 1742 1743 splx(s); 1744 } 1745 1746 struct sr_workunit * 1747 sr_wu_get(struct sr_discipline *sd, int canwait) 1748 { 1749 struct sr_workunit *wu; 1750 int s; 1751 1752 s = splbio(); 1753 1754 for (;;) { 1755 wu = TAILQ_FIRST(&sd->sd_wu_freeq); 1756 if (wu) { 1757 TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link); 1758 wu->swu_state = SR_WU_INPROGRESS; 1759 sd->sd_wu_pending++; 1760 break; 1761 } else if (wu == NULL && canwait) { 1762 sd->sd_wu_sleep++; 1763 tsleep(&sd->sd_wu_sleep, PRIBIO, "sr_wu_get", 0); 1764 sd->sd_wu_sleep--; 1765 } else 1766 break; 1767 } 1768 1769 splx(s); 1770 1771 DNPRINTF(SR_D_WU, "%s: sr_wu_get: %p\n", DEVNAME(sd->sd_sc), wu); 1772 1773 return (wu); 1774 } 1775 1776 void 1777 sr_scsi_done(struct sr_discipline *sd, struct scsi_xfer *xs) 1778 { 1779 int s; 1780 1781 DNPRINTF(SR_D_DIS, "%s: sr_scsi_done: xs %p\n", DEVNAME(sd->sd_sc), xs); 1782 1783 s = splbio(); 1784 scsi_done(xs); 1785 splx(s); 1786 } 1787 1788 int 1789 sr_scsi_cmd(struct scsi_xfer *xs) 1790 { 1791 int s; 1792 struct scsi_link *link = xs->sc_link; 1793 struct sr_softc *sc = link->adapter_softc; 1794 struct sr_workunit *wu = NULL; 1795 struct sr_discipline *sd; 1796 1797 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: scsibus%d xs: %p " 1798 "flags: %#x\n", DEVNAME(sc), link->scsibus, xs, xs->flags); 1799 1800 sd = sc->sc_dis[link->scsibus]; 1801 if (sd == NULL) { 1802 s = splhigh(); 1803 sd = sc->sc_attach_dis; 1804 splx(s); 1805 1806 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: attaching %p\n", 1807 DEVNAME(sc), sd); 1808 if (sd == NULL) { 1809 printf("%s: sr_scsi_cmd NULL discipline\n", 1810 DEVNAME(sc)); 1811 goto stuffup; 1812 } 1813 } 1814 1815 if (sd->sd_deleted) { 1816 printf("%s: %s device is being deleted, failing io\n", 1817 DEVNAME(sc), sd->sd_meta->ssd_devname); 1818 goto stuffup; 1819 } 1820 1821 /* 1822 * we'll let the midlayer deal with stalls instead of being clever 1823 * and sending sr_wu_get !(xs->flags & SCSI_NOSLEEP) in cansleep 1824 */ 1825 if ((wu = sr_wu_get(sd, 0)) == NULL) { 
1826 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd no wu\n", DEVNAME(sc)); 1827 return (NO_CCB); 1828 } 1829 1830 xs->error = XS_NOERROR; 1831 wu->swu_xs = xs; 1832 1833 /* the midlayer will query LUNs so report sense to stop scanning */ 1834 if (link->target != 0 || link->lun != 0) { 1835 DNPRINTF(SR_D_CMD, "%s: bad target:lun %d:%d\n", 1836 DEVNAME(sc), link->target, link->lun); 1837 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT | 1838 SSD_ERRCODE_VALID; 1839 sd->sd_scsi_sense.flags = SKEY_ILLEGAL_REQUEST; 1840 sd->sd_scsi_sense.add_sense_code = 0x25; 1841 sd->sd_scsi_sense.add_sense_code_qual = 0x00; 1842 sd->sd_scsi_sense.extra_len = 4; 1843 goto stuffup; 1844 } 1845 1846 switch (xs->cmd->opcode) { 1847 case READ_COMMAND: 1848 case READ_BIG: 1849 case READ_16: 1850 case WRITE_COMMAND: 1851 case WRITE_BIG: 1852 case WRITE_16: 1853 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: READ/WRITE %02x\n", 1854 DEVNAME(sc), xs->cmd->opcode); 1855 if (sd->sd_scsi_rw(wu)) 1856 goto stuffup; 1857 break; 1858 1859 case SYNCHRONIZE_CACHE: 1860 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: SYNCHRONIZE_CACHE\n", 1861 DEVNAME(sc)); 1862 if (sd->sd_scsi_sync(wu)) 1863 goto stuffup; 1864 goto complete; 1865 1866 case TEST_UNIT_READY: 1867 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: TEST_UNIT_READY\n", 1868 DEVNAME(sc)); 1869 if (sd->sd_scsi_tur(wu)) 1870 goto stuffup; 1871 goto complete; 1872 1873 case START_STOP: 1874 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: START_STOP\n", 1875 DEVNAME(sc)); 1876 if (sd->sd_scsi_start_stop(wu)) 1877 goto stuffup; 1878 goto complete; 1879 1880 case INQUIRY: 1881 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: INQUIRY\n", 1882 DEVNAME(sc)); 1883 if (sd->sd_scsi_inquiry(wu)) 1884 goto stuffup; 1885 goto complete; 1886 1887 case READ_CAPACITY: 1888 case READ_CAPACITY_16: 1889 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd READ CAPACITY 0x%02x\n", 1890 DEVNAME(sc), xs->cmd->opcode); 1891 if (sd->sd_scsi_read_cap(wu)) 1892 goto stuffup; 1893 goto complete; 1894 1895 case REQUEST_SENSE: 1896 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd REQUEST SENSE\n", 1897 DEVNAME(sc)); 1898 if (sd->sd_scsi_req_sense(wu)) 1899 goto stuffup; 1900 goto complete; 1901 1902 default: 1903 DNPRINTF(SR_D_CMD, "%s: unsupported scsi command %x\n", 1904 DEVNAME(sc), xs->cmd->opcode); 1905 /* XXX might need to add generic function to handle others */ 1906 goto stuffup; 1907 } 1908 1909 return (SUCCESSFULLY_QUEUED); 1910 stuffup: 1911 if (sd && sd->sd_scsi_sense.error_code) { 1912 xs->error = XS_SENSE; 1913 bcopy(&sd->sd_scsi_sense, &xs->sense, sizeof(xs->sense)); 1914 bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense)); 1915 } else { 1916 xs->error = XS_DRIVER_STUFFUP; 1917 xs->flags |= ITSDONE; 1918 } 1919 complete: 1920 if (wu) 1921 sr_wu_put(wu); 1922 sr_scsi_done(sd, xs); 1923 return (COMPLETE); 1924 } 1925 int 1926 sr_scsi_ioctl(struct scsi_link *link, u_long cmd, caddr_t addr, int flag, 1927 struct proc *p) 1928 { 1929 DNPRINTF(SR_D_IOCTL, "%s: sr_scsi_ioctl cmd: %#x\n", 1930 DEVNAME((struct sr_softc *)link->adapter_softc), cmd); 1931 1932 return (sr_ioctl(link->adapter_softc, cmd, addr)); 1933 } 1934 1935 int 1936 sr_ioctl(struct device *dev, u_long cmd, caddr_t addr) 1937 { 1938 struct sr_softc *sc = (struct sr_softc *)dev; 1939 int rv = 0; 1940 1941 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl ", DEVNAME(sc)); 1942 1943 rw_enter_write(&sc->sc_lock); 1944 1945 switch (cmd) { 1946 case BIOCINQ: 1947 DNPRINTF(SR_D_IOCTL, "inq\n"); 1948 rv = sr_ioctl_inq(sc, (struct bioc_inq *)addr); 1949 break; 1950 1951 case BIOCVOL: 1952 DNPRINTF(SR_D_IOCTL, "vol\n"); 1953 rv = 
sr_ioctl_vol(sc, (struct bioc_vol *)addr); 1954 break; 1955 1956 case BIOCDISK: 1957 DNPRINTF(SR_D_IOCTL, "disk\n"); 1958 rv = sr_ioctl_disk(sc, (struct bioc_disk *)addr); 1959 break; 1960 1961 case BIOCALARM: 1962 DNPRINTF(SR_D_IOCTL, "alarm\n"); 1963 /*rv = sr_ioctl_alarm(sc, (struct bioc_alarm *)addr); */ 1964 break; 1965 1966 case BIOCBLINK: 1967 DNPRINTF(SR_D_IOCTL, "blink\n"); 1968 /*rv = sr_ioctl_blink(sc, (struct bioc_blink *)addr); */ 1969 break; 1970 1971 case BIOCSETSTATE: 1972 DNPRINTF(SR_D_IOCTL, "setstate\n"); 1973 rv = sr_ioctl_setstate(sc, (struct bioc_setstate *)addr); 1974 break; 1975 1976 case BIOCCREATERAID: 1977 DNPRINTF(SR_D_IOCTL, "createraid\n"); 1978 rv = sr_ioctl_createraid(sc, (struct bioc_createraid *)addr, 1); 1979 break; 1980 1981 case BIOCDELETERAID: 1982 rv = sr_ioctl_deleteraid(sc, (struct bioc_deleteraid *)addr); 1983 break; 1984 default: 1985 DNPRINTF(SR_D_IOCTL, "invalid ioctl\n"); 1986 rv = ENOTTY; 1987 } 1988 1989 rw_exit_write(&sc->sc_lock); 1990 1991 return (rv); 1992 } 1993 1994 int 1995 sr_ioctl_inq(struct sr_softc *sc, struct bioc_inq *bi) 1996 { 1997 int i, vol, disk; 1998 1999 for (i = 0, vol = 0, disk = 0; i < SR_MAXSCSIBUS; i++) 2000 /* XXX this will not work when we stagger disciplines */ 2001 if (sc->sc_dis[i]) { 2002 vol++; 2003 disk += sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no; 2004 } 2005 2006 strlcpy(bi->bi_dev, sc->sc_dev.dv_xname, sizeof(bi->bi_dev)); 2007 bi->bi_novol = vol + sc->sc_hotspare_no; 2008 bi->bi_nodisk = disk + sc->sc_hotspare_no; 2009 2010 return (0); 2011 } 2012 2013 int 2014 sr_ioctl_vol(struct sr_softc *sc, struct bioc_vol *bv) 2015 { 2016 int i, vol, rv = EINVAL; 2017 struct sr_discipline *sd; 2018 struct sr_chunk *hotspare; 2019 daddr64_t rb, sz; 2020 2021 for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { 2022 /* XXX this will not work when we stagger disciplines */ 2023 if (sc->sc_dis[i]) 2024 vol++; 2025 if (vol != bv->bv_volid) 2026 continue; 2027 2028 sd = sc->sc_dis[i]; 2029 bv->bv_status = sd->sd_vol_status; 2030 bv->bv_size = sd->sd_meta->ssdi.ssd_size << DEV_BSHIFT; 2031 bv->bv_level = sd->sd_meta->ssdi.ssd_level; 2032 bv->bv_nodisk = sd->sd_meta->ssdi.ssd_chunk_no; 2033 if (bv->bv_status == BIOC_SVREBUILD) { 2034 sz = sd->sd_meta->ssdi.ssd_size; 2035 rb = sd->sd_meta->ssd_rebuild; 2036 if (rb > 0) 2037 bv->bv_percent = 100 - 2038 ((sz * 100 - rb * 100) / sz) - 1; 2039 else 2040 bv->bv_percent = 0; 2041 } 2042 strlcpy(bv->bv_dev, sd->sd_meta->ssd_devname, 2043 sizeof(bv->bv_dev)); 2044 strlcpy(bv->bv_vendor, sd->sd_meta->ssdi.ssd_vendor, 2045 sizeof(bv->bv_vendor)); 2046 rv = 0; 2047 goto done; 2048 } 2049 2050 /* Check hotspares list. */ 2051 SLIST_FOREACH(hotspare, &sc->sc_hotspare_list, src_link) { 2052 vol++; 2053 if (vol != bv->bv_volid) 2054 continue; 2055 2056 bv->bv_status = BIOC_SVONLINE; 2057 bv->bv_size = hotspare->src_meta.scmi.scm_size << DEV_BSHIFT; 2058 bv->bv_level = -1; /* Hotspare. 
*/ 2059 bv->bv_nodisk = 1; 2060 strlcpy(bv->bv_dev, hotspare->src_meta.scmi.scm_devname, 2061 sizeof(bv->bv_dev)); 2062 strlcpy(bv->bv_vendor, hotspare->src_meta.scmi.scm_devname, 2063 sizeof(bv->bv_vendor)); 2064 rv = 0; 2065 goto done; 2066 } 2067 2068 done: 2069 return (rv); 2070 } 2071 2072 int 2073 sr_ioctl_disk(struct sr_softc *sc, struct bioc_disk *bd) 2074 { 2075 int i, vol, rv = EINVAL, id; 2076 struct sr_chunk *src, *hotspare; 2077 2078 for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { 2079 /* XXX this will not work when we stagger disciplines */ 2080 if (sc->sc_dis[i]) 2081 vol++; 2082 if (vol != bd->bd_volid) 2083 continue; 2084 2085 id = bd->bd_diskid; 2086 if (id >= sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no) 2087 break; 2088 2089 src = sc->sc_dis[i]->sd_vol.sv_chunks[id]; 2090 bd->bd_status = src->src_meta.scm_status; 2091 bd->bd_size = src->src_meta.scmi.scm_size << DEV_BSHIFT; 2092 bd->bd_channel = vol; 2093 bd->bd_target = id; 2094 strlcpy(bd->bd_vendor, src->src_meta.scmi.scm_devname, 2095 sizeof(bd->bd_vendor)); 2096 rv = 0; 2097 goto done; 2098 } 2099 2100 /* Check hotspares list. */ 2101 SLIST_FOREACH(hotspare, &sc->sc_hotspare_list, src_link) { 2102 vol++; 2103 if (vol != bd->bd_volid) 2104 continue; 2105 2106 if (bd->bd_diskid != 0) 2107 break; 2108 2109 bd->bd_status = hotspare->src_meta.scm_status; 2110 bd->bd_size = hotspare->src_meta.scmi.scm_size << DEV_BSHIFT; 2111 bd->bd_channel = vol; 2112 bd->bd_target = bd->bd_diskid; 2113 strlcpy(bd->bd_vendor, hotspare->src_meta.scmi.scm_devname, 2114 sizeof(bd->bd_vendor)); 2115 rv = 0; 2116 goto done; 2117 } 2118 2119 done: 2120 return (rv); 2121 } 2122 2123 int 2124 sr_ioctl_setstate(struct sr_softc *sc, struct bioc_setstate *bs) 2125 { 2126 int rv = EINVAL; 2127 int i, vol, found, c; 2128 struct sr_discipline *sd = NULL; 2129 struct sr_chunk *ch_entry; 2130 struct sr_chunk_head *cl; 2131 2132 if (bs->bs_other_id_type == BIOC_SSOTHER_UNUSED) 2133 goto done; 2134 2135 if (bs->bs_status == BIOC_SSHOTSPARE) { 2136 rv = sr_hotspare(sc, (dev_t)bs->bs_other_id); 2137 goto done; 2138 } 2139 2140 for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { 2141 /* XXX this will not work when we stagger disciplines */ 2142 if (sc->sc_dis[i]) 2143 vol++; 2144 if (vol != bs->bs_volid) 2145 continue; 2146 sd = sc->sc_dis[i]; 2147 break; 2148 } 2149 if (sd == NULL) 2150 goto done; 2151 2152 switch (bs->bs_status) { 2153 case BIOC_SSOFFLINE: 2154 /* Take chunk offline */ 2155 found = c = 0; 2156 cl = &sd->sd_vol.sv_chunk_list; 2157 SLIST_FOREACH(ch_entry, cl, src_link) { 2158 if (ch_entry->src_dev_mm == bs->bs_other_id) { 2159 found = 1; 2160 break; 2161 } 2162 c++; 2163 } 2164 if (found == 0) { 2165 printf("%s: chunk not part of array\n", DEVNAME(sc)); 2166 goto done; 2167 } 2168 2169 /* XXX: check current state first */ 2170 sd->sd_set_chunk_state(sd, c, BIOC_SSOFFLINE); 2171 2172 if (sr_meta_save(sd, SR_META_DIRTY)) { 2173 printf("%s: could not save metadata to %s\n", 2174 DEVNAME(sc), sd->sd_meta->ssd_devname); 2175 goto done; 2176 } 2177 rv = 0; 2178 break; 2179 2180 case BIOC_SDSCRUB: 2181 break; 2182 2183 case BIOC_SSREBUILD: 2184 rv = sr_rebuild_init(sd, (dev_t)bs->bs_other_id); 2185 break; 2186 2187 default: 2188 printf("%s: unsupported state request %d\n", 2189 DEVNAME(sc), bs->bs_status); 2190 } 2191 2192 done: 2193 return (rv); 2194 } 2195 2196 int 2197 sr_chunk_in_use(struct sr_softc *sc, dev_t dev) 2198 { 2199 struct sr_discipline *sd; 2200 struct sr_chunk *chunk; 2201 int i, c; 2202 2203 /* See if chunk is already in use. 
*/ 2204 for (i = 0; i < SR_MAXSCSIBUS; i++) { 2205 if (!sc->sc_dis[i]) 2206 continue; 2207 sd = sc->sc_dis[i]; 2208 for (c = 0; c < sd->sd_meta->ssdi.ssd_chunk_no; c++) { 2209 chunk = sd->sd_vol.sv_chunks[c]; 2210 if (chunk->src_dev_mm == dev) 2211 return chunk->src_meta.scm_status; 2212 } 2213 } 2214 2215 /* Check hotspares list. */ 2216 SLIST_FOREACH(chunk, &sc->sc_hotspare_list, src_link) 2217 if (chunk->src_dev_mm == dev) 2218 return chunk->src_meta.scm_status; 2219 2220 return BIOC_SDINVALID; 2221 } 2222 2223 int 2224 sr_hotspare(struct sr_softc *sc, dev_t dev) 2225 { 2226 struct sr_discipline *sd = NULL; 2227 struct sr_metadata *sm = NULL; 2228 struct sr_meta_chunk *hm; 2229 struct sr_chunk_head *cl; 2230 struct sr_chunk *hotspare, *chunk, *last; 2231 struct sr_uuid uuid; 2232 struct disklabel label; 2233 struct vnode *vn; 2234 daddr64_t size; 2235 char devname[32]; 2236 int rv = EINVAL; 2237 int c, part, open = 0; 2238 2239 /* 2240 * Add device to global hotspares list. 2241 */ 2242 2243 sr_meta_getdevname(sc, dev, devname, sizeof(devname)); 2244 2245 /* Make sure chunk is not already in use. */ 2246 c = sr_chunk_in_use(sc, dev); 2247 if (c != BIOC_SDINVALID && c != BIOC_SDOFFLINE) { 2248 if (c == BIOC_SDHOTSPARE) 2249 printf("%s: %s is already a hotspare\n", 2250 DEVNAME(sc), devname); 2251 else 2252 printf("%s: %s is already in use\n", 2253 DEVNAME(sc), devname); 2254 goto done; 2255 } 2256 2257 /* XXX - See if there is an existing degraded volume... */ 2258 2259 /* Open device. */ 2260 if (bdevvp(dev, &vn)) { 2261 printf("%s:, sr_hotspare: can't allocate vnode\n", DEVNAME(sc)); 2262 goto done; 2263 } 2264 if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, 0)) { 2265 DNPRINTF(SR_D_META,"%s: sr_hotspare cannot open %s\n", 2266 DEVNAME(sc), devname); 2267 vput(vn); 2268 goto fail; 2269 } 2270 open = 1; /* close dev on error */ 2271 2272 /* Get partition details. */ 2273 part = DISKPART(dev); 2274 if (VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD, NOCRED, 0)) { 2275 DNPRINTF(SR_D_META, "%s: sr_hotspare ioctl failed\n", 2276 DEVNAME(sc)); 2277 VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, 0); 2278 vput(vn); 2279 goto fail; 2280 } 2281 if (label.d_partitions[part].p_fstype != FS_RAID) { 2282 printf("%s: %s partition not of type RAID (%d)\n", 2283 DEVNAME(sc), devname, 2284 label.d_partitions[part].p_fstype); 2285 goto fail; 2286 } 2287 2288 /* Calculate partition size. */ 2289 size = DL_GETPSIZE(&label.d_partitions[part]) - 2290 SR_META_SIZE - SR_META_OFFSET; 2291 2292 /* 2293 * Create and populate chunk metadata. 2294 */ 2295 2296 sr_uuid_get(&uuid); 2297 hotspare = malloc(sizeof(struct sr_chunk), M_DEVBUF, M_WAITOK | M_ZERO); 2298 2299 hotspare->src_dev_mm = dev; 2300 hotspare->src_vn = vn; 2301 strlcpy(hotspare->src_devname, devname, sizeof(hm->scmi.scm_devname)); 2302 hotspare->src_size = size; 2303 2304 hm = &hotspare->src_meta; 2305 hm->scmi.scm_volid = SR_HOTSPARE_VOLID; 2306 hm->scmi.scm_chunk_id = 0; 2307 hm->scmi.scm_size = size; 2308 hm->scmi.scm_coerced_size = size; 2309 strlcpy(hm->scmi.scm_devname, devname, sizeof(hm->scmi.scm_devname)); 2310 bcopy(&uuid, &hm->scmi.scm_uuid, sizeof(struct sr_uuid)); 2311 2312 sr_checksum(sc, hm, &hm->scm_checksum, 2313 sizeof(struct sr_meta_chunk_invariant)); 2314 2315 hm->scm_status = BIOC_SDHOTSPARE; 2316 2317 /* 2318 * Create and populate our own discipline and metadata. 
2319 */ 2320 2321 sm = malloc(sizeof(struct sr_metadata), M_DEVBUF, M_WAITOK | M_ZERO); 2322 sm->ssdi.ssd_magic = SR_MAGIC; 2323 sm->ssdi.ssd_version = SR_META_VERSION; 2324 sm->ssd_ondisk = 0; 2325 sm->ssdi.ssd_flags = 0; 2326 bcopy(&uuid, &sm->ssdi.ssd_uuid, sizeof(struct sr_uuid)); 2327 sm->ssdi.ssd_chunk_no = 1; 2328 sm->ssdi.ssd_volid = SR_HOTSPARE_VOLID; 2329 sm->ssdi.ssd_level = SR_HOTSPARE_LEVEL; 2330 sm->ssdi.ssd_size = size; 2331 strlcpy(sm->ssdi.ssd_vendor, "OPENBSD", sizeof(sm->ssdi.ssd_vendor)); 2332 snprintf(sm->ssdi.ssd_product, sizeof(sm->ssdi.ssd_product), 2333 "SR %s", "HOTSPARE"); 2334 snprintf(sm->ssdi.ssd_revision, sizeof(sm->ssdi.ssd_revision), 2335 "%03d", SR_META_VERSION); 2336 2337 sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO); 2338 sd->sd_sc = sc; 2339 sd->sd_meta = sm; 2340 sd->sd_meta_type = SR_META_F_NATIVE; 2341 sd->sd_vol_status = BIOC_SVONLINE; 2342 strlcpy(sd->sd_name, "HOTSPARE", sizeof(sd->sd_name)); 2343 2344 /* Add chunk to volume. */ 2345 sd->sd_vol.sv_chunks = malloc(sizeof(struct sr_chunk *), M_DEVBUF, 2346 M_WAITOK | M_ZERO); 2347 sd->sd_vol.sv_chunks[0] = hotspare; 2348 SLIST_INIT(&sd->sd_vol.sv_chunk_list); 2349 SLIST_INSERT_HEAD(&sd->sd_vol.sv_chunk_list, hotspare, src_link); 2350 2351 /* Save metadata. */ 2352 if (sr_meta_save(sd, SR_META_DIRTY)) { 2353 printf("%s: could not save metadata to %s\n", 2354 DEVNAME(sc), devname); 2355 goto fail; 2356 } 2357 2358 /* 2359 * Add chunk to hotspare list. 2360 */ 2361 rw_enter_write(&sc->sc_hs_lock); 2362 cl = &sc->sc_hotspare_list; 2363 if (SLIST_EMPTY(cl)) 2364 SLIST_INSERT_HEAD(cl, hotspare, src_link); 2365 else { 2366 SLIST_FOREACH(chunk, cl, src_link) 2367 last = chunk; 2368 SLIST_INSERT_AFTER(last, hotspare, src_link); 2369 } 2370 sc->sc_hotspare_no++; 2371 rw_exit_write(&sc->sc_hs_lock); 2372 2373 rv = 0; 2374 goto done; 2375 2376 fail: 2377 if (hotspare) 2378 free(hotspare, M_DEVBUF); 2379 2380 done: 2381 if (sd && sd->sd_vol.sv_chunks) 2382 free(sd->sd_vol.sv_chunks, M_DEVBUF); 2383 if (sd) 2384 free(sd, M_DEVBUF); 2385 if (sm) 2386 free(sm, M_DEVBUF); 2387 if (open) { 2388 VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, 0); 2389 vput(vn); 2390 } 2391 2392 return (rv); 2393 } 2394 2395 void 2396 sr_hotspare_rebuild_callback(void *arg1, void *arg2) 2397 { 2398 sr_hotspare_rebuild((struct sr_discipline *)arg1); 2399 } 2400 2401 void 2402 sr_hotspare_rebuild(struct sr_discipline *sd) 2403 { 2404 struct sr_chunk_head *cl; 2405 struct sr_chunk *hotspare, *chunk = NULL; 2406 struct sr_workunit *wu; 2407 struct sr_ccb *ccb; 2408 int i, s, chunk_no, busy; 2409 2410 /* 2411 * Attempt to locate a hotspare and initiate rebuild. 2412 */ 2413 2414 for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) { 2415 if (sd->sd_vol.sv_chunks[i]->src_meta.scm_status == 2416 BIOC_SDOFFLINE) { 2417 chunk_no = i; 2418 chunk = sd->sd_vol.sv_chunks[i]; 2419 break; 2420 } 2421 } 2422 2423 if (chunk == NULL) { 2424 printf("%s: no offline chunk found on %s!\n", 2425 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname); 2426 return; 2427 } 2428 2429 /* See if we have a suitable hotspare... 
*/ 2430 rw_enter_write(&sd->sd_sc->sc_hs_lock); 2431 cl = &sd->sd_sc->sc_hotspare_list; 2432 SLIST_FOREACH(hotspare, cl, src_link) 2433 if (hotspare->src_size >= chunk->src_size) 2434 break; 2435 2436 if (hotspare != NULL) { 2437 2438 printf("%s: %s volume degraded, will attempt to " 2439 "rebuild on hotspare %s\n", DEVNAME(sd->sd_sc), 2440 sd->sd_meta->ssd_devname, hotspare->src_devname); 2441 2442 /* 2443 * Ensure that all pending I/O completes on the failed chunk 2444 * before trying to initiate a rebuild. 2445 */ 2446 i = 0; 2447 do { 2448 busy = 0; 2449 2450 s = splbio(); 2451 TAILQ_FOREACH(wu, &sd->sd_wu_pendq, swu_link) { 2452 TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) { 2453 if (ccb->ccb_target == chunk_no) 2454 busy = 1; 2455 } 2456 } 2457 TAILQ_FOREACH(wu, &sd->sd_wu_defq, swu_link) { 2458 TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) { 2459 if (ccb->ccb_target == chunk_no) 2460 busy = 1; 2461 } 2462 } 2463 splx(s); 2464 2465 if (busy) { 2466 tsleep(sd, PRIBIO, "sr_hotspare", hz); 2467 i++; 2468 } 2469 2470 } while (busy && i < 120); 2471 2472 DNPRINTF(SR_D_META, "%s: waited %i seconds for I/O to " 2473 "complete on failed chunk %s\n", DEVNAME(sd->sd_sc), 2474 i, chunk->src_devname); 2475 2476 if (busy) { 2477 printf("%s: pending I/O failed to complete on " 2478 "failed chunk %s, hotspare rebuild aborted...\n", 2479 DEVNAME(sd->sd_sc), chunk->src_devname); 2480 goto done; 2481 } 2482 2483 s = splbio(); 2484 rw_enter_write(&sd->sd_sc->sc_lock); 2485 if (sr_rebuild_init(sd, hotspare->src_dev_mm) == 0) { 2486 2487 /* Remove hotspare from available list. */ 2488 sd->sd_sc->sc_hotspare_no--; 2489 SLIST_REMOVE(cl, hotspare, sr_chunk, src_link); 2490 free(hotspare, M_DEVBUF); 2491 2492 } 2493 rw_exit_write(&sd->sd_sc->sc_lock); 2494 splx(s); 2495 } 2496 done: 2497 rw_exit_write(&sd->sd_sc->sc_hs_lock); 2498 } 2499 2500 int 2501 sr_rebuild_init(struct sr_discipline *sd, dev_t dev) 2502 { 2503 struct sr_softc *sc = sd->sd_sc; 2504 int rv = EINVAL, part; 2505 int c, found, open = 0; 2506 char devname[32]; 2507 struct vnode *vn; 2508 daddr64_t size, csize; 2509 struct disklabel label; 2510 struct sr_meta_chunk *old, *new; 2511 2512 /* 2513 * Attempt to initiate a rebuild onto the specified device. 
2514 */ 2515 2516 if (!sd->sd_rebuild) { 2517 printf("%s: discipline does not support rebuild\n", 2518 DEVNAME(sc)); 2519 goto done; 2520 } 2521 2522 /* make sure volume is in the right state */ 2523 if (sd->sd_vol_status == BIOC_SVREBUILD) { 2524 printf("%s: rebuild already in progress\n", DEVNAME(sc)); 2525 goto done; 2526 } 2527 if (sd->sd_vol_status != BIOC_SVDEGRADED) { 2528 printf("%s: %s not degraded\n", DEVNAME(sc), 2529 sd->sd_meta->ssd_devname); 2530 goto done; 2531 } 2532 2533 /* find offline chunk */ 2534 for (c = 0, found = -1; c < sd->sd_meta->ssdi.ssd_chunk_no; c++) 2535 if (sd->sd_vol.sv_chunks[c]->src_meta.scm_status == 2536 BIOC_SDOFFLINE) { 2537 found = c; 2538 new = &sd->sd_vol.sv_chunks[c]->src_meta; 2539 if (c > 0) 2540 break; /* roll at least once over the for */ 2541 } else { 2542 csize = sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_size; 2543 old = &sd->sd_vol.sv_chunks[c]->src_meta; 2544 if (found != -1) 2545 break; 2546 } 2547 if (found == -1) { 2548 printf("%s: no offline chunks available for rebuild\n", 2549 DEVNAME(sc)); 2550 goto done; 2551 } 2552 2553 /* populate meta entry */ 2554 sr_meta_getdevname(sc, dev, devname, sizeof(devname)); 2555 if (bdevvp(dev, &vn)) { 2556 printf("%s:, sr_rebuild_init: can't allocate vnode\n", 2557 DEVNAME(sc)); 2558 goto done; 2559 } 2560 2561 if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, 0)) { 2562 DNPRINTF(SR_D_META,"%s: sr_ioctl_setstate can't " 2563 "open %s\n", DEVNAME(sc), devname); 2564 vput(vn); 2565 goto done; 2566 } 2567 open = 1; /* close dev on error */ 2568 2569 /* get partition */ 2570 part = DISKPART(dev); 2571 if (VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD, NOCRED, 0)) { 2572 DNPRINTF(SR_D_META, "%s: sr_ioctl_setstate ioctl failed\n", 2573 DEVNAME(sc)); 2574 goto done; 2575 } 2576 if (label.d_partitions[part].p_fstype != FS_RAID) { 2577 printf("%s: %s partition not of type RAID (%d)\n", 2578 DEVNAME(sc), devname, 2579 label.d_partitions[part].p_fstype); 2580 goto done; 2581 } 2582 2583 /* is partition large enough? */ 2584 size = DL_GETPSIZE(&label.d_partitions[part]) - 2585 SR_META_SIZE - SR_META_OFFSET; 2586 if (size < csize) { 2587 printf("%s: partition too small, at least %llu B required\n", 2588 DEVNAME(sc), csize << DEV_BSHIFT); 2589 goto done; 2590 } else if (size > csize) 2591 printf("%s: partition too large, wasting %llu B\n", 2592 DEVNAME(sc), (size - csize) << DEV_BSHIFT); 2593 2594 /* make sure we are not stomping on some other partition */ 2595 c = sr_chunk_in_use(sc, dev); 2596 if (c != BIOC_SDINVALID && c != BIOC_SDOFFLINE) { 2597 printf("%s: %s is already in use\n", DEVNAME(sc), devname); 2598 goto done; 2599 } 2600 2601 /* Reset rebuild counter since we rebuilding onto a new chunk. 
*/ 2602 sd->sd_meta->ssd_rebuild = 0; 2603 2604 /* recreate metadata */ 2605 open = 0; /* leave dev open from here on out */ 2606 sd->sd_vol.sv_chunks[found]->src_dev_mm = dev; 2607 sd->sd_vol.sv_chunks[found]->src_vn = vn; 2608 new->scmi.scm_volid = old->scmi.scm_volid; 2609 new->scmi.scm_chunk_id = found; 2610 strlcpy(new->scmi.scm_devname, devname, 2611 sizeof new->scmi.scm_devname); 2612 new->scmi.scm_size = size; 2613 new->scmi.scm_coerced_size = old->scmi.scm_coerced_size; 2614 bcopy(&old->scmi.scm_uuid, &new->scmi.scm_uuid, 2615 sizeof new->scmi.scm_uuid); 2616 sr_checksum(sc, new, &new->scm_checksum, 2617 sizeof(struct sr_meta_chunk_invariant)); 2618 sd->sd_set_chunk_state(sd, found, BIOC_SDREBUILD); 2619 if (sr_meta_save(sd, SR_META_DIRTY)) { 2620 printf("%s: could not save metadata to %s\n", 2621 DEVNAME(sc), devname); 2622 open = 1; 2623 goto done; 2624 } 2625 2626 printf("%s: rebuild of %s started on %s\n", DEVNAME(sc), 2627 sd->sd_meta->ssd_devname, devname); 2628 2629 sd->sd_reb_abort = 0; 2630 kthread_create_deferred(sr_rebuild, sd); 2631 2632 rv = 0; 2633 done: 2634 if (open) { 2635 VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, 0); 2636 vput(vn); 2637 } 2638 2639 return (rv); 2640 } 2641 2642 void 2643 sr_roam_chunks(struct sr_discipline *sd) 2644 { 2645 struct sr_softc *sc = sd->sd_sc; 2646 struct sr_chunk *chunk; 2647 struct sr_meta_chunk *meta; 2648 int roamed = 0; 2649 2650 /* Have any chunks roamed? */ 2651 SLIST_FOREACH(chunk, &sd->sd_vol.sv_chunk_list, src_link) { 2652 2653 meta = &chunk->src_meta; 2654 2655 if (strncmp(meta->scmi.scm_devname, chunk->src_devname, 2656 sizeof(meta->scmi.scm_devname))) { 2657 2658 printf("%s: roaming device %s -> %s\n", DEVNAME(sc), 2659 meta->scmi.scm_devname, chunk->src_devname); 2660 2661 strlcpy(meta->scmi.scm_devname, chunk->src_devname, 2662 sizeof(meta->scmi.scm_devname)); 2663 2664 roamed++; 2665 } 2666 } 2667 2668 if (roamed) 2669 sr_meta_save(sd, SR_META_DIRTY); 2670 } 2671 2672 int 2673 sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user) 2674 { 2675 dev_t *dt; 2676 int i, s, no_chunk, rv = EINVAL, vol; 2677 int no_meta, updatemeta = 0, disk = 1; 2678 u_int64_t vol_size; 2679 int32_t strip_size = 0; 2680 struct sr_chunk_head *cl; 2681 struct sr_discipline *sd = NULL; 2682 struct sr_chunk *ch_entry; 2683 struct device *dev, *dev2; 2684 struct scsibus_attach_args saa; 2685 2686 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_createraid(%d)\n", 2687 DEVNAME(sc), user); 2688 2689 /* user input */ 2690 if (bc->bc_dev_list_len > BIOC_CRMAXLEN) 2691 goto unwind; 2692 2693 dt = malloc(bc->bc_dev_list_len, M_DEVBUF, M_WAITOK | M_ZERO); 2694 if (user) { 2695 if (copyin(bc->bc_dev_list, dt, bc->bc_dev_list_len) != 0) 2696 goto unwind; 2697 } else 2698 bcopy(bc->bc_dev_list, dt, bc->bc_dev_list_len); 2699 2700 sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO); 2701 sd->sd_sc = sc; 2702 2703 no_chunk = bc->bc_dev_list_len / sizeof(dev_t); 2704 cl = &sd->sd_vol.sv_chunk_list; 2705 SLIST_INIT(cl); 2706 2707 sd->sd_meta_type = sr_meta_probe(sd, dt, no_chunk); 2708 if (sd->sd_meta_type == SR_META_F_INVALID) { 2709 printf("%s: invalid metadata format\n", DEVNAME(sc)); 2710 goto unwind; 2711 } 2712 2713 if (sr_meta_attach(sd, bc->bc_flags & BIOC_SCFORCE)) { 2714 printf("%s: can't attach metadata type %d\n", DEVNAME(sc), 2715 sd->sd_meta_type); 2716 goto unwind; 2717 } 2718 2719 /* force the raid volume by clearing metadata region */ 2720 if (bc->bc_flags & BIOC_SCFORCE) { 2721 /* make sure disk isn't up and running */ 
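		/*
		 * Forced create: read any existing metadata first and refuse
		 * the force-create if the volume it describes is already
		 * assembled and running; otherwise the on-disk metadata area
		 * is cleared below so the create proceeds as if the chunks
		 * were blank.
		 */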
2722 if (sr_meta_read(sd)) 2723 if (sr_already_assembled(sd)) { 2724 printf("%s: disk ", DEVNAME(sc)); 2725 sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0); 2726 printf(" is currently in use; can't force " 2727 "create\n"); 2728 goto unwind; 2729 } 2730 2731 if (sr_meta_clear(sd)) { 2732 printf("%s: failed to clear metadata\n", DEVNAME(sc)); 2733 goto unwind; 2734 } 2735 } 2736 2737 if ((no_meta = sr_meta_read(sd)) == 0) { 2738 /* fill out all chunk metadata */ 2739 sr_meta_chunks_create(sc, cl); 2740 ch_entry = SLIST_FIRST(cl); 2741 2742 /* no metadata available */ 2743 switch (bc->bc_level) { 2744 case 0: 2745 if (no_chunk < 2) 2746 goto unwind; 2747 strlcpy(sd->sd_name, "RAID 0", sizeof(sd->sd_name)); 2748 /* 2749 * XXX add variable strip size later even though 2750 * MAXPHYS is really the clever value, users like 2751 * to tinker with that type of stuff 2752 */ 2753 strip_size = MAXPHYS; 2754 vol_size = 2755 (ch_entry->src_meta.scmi.scm_coerced_size & 2756 ~((strip_size >> DEV_BSHIFT) - 1)) * no_chunk; 2757 break; 2758 case 1: 2759 if (no_chunk < 2) 2760 goto unwind; 2761 strlcpy(sd->sd_name, "RAID 1", sizeof(sd->sd_name)); 2762 vol_size = ch_entry->src_meta.scmi.scm_coerced_size; 2763 break; 2764 case 4: 2765 case 5: 2766 if (no_chunk < 3) 2767 goto unwind; 2768 if (bc->bc_level == 4) 2769 strlcpy(sd->sd_name, "RAID 4", 2770 sizeof(sd->sd_name)); 2771 else 2772 strlcpy(sd->sd_name, "RAID 5", 2773 sizeof(sd->sd_name)); 2774 /* 2775 * XXX add variable strip size later even though 2776 * MAXPHYS is really the clever value, users like 2777 * to tinker with that type of stuff 2778 */ 2779 strip_size = MAXPHYS; 2780 vol_size = 2781 (ch_entry->src_meta.scmi.scm_coerced_size & 2782 ~((strip_size >> DEV_BSHIFT) - 1)) * (no_chunk - 1); 2783 break; 2784 //#ifdef not_yet 2785 case 6: 2786 if (no_chunk < 4) 2787 goto unwind; 2788 strlcpy(sd->sd_name, "RAID 6", 2789 sizeof(sd->sd_name)); 2790 /* 2791 * XXX add variable strip size later even though 2792 * MAXPHYS is really the clever value, users like 2793 * to tinker with that type of stuff 2794 */ 2795 strip_size = MAXPHYS; 2796 vol_size = 2797 (ch_entry->src_meta.scmi.scm_coerced_size & 2798 ~((strip_size >> DEV_BSHIFT) - 1)) * (no_chunk - 2); 2799 break; 2800 //#endif /* not_yet */ 2801 #ifdef AOE 2802 #ifdef not_yet 2803 case 'A': 2804 /* target */ 2805 if (no_chunk != 1) 2806 goto unwind; 2807 strlcpy(sd->sd_name, "AOE TARG", sizeof(sd->sd_name)); 2808 vol_size = ch_entry->src_meta.scmi.scm_coerced_size; 2809 break; 2810 case 'a': 2811 /* initiator */ 2812 if (no_chunk != 1) 2813 goto unwind; 2814 strlcpy(sd->sd_name, "AOE INIT", sizeof(sd->sd_name)); 2815 break; 2816 #endif /* not_yet */ 2817 #endif /* AOE */ 2818 #ifdef CRYPTO 2819 case 'C': 2820 DNPRINTF(SR_D_IOCTL, 2821 "%s: sr_ioctl_createraid: no_chunk %d\n", 2822 DEVNAME(sc), no_chunk); 2823 2824 if (no_chunk != 1) 2825 goto unwind; 2826 2827 /* no hint available yet */ 2828 if (bc->bc_opaque_flags & BIOC_SOOUT) { 2829 bc->bc_opaque_status = BIOC_SOINOUT_FAILED; 2830 rv = 0; 2831 goto unwind; 2832 } 2833 2834 if (!(bc->bc_flags & BIOC_SCNOAUTOASSEMBLE)) 2835 goto unwind; 2836 2837 if (sr_crypto_get_kdf(bc, sd)) 2838 goto unwind; 2839 2840 strlcpy(sd->sd_name, "CRYPTO", sizeof(sd->sd_name)); 2841 vol_size = ch_entry->src_meta.scmi.scm_size; 2842 2843 sr_crypto_create_keys(sd); 2844 2845 break; 2846 #endif /* CRYPTO */ 2847 default: 2848 goto unwind; 2849 } 2850 2851 /* fill out all volume metadata */ 2852 DNPRINTF(SR_D_IOCTL, 2853 "%s: sr_ioctl_createraid: vol_size: %lld\n", 2854 
DEVNAME(sc), vol_size); 2855 sd->sd_meta->ssdi.ssd_chunk_no = no_chunk; 2856 sd->sd_meta->ssdi.ssd_size = vol_size; 2857 sd->sd_vol_status = BIOC_SVONLINE; 2858 sd->sd_meta->ssdi.ssd_level = bc->bc_level; 2859 sd->sd_meta->ssdi.ssd_strip_size = strip_size; 2860 strlcpy(sd->sd_meta->ssdi.ssd_vendor, "OPENBSD", 2861 sizeof(sd->sd_meta->ssdi.ssd_vendor)); 2862 snprintf(sd->sd_meta->ssdi.ssd_product, 2863 sizeof(sd->sd_meta->ssdi.ssd_product), "SR %s", 2864 sd->sd_name); 2865 snprintf(sd->sd_meta->ssdi.ssd_revision, 2866 sizeof(sd->sd_meta->ssdi.ssd_revision), "%03d", 2867 SR_META_VERSION); 2868 2869 sd->sd_meta_flags = bc->bc_flags & BIOC_SCNOAUTOASSEMBLE; 2870 updatemeta = 1; 2871 } else if (no_meta == no_chunk) { 2872 if (sd->sd_meta->ssd_meta_flags & SR_META_DIRTY) 2873 printf("%s: %s was not shutdown properly\n", 2874 DEVNAME(sc), sd->sd_meta->ssd_devname); 2875 if (user == 0 && sd->sd_meta_flags & BIOC_SCNOAUTOASSEMBLE) { 2876 DNPRINTF(SR_D_META, "%s: disk not auto assembled from " 2877 "metadata\n", DEVNAME(sc)); 2878 goto unwind; 2879 } 2880 if (sr_already_assembled(sd)) { 2881 printf("%s: disk ", DEVNAME(sc)); 2882 sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0); 2883 printf(" already assembled\n"); 2884 goto unwind; 2885 } 2886 #ifdef CRYPTO 2887 /* provide userland with kdf hint */ 2888 if (bc->bc_opaque_flags & BIOC_SOOUT) { 2889 if (bc->bc_opaque == NULL) 2890 goto unwind; 2891 2892 if (sizeof(sd->mds.mdd_crypto.scr_meta.scm_kdfhint) < 2893 bc->bc_opaque_size) 2894 goto unwind; 2895 2896 if (copyout(sd->mds.mdd_crypto.scr_meta.scm_kdfhint, 2897 bc->bc_opaque, bc->bc_opaque_size)) 2898 goto unwind; 2899 2900 /* we're done */ 2901 bc->bc_opaque_status = BIOC_SOINOUT_OK; 2902 rv = 0; 2903 goto unwind; 2904 } 2905 /* get kdf with maskkey from userland */ 2906 if (bc->bc_opaque_flags & BIOC_SOIN) { 2907 if (sr_crypto_get_kdf(bc, sd)) 2908 goto unwind; 2909 } 2910 #endif /* CRYPTO */ 2911 DNPRINTF(SR_D_META, "%s: disk assembled from metadata\n", 2912 DEVNAME(sc)); 2913 updatemeta = 0; 2914 } else if (no_meta == -1) { 2915 printf("%s: one of the chunks has corrupt metadata; aborting " 2916 "assembly\n", DEVNAME(sc)); 2917 goto unwind; 2918 } else { 2919 if (sr_already_assembled(sd)) { 2920 printf("%s: disk ", DEVNAME(sc)); 2921 sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0); 2922 printf(" already assembled; will not partial " 2923 "assemble it\n"); 2924 goto unwind; 2925 } 2926 printf("%s: trying to bring up %s degraded\n", DEVNAME(sc), 2927 sd->sd_meta->ssd_devname); 2928 } 2929 2930 /* metadata SHALL be fully filled in at this point */ 2931 2932 /* Make sure that metadata level matches assembly level. 
*/ 2933 if (sd->sd_meta->ssdi.ssd_level != bc->bc_level) { 2934 printf("%s: volume level does not match metadata level!\n", 2935 DEVNAME(sc)); 2936 goto unwind; 2937 } 2938 2939 if (sr_discipline_init(sd, sd->sd_meta->ssdi.ssd_level)) { 2940 printf("%s: could not initialize discipline\n", DEVNAME(sc)); 2941 goto unwind; 2942 } 2943 2944 /* allocate all resources */ 2945 if ((rv = sd->sd_alloc_resources(sd))) 2946 goto unwind; 2947 2948 if (disk) { 2949 /* set volume status */ 2950 sd->sd_set_vol_state(sd); 2951 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 2952 printf("%s: %s offline, will not be brought online\n", 2953 DEVNAME(sc), sd->sd_meta->ssd_devname); 2954 goto unwind; 2955 } 2956 2957 /* setup scsi midlayer */ 2958 if (sd->sd_openings) 2959 sd->sd_link.openings = sd->sd_openings(sd); 2960 else 2961 sd->sd_link.openings = sd->sd_max_wu; 2962 sd->sd_link.device = &sr_dev; 2963 sd->sd_link.device_softc = sc; 2964 sd->sd_link.adapter_softc = sc; 2965 sd->sd_link.adapter = &sr_switch; 2966 sd->sd_link.adapter_target = SR_MAX_LD; 2967 sd->sd_link.adapter_buswidth = 1; 2968 bzero(&saa, sizeof(saa)); 2969 saa.saa_sc_link = &sd->sd_link; 2970 2971 /* 2972 * we passed all checks return ENXIO if volume can't be created 2973 */ 2974 rv = ENXIO; 2975 2976 /* clear sense data */ 2977 bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense)); 2978 2979 /* use temporary discipline pointer */ 2980 s = splhigh(); 2981 sc->sc_attach_dis = sd; 2982 splx(s); 2983 dev2 = config_found(&sc->sc_dev, &saa, scsiprint); 2984 s = splhigh(); 2985 sc->sc_attach_dis = NULL; 2986 splx(s); 2987 TAILQ_FOREACH(dev, &alldevs, dv_list) 2988 if (dev->dv_parent == dev2) 2989 break; 2990 if (dev == NULL) 2991 goto unwind; 2992 2993 DNPRINTF(SR_D_IOCTL, "%s: sr device added: %s on scsibus%d\n", 2994 DEVNAME(sc), dev->dv_xname, sd->sd_link.scsibus); 2995 2996 sc->sc_dis[sd->sd_link.scsibus] = sd; 2997 for (i = 0, vol = -1; i <= sd->sd_link.scsibus; i++) 2998 if (sc->sc_dis[i]) 2999 vol++; 3000 sd->sd_scsibus_dev = dev2; 3001 3002 rv = 0; 3003 if (updatemeta) { 3004 /* fill out remaining volume metadata */ 3005 sd->sd_meta->ssdi.ssd_volid = vol; 3006 strlcpy(sd->sd_meta->ssd_devname, dev->dv_xname, 3007 sizeof(sd->sd_meta->ssd_devname)); 3008 sr_meta_init(sd, cl); 3009 } else { 3010 if (strncmp(sd->sd_meta->ssd_devname, dev->dv_xname, 3011 sizeof(dev->dv_xname))) { 3012 printf("%s: volume %s is roaming, it used to " 3013 "be %s, updating metadata\n", 3014 DEVNAME(sc), dev->dv_xname, 3015 sd->sd_meta->ssd_devname); 3016 3017 sd->sd_meta->ssdi.ssd_volid = vol; 3018 strlcpy(sd->sd_meta->ssd_devname, dev->dv_xname, 3019 sizeof(sd->sd_meta->ssd_devname)); 3020 } 3021 } 3022 3023 /* Update device name on any chunks which roamed. 
*/ 3024 sr_roam_chunks(sd); 3025 3026 #ifndef SMALL_KERNEL 3027 if (sr_sensors_create(sd)) 3028 printf("%s: unable to create sensor for %s\n", 3029 DEVNAME(sc), dev->dv_xname); 3030 else 3031 sd->sd_vol.sv_sensor_valid = 1; 3032 #endif /* SMALL_KERNEL */ 3033 } else { 3034 /* we are not an os disk */ 3035 if (updatemeta) { 3036 /* fill out remaining volume metadata */ 3037 sd->sd_meta->ssdi.ssd_volid = 0; 3038 strlcpy(sd->sd_meta->ssd_devname, ch_entry->src_devname, 3039 sizeof(sd->sd_meta->ssd_devname)); 3040 sr_meta_init(sd, cl); 3041 } 3042 if (sd->sd_start_discipline(sd)) 3043 goto unwind; 3044 } 3045 3046 /* save metadata to disk */ 3047 rv = sr_meta_save(sd, SR_META_DIRTY); 3048 sd->sd_shutdownhook = shutdownhook_establish(sr_shutdown, sd); 3049 3050 if (sd->sd_vol_status == BIOC_SVREBUILD) 3051 kthread_create_deferred(sr_rebuild, sd); 3052 3053 sd->sd_ready = 1; 3054 3055 return (rv); 3056 unwind: 3057 sr_discipline_shutdown(sd); 3058 3059 return (rv); 3060 } 3061 3062 int 3063 sr_ioctl_deleteraid(struct sr_softc *sc, struct bioc_deleteraid *dr) 3064 { 3065 struct sr_discipline *sd = NULL; 3066 int rv = 1; 3067 int i; 3068 3069 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_deleteraid %s\n", DEVNAME(sc), 3070 dr->bd_dev); 3071 3072 for (i = 0; i < SR_MAXSCSIBUS; i++) 3073 if (sc->sc_dis[i]) { 3074 if (!strncmp(sc->sc_dis[i]->sd_meta->ssd_devname, 3075 dr->bd_dev, 3076 sizeof(sc->sc_dis[i]->sd_meta->ssd_devname))) { 3077 sd = sc->sc_dis[i]; 3078 break; 3079 } 3080 } 3081 3082 if (sd == NULL) 3083 goto bad; 3084 3085 sd->sd_deleted = 1; 3086 sd->sd_meta->ssdi.ssd_flags = BIOC_SCNOAUTOASSEMBLE; 3087 sr_shutdown(sd); 3088 3089 rv = 0; 3090 bad: 3091 return (rv); 3092 } 3093 3094 void 3095 sr_chunks_unwind(struct sr_softc *sc, struct sr_chunk_head *cl) 3096 { 3097 struct sr_chunk *ch_entry, *ch_next; 3098 3099 DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind\n", DEVNAME(sc)); 3100 3101 if (!cl) 3102 return; 3103 3104 for (ch_entry = SLIST_FIRST(cl); 3105 ch_entry != SLIST_END(cl); ch_entry = ch_next) { 3106 ch_next = SLIST_NEXT(ch_entry, src_link); 3107 3108 DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind closing: %s\n", 3109 DEVNAME(sc), ch_entry->src_devname); 3110 if (ch_entry->src_vn) { 3111 VOP_CLOSE(ch_entry->src_vn, FREAD | FWRITE, NOCRED, 0); 3112 vput(ch_entry->src_vn); 3113 } 3114 free(ch_entry, M_DEVBUF); 3115 } 3116 SLIST_INIT(cl); 3117 } 3118 3119 void 3120 sr_discipline_free(struct sr_discipline *sd) 3121 { 3122 struct sr_softc *sc; 3123 int i; 3124 3125 if (!sd) 3126 return; 3127 3128 sc = sd->sd_sc; 3129 3130 DNPRINTF(SR_D_DIS, "%s: sr_discipline_free %s\n", 3131 DEVNAME(sc), 3132 sd->sd_meta ? sd->sd_meta->ssd_devname : "nodev"); 3133 if (sd->sd_free_resources) 3134 sd->sd_free_resources(sd); 3135 if (sd->sd_vol.sv_chunks) 3136 free(sd->sd_vol.sv_chunks, M_DEVBUF); 3137 if (sd->sd_meta) 3138 free(sd->sd_meta, M_DEVBUF); 3139 if (sd->sd_meta_foreign) 3140 free(sd->sd_meta_foreign, M_DEVBUF); 3141 3142 for (i = 0; i < SR_MAXSCSIBUS; i++) 3143 if (sc->sc_dis[i] == sd) { 3144 sc->sc_dis[i] = NULL; 3145 break; 3146 } 3147 3148 free(sd, M_DEVBUF); 3149 } 3150 3151 void 3152 sr_discipline_shutdown(struct sr_discipline *sd) 3153 { 3154 struct sr_softc *sc = sd->sd_sc; 3155 int s; 3156 3157 if (!sd || !sc) 3158 return; 3159 3160 DNPRINTF(SR_D_DIS, "%s: sr_discipline_shutdown %s\n", DEVNAME(sc), 3161 sd->sd_meta ? 
sd->sd_meta->ssd_devname : "nodev"); 3162 3163 s = splbio(); 3164 3165 sd->sd_ready = 0; 3166 3167 if (sd->sd_shutdownhook) 3168 shutdownhook_disestablish(sd->sd_shutdownhook); 3169 3170 /* make sure there isn't a sync pending and yield */ 3171 wakeup(sd); 3172 while (sd->sd_sync || sd->sd_must_flush) 3173 if (tsleep(&sd->sd_sync, MAXPRI, "sr_down", 60 * hz) == 3174 EWOULDBLOCK) 3175 break; 3176 3177 #ifndef SMALL_KERNEL 3178 sr_sensors_delete(sd); 3179 #endif /* SMALL_KERNEL */ 3180 3181 if (sd->sd_scsibus_dev) 3182 config_detach(sd->sd_scsibus_dev, DETACH_FORCE); 3183 3184 sr_chunks_unwind(sc, &sd->sd_vol.sv_chunk_list); 3185 3186 if (sd) 3187 sr_discipline_free(sd); 3188 3189 splx(s); 3190 } 3191 3192 int 3193 sr_discipline_init(struct sr_discipline *sd, int level) 3194 { 3195 int rv = 1; 3196 3197 switch (level) { 3198 case 0: 3199 sr_raid0_discipline_init(sd); 3200 break; 3201 case 1: 3202 sr_raid1_discipline_init(sd); 3203 break; 3204 case 4: 3205 case 5: 3206 if (level == 4) 3207 sd->sd_type = SR_MD_RAID4; 3208 else 3209 sd->sd_type = SR_MD_RAID5; 3210 sr_raidp_discipline_init(sd); 3211 break; 3212 case 6: 3213 sd->sd_type = SR_MD_RAID6; 3214 sr_raid6_discipline_init(sd); 3215 break; 3216 #ifdef AOE 3217 /* AOE target. */ 3218 case 'A': 3219 sr_aoe_server_discipline_init(sd); 3220 break; 3221 /* AOE initiator. */ 3222 case 'a': 3223 sr_aoe_discipline_init(sd); 3224 break; 3225 #endif 3226 #ifdef CRYPTO 3227 case 'C': 3228 sr_crypto_discipline_init(sd); 3229 break; 3230 #endif 3231 default: 3232 goto bad; 3233 } 3234 3235 rv = 0; 3236 bad: 3237 return (rv); 3238 } 3239 3240 int 3241 sr_raid_inquiry(struct sr_workunit *wu) 3242 { 3243 struct sr_discipline *sd = wu->swu_dis; 3244 struct scsi_xfer *xs = wu->swu_xs; 3245 struct scsi_inquiry_data inq; 3246 3247 DNPRINTF(SR_D_DIS, "%s: sr_raid_inquiry\n", DEVNAME(sd->sd_sc)); 3248 3249 bzero(&inq, sizeof(inq)); 3250 inq.device = T_DIRECT; 3251 inq.dev_qual2 = 0; 3252 inq.version = 2; 3253 inq.response_format = 2; 3254 inq.additional_length = 32; 3255 strlcpy(inq.vendor, sd->sd_meta->ssdi.ssd_vendor, 3256 sizeof(inq.vendor)); 3257 strlcpy(inq.product, sd->sd_meta->ssdi.ssd_product, 3258 sizeof(inq.product)); 3259 strlcpy(inq.revision, sd->sd_meta->ssdi.ssd_revision, 3260 sizeof(inq.revision)); 3261 sr_copy_internal_data(xs, &inq, sizeof(inq)); 3262 3263 return (0); 3264 } 3265 3266 int 3267 sr_raid_read_cap(struct sr_workunit *wu) 3268 { 3269 struct sr_discipline *sd = wu->swu_dis; 3270 struct scsi_xfer *xs = wu->swu_xs; 3271 struct scsi_read_cap_data rcd; 3272 struct scsi_read_cap_data_16 rcd16; 3273 int rv = 1; 3274 3275 DNPRINTF(SR_D_DIS, "%s: sr_raid_read_cap\n", DEVNAME(sd->sd_sc)); 3276 3277 if (xs->cmd->opcode == READ_CAPACITY) { 3278 bzero(&rcd, sizeof(rcd)); 3279 if (sd->sd_meta->ssdi.ssd_size > 0xffffffffllu) 3280 _lto4b(0xffffffff, rcd.addr); 3281 else 3282 _lto4b(sd->sd_meta->ssdi.ssd_size, rcd.addr); 3283 _lto4b(512, rcd.length); 3284 sr_copy_internal_data(xs, &rcd, sizeof(rcd)); 3285 rv = 0; 3286 } else if (xs->cmd->opcode == READ_CAPACITY_16) { 3287 bzero(&rcd16, sizeof(rcd16)); 3288 _lto8b(sd->sd_meta->ssdi.ssd_size, rcd16.addr); 3289 _lto4b(512, rcd16.length); 3290 sr_copy_internal_data(xs, &rcd16, sizeof(rcd16)); 3291 rv = 0; 3292 } 3293 3294 return (rv); 3295 } 3296 3297 int 3298 sr_raid_tur(struct sr_workunit *wu) 3299 { 3300 struct sr_discipline *sd = wu->swu_dis; 3301 3302 DNPRINTF(SR_D_DIS, "%s: sr_raid_tur\n", DEVNAME(sd->sd_sc)); 3303 3304 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 3305 sd->sd_scsi_sense.error_code 
= SSD_ERRCODE_CURRENT; 3306 sd->sd_scsi_sense.flags = SKEY_NOT_READY; 3307 sd->sd_scsi_sense.add_sense_code = 0x04; 3308 sd->sd_scsi_sense.add_sense_code_qual = 0x11; 3309 sd->sd_scsi_sense.extra_len = 4; 3310 return (1); 3311 } else if (sd->sd_vol_status == BIOC_SVINVALID) { 3312 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT; 3313 sd->sd_scsi_sense.flags = SKEY_HARDWARE_ERROR; 3314 sd->sd_scsi_sense.add_sense_code = 0x05; 3315 sd->sd_scsi_sense.add_sense_code_qual = 0x00; 3316 sd->sd_scsi_sense.extra_len = 4; 3317 return (1); 3318 } 3319 3320 return (0); 3321 } 3322 3323 int 3324 sr_raid_request_sense(struct sr_workunit *wu) 3325 { 3326 struct sr_discipline *sd = wu->swu_dis; 3327 struct scsi_xfer *xs = wu->swu_xs; 3328 3329 DNPRINTF(SR_D_DIS, "%s: sr_raid_request_sense\n", 3330 DEVNAME(sd->sd_sc)); 3331 3332 /* use latest sense data */ 3333 bcopy(&sd->sd_scsi_sense, &xs->sense, sizeof(xs->sense)); 3334 3335 /* clear sense data */ 3336 bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense)); 3337 3338 return (0); 3339 } 3340 3341 int 3342 sr_raid_start_stop(struct sr_workunit *wu) 3343 { 3344 struct sr_discipline *sd = wu->swu_dis; 3345 struct scsi_xfer *xs = wu->swu_xs; 3346 struct scsi_start_stop *ss = (struct scsi_start_stop *)xs->cmd; 3347 int rv = 1; 3348 3349 DNPRINTF(SR_D_DIS, "%s: sr_raid_start_stop\n", 3350 DEVNAME(sd->sd_sc)); 3351 3352 if (!ss) 3353 return (rv); 3354 3355 if (ss->byte2 == 0x00) { 3356 /* START */ 3357 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 3358 /* bring volume online */ 3359 /* XXX check to see if volume can be brought online */ 3360 sd->sd_vol_status = BIOC_SVONLINE; 3361 } 3362 rv = 0; 3363 } else /* XXX is this the check? if (byte == 0x01) */ { 3364 /* STOP */ 3365 if (sd->sd_vol_status == BIOC_SVONLINE) { 3366 /* bring volume offline */ 3367 sd->sd_vol_status = BIOC_SVOFFLINE; 3368 } 3369 rv = 0; 3370 } 3371 3372 return (rv); 3373 } 3374 3375 int 3376 sr_raid_sync(struct sr_workunit *wu) 3377 { 3378 struct sr_discipline *sd = wu->swu_dis; 3379 int s, rv = 0, ios; 3380 3381 DNPRINTF(SR_D_DIS, "%s: sr_raid_sync\n", DEVNAME(sd->sd_sc)); 3382 3383 /* when doing a fake sync don't count the wu */ 3384 ios = wu->swu_fake ? 
0 : 1; 3385 3386 s = splbio(); 3387 sd->sd_sync = 1; 3388 3389 while (sd->sd_wu_pending > ios) 3390 if (tsleep(sd, PRIBIO, "sr_sync", 15 * hz) == EWOULDBLOCK) { 3391 DNPRINTF(SR_D_DIS, "%s: sr_raid_sync timeout\n", 3392 DEVNAME(sd->sd_sc)); 3393 rv = 1; 3394 break; 3395 } 3396 3397 sd->sd_sync = 0; 3398 splx(s); 3399 3400 wakeup(&sd->sd_sync); 3401 3402 return (rv); 3403 } 3404 3405 void 3406 sr_raid_startwu(struct sr_workunit *wu) 3407 { 3408 struct sr_discipline *sd = wu->swu_dis; 3409 struct sr_ccb *ccb; 3410 3411 splassert(IPL_BIO); 3412 3413 if (wu->swu_state == SR_WU_RESTART) 3414 /* 3415 * no need to put the wu on the pending queue since we 3416 * are restarting the io 3417 */ 3418 ; 3419 else 3420 /* move wu to pending queue */ 3421 TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link); 3422 3423 /* start all individual ios */ 3424 TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) { 3425 VOP_STRATEGY(&ccb->ccb_buf); 3426 } 3427 } 3428 3429 void 3430 sr_checksum_print(u_int8_t *md5) 3431 { 3432 int i; 3433 3434 for (i = 0; i < MD5_DIGEST_LENGTH; i++) 3435 printf("%02x", md5[i]); 3436 } 3437 3438 void 3439 sr_checksum(struct sr_softc *sc, void *src, void *md5, u_int32_t len) 3440 { 3441 MD5_CTX ctx; 3442 3443 DNPRINTF(SR_D_MISC, "%s: sr_checksum(%p %p %d)\n", DEVNAME(sc), src, 3444 md5, len); 3445 3446 MD5Init(&ctx); 3447 MD5Update(&ctx, src, len); 3448 MD5Final(md5, &ctx); 3449 } 3450 3451 void 3452 sr_uuid_get(struct sr_uuid *uuid) 3453 { 3454 arc4random_buf(uuid->sui_id, sizeof(uuid->sui_id)); 3455 /* UUID version 4: random */ 3456 uuid->sui_id[6] &= 0x0f; 3457 uuid->sui_id[6] |= 0x40; 3458 /* RFC4122 variant */ 3459 uuid->sui_id[8] &= 0x3f; 3460 uuid->sui_id[8] |= 0x80; 3461 } 3462 3463 void 3464 sr_uuid_print(struct sr_uuid *uuid, int cr) 3465 { 3466 printf("%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-" 3467 "%02x%02x%02x%02x%02x%02x", 3468 uuid->sui_id[0], uuid->sui_id[1], 3469 uuid->sui_id[2], uuid->sui_id[3], 3470 uuid->sui_id[4], uuid->sui_id[5], 3471 uuid->sui_id[6], uuid->sui_id[7], 3472 uuid->sui_id[8], uuid->sui_id[9], 3473 uuid->sui_id[10], uuid->sui_id[11], 3474 uuid->sui_id[12], uuid->sui_id[13], 3475 uuid->sui_id[14], uuid->sui_id[15]); 3476 3477 if (cr) 3478 printf("\n"); 3479 } 3480 3481 int 3482 sr_already_assembled(struct sr_discipline *sd) 3483 { 3484 struct sr_softc *sc = sd->sd_sc; 3485 int i; 3486 3487 for (i = 0; i < SR_MAXSCSIBUS; i++) 3488 if (sc->sc_dis[i]) 3489 if (!bcmp(&sd->sd_meta->ssdi.ssd_uuid, 3490 &sc->sc_dis[i]->sd_meta->ssdi.ssd_uuid, 3491 sizeof(sd->sd_meta->ssdi.ssd_uuid))) 3492 return (1); 3493 3494 return (0); 3495 } 3496 3497 int32_t 3498 sr_validate_stripsize(u_int32_t b) 3499 { 3500 int s = 0; 3501 3502 if (b % 512) 3503 return (-1); 3504 3505 while ((b & 1) == 0) { 3506 b >>= 1; 3507 s++; 3508 } 3509 3510 /* only multiple of twos */ 3511 b >>= 1; 3512 if (b) 3513 return(-1); 3514 3515 return (s); 3516 } 3517 3518 void 3519 sr_shutdown(void *arg) 3520 { 3521 struct sr_discipline *sd = arg; 3522 #ifdef SR_DEBUG 3523 struct sr_softc *sc = sd->sd_sc; 3524 #endif 3525 DNPRINTF(SR_D_DIS, "%s: sr_shutdown %s\n", 3526 DEVNAME(sc), sd->sd_meta->ssd_devname); 3527 3528 /* abort rebuild and drain io */ 3529 sd->sd_reb_abort = 1; 3530 while (sd->sd_reb_active) 3531 tsleep(sd, PWAIT, "sr_shutdown", 1); 3532 3533 sr_meta_save(sd, 0); 3534 3535 sr_discipline_shutdown(sd); 3536 } 3537 3538 int 3539 sr_validate_io(struct sr_workunit *wu, daddr64_t *blk, char *func) 3540 { 3541 struct sr_discipline *sd = wu->swu_dis; 3542 struct scsi_xfer *xs = wu->swu_xs; 3543 int 
rv = 1; 3544 3545 DNPRINTF(SR_D_DIS, "%s: %s 0x%02x\n", DEVNAME(sd->sd_sc), func, 3546 xs->cmd->opcode); 3547 3548 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 3549 DNPRINTF(SR_D_DIS, "%s: %s device offline\n", 3550 DEVNAME(sd->sd_sc), func); 3551 goto bad; 3552 } 3553 3554 if (xs->datalen == 0) { 3555 printf("%s: %s: illegal block count for %s\n", 3556 DEVNAME(sd->sd_sc), func, sd->sd_meta->ssd_devname); 3557 goto bad; 3558 } 3559 3560 if (xs->cmdlen == 10) 3561 *blk = _4btol(((struct scsi_rw_big *)xs->cmd)->addr); 3562 else if (xs->cmdlen == 16) 3563 *blk = _8btol(((struct scsi_rw_16 *)xs->cmd)->addr); 3564 else if (xs->cmdlen == 6) 3565 *blk = _3btol(((struct scsi_rw *)xs->cmd)->addr); 3566 else { 3567 printf("%s: %s: illegal cmdlen for %s\n", 3568 DEVNAME(sd->sd_sc), func, sd->sd_meta->ssd_devname); 3569 goto bad; 3570 } 3571 3572 wu->swu_blk_start = *blk; 3573 wu->swu_blk_end = *blk + (xs->datalen >> DEV_BSHIFT) - 1; 3574 3575 if (wu->swu_blk_end > sd->sd_meta->ssdi.ssd_size) { 3576 DNPRINTF(SR_D_DIS, "%s: %s out of bounds start: %lld " 3577 "end: %lld length: %d\n", 3578 DEVNAME(sd->sd_sc), func, wu->swu_blk_start, 3579 wu->swu_blk_end, xs->datalen); 3580 3581 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT | 3582 SSD_ERRCODE_VALID; 3583 sd->sd_scsi_sense.flags = SKEY_ILLEGAL_REQUEST; 3584 sd->sd_scsi_sense.add_sense_code = 0x21; 3585 sd->sd_scsi_sense.add_sense_code_qual = 0x00; 3586 sd->sd_scsi_sense.extra_len = 4; 3587 goto bad; 3588 } 3589 3590 rv = 0; 3591 bad: 3592 return (rv); 3593 } 3594 3595 int 3596 sr_check_io_collision(struct sr_workunit *wu) 3597 { 3598 struct sr_discipline *sd = wu->swu_dis; 3599 struct sr_workunit *wup; 3600 3601 splassert(IPL_BIO); 3602 3603 /* walk queue backwards and fill in collider if we have one */ 3604 TAILQ_FOREACH_REVERSE(wup, &sd->sd_wu_pendq, sr_wu_list, swu_link) { 3605 if (wu->swu_blk_end < wup->swu_blk_start || 3606 wup->swu_blk_end < wu->swu_blk_start) 3607 continue; 3608 3609 /* we have an LBA collision, defer wu */ 3610 wu->swu_state = SR_WU_DEFERRED; 3611 if (wup->swu_collider) 3612 /* wu is on deferred queue, append to last wu */ 3613 while (wup->swu_collider) 3614 wup = wup->swu_collider; 3615 3616 wup->swu_collider = wu; 3617 TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link); 3618 sd->sd_wu_collisions++; 3619 goto queued; 3620 } 3621 3622 return (0); 3623 queued: 3624 return (1); 3625 } 3626 3627 void 3628 sr_rebuild(void *arg) 3629 { 3630 struct sr_discipline *sd = arg; 3631 struct sr_softc *sc = sd->sd_sc; 3632 3633 if (kthread_create(sr_rebuild_thread, sd, &sd->sd_background_proc, 3634 DEVNAME(sc)) != 0) 3635 printf("%s: unable to start backgound operation\n", 3636 DEVNAME(sc)); 3637 } 3638 3639 void 3640 sr_rebuild_thread(void *arg) 3641 { 3642 struct sr_discipline *sd = arg; 3643 struct sr_softc *sc = sd->sd_sc; 3644 daddr64_t whole_blk, partial_blk, blk, sz, lba; 3645 daddr64_t psz, rb, restart; 3646 uint64_t mysize = 0; 3647 struct sr_workunit *wu_r, *wu_w; 3648 struct scsi_xfer xs_r, xs_w; 3649 struct scsi_rw_16 cr, cw; 3650 int c, s, slept, percent = 0, old_percent = -1; 3651 u_int8_t *buf; 3652 3653 whole_blk = sd->sd_meta->ssdi.ssd_size / SR_REBUILD_IO_SIZE; 3654 partial_blk = sd->sd_meta->ssdi.ssd_size % SR_REBUILD_IO_SIZE; 3655 3656 restart = sd->sd_meta->ssd_rebuild / SR_REBUILD_IO_SIZE; 3657 if (restart > whole_blk) { 3658 printf("%s: bogus rebuild restart offset, starting from 0\n", 3659 DEVNAME(sc)); 3660 restart = 0; 3661 } 3662 if (restart) { 3663 /* 3664 * XXX there is a hole here; there is a posibility that we 
3665 * had a restart however the chunk that was supposed to 3666 * be rebuilt is no longer valid; we can reach this situation 3667 * when a rebuild is in progress and the box crashes and 3668 * on reboot the rebuild chunk is different (like zero'd or 3669 * replaced). We need to check the uuid of the chunk that is 3670 * being rebuilt to assert this. 3671 */ 3672 psz = sd->sd_meta->ssdi.ssd_size; 3673 rb = sd->sd_meta->ssd_rebuild; 3674 if (rb > 0) 3675 percent = 100 - ((psz * 100 - rb * 100) / psz) - 1; 3676 else 3677 percent = 0; 3678 printf("%s: resuming rebuild on %s at %llu%%\n", 3679 DEVNAME(sc), sd->sd_meta->ssd_devname, percent); 3680 } 3681 3682 sd->sd_reb_active = 1; 3683 3684 buf = malloc(SR_REBUILD_IO_SIZE << DEV_BSHIFT, M_DEVBUF, M_WAITOK); 3685 for (blk = restart; blk <= whole_blk; blk++) { 3686 if (blk == whole_blk) 3687 sz = partial_blk; 3688 else 3689 sz = SR_REBUILD_IO_SIZE; 3690 mysize += sz; 3691 lba = blk * sz; 3692 3693 /* get some wu */ 3694 if ((wu_r = sr_wu_get(sd, 1)) == NULL) 3695 panic("%s: rebuild exhausted wu_r", DEVNAME(sc)); 3696 if ((wu_w = sr_wu_get(sd, 1)) == NULL) 3697 panic("%s: rebuild exhausted wu_w", DEVNAME(sc)); 3698 3699 /* setup read io */ 3700 bzero(&xs_r, sizeof xs_r); 3701 bzero(&cr, sizeof cr); 3702 xs_r.error = XS_NOERROR; 3703 xs_r.flags = SCSI_DATA_IN; 3704 xs_r.datalen = sz << DEV_BSHIFT; 3705 xs_r.data = buf; 3706 xs_r.cmdlen = 16; 3707 cr.opcode = READ_16; 3708 _lto4b(sz, cr.length); 3709 _lto8b(lba, cr.addr); 3710 xs_r.cmd = (struct scsi_generic *)&cr; 3711 wu_r->swu_flags |= SR_WUF_REBUILD; 3712 wu_r->swu_xs = &xs_r; 3713 if (sd->sd_scsi_rw(wu_r)) { 3714 printf("%s: could not create read io\n", 3715 DEVNAME(sc)); 3716 goto fail; 3717 } 3718 3719 /* setup write io */ 3720 bzero(&xs_w, sizeof xs_w); 3721 bzero(&cw, sizeof cw); 3722 xs_w.error = XS_NOERROR; 3723 xs_w.flags = SCSI_DATA_OUT; 3724 xs_w.datalen = sz << DEV_BSHIFT; 3725 xs_w.data = buf; 3726 xs_w.cmdlen = 16; 3727 cw.opcode = WRITE_16; 3728 _lto4b(sz, cw.length); 3729 _lto8b(lba, cw.addr); 3730 xs_w.cmd = (struct scsi_generic *)&cw; 3731 wu_w->swu_flags |= SR_WUF_REBUILD; 3732 wu_w->swu_xs = &xs_w; 3733 if (sd->sd_scsi_rw(wu_w)) { 3734 printf("%s: could not create write io\n", 3735 DEVNAME(sc)); 3736 goto fail; 3737 } 3738 3739 /* 3740 * collide with the read io so that we get automatically 3741 * started when the read is done 3742 */ 3743 wu_w->swu_state = SR_WU_DEFERRED; 3744 wu_r->swu_collider = wu_w; 3745 s = splbio(); 3746 TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu_w, swu_link); 3747 3748 /* schedule io */ 3749 if (sr_check_io_collision(wu_r)) 3750 goto queued; 3751 3752 sr_raid_startwu(wu_r); 3753 queued: 3754 splx(s); 3755 3756 /* wait for read completion */ 3757 slept = 0; 3758 while ((wu_w->swu_flags & SR_WUF_REBUILDIOCOMP) == 0) { 3759 tsleep(wu_w, PRIBIO, "sr_rebuild", 0); 3760 slept = 1; 3761 } 3762 /* yield if we didn't sleep */ 3763 if (slept == 0) 3764 tsleep(sc, PWAIT, "sr_yield", 1); 3765 3766 sr_wu_put(wu_r); 3767 sr_wu_put(wu_w); 3768 3769 sd->sd_meta->ssd_rebuild = lba; 3770 3771 /* save metadata every percent */ 3772 psz = sd->sd_meta->ssdi.ssd_size; 3773 rb = sd->sd_meta->ssd_rebuild; 3774 if (rb > 0) 3775 percent = 100 - ((psz * 100 - rb * 100) / psz) - 1; 3776 else 3777 percent = 0; 3778 if (percent != old_percent && blk != whole_blk) { 3779 if (sr_meta_save(sd, SR_META_DIRTY)) 3780 printf("%s: could not save metadata to %s\n", 3781 DEVNAME(sc), sd->sd_meta->ssd_devname); 3782 old_percent = percent; 3783 } 3784 3785 if (sd->sd_reb_abort) 3786 goto abort; 
3787 } 3788 3789 /* all done */ 3790 sd->sd_meta->ssd_rebuild = 0; 3791 for (c = 0; c < sd->sd_meta->ssdi.ssd_chunk_no; c++) 3792 if (sd->sd_vol.sv_chunks[c]->src_meta.scm_status == 3793 BIOC_SDREBUILD) { 3794 sd->sd_set_chunk_state(sd, c, BIOC_SDONLINE); 3795 break; 3796 } 3797 3798 abort: 3799 if (sr_meta_save(sd, SR_META_DIRTY)) 3800 printf("%s: could not save metadata to %s\n", 3801 DEVNAME(sc), sd->sd_meta->ssd_devname); 3802 fail: 3803 free(buf, M_DEVBUF); 3804 sd->sd_reb_active = 0; 3805 kthread_exit(0); 3806 } 3807 3808 #ifndef SMALL_KERNEL 3809 int 3810 sr_sensors_create(struct sr_discipline *sd) 3811 { 3812 struct sr_softc *sc = sd->sd_sc; 3813 int rv = 1; 3814 3815 DNPRINTF(SR_D_STATE, "%s: %s: sr_sensors_create\n", 3816 DEVNAME(sc), sd->sd_meta->ssd_devname); 3817 3818 strlcpy(sd->sd_vol.sv_sensordev.xname, DEVNAME(sc), 3819 sizeof(sd->sd_vol.sv_sensordev.xname)); 3820 3821 sd->sd_vol.sv_sensor.type = SENSOR_DRIVE; 3822 sd->sd_vol.sv_sensor.status = SENSOR_S_UNKNOWN; 3823 strlcpy(sd->sd_vol.sv_sensor.desc, sd->sd_meta->ssd_devname, 3824 sizeof(sd->sd_vol.sv_sensor.desc)); 3825 3826 sensor_attach(&sd->sd_vol.sv_sensordev, &sd->sd_vol.sv_sensor); 3827 3828 if (sc->sc_sensors_running == 0) { 3829 if (sensor_task_register(sc, sr_sensors_refresh, 10) == NULL) 3830 goto bad; 3831 sc->sc_sensors_running = 1; 3832 } 3833 sensordev_install(&sd->sd_vol.sv_sensordev); 3834 3835 rv = 0; 3836 bad: 3837 return (rv); 3838 } 3839 3840 void 3841 sr_sensors_delete(struct sr_discipline *sd) 3842 { 3843 DNPRINTF(SR_D_STATE, "%s: sr_sensors_delete\n", DEVNAME(sd->sd_sc)); 3844 3845 if (sd->sd_vol.sv_sensor_valid) 3846 sensordev_deinstall(&sd->sd_vol.sv_sensordev); 3847 } 3848 3849 void 3850 sr_sensors_refresh(void *arg) 3851 { 3852 struct sr_softc *sc = arg; 3853 struct sr_volume *sv; 3854 struct sr_discipline *sd; 3855 int i, vol; 3856 3857 DNPRINTF(SR_D_STATE, "%s: sr_sensors_refresh\n", DEVNAME(sc)); 3858 3859 for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { 3860 /* XXX this will not work when we stagger disciplines */ 3861 if (!sc->sc_dis[i]) 3862 continue; 3863 3864 sd = sc->sc_dis[i]; 3865 sv = &sd->sd_vol; 3866 3867 switch(sd->sd_vol_status) { 3868 case BIOC_SVOFFLINE: 3869 sv->sv_sensor.value = SENSOR_DRIVE_FAIL; 3870 sv->sv_sensor.status = SENSOR_S_CRIT; 3871 break; 3872 3873 case BIOC_SVDEGRADED: 3874 sv->sv_sensor.value = SENSOR_DRIVE_PFAIL; 3875 sv->sv_sensor.status = SENSOR_S_WARN; 3876 break; 3877 3878 case BIOC_SVSCRUB: 3879 case BIOC_SVONLINE: 3880 sv->sv_sensor.value = SENSOR_DRIVE_ONLINE; 3881 sv->sv_sensor.status = SENSOR_S_OK; 3882 break; 3883 3884 default: 3885 sv->sv_sensor.value = 0; /* unknown */ 3886 sv->sv_sensor.status = SENSOR_S_UNKNOWN; 3887 } 3888 } 3889 } 3890 #endif /* SMALL_KERNEL */ 3891 3892 #ifdef SR_FANCY_STATS 3893 void sr_print_stats(void); 3894 3895 void 3896 sr_print_stats(void) 3897 { 3898 struct sr_softc *sc; 3899 struct sr_discipline *sd; 3900 int i, vol; 3901 3902 for (i = 0; i < softraid_cd.cd_ndevs; i++) 3903 if (softraid_cd.cd_devs[i]) { 3904 sc = softraid_cd.cd_devs[i]; 3905 /* we'll only have one softc */ 3906 break; 3907 } 3908 3909 if (!sc) { 3910 printf("no softraid softc found\n"); 3911 return; 3912 } 3913 3914 for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { 3915 /* XXX this will not work when we stagger disciplines */ 3916 if (!sc->sc_dis[i]) 3917 continue; 3918 3919 sd = sc->sc_dis[i]; 3920 printf("%s: ios pending: %d collisions %llu\n", 3921 sd->sd_meta->ssd_devname, 3922 sd->sd_wu_pending, 3923 sd->sd_wu_collisions); 3924 } 3925 } 3926 #endif 
/* SR_FANCY_STATS */ 3927 3928 #ifdef SR_DEBUG 3929 void 3930 sr_meta_print(struct sr_metadata *m) 3931 { 3932 int i; 3933 struct sr_meta_chunk *mc; 3934 struct sr_meta_opt *mo; 3935 3936 if (!(sr_debug & SR_D_META)) 3937 return; 3938 3939 printf("\tssd_magic 0x%llx\n", m->ssdi.ssd_magic); 3940 printf("\tssd_version %d\n", m->ssdi.ssd_version); 3941 printf("\tssd_flags 0x%x\n", m->ssdi.ssd_flags); 3942 printf("\tssd_uuid "); 3943 sr_uuid_print(&m->ssdi.ssd_uuid, 1); 3944 printf("\tssd_chunk_no %d\n", m->ssdi.ssd_chunk_no); 3945 printf("\tssd_chunk_id %d\n", m->ssdi.ssd_chunk_id); 3946 printf("\tssd_opt_no %d\n", m->ssdi.ssd_opt_no); 3947 printf("\tssd_volid %d\n", m->ssdi.ssd_volid); 3948 printf("\tssd_level %d\n", m->ssdi.ssd_level); 3949 printf("\tssd_size %lld\n", m->ssdi.ssd_size); 3950 printf("\tssd_devname %s\n", m->ssd_devname); 3951 printf("\tssd_vendor %s\n", m->ssdi.ssd_vendor); 3952 printf("\tssd_product %s\n", m->ssdi.ssd_product); 3953 printf("\tssd_revision %s\n", m->ssdi.ssd_revision); 3954 printf("\tssd_strip_size %d\n", m->ssdi.ssd_strip_size); 3955 printf("\tssd_checksum "); 3956 sr_checksum_print(m->ssd_checksum); 3957 printf("\n"); 3958 printf("\tssd_meta_flags 0x%x\n", m->ssd_meta_flags); 3959 printf("\tssd_ondisk %llu\n", m->ssd_ondisk); 3960 3961 mc = (struct sr_meta_chunk *)(m + 1); 3962 for (i = 0; i < m->ssdi.ssd_chunk_no; i++, mc++) { 3963 printf("\t\tscm_volid %d\n", mc->scmi.scm_volid); 3964 printf("\t\tscm_chunk_id %d\n", mc->scmi.scm_chunk_id); 3965 printf("\t\tscm_devname %s\n", mc->scmi.scm_devname); 3966 printf("\t\tscm_size %lld\n", mc->scmi.scm_size); 3967 printf("\t\tscm_coerced_size %lld\n",mc->scmi.scm_coerced_size); 3968 printf("\t\tscm_uuid "); 3969 sr_uuid_print(&mc->scmi.scm_uuid, 1); 3970 printf("\t\tscm_checksum "); 3971 sr_checksum_print(mc->scm_checksum); 3972 printf("\n"); 3973 printf("\t\tscm_status %d\n", mc->scm_status); 3974 } 3975 3976 mo = (struct sr_meta_opt *)(mc); 3977 for (i = 0; i < m->ssdi.ssd_opt_no; i++, mo++) { 3978 printf("\t\t\tsom_type %d\n", mo->somi.som_type); 3979 printf("\t\t\tsom_checksum "); 3980 sr_checksum_print(mo->som_checksum); 3981 printf("\n"); 3982 } 3983 } 3984 3985 void 3986 sr_dump_mem(u_int8_t *p, int len) 3987 { 3988 int i; 3989 3990 for (i = 0; i < len; i++) 3991 printf("%02x ", *p++); 3992 printf("\n"); 3993 } 3994 3995 #endif /* SR_DEBUG */ 3996
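/*
 * Illustrative sketch only -- not driver code and not compiled (guarded by
 * #if 0).  It restates, as a standalone userland program, two pieces of
 * integer arithmetic used above: the usable volume size computed in
 * sr_ioctl_createraid() and the rebuild percentage reported by
 * sr_ioctl_vol() and sr_rebuild_thread().  The ex_* names are invented for
 * this sketch, and the DEV_BSHIFT (9, i.e. 512-byte sectors) and MAXPHYS
 * (64KB) values are assumptions about typical configurations, not values
 * taken from this file.
 */
#if 0
#include <stdio.h>
#include <stdint.h>

#define EX_DEV_BSHIFT	9		/* 512-byte sectors (assumed) */
#define EX_MAXPHYS	(64 * 1024)	/* typical MAXPHYS (assumed) */

/*
 * Usable volume size in sectors: each chunk's coerced size is rounded down
 * to a whole number of strips (strip_size >> DEV_BSHIFT sectors) and then
 * multiplied by the number of data-bearing chunks -- no_chunk for RAID 0,
 * no_chunk - 1 for RAID 4/5 and no_chunk - 2 for RAID 6 in the code above.
 */
static uint64_t
ex_vol_size(uint64_t coerced_size, int data_chunks)
{
	uint32_t strip_size = EX_MAXPHYS;

	return ((coerced_size & ~(uint64_t)((strip_size >> EX_DEV_BSHIFT) - 1))
	    * data_chunks);
}

/*
 * Rebuild progress: note the trailing "- 1", so the driver reports at most
 * 99% while a rebuild is still running.
 */
static int
ex_rebuild_percent(uint64_t sz, uint64_t rb)
{
	if (rb == 0)
		return (0);
	return (100 - ((sz * 100 - rb * 100) / sz) - 1);
}

int
main(void)
{
	/* Three 500000-sector chunks in RAID 5 -> two data chunks. */
	printf("RAID 5 volume size: %llu sectors\n",
	    (unsigned long long)ex_vol_size(500000, 2));

	/* A rebuild that has covered 250000 of 1000000 sectors prints 24%. */
	printf("rebuild progress: %d%%\n", ex_rebuild_percent(1000000, 250000));

	return (0);
}
#endif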