/* $OpenBSD: softraid.c,v 1.178 2009/11/15 13:32:04 jsing Exp $ */
/*
 * Copyright (c) 2007, 2008, 2009 Marco Peereboom <marco@peereboom.us>
 * Copyright (c) 2008 Chris Kuethe <ckuethe@openbsd.org>
 * Copyright (c) 2009 Joel Sing <jsing@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bio.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/device.h>
#include <sys/ioctl.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/kernel.h>
#include <sys/disk.h>
#include <sys/rwlock.h>
#include <sys/queue.h>
#include <sys/fcntl.h>
#include <sys/disklabel.h>
#include <sys/mount.h>
#include <sys/sensors.h>
#include <sys/stat.h>
#include <sys/conf.h>
#include <sys/uio.h>
#include <sys/workq.h>
#include <sys/kthread.h>

#ifdef AOE
#include <sys/mbuf.h>
#include <net/if_aoe.h>
#endif /* AOE */

#include <crypto/cryptodev.h>

#include <scsi/scsi_all.h>
#include <scsi/scsiconf.h>
#include <scsi/scsi_disk.h>

#include <dev/softraidvar.h>
#include <dev/rndvar.h>

/* #define SR_FANCY_STATS */

#ifdef SR_DEBUG
#define SR_FANCY_STATS
uint32_t	sr_debug = 0
		    /* | SR_D_CMD */
		    /* | SR_D_MISC */
		    /* | SR_D_INTR */
		    /* | SR_D_IOCTL */
		    /* | SR_D_CCB */
		    /* | SR_D_WU */
		    /* | SR_D_META */
		    /* | SR_D_DIS */
		    /* | SR_D_STATE */
		;
#endif

int		sr_match(struct device *, void *, void *);
void		sr_attach(struct device *, struct device *, void *);
int		sr_detach(struct device *, int);
int		sr_activate(struct device *, int);

struct cfattach softraid_ca = {
	sizeof(struct sr_softc), sr_match, sr_attach, sr_detach,
	sr_activate
};

struct cfdriver softraid_cd = {
	NULL, "softraid", DV_DULL
};

/* scsi & discipline */
int		sr_scsi_cmd(struct scsi_xfer *);
void		sr_minphys(struct buf *bp, struct scsi_link *sl);
void		sr_copy_internal_data(struct scsi_xfer *,
		    void *, size_t);
int		sr_scsi_ioctl(struct scsi_link *, u_long,
		    caddr_t, int, struct proc *);
int		sr_ioctl(struct device *, u_long, caddr_t);
int		sr_ioctl_inq(struct sr_softc *, struct bioc_inq *);
int		sr_ioctl_vol(struct sr_softc *, struct bioc_vol *);
int		sr_ioctl_disk(struct sr_softc *, struct bioc_disk *);
int		sr_ioctl_setstate(struct sr_softc *,
		    struct bioc_setstate *);
int		sr_ioctl_createraid(struct sr_softc *,
		    struct bioc_createraid *, int);
int		sr_ioctl_deleteraid(struct sr_softc *,
		    struct bioc_deleteraid *);
void		sr_chunks_unwind(struct sr_softc *,
		    struct sr_chunk_head *);
void		sr_discipline_free(struct sr_discipline *);
void		sr_discipline_shutdown(struct sr_discipline *);
int		sr_discipline_init(struct sr_discipline *, int);

/* utility functions */
void		sr_shutdown(void *);
void		sr_uuid_get(struct sr_uuid *);
void		sr_uuid_print(struct sr_uuid *, int);
void		sr_checksum_print(u_int8_t *);
void		sr_checksum(struct sr_softc *, void *, void *,
		    u_int32_t);
int		sr_boot_assembly(struct sr_softc *);
int		sr_already_assembled(struct sr_discipline *);
int		sr_hotspare(struct sr_softc *, dev_t);
void		sr_hotspare_rebuild(struct sr_discipline *);
int		sr_rebuild_init(struct sr_discipline *, dev_t);
void		sr_rebuild(void *);
void		sr_rebuild_thread(void *);
void		sr_roam_chunks(struct sr_discipline *);
int		sr_chunk_in_use(struct sr_softc *, dev_t);

/* don't include these on RAMDISK */
#ifndef SMALL_KERNEL
void		sr_sensors_refresh(void *);
int		sr_sensors_create(struct sr_discipline *);
void		sr_sensors_delete(struct sr_discipline *);
#endif

/* metadata */
int		sr_meta_probe(struct sr_discipline *, dev_t *, int);
int		sr_meta_attach(struct sr_discipline *, int);
void		sr_meta_getdevname(struct sr_softc *, dev_t, char *,
		    int);
int		sr_meta_rw(struct sr_discipline *, dev_t, void *,
		    size_t, daddr64_t, long);
int		sr_meta_clear(struct sr_discipline *);
int		sr_meta_read(struct sr_discipline *);
int		sr_meta_save(struct sr_discipline *, u_int32_t);
int		sr_meta_validate(struct sr_discipline *, dev_t,
		    struct sr_metadata *, void *);
void		sr_meta_chunks_create(struct sr_softc *,
		    struct sr_chunk_head *);
void		sr_meta_init(struct sr_discipline *,
		    struct sr_chunk_head *);

/* hotplug magic */
void		sr_disk_attach(struct disk *, int);

struct sr_hotplug_list {
	void			(*sh_hotplug)(struct sr_discipline *,
				    struct disk *, int);
	struct sr_discipline	*sh_sd;

	SLIST_ENTRY(sr_hotplug_list) shl_link;
};
SLIST_HEAD(sr_hotplug_list_head, sr_hotplug_list);

struct sr_hotplug_list_head	sr_hotplug_callbacks;
extern void			(*softraid_disk_attach)(struct disk *, int);

/* scsi glue */
struct scsi_adapter sr_switch = {
	sr_scsi_cmd, sr_minphys, NULL, NULL, sr_scsi_ioctl
};

struct scsi_device sr_dev = {
	NULL, NULL, NULL, NULL
};

/* native metadata format */
int		sr_meta_native_bootprobe(struct sr_softc *,
		    struct device *, struct sr_metadata_list_head *);
#define SR_META_NOTCLAIMED	(0)
#define SR_META_CLAIMED		(1)
int		sr_meta_native_probe(struct sr_softc *,
		    struct sr_chunk *);
int		sr_meta_native_attach(struct sr_discipline *, int);
int		sr_meta_native_read(struct sr_discipline *, dev_t,
		    struct sr_metadata *, void *);
int		sr_meta_native_write(struct sr_discipline *, dev_t,
		    struct sr_metadata *, void *);

#ifdef SR_DEBUG
void		sr_meta_print(struct sr_metadata *);
#else
#define sr_meta_print(m)
#endif

/* the metadata driver should remain stateless */
struct sr_meta_driver {
	daddr64_t		smd_offset;	/* metadata location */
	u_int32_t		smd_size;	/* size of metadata */

	int			(*smd_probe)(struct sr_softc *,
				    struct sr_chunk *);
	int			(*smd_attach)(struct sr_discipline *, int);
	int			(*smd_detach)(struct sr_discipline *);
	int			(*smd_read)(struct sr_discipline *, dev_t,
				    struct sr_metadata *, void *);
	int			(*smd_write)(struct sr_discipline *, dev_t,
				    struct sr_metadata *, void *);
	int			(*smd_validate)(struct sr_discipline *,
				    struct sr_metadata *, void *);
} smd[] = {
	{ SR_META_OFFSET, SR_META_SIZE * 512,
	  sr_meta_native_probe, sr_meta_native_attach, NULL,
	  sr_meta_native_read, sr_meta_native_write, NULL },
#define SR_META_F_NATIVE	0
	{ 0, 0, NULL, NULL, NULL, NULL }
#define SR_META_F_INVALID	-1
};

int
sr_meta_attach(struct sr_discipline *sd, int force)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct sr_chunk_head	*cl;
	struct sr_chunk		*ch_entry;
	int			rv = 1, i = 0;

	DNPRINTF(SR_D_META, "%s: sr_meta_attach(%d)\n", DEVNAME(sc), force);

	/* in memory copy of metadata */
	sd->sd_meta = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO);
	if (!sd->sd_meta) {
		printf("%s: could not allocate memory for metadata\n",
		    DEVNAME(sc));
		goto bad;
	}

	if (sd->sd_meta_type != SR_META_F_NATIVE) {
		/* in memory copy of foreign metadata */
		sd->sd_meta_foreign = malloc(smd[sd->sd_meta_type].smd_size,
		    M_DEVBUF, M_ZERO);
		if (!sd->sd_meta_foreign) {
			/* unwind frees sd_meta */
			printf("%s: could not allocate memory for foreign "
			    "metadata\n", DEVNAME(sc));
			goto bad;
		}
	}

	/* we have a valid list now, create an array index */
	cl = &sd->sd_vol.sv_chunk_list;
	SLIST_FOREACH(ch_entry, cl, src_link) {
		i++;
	}
	sd->sd_vol.sv_chunks = malloc(sizeof(struct sr_chunk *) * i,
	    M_DEVBUF, M_WAITOK | M_ZERO);

	/* fill out chunk array */
	i = 0;
	SLIST_FOREACH(ch_entry, cl, src_link)
		sd->sd_vol.sv_chunks[i++] = ch_entry;

	/* attach metadata */
	if (smd[sd->sd_meta_type].smd_attach(sd, force))
		goto bad;

	rv = 0;
bad:
	return (rv);
}

int
sr_meta_probe(struct sr_discipline *sd, dev_t *dt, int no_chunk)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct vnode		*vn;
	struct sr_chunk		*ch_entry, *ch_prev = NULL;
	struct sr_chunk_head	*cl;
	char			devname[32];
	int			i, d, type, found, prevf, error;
	dev_t			dev;

	DNPRINTF(SR_D_META, "%s: sr_meta_probe(%d)\n", DEVNAME(sc), no_chunk);

	if (no_chunk == 0)
		goto unwind;

	cl = &sd->sd_vol.sv_chunk_list;

	for (d = 0, prevf = SR_META_F_INVALID; d < no_chunk; d++) {
		ch_entry = malloc(sizeof(struct sr_chunk), M_DEVBUF,
		    M_WAITOK | M_ZERO);
		/* keep disks in user supplied order */
		if (ch_prev)
			SLIST_INSERT_AFTER(ch_prev, ch_entry, src_link);
		else
			SLIST_INSERT_HEAD(cl, ch_entry, src_link);
		ch_prev = ch_entry;
		dev = dt[d];
		ch_entry->src_dev_mm = dev;

		if (dev == NODEV) {
			ch_entry->src_meta.scm_status = BIOC_SDOFFLINE;
			continue;
		} else {
			sr_meta_getdevname(sc, dev, devname, sizeof(devname));
			if (bdevvp(dev, &vn)) {
				printf("%s: sr_meta_probe: can't allocate "
				    "vnode\n", DEVNAME(sc));
				goto unwind;
			}

			/*
			 * XXX leaving dev open for now; move this to attach
			 * and figure out the open/close dance for unwind.
			 */
			error = VOP_OPEN(vn, FREAD | FWRITE, NOCRED, 0);
			if (error) {
				DNPRINTF(SR_D_META, "%s: sr_meta_probe can't "
				    "open %s\n", DEVNAME(sc), devname);
				vput(vn);
				goto unwind;
			}

			strlcpy(ch_entry->src_devname, devname,
			    sizeof(ch_entry->src_devname));
			ch_entry->src_vn = vn;
		}

		/* determine if this is a device we understand */
		for (i = 0, found = SR_META_F_INVALID; smd[i].smd_probe; i++) {
			type = smd[i].smd_probe(sc, ch_entry);
			if (type == SR_META_F_INVALID)
				continue;
			else {
				found = type;
				break;
			}
		}

		if (found == SR_META_F_INVALID)
			goto unwind;
		if (prevf == SR_META_F_INVALID)
			prevf = found;
		if (prevf != found) {
			DNPRINTF(SR_D_META, "%s: prevf != found\n",
			    DEVNAME(sc));
			goto unwind;
		}
	}

	return (prevf);
unwind:
	return (SR_META_F_INVALID);
}

void
sr_meta_getdevname(struct sr_softc *sc, dev_t dev, char *buf, int size)
{
	int			maj, unit, part;
	char			*name;

	DNPRINTF(SR_D_META, "%s: sr_meta_getdevname(%p, %d)\n",
	    DEVNAME(sc), buf, size);

	if (!buf)
		return;

	maj = major(dev);
	part = DISKPART(dev);
	unit = DISKUNIT(dev);

	name = findblkname(maj);
	if (name == NULL)
		return;

	snprintf(buf, size, "%s%d%c", name, unit, part + 'a');
}

int
sr_meta_rw(struct sr_discipline *sd, dev_t dev, void *md, size_t sz,
    daddr64_t ofs, long flags)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct buf		b;
	int			rv = 1;

	DNPRINTF(SR_D_META, "%s: sr_meta_rw(0x%x, %p, %d, %llu 0x%x)\n",
	    DEVNAME(sc), dev, md, sz, ofs, flags);

	bzero(&b, sizeof(b));

	if (md == NULL) {
		printf("%s: read invalid metadata pointer\n", DEVNAME(sc));
		goto done;
	}
	b.b_flags = flags | B_PHYS;
	b.b_blkno = ofs;
	b.b_bcount = sz;
	b.b_bufsize = sz;
	b.b_resid = sz;
	b.b_data = md;
	b.b_error = 0;
	b.b_proc = curproc;
	b.b_dev = dev;
	b.b_iodone = NULL;
	if (bdevvp(dev, &b.b_vp)) {
		printf("%s: sr_meta_rw: can't allocate vnode\n", DEVNAME(sc));
		goto done;
	}
	if ((b.b_flags & B_READ) == 0)
		b.b_vp->v_numoutput++;

	LIST_INIT(&b.b_dep);
	VOP_STRATEGY(&b);
	biowait(&b);

	if (b.b_flags & B_ERROR) {
		printf("%s: 0x%x i/o error on block %llu while reading "
		    "metadata %d\n", DEVNAME(sc), dev, b.b_blkno, b.b_error);
		goto done;
	}
	rv = 0;
done:
	if (b.b_vp)
		vput(b.b_vp);

	return (rv);
}

int
sr_meta_clear(struct sr_discipline *sd)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct sr_chunk_head	*cl = &sd->sd_vol.sv_chunk_list;
	struct sr_chunk		*ch_entry;
	void			*m;
	int			rv = 1;

	DNPRINTF(SR_D_META, "%s: sr_meta_clear\n", DEVNAME(sc));

	if (sd->sd_meta_type != SR_META_F_NATIVE) {
		printf("%s: sr_meta_clear cannot clear foreign metadata\n",
		    DEVNAME(sc));
		goto done;
	}

	m = malloc(SR_META_SIZE * 512, M_DEVBUF, M_WAITOK | M_ZERO);
	SLIST_FOREACH(ch_entry, cl, src_link) {
		if (sr_meta_native_write(sd, ch_entry->src_dev_mm, m, NULL)) {
			/* XXX mark disk offline */
			DNPRINTF(SR_D_META, "%s: sr_meta_clear failed to "
			    "clear %s\n", DEVNAME(sc), ch_entry->src_devname);
			rv++;
			continue;
		}
		bzero(&ch_entry->src_meta, sizeof(ch_entry->src_meta));
		bzero(&ch_entry->src_opt, sizeof(ch_entry->src_opt));
	}

	bzero(sd->sd_meta, SR_META_SIZE * 512);

	free(m, M_DEVBUF);
	rv = 0;
done:
return (rv); 468 } 469 470 void 471 sr_meta_chunks_create(struct sr_softc *sc, struct sr_chunk_head *cl) 472 { 473 struct sr_chunk *ch_entry; 474 struct sr_uuid uuid; 475 int cid = 0; 476 char *name; 477 u_int64_t max_chunk_sz = 0, min_chunk_sz; 478 479 DNPRINTF(SR_D_META, "%s: sr_meta_chunks_create\n", DEVNAME(sc)); 480 481 sr_uuid_get(&uuid); 482 483 /* fill out stuff and get largest chunk size while looping */ 484 SLIST_FOREACH(ch_entry, cl, src_link) { 485 name = ch_entry->src_devname; 486 ch_entry->src_meta.scmi.scm_size = ch_entry->src_size; 487 ch_entry->src_meta.scmi.scm_chunk_id = cid++; 488 ch_entry->src_meta.scm_status = BIOC_SDONLINE; 489 strlcpy(ch_entry->src_meta.scmi.scm_devname, name, 490 sizeof(ch_entry->src_meta.scmi.scm_devname)); 491 bcopy(&uuid, &ch_entry->src_meta.scmi.scm_uuid, 492 sizeof(ch_entry->src_meta.scmi.scm_uuid)); 493 494 if (ch_entry->src_meta.scmi.scm_size > max_chunk_sz) 495 max_chunk_sz = ch_entry->src_meta.scmi.scm_size; 496 } 497 498 /* get smallest chunk size */ 499 min_chunk_sz = max_chunk_sz; 500 SLIST_FOREACH(ch_entry, cl, src_link) 501 if (ch_entry->src_meta.scmi.scm_size < min_chunk_sz) 502 min_chunk_sz = ch_entry->src_meta.scmi.scm_size; 503 504 /* equalize all sizes */ 505 SLIST_FOREACH(ch_entry, cl, src_link) 506 ch_entry->src_meta.scmi.scm_coerced_size = min_chunk_sz; 507 508 /* whine if chunks are not the same size */ 509 if (min_chunk_sz != max_chunk_sz) 510 printf("%s: chunk sizes are not equal; up to %llu blocks " 511 "wasted per chunk\n", 512 DEVNAME(sc), max_chunk_sz - min_chunk_sz); 513 } 514 515 void 516 sr_meta_init(struct sr_discipline *sd, struct sr_chunk_head *cl) 517 { 518 struct sr_softc *sc = sd->sd_sc; 519 struct sr_metadata *sm = sd->sd_meta; 520 struct sr_meta_chunk *im_sc; 521 struct sr_meta_opt *im_so; 522 int i, chunk_no; 523 524 DNPRINTF(SR_D_META, "%s: sr_meta_init\n", DEVNAME(sc)); 525 526 if (!sm) 527 return; 528 529 /* initial metadata */ 530 sm->ssdi.ssd_magic = SR_MAGIC; 531 sm->ssdi.ssd_version = SR_META_VERSION; 532 sm->ssd_ondisk = 0; 533 sm->ssdi.ssd_flags = sd->sd_meta_flags; 534 /* get uuid from chunk 0 */ 535 bcopy(&sd->sd_vol.sv_chunks[0]->src_meta.scmi.scm_uuid, 536 &sm->ssdi.ssd_uuid, 537 sizeof(struct sr_uuid)); 538 539 /* volume is filled in createraid */ 540 541 /* add missing chunk bits */ 542 chunk_no = sm->ssdi.ssd_chunk_no; 543 for (i = 0; i < chunk_no; i++) { 544 im_sc = &sd->sd_vol.sv_chunks[i]->src_meta; 545 im_sc->scmi.scm_volid = sm->ssdi.ssd_volid; 546 sr_checksum(sc, im_sc, &im_sc->scm_checksum, 547 sizeof(struct sr_meta_chunk_invariant)); 548 549 /* carry optional meta also in chunk area */ 550 im_so = &sd->sd_vol.sv_chunks[i]->src_opt; 551 bzero(im_so, sizeof(*im_so)); 552 if (sd->sd_type == SR_MD_CRYPTO) { 553 sm->ssdi.ssd_opt_no = 1; 554 im_so->somi.som_type = SR_OPT_CRYPTO; 555 556 /* 557 * copy encrypted key / passphrase into optional 558 * metadata area 559 */ 560 bcopy(&sd->mds.mdd_crypto.scr_meta, 561 &im_so->somi.som_meta.smm_crypto, 562 sizeof(im_so->somi.som_meta.smm_crypto)); 563 564 sr_checksum(sc, im_so, im_so->som_checksum, 565 sizeof(struct sr_meta_opt_invariant)); 566 } 567 } 568 } 569 570 void 571 sr_meta_save_callback(void *arg1, void *arg2) 572 { 573 struct sr_discipline *sd = arg1; 574 int s; 575 576 s = splbio(); 577 578 if (sr_meta_save(arg1, SR_META_DIRTY)) 579 printf("%s: save metadata failed\n", 580 DEVNAME(sd->sd_sc)); 581 582 sd->sd_must_flush = 0; 583 splx(s); 584 } 585 586 int 587 sr_meta_save(struct sr_discipline *sd, u_int32_t flags) 588 { 589 struct 
sr_softc *sc = sd->sd_sc; 590 struct sr_metadata *sm = sd->sd_meta, *m; 591 struct sr_meta_driver *s; 592 struct sr_chunk *src; 593 struct sr_meta_chunk *cm; 594 struct sr_workunit wu; 595 struct sr_meta_opt *om; 596 int i; 597 598 DNPRINTF(SR_D_META, "%s: sr_meta_save %s\n", 599 DEVNAME(sc), sd->sd_meta->ssd_devname); 600 601 if (!sm) { 602 printf("%s: no in memory copy of metadata\n", DEVNAME(sc)); 603 goto bad; 604 } 605 606 /* meta scratchpad */ 607 s = &smd[sd->sd_meta_type]; 608 m = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO); 609 if (!m) { 610 printf("%s: could not allocate metadata scratch area\n", 611 DEVNAME(sc)); 612 goto bad; 613 } 614 615 if (sm->ssdi.ssd_opt_no > 1) 616 panic("not yet save > 1 optional metadata members"); 617 618 /* from here on out metadata is updated */ 619 restart: 620 sm->ssd_ondisk++; 621 sm->ssd_meta_flags = flags; 622 bcopy(sm, m, sizeof(*m)); 623 624 for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) { 625 src = sd->sd_vol.sv_chunks[i]; 626 cm = (struct sr_meta_chunk *)(m + 1); 627 bcopy(&src->src_meta, cm + i, sizeof(*cm)); 628 } 629 630 /* optional metadata */ 631 om = (struct sr_meta_opt *)(cm + i); 632 for (i = 0; i < sm->ssdi.ssd_opt_no; i++) { 633 bcopy(&src->src_opt, om + i, sizeof(*om)); 634 sr_checksum(sc, om, &om->som_checksum, 635 sizeof(struct sr_meta_opt_invariant)); 636 } 637 638 for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) { 639 src = sd->sd_vol.sv_chunks[i]; 640 641 /* skip disks that are offline */ 642 if (src->src_meta.scm_status == BIOC_SDOFFLINE) 643 continue; 644 645 /* calculate metadata checksum for correct chunk */ 646 m->ssdi.ssd_chunk_id = i; 647 sr_checksum(sc, m, &m->ssd_checksum, 648 sizeof(struct sr_meta_invariant)); 649 650 #ifdef SR_DEBUG 651 DNPRINTF(SR_D_META, "%s: sr_meta_save %s: volid: %d " 652 "chunkid: %d checksum: ", 653 DEVNAME(sc), src->src_meta.scmi.scm_devname, 654 m->ssdi.ssd_volid, m->ssdi.ssd_chunk_id); 655 656 if (sr_debug & SR_D_META) 657 sr_checksum_print((u_int8_t *)&m->ssd_checksum); 658 DNPRINTF(SR_D_META, "\n"); 659 sr_meta_print(m); 660 #endif 661 662 /* translate and write to disk */ 663 if (s->smd_write(sd, src->src_dev_mm, m, NULL /* XXX */)) { 664 printf("%s: could not write metadata to %s\n", 665 DEVNAME(sc), src->src_devname); 666 /* restart the meta write */ 667 src->src_meta.scm_status = BIOC_SDOFFLINE; 668 /* XXX recalculate volume status */ 669 goto restart; 670 } 671 } 672 673 /* not all disciplines have sync */ 674 if (sd->sd_scsi_sync) { 675 bzero(&wu, sizeof(wu)); 676 wu.swu_fake = 1; 677 wu.swu_dis = sd; 678 sd->sd_scsi_sync(&wu); 679 } 680 free(m, M_DEVBUF); 681 return (0); 682 bad: 683 return (1); 684 } 685 686 int 687 sr_meta_read(struct sr_discipline *sd) 688 { 689 #ifdef SR_DEBUG 690 struct sr_softc *sc = sd->sd_sc; 691 #endif 692 struct sr_chunk_head *cl = &sd->sd_vol.sv_chunk_list; 693 struct sr_metadata *sm; 694 struct sr_chunk *ch_entry; 695 struct sr_meta_chunk *cp; 696 struct sr_meta_driver *s; 697 struct sr_meta_opt *om; 698 void *fm = NULL; 699 int no_disk = 0, got_meta = 0; 700 701 DNPRINTF(SR_D_META, "%s: sr_meta_read\n", DEVNAME(sc)); 702 703 sm = malloc(SR_META_SIZE * 512, M_DEVBUF, M_WAITOK | M_ZERO); 704 s = &smd[sd->sd_meta_type]; 705 if (sd->sd_meta_type != SR_META_F_NATIVE) 706 fm = malloc(s->smd_size, M_DEVBUF, M_WAITOK | M_ZERO); 707 708 cp = (struct sr_meta_chunk *)(sm + 1); 709 SLIST_FOREACH(ch_entry, cl, src_link) { 710 /* skip disks that are offline */ 711 if (ch_entry->src_meta.scm_status == BIOC_SDOFFLINE) { 712 DNPRINTF(SR_D_META, 713 "%s: %s chunk marked 
offline, spoofing status\n", 714 DEVNAME(sc), ch_entry->src_devname); 715 cp++; /* adjust chunk pointer to match failure */ 716 continue; 717 } else if (s->smd_read(sd, ch_entry->src_dev_mm, sm, fm)) { 718 /* read and translate */ 719 /* XXX mark chunk offline, elsewhere!! */ 720 ch_entry->src_meta.scm_status = BIOC_SDOFFLINE; 721 cp++; /* adjust chunk pointer to match failure */ 722 DNPRINTF(SR_D_META, "%s: sr_meta_read failed\n", 723 DEVNAME(sc)); 724 continue; 725 } 726 727 if (sm->ssdi.ssd_magic != SR_MAGIC) { 728 DNPRINTF(SR_D_META, "%s: sr_meta_read !SR_MAGIC\n", 729 DEVNAME(sc)); 730 continue; 731 } 732 733 /* validate metadata */ 734 if (sr_meta_validate(sd, ch_entry->src_dev_mm, sm, fm)) { 735 DNPRINTF(SR_D_META, "%s: invalid metadata\n", 736 DEVNAME(sc)); 737 no_disk = -1; 738 goto done; 739 } 740 741 /* assume first chunk contains metadata */ 742 if (got_meta == 0) { 743 bcopy(sm, sd->sd_meta, sizeof(*sd->sd_meta)); 744 got_meta = 1; 745 } 746 747 bcopy(cp, &ch_entry->src_meta, sizeof(ch_entry->src_meta)); 748 749 if (sm->ssdi.ssd_opt_no > 1) 750 panic("not yet read > 1 optional metadata members"); 751 752 if (sm->ssdi.ssd_opt_no) { 753 om = (struct sr_meta_opt *) ((u_int8_t *)(sm + 1) + 754 sizeof(struct sr_meta_chunk) * 755 sm->ssdi.ssd_chunk_no); 756 bcopy(om, &ch_entry->src_opt, 757 sizeof(ch_entry->src_opt)); 758 759 if (om->somi.som_type == SR_OPT_CRYPTO) { 760 bcopy( 761 &ch_entry->src_opt.somi.som_meta.smm_crypto, 762 &sd->mds.mdd_crypto.scr_meta, 763 sizeof(sd->mds.mdd_crypto.scr_meta)); 764 } 765 } 766 767 cp++; 768 no_disk++; 769 } 770 771 free(sm, M_DEVBUF); 772 if (fm) 773 free(fm, M_DEVBUF); 774 775 done: 776 DNPRINTF(SR_D_META, "%s: sr_meta_read found %d parts\n", DEVNAME(sc), 777 no_disk); 778 return (no_disk); 779 } 780 781 int 782 sr_meta_validate(struct sr_discipline *sd, dev_t dev, struct sr_metadata *sm, 783 void *fm) 784 { 785 struct sr_softc *sc = sd->sd_sc; 786 struct sr_meta_driver *s; 787 #ifdef SR_DEBUG 788 struct sr_meta_chunk *mc; 789 #endif 790 char devname[32]; 791 int rv = 1; 792 u_int8_t checksum[MD5_DIGEST_LENGTH]; 793 794 DNPRINTF(SR_D_META, "%s: sr_meta_validate(%p)\n", DEVNAME(sc), sm); 795 796 sr_meta_getdevname(sc, dev, devname, sizeof(devname)); 797 798 s = &smd[sd->sd_meta_type]; 799 if (sd->sd_meta_type != SR_META_F_NATIVE) 800 if (s->smd_validate(sd, sm, fm)) { 801 printf("%s: invalid foreign metadata\n", DEVNAME(sc)); 802 goto done; 803 } 804 805 /* 806 * at this point all foreign metadata has been translated to the native 807 * format and will be treated just like the native format 808 */ 809 810 if (sm->ssdi.ssd_magic != SR_MAGIC) { 811 printf("%s: not valid softraid metadata\n", DEVNAME(sc)); 812 goto done; 813 } 814 815 if (sm->ssdi.ssd_version != SR_META_VERSION) { 816 printf("%s: %s can not read metadata version %u, expected %u\n", 817 DEVNAME(sc), devname, sm->ssdi.ssd_version, 818 SR_META_VERSION); 819 goto done; 820 } 821 822 sr_checksum(sc, sm, &checksum, sizeof(struct sr_meta_invariant)); 823 if (bcmp(&checksum, &sm->ssd_checksum, sizeof(checksum))) { 824 printf("%s: invalid metadata checksum\n", DEVNAME(sc)); 825 goto done; 826 } 827 828 /* XXX do other checksums */ 829 830 #ifdef SR_DEBUG 831 /* warn if disk changed order */ 832 mc = (struct sr_meta_chunk *)(sm + 1); 833 if (strncmp(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname, devname, 834 sizeof(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname))) 835 DNPRINTF(SR_D_META, "%s: roaming device %s -> %s\n", 836 DEVNAME(sc), mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname, 837 devname); 
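	/*
	 * A device name mismatch here only means the disk now lives on a
	 * different device node (it roamed); chunks are matched by their
	 * on-disk metadata, so this is logged for debugging only.
	 */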
838 #endif 839 840 /* we have meta data on disk */ 841 DNPRINTF(SR_D_META, "%s: sr_meta_validate valid metadata %s\n", 842 DEVNAME(sc), devname); 843 844 rv = 0; 845 done: 846 return (rv); 847 } 848 849 int 850 sr_meta_native_bootprobe(struct sr_softc *sc, struct device *dv, 851 struct sr_metadata_list_head *mlh) 852 { 853 struct vnode *vn; 854 struct disklabel label; 855 struct sr_metadata *md = NULL; 856 struct sr_discipline *fake_sd = NULL; 857 struct sr_metadata_list *mle; 858 char devname[32]; 859 dev_t dev, devr; 860 int error, i, majdev; 861 int rv = SR_META_NOTCLAIMED; 862 863 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe\n", DEVNAME(sc)); 864 865 majdev = findblkmajor(dv); 866 if (majdev == -1) 867 goto done; 868 dev = MAKEDISKDEV(majdev, dv->dv_unit, RAW_PART); 869 if (bdevvp(dev, &vn)) { 870 printf("%s:, sr_meta_native_bootprobe: can't allocate vnode\n", 871 DEVNAME(sc)); 872 goto done; 873 } 874 875 /* open device */ 876 error = VOP_OPEN(vn, FREAD, NOCRED, 0); 877 if (error) { 878 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe open " 879 "failed\n", DEVNAME(sc)); 880 vput(vn); 881 goto done; 882 } 883 884 /* get disklabel */ 885 error = VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD, NOCRED, 0); 886 if (error) { 887 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe ioctl " 888 "failed\n", DEVNAME(sc)); 889 VOP_CLOSE(vn, FREAD, NOCRED, 0); 890 vput(vn); 891 goto done; 892 } 893 894 /* we are done, close device */ 895 error = VOP_CLOSE(vn, FREAD, NOCRED, 0); 896 if (error) { 897 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe close " 898 "failed\n", DEVNAME(sc)); 899 vput(vn); 900 goto done; 901 } 902 vput(vn); 903 904 md = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO); 905 if (md == NULL) { 906 printf("%s: not enough memory for metadata buffer\n", 907 DEVNAME(sc)); 908 goto done; 909 } 910 911 /* create fake sd to use utility functions */ 912 fake_sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_ZERO); 913 if (fake_sd == NULL) { 914 printf("%s: not enough memory for fake discipline\n", 915 DEVNAME(sc)); 916 goto done; 917 } 918 fake_sd->sd_sc = sc; 919 fake_sd->sd_meta_type = SR_META_F_NATIVE; 920 921 for (i = 0; i < MAXPARTITIONS; i++) { 922 if (label.d_partitions[i].p_fstype != FS_RAID) 923 continue; 924 925 /* open partition */ 926 devr = MAKEDISKDEV(majdev, dv->dv_unit, i); 927 if (bdevvp(devr, &vn)) { 928 printf("%s:, sr_meta_native_bootprobe: can't allocate " 929 "vnode for partition\n", DEVNAME(sc)); 930 goto done; 931 } 932 error = VOP_OPEN(vn, FREAD, NOCRED, 0); 933 if (error) { 934 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe " 935 "open failed, partition %d\n", 936 DEVNAME(sc), i); 937 vput(vn); 938 continue; 939 } 940 941 if (sr_meta_native_read(fake_sd, devr, md, NULL)) { 942 printf("%s: native bootprobe could not read native " 943 "metadata\n", DEVNAME(sc)); 944 VOP_CLOSE(vn, FREAD, NOCRED, 0); 945 vput(vn); 946 continue; 947 } 948 949 /* are we a softraid partition? 
*/ 950 if (md->ssdi.ssd_magic != SR_MAGIC) { 951 VOP_CLOSE(vn, FREAD, NOCRED, 0); 952 vput(vn); 953 continue; 954 } 955 956 sr_meta_getdevname(sc, devr, devname, sizeof(devname)); 957 if (sr_meta_validate(fake_sd, devr, md, NULL) == 0) { 958 if (md->ssdi.ssd_flags & BIOC_SCNOAUTOASSEMBLE) { 959 DNPRINTF(SR_D_META, "%s: don't save %s\n", 960 DEVNAME(sc), devname); 961 } else { 962 /* XXX fix M_WAITOK, this is boot time */ 963 mle = malloc(sizeof(*mle), M_DEVBUF, 964 M_WAITOK | M_ZERO); 965 bcopy(md, &mle->sml_metadata, 966 SR_META_SIZE * 512); 967 mle->sml_mm = devr; 968 mle->sml_vn = vn; 969 SLIST_INSERT_HEAD(mlh, mle, sml_link); 970 rv = SR_META_CLAIMED; 971 } 972 } 973 974 /* we are done, close partition */ 975 VOP_CLOSE(vn, FREAD, NOCRED, 0); 976 vput(vn); 977 } 978 979 done: 980 if (fake_sd) 981 free(fake_sd, M_DEVBUF); 982 if (md) 983 free(md, M_DEVBUF); 984 985 return (rv); 986 } 987 988 int 989 sr_boot_assembly(struct sr_softc *sc) 990 { 991 struct device *dv; 992 struct bioc_createraid bc; 993 struct sr_metadata_list_head mlh; 994 struct sr_metadata_list *mle, *mlenext, *mle1, *mle2; 995 struct sr_metadata *metadata; 996 struct sr_boot_volume_head bvh; 997 struct sr_boot_volume *vol, *vp1, *vp2; 998 struct sr_meta_chunk *hm; 999 struct sr_chunk_head *cl; 1000 struct sr_chunk *hotspare, *chunk, *last; 1001 u_int32_t chunk_id; 1002 u_int64_t *ondisk = NULL; 1003 dev_t *devs = NULL; 1004 char devname[32]; 1005 int rv = 0, i; 1006 1007 DNPRINTF(SR_D_META, "%s: sr_boot_assembly\n", DEVNAME(sc)); 1008 1009 SLIST_INIT(&mlh); 1010 1011 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1012 if (dv->dv_class != DV_DISK) 1013 continue; 1014 1015 /* Only check sd(4) and wd(4) devices. */ 1016 if (strcmp(dv->dv_cfdata->cf_driver->cd_name, "sd") && 1017 strcmp(dv->dv_cfdata->cf_driver->cd_name, "wd")) 1018 continue; 1019 1020 /* native softraid uses partitions */ 1021 if (sr_meta_native_bootprobe(sc, dv, &mlh) == SR_META_CLAIMED) 1022 continue; 1023 1024 /* probe non-native disks */ 1025 } 1026 1027 /* 1028 * Create a list of volumes and associate chunks with each volume. 1029 */ 1030 SLIST_INIT(&bvh); 1031 for (mle = SLIST_FIRST(&mlh); mle != SLIST_END(&mlh); mle = mlenext) { 1032 1033 mlenext = SLIST_NEXT(mle, sml_link); 1034 SLIST_REMOVE(&mlh, mle, sr_metadata_list, sml_link); 1035 1036 metadata = (struct sr_metadata *)&mle->sml_metadata; 1037 mle->sml_chunk_id = metadata->ssdi.ssd_chunk_id; 1038 1039 SLIST_FOREACH(vol, &bvh, sbv_link) { 1040 if (bcmp(&metadata->ssdi.ssd_uuid, &vol->sbv_uuid, 1041 sizeof(metadata->ssdi.ssd_uuid)) == 0) 1042 break; 1043 } 1044 1045 if (vol == NULL) { 1046 vol = malloc(sizeof(struct sr_boot_volume), 1047 M_DEVBUF, M_NOWAIT | M_CANFAIL | M_ZERO); 1048 if (vol == NULL) { 1049 printf("%s: failed to allocate boot volume!\n", 1050 DEVNAME(sc)); 1051 goto unwind; 1052 } 1053 1054 vol->sbv_level = metadata->ssdi.ssd_level; 1055 vol->sbv_volid = metadata->ssdi.ssd_volid; 1056 vol->sbv_chunk_no = metadata->ssdi.ssd_chunk_no; 1057 bcopy(&metadata->ssdi.ssd_uuid, &vol->sbv_uuid, 1058 sizeof(metadata->ssdi.ssd_uuid)); 1059 SLIST_INIT(&vol->sml); 1060 1061 /* Maintain volume order. 
*/ 1062 vp2 = NULL; 1063 SLIST_FOREACH(vp1, &bvh, sbv_link) { 1064 if (vp1->sbv_volid > vol->sbv_volid) 1065 break; 1066 vp2 = vp1; 1067 } 1068 if (vp2 == NULL) { 1069 DNPRINTF(SR_D_META, "%s: insert volume %u " 1070 "at head\n", DEVNAME(sc), vol->sbv_volid); 1071 SLIST_INSERT_HEAD(&bvh, vol, sbv_link); 1072 } else { 1073 DNPRINTF(SR_D_META, "%s: insert volume %u " 1074 "after %u\n", DEVNAME(sc), vol->sbv_volid, 1075 vp2->sbv_volid); 1076 SLIST_INSERT_AFTER(vp2, vol, sbv_link); 1077 } 1078 } 1079 1080 /* Maintain chunk order. */ 1081 mle2 = NULL; 1082 SLIST_FOREACH(mle1, &vol->sml, sml_link) { 1083 if (mle1->sml_chunk_id > mle->sml_chunk_id) 1084 break; 1085 mle2 = mle1; 1086 } 1087 if (mle2 == NULL) { 1088 DNPRINTF(SR_D_META, "%s: volume %u insert chunk %u " 1089 "at head\n", DEVNAME(sc), vol->sbv_volid, 1090 mle->sml_chunk_id); 1091 SLIST_INSERT_HEAD(&vol->sml, mle, sml_link); 1092 } else { 1093 DNPRINTF(SR_D_META, "%s: volume %u insert chunk %u " 1094 "after %u\n", DEVNAME(sc), vol->sbv_volid, 1095 mle->sml_chunk_id, mle2->sml_chunk_id); 1096 SLIST_INSERT_AFTER(mle2, mle, sml_link); 1097 } 1098 1099 vol->sbv_dev_no++; 1100 } 1101 1102 /* Allocate memory for device and ondisk version arrays. */ 1103 devs = malloc(BIOC_CRMAXLEN * sizeof(dev_t), M_DEVBUF, 1104 M_NOWAIT | M_CANFAIL); 1105 if (devs == NULL) { 1106 printf("%s: failed to allocate device array\n", DEVNAME(sc)); 1107 goto unwind; 1108 } 1109 ondisk = malloc(BIOC_CRMAXLEN * sizeof(u_int64_t), M_DEVBUF, 1110 M_NOWAIT | M_CANFAIL); 1111 if (ondisk == NULL) { 1112 printf("%s: failed to allocate ondisk array\n", DEVNAME(sc)); 1113 goto unwind; 1114 } 1115 1116 /* 1117 * Assemble hotspare "volumes". 1118 */ 1119 SLIST_FOREACH(vol, &bvh, sbv_link) { 1120 1121 /* Check if this is a hotspare "volume". */ 1122 if (vol->sbv_level != SR_HOTSPARE_LEVEL || 1123 vol->sbv_chunk_no != 1) 1124 continue; 1125 1126 #ifdef SR_DEBUG 1127 DNPRINTF(SR_D_META, "%s: assembling hotspare volume ", 1128 DEVNAME(sc)); 1129 if (sr_debug & SR_D_META) 1130 sr_uuid_print(&vol->sbv_uuid, 0); 1131 DNPRINTF(SR_D_META, " volid %u with %u chunks\n", 1132 vol->sbv_volid, vol->sbv_chunk_no); 1133 #endif 1134 1135 /* Create hotspare chunk metadata. */ 1136 hotspare = malloc(sizeof(struct sr_chunk), M_DEVBUF, 1137 M_NOWAIT | M_CANFAIL | M_ZERO); 1138 if (hotspare == NULL) { 1139 printf("%s: failed to allocate hotspare\n", 1140 DEVNAME(sc)); 1141 goto unwind; 1142 } 1143 1144 mle = SLIST_FIRST(&vol->sml); 1145 sr_meta_getdevname(sc, mle->sml_mm, devname, sizeof(devname)); 1146 hotspare->src_dev_mm = mle->sml_mm; 1147 hotspare->src_vn = mle->sml_vn; 1148 strlcpy(hotspare->src_devname, devname, 1149 sizeof(hotspare->src_devname)); 1150 hotspare->src_size = metadata->ssdi.ssd_size; 1151 1152 hm = &hotspare->src_meta; 1153 hm->scmi.scm_volid = SR_HOTSPARE_VOLID; 1154 hm->scmi.scm_chunk_id = 0; 1155 hm->scmi.scm_size = metadata->ssdi.ssd_size; 1156 hm->scmi.scm_coerced_size = metadata->ssdi.ssd_size; 1157 strlcpy(hm->scmi.scm_devname, devname, 1158 sizeof(hm->scmi.scm_devname)); 1159 bcopy(&metadata->ssdi.ssd_uuid, &hm->scmi.scm_uuid, 1160 sizeof(struct sr_uuid)); 1161 1162 sr_checksum(sc, hm, &hm->scm_checksum, 1163 sizeof(struct sr_meta_chunk_invariant)); 1164 1165 hm->scm_status = BIOC_SDHOTSPARE; 1166 1167 /* Add chunk to hotspare list. 
*/ 1168 rw_enter_write(&sc->sc_hs_lock); 1169 cl = &sc->sc_hotspare_list; 1170 if (SLIST_EMPTY(cl)) 1171 SLIST_INSERT_HEAD(cl, hotspare, src_link); 1172 else { 1173 SLIST_FOREACH(chunk, cl, src_link) 1174 last = chunk; 1175 SLIST_INSERT_AFTER(last, hotspare, src_link); 1176 } 1177 sc->sc_hotspare_no++; 1178 rw_exit_write(&sc->sc_hs_lock); 1179 1180 } 1181 1182 /* 1183 * Assemble RAID volumes. 1184 */ 1185 SLIST_FOREACH(vol, &bvh, sbv_link) { 1186 1187 /* Check if this is a hotspare "volume". */ 1188 if (vol->sbv_level == SR_HOTSPARE_LEVEL && 1189 vol->sbv_chunk_no == 1) 1190 continue; 1191 1192 #ifdef SR_DEBUG 1193 DNPRINTF(SR_D_META, "%s: assembling volume ", DEVNAME(sc)); 1194 if (sr_debug & SR_D_META) 1195 sr_uuid_print(&vol->sbv_uuid, 0); 1196 DNPRINTF(SR_D_META, " volid %u with %u chunks\n", 1197 vol->sbv_volid, vol->sbv_chunk_no); 1198 #endif 1199 1200 for (i = 0; i < BIOC_CRMAXLEN; i++) { 1201 devs[i] = NODEV; /* mark device as illegal */ 1202 ondisk[i] = 0; 1203 } 1204 1205 SLIST_FOREACH(mle, &vol->sml, sml_link) { 1206 metadata = (struct sr_metadata *)&mle->sml_metadata; 1207 chunk_id = metadata->ssdi.ssd_chunk_id; 1208 1209 if (devs[chunk_id] != NODEV) { 1210 vol->sbv_dev_no--; 1211 sr_meta_getdevname(sc, mle->sml_mm, devname, 1212 sizeof(devname)); 1213 printf("%s: found duplicate chunk %u for " 1214 "volume %u on device %s\n", DEVNAME(sc), 1215 chunk_id, vol->sbv_volid, devname); 1216 } 1217 1218 if (devs[chunk_id] == NODEV || 1219 metadata->ssd_ondisk > ondisk[chunk_id]) { 1220 devs[chunk_id] = mle->sml_mm; 1221 ondisk[chunk_id] = metadata->ssd_ondisk; 1222 DNPRINTF(SR_D_META, "%s: using ondisk " 1223 "metadata version %llu for chunk %u\n", 1224 DEVNAME(sc), ondisk[chunk_id], chunk_id); 1225 } 1226 } 1227 1228 if (vol->sbv_chunk_no != vol->sbv_dev_no) { 1229 printf("%s: not all chunks were provided; " 1230 "attempting to bring volume %d online\n", 1231 DEVNAME(sc), vol->sbv_volid); 1232 } 1233 1234 bzero(&bc, sizeof(bc)); 1235 bc.bc_level = vol->sbv_level; 1236 bc.bc_dev_list_len = vol->sbv_chunk_no * sizeof(dev_t); 1237 bc.bc_dev_list = devs; 1238 bc.bc_flags = BIOC_SCDEVT; 1239 1240 rw_enter_write(&sc->sc_lock); 1241 sr_ioctl_createraid(sc, &bc, 0); 1242 rw_exit_write(&sc->sc_lock); 1243 1244 rv++; 1245 } 1246 1247 /* done with metadata */ 1248 unwind: 1249 for (vp1 = SLIST_FIRST(&bvh); vp1 != SLIST_END(&bvh); vp1 = vp2) { 1250 vp2 = SLIST_NEXT(vp1, sbv_link); 1251 for (mle1 = SLIST_FIRST(&vp1->sml); 1252 mle1 != SLIST_END(&vp1->sml); mle1 = mle2) { 1253 mle2 = SLIST_NEXT(mle1, sml_link); 1254 free(mle1, M_DEVBUF); 1255 } 1256 free(vp1, M_DEVBUF); 1257 } 1258 for (mle = SLIST_FIRST(&mlh); mle != SLIST_END(&mlh); mle = mle2) { 1259 mle2 = SLIST_NEXT(mle, sml_link); 1260 free(mle, M_DEVBUF); 1261 } 1262 SLIST_INIT(&mlh); 1263 1264 if (devs) 1265 free(devs, M_DEVBUF); 1266 if (ondisk) 1267 free(ondisk, M_DEVBUF); 1268 1269 return (rv); 1270 } 1271 1272 int 1273 sr_meta_native_probe(struct sr_softc *sc, struct sr_chunk *ch_entry) 1274 { 1275 struct disklabel label; 1276 char *devname; 1277 int error, part; 1278 daddr64_t size; 1279 1280 DNPRINTF(SR_D_META, "%s: sr_meta_native_probe(%s)\n", 1281 DEVNAME(sc), ch_entry->src_devname); 1282 1283 devname = ch_entry->src_devname; 1284 part = DISKPART(ch_entry->src_dev_mm); 1285 1286 /* get disklabel */ 1287 error = VOP_IOCTL(ch_entry->src_vn, DIOCGDINFO, (caddr_t)&label, FREAD, 1288 NOCRED, 0); 1289 if (error) { 1290 DNPRINTF(SR_D_META, "%s: %s can't obtain disklabel\n", 1291 DEVNAME(sc), devname); 1292 goto unwind; 1293 } 1294 1295 /* 
make sure the partition is of the right type */ 1296 if (label.d_partitions[part].p_fstype != FS_RAID) { 1297 DNPRINTF(SR_D_META, 1298 "%s: %s partition not of type RAID (%d)\n", DEVNAME(sc), 1299 devname, 1300 label.d_partitions[part].p_fstype); 1301 goto unwind; 1302 } 1303 1304 size = DL_GETPSIZE(&label.d_partitions[part]) - 1305 SR_META_SIZE - SR_META_OFFSET; 1306 if (size <= 0) { 1307 DNPRINTF(SR_D_META, "%s: %s partition too small\n", DEVNAME(sc), 1308 devname); 1309 goto unwind; 1310 } 1311 ch_entry->src_size = size; 1312 1313 DNPRINTF(SR_D_META, "%s: probe found %s size %d\n", DEVNAME(sc), 1314 devname, size); 1315 1316 return (SR_META_F_NATIVE); 1317 unwind: 1318 DNPRINTF(SR_D_META, "%s: invalid device: %s\n", DEVNAME(sc), 1319 devname ? devname : "nodev"); 1320 return (SR_META_F_INVALID); 1321 } 1322 1323 int 1324 sr_meta_native_attach(struct sr_discipline *sd, int force) 1325 { 1326 struct sr_softc *sc = sd->sd_sc; 1327 struct sr_chunk_head *cl = &sd->sd_vol.sv_chunk_list; 1328 struct sr_metadata *md = NULL; 1329 struct sr_chunk *ch_entry, *ch_next; 1330 struct sr_uuid uuid; 1331 u_int64_t version = 0; 1332 int sr, not_sr, rv = 1, d, expected = -1, old_meta = 0; 1333 1334 DNPRINTF(SR_D_META, "%s: sr_meta_native_attach\n", DEVNAME(sc)); 1335 1336 md = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO); 1337 if (md == NULL) { 1338 printf("%s: not enough memory for metadata buffer\n", 1339 DEVNAME(sc)); 1340 goto bad; 1341 } 1342 1343 bzero(&uuid, sizeof uuid); 1344 1345 sr = not_sr = d = 0; 1346 SLIST_FOREACH(ch_entry, cl, src_link) { 1347 if (ch_entry->src_dev_mm == NODEV) 1348 continue; 1349 1350 if (sr_meta_native_read(sd, ch_entry->src_dev_mm, md, NULL)) { 1351 printf("%s: could not read native metadata\n", 1352 DEVNAME(sc)); 1353 goto bad; 1354 } 1355 1356 if (md->ssdi.ssd_magic == SR_MAGIC) { 1357 sr++; 1358 if (d == 0) { 1359 bcopy(&md->ssdi.ssd_uuid, &uuid, sizeof uuid); 1360 expected = md->ssdi.ssd_chunk_no; 1361 version = md->ssd_ondisk; 1362 d++; 1363 continue; 1364 } else if (bcmp(&md->ssdi.ssd_uuid, &uuid, 1365 sizeof uuid)) { 1366 printf("%s: not part of the same volume\n", 1367 DEVNAME(sc)); 1368 goto bad; 1369 } 1370 if (md->ssd_ondisk != version) { 1371 old_meta++; 1372 version = MAX(md->ssd_ondisk, version); 1373 } 1374 } else 1375 not_sr++; 1376 } 1377 1378 if (sr && not_sr) { 1379 printf("%s: not all chunks are of the native metadata format\n", 1380 DEVNAME(sc)); 1381 goto bad; 1382 } 1383 1384 /* mixed metadata versions; mark bad disks offline */ 1385 if (old_meta) { 1386 d = 0; 1387 for (ch_entry = SLIST_FIRST(cl); ch_entry != SLIST_END(cl); 1388 ch_entry = ch_next, d++) { 1389 ch_next = SLIST_NEXT(ch_entry, src_link); 1390 1391 /* XXX do we want to read this again? 
*/ 1392 if (ch_entry->src_dev_mm == NODEV) 1393 panic("src_dev_mm == NODEV"); 1394 if (sr_meta_native_read(sd, ch_entry->src_dev_mm, md, 1395 NULL)) 1396 printf("%s: could not read native metadata\n", 1397 DEVNAME(sc)); 1398 if (md->ssd_ondisk != version) 1399 sd->sd_vol.sv_chunks[d]->src_meta.scm_status = 1400 BIOC_SDOFFLINE; 1401 } 1402 } 1403 1404 if (expected != sr && !force && expected != -1) { 1405 DNPRINTF(SR_D_META, "%s: not all chunks were provided, trying " 1406 "anyway\n", DEVNAME(sc)); 1407 } 1408 1409 rv = 0; 1410 bad: 1411 if (md) 1412 free(md, M_DEVBUF); 1413 return (rv); 1414 } 1415 1416 int 1417 sr_meta_native_read(struct sr_discipline *sd, dev_t dev, 1418 struct sr_metadata *md, void *fm) 1419 { 1420 #ifdef SR_DEBUG 1421 struct sr_softc *sc = sd->sd_sc; 1422 #endif 1423 DNPRINTF(SR_D_META, "%s: sr_meta_native_read(0x%x, %p)\n", 1424 DEVNAME(sc), dev, md); 1425 1426 return (sr_meta_rw(sd, dev, md, SR_META_SIZE * 512, SR_META_OFFSET, 1427 B_READ)); 1428 } 1429 1430 int 1431 sr_meta_native_write(struct sr_discipline *sd, dev_t dev, 1432 struct sr_metadata *md, void *fm) 1433 { 1434 #ifdef SR_DEBUG 1435 struct sr_softc *sc = sd->sd_sc; 1436 #endif 1437 DNPRINTF(SR_D_META, "%s: sr_meta_native_write(0x%x, %p)\n", 1438 DEVNAME(sc), dev, md); 1439 1440 return (sr_meta_rw(sd, dev, md, SR_META_SIZE * 512, SR_META_OFFSET, 1441 B_WRITE)); 1442 } 1443 1444 void 1445 sr_hotplug_register(struct sr_discipline *sd, void *func) 1446 { 1447 struct sr_hotplug_list *mhe; 1448 1449 DNPRINTF(SR_D_MISC, "%s: sr_hotplug_register: %p\n", 1450 DEVNAME(sd->sd_sc), func); 1451 1452 /* make sure we aren't on the list yet */ 1453 SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link) 1454 if (mhe->sh_hotplug == func) 1455 return; 1456 1457 mhe = malloc(sizeof(struct sr_hotplug_list), M_DEVBUF, 1458 M_WAITOK | M_ZERO); 1459 mhe->sh_hotplug = func; 1460 mhe->sh_sd = sd; 1461 SLIST_INSERT_HEAD(&sr_hotplug_callbacks, mhe, shl_link); 1462 } 1463 1464 void 1465 sr_hotplug_unregister(struct sr_discipline *sd, void *func) 1466 { 1467 struct sr_hotplug_list *mhe; 1468 1469 DNPRINTF(SR_D_MISC, "%s: sr_hotplug_unregister: %s %p\n", 1470 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, func); 1471 1472 /* make sure we are on the list yet */ 1473 SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link) 1474 if (mhe->sh_hotplug == func) { 1475 SLIST_REMOVE(&sr_hotplug_callbacks, mhe, 1476 sr_hotplug_list, shl_link); 1477 free(mhe, M_DEVBUF); 1478 if (SLIST_EMPTY(&sr_hotplug_callbacks)) 1479 SLIST_INIT(&sr_hotplug_callbacks); 1480 return; 1481 } 1482 } 1483 1484 void 1485 sr_disk_attach(struct disk *diskp, int action) 1486 { 1487 struct sr_hotplug_list *mhe; 1488 1489 SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link) 1490 if (mhe->sh_sd->sd_ready) 1491 mhe->sh_hotplug(mhe->sh_sd, diskp, action); 1492 } 1493 1494 int 1495 sr_match(struct device *parent, void *match, void *aux) 1496 { 1497 return (1); 1498 } 1499 1500 void 1501 sr_attach(struct device *parent, struct device *self, void *aux) 1502 { 1503 struct sr_softc *sc = (void *)self; 1504 1505 DNPRINTF(SR_D_MISC, "\n%s: sr_attach", DEVNAME(sc)); 1506 1507 rw_init(&sc->sc_lock, "sr_lock"); 1508 rw_init(&sc->sc_hs_lock, "sr_hs_lock"); 1509 1510 SLIST_INIT(&sr_hotplug_callbacks); 1511 SLIST_INIT(&sc->sc_hotspare_list); 1512 1513 if (bio_register(&sc->sc_dev, sr_ioctl) != 0) 1514 printf("%s: controller registration failed", DEVNAME(sc)); 1515 else 1516 sc->sc_ioctl = sr_ioctl; 1517 1518 printf("\n"); 1519 1520 softraid_disk_attach = sr_disk_attach; 1521 1522 
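	/*
	 * Scan all attached disks for existing softraid metadata and
	 * auto-assemble any volumes that are found.
	 */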
sr_boot_assembly(sc); 1523 } 1524 1525 int 1526 sr_detach(struct device *self, int flags) 1527 { 1528 return (0); 1529 } 1530 1531 int 1532 sr_activate(struct device *self, int act) 1533 { 1534 return (1); 1535 } 1536 1537 void 1538 sr_minphys(struct buf *bp, struct scsi_link *sl) 1539 { 1540 DNPRINTF(SR_D_MISC, "sr_minphys: %d\n", bp->b_bcount); 1541 1542 /* XXX currently using SR_MAXFER = MAXPHYS */ 1543 if (bp->b_bcount > SR_MAXFER) 1544 bp->b_bcount = SR_MAXFER; 1545 minphys(bp); 1546 } 1547 1548 void 1549 sr_copy_internal_data(struct scsi_xfer *xs, void *v, size_t size) 1550 { 1551 size_t copy_cnt; 1552 1553 DNPRINTF(SR_D_MISC, "sr_copy_internal_data xs: %p size: %d\n", 1554 xs, size); 1555 1556 if (xs->datalen) { 1557 copy_cnt = MIN(size, xs->datalen); 1558 bcopy(v, xs->data, copy_cnt); 1559 } 1560 } 1561 1562 int 1563 sr_ccb_alloc(struct sr_discipline *sd) 1564 { 1565 struct sr_ccb *ccb; 1566 int i; 1567 1568 if (!sd) 1569 return (1); 1570 1571 DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc\n", DEVNAME(sd->sd_sc)); 1572 1573 if (sd->sd_ccb) 1574 return (1); 1575 1576 sd->sd_ccb = malloc(sizeof(struct sr_ccb) * 1577 sd->sd_max_wu * sd->sd_max_ccb_per_wu, M_DEVBUF, M_WAITOK | M_ZERO); 1578 TAILQ_INIT(&sd->sd_ccb_freeq); 1579 for (i = 0; i < sd->sd_max_wu * sd->sd_max_ccb_per_wu; i++) { 1580 ccb = &sd->sd_ccb[i]; 1581 ccb->ccb_dis = sd; 1582 sr_ccb_put(ccb); 1583 } 1584 1585 DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc ccb: %d\n", 1586 DEVNAME(sd->sd_sc), sd->sd_max_wu * sd->sd_max_ccb_per_wu); 1587 1588 return (0); 1589 } 1590 1591 void 1592 sr_ccb_free(struct sr_discipline *sd) 1593 { 1594 struct sr_ccb *ccb; 1595 1596 if (!sd) 1597 return; 1598 1599 DNPRINTF(SR_D_CCB, "%s: sr_ccb_free %p\n", DEVNAME(sd->sd_sc), sd); 1600 1601 while ((ccb = TAILQ_FIRST(&sd->sd_ccb_freeq)) != NULL) 1602 TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link); 1603 1604 if (sd->sd_ccb) 1605 free(sd->sd_ccb, M_DEVBUF); 1606 } 1607 1608 struct sr_ccb * 1609 sr_ccb_get(struct sr_discipline *sd) 1610 { 1611 struct sr_ccb *ccb; 1612 int s; 1613 1614 s = splbio(); 1615 1616 ccb = TAILQ_FIRST(&sd->sd_ccb_freeq); 1617 if (ccb) { 1618 TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link); 1619 ccb->ccb_state = SR_CCB_INPROGRESS; 1620 } 1621 1622 splx(s); 1623 1624 DNPRINTF(SR_D_CCB, "%s: sr_ccb_get: %p\n", DEVNAME(sd->sd_sc), 1625 ccb); 1626 1627 return (ccb); 1628 } 1629 1630 void 1631 sr_ccb_put(struct sr_ccb *ccb) 1632 { 1633 struct sr_discipline *sd = ccb->ccb_dis; 1634 int s; 1635 1636 DNPRINTF(SR_D_CCB, "%s: sr_ccb_put: %p\n", DEVNAME(sd->sd_sc), 1637 ccb); 1638 1639 s = splbio(); 1640 1641 ccb->ccb_wu = NULL; 1642 ccb->ccb_state = SR_CCB_FREE; 1643 ccb->ccb_target = -1; 1644 ccb->ccb_opaque = NULL; 1645 1646 TAILQ_INSERT_TAIL(&sd->sd_ccb_freeq, ccb, ccb_link); 1647 1648 splx(s); 1649 } 1650 1651 int 1652 sr_wu_alloc(struct sr_discipline *sd) 1653 { 1654 struct sr_workunit *wu; 1655 int i, no_wu; 1656 1657 if (!sd) 1658 return (1); 1659 1660 DNPRINTF(SR_D_WU, "%s: sr_wu_alloc %p %d\n", DEVNAME(sd->sd_sc), 1661 sd, sd->sd_max_wu); 1662 1663 if (sd->sd_wu) 1664 return (1); 1665 1666 no_wu = sd->sd_max_wu; 1667 sd->sd_wu_pending = no_wu; 1668 1669 sd->sd_wu = malloc(sizeof(struct sr_workunit) * no_wu, 1670 M_DEVBUF, M_WAITOK | M_ZERO); 1671 TAILQ_INIT(&sd->sd_wu_freeq); 1672 TAILQ_INIT(&sd->sd_wu_pendq); 1673 TAILQ_INIT(&sd->sd_wu_defq); 1674 for (i = 0; i < no_wu; i++) { 1675 wu = &sd->sd_wu[i]; 1676 wu->swu_dis = sd; 1677 sr_wu_put(wu); 1678 } 1679 1680 return (0); 1681 } 1682 1683 void 1684 sr_wu_free(struct sr_discipline *sd) 1685 { 1686 
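	/*
	 * Drain the free, pending and deferred work unit queues before
	 * releasing the work unit array itself.
	 */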
struct sr_workunit *wu; 1687 1688 if (!sd) 1689 return; 1690 1691 DNPRINTF(SR_D_WU, "%s: sr_wu_free %p\n", DEVNAME(sd->sd_sc), sd); 1692 1693 while ((wu = TAILQ_FIRST(&sd->sd_wu_freeq)) != NULL) 1694 TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link); 1695 while ((wu = TAILQ_FIRST(&sd->sd_wu_pendq)) != NULL) 1696 TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link); 1697 while ((wu = TAILQ_FIRST(&sd->sd_wu_defq)) != NULL) 1698 TAILQ_REMOVE(&sd->sd_wu_defq, wu, swu_link); 1699 1700 if (sd->sd_wu) 1701 free(sd->sd_wu, M_DEVBUF); 1702 } 1703 1704 void 1705 sr_wu_put(struct sr_workunit *wu) 1706 { 1707 struct sr_discipline *sd = wu->swu_dis; 1708 struct sr_ccb *ccb; 1709 1710 int s; 1711 1712 DNPRINTF(SR_D_WU, "%s: sr_wu_put: %p\n", DEVNAME(sd->sd_sc), wu); 1713 1714 s = splbio(); 1715 1716 wu->swu_xs = NULL; 1717 wu->swu_state = SR_WU_FREE; 1718 wu->swu_ios_complete = 0; 1719 wu->swu_ios_failed = 0; 1720 wu->swu_ios_succeeded = 0; 1721 wu->swu_io_count = 0; 1722 wu->swu_blk_start = 0; 1723 wu->swu_blk_end = 0; 1724 wu->swu_collider = NULL; 1725 wu->swu_fake = 0; 1726 wu->swu_flags = 0; 1727 1728 while ((ccb = TAILQ_FIRST(&wu->swu_ccb)) != NULL) { 1729 TAILQ_REMOVE(&wu->swu_ccb, ccb, ccb_link); 1730 sr_ccb_put(ccb); 1731 } 1732 TAILQ_INIT(&wu->swu_ccb); 1733 1734 TAILQ_INSERT_TAIL(&sd->sd_wu_freeq, wu, swu_link); 1735 sd->sd_wu_pending--; 1736 1737 /* wake up sleepers */ 1738 #ifdef DIAGNOSTIC 1739 if (sd->sd_wu_sleep < 0) 1740 panic("negative wu sleepers"); 1741 #endif /* DIAGNOSTIC */ 1742 if (sd->sd_wu_sleep) 1743 wakeup(&sd->sd_wu_sleep); 1744 1745 splx(s); 1746 } 1747 1748 struct sr_workunit * 1749 sr_wu_get(struct sr_discipline *sd, int canwait) 1750 { 1751 struct sr_workunit *wu; 1752 int s; 1753 1754 s = splbio(); 1755 1756 for (;;) { 1757 wu = TAILQ_FIRST(&sd->sd_wu_freeq); 1758 if (wu) { 1759 TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link); 1760 wu->swu_state = SR_WU_INPROGRESS; 1761 sd->sd_wu_pending++; 1762 break; 1763 } else if (wu == NULL && canwait) { 1764 sd->sd_wu_sleep++; 1765 tsleep(&sd->sd_wu_sleep, PRIBIO, "sr_wu_get", 0); 1766 sd->sd_wu_sleep--; 1767 } else 1768 break; 1769 } 1770 1771 splx(s); 1772 1773 DNPRINTF(SR_D_WU, "%s: sr_wu_get: %p\n", DEVNAME(sd->sd_sc), wu); 1774 1775 return (wu); 1776 } 1777 1778 void 1779 sr_scsi_done(struct sr_discipline *sd, struct scsi_xfer *xs) 1780 { 1781 int s; 1782 1783 DNPRINTF(SR_D_DIS, "%s: sr_scsi_done: xs %p\n", DEVNAME(sd->sd_sc), xs); 1784 1785 s = splbio(); 1786 scsi_done(xs); 1787 splx(s); 1788 } 1789 1790 int 1791 sr_scsi_cmd(struct scsi_xfer *xs) 1792 { 1793 int s; 1794 struct scsi_link *link = xs->sc_link; 1795 struct sr_softc *sc = link->adapter_softc; 1796 struct sr_workunit *wu = NULL; 1797 struct sr_discipline *sd; 1798 1799 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: scsibus%d xs: %p " 1800 "flags: %#x\n", DEVNAME(sc), link->scsibus, xs, xs->flags); 1801 1802 sd = sc->sc_dis[link->scsibus]; 1803 if (sd == NULL) { 1804 s = splhigh(); 1805 sd = sc->sc_attach_dis; 1806 splx(s); 1807 1808 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: attaching %p\n", 1809 DEVNAME(sc), sd); 1810 if (sd == NULL) { 1811 printf("%s: sr_scsi_cmd NULL discipline\n", 1812 DEVNAME(sc)); 1813 goto stuffup; 1814 } 1815 } 1816 1817 if (sd->sd_deleted) { 1818 printf("%s: %s device is being deleted, failing io\n", 1819 DEVNAME(sc), sd->sd_meta->ssd_devname); 1820 goto stuffup; 1821 } 1822 1823 /* 1824 * we'll let the midlayer deal with stalls instead of being clever 1825 * and sending sr_wu_get !(xs->flags & SCSI_NOSLEEP) in cansleep 1826 */ 1827 if ((wu = sr_wu_get(sd, 0)) == NULL) { 
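		/* No free work unit; let the midlayer requeue the command. */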
1828 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd no wu\n", DEVNAME(sc)); 1829 return (NO_CCB); 1830 } 1831 1832 xs->error = XS_NOERROR; 1833 wu->swu_xs = xs; 1834 1835 /* the midlayer will query LUNs so report sense to stop scanning */ 1836 if (link->target != 0 || link->lun != 0) { 1837 DNPRINTF(SR_D_CMD, "%s: bad target:lun %d:%d\n", 1838 DEVNAME(sc), link->target, link->lun); 1839 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT | 1840 SSD_ERRCODE_VALID; 1841 sd->sd_scsi_sense.flags = SKEY_ILLEGAL_REQUEST; 1842 sd->sd_scsi_sense.add_sense_code = 0x25; 1843 sd->sd_scsi_sense.add_sense_code_qual = 0x00; 1844 sd->sd_scsi_sense.extra_len = 4; 1845 goto stuffup; 1846 } 1847 1848 switch (xs->cmd->opcode) { 1849 case READ_COMMAND: 1850 case READ_BIG: 1851 case READ_16: 1852 case WRITE_COMMAND: 1853 case WRITE_BIG: 1854 case WRITE_16: 1855 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: READ/WRITE %02x\n", 1856 DEVNAME(sc), xs->cmd->opcode); 1857 if (sd->sd_scsi_rw(wu)) 1858 goto stuffup; 1859 break; 1860 1861 case SYNCHRONIZE_CACHE: 1862 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: SYNCHRONIZE_CACHE\n", 1863 DEVNAME(sc)); 1864 if (sd->sd_scsi_sync(wu)) 1865 goto stuffup; 1866 goto complete; 1867 1868 case TEST_UNIT_READY: 1869 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: TEST_UNIT_READY\n", 1870 DEVNAME(sc)); 1871 if (sd->sd_scsi_tur(wu)) 1872 goto stuffup; 1873 goto complete; 1874 1875 case START_STOP: 1876 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: START_STOP\n", 1877 DEVNAME(sc)); 1878 if (sd->sd_scsi_start_stop(wu)) 1879 goto stuffup; 1880 goto complete; 1881 1882 case INQUIRY: 1883 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: INQUIRY\n", 1884 DEVNAME(sc)); 1885 if (sd->sd_scsi_inquiry(wu)) 1886 goto stuffup; 1887 goto complete; 1888 1889 case READ_CAPACITY: 1890 case READ_CAPACITY_16: 1891 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd READ CAPACITY 0x%02x\n", 1892 DEVNAME(sc), xs->cmd->opcode); 1893 if (sd->sd_scsi_read_cap(wu)) 1894 goto stuffup; 1895 goto complete; 1896 1897 case REQUEST_SENSE: 1898 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd REQUEST SENSE\n", 1899 DEVNAME(sc)); 1900 if (sd->sd_scsi_req_sense(wu)) 1901 goto stuffup; 1902 goto complete; 1903 1904 default: 1905 DNPRINTF(SR_D_CMD, "%s: unsupported scsi command %x\n", 1906 DEVNAME(sc), xs->cmd->opcode); 1907 /* XXX might need to add generic function to handle others */ 1908 goto stuffup; 1909 } 1910 1911 return (SUCCESSFULLY_QUEUED); 1912 stuffup: 1913 if (sd && sd->sd_scsi_sense.error_code) { 1914 xs->error = XS_SENSE; 1915 bcopy(&sd->sd_scsi_sense, &xs->sense, sizeof(xs->sense)); 1916 bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense)); 1917 } else { 1918 xs->error = XS_DRIVER_STUFFUP; 1919 xs->flags |= ITSDONE; 1920 } 1921 complete: 1922 if (wu) 1923 sr_wu_put(wu); 1924 sr_scsi_done(sd, xs); 1925 return (COMPLETE); 1926 } 1927 int 1928 sr_scsi_ioctl(struct scsi_link *link, u_long cmd, caddr_t addr, int flag, 1929 struct proc *p) 1930 { 1931 DNPRINTF(SR_D_IOCTL, "%s: sr_scsi_ioctl cmd: %#x\n", 1932 DEVNAME((struct sr_softc *)link->adapter_softc), cmd); 1933 1934 return (sr_ioctl(link->adapter_softc, cmd, addr)); 1935 } 1936 1937 int 1938 sr_ioctl(struct device *dev, u_long cmd, caddr_t addr) 1939 { 1940 struct sr_softc *sc = (struct sr_softc *)dev; 1941 int rv = 0; 1942 1943 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl ", DEVNAME(sc)); 1944 1945 rw_enter_write(&sc->sc_lock); 1946 1947 switch (cmd) { 1948 case BIOCINQ: 1949 DNPRINTF(SR_D_IOCTL, "inq\n"); 1950 rv = sr_ioctl_inq(sc, (struct bioc_inq *)addr); 1951 break; 1952 1953 case BIOCVOL: 1954 DNPRINTF(SR_D_IOCTL, "vol\n"); 1955 rv = 
sr_ioctl_vol(sc, (struct bioc_vol *)addr); 1956 break; 1957 1958 case BIOCDISK: 1959 DNPRINTF(SR_D_IOCTL, "disk\n"); 1960 rv = sr_ioctl_disk(sc, (struct bioc_disk *)addr); 1961 break; 1962 1963 case BIOCALARM: 1964 DNPRINTF(SR_D_IOCTL, "alarm\n"); 1965 /*rv = sr_ioctl_alarm(sc, (struct bioc_alarm *)addr); */ 1966 break; 1967 1968 case BIOCBLINK: 1969 DNPRINTF(SR_D_IOCTL, "blink\n"); 1970 /*rv = sr_ioctl_blink(sc, (struct bioc_blink *)addr); */ 1971 break; 1972 1973 case BIOCSETSTATE: 1974 DNPRINTF(SR_D_IOCTL, "setstate\n"); 1975 rv = sr_ioctl_setstate(sc, (struct bioc_setstate *)addr); 1976 break; 1977 1978 case BIOCCREATERAID: 1979 DNPRINTF(SR_D_IOCTL, "createraid\n"); 1980 rv = sr_ioctl_createraid(sc, (struct bioc_createraid *)addr, 1); 1981 break; 1982 1983 case BIOCDELETERAID: 1984 rv = sr_ioctl_deleteraid(sc, (struct bioc_deleteraid *)addr); 1985 break; 1986 default: 1987 DNPRINTF(SR_D_IOCTL, "invalid ioctl\n"); 1988 rv = ENOTTY; 1989 } 1990 1991 rw_exit_write(&sc->sc_lock); 1992 1993 return (rv); 1994 } 1995 1996 int 1997 sr_ioctl_inq(struct sr_softc *sc, struct bioc_inq *bi) 1998 { 1999 int i, vol, disk; 2000 2001 for (i = 0, vol = 0, disk = 0; i < SR_MAXSCSIBUS; i++) 2002 /* XXX this will not work when we stagger disciplines */ 2003 if (sc->sc_dis[i]) { 2004 vol++; 2005 disk += sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no; 2006 } 2007 2008 strlcpy(bi->bi_dev, sc->sc_dev.dv_xname, sizeof(bi->bi_dev)); 2009 bi->bi_novol = vol + sc->sc_hotspare_no; 2010 bi->bi_nodisk = disk + sc->sc_hotspare_no; 2011 2012 return (0); 2013 } 2014 2015 int 2016 sr_ioctl_vol(struct sr_softc *sc, struct bioc_vol *bv) 2017 { 2018 int i, vol, rv = EINVAL; 2019 struct sr_discipline *sd; 2020 struct sr_chunk *hotspare; 2021 daddr64_t rb, sz; 2022 2023 for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { 2024 /* XXX this will not work when we stagger disciplines */ 2025 if (sc->sc_dis[i]) 2026 vol++; 2027 if (vol != bv->bv_volid) 2028 continue; 2029 2030 sd = sc->sc_dis[i]; 2031 bv->bv_status = sd->sd_vol_status; 2032 bv->bv_size = sd->sd_meta->ssdi.ssd_size << DEV_BSHIFT; 2033 bv->bv_level = sd->sd_meta->ssdi.ssd_level; 2034 bv->bv_nodisk = sd->sd_meta->ssdi.ssd_chunk_no; 2035 if (bv->bv_status == BIOC_SVREBUILD) { 2036 sz = sd->sd_meta->ssdi.ssd_size; 2037 rb = sd->sd_meta->ssd_rebuild; 2038 if (rb > 0) 2039 bv->bv_percent = 100 - 2040 ((sz * 100 - rb * 100) / sz) - 1; 2041 else 2042 bv->bv_percent = 0; 2043 } 2044 strlcpy(bv->bv_dev, sd->sd_meta->ssd_devname, 2045 sizeof(bv->bv_dev)); 2046 strlcpy(bv->bv_vendor, sd->sd_meta->ssdi.ssd_vendor, 2047 sizeof(bv->bv_vendor)); 2048 rv = 0; 2049 goto done; 2050 } 2051 2052 /* Check hotspares list. */ 2053 SLIST_FOREACH(hotspare, &sc->sc_hotspare_list, src_link) { 2054 vol++; 2055 if (vol != bv->bv_volid) 2056 continue; 2057 2058 bv->bv_status = BIOC_SVONLINE; 2059 bv->bv_size = hotspare->src_meta.scmi.scm_size << DEV_BSHIFT; 2060 bv->bv_level = -1; /* Hotspare. 
*/ 2061 bv->bv_nodisk = 1; 2062 strlcpy(bv->bv_dev, hotspare->src_meta.scmi.scm_devname, 2063 sizeof(bv->bv_dev)); 2064 strlcpy(bv->bv_vendor, hotspare->src_meta.scmi.scm_devname, 2065 sizeof(bv->bv_vendor)); 2066 rv = 0; 2067 goto done; 2068 } 2069 2070 done: 2071 return (rv); 2072 } 2073 2074 int 2075 sr_ioctl_disk(struct sr_softc *sc, struct bioc_disk *bd) 2076 { 2077 int i, vol, rv = EINVAL, id; 2078 struct sr_chunk *src, *hotspare; 2079 2080 for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { 2081 /* XXX this will not work when we stagger disciplines */ 2082 if (sc->sc_dis[i]) 2083 vol++; 2084 if (vol != bd->bd_volid) 2085 continue; 2086 2087 id = bd->bd_diskid; 2088 if (id >= sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no) 2089 break; 2090 2091 src = sc->sc_dis[i]->sd_vol.sv_chunks[id]; 2092 bd->bd_status = src->src_meta.scm_status; 2093 bd->bd_size = src->src_meta.scmi.scm_size << DEV_BSHIFT; 2094 bd->bd_channel = vol; 2095 bd->bd_target = id; 2096 strlcpy(bd->bd_vendor, src->src_meta.scmi.scm_devname, 2097 sizeof(bd->bd_vendor)); 2098 rv = 0; 2099 goto done; 2100 } 2101 2102 /* Check hotspares list. */ 2103 SLIST_FOREACH(hotspare, &sc->sc_hotspare_list, src_link) { 2104 vol++; 2105 if (vol != bd->bd_volid) 2106 continue; 2107 2108 if (bd->bd_diskid != 0) 2109 break; 2110 2111 bd->bd_status = hotspare->src_meta.scm_status; 2112 bd->bd_size = hotspare->src_meta.scmi.scm_size << DEV_BSHIFT; 2113 bd->bd_channel = vol; 2114 bd->bd_target = bd->bd_diskid; 2115 strlcpy(bd->bd_vendor, hotspare->src_meta.scmi.scm_devname, 2116 sizeof(bd->bd_vendor)); 2117 rv = 0; 2118 goto done; 2119 } 2120 2121 done: 2122 return (rv); 2123 } 2124 2125 int 2126 sr_ioctl_setstate(struct sr_softc *sc, struct bioc_setstate *bs) 2127 { 2128 int rv = EINVAL; 2129 int i, vol, found, c; 2130 struct sr_discipline *sd = NULL; 2131 struct sr_chunk *ch_entry; 2132 struct sr_chunk_head *cl; 2133 2134 if (bs->bs_other_id_type == BIOC_SSOTHER_UNUSED) 2135 goto done; 2136 2137 if (bs->bs_status == BIOC_SSHOTSPARE) { 2138 rv = sr_hotspare(sc, (dev_t)bs->bs_other_id); 2139 goto done; 2140 } 2141 2142 for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { 2143 /* XXX this will not work when we stagger disciplines */ 2144 if (sc->sc_dis[i]) 2145 vol++; 2146 if (vol != bs->bs_volid) 2147 continue; 2148 sd = sc->sc_dis[i]; 2149 break; 2150 } 2151 if (sd == NULL) 2152 goto done; 2153 2154 switch (bs->bs_status) { 2155 case BIOC_SSOFFLINE: 2156 /* Take chunk offline */ 2157 found = c = 0; 2158 cl = &sd->sd_vol.sv_chunk_list; 2159 SLIST_FOREACH(ch_entry, cl, src_link) { 2160 if (ch_entry->src_dev_mm == bs->bs_other_id) { 2161 found = 1; 2162 break; 2163 } 2164 c++; 2165 } 2166 if (found == 0) { 2167 printf("%s: chunk not part of array\n", DEVNAME(sc)); 2168 goto done; 2169 } 2170 2171 /* XXX: check current state first */ 2172 sd->sd_set_chunk_state(sd, c, BIOC_SSOFFLINE); 2173 2174 if (sr_meta_save(sd, SR_META_DIRTY)) { 2175 printf("%s: could not save metadata to %s\n", 2176 DEVNAME(sc), sd->sd_meta->ssd_devname); 2177 goto done; 2178 } 2179 rv = 0; 2180 break; 2181 2182 case BIOC_SDSCRUB: 2183 break; 2184 2185 case BIOC_SSREBUILD: 2186 rv = sr_rebuild_init(sd, (dev_t)bs->bs_other_id); 2187 break; 2188 2189 default: 2190 printf("%s: unsupported state request %d\n", 2191 DEVNAME(sc), bs->bs_status); 2192 } 2193 2194 done: 2195 return (rv); 2196 } 2197 2198 int 2199 sr_chunk_in_use(struct sr_softc *sc, dev_t dev) 2200 { 2201 struct sr_discipline *sd; 2202 struct sr_chunk *chunk; 2203 int i, c; 2204 2205 /* See if chunk is already in use. 
*/ 2206 for (i = 0; i < SR_MAXSCSIBUS; i++) { 2207 if (!sc->sc_dis[i]) 2208 continue; 2209 sd = sc->sc_dis[i]; 2210 for (c = 0; c < sd->sd_meta->ssdi.ssd_chunk_no; c++) { 2211 chunk = sd->sd_vol.sv_chunks[c]; 2212 if (chunk->src_dev_mm == dev) 2213 return chunk->src_meta.scm_status; 2214 } 2215 } 2216 2217 /* Check hotspares list. */ 2218 SLIST_FOREACH(chunk, &sc->sc_hotspare_list, src_link) 2219 if (chunk->src_dev_mm == dev) 2220 return chunk->src_meta.scm_status; 2221 2222 return BIOC_SDINVALID; 2223 } 2224 2225 int 2226 sr_hotspare(struct sr_softc *sc, dev_t dev) 2227 { 2228 struct sr_discipline *sd = NULL; 2229 struct sr_metadata *sm = NULL; 2230 struct sr_meta_chunk *hm; 2231 struct sr_chunk_head *cl; 2232 struct sr_chunk *hotspare, *chunk, *last; 2233 struct sr_uuid uuid; 2234 struct disklabel label; 2235 struct vnode *vn; 2236 daddr64_t size; 2237 char devname[32]; 2238 int rv = EINVAL; 2239 int c, part, open = 0; 2240 2241 /* 2242 * Add device to global hotspares list. 2243 */ 2244 2245 sr_meta_getdevname(sc, dev, devname, sizeof(devname)); 2246 2247 /* Make sure chunk is not already in use. */ 2248 c = sr_chunk_in_use(sc, dev); 2249 if (c != BIOC_SDINVALID && c != BIOC_SDOFFLINE) { 2250 if (c == BIOC_SDHOTSPARE) 2251 printf("%s: %s is already a hotspare\n", 2252 DEVNAME(sc), devname); 2253 else 2254 printf("%s: %s is already in use\n", 2255 DEVNAME(sc), devname); 2256 goto done; 2257 } 2258 2259 /* XXX - See if there is an existing degraded volume... */ 2260 2261 /* Open device. */ 2262 if (bdevvp(dev, &vn)) { 2263 printf("%s:, sr_hotspare: can't allocate vnode\n", DEVNAME(sc)); 2264 goto done; 2265 } 2266 if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, 0)) { 2267 DNPRINTF(SR_D_META,"%s: sr_hotspare cannot open %s\n", 2268 DEVNAME(sc), devname); 2269 vput(vn); 2270 goto fail; 2271 } 2272 open = 1; /* close dev on error */ 2273 2274 /* Get partition details. */ 2275 part = DISKPART(dev); 2276 if (VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD, NOCRED, 0)) { 2277 DNPRINTF(SR_D_META, "%s: sr_hotspare ioctl failed\n", 2278 DEVNAME(sc)); 2279 VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, 0); 2280 vput(vn); 2281 goto fail; 2282 } 2283 if (label.d_partitions[part].p_fstype != FS_RAID) { 2284 printf("%s: %s partition not of type RAID (%d)\n", 2285 DEVNAME(sc), devname, 2286 label.d_partitions[part].p_fstype); 2287 goto fail; 2288 } 2289 2290 /* Calculate partition size. */ 2291 size = DL_GETPSIZE(&label.d_partitions[part]) - 2292 SR_META_SIZE - SR_META_OFFSET; 2293 2294 /* 2295 * Create and populate chunk metadata. 2296 */ 2297 2298 sr_uuid_get(&uuid); 2299 hotspare = malloc(sizeof(struct sr_chunk), M_DEVBUF, M_WAITOK | M_ZERO); 2300 2301 hotspare->src_dev_mm = dev; 2302 hotspare->src_vn = vn; 2303 strlcpy(hotspare->src_devname, devname, sizeof(hm->scmi.scm_devname)); 2304 hotspare->src_size = size; 2305 2306 hm = &hotspare->src_meta; 2307 hm->scmi.scm_volid = SR_HOTSPARE_VOLID; 2308 hm->scmi.scm_chunk_id = 0; 2309 hm->scmi.scm_size = size; 2310 hm->scmi.scm_coerced_size = size; 2311 strlcpy(hm->scmi.scm_devname, devname, sizeof(hm->scmi.scm_devname)); 2312 bcopy(&uuid, &hm->scmi.scm_uuid, sizeof(struct sr_uuid)); 2313 2314 sr_checksum(sc, hm, &hm->scm_checksum, 2315 sizeof(struct sr_meta_chunk_invariant)); 2316 2317 hm->scm_status = BIOC_SDHOTSPARE; 2318 2319 /* 2320 * Create and populate our own discipline and metadata. 
2321 */ 2322 2323 sm = malloc(sizeof(struct sr_metadata), M_DEVBUF, M_WAITOK | M_ZERO); 2324 sm->ssdi.ssd_magic = SR_MAGIC; 2325 sm->ssdi.ssd_version = SR_META_VERSION; 2326 sm->ssd_ondisk = 0; 2327 sm->ssdi.ssd_flags = 0; 2328 bcopy(&uuid, &sm->ssdi.ssd_uuid, sizeof(struct sr_uuid)); 2329 sm->ssdi.ssd_chunk_no = 1; 2330 sm->ssdi.ssd_volid = SR_HOTSPARE_VOLID; 2331 sm->ssdi.ssd_level = SR_HOTSPARE_LEVEL; 2332 sm->ssdi.ssd_size = size; 2333 strlcpy(sm->ssdi.ssd_vendor, "OPENBSD", sizeof(sm->ssdi.ssd_vendor)); 2334 snprintf(sm->ssdi.ssd_product, sizeof(sm->ssdi.ssd_product), 2335 "SR %s", "HOTSPARE"); 2336 snprintf(sm->ssdi.ssd_revision, sizeof(sm->ssdi.ssd_revision), 2337 "%03d", SR_META_VERSION); 2338 2339 sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO); 2340 sd->sd_sc = sc; 2341 sd->sd_meta = sm; 2342 sd->sd_meta_type = SR_META_F_NATIVE; 2343 sd->sd_vol_status = BIOC_SVONLINE; 2344 strlcpy(sd->sd_name, "HOTSPARE", sizeof(sd->sd_name)); 2345 2346 /* Add chunk to volume. */ 2347 sd->sd_vol.sv_chunks = malloc(sizeof(struct sr_chunk *), M_DEVBUF, 2348 M_WAITOK | M_ZERO); 2349 sd->sd_vol.sv_chunks[0] = hotspare; 2350 SLIST_INIT(&sd->sd_vol.sv_chunk_list); 2351 SLIST_INSERT_HEAD(&sd->sd_vol.sv_chunk_list, hotspare, src_link); 2352 2353 /* Save metadata. */ 2354 if (sr_meta_save(sd, SR_META_DIRTY)) { 2355 printf("%s: could not save metadata to %s\n", 2356 DEVNAME(sc), devname); 2357 goto fail; 2358 } 2359 2360 /* 2361 * Add chunk to hotspare list. 2362 */ 2363 rw_enter_write(&sc->sc_hs_lock); 2364 cl = &sc->sc_hotspare_list; 2365 if (SLIST_EMPTY(cl)) 2366 SLIST_INSERT_HEAD(cl, hotspare, src_link); 2367 else { 2368 SLIST_FOREACH(chunk, cl, src_link) 2369 last = chunk; 2370 SLIST_INSERT_AFTER(last, hotspare, src_link); 2371 } 2372 sc->sc_hotspare_no++; 2373 rw_exit_write(&sc->sc_hs_lock); 2374 2375 rv = 0; 2376 goto done; 2377 2378 fail: 2379 if (hotspare) 2380 free(hotspare, M_DEVBUF); 2381 2382 done: 2383 if (sd && sd->sd_vol.sv_chunks) 2384 free(sd->sd_vol.sv_chunks, M_DEVBUF); 2385 if (sd) 2386 free(sd, M_DEVBUF); 2387 if (sm) 2388 free(sm, M_DEVBUF); 2389 if (open) { 2390 VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, 0); 2391 vput(vn); 2392 } 2393 2394 return (rv); 2395 } 2396 2397 void 2398 sr_hotspare_rebuild_callback(void *arg1, void *arg2) 2399 { 2400 sr_hotspare_rebuild((struct sr_discipline *)arg1); 2401 } 2402 2403 void 2404 sr_hotspare_rebuild(struct sr_discipline *sd) 2405 { 2406 struct sr_chunk_head *cl; 2407 struct sr_chunk *hotspare, *chunk = NULL; 2408 struct sr_workunit *wu; 2409 struct sr_ccb *ccb; 2410 int i, s, chunk_no, busy; 2411 2412 /* 2413 * Attempt to locate a hotspare and initiate rebuild. 2414 */ 2415 2416 for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) { 2417 if (sd->sd_vol.sv_chunks[i]->src_meta.scm_status == 2418 BIOC_SDOFFLINE) { 2419 chunk_no = i; 2420 chunk = sd->sd_vol.sv_chunks[i]; 2421 break; 2422 } 2423 } 2424 2425 if (chunk == NULL) { 2426 printf("%s: no offline chunk found on %s!\n", 2427 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname); 2428 return; 2429 } 2430 2431 /* See if we have a suitable hotspare... 
*/ 2432 rw_enter_write(&sd->sd_sc->sc_hs_lock); 2433 cl = &sd->sd_sc->sc_hotspare_list; 2434 SLIST_FOREACH(hotspare, cl, src_link) 2435 if (hotspare->src_size >= chunk->src_size) 2436 break; 2437 2438 if (hotspare != NULL) { 2439 2440 printf("%s: %s volume degraded, will attempt to " 2441 "rebuild on hotspare %s\n", DEVNAME(sd->sd_sc), 2442 sd->sd_meta->ssd_devname, hotspare->src_devname); 2443 2444 /* 2445 * Ensure that all pending I/O completes on the failed chunk 2446 * before trying to initiate a rebuild. 2447 */ 2448 i = 0; 2449 do { 2450 busy = 0; 2451 2452 s = splbio(); 2453 TAILQ_FOREACH(wu, &sd->sd_wu_pendq, swu_link) { 2454 TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) { 2455 if (ccb->ccb_target == chunk_no) 2456 busy = 1; 2457 } 2458 } 2459 TAILQ_FOREACH(wu, &sd->sd_wu_defq, swu_link) { 2460 TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) { 2461 if (ccb->ccb_target == chunk_no) 2462 busy = 1; 2463 } 2464 } 2465 splx(s); 2466 2467 if (busy) { 2468 tsleep(sd, PRIBIO, "sr_hotspare", hz); 2469 i++; 2470 } 2471 2472 } while (busy && i < 120); 2473 2474 DNPRINTF(SR_D_META, "%s: waited %i seconds for I/O to " 2475 "complete on failed chunk %s\n", DEVNAME(sd->sd_sc), 2476 i, chunk->src_devname); 2477 2478 if (busy) { 2479 printf("%s: pending I/O failed to complete on " 2480 "failed chunk %s, hotspare rebuild aborted...\n", 2481 DEVNAME(sd->sd_sc), chunk->src_devname); 2482 goto done; 2483 } 2484 2485 s = splbio(); 2486 rw_enter_write(&sd->sd_sc->sc_lock); 2487 if (sr_rebuild_init(sd, hotspare->src_dev_mm) == 0) { 2488 2489 /* Remove hotspare from available list. */ 2490 sd->sd_sc->sc_hotspare_no--; 2491 SLIST_REMOVE(cl, hotspare, sr_chunk, src_link); 2492 free(hotspare, M_DEVBUF); 2493 2494 } 2495 rw_exit_write(&sd->sd_sc->sc_lock); 2496 splx(s); 2497 } 2498 done: 2499 rw_exit_write(&sd->sd_sc->sc_hs_lock); 2500 } 2501 2502 int 2503 sr_rebuild_init(struct sr_discipline *sd, dev_t dev) 2504 { 2505 struct sr_softc *sc = sd->sd_sc; 2506 int rv = EINVAL, part; 2507 int c, found, open = 0; 2508 char devname[32]; 2509 struct vnode *vn; 2510 daddr64_t size, csize; 2511 struct disklabel label; 2512 struct sr_meta_chunk *old, *new; 2513 2514 /* 2515 * Attempt to initiate a rebuild onto the specified device. 
2516 */
2517 
2518 if (!sd->sd_rebuild) {
2519 printf("%s: discipline does not support rebuild\n",
2520 DEVNAME(sc));
2521 goto done;
2522 }
2523 
2524 /* make sure volume is in the right state */
2525 if (sd->sd_vol_status == BIOC_SVREBUILD) {
2526 printf("%s: rebuild already in progress\n", DEVNAME(sc));
2527 goto done;
2528 }
2529 if (sd->sd_vol_status != BIOC_SVDEGRADED) {
2530 printf("%s: %s not degraded\n", DEVNAME(sc),
2531 sd->sd_meta->ssd_devname);
2532 goto done;
2533 }
2534 
2535 /* find offline chunk */
2536 for (c = 0, found = -1; c < sd->sd_meta->ssdi.ssd_chunk_no; c++)
2537 if (sd->sd_vol.sv_chunks[c]->src_meta.scm_status ==
2538 BIOC_SDOFFLINE) {
2539 found = c;
2540 new = &sd->sd_vol.sv_chunks[c]->src_meta;
2541 if (c > 0)
2542 break; /* roll at least once over the for */
2543 } else {
2544 csize = sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_size;
2545 old = &sd->sd_vol.sv_chunks[c]->src_meta;
2546 if (found != -1)
2547 break;
2548 }
2549 if (found == -1) {
2550 printf("%s: no offline chunks available for rebuild\n",
2551 DEVNAME(sc));
2552 goto done;
2553 }
2554 
2555 /* populate meta entry */
2556 sr_meta_getdevname(sc, dev, devname, sizeof(devname));
2557 if (bdevvp(dev, &vn)) {
2558 printf("%s: sr_rebuild_init: can't allocate vnode\n",
2559 DEVNAME(sc));
2560 goto done;
2561 }
2562 
2563 if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, 0)) {
2564 DNPRINTF(SR_D_META,"%s: sr_ioctl_setstate can't "
2565 "open %s\n", DEVNAME(sc), devname);
2566 vput(vn);
2567 goto done;
2568 }
2569 open = 1; /* close dev on error */
2570 
2571 /* get partition */
2572 part = DISKPART(dev);
2573 if (VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD, NOCRED, 0)) {
2574 DNPRINTF(SR_D_META, "%s: sr_ioctl_setstate ioctl failed\n",
2575 DEVNAME(sc));
2576 goto done;
2577 }
2578 if (label.d_partitions[part].p_fstype != FS_RAID) {
2579 printf("%s: %s partition not of type RAID (%d)\n",
2580 DEVNAME(sc), devname,
2581 label.d_partitions[part].p_fstype);
2582 goto done;
2583 }
2584 
2585 /* is partition large enough? */
2586 size = DL_GETPSIZE(&label.d_partitions[part]) -
2587 SR_META_SIZE - SR_META_OFFSET;
2588 if (size < csize) {
2589 printf("%s: partition too small, at least %llu B required\n",
2590 DEVNAME(sc), csize << DEV_BSHIFT);
2591 goto done;
2592 } else if (size > csize)
2593 printf("%s: partition too large, wasting %llu B\n",
2594 DEVNAME(sc), (size - csize) << DEV_BSHIFT);
2595 
2596 /* make sure we are not stomping on some other partition */
2597 c = sr_chunk_in_use(sc, dev);
2598 if (c != BIOC_SDINVALID && c != BIOC_SDOFFLINE) {
2599 printf("%s: %s is already in use\n", DEVNAME(sc), devname);
2600 goto done;
2601 }
2602 
2603 /* Reset rebuild counter since we are rebuilding onto a new chunk.
*/ 2604 sd->sd_meta->ssd_rebuild = 0; 2605 2606 /* recreate metadata */ 2607 open = 0; /* leave dev open from here on out */ 2608 sd->sd_vol.sv_chunks[found]->src_dev_mm = dev; 2609 sd->sd_vol.sv_chunks[found]->src_vn = vn; 2610 new->scmi.scm_volid = old->scmi.scm_volid; 2611 new->scmi.scm_chunk_id = found; 2612 strlcpy(new->scmi.scm_devname, devname, 2613 sizeof new->scmi.scm_devname); 2614 new->scmi.scm_size = size; 2615 new->scmi.scm_coerced_size = old->scmi.scm_coerced_size; 2616 bcopy(&old->scmi.scm_uuid, &new->scmi.scm_uuid, 2617 sizeof new->scmi.scm_uuid); 2618 sr_checksum(sc, new, &new->scm_checksum, 2619 sizeof(struct sr_meta_chunk_invariant)); 2620 sd->sd_set_chunk_state(sd, found, BIOC_SDREBUILD); 2621 if (sr_meta_save(sd, SR_META_DIRTY)) { 2622 printf("%s: could not save metadata to %s\n", 2623 DEVNAME(sc), devname); 2624 open = 1; 2625 goto done; 2626 } 2627 2628 printf("%s: rebuild of %s started on %s\n", DEVNAME(sc), 2629 sd->sd_meta->ssd_devname, devname); 2630 2631 sd->sd_reb_abort = 0; 2632 kthread_create_deferred(sr_rebuild, sd); 2633 2634 rv = 0; 2635 done: 2636 if (open) { 2637 VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, 0); 2638 vput(vn); 2639 } 2640 2641 return (rv); 2642 } 2643 2644 void 2645 sr_roam_chunks(struct sr_discipline *sd) 2646 { 2647 struct sr_softc *sc = sd->sd_sc; 2648 struct sr_chunk *chunk; 2649 struct sr_meta_chunk *meta; 2650 int roamed = 0; 2651 2652 /* Have any chunks roamed? */ 2653 SLIST_FOREACH(chunk, &sd->sd_vol.sv_chunk_list, src_link) { 2654 2655 meta = &chunk->src_meta; 2656 2657 if (strncmp(meta->scmi.scm_devname, chunk->src_devname, 2658 sizeof(meta->scmi.scm_devname))) { 2659 2660 printf("%s: roaming device %s -> %s\n", DEVNAME(sc), 2661 meta->scmi.scm_devname, chunk->src_devname); 2662 2663 strlcpy(meta->scmi.scm_devname, chunk->src_devname, 2664 sizeof(meta->scmi.scm_devname)); 2665 2666 roamed++; 2667 } 2668 } 2669 2670 if (roamed) 2671 sr_meta_save(sd, SR_META_DIRTY); 2672 } 2673 2674 int 2675 sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user) 2676 { 2677 dev_t *dt; 2678 int i, s, no_chunk, rv = EINVAL, vol; 2679 int no_meta, updatemeta = 0, disk = 1; 2680 u_int64_t vol_size; 2681 int32_t strip_size = 0; 2682 struct sr_chunk_head *cl; 2683 struct sr_discipline *sd = NULL; 2684 struct sr_chunk *ch_entry; 2685 struct device *dev, *dev2; 2686 struct scsibus_attach_args saa; 2687 2688 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_createraid(%d)\n", 2689 DEVNAME(sc), user); 2690 2691 /* user input */ 2692 if (bc->bc_dev_list_len > BIOC_CRMAXLEN) 2693 goto unwind; 2694 2695 dt = malloc(bc->bc_dev_list_len, M_DEVBUF, M_WAITOK | M_ZERO); 2696 if (user) { 2697 if (copyin(bc->bc_dev_list, dt, bc->bc_dev_list_len) != 0) 2698 goto unwind; 2699 } else 2700 bcopy(bc->bc_dev_list, dt, bc->bc_dev_list_len); 2701 2702 sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO); 2703 sd->sd_sc = sc; 2704 2705 no_chunk = bc->bc_dev_list_len / sizeof(dev_t); 2706 cl = &sd->sd_vol.sv_chunk_list; 2707 SLIST_INIT(cl); 2708 2709 sd->sd_meta_type = sr_meta_probe(sd, dt, no_chunk); 2710 if (sd->sd_meta_type == SR_META_F_INVALID) { 2711 printf("%s: invalid metadata format\n", DEVNAME(sc)); 2712 goto unwind; 2713 } 2714 2715 if (sr_meta_attach(sd, bc->bc_flags & BIOC_SCFORCE)) { 2716 printf("%s: can't attach metadata type %d\n", DEVNAME(sc), 2717 sd->sd_meta_type); 2718 goto unwind; 2719 } 2720 2721 /* force the raid volume by clearing metadata region */ 2722 if (bc->bc_flags & BIOC_SCFORCE) { 2723 /* make sure disk isn't up and running */ 
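/*
 * A non-zero return from sr_meta_read() below means existing (or
 * corrupt) metadata was found on the chunks: refuse to force-create
 * over a volume that is currently assembled, otherwise clear the
 * stale metadata so the volume can be recreated from scratch.
 */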
2724 if (sr_meta_read(sd)) 2725 if (sr_already_assembled(sd)) { 2726 printf("%s: disk ", DEVNAME(sc)); 2727 sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0); 2728 printf(" is currently in use; can't force " 2729 "create\n"); 2730 goto unwind; 2731 } 2732 2733 if (sr_meta_clear(sd)) { 2734 printf("%s: failed to clear metadata\n", DEVNAME(sc)); 2735 goto unwind; 2736 } 2737 } 2738 2739 if ((no_meta = sr_meta_read(sd)) == 0) { 2740 /* fill out all chunk metadata */ 2741 sr_meta_chunks_create(sc, cl); 2742 ch_entry = SLIST_FIRST(cl); 2743 2744 /* no metadata available */ 2745 switch (bc->bc_level) { 2746 case 0: 2747 if (no_chunk < 2) 2748 goto unwind; 2749 strlcpy(sd->sd_name, "RAID 0", sizeof(sd->sd_name)); 2750 /* 2751 * XXX add variable strip size later even though 2752 * MAXPHYS is really the clever value, users like 2753 * to tinker with that type of stuff 2754 */ 2755 strip_size = MAXPHYS; 2756 vol_size = 2757 (ch_entry->src_meta.scmi.scm_coerced_size & 2758 ~((strip_size >> DEV_BSHIFT) - 1)) * no_chunk; 2759 break; 2760 case 1: 2761 if (no_chunk < 2) 2762 goto unwind; 2763 strlcpy(sd->sd_name, "RAID 1", sizeof(sd->sd_name)); 2764 vol_size = ch_entry->src_meta.scmi.scm_coerced_size; 2765 break; 2766 case 4: 2767 case 5: 2768 if (no_chunk < 3) 2769 goto unwind; 2770 if (bc->bc_level == 4) 2771 strlcpy(sd->sd_name, "RAID 4", 2772 sizeof(sd->sd_name)); 2773 else 2774 strlcpy(sd->sd_name, "RAID 5", 2775 sizeof(sd->sd_name)); 2776 /* 2777 * XXX add variable strip size later even though 2778 * MAXPHYS is really the clever value, users like 2779 * to tinker with that type of stuff 2780 */ 2781 strip_size = MAXPHYS; 2782 vol_size = 2783 (ch_entry->src_meta.scmi.scm_coerced_size & 2784 ~((strip_size >> DEV_BSHIFT) - 1)) * (no_chunk - 1); 2785 break; 2786 //#ifdef not_yet 2787 case 6: 2788 if (no_chunk < 4) 2789 goto unwind; 2790 strlcpy(sd->sd_name, "RAID 6", 2791 sizeof(sd->sd_name)); 2792 /* 2793 * XXX add variable strip size later even though 2794 * MAXPHYS is really the clever value, users like 2795 * to tinker with that type of stuff 2796 */ 2797 strip_size = MAXPHYS; 2798 vol_size = 2799 (ch_entry->src_meta.scmi.scm_coerced_size & 2800 ~((strip_size >> DEV_BSHIFT) - 1)) * (no_chunk - 2); 2801 break; 2802 //#endif /* not_yet */ 2803 #ifdef AOE 2804 #ifdef not_yet 2805 case 'A': 2806 /* target */ 2807 if (no_chunk != 1) 2808 goto unwind; 2809 strlcpy(sd->sd_name, "AOE TARG", sizeof(sd->sd_name)); 2810 vol_size = ch_entry->src_meta.scmi.scm_coerced_size; 2811 break; 2812 case 'a': 2813 /* initiator */ 2814 if (no_chunk != 1) 2815 goto unwind; 2816 strlcpy(sd->sd_name, "AOE INIT", sizeof(sd->sd_name)); 2817 break; 2818 #endif /* not_yet */ 2819 #endif /* AOE */ 2820 #ifdef CRYPTO 2821 case 'C': 2822 DNPRINTF(SR_D_IOCTL, 2823 "%s: sr_ioctl_createraid: no_chunk %d\n", 2824 DEVNAME(sc), no_chunk); 2825 2826 if (no_chunk != 1) 2827 goto unwind; 2828 2829 /* no hint available yet */ 2830 if (bc->bc_opaque_flags & BIOC_SOOUT) { 2831 bc->bc_opaque_status = BIOC_SOINOUT_FAILED; 2832 rv = 0; 2833 goto unwind; 2834 } 2835 2836 if (!(bc->bc_flags & BIOC_SCNOAUTOASSEMBLE)) 2837 goto unwind; 2838 2839 if (sr_crypto_get_kdf(bc, sd)) 2840 goto unwind; 2841 2842 strlcpy(sd->sd_name, "CRYPTO", sizeof(sd->sd_name)); 2843 vol_size = ch_entry->src_meta.scmi.scm_size; 2844 2845 sr_crypto_create_keys(sd); 2846 2847 break; 2848 #endif /* CRYPTO */ 2849 default: 2850 goto unwind; 2851 } 2852 2853 /* fill out all volume metadata */ 2854 DNPRINTF(SR_D_IOCTL, 2855 "%s: sr_ioctl_createraid: vol_size: %lld\n", 2856 
DEVNAME(sc), vol_size); 2857 sd->sd_meta->ssdi.ssd_chunk_no = no_chunk; 2858 sd->sd_meta->ssdi.ssd_size = vol_size; 2859 sd->sd_vol_status = BIOC_SVONLINE; 2860 sd->sd_meta->ssdi.ssd_level = bc->bc_level; 2861 sd->sd_meta->ssdi.ssd_strip_size = strip_size; 2862 strlcpy(sd->sd_meta->ssdi.ssd_vendor, "OPENBSD", 2863 sizeof(sd->sd_meta->ssdi.ssd_vendor)); 2864 snprintf(sd->sd_meta->ssdi.ssd_product, 2865 sizeof(sd->sd_meta->ssdi.ssd_product), "SR %s", 2866 sd->sd_name); 2867 snprintf(sd->sd_meta->ssdi.ssd_revision, 2868 sizeof(sd->sd_meta->ssdi.ssd_revision), "%03d", 2869 SR_META_VERSION); 2870 2871 sd->sd_meta_flags = bc->bc_flags & BIOC_SCNOAUTOASSEMBLE; 2872 updatemeta = 1; 2873 } else if (no_meta == no_chunk) { 2874 if (sd->sd_meta->ssd_meta_flags & SR_META_DIRTY) 2875 printf("%s: %s was not shutdown properly\n", 2876 DEVNAME(sc), sd->sd_meta->ssd_devname); 2877 if (user == 0 && sd->sd_meta_flags & BIOC_SCNOAUTOASSEMBLE) { 2878 DNPRINTF(SR_D_META, "%s: disk not auto assembled from " 2879 "metadata\n", DEVNAME(sc)); 2880 goto unwind; 2881 } 2882 if (sr_already_assembled(sd)) { 2883 printf("%s: disk ", DEVNAME(sc)); 2884 sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0); 2885 printf(" already assembled\n"); 2886 goto unwind; 2887 } 2888 #ifdef CRYPTO 2889 /* provide userland with kdf hint */ 2890 if (bc->bc_opaque_flags & BIOC_SOOUT) { 2891 if (bc->bc_opaque == NULL) 2892 goto unwind; 2893 2894 if (sizeof(sd->mds.mdd_crypto.scr_meta.scm_kdfhint) < 2895 bc->bc_opaque_size) 2896 goto unwind; 2897 2898 if (copyout(sd->mds.mdd_crypto.scr_meta.scm_kdfhint, 2899 bc->bc_opaque, bc->bc_opaque_size)) 2900 goto unwind; 2901 2902 /* we're done */ 2903 bc->bc_opaque_status = BIOC_SOINOUT_OK; 2904 rv = 0; 2905 goto unwind; 2906 } 2907 /* get kdf with maskkey from userland */ 2908 if (bc->bc_opaque_flags & BIOC_SOIN) { 2909 if (sr_crypto_get_kdf(bc, sd)) 2910 goto unwind; 2911 } 2912 #endif /* CRYPTO */ 2913 DNPRINTF(SR_D_META, "%s: disk assembled from metadata\n", 2914 DEVNAME(sc)); 2915 updatemeta = 0; 2916 } else if (no_meta == -1) { 2917 printf("%s: one of the chunks has corrupt metadata; aborting " 2918 "assembly\n", DEVNAME(sc)); 2919 goto unwind; 2920 } else { 2921 if (sr_already_assembled(sd)) { 2922 printf("%s: disk ", DEVNAME(sc)); 2923 sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0); 2924 printf(" already assembled; will not partial " 2925 "assemble it\n"); 2926 goto unwind; 2927 } 2928 printf("%s: trying to bring up %s degraded\n", DEVNAME(sc), 2929 sd->sd_meta->ssd_devname); 2930 } 2931 2932 /* metadata SHALL be fully filled in at this point */ 2933 2934 /* Make sure that metadata level matches assembly level. 
*/ 2935 if (sd->sd_meta->ssdi.ssd_level != bc->bc_level) { 2936 printf("%s: volume level does not match metadata level!\n", 2937 DEVNAME(sc)); 2938 goto unwind; 2939 } 2940 2941 if (sr_discipline_init(sd, sd->sd_meta->ssdi.ssd_level)) { 2942 printf("%s: could not initialize discipline\n", DEVNAME(sc)); 2943 goto unwind; 2944 } 2945 2946 /* allocate all resources */ 2947 if ((rv = sd->sd_alloc_resources(sd))) 2948 goto unwind; 2949 2950 if (disk) { 2951 /* set volume status */ 2952 sd->sd_set_vol_state(sd); 2953 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 2954 printf("%s: %s offline, will not be brought online\n", 2955 DEVNAME(sc), sd->sd_meta->ssd_devname); 2956 goto unwind; 2957 } 2958 2959 /* setup scsi midlayer */ 2960 if (sd->sd_openings) 2961 sd->sd_link.openings = sd->sd_openings(sd); 2962 else 2963 sd->sd_link.openings = sd->sd_max_wu; 2964 sd->sd_link.device = &sr_dev; 2965 sd->sd_link.device_softc = sc; 2966 sd->sd_link.adapter_softc = sc; 2967 sd->sd_link.adapter = &sr_switch; 2968 sd->sd_link.adapter_target = SR_MAX_LD; 2969 sd->sd_link.adapter_buswidth = 1; 2970 bzero(&saa, sizeof(saa)); 2971 saa.saa_sc_link = &sd->sd_link; 2972 2973 /* 2974 * we passed all checks return ENXIO if volume can't be created 2975 */ 2976 rv = ENXIO; 2977 2978 /* clear sense data */ 2979 bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense)); 2980 2981 /* use temporary discipline pointer */ 2982 s = splhigh(); 2983 sc->sc_attach_dis = sd; 2984 splx(s); 2985 dev2 = config_found(&sc->sc_dev, &saa, scsiprint); 2986 s = splhigh(); 2987 sc->sc_attach_dis = NULL; 2988 splx(s); 2989 TAILQ_FOREACH(dev, &alldevs, dv_list) 2990 if (dev->dv_parent == dev2) 2991 break; 2992 if (dev == NULL) 2993 goto unwind; 2994 2995 DNPRINTF(SR_D_IOCTL, "%s: sr device added: %s on scsibus%d\n", 2996 DEVNAME(sc), dev->dv_xname, sd->sd_link.scsibus); 2997 2998 sc->sc_dis[sd->sd_link.scsibus] = sd; 2999 for (i = 0, vol = -1; i <= sd->sd_link.scsibus; i++) 3000 if (sc->sc_dis[i]) 3001 vol++; 3002 sd->sd_scsibus_dev = dev2; 3003 3004 rv = 0; 3005 if (updatemeta) { 3006 /* fill out remaining volume metadata */ 3007 sd->sd_meta->ssdi.ssd_volid = vol; 3008 strlcpy(sd->sd_meta->ssd_devname, dev->dv_xname, 3009 sizeof(sd->sd_meta->ssd_devname)); 3010 sr_meta_init(sd, cl); 3011 } else { 3012 if (strncmp(sd->sd_meta->ssd_devname, dev->dv_xname, 3013 sizeof(dev->dv_xname))) { 3014 printf("%s: volume %s is roaming, it used to " 3015 "be %s, updating metadata\n", 3016 DEVNAME(sc), dev->dv_xname, 3017 sd->sd_meta->ssd_devname); 3018 3019 sd->sd_meta->ssdi.ssd_volid = vol; 3020 strlcpy(sd->sd_meta->ssd_devname, dev->dv_xname, 3021 sizeof(sd->sd_meta->ssd_devname)); 3022 } 3023 } 3024 3025 /* Update device name on any chunks which roamed. 
*/ 3026 sr_roam_chunks(sd); 3027 3028 #ifndef SMALL_KERNEL 3029 if (sr_sensors_create(sd)) 3030 printf("%s: unable to create sensor for %s\n", 3031 DEVNAME(sc), dev->dv_xname); 3032 else 3033 sd->sd_vol.sv_sensor_valid = 1; 3034 #endif /* SMALL_KERNEL */ 3035 } else { 3036 /* we are not an os disk */ 3037 if (updatemeta) { 3038 /* fill out remaining volume metadata */ 3039 sd->sd_meta->ssdi.ssd_volid = 0; 3040 strlcpy(sd->sd_meta->ssd_devname, ch_entry->src_devname, 3041 sizeof(sd->sd_meta->ssd_devname)); 3042 sr_meta_init(sd, cl); 3043 } 3044 if (sd->sd_start_discipline(sd)) 3045 goto unwind; 3046 } 3047 3048 /* save metadata to disk */ 3049 rv = sr_meta_save(sd, SR_META_DIRTY); 3050 sd->sd_shutdownhook = shutdownhook_establish(sr_shutdown, sd); 3051 3052 if (sd->sd_vol_status == BIOC_SVREBUILD) 3053 kthread_create_deferred(sr_rebuild, sd); 3054 3055 sd->sd_ready = 1; 3056 3057 return (rv); 3058 unwind: 3059 sr_discipline_shutdown(sd); 3060 3061 return (rv); 3062 } 3063 3064 int 3065 sr_ioctl_deleteraid(struct sr_softc *sc, struct bioc_deleteraid *dr) 3066 { 3067 struct sr_discipline *sd = NULL; 3068 int rv = 1; 3069 int i; 3070 3071 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_deleteraid %s\n", DEVNAME(sc), 3072 dr->bd_dev); 3073 3074 for (i = 0; i < SR_MAXSCSIBUS; i++) 3075 if (sc->sc_dis[i]) { 3076 if (!strncmp(sc->sc_dis[i]->sd_meta->ssd_devname, 3077 dr->bd_dev, 3078 sizeof(sc->sc_dis[i]->sd_meta->ssd_devname))) { 3079 sd = sc->sc_dis[i]; 3080 break; 3081 } 3082 } 3083 3084 if (sd == NULL) 3085 goto bad; 3086 3087 sd->sd_deleted = 1; 3088 sd->sd_meta->ssdi.ssd_flags = BIOC_SCNOAUTOASSEMBLE; 3089 sr_shutdown(sd); 3090 3091 rv = 0; 3092 bad: 3093 return (rv); 3094 } 3095 3096 void 3097 sr_chunks_unwind(struct sr_softc *sc, struct sr_chunk_head *cl) 3098 { 3099 struct sr_chunk *ch_entry, *ch_next; 3100 3101 DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind\n", DEVNAME(sc)); 3102 3103 if (!cl) 3104 return; 3105 3106 for (ch_entry = SLIST_FIRST(cl); 3107 ch_entry != SLIST_END(cl); ch_entry = ch_next) { 3108 ch_next = SLIST_NEXT(ch_entry, src_link); 3109 3110 DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind closing: %s\n", 3111 DEVNAME(sc), ch_entry->src_devname); 3112 if (ch_entry->src_vn) { 3113 VOP_CLOSE(ch_entry->src_vn, FREAD | FWRITE, NOCRED, 0); 3114 vput(ch_entry->src_vn); 3115 } 3116 free(ch_entry, M_DEVBUF); 3117 } 3118 SLIST_INIT(cl); 3119 } 3120 3121 void 3122 sr_discipline_free(struct sr_discipline *sd) 3123 { 3124 struct sr_softc *sc; 3125 int i; 3126 3127 if (!sd) 3128 return; 3129 3130 sc = sd->sd_sc; 3131 3132 DNPRINTF(SR_D_DIS, "%s: sr_discipline_free %s\n", 3133 DEVNAME(sc), 3134 sd->sd_meta ? sd->sd_meta->ssd_devname : "nodev"); 3135 if (sd->sd_free_resources) 3136 sd->sd_free_resources(sd); 3137 if (sd->sd_vol.sv_chunks) 3138 free(sd->sd_vol.sv_chunks, M_DEVBUF); 3139 if (sd->sd_meta) 3140 free(sd->sd_meta, M_DEVBUF); 3141 if (sd->sd_meta_foreign) 3142 free(sd->sd_meta_foreign, M_DEVBUF); 3143 3144 for (i = 0; i < SR_MAXSCSIBUS; i++) 3145 if (sc->sc_dis[i] == sd) { 3146 sc->sc_dis[i] = NULL; 3147 break; 3148 } 3149 3150 free(sd, M_DEVBUF); 3151 } 3152 3153 void 3154 sr_discipline_shutdown(struct sr_discipline *sd) 3155 { 3156 struct sr_softc *sc = sd->sd_sc; 3157 int s; 3158 3159 if (!sd || !sc) 3160 return; 3161 3162 DNPRINTF(SR_D_DIS, "%s: sr_discipline_shutdown %s\n", DEVNAME(sc), 3163 sd->sd_meta ? 
sd->sd_meta->ssd_devname : "nodev"); 3164 3165 s = splbio(); 3166 3167 sd->sd_ready = 0; 3168 3169 if (sd->sd_shutdownhook) 3170 shutdownhook_disestablish(sd->sd_shutdownhook); 3171 3172 /* make sure there isn't a sync pending and yield */ 3173 wakeup(sd); 3174 while (sd->sd_sync || sd->sd_must_flush) 3175 if (tsleep(&sd->sd_sync, MAXPRI, "sr_down", 60 * hz) == 3176 EWOULDBLOCK) 3177 break; 3178 3179 #ifndef SMALL_KERNEL 3180 sr_sensors_delete(sd); 3181 #endif /* SMALL_KERNEL */ 3182 3183 if (sd->sd_scsibus_dev) 3184 config_detach(sd->sd_scsibus_dev, DETACH_FORCE); 3185 3186 sr_chunks_unwind(sc, &sd->sd_vol.sv_chunk_list); 3187 3188 if (sd) 3189 sr_discipline_free(sd); 3190 3191 splx(s); 3192 } 3193 3194 int 3195 sr_discipline_init(struct sr_discipline *sd, int level) 3196 { 3197 int rv = 1; 3198 3199 switch (level) { 3200 case 0: 3201 sr_raid0_discipline_init(sd); 3202 break; 3203 case 1: 3204 sr_raid1_discipline_init(sd); 3205 break; 3206 case 4: 3207 case 5: 3208 if (level == 4) 3209 sd->sd_type = SR_MD_RAID4; 3210 else 3211 sd->sd_type = SR_MD_RAID5; 3212 sr_raidp_discipline_init(sd); 3213 break; 3214 case 6: 3215 sd->sd_type = SR_MD_RAID6; 3216 sr_raid6_discipline_init(sd); 3217 break; 3218 #ifdef AOE 3219 /* AOE target. */ 3220 case 'A': 3221 sr_aoe_server_discipline_init(sd); 3222 break; 3223 /* AOE initiator. */ 3224 case 'a': 3225 sr_aoe_discipline_init(sd); 3226 break; 3227 #endif 3228 #ifdef CRYPTO 3229 case 'C': 3230 sr_crypto_discipline_init(sd); 3231 break; 3232 #endif 3233 default: 3234 goto bad; 3235 } 3236 3237 rv = 0; 3238 bad: 3239 return (rv); 3240 } 3241 3242 int 3243 sr_raid_inquiry(struct sr_workunit *wu) 3244 { 3245 struct sr_discipline *sd = wu->swu_dis; 3246 struct scsi_xfer *xs = wu->swu_xs; 3247 struct scsi_inquiry_data inq; 3248 3249 DNPRINTF(SR_D_DIS, "%s: sr_raid_inquiry\n", DEVNAME(sd->sd_sc)); 3250 3251 bzero(&inq, sizeof(inq)); 3252 inq.device = T_DIRECT; 3253 inq.dev_qual2 = 0; 3254 inq.version = 2; 3255 inq.response_format = 2; 3256 inq.additional_length = 32; 3257 strlcpy(inq.vendor, sd->sd_meta->ssdi.ssd_vendor, 3258 sizeof(inq.vendor)); 3259 strlcpy(inq.product, sd->sd_meta->ssdi.ssd_product, 3260 sizeof(inq.product)); 3261 strlcpy(inq.revision, sd->sd_meta->ssdi.ssd_revision, 3262 sizeof(inq.revision)); 3263 sr_copy_internal_data(xs, &inq, sizeof(inq)); 3264 3265 return (0); 3266 } 3267 3268 int 3269 sr_raid_read_cap(struct sr_workunit *wu) 3270 { 3271 struct sr_discipline *sd = wu->swu_dis; 3272 struct scsi_xfer *xs = wu->swu_xs; 3273 struct scsi_read_cap_data rcd; 3274 struct scsi_read_cap_data_16 rcd16; 3275 int rv = 1; 3276 3277 DNPRINTF(SR_D_DIS, "%s: sr_raid_read_cap\n", DEVNAME(sd->sd_sc)); 3278 3279 if (xs->cmd->opcode == READ_CAPACITY) { 3280 bzero(&rcd, sizeof(rcd)); 3281 if (sd->sd_meta->ssdi.ssd_size > 0xffffffffllu) 3282 _lto4b(0xffffffff, rcd.addr); 3283 else 3284 _lto4b(sd->sd_meta->ssdi.ssd_size, rcd.addr); 3285 _lto4b(512, rcd.length); 3286 sr_copy_internal_data(xs, &rcd, sizeof(rcd)); 3287 rv = 0; 3288 } else if (xs->cmd->opcode == READ_CAPACITY_16) { 3289 bzero(&rcd16, sizeof(rcd16)); 3290 _lto8b(sd->sd_meta->ssdi.ssd_size, rcd16.addr); 3291 _lto4b(512, rcd16.length); 3292 sr_copy_internal_data(xs, &rcd16, sizeof(rcd16)); 3293 rv = 0; 3294 } 3295 3296 return (rv); 3297 } 3298 3299 int 3300 sr_raid_tur(struct sr_workunit *wu) 3301 { 3302 struct sr_discipline *sd = wu->swu_dis; 3303 3304 DNPRINTF(SR_D_DIS, "%s: sr_raid_tur\n", DEVNAME(sd->sd_sc)); 3305 3306 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 3307 sd->sd_scsi_sense.error_code 
= SSD_ERRCODE_CURRENT; 3308 sd->sd_scsi_sense.flags = SKEY_NOT_READY; 3309 sd->sd_scsi_sense.add_sense_code = 0x04; 3310 sd->sd_scsi_sense.add_sense_code_qual = 0x11; 3311 sd->sd_scsi_sense.extra_len = 4; 3312 return (1); 3313 } else if (sd->sd_vol_status == BIOC_SVINVALID) { 3314 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT; 3315 sd->sd_scsi_sense.flags = SKEY_HARDWARE_ERROR; 3316 sd->sd_scsi_sense.add_sense_code = 0x05; 3317 sd->sd_scsi_sense.add_sense_code_qual = 0x00; 3318 sd->sd_scsi_sense.extra_len = 4; 3319 return (1); 3320 } 3321 3322 return (0); 3323 } 3324 3325 int 3326 sr_raid_request_sense(struct sr_workunit *wu) 3327 { 3328 struct sr_discipline *sd = wu->swu_dis; 3329 struct scsi_xfer *xs = wu->swu_xs; 3330 3331 DNPRINTF(SR_D_DIS, "%s: sr_raid_request_sense\n", 3332 DEVNAME(sd->sd_sc)); 3333 3334 /* use latest sense data */ 3335 bcopy(&sd->sd_scsi_sense, &xs->sense, sizeof(xs->sense)); 3336 3337 /* clear sense data */ 3338 bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense)); 3339 3340 return (0); 3341 } 3342 3343 int 3344 sr_raid_start_stop(struct sr_workunit *wu) 3345 { 3346 struct sr_discipline *sd = wu->swu_dis; 3347 struct scsi_xfer *xs = wu->swu_xs; 3348 struct scsi_start_stop *ss = (struct scsi_start_stop *)xs->cmd; 3349 int rv = 1; 3350 3351 DNPRINTF(SR_D_DIS, "%s: sr_raid_start_stop\n", 3352 DEVNAME(sd->sd_sc)); 3353 3354 if (!ss) 3355 return (rv); 3356 3357 if (ss->byte2 == 0x00) { 3358 /* START */ 3359 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 3360 /* bring volume online */ 3361 /* XXX check to see if volume can be brought online */ 3362 sd->sd_vol_status = BIOC_SVONLINE; 3363 } 3364 rv = 0; 3365 } else /* XXX is this the check? if (byte == 0x01) */ { 3366 /* STOP */ 3367 if (sd->sd_vol_status == BIOC_SVONLINE) { 3368 /* bring volume offline */ 3369 sd->sd_vol_status = BIOC_SVOFFLINE; 3370 } 3371 rv = 0; 3372 } 3373 3374 return (rv); 3375 } 3376 3377 int 3378 sr_raid_sync(struct sr_workunit *wu) 3379 { 3380 struct sr_discipline *sd = wu->swu_dis; 3381 int s, rv = 0, ios; 3382 3383 DNPRINTF(SR_D_DIS, "%s: sr_raid_sync\n", DEVNAME(sd->sd_sc)); 3384 3385 /* when doing a fake sync don't count the wu */ 3386 ios = wu->swu_fake ? 
0 : 1; 3387 3388 s = splbio(); 3389 sd->sd_sync = 1; 3390 3391 while (sd->sd_wu_pending > ios) 3392 if (tsleep(sd, PRIBIO, "sr_sync", 15 * hz) == EWOULDBLOCK) { 3393 DNPRINTF(SR_D_DIS, "%s: sr_raid_sync timeout\n", 3394 DEVNAME(sd->sd_sc)); 3395 rv = 1; 3396 break; 3397 } 3398 3399 sd->sd_sync = 0; 3400 splx(s); 3401 3402 wakeup(&sd->sd_sync); 3403 3404 return (rv); 3405 } 3406 3407 void 3408 sr_raid_startwu(struct sr_workunit *wu) 3409 { 3410 struct sr_discipline *sd = wu->swu_dis; 3411 struct sr_ccb *ccb; 3412 3413 splassert(IPL_BIO); 3414 3415 if (wu->swu_state == SR_WU_RESTART) 3416 /* 3417 * no need to put the wu on the pending queue since we 3418 * are restarting the io 3419 */ 3420 ; 3421 else 3422 /* move wu to pending queue */ 3423 TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link); 3424 3425 /* start all individual ios */ 3426 TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) { 3427 VOP_STRATEGY(&ccb->ccb_buf); 3428 } 3429 } 3430 3431 void 3432 sr_checksum_print(u_int8_t *md5) 3433 { 3434 int i; 3435 3436 for (i = 0; i < MD5_DIGEST_LENGTH; i++) 3437 printf("%02x", md5[i]); 3438 } 3439 3440 void 3441 sr_checksum(struct sr_softc *sc, void *src, void *md5, u_int32_t len) 3442 { 3443 MD5_CTX ctx; 3444 3445 DNPRINTF(SR_D_MISC, "%s: sr_checksum(%p %p %d)\n", DEVNAME(sc), src, 3446 md5, len); 3447 3448 MD5Init(&ctx); 3449 MD5Update(&ctx, src, len); 3450 MD5Final(md5, &ctx); 3451 } 3452 3453 void 3454 sr_uuid_get(struct sr_uuid *uuid) 3455 { 3456 arc4random_buf(uuid->sui_id, sizeof(uuid->sui_id)); 3457 /* UUID version 4: random */ 3458 uuid->sui_id[6] &= 0x0f; 3459 uuid->sui_id[6] |= 0x40; 3460 /* RFC4122 variant */ 3461 uuid->sui_id[8] &= 0x3f; 3462 uuid->sui_id[8] |= 0x80; 3463 } 3464 3465 void 3466 sr_uuid_print(struct sr_uuid *uuid, int cr) 3467 { 3468 printf("%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-" 3469 "%02x%02x%02x%02x%02x%02x", 3470 uuid->sui_id[0], uuid->sui_id[1], 3471 uuid->sui_id[2], uuid->sui_id[3], 3472 uuid->sui_id[4], uuid->sui_id[5], 3473 uuid->sui_id[6], uuid->sui_id[7], 3474 uuid->sui_id[8], uuid->sui_id[9], 3475 uuid->sui_id[10], uuid->sui_id[11], 3476 uuid->sui_id[12], uuid->sui_id[13], 3477 uuid->sui_id[14], uuid->sui_id[15]); 3478 3479 if (cr) 3480 printf("\n"); 3481 } 3482 3483 int 3484 sr_already_assembled(struct sr_discipline *sd) 3485 { 3486 struct sr_softc *sc = sd->sd_sc; 3487 int i; 3488 3489 for (i = 0; i < SR_MAXSCSIBUS; i++) 3490 if (sc->sc_dis[i]) 3491 if (!bcmp(&sd->sd_meta->ssdi.ssd_uuid, 3492 &sc->sc_dis[i]->sd_meta->ssdi.ssd_uuid, 3493 sizeof(sd->sd_meta->ssdi.ssd_uuid))) 3494 return (1); 3495 3496 return (0); 3497 } 3498 3499 int32_t 3500 sr_validate_stripsize(u_int32_t b) 3501 { 3502 int s = 0; 3503 3504 if (b % 512) 3505 return (-1); 3506 3507 while ((b & 1) == 0) { 3508 b >>= 1; 3509 s++; 3510 } 3511 3512 /* only multiple of twos */ 3513 b >>= 1; 3514 if (b) 3515 return(-1); 3516 3517 return (s); 3518 } 3519 3520 void 3521 sr_shutdown(void *arg) 3522 { 3523 struct sr_discipline *sd = arg; 3524 #ifdef SR_DEBUG 3525 struct sr_softc *sc = sd->sd_sc; 3526 #endif 3527 DNPRINTF(SR_D_DIS, "%s: sr_shutdown %s\n", 3528 DEVNAME(sc), sd->sd_meta->ssd_devname); 3529 3530 /* abort rebuild and drain io */ 3531 sd->sd_reb_abort = 1; 3532 while (sd->sd_reb_active) 3533 tsleep(sd, PWAIT, "sr_shutdown", 1); 3534 3535 sr_meta_save(sd, 0); 3536 3537 sr_discipline_shutdown(sd); 3538 } 3539 3540 int 3541 sr_validate_io(struct sr_workunit *wu, daddr64_t *blk, char *func) 3542 { 3543 struct sr_discipline *sd = wu->swu_dis; 3544 struct scsi_xfer *xs = wu->swu_xs; 3545 int 
rv = 1; 3546 3547 DNPRINTF(SR_D_DIS, "%s: %s 0x%02x\n", DEVNAME(sd->sd_sc), func, 3548 xs->cmd->opcode); 3549 3550 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 3551 DNPRINTF(SR_D_DIS, "%s: %s device offline\n", 3552 DEVNAME(sd->sd_sc), func); 3553 goto bad; 3554 } 3555 3556 if (xs->datalen == 0) { 3557 printf("%s: %s: illegal block count for %s\n", 3558 DEVNAME(sd->sd_sc), func, sd->sd_meta->ssd_devname); 3559 goto bad; 3560 } 3561 3562 if (xs->cmdlen == 10) 3563 *blk = _4btol(((struct scsi_rw_big *)xs->cmd)->addr); 3564 else if (xs->cmdlen == 16) 3565 *blk = _8btol(((struct scsi_rw_16 *)xs->cmd)->addr); 3566 else if (xs->cmdlen == 6) 3567 *blk = _3btol(((struct scsi_rw *)xs->cmd)->addr); 3568 else { 3569 printf("%s: %s: illegal cmdlen for %s\n", 3570 DEVNAME(sd->sd_sc), func, sd->sd_meta->ssd_devname); 3571 goto bad; 3572 } 3573 3574 wu->swu_blk_start = *blk; 3575 wu->swu_blk_end = *blk + (xs->datalen >> DEV_BSHIFT) - 1; 3576 3577 if (wu->swu_blk_end > sd->sd_meta->ssdi.ssd_size) { 3578 DNPRINTF(SR_D_DIS, "%s: %s out of bounds start: %lld " 3579 "end: %lld length: %d\n", 3580 DEVNAME(sd->sd_sc), func, wu->swu_blk_start, 3581 wu->swu_blk_end, xs->datalen); 3582 3583 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT | 3584 SSD_ERRCODE_VALID; 3585 sd->sd_scsi_sense.flags = SKEY_ILLEGAL_REQUEST; 3586 sd->sd_scsi_sense.add_sense_code = 0x21; 3587 sd->sd_scsi_sense.add_sense_code_qual = 0x00; 3588 sd->sd_scsi_sense.extra_len = 4; 3589 goto bad; 3590 } 3591 3592 rv = 0; 3593 bad: 3594 return (rv); 3595 } 3596 3597 int 3598 sr_check_io_collision(struct sr_workunit *wu) 3599 { 3600 struct sr_discipline *sd = wu->swu_dis; 3601 struct sr_workunit *wup; 3602 3603 splassert(IPL_BIO); 3604 3605 /* walk queue backwards and fill in collider if we have one */ 3606 TAILQ_FOREACH_REVERSE(wup, &sd->sd_wu_pendq, sr_wu_list, swu_link) { 3607 if (wu->swu_blk_end < wup->swu_blk_start || 3608 wup->swu_blk_end < wu->swu_blk_start) 3609 continue; 3610 3611 /* we have an LBA collision, defer wu */ 3612 wu->swu_state = SR_WU_DEFERRED; 3613 if (wup->swu_collider) 3614 /* wu is on deferred queue, append to last wu */ 3615 while (wup->swu_collider) 3616 wup = wup->swu_collider; 3617 3618 wup->swu_collider = wu; 3619 TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link); 3620 sd->sd_wu_collisions++; 3621 goto queued; 3622 } 3623 3624 return (0); 3625 queued: 3626 return (1); 3627 } 3628 3629 void 3630 sr_rebuild(void *arg) 3631 { 3632 struct sr_discipline *sd = arg; 3633 struct sr_softc *sc = sd->sd_sc; 3634 3635 if (kthread_create(sr_rebuild_thread, sd, &sd->sd_background_proc, 3636 DEVNAME(sc)) != 0) 3637 printf("%s: unable to start backgound operation\n", 3638 DEVNAME(sc)); 3639 } 3640 3641 void 3642 sr_rebuild_thread(void *arg) 3643 { 3644 struct sr_discipline *sd = arg; 3645 struct sr_softc *sc = sd->sd_sc; 3646 daddr64_t whole_blk, partial_blk, blk, sz, lba; 3647 daddr64_t psz, rb, restart; 3648 uint64_t mysize = 0; 3649 struct sr_workunit *wu_r, *wu_w; 3650 struct scsi_xfer xs_r, xs_w; 3651 struct scsi_rw_16 cr, cw; 3652 int c, s, slept, percent = 0, old_percent = -1; 3653 u_int8_t *buf; 3654 3655 whole_blk = sd->sd_meta->ssdi.ssd_size / SR_REBUILD_IO_SIZE; 3656 partial_blk = sd->sd_meta->ssdi.ssd_size % SR_REBUILD_IO_SIZE; 3657 3658 restart = sd->sd_meta->ssd_rebuild / SR_REBUILD_IO_SIZE; 3659 if (restart > whole_blk) { 3660 printf("%s: bogus rebuild restart offset, starting from 0\n", 3661 DEVNAME(sc)); 3662 restart = 0; 3663 } 3664 if (restart) { 3665 /* 3666 * XXX there is a hole here; there is a posibility that we 
3667 * had a restart however the chunk that was supposed to 3668 * be rebuilt is no longer valid; we can reach this situation 3669 * when a rebuild is in progress and the box crashes and 3670 * on reboot the rebuild chunk is different (like zero'd or 3671 * replaced). We need to check the uuid of the chunk that is 3672 * being rebuilt to assert this. 3673 */ 3674 psz = sd->sd_meta->ssdi.ssd_size; 3675 rb = sd->sd_meta->ssd_rebuild; 3676 if (rb > 0) 3677 percent = 100 - ((psz * 100 - rb * 100) / psz) - 1; 3678 else 3679 percent = 0; 3680 printf("%s: resuming rebuild on %s at %llu%%\n", 3681 DEVNAME(sc), sd->sd_meta->ssd_devname, percent); 3682 } 3683 3684 sd->sd_reb_active = 1; 3685 3686 buf = malloc(SR_REBUILD_IO_SIZE << DEV_BSHIFT, M_DEVBUF, M_WAITOK); 3687 for (blk = restart; blk <= whole_blk; blk++) { 3688 if (blk == whole_blk) 3689 sz = partial_blk; 3690 else 3691 sz = SR_REBUILD_IO_SIZE; 3692 mysize += sz; 3693 lba = blk * sz; 3694 3695 /* get some wu */ 3696 if ((wu_r = sr_wu_get(sd, 1)) == NULL) 3697 panic("%s: rebuild exhausted wu_r", DEVNAME(sc)); 3698 if ((wu_w = sr_wu_get(sd, 1)) == NULL) 3699 panic("%s: rebuild exhausted wu_w", DEVNAME(sc)); 3700 3701 /* setup read io */ 3702 bzero(&xs_r, sizeof xs_r); 3703 bzero(&cr, sizeof cr); 3704 xs_r.error = XS_NOERROR; 3705 xs_r.flags = SCSI_DATA_IN; 3706 xs_r.datalen = sz << DEV_BSHIFT; 3707 xs_r.data = buf; 3708 xs_r.cmdlen = 16; 3709 cr.opcode = READ_16; 3710 _lto4b(sz, cr.length); 3711 _lto8b(lba, cr.addr); 3712 xs_r.cmd = (struct scsi_generic *)&cr; 3713 wu_r->swu_flags |= SR_WUF_REBUILD; 3714 wu_r->swu_xs = &xs_r; 3715 if (sd->sd_scsi_rw(wu_r)) { 3716 printf("%s: could not create read io\n", 3717 DEVNAME(sc)); 3718 goto fail; 3719 } 3720 3721 /* setup write io */ 3722 bzero(&xs_w, sizeof xs_w); 3723 bzero(&cw, sizeof cw); 3724 xs_w.error = XS_NOERROR; 3725 xs_w.flags = SCSI_DATA_OUT; 3726 xs_w.datalen = sz << DEV_BSHIFT; 3727 xs_w.data = buf; 3728 xs_w.cmdlen = 16; 3729 cw.opcode = WRITE_16; 3730 _lto4b(sz, cw.length); 3731 _lto8b(lba, cw.addr); 3732 xs_w.cmd = (struct scsi_generic *)&cw; 3733 wu_w->swu_flags |= SR_WUF_REBUILD; 3734 wu_w->swu_xs = &xs_w; 3735 if (sd->sd_scsi_rw(wu_w)) { 3736 printf("%s: could not create write io\n", 3737 DEVNAME(sc)); 3738 goto fail; 3739 } 3740 3741 /* 3742 * collide with the read io so that we get automatically 3743 * started when the read is done 3744 */ 3745 wu_w->swu_state = SR_WU_DEFERRED; 3746 wu_r->swu_collider = wu_w; 3747 s = splbio(); 3748 TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu_w, swu_link); 3749 3750 /* schedule io */ 3751 if (sr_check_io_collision(wu_r)) 3752 goto queued; 3753 3754 sr_raid_startwu(wu_r); 3755 queued: 3756 splx(s); 3757 3758 /* wait for read completion */ 3759 slept = 0; 3760 while ((wu_w->swu_flags & SR_WUF_REBUILDIOCOMP) == 0) { 3761 tsleep(wu_w, PRIBIO, "sr_rebuild", 0); 3762 slept = 1; 3763 } 3764 /* yield if we didn't sleep */ 3765 if (slept == 0) 3766 tsleep(sc, PWAIT, "sr_yield", 1); 3767 3768 sr_wu_put(wu_r); 3769 sr_wu_put(wu_w); 3770 3771 sd->sd_meta->ssd_rebuild = lba; 3772 3773 /* save metadata every percent */ 3774 psz = sd->sd_meta->ssdi.ssd_size; 3775 rb = sd->sd_meta->ssd_rebuild; 3776 if (rb > 0) 3777 percent = 100 - ((psz * 100 - rb * 100) / psz) - 1; 3778 else 3779 percent = 0; 3780 if (percent != old_percent && blk != whole_blk) { 3781 if (sr_meta_save(sd, SR_META_DIRTY)) 3782 printf("%s: could not save metadata to %s\n", 3783 DEVNAME(sc), sd->sd_meta->ssd_devname); 3784 old_percent = percent; 3785 } 3786 3787 if (sd->sd_reb_abort) 3788 goto abort; 
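/*
 * End of one SR_REBUILD_IO_SIZE pass: ssd_rebuild now records the
 * last LBA copied and is flushed to disk roughly once per percent,
 * so an interrupted rebuild resumes near this point (subject to the
 * XXX above about revalidating the rebuild target after a crash).
 */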
3789 } 3790 3791 /* all done */ 3792 sd->sd_meta->ssd_rebuild = 0; 3793 for (c = 0; c < sd->sd_meta->ssdi.ssd_chunk_no; c++) 3794 if (sd->sd_vol.sv_chunks[c]->src_meta.scm_status == 3795 BIOC_SDREBUILD) { 3796 sd->sd_set_chunk_state(sd, c, BIOC_SDONLINE); 3797 break; 3798 } 3799 3800 abort: 3801 if (sr_meta_save(sd, SR_META_DIRTY)) 3802 printf("%s: could not save metadata to %s\n", 3803 DEVNAME(sc), sd->sd_meta->ssd_devname); 3804 fail: 3805 free(buf, M_DEVBUF); 3806 sd->sd_reb_active = 0; 3807 kthread_exit(0); 3808 } 3809 3810 #ifndef SMALL_KERNEL 3811 int 3812 sr_sensors_create(struct sr_discipline *sd) 3813 { 3814 struct sr_softc *sc = sd->sd_sc; 3815 int rv = 1; 3816 3817 DNPRINTF(SR_D_STATE, "%s: %s: sr_sensors_create\n", 3818 DEVNAME(sc), sd->sd_meta->ssd_devname); 3819 3820 strlcpy(sd->sd_vol.sv_sensordev.xname, DEVNAME(sc), 3821 sizeof(sd->sd_vol.sv_sensordev.xname)); 3822 3823 sd->sd_vol.sv_sensor.type = SENSOR_DRIVE; 3824 sd->sd_vol.sv_sensor.status = SENSOR_S_UNKNOWN; 3825 strlcpy(sd->sd_vol.sv_sensor.desc, sd->sd_meta->ssd_devname, 3826 sizeof(sd->sd_vol.sv_sensor.desc)); 3827 3828 sensor_attach(&sd->sd_vol.sv_sensordev, &sd->sd_vol.sv_sensor); 3829 3830 if (sc->sc_sensors_running == 0) { 3831 if (sensor_task_register(sc, sr_sensors_refresh, 10) == NULL) 3832 goto bad; 3833 sc->sc_sensors_running = 1; 3834 } 3835 sensordev_install(&sd->sd_vol.sv_sensordev); 3836 3837 rv = 0; 3838 bad: 3839 return (rv); 3840 } 3841 3842 void 3843 sr_sensors_delete(struct sr_discipline *sd) 3844 { 3845 DNPRINTF(SR_D_STATE, "%s: sr_sensors_delete\n", DEVNAME(sd->sd_sc)); 3846 3847 if (sd->sd_vol.sv_sensor_valid) 3848 sensordev_deinstall(&sd->sd_vol.sv_sensordev); 3849 } 3850 3851 void 3852 sr_sensors_refresh(void *arg) 3853 { 3854 struct sr_softc *sc = arg; 3855 struct sr_volume *sv; 3856 struct sr_discipline *sd; 3857 int i, vol; 3858 3859 DNPRINTF(SR_D_STATE, "%s: sr_sensors_refresh\n", DEVNAME(sc)); 3860 3861 for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { 3862 /* XXX this will not work when we stagger disciplines */ 3863 if (!sc->sc_dis[i]) 3864 continue; 3865 3866 sd = sc->sc_dis[i]; 3867 sv = &sd->sd_vol; 3868 3869 switch(sd->sd_vol_status) { 3870 case BIOC_SVOFFLINE: 3871 sv->sv_sensor.value = SENSOR_DRIVE_FAIL; 3872 sv->sv_sensor.status = SENSOR_S_CRIT; 3873 break; 3874 3875 case BIOC_SVDEGRADED: 3876 sv->sv_sensor.value = SENSOR_DRIVE_PFAIL; 3877 sv->sv_sensor.status = SENSOR_S_WARN; 3878 break; 3879 3880 case BIOC_SVSCRUB: 3881 case BIOC_SVONLINE: 3882 sv->sv_sensor.value = SENSOR_DRIVE_ONLINE; 3883 sv->sv_sensor.status = SENSOR_S_OK; 3884 break; 3885 3886 default: 3887 sv->sv_sensor.value = 0; /* unknown */ 3888 sv->sv_sensor.status = SENSOR_S_UNKNOWN; 3889 } 3890 } 3891 } 3892 #endif /* SMALL_KERNEL */ 3893 3894 #ifdef SR_FANCY_STATS 3895 void sr_print_stats(void); 3896 3897 void 3898 sr_print_stats(void) 3899 { 3900 struct sr_softc *sc; 3901 struct sr_discipline *sd; 3902 int i, vol; 3903 3904 for (i = 0; i < softraid_cd.cd_ndevs; i++) 3905 if (softraid_cd.cd_devs[i]) { 3906 sc = softraid_cd.cd_devs[i]; 3907 /* we'll only have one softc */ 3908 break; 3909 } 3910 3911 if (!sc) { 3912 printf("no softraid softc found\n"); 3913 return; 3914 } 3915 3916 for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { 3917 /* XXX this will not work when we stagger disciplines */ 3918 if (!sc->sc_dis[i]) 3919 continue; 3920 3921 sd = sc->sc_dis[i]; 3922 printf("%s: ios pending: %d collisions %llu\n", 3923 sd->sd_meta->ssd_devname, 3924 sd->sd_wu_pending, 3925 sd->sd_wu_collisions); 3926 } 3927 } 3928 #endif 
/* SR_FANCY_STATS */ 3929 3930 #ifdef SR_DEBUG 3931 void 3932 sr_meta_print(struct sr_metadata *m) 3933 { 3934 int i; 3935 struct sr_meta_chunk *mc; 3936 struct sr_meta_opt *mo; 3937 3938 if (!(sr_debug & SR_D_META)) 3939 return; 3940 3941 printf("\tssd_magic 0x%llx\n", m->ssdi.ssd_magic); 3942 printf("\tssd_version %d\n", m->ssdi.ssd_version); 3943 printf("\tssd_flags 0x%x\n", m->ssdi.ssd_flags); 3944 printf("\tssd_uuid "); 3945 sr_uuid_print(&m->ssdi.ssd_uuid, 1); 3946 printf("\tssd_chunk_no %d\n", m->ssdi.ssd_chunk_no); 3947 printf("\tssd_chunk_id %d\n", m->ssdi.ssd_chunk_id); 3948 printf("\tssd_opt_no %d\n", m->ssdi.ssd_opt_no); 3949 printf("\tssd_volid %d\n", m->ssdi.ssd_volid); 3950 printf("\tssd_level %d\n", m->ssdi.ssd_level); 3951 printf("\tssd_size %lld\n", m->ssdi.ssd_size); 3952 printf("\tssd_devname %s\n", m->ssd_devname); 3953 printf("\tssd_vendor %s\n", m->ssdi.ssd_vendor); 3954 printf("\tssd_product %s\n", m->ssdi.ssd_product); 3955 printf("\tssd_revision %s\n", m->ssdi.ssd_revision); 3956 printf("\tssd_strip_size %d\n", m->ssdi.ssd_strip_size); 3957 printf("\tssd_checksum "); 3958 sr_checksum_print(m->ssd_checksum); 3959 printf("\n"); 3960 printf("\tssd_meta_flags 0x%x\n", m->ssd_meta_flags); 3961 printf("\tssd_ondisk %llu\n", m->ssd_ondisk); 3962 3963 mc = (struct sr_meta_chunk *)(m + 1); 3964 for (i = 0; i < m->ssdi.ssd_chunk_no; i++, mc++) { 3965 printf("\t\tscm_volid %d\n", mc->scmi.scm_volid); 3966 printf("\t\tscm_chunk_id %d\n", mc->scmi.scm_chunk_id); 3967 printf("\t\tscm_devname %s\n", mc->scmi.scm_devname); 3968 printf("\t\tscm_size %lld\n", mc->scmi.scm_size); 3969 printf("\t\tscm_coerced_size %lld\n",mc->scmi.scm_coerced_size); 3970 printf("\t\tscm_uuid "); 3971 sr_uuid_print(&mc->scmi.scm_uuid, 1); 3972 printf("\t\tscm_checksum "); 3973 sr_checksum_print(mc->scm_checksum); 3974 printf("\n"); 3975 printf("\t\tscm_status %d\n", mc->scm_status); 3976 } 3977 3978 mo = (struct sr_meta_opt *)(mc); 3979 for (i = 0; i < m->ssdi.ssd_opt_no; i++, mo++) { 3980 printf("\t\t\tsom_type %d\n", mo->somi.som_type); 3981 printf("\t\t\tsom_checksum "); 3982 sr_checksum_print(mo->som_checksum); 3983 printf("\n"); 3984 } 3985 } 3986 3987 void 3988 sr_dump_mem(u_int8_t *p, int len) 3989 { 3990 int i; 3991 3992 for (i = 0; i < len; i++) 3993 printf("%02x ", *p++); 3994 printf("\n"); 3995 } 3996 3997 #endif /* SR_DEBUG */ 3998
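#if 0
/*
 * Illustrative sketch only (not compiled): how a kernel-side caller
 * could drive the BIOCCREATERAID path defined above.  sr_ioctl()
 * dispatches BIOCCREATERAID to sr_ioctl_createraid() with user == 1
 * for requests arriving via sr_scsi_ioctl(); a caller inside the
 * kernel passes user == 0 so bc_dev_list is copied with bcopy()
 * instead of copyin().  The function name sr_example_mirror() and
 * the two dev_t arguments are assumptions made for this example.
 */
int	sr_example_mirror(struct sr_softc *, dev_t, dev_t);

int
sr_example_mirror(struct sr_softc *sc, dev_t chunk0, dev_t chunk1)
{
	struct bioc_createraid	bc;
	dev_t			dt[2];

	dt[0] = chunk0;
	dt[1] = chunk1;

	bzero(&bc, sizeof(bc));
	bc.bc_level = 1;			/* RAID 1 mirror */
	bc.bc_dev_list_len = sizeof(dt);	/* no_chunk == 2 */
	bc.bc_dev_list = dt;
	bc.bc_flags = 0;

	/* Returns 0 on success, an errno-style value otherwise. */
	return (sr_ioctl_createraid(sc, &bc, 0));
}
#endif /* example sketch */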