1 /* $OpenBSD: softraid.c,v 1.174 2009/09/21 16:38:13 marco Exp $ */ 2 /* 3 * Copyright (c) 2007, 2008, 2009 Marco Peereboom <marco@peereboom.us> 4 * Copyright (c) 2008 Chris Kuethe <ckuethe@openbsd.org> 5 * Copyright (c) 2009 Joel Sing <jsing@openbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 20 #include "bio.h" 21 22 #include <sys/param.h> 23 #include <sys/systm.h> 24 #include <sys/buf.h> 25 #include <sys/device.h> 26 #include <sys/ioctl.h> 27 #include <sys/proc.h> 28 #include <sys/malloc.h> 29 #include <sys/pool.h> 30 #include <sys/kernel.h> 31 #include <sys/disk.h> 32 #include <sys/rwlock.h> 33 #include <sys/queue.h> 34 #include <sys/fcntl.h> 35 #include <sys/disklabel.h> 36 #include <sys/mount.h> 37 #include <sys/sensors.h> 38 #include <sys/stat.h> 39 #include <sys/conf.h> 40 #include <sys/uio.h> 41 #include <sys/workq.h> 42 #include <sys/kthread.h> 43 44 #ifdef AOE 45 #include <sys/mbuf.h> 46 #include <net/if_aoe.h> 47 #endif /* AOE */ 48 49 #include <crypto/cryptodev.h> 50 51 #include <scsi/scsi_all.h> 52 #include <scsi/scsiconf.h> 53 #include <scsi/scsi_disk.h> 54 55 #include <dev/softraidvar.h> 56 #include <dev/rndvar.h> 57 58 /* #define SR_FANCY_STATS */ 59 60 #ifdef SR_DEBUG 61 #define SR_FANCY_STATS 62 uint32_t sr_debug = 0 63 /* | SR_D_CMD */ 64 /* | SR_D_MISC */ 65 /* | SR_D_INTR */ 66 /* | SR_D_IOCTL */ 67 /* | SR_D_CCB */ 68 /* | SR_D_WU */ 69 /* | SR_D_META */ 70 /* | SR_D_DIS */ 71 /* | SR_D_STATE */ 72 ; 73 #endif 74 75 int sr_match(struct device *, void *, void *); 76 void sr_attach(struct device *, struct device *, void *); 77 int sr_detach(struct device *, int); 78 int sr_activate(struct device *, enum devact); 79 80 struct cfattach softraid_ca = { 81 sizeof(struct sr_softc), sr_match, sr_attach, sr_detach, 82 sr_activate 83 }; 84 85 struct cfdriver softraid_cd = { 86 NULL, "softraid", DV_DULL 87 }; 88 89 /* scsi & discipline */ 90 int sr_scsi_cmd(struct scsi_xfer *); 91 void sr_minphys(struct buf *bp, struct scsi_link *sl); 92 void sr_copy_internal_data(struct scsi_xfer *, 93 void *, size_t); 94 int sr_scsi_ioctl(struct scsi_link *, u_long, 95 caddr_t, int, struct proc *); 96 int sr_ioctl(struct device *, u_long, caddr_t); 97 int sr_ioctl_inq(struct sr_softc *, struct bioc_inq *); 98 int sr_ioctl_vol(struct sr_softc *, struct bioc_vol *); 99 int sr_ioctl_disk(struct sr_softc *, struct bioc_disk *); 100 int sr_ioctl_setstate(struct sr_softc *, 101 struct bioc_setstate *); 102 int sr_ioctl_createraid(struct sr_softc *, 103 struct bioc_createraid *, int); 104 int sr_ioctl_deleteraid(struct sr_softc *, 105 struct bioc_deleteraid *); 106 void sr_chunks_unwind(struct sr_softc *, 107 struct sr_chunk_head *); 108 void sr_discipline_free(struct sr_discipline *); 109 void sr_discipline_shutdown(struct sr_discipline *); 110 int sr_discipline_init(struct sr_discipline *, 
int); 111 112 /* utility functions */ 113 void sr_shutdown(void *); 114 void sr_uuid_get(struct sr_uuid *); 115 void sr_uuid_print(struct sr_uuid *, int); 116 void sr_checksum_print(u_int8_t *); 117 void sr_checksum(struct sr_softc *, void *, void *, 118 u_int32_t); 119 int sr_boot_assembly(struct sr_softc *); 120 int sr_already_assembled(struct sr_discipline *); 121 int sr_hotspare(struct sr_softc *, dev_t); 122 void sr_hotspare_rebuild(struct sr_discipline *); 123 int sr_rebuild_init(struct sr_discipline *, dev_t); 124 void sr_rebuild(void *); 125 void sr_rebuild_thread(void *); 126 void sr_roam_chunks(struct sr_discipline *); 127 int sr_chunk_in_use(struct sr_softc *, dev_t); 128 129 /* don't include these on RAMDISK */ 130 #ifndef SMALL_KERNEL 131 void sr_sensors_refresh(void *); 132 int sr_sensors_create(struct sr_discipline *); 133 void sr_sensors_delete(struct sr_discipline *); 134 #endif 135 136 /* metadata */ 137 int sr_meta_probe(struct sr_discipline *, dev_t *, int); 138 int sr_meta_attach(struct sr_discipline *, int); 139 void sr_meta_getdevname(struct sr_softc *, dev_t, char *, 140 int); 141 int sr_meta_rw(struct sr_discipline *, dev_t, void *, 142 size_t, daddr64_t, long); 143 int sr_meta_clear(struct sr_discipline *); 144 int sr_meta_read(struct sr_discipline *); 145 int sr_meta_save(struct sr_discipline *, u_int32_t); 146 int sr_meta_validate(struct sr_discipline *, dev_t, 147 struct sr_metadata *, void *); 148 void sr_meta_chunks_create(struct sr_softc *, 149 struct sr_chunk_head *); 150 void sr_meta_init(struct sr_discipline *, 151 struct sr_chunk_head *); 152 153 /* hotplug magic */ 154 void sr_disk_attach(struct disk *, int); 155 156 struct sr_hotplug_list { 157 void (*sh_hotplug)(struct sr_discipline *, 158 struct disk *, int); 159 struct sr_discipline *sh_sd; 160 161 SLIST_ENTRY(sr_hotplug_list) shl_link; 162 }; 163 SLIST_HEAD(sr_hotplug_list_head, sr_hotplug_list); 164 165 struct sr_hotplug_list_head sr_hotplug_callbacks; 166 extern void (*softraid_disk_attach)(struct disk *, int); 167 168 /* scsi glue */ 169 struct scsi_adapter sr_switch = { 170 sr_scsi_cmd, sr_minphys, NULL, NULL, sr_scsi_ioctl 171 }; 172 173 struct scsi_device sr_dev = { 174 NULL, NULL, NULL, NULL 175 }; 176 177 /* native metadata format */ 178 int sr_meta_native_bootprobe(struct sr_softc *, 179 struct device *, struct sr_metadata_list_head *); 180 #define SR_META_NOTCLAIMED (0) 181 #define SR_META_CLAIMED (1) 182 int sr_meta_native_probe(struct sr_softc *, 183 struct sr_chunk *); 184 int sr_meta_native_attach(struct sr_discipline *, int); 185 int sr_meta_native_read(struct sr_discipline *, dev_t, 186 struct sr_metadata *, void *); 187 int sr_meta_native_write(struct sr_discipline *, dev_t, 188 struct sr_metadata *,void *); 189 190 #ifdef SR_DEBUG 191 void sr_meta_print(struct sr_metadata *); 192 #else 193 #define sr_meta_print(m) 194 #endif 195 196 /* the metadata driver should remain stateless */ 197 struct sr_meta_driver { 198 daddr64_t smd_offset; /* metadata location */ 199 u_int32_t smd_size; /* size of metadata */ 200 201 int (*smd_probe)(struct sr_softc *, 202 struct sr_chunk *); 203 int (*smd_attach)(struct sr_discipline *, int); 204 int (*smd_detach)(struct sr_discipline *); 205 int (*smd_read)(struct sr_discipline *, dev_t, 206 struct sr_metadata *, void *); 207 int (*smd_write)(struct sr_discipline *, dev_t, 208 struct sr_metadata *, void *); 209 int (*smd_validate)(struct sr_discipline *, 210 struct sr_metadata *, void *); 211 } smd[] = { 212 { SR_META_OFFSET, SR_META_SIZE * 512, 213 
	    sr_meta_native_probe, sr_meta_native_attach, NULL,
	    sr_meta_native_read, sr_meta_native_write, NULL },
#define SR_META_F_NATIVE	0
	{ 0, 0, NULL, NULL, NULL, NULL }
#define SR_META_F_INVALID	-1
};

int
sr_meta_attach(struct sr_discipline *sd, int force)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct sr_chunk_head	*cl;
	struct sr_chunk		*ch_entry;
	int			rv = 1, i = 0;

	DNPRINTF(SR_D_META, "%s: sr_meta_attach(%d)\n", DEVNAME(sc), force);

	/* in-memory copy of metadata */
	sd->sd_meta = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO);
	if (!sd->sd_meta) {
		printf("%s: could not allocate memory for metadata\n",
		    DEVNAME(sc));
		goto bad;
	}

	if (sd->sd_meta_type != SR_META_F_NATIVE) {
		/* in-memory copy of foreign metadata */
		sd->sd_meta_foreign = malloc(smd[sd->sd_meta_type].smd_size,
		    M_DEVBUF, M_ZERO);
		if (!sd->sd_meta_foreign) {
			/* unwind frees sd_meta */
			printf("%s: could not allocate memory for foreign "
			    "metadata\n", DEVNAME(sc));
			goto bad;
		}
	}

	/* we have a valid list; now create an array index */
	cl = &sd->sd_vol.sv_chunk_list;
	SLIST_FOREACH(ch_entry, cl, src_link) {
		i++;
	}
	sd->sd_vol.sv_chunks = malloc(sizeof(struct sr_chunk *) * i,
	    M_DEVBUF, M_WAITOK | M_ZERO);

	/* fill out chunk array */
	i = 0;
	SLIST_FOREACH(ch_entry, cl, src_link)
		sd->sd_vol.sv_chunks[i++] = ch_entry;

	/* attach metadata */
	if (smd[sd->sd_meta_type].smd_attach(sd, force))
		goto bad;

	rv = 0;
bad:
	return (rv);
}

int
sr_meta_probe(struct sr_discipline *sd, dev_t *dt, int no_chunk)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct vnode		*vn;
	struct sr_chunk		*ch_entry, *ch_prev = NULL;
	struct sr_chunk_head	*cl;
	char			devname[32];
	int			i, d, type, found, prevf, error;
	dev_t			dev;

	DNPRINTF(SR_D_META, "%s: sr_meta_probe(%d)\n", DEVNAME(sc), no_chunk);

	if (no_chunk == 0)
		goto unwind;

	cl = &sd->sd_vol.sv_chunk_list;

	for (d = 0, prevf = SR_META_F_INVALID; d < no_chunk; d++) {
		ch_entry = malloc(sizeof(struct sr_chunk), M_DEVBUF,
		    M_WAITOK | M_ZERO);
		/* keep disks in user supplied order */
		if (ch_prev)
			SLIST_INSERT_AFTER(ch_prev, ch_entry, src_link);
		else
			SLIST_INSERT_HEAD(cl, ch_entry, src_link);
		ch_prev = ch_entry;
		dev = dt[d];
		ch_entry->src_dev_mm = dev;

		if (dev == NODEV) {
			ch_entry->src_meta.scm_status = BIOC_SDOFFLINE;
			continue;
		} else {
			sr_meta_getdevname(sc, dev, devname, sizeof(devname));
			if (bdevvp(dev, &vn)) {
				printf("%s: sr_meta_probe: can't allocate "
				    "vnode\n", DEVNAME(sc));
				goto unwind;
			}

			/*
			 * XXX leaving dev open for now; move this to attach
			 * and figure out the open/close dance for unwind.
			 */
			error = VOP_OPEN(vn, FREAD | FWRITE, NOCRED, 0);
			if (error) {
				DNPRINTF(SR_D_META, "%s: sr_meta_probe can't "
				    "open %s\n", DEVNAME(sc), devname);
				vput(vn);
				goto unwind;
			}

			strlcpy(ch_entry->src_devname, devname,
			    sizeof(ch_entry->src_devname));
			ch_entry->src_vn = vn;
		}

		/* determine if this is a device we understand */
		for (i = 0, found = SR_META_F_INVALID; smd[i].smd_probe; i++) {
			type = smd[i].smd_probe(sc, ch_entry);
			if (type == SR_META_F_INVALID)
				continue;
			else {
				found = type;
				break;
			}
		}

		if (found == SR_META_F_INVALID)
			goto unwind;
		if (prevf == SR_META_F_INVALID)
			prevf = found;
		if (prevf != found) {
			DNPRINTF(SR_D_META, "%s: prevf != found\n",
			    DEVNAME(sc));
			goto unwind;
		}
	}

	return (prevf);
unwind:
	return (SR_META_F_INVALID);
}

void
sr_meta_getdevname(struct sr_softc *sc, dev_t dev, char *buf, int size)
{
	int			maj, unit, part;
	char			*name;

	DNPRINTF(SR_D_META, "%s: sr_meta_getdevname(%p, %d)\n",
	    DEVNAME(sc), buf, size);

	if (!buf)
		return;

	maj = major(dev);
	part = DISKPART(dev);
	unit = DISKUNIT(dev);

	name = findblkname(maj);
	if (name == NULL)
		return;

	snprintf(buf, size, "%s%d%c", name, unit, part + 'a');
}

int
sr_meta_rw(struct sr_discipline *sd, dev_t dev, void *md, size_t sz,
    daddr64_t ofs, long flags)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct buf		b;
	int			rv = 1;

	DNPRINTF(SR_D_META, "%s: sr_meta_rw(0x%x, %p, %d, %llu, 0x%x)\n",
	    DEVNAME(sc), dev, md, sz, ofs, flags);

	bzero(&b, sizeof(b));

	if (md == NULL) {
		printf("%s: sr_meta_rw: invalid metadata pointer\n",
		    DEVNAME(sc));
		goto done;
	}
	b.b_flags = flags | B_PHYS;
	b.b_blkno = ofs;
	b.b_bcount = sz;
	b.b_bufsize = sz;
	b.b_resid = sz;
	b.b_data = md;
	b.b_error = 0;
	b.b_proc = curproc;
	b.b_dev = dev;
	b.b_iodone = NULL;
	if (bdevvp(dev, &b.b_vp)) {
		printf("%s: sr_meta_rw: can't allocate vnode\n", DEVNAME(sc));
		goto done;
	}
	if ((b.b_flags & B_READ) == 0)
		b.b_vp->v_numoutput++;

	LIST_INIT(&b.b_dep);
	VOP_STRATEGY(&b);
	biowait(&b);

	if (b.b_flags & B_ERROR) {
		printf("%s: 0x%x i/o error on block %llu while reading "
		    "metadata %d\n", DEVNAME(sc), dev, b.b_blkno, b.b_error);
		goto done;
	}
	rv = 0;
done:
	if (b.b_vp)
		vput(b.b_vp);

	return (rv);
}

int
sr_meta_clear(struct sr_discipline *sd)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct sr_chunk_head	*cl = &sd->sd_vol.sv_chunk_list;
	struct sr_chunk		*ch_entry;
	void			*m;
	int			rv = 1;

	DNPRINTF(SR_D_META, "%s: sr_meta_clear\n", DEVNAME(sc));

	if (sd->sd_meta_type != SR_META_F_NATIVE) {
		printf("%s: sr_meta_clear cannot clear foreign metadata\n",
		    DEVNAME(sc));
		goto done;
	}

	m = malloc(SR_META_SIZE * 512, M_DEVBUF, M_WAITOK | M_ZERO);
	SLIST_FOREACH(ch_entry, cl, src_link) {
		if (sr_meta_native_write(sd, ch_entry->src_dev_mm, m, NULL)) {
			/* XXX mark disk offline */
			DNPRINTF(SR_D_META, "%s: sr_meta_clear failed to "
			    "clear %s\n", DEVNAME(sc), ch_entry->src_devname);
			rv++;
			continue;
		}
		bzero(&ch_entry->src_meta, sizeof(ch_entry->src_meta));
		bzero(&ch_entry->src_opt, sizeof(ch_entry->src_opt));
	}

	bzero(sd->sd_meta, SR_META_SIZE * 512);

	free(m, M_DEVBUF);
	rv = 0;
done:
return (rv); 468 } 469 470 void 471 sr_meta_chunks_create(struct sr_softc *sc, struct sr_chunk_head *cl) 472 { 473 struct sr_chunk *ch_entry; 474 struct sr_uuid uuid; 475 int cid = 0; 476 char *name; 477 u_int64_t max_chunk_sz = 0, min_chunk_sz; 478 479 DNPRINTF(SR_D_META, "%s: sr_meta_chunks_create\n", DEVNAME(sc)); 480 481 sr_uuid_get(&uuid); 482 483 /* fill out stuff and get largest chunk size while looping */ 484 SLIST_FOREACH(ch_entry, cl, src_link) { 485 name = ch_entry->src_devname; 486 ch_entry->src_meta.scmi.scm_size = ch_entry->src_size; 487 ch_entry->src_meta.scmi.scm_chunk_id = cid++; 488 ch_entry->src_meta.scm_status = BIOC_SDONLINE; 489 strlcpy(ch_entry->src_meta.scmi.scm_devname, name, 490 sizeof(ch_entry->src_meta.scmi.scm_devname)); 491 bcopy(&uuid, &ch_entry->src_meta.scmi.scm_uuid, 492 sizeof(ch_entry->src_meta.scmi.scm_uuid)); 493 494 if (ch_entry->src_meta.scmi.scm_size > max_chunk_sz) 495 max_chunk_sz = ch_entry->src_meta.scmi.scm_size; 496 } 497 498 /* get smallest chunk size */ 499 min_chunk_sz = max_chunk_sz; 500 SLIST_FOREACH(ch_entry, cl, src_link) 501 if (ch_entry->src_meta.scmi.scm_size < min_chunk_sz) 502 min_chunk_sz = ch_entry->src_meta.scmi.scm_size; 503 504 /* equalize all sizes */ 505 SLIST_FOREACH(ch_entry, cl, src_link) 506 ch_entry->src_meta.scmi.scm_coerced_size = min_chunk_sz; 507 508 /* whine if chunks are not the same size */ 509 if (min_chunk_sz != max_chunk_sz) 510 printf("%s: chunk sizes are not equal; up to %llu blocks " 511 "wasted per chunk\n", 512 DEVNAME(sc), max_chunk_sz - min_chunk_sz); 513 } 514 515 void 516 sr_meta_init(struct sr_discipline *sd, struct sr_chunk_head *cl) 517 { 518 struct sr_softc *sc = sd->sd_sc; 519 struct sr_metadata *sm = sd->sd_meta; 520 struct sr_meta_chunk *im_sc; 521 struct sr_meta_opt *im_so; 522 int i, chunk_no; 523 524 DNPRINTF(SR_D_META, "%s: sr_meta_init\n", DEVNAME(sc)); 525 526 if (!sm) 527 return; 528 529 /* initial metadata */ 530 sm->ssdi.ssd_magic = SR_MAGIC; 531 sm->ssdi.ssd_version = SR_META_VERSION; 532 sm->ssd_ondisk = 0; 533 sm->ssdi.ssd_flags = sd->sd_meta_flags; 534 /* get uuid from chunk 0 */ 535 bcopy(&sd->sd_vol.sv_chunks[0]->src_meta.scmi.scm_uuid, 536 &sm->ssdi.ssd_uuid, 537 sizeof(struct sr_uuid)); 538 539 /* volume is filled in createraid */ 540 541 /* add missing chunk bits */ 542 chunk_no = sm->ssdi.ssd_chunk_no; 543 for (i = 0; i < chunk_no; i++) { 544 im_sc = &sd->sd_vol.sv_chunks[i]->src_meta; 545 im_sc->scmi.scm_volid = sm->ssdi.ssd_volid; 546 sr_checksum(sc, im_sc, &im_sc->scm_checksum, 547 sizeof(struct sr_meta_chunk_invariant)); 548 549 /* carry optional meta also in chunk area */ 550 im_so = &sd->sd_vol.sv_chunks[i]->src_opt; 551 bzero(im_so, sizeof(*im_so)); 552 if (sd->sd_type == SR_MD_CRYPTO) { 553 sm->ssdi.ssd_opt_no = 1; 554 im_so->somi.som_type = SR_OPT_CRYPTO; 555 556 /* 557 * copy encrypted key / passphrase into optional 558 * metadata area 559 */ 560 bcopy(&sd->mds.mdd_crypto.scr_meta, 561 &im_so->somi.som_meta.smm_crypto, 562 sizeof(im_so->somi.som_meta.smm_crypto)); 563 564 sr_checksum(sc, im_so, im_so->som_checksum, 565 sizeof(struct sr_meta_opt_invariant)); 566 } 567 } 568 } 569 570 void 571 sr_meta_save_callback(void *arg1, void *arg2) 572 { 573 struct sr_discipline *sd = arg1; 574 int s; 575 576 s = splbio(); 577 578 if (sr_meta_save(arg1, SR_META_DIRTY)) 579 printf("%s: save metadata failed\n", 580 DEVNAME(sd->sd_sc)); 581 582 sd->sd_must_flush = 0; 583 splx(s); 584 } 585 586 int 587 sr_meta_save(struct sr_discipline *sd, u_int32_t flags) 588 { 589 struct 
sr_softc *sc = sd->sd_sc; 590 struct sr_metadata *sm = sd->sd_meta, *m; 591 struct sr_meta_driver *s; 592 struct sr_chunk *src; 593 struct sr_meta_chunk *cm; 594 struct sr_workunit wu; 595 struct sr_meta_opt *om; 596 int i; 597 598 DNPRINTF(SR_D_META, "%s: sr_meta_save %s\n", 599 DEVNAME(sc), sd->sd_meta->ssd_devname); 600 601 if (!sm) { 602 printf("%s: no in memory copy of metadata\n", DEVNAME(sc)); 603 goto bad; 604 } 605 606 /* meta scratchpad */ 607 s = &smd[sd->sd_meta_type]; 608 m = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO); 609 if (!m) { 610 printf("%s: could not allocate metadata scratch area\n", 611 DEVNAME(sc)); 612 goto bad; 613 } 614 615 if (sm->ssdi.ssd_opt_no > 1) 616 panic("not yet save > 1 optional metadata members"); 617 618 /* from here on out metadata is updated */ 619 restart: 620 sm->ssd_ondisk++; 621 sm->ssd_meta_flags = flags; 622 bcopy(sm, m, sizeof(*m)); 623 624 for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) { 625 src = sd->sd_vol.sv_chunks[i]; 626 cm = (struct sr_meta_chunk *)(m + 1); 627 bcopy(&src->src_meta, cm + i, sizeof(*cm)); 628 } 629 630 /* optional metadata */ 631 om = (struct sr_meta_opt *)(cm + i); 632 for (i = 0; i < sm->ssdi.ssd_opt_no; i++) { 633 bcopy(&src->src_opt, om + i, sizeof(*om)); 634 sr_checksum(sc, om, &om->som_checksum, 635 sizeof(struct sr_meta_opt_invariant)); 636 } 637 638 for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) { 639 src = sd->sd_vol.sv_chunks[i]; 640 641 /* skip disks that are offline */ 642 if (src->src_meta.scm_status == BIOC_SDOFFLINE) 643 continue; 644 645 /* calculate metadata checksum for correct chunk */ 646 m->ssdi.ssd_chunk_id = i; 647 sr_checksum(sc, m, &m->ssd_checksum, 648 sizeof(struct sr_meta_invariant)); 649 650 #ifdef SR_DEBUG 651 DNPRINTF(SR_D_META, "%s: sr_meta_save %s: volid: %d " 652 "chunkid: %d checksum: ", 653 DEVNAME(sc), src->src_meta.scmi.scm_devname, 654 m->ssdi.ssd_volid, m->ssdi.ssd_chunk_id); 655 656 if (sr_debug & SR_D_META) 657 sr_checksum_print((u_int8_t *)&m->ssd_checksum); 658 DNPRINTF(SR_D_META, "\n"); 659 sr_meta_print(m); 660 #endif 661 662 /* translate and write to disk */ 663 if (s->smd_write(sd, src->src_dev_mm, m, NULL /* XXX */)) { 664 printf("%s: could not write metadata to %s\n", 665 DEVNAME(sc), src->src_devname); 666 /* restart the meta write */ 667 src->src_meta.scm_status = BIOC_SDOFFLINE; 668 /* XXX recalculate volume status */ 669 goto restart; 670 } 671 } 672 673 /* not all disciplines have sync */ 674 if (sd->sd_scsi_sync) { 675 bzero(&wu, sizeof(wu)); 676 wu.swu_fake = 1; 677 wu.swu_dis = sd; 678 sd->sd_scsi_sync(&wu); 679 } 680 free(m, M_DEVBUF); 681 return (0); 682 bad: 683 return (1); 684 } 685 686 int 687 sr_meta_read(struct sr_discipline *sd) 688 { 689 #ifdef SR_DEBUG 690 struct sr_softc *sc = sd->sd_sc; 691 #endif 692 struct sr_chunk_head *cl = &sd->sd_vol.sv_chunk_list; 693 struct sr_metadata *sm; 694 struct sr_chunk *ch_entry; 695 struct sr_meta_chunk *cp; 696 struct sr_meta_driver *s; 697 struct sr_meta_opt *om; 698 void *fm = NULL; 699 int no_disk = 0, got_meta = 0; 700 701 DNPRINTF(SR_D_META, "%s: sr_meta_read\n", DEVNAME(sc)); 702 703 sm = malloc(SR_META_SIZE * 512, M_DEVBUF, M_WAITOK | M_ZERO); 704 s = &smd[sd->sd_meta_type]; 705 if (sd->sd_meta_type != SR_META_F_NATIVE) 706 fm = malloc(s->smd_size, M_DEVBUF, M_WAITOK | M_ZERO); 707 708 cp = (struct sr_meta_chunk *)(sm + 1); 709 SLIST_FOREACH(ch_entry, cl, src_link) { 710 /* skip disks that are offline */ 711 if (ch_entry->src_meta.scm_status == BIOC_SDOFFLINE) { 712 DNPRINTF(SR_D_META, 713 "%s: %s chunk marked 
offline, spoofing status\n", 714 DEVNAME(sc), ch_entry->src_devname); 715 cp++; /* adjust chunk pointer to match failure */ 716 continue; 717 } else if (s->smd_read(sd, ch_entry->src_dev_mm, sm, fm)) { 718 /* read and translate */ 719 /* XXX mark chunk offline, elsewhere!! */ 720 ch_entry->src_meta.scm_status = BIOC_SDOFFLINE; 721 cp++; /* adjust chunk pointer to match failure */ 722 DNPRINTF(SR_D_META, "%s: sr_meta_read failed\n", 723 DEVNAME(sc)); 724 continue; 725 } 726 727 if (sm->ssdi.ssd_magic != SR_MAGIC) { 728 DNPRINTF(SR_D_META, "%s: sr_meta_read !SR_MAGIC\n", 729 DEVNAME(sc)); 730 continue; 731 } 732 733 /* validate metadata */ 734 if (sr_meta_validate(sd, ch_entry->src_dev_mm, sm, fm)) { 735 DNPRINTF(SR_D_META, "%s: invalid metadata\n", 736 DEVNAME(sc)); 737 no_disk = -1; 738 goto done; 739 } 740 741 /* assume first chunk contains metadata */ 742 if (got_meta == 0) { 743 bcopy(sm, sd->sd_meta, sizeof(*sd->sd_meta)); 744 got_meta = 1; 745 } 746 747 bcopy(cp, &ch_entry->src_meta, sizeof(ch_entry->src_meta)); 748 749 if (sm->ssdi.ssd_opt_no > 1) 750 panic("not yet read > 1 optional metadata members"); 751 752 if (sm->ssdi.ssd_opt_no) { 753 om = (struct sr_meta_opt *) ((u_int8_t *)(sm + 1) + 754 sizeof(struct sr_meta_chunk) * 755 sm->ssdi.ssd_chunk_no); 756 bcopy(om, &ch_entry->src_opt, 757 sizeof(ch_entry->src_opt)); 758 759 if (om->somi.som_type == SR_OPT_CRYPTO) { 760 bcopy( 761 &ch_entry->src_opt.somi.som_meta.smm_crypto, 762 &sd->mds.mdd_crypto.scr_meta, 763 sizeof(sd->mds.mdd_crypto.scr_meta)); 764 } 765 } 766 767 cp++; 768 no_disk++; 769 } 770 771 free(sm, M_DEVBUF); 772 if (fm) 773 free(fm, M_DEVBUF); 774 775 done: 776 DNPRINTF(SR_D_META, "%s: sr_meta_read found %d parts\n", DEVNAME(sc), 777 no_disk); 778 return (no_disk); 779 } 780 781 int 782 sr_meta_validate(struct sr_discipline *sd, dev_t dev, struct sr_metadata *sm, 783 void *fm) 784 { 785 struct sr_softc *sc = sd->sd_sc; 786 struct sr_meta_driver *s; 787 #ifdef SR_DEBUG 788 struct sr_meta_chunk *mc; 789 #endif 790 char devname[32]; 791 int rv = 1; 792 u_int8_t checksum[MD5_DIGEST_LENGTH]; 793 794 DNPRINTF(SR_D_META, "%s: sr_meta_validate(%p)\n", DEVNAME(sc), sm); 795 796 sr_meta_getdevname(sc, dev, devname, sizeof(devname)); 797 798 s = &smd[sd->sd_meta_type]; 799 if (sd->sd_meta_type != SR_META_F_NATIVE) 800 if (s->smd_validate(sd, sm, fm)) { 801 printf("%s: invalid foreign metadata\n", DEVNAME(sc)); 802 goto done; 803 } 804 805 /* 806 * at this point all foreign metadata has been translated to the native 807 * format and will be treated just like the native format 808 */ 809 810 if (sm->ssdi.ssd_magic != SR_MAGIC) { 811 printf("%s: not valid softraid metadata\n", DEVNAME(sc)); 812 goto done; 813 } 814 815 if (sm->ssdi.ssd_version != SR_META_VERSION) { 816 printf("%s: %s can not read metadata version %u, expected %u\n", 817 DEVNAME(sc), devname, sm->ssdi.ssd_version, 818 SR_META_VERSION); 819 goto done; 820 } 821 822 sr_checksum(sc, sm, &checksum, sizeof(struct sr_meta_invariant)); 823 if (bcmp(&checksum, &sm->ssd_checksum, sizeof(checksum))) { 824 printf("%s: invalid metadata checksum\n", DEVNAME(sc)); 825 goto done; 826 } 827 828 /* XXX do other checksums */ 829 830 #ifdef SR_DEBUG 831 /* warn if disk changed order */ 832 mc = (struct sr_meta_chunk *)(sm + 1); 833 if (strncmp(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname, devname, 834 sizeof(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname))) 835 DNPRINTF(SR_D_META, "%s: roaming device %s -> %s\n", 836 DEVNAME(sc), mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname, 837 devname); 
#endif

	/* we have metadata on disk */
	DNPRINTF(SR_D_META, "%s: sr_meta_validate valid metadata %s\n",
	    DEVNAME(sc), devname);

	rv = 0;
done:
	return (rv);
}

int
sr_meta_native_bootprobe(struct sr_softc *sc, struct device *dv,
    struct sr_metadata_list_head *mlh)
{
	struct vnode		*vn;
	struct disklabel	label;
	struct sr_metadata	*md;
	struct sr_discipline	*fake_sd;
	struct sr_metadata_list *mle;
	char			devname[32];
	dev_t			dev, devr;
	int			error, i, majdev;
	int			rv = SR_META_NOTCLAIMED;

	DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe\n", DEVNAME(sc));

	majdev = findblkmajor(dv);
	if (majdev == -1)
		goto done;
	dev = MAKEDISKDEV(majdev, dv->dv_unit, RAW_PART);
	if (bdevvp(dev, &vn)) {
		printf("%s: sr_meta_native_bootprobe: can't allocate vnode\n",
		    DEVNAME(sc));
		goto done;
	}

	/* open device */
	error = VOP_OPEN(vn, FREAD, NOCRED, 0);
	if (error) {
		DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe open "
		    "failed\n", DEVNAME(sc));
		vput(vn);
		goto done;
	}

	/* get disklabel */
	error = VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD, NOCRED, 0);
	if (error) {
		DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe ioctl "
		    "failed\n", DEVNAME(sc));
		VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, 0);
		vput(vn);
		goto done;
	}

	/* we are done, close device */
	error = VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, 0);
	if (error) {
		DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe close "
		    "failed\n", DEVNAME(sc));
		vput(vn);
		goto done;
	}
	vput(vn);

	md = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO);
	if (md == NULL) {
		printf("%s: not enough memory for metadata buffer\n",
		    DEVNAME(sc));
		goto done;
	}

	/* create fake sd to use utility functions */
	fake_sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_ZERO);
	if (fake_sd == NULL) {
		printf("%s: not enough memory for fake discipline\n",
		    DEVNAME(sc));
		goto nosd;
	}
	fake_sd->sd_sc = sc;
	fake_sd->sd_meta_type = SR_META_F_NATIVE;

	for (i = 0; i < MAXPARTITIONS; i++) {
		if (label.d_partitions[i].p_fstype != FS_RAID)
			continue;

		/* open partition */
		devr = MAKEDISKDEV(majdev, dv->dv_unit, i);
		if (bdevvp(devr, &vn)) {
			printf("%s: sr_meta_native_bootprobe: can't allocate "
			    "vnode for partition\n", DEVNAME(sc));
			goto done;
		}
		error = VOP_OPEN(vn, FREAD, NOCRED, 0);
		if (error) {
			DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe "
			    "open failed, partition %d\n",
			    DEVNAME(sc), i);
			vput(vn);
			continue;
		}

		if (sr_meta_native_read(fake_sd, devr, md, NULL)) {
			printf("%s: native bootprobe could not read native "
			    "metadata\n", DEVNAME(sc));
			VOP_CLOSE(vn, FREAD, NOCRED, 0);
			vput(vn);
			continue;
		}

		/* are we a softraid partition?
*/ 950 if (md->ssdi.ssd_magic != SR_MAGIC) { 951 VOP_CLOSE(vn, FREAD, NOCRED, 0); 952 vput(vn); 953 continue; 954 } 955 956 sr_meta_getdevname(sc, devr, devname, sizeof(devname)); 957 if (sr_meta_validate(fake_sd, devr, md, NULL) == 0) { 958 if (md->ssdi.ssd_flags & BIOC_SCNOAUTOASSEMBLE) { 959 DNPRINTF(SR_D_META, "%s: don't save %s\n", 960 DEVNAME(sc), devname); 961 } else { 962 /* XXX fix M_WAITOK, this is boot time */ 963 mle = malloc(sizeof(*mle), M_DEVBUF, 964 M_WAITOK | M_ZERO); 965 bcopy(md, &mle->sml_metadata, 966 SR_META_SIZE * 512); 967 mle->sml_mm = devr; 968 mle->sml_vn = vn; 969 SLIST_INSERT_HEAD(mlh, mle, sml_link); 970 rv = SR_META_CLAIMED; 971 } 972 } 973 974 /* we are done, close partition */ 975 VOP_CLOSE(vn, FREAD, NOCRED, 0); 976 vput(vn); 977 } 978 979 free(fake_sd, M_DEVBUF); 980 nosd: 981 free(md, M_DEVBUF); 982 done: 983 return (rv); 984 } 985 986 int 987 sr_boot_assembly(struct sr_softc *sc) 988 { 989 struct device *dv; 990 struct bioc_createraid bc; 991 struct sr_metadata_list_head mlh; 992 struct sr_metadata_list *mle, *mlenext, *mle1, *mle2; 993 struct sr_metadata *metadata; 994 struct sr_boot_volume_head bvh; 995 struct sr_boot_volume *vol, *vp1, *vp2; 996 struct sr_meta_chunk *hm; 997 struct sr_chunk_head *cl; 998 struct sr_chunk *hotspare, *chunk, *last; 999 u_int32_t chunk_id; 1000 u_int64_t *ondisk = NULL; 1001 dev_t *devs = NULL; 1002 char devname[32]; 1003 int rv = 0, i; 1004 1005 DNPRINTF(SR_D_META, "%s: sr_boot_assembly\n", DEVNAME(sc)); 1006 1007 SLIST_INIT(&mlh); 1008 1009 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1010 if (dv->dv_class != DV_DISK) 1011 continue; 1012 1013 /* XXX is there a better way of excluding some devices? */ 1014 if (!strncmp(dv->dv_xname, "fd", 2) || 1015 !strncmp(dv->dv_xname, "cd", 2) || 1016 !strncmp(dv->dv_xname, "rx", 2)) 1017 continue; 1018 1019 /* native softraid uses partitions */ 1020 if (sr_meta_native_bootprobe(sc, dv, &mlh) == SR_META_CLAIMED) 1021 continue; 1022 1023 /* probe non-native disks */ 1024 } 1025 1026 /* 1027 * Create a list of volumes and associate chunks with each volume. 1028 */ 1029 SLIST_INIT(&bvh); 1030 for (mle = SLIST_FIRST(&mlh); mle != SLIST_END(&mlh); mle = mlenext) { 1031 1032 mlenext = SLIST_NEXT(mle, sml_link); 1033 SLIST_REMOVE(&mlh, mle, sr_metadata_list, sml_link); 1034 1035 metadata = (struct sr_metadata *)&mle->sml_metadata; 1036 mle->sml_chunk_id = metadata->ssdi.ssd_chunk_id; 1037 1038 SLIST_FOREACH(vol, &bvh, sbv_link) { 1039 if (bcmp(&metadata->ssdi.ssd_uuid, &vol->sbv_uuid, 1040 sizeof(metadata->ssdi.ssd_uuid)) == 0) 1041 break; 1042 } 1043 1044 if (vol == NULL) { 1045 vol = malloc(sizeof(struct sr_boot_volume), 1046 M_DEVBUF, M_NOWAIT | M_CANFAIL | M_ZERO); 1047 if (vol == NULL) { 1048 printf("%s: failed to allocate boot volume!\n", 1049 DEVNAME(sc)); 1050 goto unwind; 1051 } 1052 1053 vol->sbv_level = metadata->ssdi.ssd_level; 1054 vol->sbv_volid = metadata->ssdi.ssd_volid; 1055 vol->sbv_chunk_no = metadata->ssdi.ssd_chunk_no; 1056 bcopy(&metadata->ssdi.ssd_uuid, &vol->sbv_uuid, 1057 sizeof(metadata->ssdi.ssd_uuid)); 1058 SLIST_INIT(&vol->sml); 1059 1060 /* Maintain volume order. 
*/ 1061 vp2 = NULL; 1062 SLIST_FOREACH(vp1, &bvh, sbv_link) { 1063 if (vp1->sbv_volid > vol->sbv_volid) 1064 break; 1065 vp2 = vp1; 1066 } 1067 if (vp2 == NULL) { 1068 DNPRINTF(SR_D_META, "%s: insert volume %u " 1069 "at head\n", DEVNAME(sc), vol->sbv_volid); 1070 SLIST_INSERT_HEAD(&bvh, vol, sbv_link); 1071 } else { 1072 DNPRINTF(SR_D_META, "%s: insert volume %u " 1073 "after %u\n", DEVNAME(sc), vol->sbv_volid, 1074 vp2->sbv_volid); 1075 SLIST_INSERT_AFTER(vp2, vol, sbv_link); 1076 } 1077 } 1078 1079 /* Maintain chunk order. */ 1080 mle2 = NULL; 1081 SLIST_FOREACH(mle1, &vol->sml, sml_link) { 1082 if (mle1->sml_chunk_id > mle->sml_chunk_id) 1083 break; 1084 mle2 = mle1; 1085 } 1086 if (mle2 == NULL) { 1087 DNPRINTF(SR_D_META, "%s: volume %u insert chunk %u " 1088 "at head\n", DEVNAME(sc), vol->sbv_volid, 1089 mle->sml_chunk_id); 1090 SLIST_INSERT_HEAD(&vol->sml, mle, sml_link); 1091 } else { 1092 DNPRINTF(SR_D_META, "%s: volume %u insert chunk %u " 1093 "after %u\n", DEVNAME(sc), vol->sbv_volid, 1094 mle->sml_chunk_id, mle2->sml_chunk_id); 1095 SLIST_INSERT_AFTER(mle2, mle, sml_link); 1096 } 1097 1098 vol->sbv_dev_no++; 1099 } 1100 1101 /* Allocate memory for device and ondisk version arrays. */ 1102 devs = malloc(BIOC_CRMAXLEN * sizeof(dev_t), M_DEVBUF, 1103 M_NOWAIT | M_CANFAIL); 1104 if (devs == NULL) { 1105 printf("%s: failed to allocate device array\n", DEVNAME(sc)); 1106 goto unwind; 1107 } 1108 ondisk = malloc(BIOC_CRMAXLEN * sizeof(u_int64_t), M_DEVBUF, 1109 M_NOWAIT | M_CANFAIL); 1110 if (ondisk == NULL) { 1111 printf("%s: failed to allocate ondisk array\n", DEVNAME(sc)); 1112 goto unwind; 1113 } 1114 1115 /* 1116 * Assemble hotspare "volumes". 1117 */ 1118 SLIST_FOREACH(vol, &bvh, sbv_link) { 1119 1120 /* Check if this is a hotspare "volume". */ 1121 if (vol->sbv_level != SR_HOTSPARE_LEVEL || 1122 vol->sbv_chunk_no != 1) 1123 continue; 1124 1125 #ifdef SR_DEBUG 1126 DNPRINTF(SR_D_META, "%s: assembling hotspare volume ", 1127 DEVNAME(sc)); 1128 if (sr_debug & SR_D_META) 1129 sr_uuid_print(&vol->sbv_uuid, 0); 1130 DNPRINTF(SR_D_META, " volid %u with %u chunks\n", 1131 vol->sbv_volid, vol->sbv_chunk_no); 1132 #endif 1133 1134 /* Create hotspare chunk metadata. */ 1135 hotspare = malloc(sizeof(struct sr_chunk), M_DEVBUF, 1136 M_NOWAIT | M_CANFAIL | M_ZERO); 1137 if (hotspare == NULL) { 1138 printf("%s: failed to allocate hotspare\n", 1139 DEVNAME(sc)); 1140 goto unwind; 1141 } 1142 1143 mle = SLIST_FIRST(&vol->sml); 1144 sr_meta_getdevname(sc, mle->sml_mm, devname, sizeof(devname)); 1145 hotspare->src_dev_mm = mle->sml_mm; 1146 hotspare->src_vn = mle->sml_vn; 1147 strlcpy(hotspare->src_devname, devname, 1148 sizeof(hotspare->src_devname)); 1149 hotspare->src_size = metadata->ssdi.ssd_size; 1150 1151 hm = &hotspare->src_meta; 1152 hm->scmi.scm_volid = SR_HOTSPARE_VOLID; 1153 hm->scmi.scm_chunk_id = 0; 1154 hm->scmi.scm_size = metadata->ssdi.ssd_size; 1155 hm->scmi.scm_coerced_size = metadata->ssdi.ssd_size; 1156 strlcpy(hm->scmi.scm_devname, devname, 1157 sizeof(hm->scmi.scm_devname)); 1158 bcopy(&metadata->ssdi.ssd_uuid, &hm->scmi.scm_uuid, 1159 sizeof(struct sr_uuid)); 1160 1161 sr_checksum(sc, hm, &hm->scm_checksum, 1162 sizeof(struct sr_meta_chunk_invariant)); 1163 1164 hm->scm_status = BIOC_SDHOTSPARE; 1165 1166 /* Add chunk to hotspare list. 
*/ 1167 rw_enter_write(&sc->sc_hs_lock); 1168 cl = &sc->sc_hotspare_list; 1169 if (SLIST_EMPTY(cl)) 1170 SLIST_INSERT_HEAD(cl, hotspare, src_link); 1171 else { 1172 SLIST_FOREACH(chunk, cl, src_link) 1173 last = chunk; 1174 SLIST_INSERT_AFTER(last, hotspare, src_link); 1175 } 1176 sc->sc_hotspare_no++; 1177 rw_exit_write(&sc->sc_hs_lock); 1178 1179 } 1180 1181 /* 1182 * Assemble RAID volumes. 1183 */ 1184 SLIST_FOREACH(vol, &bvh, sbv_link) { 1185 1186 /* Check if this is a hotspare "volume". */ 1187 if (vol->sbv_level == SR_HOTSPARE_LEVEL && 1188 vol->sbv_chunk_no == 1) 1189 continue; 1190 1191 #ifdef SR_DEBUG 1192 DNPRINTF(SR_D_META, "%s: assembling volume ", DEVNAME(sc)); 1193 if (sr_debug & SR_D_META) 1194 sr_uuid_print(&vol->sbv_uuid, 0); 1195 DNPRINTF(SR_D_META, " volid %u with %u chunks\n", 1196 vol->sbv_volid, vol->sbv_chunk_no); 1197 #endif 1198 1199 for (i = 0; i < BIOC_CRMAXLEN; i++) { 1200 devs[i] = NODEV; /* mark device as illegal */ 1201 ondisk[i] = 0; 1202 } 1203 1204 SLIST_FOREACH(mle, &vol->sml, sml_link) { 1205 metadata = (struct sr_metadata *)&mle->sml_metadata; 1206 chunk_id = metadata->ssdi.ssd_chunk_id; 1207 1208 if (devs[chunk_id] != NODEV) { 1209 vol->sbv_dev_no--; 1210 sr_meta_getdevname(sc, mle->sml_mm, devname, 1211 sizeof(devname)); 1212 printf("%s: found duplicate chunk %u for " 1213 "volume %u on device %s\n", DEVNAME(sc), 1214 chunk_id, vol->sbv_volid, devname); 1215 } 1216 1217 if (devs[chunk_id] == NODEV || 1218 metadata->ssd_ondisk > ondisk[chunk_id]) { 1219 devs[chunk_id] = mle->sml_mm; 1220 ondisk[chunk_id] = metadata->ssd_ondisk; 1221 DNPRINTF(SR_D_META, "%s: using ondisk " 1222 "metadata version %llu for chunk %u\n", 1223 DEVNAME(sc), ondisk[chunk_id], chunk_id); 1224 } 1225 } 1226 1227 if (vol->sbv_chunk_no != vol->sbv_dev_no) { 1228 printf("%s: not all chunks were provided; " 1229 "attempting to bring volume %d online\n", 1230 DEVNAME(sc), vol->sbv_volid); 1231 } 1232 1233 bzero(&bc, sizeof(bc)); 1234 bc.bc_level = vol->sbv_level; 1235 bc.bc_dev_list_len = vol->sbv_chunk_no * sizeof(dev_t); 1236 bc.bc_dev_list = devs; 1237 bc.bc_flags = BIOC_SCDEVT; 1238 1239 rw_enter_write(&sc->sc_lock); 1240 sr_ioctl_createraid(sc, &bc, 0); 1241 rw_exit_write(&sc->sc_lock); 1242 1243 rv++; 1244 } 1245 1246 /* done with metadata */ 1247 unwind: 1248 for (vp1 = SLIST_FIRST(&bvh); vp1 != SLIST_END(&bvh); vp1 = vp2) { 1249 vp2 = SLIST_NEXT(vp1, sbv_link); 1250 for (mle1 = SLIST_FIRST(&vp1->sml); 1251 mle1 != SLIST_END(&vp1->sml); mle1 = mle2) { 1252 mle2 = SLIST_NEXT(mle1, sml_link); 1253 free(mle1, M_DEVBUF); 1254 } 1255 free(vp1, M_DEVBUF); 1256 } 1257 for (mle = SLIST_FIRST(&mlh); mle != SLIST_END(&mlh); mle = mle2) { 1258 mle2 = SLIST_NEXT(mle, sml_link); 1259 free(mle, M_DEVBUF); 1260 } 1261 SLIST_INIT(&mlh); 1262 1263 if (devs) 1264 free(devs, M_DEVBUF); 1265 if (ondisk) 1266 free(ondisk, M_DEVBUF); 1267 1268 return (rv); 1269 } 1270 1271 int 1272 sr_meta_native_probe(struct sr_softc *sc, struct sr_chunk *ch_entry) 1273 { 1274 struct disklabel label; 1275 char *devname; 1276 int error, part; 1277 daddr64_t size; 1278 1279 DNPRINTF(SR_D_META, "%s: sr_meta_native_probe(%s)\n", 1280 DEVNAME(sc), ch_entry->src_devname); 1281 1282 devname = ch_entry->src_devname; 1283 part = DISKPART(ch_entry->src_dev_mm); 1284 1285 /* get disklabel */ 1286 error = VOP_IOCTL(ch_entry->src_vn, DIOCGDINFO, (caddr_t)&label, FREAD, 1287 NOCRED, 0); 1288 if (error) { 1289 DNPRINTF(SR_D_META, "%s: %s can't obtain disklabel\n", 1290 DEVNAME(sc), devname); 1291 goto unwind; 1292 } 1293 1294 /* 
make sure the partition is of the right type */ 1295 if (label.d_partitions[part].p_fstype != FS_RAID) { 1296 DNPRINTF(SR_D_META, 1297 "%s: %s partition not of type RAID (%d)\n", DEVNAME(sc), 1298 devname, 1299 label.d_partitions[part].p_fstype); 1300 goto unwind; 1301 } 1302 1303 size = DL_GETPSIZE(&label.d_partitions[part]) - 1304 SR_META_SIZE - SR_META_OFFSET; 1305 if (size <= 0) { 1306 DNPRINTF(SR_D_META, "%s: %s partition too small\n", DEVNAME(sc), 1307 devname); 1308 goto unwind; 1309 } 1310 ch_entry->src_size = size; 1311 1312 DNPRINTF(SR_D_META, "%s: probe found %s size %d\n", DEVNAME(sc), 1313 devname, size); 1314 1315 return (SR_META_F_NATIVE); 1316 unwind: 1317 DNPRINTF(SR_D_META, "%s: invalid device: %s\n", DEVNAME(sc), 1318 devname ? devname : "nodev"); 1319 return (SR_META_F_INVALID); 1320 } 1321 1322 int 1323 sr_meta_native_attach(struct sr_discipline *sd, int force) 1324 { 1325 struct sr_softc *sc = sd->sd_sc; 1326 struct sr_chunk_head *cl = &sd->sd_vol.sv_chunk_list; 1327 struct sr_metadata *md = NULL; 1328 struct sr_chunk *ch_entry, *ch_next; 1329 struct sr_uuid uuid; 1330 u_int64_t version = 0; 1331 int sr, not_sr, rv = 1, d, expected = -1, old_meta = 0; 1332 1333 DNPRINTF(SR_D_META, "%s: sr_meta_native_attach\n", DEVNAME(sc)); 1334 1335 md = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO); 1336 if (md == NULL) { 1337 printf("%s: not enough memory for metadata buffer\n", 1338 DEVNAME(sc)); 1339 goto bad; 1340 } 1341 1342 bzero(&uuid, sizeof uuid); 1343 1344 sr = not_sr = d = 0; 1345 SLIST_FOREACH(ch_entry, cl, src_link) { 1346 if (ch_entry->src_dev_mm == NODEV) 1347 continue; 1348 1349 if (sr_meta_native_read(sd, ch_entry->src_dev_mm, md, NULL)) { 1350 printf("%s: could not read native metadata\n", 1351 DEVNAME(sc)); 1352 goto bad; 1353 } 1354 1355 if (md->ssdi.ssd_magic == SR_MAGIC) { 1356 sr++; 1357 if (d == 0) { 1358 bcopy(&md->ssdi.ssd_uuid, &uuid, sizeof uuid); 1359 expected = md->ssdi.ssd_chunk_no; 1360 version = md->ssd_ondisk; 1361 d++; 1362 continue; 1363 } else if (bcmp(&md->ssdi.ssd_uuid, &uuid, 1364 sizeof uuid)) { 1365 printf("%s: not part of the same volume\n", 1366 DEVNAME(sc)); 1367 goto bad; 1368 } 1369 if (md->ssd_ondisk != version) { 1370 old_meta++; 1371 version = MAX(md->ssd_ondisk, version); 1372 } 1373 } else 1374 not_sr++; 1375 } 1376 1377 if (sr && not_sr) { 1378 printf("%s: not all chunks are of the native metadata format\n", 1379 DEVNAME(sc)); 1380 goto bad; 1381 } 1382 1383 /* mixed metadata versions; mark bad disks offline */ 1384 if (old_meta) { 1385 d = 0; 1386 for (ch_entry = SLIST_FIRST(cl); ch_entry != SLIST_END(cl); 1387 ch_entry = ch_next, d++) { 1388 ch_next = SLIST_NEXT(ch_entry, src_link); 1389 1390 /* XXX do we want to read this again? 
*/ 1391 if (ch_entry->src_dev_mm == NODEV) 1392 panic("src_dev_mm == NODEV"); 1393 if (sr_meta_native_read(sd, ch_entry->src_dev_mm, md, 1394 NULL)) 1395 printf("%s: could not read native metadata\n", 1396 DEVNAME(sc)); 1397 if (md->ssd_ondisk != version) 1398 sd->sd_vol.sv_chunks[d]->src_meta.scm_status = 1399 BIOC_SDOFFLINE; 1400 } 1401 } 1402 1403 if (expected != sr && !force && expected != -1) { 1404 DNPRINTF(SR_D_META, "%s: not all chunks were provided, trying " 1405 "anyway\n", DEVNAME(sc)); 1406 } 1407 1408 rv = 0; 1409 bad: 1410 if (md) 1411 free(md, M_DEVBUF); 1412 return (rv); 1413 } 1414 1415 int 1416 sr_meta_native_read(struct sr_discipline *sd, dev_t dev, 1417 struct sr_metadata *md, void *fm) 1418 { 1419 #ifdef SR_DEBUG 1420 struct sr_softc *sc = sd->sd_sc; 1421 #endif 1422 DNPRINTF(SR_D_META, "%s: sr_meta_native_read(0x%x, %p)\n", 1423 DEVNAME(sc), dev, md); 1424 1425 return (sr_meta_rw(sd, dev, md, SR_META_SIZE * 512, SR_META_OFFSET, 1426 B_READ)); 1427 } 1428 1429 int 1430 sr_meta_native_write(struct sr_discipline *sd, dev_t dev, 1431 struct sr_metadata *md, void *fm) 1432 { 1433 #ifdef SR_DEBUG 1434 struct sr_softc *sc = sd->sd_sc; 1435 #endif 1436 DNPRINTF(SR_D_META, "%s: sr_meta_native_write(0x%x, %p)\n", 1437 DEVNAME(sc), dev, md); 1438 1439 return (sr_meta_rw(sd, dev, md, SR_META_SIZE * 512, SR_META_OFFSET, 1440 B_WRITE)); 1441 } 1442 1443 void 1444 sr_hotplug_register(struct sr_discipline *sd, void *func) 1445 { 1446 struct sr_hotplug_list *mhe; 1447 1448 DNPRINTF(SR_D_MISC, "%s: sr_hotplug_register: %p\n", 1449 DEVNAME(sd->sd_sc), func); 1450 1451 /* make sure we aren't on the list yet */ 1452 SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link) 1453 if (mhe->sh_hotplug == func) 1454 return; 1455 1456 mhe = malloc(sizeof(struct sr_hotplug_list), M_DEVBUF, 1457 M_WAITOK | M_ZERO); 1458 mhe->sh_hotplug = func; 1459 mhe->sh_sd = sd; 1460 SLIST_INSERT_HEAD(&sr_hotplug_callbacks, mhe, shl_link); 1461 } 1462 1463 void 1464 sr_hotplug_unregister(struct sr_discipline *sd, void *func) 1465 { 1466 struct sr_hotplug_list *mhe; 1467 1468 DNPRINTF(SR_D_MISC, "%s: sr_hotplug_unregister: %s %p\n", 1469 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, func); 1470 1471 /* make sure we are on the list yet */ 1472 SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link) 1473 if (mhe->sh_hotplug == func) { 1474 SLIST_REMOVE(&sr_hotplug_callbacks, mhe, 1475 sr_hotplug_list, shl_link); 1476 free(mhe, M_DEVBUF); 1477 if (SLIST_EMPTY(&sr_hotplug_callbacks)) 1478 SLIST_INIT(&sr_hotplug_callbacks); 1479 return; 1480 } 1481 } 1482 1483 void 1484 sr_disk_attach(struct disk *diskp, int action) 1485 { 1486 struct sr_hotplug_list *mhe; 1487 1488 SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link) 1489 if (mhe->sh_sd->sd_ready) 1490 mhe->sh_hotplug(mhe->sh_sd, diskp, action); 1491 } 1492 1493 int 1494 sr_match(struct device *parent, void *match, void *aux) 1495 { 1496 return (1); 1497 } 1498 1499 void 1500 sr_attach(struct device *parent, struct device *self, void *aux) 1501 { 1502 struct sr_softc *sc = (void *)self; 1503 1504 DNPRINTF(SR_D_MISC, "\n%s: sr_attach", DEVNAME(sc)); 1505 1506 rw_init(&sc->sc_lock, "sr_lock"); 1507 rw_init(&sc->sc_hs_lock, "sr_hs_lock"); 1508 1509 SLIST_INIT(&sr_hotplug_callbacks); 1510 SLIST_INIT(&sc->sc_hotspare_list); 1511 1512 if (bio_register(&sc->sc_dev, sr_ioctl) != 0) 1513 printf("%s: controller registration failed", DEVNAME(sc)); 1514 else 1515 sc->sc_ioctl = sr_ioctl; 1516 1517 printf("\n"); 1518 1519 softraid_disk_attach = sr_disk_attach; 1520 1521 
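	/*
	 * Scan attached disks for existing softraid metadata and assemble
	 * any volumes and hotspares found.
	 */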
sr_boot_assembly(sc); 1522 } 1523 1524 int 1525 sr_detach(struct device *self, int flags) 1526 { 1527 return (0); 1528 } 1529 1530 int 1531 sr_activate(struct device *self, enum devact act) 1532 { 1533 return (1); 1534 } 1535 1536 void 1537 sr_minphys(struct buf *bp, struct scsi_link *sl) 1538 { 1539 DNPRINTF(SR_D_MISC, "sr_minphys: %d\n", bp->b_bcount); 1540 1541 /* XXX currently using SR_MAXFER = MAXPHYS */ 1542 if (bp->b_bcount > SR_MAXFER) 1543 bp->b_bcount = SR_MAXFER; 1544 minphys(bp); 1545 } 1546 1547 void 1548 sr_copy_internal_data(struct scsi_xfer *xs, void *v, size_t size) 1549 { 1550 size_t copy_cnt; 1551 1552 DNPRINTF(SR_D_MISC, "sr_copy_internal_data xs: %p size: %d\n", 1553 xs, size); 1554 1555 if (xs->datalen) { 1556 copy_cnt = MIN(size, xs->datalen); 1557 bcopy(v, xs->data, copy_cnt); 1558 } 1559 } 1560 1561 int 1562 sr_ccb_alloc(struct sr_discipline *sd) 1563 { 1564 struct sr_ccb *ccb; 1565 int i; 1566 1567 if (!sd) 1568 return (1); 1569 1570 DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc\n", DEVNAME(sd->sd_sc)); 1571 1572 if (sd->sd_ccb) 1573 return (1); 1574 1575 sd->sd_ccb = malloc(sizeof(struct sr_ccb) * 1576 sd->sd_max_wu * sd->sd_max_ccb_per_wu, M_DEVBUF, M_WAITOK | M_ZERO); 1577 TAILQ_INIT(&sd->sd_ccb_freeq); 1578 for (i = 0; i < sd->sd_max_wu * sd->sd_max_ccb_per_wu; i++) { 1579 ccb = &sd->sd_ccb[i]; 1580 ccb->ccb_dis = sd; 1581 sr_ccb_put(ccb); 1582 } 1583 1584 DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc ccb: %d\n", 1585 DEVNAME(sd->sd_sc), sd->sd_max_wu * sd->sd_max_ccb_per_wu); 1586 1587 return (0); 1588 } 1589 1590 void 1591 sr_ccb_free(struct sr_discipline *sd) 1592 { 1593 struct sr_ccb *ccb; 1594 1595 if (!sd) 1596 return; 1597 1598 DNPRINTF(SR_D_CCB, "%s: sr_ccb_free %p\n", DEVNAME(sd->sd_sc), sd); 1599 1600 while ((ccb = TAILQ_FIRST(&sd->sd_ccb_freeq)) != NULL) 1601 TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link); 1602 1603 if (sd->sd_ccb) 1604 free(sd->sd_ccb, M_DEVBUF); 1605 } 1606 1607 struct sr_ccb * 1608 sr_ccb_get(struct sr_discipline *sd) 1609 { 1610 struct sr_ccb *ccb; 1611 int s; 1612 1613 s = splbio(); 1614 1615 ccb = TAILQ_FIRST(&sd->sd_ccb_freeq); 1616 if (ccb) { 1617 TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link); 1618 ccb->ccb_state = SR_CCB_INPROGRESS; 1619 } 1620 1621 splx(s); 1622 1623 DNPRINTF(SR_D_CCB, "%s: sr_ccb_get: %p\n", DEVNAME(sd->sd_sc), 1624 ccb); 1625 1626 return (ccb); 1627 } 1628 1629 void 1630 sr_ccb_put(struct sr_ccb *ccb) 1631 { 1632 struct sr_discipline *sd = ccb->ccb_dis; 1633 int s; 1634 1635 DNPRINTF(SR_D_CCB, "%s: sr_ccb_put: %p\n", DEVNAME(sd->sd_sc), 1636 ccb); 1637 1638 s = splbio(); 1639 1640 ccb->ccb_wu = NULL; 1641 ccb->ccb_state = SR_CCB_FREE; 1642 ccb->ccb_target = -1; 1643 ccb->ccb_opaque = NULL; 1644 1645 TAILQ_INSERT_TAIL(&sd->sd_ccb_freeq, ccb, ccb_link); 1646 1647 splx(s); 1648 } 1649 1650 int 1651 sr_wu_alloc(struct sr_discipline *sd) 1652 { 1653 struct sr_workunit *wu; 1654 int i, no_wu; 1655 1656 if (!sd) 1657 return (1); 1658 1659 DNPRINTF(SR_D_WU, "%s: sr_wu_alloc %p %d\n", DEVNAME(sd->sd_sc), 1660 sd, sd->sd_max_wu); 1661 1662 if (sd->sd_wu) 1663 return (1); 1664 1665 no_wu = sd->sd_max_wu; 1666 sd->sd_wu_pending = no_wu; 1667 1668 sd->sd_wu = malloc(sizeof(struct sr_workunit) * no_wu, 1669 M_DEVBUF, M_WAITOK | M_ZERO); 1670 TAILQ_INIT(&sd->sd_wu_freeq); 1671 TAILQ_INIT(&sd->sd_wu_pendq); 1672 TAILQ_INIT(&sd->sd_wu_defq); 1673 for (i = 0; i < no_wu; i++) { 1674 wu = &sd->sd_wu[i]; 1675 wu->swu_dis = sd; 1676 sr_wu_put(wu); 1677 } 1678 1679 return (0); 1680 } 1681 1682 void 1683 sr_wu_free(struct sr_discipline *sd) 
1684 { 1685 struct sr_workunit *wu; 1686 1687 if (!sd) 1688 return; 1689 1690 DNPRINTF(SR_D_WU, "%s: sr_wu_free %p\n", DEVNAME(sd->sd_sc), sd); 1691 1692 while ((wu = TAILQ_FIRST(&sd->sd_wu_freeq)) != NULL) 1693 TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link); 1694 while ((wu = TAILQ_FIRST(&sd->sd_wu_pendq)) != NULL) 1695 TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link); 1696 while ((wu = TAILQ_FIRST(&sd->sd_wu_defq)) != NULL) 1697 TAILQ_REMOVE(&sd->sd_wu_defq, wu, swu_link); 1698 1699 if (sd->sd_wu) 1700 free(sd->sd_wu, M_DEVBUF); 1701 } 1702 1703 void 1704 sr_wu_put(struct sr_workunit *wu) 1705 { 1706 struct sr_discipline *sd = wu->swu_dis; 1707 struct sr_ccb *ccb; 1708 1709 int s; 1710 1711 DNPRINTF(SR_D_WU, "%s: sr_wu_put: %p\n", DEVNAME(sd->sd_sc), wu); 1712 1713 s = splbio(); 1714 1715 wu->swu_xs = NULL; 1716 wu->swu_state = SR_WU_FREE; 1717 wu->swu_ios_complete = 0; 1718 wu->swu_ios_failed = 0; 1719 wu->swu_ios_succeeded = 0; 1720 wu->swu_io_count = 0; 1721 wu->swu_blk_start = 0; 1722 wu->swu_blk_end = 0; 1723 wu->swu_collider = NULL; 1724 wu->swu_fake = 0; 1725 wu->swu_flags = 0; 1726 1727 while ((ccb = TAILQ_FIRST(&wu->swu_ccb)) != NULL) { 1728 TAILQ_REMOVE(&wu->swu_ccb, ccb, ccb_link); 1729 sr_ccb_put(ccb); 1730 } 1731 TAILQ_INIT(&wu->swu_ccb); 1732 1733 TAILQ_INSERT_TAIL(&sd->sd_wu_freeq, wu, swu_link); 1734 sd->sd_wu_pending--; 1735 1736 /* wake up sleepers */ 1737 #ifdef DIAGNOSTIC 1738 if (sd->sd_wu_sleep < 0) 1739 panic("negative wu sleepers"); 1740 #endif /* DIAGNOSTIC */ 1741 if (sd->sd_wu_sleep) 1742 wakeup(&sd->sd_wu_sleep); 1743 1744 splx(s); 1745 } 1746 1747 struct sr_workunit * 1748 sr_wu_get(struct sr_discipline *sd, int canwait) 1749 { 1750 struct sr_workunit *wu; 1751 int s; 1752 1753 s = splbio(); 1754 1755 for (;;) { 1756 wu = TAILQ_FIRST(&sd->sd_wu_freeq); 1757 if (wu) { 1758 TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link); 1759 wu->swu_state = SR_WU_INPROGRESS; 1760 sd->sd_wu_pending++; 1761 break; 1762 } else if (wu == NULL && canwait) { 1763 sd->sd_wu_sleep++; 1764 tsleep(&sd->sd_wu_sleep, PRIBIO, "sr_wu_get", 0); 1765 sd->sd_wu_sleep--; 1766 } else 1767 break; 1768 } 1769 1770 splx(s); 1771 1772 DNPRINTF(SR_D_WU, "%s: sr_wu_get: %p\n", DEVNAME(sd->sd_sc), wu); 1773 1774 return (wu); 1775 } 1776 1777 void 1778 sr_scsi_done(struct sr_discipline *sd, struct scsi_xfer *xs) 1779 { 1780 int s; 1781 1782 DNPRINTF(SR_D_DIS, "%s: sr_scsi_done: xs %p\n", DEVNAME(sd->sd_sc), xs); 1783 1784 s = splbio(); 1785 scsi_done(xs); 1786 splx(s); 1787 } 1788 1789 int 1790 sr_scsi_cmd(struct scsi_xfer *xs) 1791 { 1792 int s; 1793 struct scsi_link *link = xs->sc_link; 1794 struct sr_softc *sc = link->adapter_softc; 1795 struct sr_workunit *wu = NULL; 1796 struct sr_discipline *sd; 1797 1798 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: scsibus%d xs: %p " 1799 "flags: %#x\n", DEVNAME(sc), link->scsibus, xs, xs->flags); 1800 1801 sd = sc->sc_dis[link->scsibus]; 1802 if (sd == NULL) { 1803 s = splhigh(); 1804 sd = sc->sc_attach_dis; 1805 splx(s); 1806 1807 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: attaching %p\n", 1808 DEVNAME(sc), sd); 1809 if (sd == NULL) { 1810 printf("%s: sr_scsi_cmd NULL discipline\n", 1811 DEVNAME(sc)); 1812 goto stuffup; 1813 } 1814 } 1815 1816 if (sd->sd_deleted) { 1817 printf("%s: %s device is being deleted, failing io\n", 1818 DEVNAME(sc), sd->sd_meta->ssd_devname); 1819 goto stuffup; 1820 } 1821 1822 /* 1823 * we'll let the midlayer deal with stalls instead of being clever 1824 * and sending sr_wu_get !(xs->flags & SCSI_NOSLEEP) in cansleep 1825 */ 1826 if ((wu = sr_wu_get(sd, 
0)) == NULL) { 1827 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd no wu\n", DEVNAME(sc)); 1828 return (NO_CCB); 1829 } 1830 1831 xs->error = XS_NOERROR; 1832 wu->swu_xs = xs; 1833 1834 /* the midlayer will query LUNs so report sense to stop scanning */ 1835 if (link->target != 0 || link->lun != 0) { 1836 DNPRINTF(SR_D_CMD, "%s: bad target:lun %d:%d\n", 1837 DEVNAME(sc), link->target, link->lun); 1838 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT | 1839 SSD_ERRCODE_VALID; 1840 sd->sd_scsi_sense.flags = SKEY_ILLEGAL_REQUEST; 1841 sd->sd_scsi_sense.add_sense_code = 0x25; 1842 sd->sd_scsi_sense.add_sense_code_qual = 0x00; 1843 sd->sd_scsi_sense.extra_len = 4; 1844 goto stuffup; 1845 } 1846 1847 switch (xs->cmd->opcode) { 1848 case READ_COMMAND: 1849 case READ_BIG: 1850 case READ_16: 1851 case WRITE_COMMAND: 1852 case WRITE_BIG: 1853 case WRITE_16: 1854 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: READ/WRITE %02x\n", 1855 DEVNAME(sc), xs->cmd->opcode); 1856 if (sd->sd_scsi_rw(wu)) 1857 goto stuffup; 1858 break; 1859 1860 case SYNCHRONIZE_CACHE: 1861 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: SYNCHRONIZE_CACHE\n", 1862 DEVNAME(sc)); 1863 if (sd->sd_scsi_sync(wu)) 1864 goto stuffup; 1865 goto complete; 1866 1867 case TEST_UNIT_READY: 1868 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: TEST_UNIT_READY\n", 1869 DEVNAME(sc)); 1870 if (sd->sd_scsi_tur(wu)) 1871 goto stuffup; 1872 goto complete; 1873 1874 case START_STOP: 1875 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: START_STOP\n", 1876 DEVNAME(sc)); 1877 if (sd->sd_scsi_start_stop(wu)) 1878 goto stuffup; 1879 goto complete; 1880 1881 case INQUIRY: 1882 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: INQUIRY\n", 1883 DEVNAME(sc)); 1884 if (sd->sd_scsi_inquiry(wu)) 1885 goto stuffup; 1886 goto complete; 1887 1888 case READ_CAPACITY: 1889 case READ_CAPACITY_16: 1890 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd READ CAPACITY 0x%02x\n", 1891 DEVNAME(sc), xs->cmd->opcode); 1892 if (sd->sd_scsi_read_cap(wu)) 1893 goto stuffup; 1894 goto complete; 1895 1896 case REQUEST_SENSE: 1897 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd REQUEST SENSE\n", 1898 DEVNAME(sc)); 1899 if (sd->sd_scsi_req_sense(wu)) 1900 goto stuffup; 1901 goto complete; 1902 1903 default: 1904 DNPRINTF(SR_D_CMD, "%s: unsupported scsi command %x\n", 1905 DEVNAME(sc), xs->cmd->opcode); 1906 /* XXX might need to add generic function to handle others */ 1907 goto stuffup; 1908 } 1909 1910 return (SUCCESSFULLY_QUEUED); 1911 stuffup: 1912 if (sd && sd->sd_scsi_sense.error_code) { 1913 xs->error = XS_SENSE; 1914 bcopy(&sd->sd_scsi_sense, &xs->sense, sizeof(xs->sense)); 1915 bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense)); 1916 } else { 1917 xs->error = XS_DRIVER_STUFFUP; 1918 xs->flags |= ITSDONE; 1919 } 1920 complete: 1921 if (wu) 1922 sr_wu_put(wu); 1923 sr_scsi_done(sd, xs); 1924 return (COMPLETE); 1925 } 1926 int 1927 sr_scsi_ioctl(struct scsi_link *link, u_long cmd, caddr_t addr, int flag, 1928 struct proc *p) 1929 { 1930 DNPRINTF(SR_D_IOCTL, "%s: sr_scsi_ioctl cmd: %#x\n", 1931 DEVNAME((struct sr_softc *)link->adapter_softc), cmd); 1932 1933 return (sr_ioctl(link->adapter_softc, cmd, addr)); 1934 } 1935 1936 int 1937 sr_ioctl(struct device *dev, u_long cmd, caddr_t addr) 1938 { 1939 struct sr_softc *sc = (struct sr_softc *)dev; 1940 int rv = 0; 1941 1942 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl ", DEVNAME(sc)); 1943 1944 rw_enter_write(&sc->sc_lock); 1945 1946 switch (cmd) { 1947 case BIOCINQ: 1948 DNPRINTF(SR_D_IOCTL, "inq\n"); 1949 rv = sr_ioctl_inq(sc, (struct bioc_inq *)addr); 1950 break; 1951 1952 case BIOCVOL: 1953 DNPRINTF(SR_D_IOCTL, "vol\n"); 
1954 rv = sr_ioctl_vol(sc, (struct bioc_vol *)addr); 1955 break; 1956 1957 case BIOCDISK: 1958 DNPRINTF(SR_D_IOCTL, "disk\n"); 1959 rv = sr_ioctl_disk(sc, (struct bioc_disk *)addr); 1960 break; 1961 1962 case BIOCALARM: 1963 DNPRINTF(SR_D_IOCTL, "alarm\n"); 1964 /*rv = sr_ioctl_alarm(sc, (struct bioc_alarm *)addr); */ 1965 break; 1966 1967 case BIOCBLINK: 1968 DNPRINTF(SR_D_IOCTL, "blink\n"); 1969 /*rv = sr_ioctl_blink(sc, (struct bioc_blink *)addr); */ 1970 break; 1971 1972 case BIOCSETSTATE: 1973 DNPRINTF(SR_D_IOCTL, "setstate\n"); 1974 rv = sr_ioctl_setstate(sc, (struct bioc_setstate *)addr); 1975 break; 1976 1977 case BIOCCREATERAID: 1978 DNPRINTF(SR_D_IOCTL, "createraid\n"); 1979 rv = sr_ioctl_createraid(sc, (struct bioc_createraid *)addr, 1); 1980 break; 1981 1982 case BIOCDELETERAID: 1983 rv = sr_ioctl_deleteraid(sc, (struct bioc_deleteraid *)addr); 1984 break; 1985 default: 1986 DNPRINTF(SR_D_IOCTL, "invalid ioctl\n"); 1987 rv = ENOTTY; 1988 } 1989 1990 rw_exit_write(&sc->sc_lock); 1991 1992 return (rv); 1993 } 1994 1995 int 1996 sr_ioctl_inq(struct sr_softc *sc, struct bioc_inq *bi) 1997 { 1998 int i, vol, disk; 1999 2000 for (i = 0, vol = 0, disk = 0; i < SR_MAXSCSIBUS; i++) 2001 /* XXX this will not work when we stagger disciplines */ 2002 if (sc->sc_dis[i]) { 2003 vol++; 2004 disk += sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no; 2005 } 2006 2007 strlcpy(bi->bi_dev, sc->sc_dev.dv_xname, sizeof(bi->bi_dev)); 2008 bi->bi_novol = vol + sc->sc_hotspare_no; 2009 bi->bi_nodisk = disk + sc->sc_hotspare_no; 2010 2011 return (0); 2012 } 2013 2014 int 2015 sr_ioctl_vol(struct sr_softc *sc, struct bioc_vol *bv) 2016 { 2017 int i, vol, rv = EINVAL; 2018 struct sr_discipline *sd; 2019 struct sr_chunk *hotspare; 2020 daddr64_t rb, sz; 2021 2022 for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { 2023 /* XXX this will not work when we stagger disciplines */ 2024 if (sc->sc_dis[i]) 2025 vol++; 2026 if (vol != bv->bv_volid) 2027 continue; 2028 2029 sd = sc->sc_dis[i]; 2030 bv->bv_status = sd->sd_vol_status; 2031 bv->bv_size = sd->sd_meta->ssdi.ssd_size << DEV_BSHIFT; 2032 bv->bv_level = sd->sd_meta->ssdi.ssd_level; 2033 bv->bv_nodisk = sd->sd_meta->ssdi.ssd_chunk_no; 2034 if (bv->bv_status == BIOC_SVREBUILD) { 2035 sz = sd->sd_meta->ssdi.ssd_size; 2036 rb = sd->sd_meta->ssd_rebuild; 2037 if (rb > 0) 2038 bv->bv_percent = 100 - 2039 ((sz * 100 - rb * 100) / sz) - 1; 2040 else 2041 bv->bv_percent = 0; 2042 } 2043 strlcpy(bv->bv_dev, sd->sd_meta->ssd_devname, 2044 sizeof(bv->bv_dev)); 2045 strlcpy(bv->bv_vendor, sd->sd_meta->ssdi.ssd_vendor, 2046 sizeof(bv->bv_vendor)); 2047 rv = 0; 2048 goto done; 2049 } 2050 2051 /* Check hotspares list. */ 2052 SLIST_FOREACH(hotspare, &sc->sc_hotspare_list, src_link) { 2053 vol++; 2054 if (vol != bv->bv_volid) 2055 continue; 2056 2057 bv->bv_status = BIOC_SVONLINE; 2058 bv->bv_size = hotspare->src_meta.scmi.scm_size << DEV_BSHIFT; 2059 bv->bv_level = -1; /* Hotspare. 
*/ 2060 bv->bv_nodisk = 1; 2061 strlcpy(bv->bv_dev, hotspare->src_meta.scmi.scm_devname, 2062 sizeof(bv->bv_dev)); 2063 strlcpy(bv->bv_vendor, hotspare->src_meta.scmi.scm_devname, 2064 sizeof(bv->bv_vendor)); 2065 rv = 0; 2066 goto done; 2067 } 2068 2069 done: 2070 return (rv); 2071 } 2072 2073 int 2074 sr_ioctl_disk(struct sr_softc *sc, struct bioc_disk *bd) 2075 { 2076 int i, vol, rv = EINVAL, id; 2077 struct sr_chunk *src, *hotspare; 2078 2079 for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { 2080 /* XXX this will not work when we stagger disciplines */ 2081 if (sc->sc_dis[i]) 2082 vol++; 2083 if (vol != bd->bd_volid) 2084 continue; 2085 2086 id = bd->bd_diskid; 2087 if (id >= sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no) 2088 break; 2089 2090 src = sc->sc_dis[i]->sd_vol.sv_chunks[id]; 2091 bd->bd_status = src->src_meta.scm_status; 2092 bd->bd_size = src->src_meta.scmi.scm_size << DEV_BSHIFT; 2093 bd->bd_channel = vol; 2094 bd->bd_target = id; 2095 strlcpy(bd->bd_vendor, src->src_meta.scmi.scm_devname, 2096 sizeof(bd->bd_vendor)); 2097 rv = 0; 2098 goto done; 2099 } 2100 2101 /* Check hotspares list. */ 2102 SLIST_FOREACH(hotspare, &sc->sc_hotspare_list, src_link) { 2103 vol++; 2104 if (vol != bd->bd_volid) 2105 continue; 2106 2107 if (bd->bd_diskid != 0) 2108 break; 2109 2110 bd->bd_status = hotspare->src_meta.scm_status; 2111 bd->bd_size = hotspare->src_meta.scmi.scm_size << DEV_BSHIFT; 2112 bd->bd_channel = vol; 2113 bd->bd_target = bd->bd_diskid; 2114 strlcpy(bd->bd_vendor, hotspare->src_meta.scmi.scm_devname, 2115 sizeof(bd->bd_vendor)); 2116 rv = 0; 2117 goto done; 2118 } 2119 2120 done: 2121 return (rv); 2122 } 2123 2124 int 2125 sr_ioctl_setstate(struct sr_softc *sc, struct bioc_setstate *bs) 2126 { 2127 int rv = EINVAL; 2128 int i, vol, found, c; 2129 struct sr_discipline *sd = NULL; 2130 struct sr_chunk *ch_entry; 2131 struct sr_chunk_head *cl; 2132 2133 if (bs->bs_other_id_type == BIOC_SSOTHER_UNUSED) 2134 goto done; 2135 2136 if (bs->bs_status == BIOC_SSHOTSPARE) { 2137 rv = sr_hotspare(sc, (dev_t)bs->bs_other_id); 2138 goto done; 2139 } 2140 2141 for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { 2142 /* XXX this will not work when we stagger disciplines */ 2143 if (sc->sc_dis[i]) 2144 vol++; 2145 if (vol != bs->bs_volid) 2146 continue; 2147 sd = sc->sc_dis[i]; 2148 break; 2149 } 2150 if (sd == NULL) 2151 goto done; 2152 2153 switch (bs->bs_status) { 2154 case BIOC_SSOFFLINE: 2155 /* Take chunk offline */ 2156 found = c = 0; 2157 cl = &sd->sd_vol.sv_chunk_list; 2158 SLIST_FOREACH(ch_entry, cl, src_link) { 2159 if (ch_entry->src_dev_mm == bs->bs_other_id) { 2160 found = 1; 2161 break; 2162 } 2163 c++; 2164 } 2165 if (found == 0) { 2166 printf("%s: chunk not part of array\n", DEVNAME(sc)); 2167 goto done; 2168 } 2169 2170 /* XXX: check current state first */ 2171 sd->sd_set_chunk_state(sd, c, BIOC_SSOFFLINE); 2172 2173 if (sr_meta_save(sd, SR_META_DIRTY)) { 2174 printf("%s: could not save metadata to %s\n", 2175 DEVNAME(sc), sd->sd_meta->ssd_devname); 2176 goto done; 2177 } 2178 rv = 0; 2179 break; 2180 2181 case BIOC_SDSCRUB: 2182 break; 2183 2184 case BIOC_SSREBUILD: 2185 rv = sr_rebuild_init(sd, (dev_t)bs->bs_other_id); 2186 break; 2187 2188 default: 2189 printf("%s: unsupported state request %d\n", 2190 DEVNAME(sc), bs->bs_status); 2191 } 2192 2193 done: 2194 return (rv); 2195 } 2196 2197 int 2198 sr_chunk_in_use(struct sr_softc *sc, dev_t dev) 2199 { 2200 struct sr_discipline *sd; 2201 struct sr_chunk *chunk; 2202 int i, c; 2203 2204 /* See if chunk is already in use. 
 */
2205 	for (i = 0; i < SR_MAXSCSIBUS; i++) {
2206 		if (!sc->sc_dis[i])
2207 			continue;
2208 		sd = sc->sc_dis[i];
2209 		for (c = 0; c < sd->sd_meta->ssdi.ssd_chunk_no; c++) {
2210 			chunk = sd->sd_vol.sv_chunks[c];
2211 			if (chunk->src_dev_mm == dev)
2212 				return chunk->src_meta.scm_status;
2213 		}
2214 	}
2215 
2216 	/* Check hotspares list. */
2217 	SLIST_FOREACH(chunk, &sc->sc_hotspare_list, src_link)
2218 		if (chunk->src_dev_mm == dev)
2219 			return chunk->src_meta.scm_status;
2220 
2221 	return BIOC_SDINVALID;
2222 }
2223 
2224 int
2225 sr_hotspare(struct sr_softc *sc, dev_t dev)
2226 {
2227 	struct sr_discipline *sd = NULL;
2228 	struct sr_metadata *sm = NULL;
2229 	struct sr_meta_chunk *hm;
2230 	struct sr_chunk_head *cl;
2231 	struct sr_chunk *hotspare = NULL, *chunk, *last;
2232 	struct sr_uuid uuid;
2233 	struct disklabel label;
2234 	struct vnode *vn;
2235 	daddr64_t size;
2236 	char devname[32];
2237 	int rv = EINVAL;
2238 	int c, part, open = 0;
2239 
2240 	/*
2241 	 * Add device to global hotspares list.
2242 	 */
2243 
2244 	sr_meta_getdevname(sc, dev, devname, sizeof(devname));
2245 
2246 	/* Make sure chunk is not already in use. */
2247 	c = sr_chunk_in_use(sc, dev);
2248 	if (c != BIOC_SDINVALID && c != BIOC_SDOFFLINE) {
2249 		if (c == BIOC_SDHOTSPARE)
2250 			printf("%s: %s is already a hotspare\n",
2251 			    DEVNAME(sc), devname);
2252 		else
2253 			printf("%s: %s is already in use\n",
2254 			    DEVNAME(sc), devname);
2255 		goto done;
2256 	}
2257 
2258 	/* XXX - See if there is an existing degraded volume... */
2259 
2260 	/* Open device. */
2261 	if (bdevvp(dev, &vn)) {
2262 		printf("%s: sr_hotspare: can't allocate vnode\n", DEVNAME(sc));
2263 		goto done;
2264 	}
2265 	if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, 0)) {
2266 		DNPRINTF(SR_D_META,"%s: sr_hotspare cannot open %s\n",
2267 		    DEVNAME(sc), devname);
2268 		vput(vn);
2269 		goto fail;
2270 	}
2271 	open = 1; /* close dev on error */
2272 
2273 	/* Get partition details. */
2274 	part = DISKPART(dev);
2275 	if (VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD, NOCRED, 0)) {
2276 		DNPRINTF(SR_D_META, "%s: sr_hotspare ioctl failed\n",
2277 		    DEVNAME(sc));
2278 		VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, 0);
2279 		vput(vn);
2280 		goto fail;
2281 	}
2282 	if (label.d_partitions[part].p_fstype != FS_RAID) {
2283 		printf("%s: %s partition not of type RAID (%d)\n",
2284 		    DEVNAME(sc), devname,
2285 		    label.d_partitions[part].p_fstype);
2286 		goto fail;
2287 	}
2288 
2289 	/* Calculate partition size. */
2290 	size = DL_GETPSIZE(&label.d_partitions[part]) -
2291 	    SR_META_SIZE - SR_META_OFFSET;
2292 
2293 	/*
2294 	 * Create and populate chunk metadata.
2295 	 */
2296 
2297 	sr_uuid_get(&uuid);
2298 	hotspare = malloc(sizeof(struct sr_chunk), M_DEVBUF, M_WAITOK | M_ZERO);
2299 
2300 	hotspare->src_dev_mm = dev;
2301 	hotspare->src_vn = vn;
2302 	strlcpy(hotspare->src_devname, devname, sizeof(hm->scmi.scm_devname));
2303 	hotspare->src_size = size;
2304 
2305 	hm = &hotspare->src_meta;
2306 	hm->scmi.scm_volid = SR_HOTSPARE_VOLID;
2307 	hm->scmi.scm_chunk_id = 0;
2308 	hm->scmi.scm_size = size;
2309 	hm->scmi.scm_coerced_size = size;
2310 	strlcpy(hm->scmi.scm_devname, devname, sizeof(hm->scmi.scm_devname));
2311 	bcopy(&uuid, &hm->scmi.scm_uuid, sizeof(struct sr_uuid));
2312 
2313 	sr_checksum(sc, hm, &hm->scm_checksum,
2314 	    sizeof(struct sr_meta_chunk_invariant));
2315 
2316 	hm->scm_status = BIOC_SDHOTSPARE;
2317 
2318 	/*
2319 	 * Create and populate our own discipline and metadata.
2320 */ 2321 2322 sm = malloc(sizeof(struct sr_metadata), M_DEVBUF, M_WAITOK | M_ZERO); 2323 sm->ssdi.ssd_magic = SR_MAGIC; 2324 sm->ssdi.ssd_version = SR_META_VERSION; 2325 sm->ssd_ondisk = 0; 2326 sm->ssdi.ssd_flags = 0; 2327 bcopy(&uuid, &sm->ssdi.ssd_uuid, sizeof(struct sr_uuid)); 2328 sm->ssdi.ssd_chunk_no = 1; 2329 sm->ssdi.ssd_volid = SR_HOTSPARE_VOLID; 2330 sm->ssdi.ssd_level = SR_HOTSPARE_LEVEL; 2331 sm->ssdi.ssd_size = size; 2332 strlcpy(sm->ssdi.ssd_vendor, "OPENBSD", sizeof(sm->ssdi.ssd_vendor)); 2333 snprintf(sm->ssdi.ssd_product, sizeof(sm->ssdi.ssd_product), 2334 "SR %s", "HOTSPARE"); 2335 snprintf(sm->ssdi.ssd_revision, sizeof(sm->ssdi.ssd_revision), 2336 "%03d", SR_META_VERSION); 2337 2338 sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO); 2339 sd->sd_sc = sc; 2340 sd->sd_meta = sm; 2341 sd->sd_meta_type = SR_META_F_NATIVE; 2342 sd->sd_vol_status = BIOC_SVONLINE; 2343 strlcpy(sd->sd_name, "HOTSPARE", sizeof(sd->sd_name)); 2344 2345 /* Add chunk to volume. */ 2346 sd->sd_vol.sv_chunks = malloc(sizeof(struct sr_chunk *), M_DEVBUF, 2347 M_WAITOK | M_ZERO); 2348 sd->sd_vol.sv_chunks[0] = hotspare; 2349 SLIST_INIT(&sd->sd_vol.sv_chunk_list); 2350 SLIST_INSERT_HEAD(&sd->sd_vol.sv_chunk_list, hotspare, src_link); 2351 2352 /* Save metadata. */ 2353 if (sr_meta_save(sd, SR_META_DIRTY)) { 2354 printf("%s: could not save metadata to %s\n", 2355 DEVNAME(sc), devname); 2356 goto fail; 2357 } 2358 2359 /* 2360 * Add chunk to hotspare list. 2361 */ 2362 rw_enter_write(&sc->sc_hs_lock); 2363 cl = &sc->sc_hotspare_list; 2364 if (SLIST_EMPTY(cl)) 2365 SLIST_INSERT_HEAD(cl, hotspare, src_link); 2366 else { 2367 SLIST_FOREACH(chunk, cl, src_link) 2368 last = chunk; 2369 SLIST_INSERT_AFTER(last, hotspare, src_link); 2370 } 2371 sc->sc_hotspare_no++; 2372 rw_exit_write(&sc->sc_hs_lock); 2373 2374 rv = 0; 2375 goto done; 2376 2377 fail: 2378 if (hotspare) 2379 free(hotspare, M_DEVBUF); 2380 2381 done: 2382 if (sd && sd->sd_vol.sv_chunks) 2383 free(sd->sd_vol.sv_chunks, M_DEVBUF); 2384 if (sd) 2385 free(sd, M_DEVBUF); 2386 if (sm) 2387 free(sm, M_DEVBUF); 2388 if (open) { 2389 VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, 0); 2390 vput(vn); 2391 } 2392 2393 return (rv); 2394 } 2395 2396 void 2397 sr_hotspare_rebuild_callback(void *arg1, void *arg2) 2398 { 2399 sr_hotspare_rebuild((struct sr_discipline *)arg1); 2400 } 2401 2402 void 2403 sr_hotspare_rebuild(struct sr_discipline *sd) 2404 { 2405 struct sr_chunk_head *cl; 2406 struct sr_chunk *hotspare, *chunk = NULL; 2407 struct sr_workunit *wu; 2408 struct sr_ccb *ccb; 2409 int i, s, chunk_no, busy; 2410 2411 /* 2412 * Attempt to locate a hotspare and initiate rebuild. 2413 */ 2414 2415 for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) { 2416 if (sd->sd_vol.sv_chunks[i]->src_meta.scm_status == 2417 BIOC_SDOFFLINE) { 2418 chunk_no = i; 2419 chunk = sd->sd_vol.sv_chunks[i]; 2420 break; 2421 } 2422 } 2423 2424 if (chunk == NULL) { 2425 printf("%s: no offline chunk found on %s!\n", 2426 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname); 2427 return; 2428 } 2429 2430 /* See if we have a suitable hotspare... 
*/ 2431 rw_enter_write(&sd->sd_sc->sc_hs_lock); 2432 cl = &sd->sd_sc->sc_hotspare_list; 2433 SLIST_FOREACH(hotspare, cl, src_link) 2434 if (hotspare->src_size >= chunk->src_size) 2435 break; 2436 2437 if (hotspare != NULL) { 2438 2439 printf("%s: %s volume degraded, will attempt to " 2440 "rebuild on hotspare %s\n", DEVNAME(sd->sd_sc), 2441 sd->sd_meta->ssd_devname, hotspare->src_devname); 2442 2443 /* 2444 * Ensure that all pending I/O completes on the failed chunk 2445 * before trying to initiate a rebuild. 2446 */ 2447 i = 0; 2448 do { 2449 busy = 0; 2450 2451 s = splbio(); 2452 TAILQ_FOREACH(wu, &sd->sd_wu_pendq, swu_link) { 2453 TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) { 2454 if (ccb->ccb_target == chunk_no) 2455 busy = 1; 2456 } 2457 } 2458 TAILQ_FOREACH(wu, &sd->sd_wu_defq, swu_link) { 2459 TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) { 2460 if (ccb->ccb_target == chunk_no) 2461 busy = 1; 2462 } 2463 } 2464 splx(s); 2465 2466 if (busy) { 2467 tsleep(sd, PRIBIO, "sr_hotspare", hz); 2468 i++; 2469 } 2470 2471 } while (busy && i < 120); 2472 2473 DNPRINTF(SR_D_META, "%s: waited %i seconds for I/O to " 2474 "complete on failed chunk %s\n", DEVNAME(sd->sd_sc), 2475 i, chunk->src_devname); 2476 2477 if (busy) { 2478 printf("%s: pending I/O failed to complete on " 2479 "failed chunk %s, hotspare rebuild aborted...\n", 2480 DEVNAME(sd->sd_sc), chunk->src_devname); 2481 goto done; 2482 } 2483 2484 s = splbio(); 2485 rw_enter_write(&sd->sd_sc->sc_lock); 2486 if (sr_rebuild_init(sd, hotspare->src_dev_mm) == 0) { 2487 2488 /* Remove hotspare from available list. */ 2489 sd->sd_sc->sc_hotspare_no--; 2490 SLIST_REMOVE(cl, hotspare, sr_chunk, src_link); 2491 free(hotspare, M_DEVBUF); 2492 2493 } 2494 rw_exit_write(&sd->sd_sc->sc_lock); 2495 splx(s); 2496 } 2497 done: 2498 rw_exit_write(&sd->sd_sc->sc_hs_lock); 2499 } 2500 2501 int 2502 sr_rebuild_init(struct sr_discipline *sd, dev_t dev) 2503 { 2504 struct sr_softc *sc = sd->sd_sc; 2505 int rv = EINVAL, part; 2506 int c, found, open = 0; 2507 char devname[32]; 2508 struct vnode *vn; 2509 daddr64_t size, csize; 2510 struct disklabel label; 2511 struct sr_meta_chunk *old, *new; 2512 2513 /* 2514 * Attempt to initiate a rebuild onto the specified device. 
2515 	 */
2516 
2517 	if (!sd->sd_rebuild) {
2518 		printf("%s: discipline does not support rebuild\n",
2519 		    DEVNAME(sc));
2520 		goto done;
2521 	}
2522 
2523 	/* make sure volume is in the right state */
2524 	if (sd->sd_vol_status == BIOC_SVREBUILD) {
2525 		printf("%s: rebuild already in progress\n", DEVNAME(sc));
2526 		goto done;
2527 	}
2528 	if (sd->sd_vol_status != BIOC_SVDEGRADED) {
2529 		printf("%s: %s not degraded\n", DEVNAME(sc),
2530 		    sd->sd_meta->ssd_devname);
2531 		goto done;
2532 	}
2533 
2534 	/* find offline chunk */
2535 	for (c = 0, found = -1; c < sd->sd_meta->ssdi.ssd_chunk_no; c++)
2536 		if (sd->sd_vol.sv_chunks[c]->src_meta.scm_status ==
2537 		    BIOC_SDOFFLINE) {
2538 			found = c;
2539 			new = &sd->sd_vol.sv_chunks[c]->src_meta;
2540 			if (c > 0)
2541 				break; /* roll at least once over the for */
2542 		} else {
2543 			csize = sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_size;
2544 			old = &sd->sd_vol.sv_chunks[c]->src_meta;
2545 			if (found != -1)
2546 				break;
2547 		}
2548 	if (found == -1) {
2549 		printf("%s: no offline chunks available for rebuild\n",
2550 		    DEVNAME(sc));
2551 		goto done;
2552 	}
2553 
2554 	/* populate meta entry */
2555 	sr_meta_getdevname(sc, dev, devname, sizeof(devname));
2556 	if (bdevvp(dev, &vn)) {
2557 		printf("%s: sr_rebuild_init: can't allocate vnode\n",
2558 		    DEVNAME(sc));
2559 		goto done;
2560 	}
2561 
2562 	if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, 0)) {
2563 		DNPRINTF(SR_D_META,"%s: sr_ioctl_setstate can't "
2564 		    "open %s\n", DEVNAME(sc), devname);
2565 		vput(vn);
2566 		goto done;
2567 	}
2568 	open = 1; /* close dev on error */
2569 
2570 	/* get partition */
2571 	part = DISKPART(dev);
2572 	if (VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD, NOCRED, 0)) {
2573 		DNPRINTF(SR_D_META, "%s: sr_ioctl_setstate ioctl failed\n",
2574 		    DEVNAME(sc));
2575 		goto done;
2576 	}
2577 	if (label.d_partitions[part].p_fstype != FS_RAID) {
2578 		printf("%s: %s partition not of type RAID (%d)\n",
2579 		    DEVNAME(sc), devname,
2580 		    label.d_partitions[part].p_fstype);
2581 		goto done;
2582 	}
2583 
2584 	/* is partition large enough? */
2585 	size = DL_GETPSIZE(&label.d_partitions[part]) -
2586 	    SR_META_SIZE - SR_META_OFFSET;
2587 	if (size < csize) {
2588 		printf("%s: partition too small, at least %llu B required\n",
2589 		    DEVNAME(sc), csize << DEV_BSHIFT);
2590 		goto done;
2591 	} else if (size > csize)
2592 		printf("%s: partition too large, wasting %llu B\n",
2593 		    DEVNAME(sc), (size - csize) << DEV_BSHIFT);
2594 
2595 	/* make sure we are not stomping on some other partition */
2596 	c = sr_chunk_in_use(sc, dev);
2597 	if (c != BIOC_SDINVALID && c != BIOC_SDOFFLINE) {
2598 		printf("%s: %s is already in use\n", DEVNAME(sc), devname);
2599 		goto done;
2600 	}
2601 
2602 	/* Reset rebuild counter since we are rebuilding onto a new chunk.
*/ 2603 sd->sd_meta->ssd_rebuild = 0; 2604 2605 /* recreate metadata */ 2606 open = 0; /* leave dev open from here on out */ 2607 sd->sd_vol.sv_chunks[found]->src_dev_mm = dev; 2608 sd->sd_vol.sv_chunks[found]->src_vn = vn; 2609 new->scmi.scm_volid = old->scmi.scm_volid; 2610 new->scmi.scm_chunk_id = found; 2611 strlcpy(new->scmi.scm_devname, devname, 2612 sizeof new->scmi.scm_devname); 2613 new->scmi.scm_size = size; 2614 new->scmi.scm_coerced_size = old->scmi.scm_coerced_size; 2615 bcopy(&old->scmi.scm_uuid, &new->scmi.scm_uuid, 2616 sizeof new->scmi.scm_uuid); 2617 sr_checksum(sc, new, &new->scm_checksum, 2618 sizeof(struct sr_meta_chunk_invariant)); 2619 sd->sd_set_chunk_state(sd, found, BIOC_SDREBUILD); 2620 if (sr_meta_save(sd, SR_META_DIRTY)) { 2621 printf("%s: could not save metadata to %s\n", 2622 DEVNAME(sc), devname); 2623 open = 1; 2624 goto done; 2625 } 2626 2627 printf("%s: rebuild of %s started on %s\n", DEVNAME(sc), 2628 sd->sd_meta->ssd_devname, devname); 2629 2630 sd->sd_reb_abort = 0; 2631 kthread_create_deferred(sr_rebuild, sd); 2632 2633 rv = 0; 2634 done: 2635 if (open) { 2636 VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, 0); 2637 vput(vn); 2638 } 2639 2640 return (rv); 2641 } 2642 2643 void 2644 sr_roam_chunks(struct sr_discipline *sd) 2645 { 2646 struct sr_softc *sc = sd->sd_sc; 2647 struct sr_chunk *chunk; 2648 struct sr_meta_chunk *meta; 2649 int roamed = 0; 2650 2651 /* Have any chunks roamed? */ 2652 SLIST_FOREACH(chunk, &sd->sd_vol.sv_chunk_list, src_link) { 2653 2654 meta = &chunk->src_meta; 2655 2656 if (strncmp(meta->scmi.scm_devname, chunk->src_devname, 2657 sizeof(meta->scmi.scm_devname))) { 2658 2659 printf("%s: roaming device %s -> %s\n", DEVNAME(sc), 2660 meta->scmi.scm_devname, chunk->src_devname); 2661 2662 strlcpy(meta->scmi.scm_devname, chunk->src_devname, 2663 sizeof(meta->scmi.scm_devname)); 2664 2665 roamed++; 2666 } 2667 } 2668 2669 if (roamed) 2670 sr_meta_save(sd, SR_META_DIRTY); 2671 } 2672 2673 int 2674 sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user) 2675 { 2676 dev_t *dt; 2677 int i, s, no_chunk, rv = EINVAL, vol; 2678 int no_meta, updatemeta = 0, disk = 1; 2679 u_int64_t vol_size; 2680 int32_t strip_size = 0; 2681 struct sr_chunk_head *cl; 2682 struct sr_discipline *sd = NULL; 2683 struct sr_chunk *ch_entry; 2684 struct device *dev, *dev2; 2685 struct scsibus_attach_args saa; 2686 2687 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_createraid(%d)\n", 2688 DEVNAME(sc), user); 2689 2690 /* user input */ 2691 if (bc->bc_dev_list_len > BIOC_CRMAXLEN) 2692 goto unwind; 2693 2694 dt = malloc(bc->bc_dev_list_len, M_DEVBUF, M_WAITOK | M_ZERO); 2695 if (user) { 2696 if (copyin(bc->bc_dev_list, dt, bc->bc_dev_list_len) != 0) 2697 goto unwind; 2698 } else 2699 bcopy(bc->bc_dev_list, dt, bc->bc_dev_list_len); 2700 2701 sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO); 2702 sd->sd_sc = sc; 2703 2704 no_chunk = bc->bc_dev_list_len / sizeof(dev_t); 2705 cl = &sd->sd_vol.sv_chunk_list; 2706 SLIST_INIT(cl); 2707 2708 sd->sd_meta_type = sr_meta_probe(sd, dt, no_chunk); 2709 if (sd->sd_meta_type == SR_META_F_INVALID) { 2710 printf("%s: invalid metadata format\n", DEVNAME(sc)); 2711 goto unwind; 2712 } 2713 2714 if (sr_meta_attach(sd, bc->bc_flags & BIOC_SCFORCE)) { 2715 printf("%s: can't attach metadata type %d\n", DEVNAME(sc), 2716 sd->sd_meta_type); 2717 goto unwind; 2718 } 2719 2720 /* force the raid volume by clearing metadata region */ 2721 if (bc->bc_flags & BIOC_SCFORCE) { 2722 /* make sure disk isn't up and running */ 
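		/*
		 * sr_meta_read() returns the number of chunks on which valid
		 * on-disk metadata was found (or -1 if metadata is corrupt),
		 * so a non-zero result here means this disk already carries
		 * softraid metadata; refuse the force-create if that volume
		 * is currently assembled.
		 */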
2723 if (sr_meta_read(sd)) 2724 if (sr_already_assembled(sd)) { 2725 printf("%s: disk ", DEVNAME(sc)); 2726 sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0); 2727 printf(" is currently in use; can't force " 2728 "create\n"); 2729 goto unwind; 2730 } 2731 2732 if (sr_meta_clear(sd)) { 2733 printf("%s: failed to clear metadata\n", DEVNAME(sc)); 2734 goto unwind; 2735 } 2736 } 2737 2738 if ((no_meta = sr_meta_read(sd)) == 0) { 2739 /* fill out all chunk metadata */ 2740 sr_meta_chunks_create(sc, cl); 2741 ch_entry = SLIST_FIRST(cl); 2742 2743 /* no metadata available */ 2744 switch (bc->bc_level) { 2745 case 0: 2746 if (no_chunk < 2) 2747 goto unwind; 2748 strlcpy(sd->sd_name, "RAID 0", sizeof(sd->sd_name)); 2749 /* 2750 * XXX add variable strip size later even though 2751 * MAXPHYS is really the clever value, users like 2752 * to tinker with that type of stuff 2753 */ 2754 strip_size = MAXPHYS; 2755 vol_size = 2756 (ch_entry->src_meta.scmi.scm_coerced_size & 2757 ~((strip_size >> DEV_BSHIFT) - 1)) * no_chunk; 2758 break; 2759 case 1: 2760 if (no_chunk < 2) 2761 goto unwind; 2762 strlcpy(sd->sd_name, "RAID 1", sizeof(sd->sd_name)); 2763 vol_size = ch_entry->src_meta.scmi.scm_coerced_size; 2764 break; 2765 case 4: 2766 case 5: 2767 if (no_chunk < 3) 2768 goto unwind; 2769 if (bc->bc_level == 4) 2770 strlcpy(sd->sd_name, "RAID 4", 2771 sizeof(sd->sd_name)); 2772 else 2773 strlcpy(sd->sd_name, "RAID 5", 2774 sizeof(sd->sd_name)); 2775 /* 2776 * XXX add variable strip size later even though 2777 * MAXPHYS is really the clever value, users like 2778 * to tinker with that type of stuff 2779 */ 2780 strip_size = MAXPHYS; 2781 vol_size = 2782 (ch_entry->src_meta.scmi.scm_coerced_size & 2783 ~((strip_size >> DEV_BSHIFT) - 1)) * (no_chunk - 1); 2784 break; 2785 //#ifdef not_yet 2786 case 6: 2787 if (no_chunk < 4) 2788 goto unwind; 2789 strlcpy(sd->sd_name, "RAID 6", 2790 sizeof(sd->sd_name)); 2791 /* 2792 * XXX add variable strip size later even though 2793 * MAXPHYS is really the clever value, users like 2794 * to tinker with that type of stuff 2795 */ 2796 strip_size = MAXPHYS; 2797 vol_size = 2798 (ch_entry->src_meta.scmi.scm_coerced_size & 2799 ~((strip_size >> DEV_BSHIFT) - 1)) * (no_chunk - 2); 2800 break; 2801 //#endif /* not_yet */ 2802 #ifdef AOE 2803 #ifdef not_yet 2804 case 'A': 2805 /* target */ 2806 if (no_chunk != 1) 2807 goto unwind; 2808 strlcpy(sd->sd_name, "AOE TARG", sizeof(sd->sd_name)); 2809 vol_size = ch_entry->src_meta.scmi.scm_coerced_size; 2810 break; 2811 case 'a': 2812 /* initiator */ 2813 if (no_chunk != 1) 2814 goto unwind; 2815 strlcpy(sd->sd_name, "AOE INIT", sizeof(sd->sd_name)); 2816 break; 2817 #endif /* not_yet */ 2818 #endif /* AOE */ 2819 #ifdef CRYPTO 2820 case 'C': 2821 DNPRINTF(SR_D_IOCTL, 2822 "%s: sr_ioctl_createraid: no_chunk %d\n", 2823 DEVNAME(sc), no_chunk); 2824 2825 if (no_chunk != 1) 2826 goto unwind; 2827 2828 /* no hint available yet */ 2829 if (bc->bc_opaque_flags & BIOC_SOOUT) { 2830 bc->bc_opaque_status = BIOC_SOINOUT_FAILED; 2831 rv = 0; 2832 goto unwind; 2833 } 2834 2835 if (!(bc->bc_flags & BIOC_SCNOAUTOASSEMBLE)) 2836 goto unwind; 2837 2838 if (sr_crypto_get_kdf(bc, sd)) 2839 goto unwind; 2840 2841 strlcpy(sd->sd_name, "CRYPTO", sizeof(sd->sd_name)); 2842 vol_size = ch_entry->src_meta.scmi.scm_size; 2843 2844 sr_crypto_create_keys(sd); 2845 2846 break; 2847 #endif /* CRYPTO */ 2848 default: 2849 goto unwind; 2850 } 2851 2852 /* fill out all volume metadata */ 2853 DNPRINTF(SR_D_IOCTL, 2854 "%s: sr_ioctl_createraid: vol_size: %lld\n", 2855 
DEVNAME(sc), vol_size); 2856 sd->sd_meta->ssdi.ssd_chunk_no = no_chunk; 2857 sd->sd_meta->ssdi.ssd_size = vol_size; 2858 sd->sd_vol_status = BIOC_SVONLINE; 2859 sd->sd_meta->ssdi.ssd_level = bc->bc_level; 2860 sd->sd_meta->ssdi.ssd_strip_size = strip_size; 2861 strlcpy(sd->sd_meta->ssdi.ssd_vendor, "OPENBSD", 2862 sizeof(sd->sd_meta->ssdi.ssd_vendor)); 2863 snprintf(sd->sd_meta->ssdi.ssd_product, 2864 sizeof(sd->sd_meta->ssdi.ssd_product), "SR %s", 2865 sd->sd_name); 2866 snprintf(sd->sd_meta->ssdi.ssd_revision, 2867 sizeof(sd->sd_meta->ssdi.ssd_revision), "%03d", 2868 SR_META_VERSION); 2869 2870 sd->sd_meta_flags = bc->bc_flags & BIOC_SCNOAUTOASSEMBLE; 2871 updatemeta = 1; 2872 } else if (no_meta == no_chunk) { 2873 if (sd->sd_meta->ssd_meta_flags & SR_META_DIRTY) 2874 printf("%s: %s was not shutdown properly\n", 2875 DEVNAME(sc), sd->sd_meta->ssd_devname); 2876 if (user == 0 && sd->sd_meta_flags & BIOC_SCNOAUTOASSEMBLE) { 2877 DNPRINTF(SR_D_META, "%s: disk not auto assembled from " 2878 "metadata\n", DEVNAME(sc)); 2879 goto unwind; 2880 } 2881 if (sr_already_assembled(sd)) { 2882 printf("%s: disk ", DEVNAME(sc)); 2883 sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0); 2884 printf(" already assembled\n"); 2885 goto unwind; 2886 } 2887 #ifdef CRYPTO 2888 /* provide userland with kdf hint */ 2889 if (bc->bc_opaque_flags & BIOC_SOOUT) { 2890 if (bc->bc_opaque == NULL) 2891 goto unwind; 2892 2893 if (sizeof(sd->mds.mdd_crypto.scr_meta.scm_kdfhint) < 2894 bc->bc_opaque_size) 2895 goto unwind; 2896 2897 if (copyout(sd->mds.mdd_crypto.scr_meta.scm_kdfhint, 2898 bc->bc_opaque, bc->bc_opaque_size)) 2899 goto unwind; 2900 2901 /* we're done */ 2902 bc->bc_opaque_status = BIOC_SOINOUT_OK; 2903 rv = 0; 2904 goto unwind; 2905 } 2906 /* get kdf with maskkey from userland */ 2907 if (bc->bc_opaque_flags & BIOC_SOIN) { 2908 if (sr_crypto_get_kdf(bc, sd)) 2909 goto unwind; 2910 } 2911 #endif /* CRYPTO */ 2912 DNPRINTF(SR_D_META, "%s: disk assembled from metadata\n", 2913 DEVNAME(sc)); 2914 updatemeta = 0; 2915 } else if (no_meta == -1) { 2916 printf("%s: one of the chunks has corrupt metadata; aborting " 2917 "assembly\n", DEVNAME(sc)); 2918 goto unwind; 2919 } else { 2920 if (sr_already_assembled(sd)) { 2921 printf("%s: disk ", DEVNAME(sc)); 2922 sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0); 2923 printf(" already assembled; will not partial " 2924 "assemble it\n"); 2925 goto unwind; 2926 } 2927 printf("%s: trying to bring up %s degraded\n", DEVNAME(sc), 2928 sd->sd_meta->ssd_devname); 2929 } 2930 2931 /* metadata SHALL be fully filled in at this point */ 2932 2933 /* Make sure that metadata level matches assembly level. 
*/ 2934 if (sd->sd_meta->ssdi.ssd_level != bc->bc_level) { 2935 printf("%s: volume level does not match metadata level!\n", 2936 DEVNAME(sc)); 2937 goto unwind; 2938 } 2939 2940 if (sr_discipline_init(sd, sd->sd_meta->ssdi.ssd_level)) { 2941 printf("%s: could not initialize discipline\n", DEVNAME(sc)); 2942 goto unwind; 2943 } 2944 2945 /* allocate all resources */ 2946 if ((rv = sd->sd_alloc_resources(sd))) 2947 goto unwind; 2948 2949 if (disk) { 2950 /* set volume status */ 2951 sd->sd_set_vol_state(sd); 2952 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 2953 printf("%s: %s offline, will not be brought online\n", 2954 DEVNAME(sc), sd->sd_meta->ssd_devname); 2955 goto unwind; 2956 } 2957 2958 /* setup scsi midlayer */ 2959 if (sd->sd_openings) 2960 sd->sd_link.openings = sd->sd_openings(sd); 2961 else 2962 sd->sd_link.openings = sd->sd_max_wu; 2963 sd->sd_link.device = &sr_dev; 2964 sd->sd_link.device_softc = sc; 2965 sd->sd_link.adapter_softc = sc; 2966 sd->sd_link.adapter = &sr_switch; 2967 sd->sd_link.adapter_target = SR_MAX_LD; 2968 sd->sd_link.adapter_buswidth = 1; 2969 bzero(&saa, sizeof(saa)); 2970 saa.saa_sc_link = &sd->sd_link; 2971 2972 /* 2973 * we passed all checks return ENXIO if volume can't be created 2974 */ 2975 rv = ENXIO; 2976 2977 /* clear sense data */ 2978 bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense)); 2979 2980 /* use temporary discipline pointer */ 2981 s = splhigh(); 2982 sc->sc_attach_dis = sd; 2983 splx(s); 2984 dev2 = config_found(&sc->sc_dev, &saa, scsiprint); 2985 s = splhigh(); 2986 sc->sc_attach_dis = NULL; 2987 splx(s); 2988 TAILQ_FOREACH(dev, &alldevs, dv_list) 2989 if (dev->dv_parent == dev2) 2990 break; 2991 if (dev == NULL) 2992 goto unwind; 2993 2994 DNPRINTF(SR_D_IOCTL, "%s: sr device added: %s on scsibus%d\n", 2995 DEVNAME(sc), dev->dv_xname, sd->sd_link.scsibus); 2996 2997 sc->sc_dis[sd->sd_link.scsibus] = sd; 2998 for (i = 0, vol = -1; i <= sd->sd_link.scsibus; i++) 2999 if (sc->sc_dis[i]) 3000 vol++; 3001 sd->sd_scsibus_dev = dev2; 3002 3003 rv = 0; 3004 if (updatemeta) { 3005 /* fill out remaining volume metadata */ 3006 sd->sd_meta->ssdi.ssd_volid = vol; 3007 strlcpy(sd->sd_meta->ssd_devname, dev->dv_xname, 3008 sizeof(sd->sd_meta->ssd_devname)); 3009 sr_meta_init(sd, cl); 3010 } else { 3011 if (strncmp(sd->sd_meta->ssd_devname, dev->dv_xname, 3012 sizeof(dev->dv_xname))) { 3013 printf("%s: volume %s is roaming, it used to " 3014 "be %s, updating metadata\n", 3015 DEVNAME(sc), dev->dv_xname, 3016 sd->sd_meta->ssd_devname); 3017 3018 sd->sd_meta->ssdi.ssd_volid = vol; 3019 strlcpy(sd->sd_meta->ssd_devname, dev->dv_xname, 3020 sizeof(sd->sd_meta->ssd_devname)); 3021 } 3022 } 3023 3024 /* Update device name on any chunks which roamed. 
*/ 3025 sr_roam_chunks(sd); 3026 3027 #ifndef SMALL_KERNEL 3028 if (sr_sensors_create(sd)) 3029 printf("%s: unable to create sensor for %s\n", 3030 DEVNAME(sc), dev->dv_xname); 3031 else 3032 sd->sd_vol.sv_sensor_valid = 1; 3033 #endif /* SMALL_KERNEL */ 3034 } else { 3035 /* we are not an os disk */ 3036 if (updatemeta) { 3037 /* fill out remaining volume metadata */ 3038 sd->sd_meta->ssdi.ssd_volid = 0; 3039 strlcpy(sd->sd_meta->ssd_devname, ch_entry->src_devname, 3040 sizeof(sd->sd_meta->ssd_devname)); 3041 sr_meta_init(sd, cl); 3042 } 3043 if (sd->sd_start_discipline(sd)) 3044 goto unwind; 3045 } 3046 3047 /* save metadata to disk */ 3048 rv = sr_meta_save(sd, SR_META_DIRTY); 3049 sd->sd_shutdownhook = shutdownhook_establish(sr_shutdown, sd); 3050 3051 if (sd->sd_vol_status == BIOC_SVREBUILD) 3052 kthread_create_deferred(sr_rebuild, sd); 3053 3054 sd->sd_ready = 1; 3055 3056 return (rv); 3057 unwind: 3058 sr_discipline_shutdown(sd); 3059 3060 return (rv); 3061 } 3062 3063 int 3064 sr_ioctl_deleteraid(struct sr_softc *sc, struct bioc_deleteraid *dr) 3065 { 3066 struct sr_discipline *sd = NULL; 3067 int rv = 1; 3068 int i; 3069 3070 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_deleteraid %s\n", DEVNAME(sc), 3071 dr->bd_dev); 3072 3073 for (i = 0; i < SR_MAXSCSIBUS; i++) 3074 if (sc->sc_dis[i]) { 3075 if (!strncmp(sc->sc_dis[i]->sd_meta->ssd_devname, 3076 dr->bd_dev, 3077 sizeof(sc->sc_dis[i]->sd_meta->ssd_devname))) { 3078 sd = sc->sc_dis[i]; 3079 break; 3080 } 3081 } 3082 3083 if (sd == NULL) 3084 goto bad; 3085 3086 sd->sd_deleted = 1; 3087 sd->sd_meta->ssdi.ssd_flags = BIOC_SCNOAUTOASSEMBLE; 3088 sr_shutdown(sd); 3089 3090 rv = 0; 3091 bad: 3092 return (rv); 3093 } 3094 3095 void 3096 sr_chunks_unwind(struct sr_softc *sc, struct sr_chunk_head *cl) 3097 { 3098 struct sr_chunk *ch_entry, *ch_next; 3099 3100 DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind\n", DEVNAME(sc)); 3101 3102 if (!cl) 3103 return; 3104 3105 for (ch_entry = SLIST_FIRST(cl); 3106 ch_entry != SLIST_END(cl); ch_entry = ch_next) { 3107 ch_next = SLIST_NEXT(ch_entry, src_link); 3108 3109 DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind closing: %s\n", 3110 DEVNAME(sc), ch_entry->src_devname); 3111 if (ch_entry->src_vn) { 3112 VOP_CLOSE(ch_entry->src_vn, FREAD | FWRITE, NOCRED, 0); 3113 vput(ch_entry->src_vn); 3114 } 3115 free(ch_entry, M_DEVBUF); 3116 } 3117 SLIST_INIT(cl); 3118 } 3119 3120 void 3121 sr_discipline_free(struct sr_discipline *sd) 3122 { 3123 struct sr_softc *sc; 3124 int i; 3125 3126 if (!sd) 3127 return; 3128 3129 sc = sd->sd_sc; 3130 3131 DNPRINTF(SR_D_DIS, "%s: sr_discipline_free %s\n", 3132 DEVNAME(sc), 3133 sd->sd_meta ? sd->sd_meta->ssd_devname : "nodev"); 3134 if (sd->sd_free_resources) 3135 sd->sd_free_resources(sd); 3136 if (sd->sd_vol.sv_chunks) 3137 free(sd->sd_vol.sv_chunks, M_DEVBUF); 3138 if (sd->sd_meta) 3139 free(sd->sd_meta, M_DEVBUF); 3140 if (sd->sd_meta_foreign) 3141 free(sd->sd_meta_foreign, M_DEVBUF); 3142 3143 for (i = 0; i < SR_MAXSCSIBUS; i++) 3144 if (sc->sc_dis[i] == sd) { 3145 sc->sc_dis[i] = NULL; 3146 break; 3147 } 3148 3149 free(sd, M_DEVBUF); 3150 } 3151 3152 void 3153 sr_discipline_shutdown(struct sr_discipline *sd) 3154 { 3155 struct sr_softc *sc = sd->sd_sc; 3156 int s; 3157 3158 if (!sd || !sc) 3159 return; 3160 3161 DNPRINTF(SR_D_DIS, "%s: sr_discipline_shutdown %s\n", DEVNAME(sc), 3162 sd->sd_meta ? 
sd->sd_meta->ssd_devname : "nodev"); 3163 3164 s = splbio(); 3165 3166 sd->sd_ready = 0; 3167 3168 if (sd->sd_shutdownhook) 3169 shutdownhook_disestablish(sd->sd_shutdownhook); 3170 3171 /* make sure there isn't a sync pending and yield */ 3172 wakeup(sd); 3173 while (sd->sd_sync || sd->sd_must_flush) 3174 if (tsleep(&sd->sd_sync, MAXPRI, "sr_down", 60 * hz) == 3175 EWOULDBLOCK) 3176 break; 3177 3178 #ifndef SMALL_KERNEL 3179 sr_sensors_delete(sd); 3180 #endif /* SMALL_KERNEL */ 3181 3182 if (sd->sd_scsibus_dev) 3183 config_detach(sd->sd_scsibus_dev, DETACH_FORCE); 3184 3185 sr_chunks_unwind(sc, &sd->sd_vol.sv_chunk_list); 3186 3187 if (sd) 3188 sr_discipline_free(sd); 3189 3190 splx(s); 3191 } 3192 3193 int 3194 sr_discipline_init(struct sr_discipline *sd, int level) 3195 { 3196 int rv = 1; 3197 3198 switch (level) { 3199 case 0: 3200 sr_raid0_discipline_init(sd); 3201 break; 3202 case 1: 3203 sr_raid1_discipline_init(sd); 3204 break; 3205 case 4: 3206 case 5: 3207 if (level == 4) 3208 sd->sd_type = SR_MD_RAID4; 3209 else 3210 sd->sd_type = SR_MD_RAID5; 3211 sr_raidp_discipline_init(sd); 3212 break; 3213 case 6: 3214 sd->sd_type = SR_MD_RAID6; 3215 sr_raid6_discipline_init(sd); 3216 break; 3217 #ifdef AOE 3218 /* AOE target. */ 3219 case 'A': 3220 sr_aoe_server_discipline_init(sd); 3221 break; 3222 /* AOE initiator. */ 3223 case 'a': 3224 sr_aoe_discipline_init(sd); 3225 break; 3226 #endif 3227 #ifdef CRYPTO 3228 case 'C': 3229 sr_crypto_discipline_init(sd); 3230 break; 3231 #endif 3232 default: 3233 goto bad; 3234 } 3235 3236 rv = 0; 3237 bad: 3238 return (rv); 3239 } 3240 3241 int 3242 sr_raid_inquiry(struct sr_workunit *wu) 3243 { 3244 struct sr_discipline *sd = wu->swu_dis; 3245 struct scsi_xfer *xs = wu->swu_xs; 3246 struct scsi_inquiry_data inq; 3247 3248 DNPRINTF(SR_D_DIS, "%s: sr_raid_inquiry\n", DEVNAME(sd->sd_sc)); 3249 3250 bzero(&inq, sizeof(inq)); 3251 inq.device = T_DIRECT; 3252 inq.dev_qual2 = 0; 3253 inq.version = 2; 3254 inq.response_format = 2; 3255 inq.additional_length = 32; 3256 strlcpy(inq.vendor, sd->sd_meta->ssdi.ssd_vendor, 3257 sizeof(inq.vendor)); 3258 strlcpy(inq.product, sd->sd_meta->ssdi.ssd_product, 3259 sizeof(inq.product)); 3260 strlcpy(inq.revision, sd->sd_meta->ssdi.ssd_revision, 3261 sizeof(inq.revision)); 3262 sr_copy_internal_data(xs, &inq, sizeof(inq)); 3263 3264 return (0); 3265 } 3266 3267 int 3268 sr_raid_read_cap(struct sr_workunit *wu) 3269 { 3270 struct sr_discipline *sd = wu->swu_dis; 3271 struct scsi_xfer *xs = wu->swu_xs; 3272 struct scsi_read_cap_data rcd; 3273 struct scsi_read_cap_data_16 rcd16; 3274 int rv = 1; 3275 3276 DNPRINTF(SR_D_DIS, "%s: sr_raid_read_cap\n", DEVNAME(sd->sd_sc)); 3277 3278 if (xs->cmd->opcode == READ_CAPACITY) { 3279 bzero(&rcd, sizeof(rcd)); 3280 if (sd->sd_meta->ssdi.ssd_size > 0xffffffffllu) 3281 _lto4b(0xffffffff, rcd.addr); 3282 else 3283 _lto4b(sd->sd_meta->ssdi.ssd_size, rcd.addr); 3284 _lto4b(512, rcd.length); 3285 sr_copy_internal_data(xs, &rcd, sizeof(rcd)); 3286 rv = 0; 3287 } else if (xs->cmd->opcode == READ_CAPACITY_16) { 3288 bzero(&rcd16, sizeof(rcd16)); 3289 _lto8b(sd->sd_meta->ssdi.ssd_size, rcd16.addr); 3290 _lto4b(512, rcd16.length); 3291 sr_copy_internal_data(xs, &rcd16, sizeof(rcd16)); 3292 rv = 0; 3293 } 3294 3295 return (rv); 3296 } 3297 3298 int 3299 sr_raid_tur(struct sr_workunit *wu) 3300 { 3301 struct sr_discipline *sd = wu->swu_dis; 3302 3303 DNPRINTF(SR_D_DIS, "%s: sr_raid_tur\n", DEVNAME(sd->sd_sc)); 3304 3305 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 3306 sd->sd_scsi_sense.error_code 
= SSD_ERRCODE_CURRENT; 3307 sd->sd_scsi_sense.flags = SKEY_NOT_READY; 3308 sd->sd_scsi_sense.add_sense_code = 0x04; 3309 sd->sd_scsi_sense.add_sense_code_qual = 0x11; 3310 sd->sd_scsi_sense.extra_len = 4; 3311 return (1); 3312 } else if (sd->sd_vol_status == BIOC_SVINVALID) { 3313 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT; 3314 sd->sd_scsi_sense.flags = SKEY_HARDWARE_ERROR; 3315 sd->sd_scsi_sense.add_sense_code = 0x05; 3316 sd->sd_scsi_sense.add_sense_code_qual = 0x00; 3317 sd->sd_scsi_sense.extra_len = 4; 3318 return (1); 3319 } 3320 3321 return (0); 3322 } 3323 3324 int 3325 sr_raid_request_sense(struct sr_workunit *wu) 3326 { 3327 struct sr_discipline *sd = wu->swu_dis; 3328 struct scsi_xfer *xs = wu->swu_xs; 3329 3330 DNPRINTF(SR_D_DIS, "%s: sr_raid_request_sense\n", 3331 DEVNAME(sd->sd_sc)); 3332 3333 /* use latest sense data */ 3334 bcopy(&sd->sd_scsi_sense, &xs->sense, sizeof(xs->sense)); 3335 3336 /* clear sense data */ 3337 bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense)); 3338 3339 return (0); 3340 } 3341 3342 int 3343 sr_raid_start_stop(struct sr_workunit *wu) 3344 { 3345 struct sr_discipline *sd = wu->swu_dis; 3346 struct scsi_xfer *xs = wu->swu_xs; 3347 struct scsi_start_stop *ss = (struct scsi_start_stop *)xs->cmd; 3348 int rv = 1; 3349 3350 DNPRINTF(SR_D_DIS, "%s: sr_raid_start_stop\n", 3351 DEVNAME(sd->sd_sc)); 3352 3353 if (!ss) 3354 return (rv); 3355 3356 if (ss->byte2 == 0x00) { 3357 /* START */ 3358 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 3359 /* bring volume online */ 3360 /* XXX check to see if volume can be brought online */ 3361 sd->sd_vol_status = BIOC_SVONLINE; 3362 } 3363 rv = 0; 3364 } else /* XXX is this the check? if (byte == 0x01) */ { 3365 /* STOP */ 3366 if (sd->sd_vol_status == BIOC_SVONLINE) { 3367 /* bring volume offline */ 3368 sd->sd_vol_status = BIOC_SVOFFLINE; 3369 } 3370 rv = 0; 3371 } 3372 3373 return (rv); 3374 } 3375 3376 int 3377 sr_raid_sync(struct sr_workunit *wu) 3378 { 3379 struct sr_discipline *sd = wu->swu_dis; 3380 int s, rv = 0, ios; 3381 3382 DNPRINTF(SR_D_DIS, "%s: sr_raid_sync\n", DEVNAME(sd->sd_sc)); 3383 3384 /* when doing a fake sync don't count the wu */ 3385 ios = wu->swu_fake ? 
0 : 1; 3386 3387 s = splbio(); 3388 sd->sd_sync = 1; 3389 3390 while (sd->sd_wu_pending > ios) 3391 if (tsleep(sd, PRIBIO, "sr_sync", 15 * hz) == EWOULDBLOCK) { 3392 DNPRINTF(SR_D_DIS, "%s: sr_raid_sync timeout\n", 3393 DEVNAME(sd->sd_sc)); 3394 rv = 1; 3395 break; 3396 } 3397 3398 sd->sd_sync = 0; 3399 splx(s); 3400 3401 wakeup(&sd->sd_sync); 3402 3403 return (rv); 3404 } 3405 3406 void 3407 sr_raid_startwu(struct sr_workunit *wu) 3408 { 3409 struct sr_discipline *sd = wu->swu_dis; 3410 struct sr_ccb *ccb; 3411 3412 splassert(IPL_BIO); 3413 3414 if (wu->swu_state == SR_WU_RESTART) 3415 /* 3416 * no need to put the wu on the pending queue since we 3417 * are restarting the io 3418 */ 3419 ; 3420 else 3421 /* move wu to pending queue */ 3422 TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link); 3423 3424 /* start all individual ios */ 3425 TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) { 3426 VOP_STRATEGY(&ccb->ccb_buf); 3427 } 3428 } 3429 3430 void 3431 sr_checksum_print(u_int8_t *md5) 3432 { 3433 int i; 3434 3435 for (i = 0; i < MD5_DIGEST_LENGTH; i++) 3436 printf("%02x", md5[i]); 3437 } 3438 3439 void 3440 sr_checksum(struct sr_softc *sc, void *src, void *md5, u_int32_t len) 3441 { 3442 MD5_CTX ctx; 3443 3444 DNPRINTF(SR_D_MISC, "%s: sr_checksum(%p %p %d)\n", DEVNAME(sc), src, 3445 md5, len); 3446 3447 MD5Init(&ctx); 3448 MD5Update(&ctx, src, len); 3449 MD5Final(md5, &ctx); 3450 } 3451 3452 void 3453 sr_uuid_get(struct sr_uuid *uuid) 3454 { 3455 arc4random_buf(uuid->sui_id, sizeof(uuid->sui_id)); 3456 /* UUID version 4: random */ 3457 uuid->sui_id[6] &= 0x0f; 3458 uuid->sui_id[6] |= 0x40; 3459 /* RFC4122 variant */ 3460 uuid->sui_id[8] &= 0x3f; 3461 uuid->sui_id[8] |= 0x80; 3462 } 3463 3464 void 3465 sr_uuid_print(struct sr_uuid *uuid, int cr) 3466 { 3467 printf("%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-" 3468 "%02x%02x%02x%02x%02x%02x", 3469 uuid->sui_id[0], uuid->sui_id[1], 3470 uuid->sui_id[2], uuid->sui_id[3], 3471 uuid->sui_id[4], uuid->sui_id[5], 3472 uuid->sui_id[6], uuid->sui_id[7], 3473 uuid->sui_id[8], uuid->sui_id[9], 3474 uuid->sui_id[10], uuid->sui_id[11], 3475 uuid->sui_id[12], uuid->sui_id[13], 3476 uuid->sui_id[14], uuid->sui_id[15]); 3477 3478 if (cr) 3479 printf("\n"); 3480 } 3481 3482 int 3483 sr_already_assembled(struct sr_discipline *sd) 3484 { 3485 struct sr_softc *sc = sd->sd_sc; 3486 int i; 3487 3488 for (i = 0; i < SR_MAXSCSIBUS; i++) 3489 if (sc->sc_dis[i]) 3490 if (!bcmp(&sd->sd_meta->ssdi.ssd_uuid, 3491 &sc->sc_dis[i]->sd_meta->ssdi.ssd_uuid, 3492 sizeof(sd->sd_meta->ssdi.ssd_uuid))) 3493 return (1); 3494 3495 return (0); 3496 } 3497 3498 int32_t 3499 sr_validate_stripsize(u_int32_t b) 3500 { 3501 int s = 0; 3502 3503 if (b % 512) 3504 return (-1); 3505 3506 while ((b & 1) == 0) { 3507 b >>= 1; 3508 s++; 3509 } 3510 3511 /* only multiple of twos */ 3512 b >>= 1; 3513 if (b) 3514 return(-1); 3515 3516 return (s); 3517 } 3518 3519 void 3520 sr_shutdown(void *arg) 3521 { 3522 struct sr_discipline *sd = arg; 3523 #ifdef SR_DEBUG 3524 struct sr_softc *sc = sd->sd_sc; 3525 #endif 3526 DNPRINTF(SR_D_DIS, "%s: sr_shutdown %s\n", 3527 DEVNAME(sc), sd->sd_meta->ssd_devname); 3528 3529 /* abort rebuild and drain io */ 3530 sd->sd_reb_abort = 1; 3531 while (sd->sd_reb_active) 3532 tsleep(sd, PWAIT, "sr_shutdown", 1); 3533 3534 sr_meta_save(sd, 0); 3535 3536 sr_discipline_shutdown(sd); 3537 } 3538 3539 int 3540 sr_validate_io(struct sr_workunit *wu, daddr64_t *blk, char *func) 3541 { 3542 struct sr_discipline *sd = wu->swu_dis; 3543 struct scsi_xfer *xs = wu->swu_xs; 3544 int 
rv = 1; 3545 3546 DNPRINTF(SR_D_DIS, "%s: %s 0x%02x\n", DEVNAME(sd->sd_sc), func, 3547 xs->cmd->opcode); 3548 3549 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 3550 DNPRINTF(SR_D_DIS, "%s: %s device offline\n", 3551 DEVNAME(sd->sd_sc), func); 3552 goto bad; 3553 } 3554 3555 if (xs->datalen == 0) { 3556 printf("%s: %s: illegal block count for %s\n", 3557 DEVNAME(sd->sd_sc), func, sd->sd_meta->ssd_devname); 3558 goto bad; 3559 } 3560 3561 if (xs->cmdlen == 10) 3562 *blk = _4btol(((struct scsi_rw_big *)xs->cmd)->addr); 3563 else if (xs->cmdlen == 16) 3564 *blk = _8btol(((struct scsi_rw_16 *)xs->cmd)->addr); 3565 else if (xs->cmdlen == 6) 3566 *blk = _3btol(((struct scsi_rw *)xs->cmd)->addr); 3567 else { 3568 printf("%s: %s: illegal cmdlen for %s\n", 3569 DEVNAME(sd->sd_sc), func, sd->sd_meta->ssd_devname); 3570 goto bad; 3571 } 3572 3573 wu->swu_blk_start = *blk; 3574 wu->swu_blk_end = *blk + (xs->datalen >> DEV_BSHIFT) - 1; 3575 3576 if (wu->swu_blk_end > sd->sd_meta->ssdi.ssd_size) { 3577 DNPRINTF(SR_D_DIS, "%s: %s out of bounds start: %lld " 3578 "end: %lld length: %d\n", 3579 DEVNAME(sd->sd_sc), func, wu->swu_blk_start, 3580 wu->swu_blk_end, xs->datalen); 3581 3582 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT | 3583 SSD_ERRCODE_VALID; 3584 sd->sd_scsi_sense.flags = SKEY_ILLEGAL_REQUEST; 3585 sd->sd_scsi_sense.add_sense_code = 0x21; 3586 sd->sd_scsi_sense.add_sense_code_qual = 0x00; 3587 sd->sd_scsi_sense.extra_len = 4; 3588 goto bad; 3589 } 3590 3591 rv = 0; 3592 bad: 3593 return (rv); 3594 } 3595 3596 int 3597 sr_check_io_collision(struct sr_workunit *wu) 3598 { 3599 struct sr_discipline *sd = wu->swu_dis; 3600 struct sr_workunit *wup; 3601 3602 splassert(IPL_BIO); 3603 3604 /* walk queue backwards and fill in collider if we have one */ 3605 TAILQ_FOREACH_REVERSE(wup, &sd->sd_wu_pendq, sr_wu_list, swu_link) { 3606 if (wu->swu_blk_end < wup->swu_blk_start || 3607 wup->swu_blk_end < wu->swu_blk_start) 3608 continue; 3609 3610 /* we have an LBA collision, defer wu */ 3611 wu->swu_state = SR_WU_DEFERRED; 3612 if (wup->swu_collider) 3613 /* wu is on deferred queue, append to last wu */ 3614 while (wup->swu_collider) 3615 wup = wup->swu_collider; 3616 3617 wup->swu_collider = wu; 3618 TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link); 3619 sd->sd_wu_collisions++; 3620 goto queued; 3621 } 3622 3623 return (0); 3624 queued: 3625 return (1); 3626 } 3627 3628 void 3629 sr_rebuild(void *arg) 3630 { 3631 struct sr_discipline *sd = arg; 3632 struct sr_softc *sc = sd->sd_sc; 3633 3634 if (kthread_create(sr_rebuild_thread, sd, &sd->sd_background_proc, 3635 DEVNAME(sc)) != 0) 3636 printf("%s: unable to start backgound operation\n", 3637 DEVNAME(sc)); 3638 } 3639 3640 void 3641 sr_rebuild_thread(void *arg) 3642 { 3643 struct sr_discipline *sd = arg; 3644 struct sr_softc *sc = sd->sd_sc; 3645 daddr64_t whole_blk, partial_blk, blk, sz, lba; 3646 daddr64_t psz, rb, restart; 3647 uint64_t mysize = 0; 3648 struct sr_workunit *wu_r, *wu_w; 3649 struct scsi_xfer xs_r, xs_w; 3650 struct scsi_rw_16 cr, cw; 3651 int c, s, slept, percent = 0, old_percent = -1; 3652 u_int8_t *buf; 3653 3654 whole_blk = sd->sd_meta->ssdi.ssd_size / SR_REBUILD_IO_SIZE; 3655 partial_blk = sd->sd_meta->ssdi.ssd_size % SR_REBUILD_IO_SIZE; 3656 3657 restart = sd->sd_meta->ssd_rebuild / SR_REBUILD_IO_SIZE; 3658 if (restart > whole_blk) { 3659 printf("%s: bogus rebuild restart offset, starting from 0\n", 3660 DEVNAME(sc)); 3661 restart = 0; 3662 } 3663 if (restart) { 3664 /* 3665 * XXX there is a hole here; there is a posibility that we 
3666 * had a restart however the chunk that was supposed to 3667 * be rebuilt is no longer valid; we can reach this situation 3668 * when a rebuild is in progress and the box crashes and 3669 * on reboot the rebuild chunk is different (like zero'd or 3670 * replaced). We need to check the uuid of the chunk that is 3671 * being rebuilt to assert this. 3672 */ 3673 psz = sd->sd_meta->ssdi.ssd_size; 3674 rb = sd->sd_meta->ssd_rebuild; 3675 if (rb > 0) 3676 percent = 100 - ((psz * 100 - rb * 100) / psz) - 1; 3677 else 3678 percent = 0; 3679 printf("%s: resuming rebuild on %s at %llu%%\n", 3680 DEVNAME(sc), sd->sd_meta->ssd_devname, percent); 3681 } 3682 3683 sd->sd_reb_active = 1; 3684 3685 buf = malloc(SR_REBUILD_IO_SIZE << DEV_BSHIFT, M_DEVBUF, M_WAITOK); 3686 for (blk = restart; blk <= whole_blk; blk++) { 3687 if (blk == whole_blk) 3688 sz = partial_blk; 3689 else 3690 sz = SR_REBUILD_IO_SIZE; 3691 mysize += sz; 3692 lba = blk * sz; 3693 3694 /* get some wu */ 3695 if ((wu_r = sr_wu_get(sd, 1)) == NULL) 3696 panic("%s: rebuild exhausted wu_r", DEVNAME(sc)); 3697 if ((wu_w = sr_wu_get(sd, 1)) == NULL) 3698 panic("%s: rebuild exhausted wu_w", DEVNAME(sc)); 3699 3700 /* setup read io */ 3701 bzero(&xs_r, sizeof xs_r); 3702 bzero(&cr, sizeof cr); 3703 xs_r.error = XS_NOERROR; 3704 xs_r.flags = SCSI_DATA_IN; 3705 xs_r.datalen = sz << DEV_BSHIFT; 3706 xs_r.data = buf; 3707 xs_r.cmdlen = 16; 3708 cr.opcode = READ_16; 3709 _lto4b(sz, cr.length); 3710 _lto8b(lba, cr.addr); 3711 xs_r.cmd = (struct scsi_generic *)&cr; 3712 wu_r->swu_flags |= SR_WUF_REBUILD; 3713 wu_r->swu_xs = &xs_r; 3714 if (sd->sd_scsi_rw(wu_r)) { 3715 printf("%s: could not create read io\n", 3716 DEVNAME(sc)); 3717 goto fail; 3718 } 3719 3720 /* setup write io */ 3721 bzero(&xs_w, sizeof xs_w); 3722 bzero(&cw, sizeof cw); 3723 xs_w.error = XS_NOERROR; 3724 xs_w.flags = SCSI_DATA_OUT; 3725 xs_w.datalen = sz << DEV_BSHIFT; 3726 xs_w.data = buf; 3727 xs_w.cmdlen = 16; 3728 cw.opcode = WRITE_16; 3729 _lto4b(sz, cw.length); 3730 _lto8b(lba, cw.addr); 3731 xs_w.cmd = (struct scsi_generic *)&cw; 3732 wu_w->swu_flags |= SR_WUF_REBUILD; 3733 wu_w->swu_xs = &xs_w; 3734 if (sd->sd_scsi_rw(wu_w)) { 3735 printf("%s: could not create write io\n", 3736 DEVNAME(sc)); 3737 goto fail; 3738 } 3739 3740 /* 3741 * collide with the read io so that we get automatically 3742 * started when the read is done 3743 */ 3744 wu_w->swu_state = SR_WU_DEFERRED; 3745 wu_r->swu_collider = wu_w; 3746 s = splbio(); 3747 TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu_w, swu_link); 3748 3749 /* schedule io */ 3750 if (sr_check_io_collision(wu_r)) 3751 goto queued; 3752 3753 sr_raid_startwu(wu_r); 3754 queued: 3755 splx(s); 3756 3757 /* wait for read completion */ 3758 slept = 0; 3759 while ((wu_w->swu_flags & SR_WUF_REBUILDIOCOMP) == 0) { 3760 tsleep(wu_w, PRIBIO, "sr_rebuild", 0); 3761 slept = 1; 3762 } 3763 /* yield if we didn't sleep */ 3764 if (slept == 0) 3765 tsleep(sc, PWAIT, "sr_yield", 1); 3766 3767 sr_wu_put(wu_r); 3768 sr_wu_put(wu_w); 3769 3770 sd->sd_meta->ssd_rebuild = lba; 3771 3772 /* save metadata every percent */ 3773 psz = sd->sd_meta->ssdi.ssd_size; 3774 rb = sd->sd_meta->ssd_rebuild; 3775 if (rb > 0) 3776 percent = 100 - ((psz * 100 - rb * 100) / psz) - 1; 3777 else 3778 percent = 0; 3779 if (percent != old_percent && blk != whole_blk) { 3780 if (sr_meta_save(sd, SR_META_DIRTY)) 3781 printf("%s: could not save metadata to %s\n", 3782 DEVNAME(sc), sd->sd_meta->ssd_devname); 3783 old_percent = percent; 3784 } 3785 3786 if (sd->sd_reb_abort) 3787 goto abort; 
3788 } 3789 3790 /* all done */ 3791 sd->sd_meta->ssd_rebuild = 0; 3792 for (c = 0; c < sd->sd_meta->ssdi.ssd_chunk_no; c++) 3793 if (sd->sd_vol.sv_chunks[c]->src_meta.scm_status == 3794 BIOC_SDREBUILD) { 3795 sd->sd_set_chunk_state(sd, c, BIOC_SDONLINE); 3796 break; 3797 } 3798 3799 abort: 3800 if (sr_meta_save(sd, SR_META_DIRTY)) 3801 printf("%s: could not save metadata to %s\n", 3802 DEVNAME(sc), sd->sd_meta->ssd_devname); 3803 fail: 3804 free(buf, M_DEVBUF); 3805 sd->sd_reb_active = 0; 3806 kthread_exit(0); 3807 } 3808 3809 #ifndef SMALL_KERNEL 3810 int 3811 sr_sensors_create(struct sr_discipline *sd) 3812 { 3813 struct sr_softc *sc = sd->sd_sc; 3814 int rv = 1; 3815 3816 DNPRINTF(SR_D_STATE, "%s: %s: sr_sensors_create\n", 3817 DEVNAME(sc), sd->sd_meta->ssd_devname); 3818 3819 strlcpy(sd->sd_vol.sv_sensordev.xname, DEVNAME(sc), 3820 sizeof(sd->sd_vol.sv_sensordev.xname)); 3821 3822 sd->sd_vol.sv_sensor.type = SENSOR_DRIVE; 3823 sd->sd_vol.sv_sensor.status = SENSOR_S_UNKNOWN; 3824 strlcpy(sd->sd_vol.sv_sensor.desc, sd->sd_meta->ssd_devname, 3825 sizeof(sd->sd_vol.sv_sensor.desc)); 3826 3827 sensor_attach(&sd->sd_vol.sv_sensordev, &sd->sd_vol.sv_sensor); 3828 3829 if (sc->sc_sensors_running == 0) { 3830 if (sensor_task_register(sc, sr_sensors_refresh, 10) == NULL) 3831 goto bad; 3832 sc->sc_sensors_running = 1; 3833 } 3834 sensordev_install(&sd->sd_vol.sv_sensordev); 3835 3836 rv = 0; 3837 bad: 3838 return (rv); 3839 } 3840 3841 void 3842 sr_sensors_delete(struct sr_discipline *sd) 3843 { 3844 DNPRINTF(SR_D_STATE, "%s: sr_sensors_delete\n", DEVNAME(sd->sd_sc)); 3845 3846 if (sd->sd_vol.sv_sensor_valid) 3847 sensordev_deinstall(&sd->sd_vol.sv_sensordev); 3848 } 3849 3850 void 3851 sr_sensors_refresh(void *arg) 3852 { 3853 struct sr_softc *sc = arg; 3854 struct sr_volume *sv; 3855 struct sr_discipline *sd; 3856 int i, vol; 3857 3858 DNPRINTF(SR_D_STATE, "%s: sr_sensors_refresh\n", DEVNAME(sc)); 3859 3860 for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { 3861 /* XXX this will not work when we stagger disciplines */ 3862 if (!sc->sc_dis[i]) 3863 continue; 3864 3865 sd = sc->sc_dis[i]; 3866 sv = &sd->sd_vol; 3867 3868 switch(sd->sd_vol_status) { 3869 case BIOC_SVOFFLINE: 3870 sv->sv_sensor.value = SENSOR_DRIVE_FAIL; 3871 sv->sv_sensor.status = SENSOR_S_CRIT; 3872 break; 3873 3874 case BIOC_SVDEGRADED: 3875 sv->sv_sensor.value = SENSOR_DRIVE_PFAIL; 3876 sv->sv_sensor.status = SENSOR_S_WARN; 3877 break; 3878 3879 case BIOC_SVSCRUB: 3880 case BIOC_SVONLINE: 3881 sv->sv_sensor.value = SENSOR_DRIVE_ONLINE; 3882 sv->sv_sensor.status = SENSOR_S_OK; 3883 break; 3884 3885 default: 3886 sv->sv_sensor.value = 0; /* unknown */ 3887 sv->sv_sensor.status = SENSOR_S_UNKNOWN; 3888 } 3889 } 3890 } 3891 #endif /* SMALL_KERNEL */ 3892 3893 #ifdef SR_FANCY_STATS 3894 void sr_print_stats(void); 3895 3896 void 3897 sr_print_stats(void) 3898 { 3899 struct sr_softc *sc; 3900 struct sr_discipline *sd; 3901 int i, vol; 3902 3903 for (i = 0; i < softraid_cd.cd_ndevs; i++) 3904 if (softraid_cd.cd_devs[i]) { 3905 sc = softraid_cd.cd_devs[i]; 3906 /* we'll only have one softc */ 3907 break; 3908 } 3909 3910 if (!sc) { 3911 printf("no softraid softc found\n"); 3912 return; 3913 } 3914 3915 for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { 3916 /* XXX this will not work when we stagger disciplines */ 3917 if (!sc->sc_dis[i]) 3918 continue; 3919 3920 sd = sc->sc_dis[i]; 3921 printf("%s: ios pending: %d collisions %llu\n", 3922 sd->sd_meta->ssd_devname, 3923 sd->sd_wu_pending, 3924 sd->sd_wu_collisions); 3925 } 3926 } 3927 #endif 
/* SR_FANCY_STATS */ 3928 3929 #ifdef SR_DEBUG 3930 void 3931 sr_meta_print(struct sr_metadata *m) 3932 { 3933 int i; 3934 struct sr_meta_chunk *mc; 3935 struct sr_meta_opt *mo; 3936 3937 if (!(sr_debug & SR_D_META)) 3938 return; 3939 3940 printf("\tssd_magic 0x%llx\n", m->ssdi.ssd_magic); 3941 printf("\tssd_version %d\n", m->ssdi.ssd_version); 3942 printf("\tssd_flags 0x%x\n", m->ssdi.ssd_flags); 3943 printf("\tssd_uuid "); 3944 sr_uuid_print(&m->ssdi.ssd_uuid, 1); 3945 printf("\tssd_chunk_no %d\n", m->ssdi.ssd_chunk_no); 3946 printf("\tssd_chunk_id %d\n", m->ssdi.ssd_chunk_id); 3947 printf("\tssd_opt_no %d\n", m->ssdi.ssd_opt_no); 3948 printf("\tssd_volid %d\n", m->ssdi.ssd_volid); 3949 printf("\tssd_level %d\n", m->ssdi.ssd_level); 3950 printf("\tssd_size %lld\n", m->ssdi.ssd_size); 3951 printf("\tssd_devname %s\n", m->ssd_devname); 3952 printf("\tssd_vendor %s\n", m->ssdi.ssd_vendor); 3953 printf("\tssd_product %s\n", m->ssdi.ssd_product); 3954 printf("\tssd_revision %s\n", m->ssdi.ssd_revision); 3955 printf("\tssd_strip_size %d\n", m->ssdi.ssd_strip_size); 3956 printf("\tssd_checksum "); 3957 sr_checksum_print(m->ssd_checksum); 3958 printf("\n"); 3959 printf("\tssd_meta_flags 0x%x\n", m->ssd_meta_flags); 3960 printf("\tssd_ondisk %llu\n", m->ssd_ondisk); 3961 3962 mc = (struct sr_meta_chunk *)(m + 1); 3963 for (i = 0; i < m->ssdi.ssd_chunk_no; i++, mc++) { 3964 printf("\t\tscm_volid %d\n", mc->scmi.scm_volid); 3965 printf("\t\tscm_chunk_id %d\n", mc->scmi.scm_chunk_id); 3966 printf("\t\tscm_devname %s\n", mc->scmi.scm_devname); 3967 printf("\t\tscm_size %lld\n", mc->scmi.scm_size); 3968 printf("\t\tscm_coerced_size %lld\n",mc->scmi.scm_coerced_size); 3969 printf("\t\tscm_uuid "); 3970 sr_uuid_print(&mc->scmi.scm_uuid, 1); 3971 printf("\t\tscm_checksum "); 3972 sr_checksum_print(mc->scm_checksum); 3973 printf("\n"); 3974 printf("\t\tscm_status %d\n", mc->scm_status); 3975 } 3976 3977 mo = (struct sr_meta_opt *)(mc); 3978 for (i = 0; i < m->ssdi.ssd_opt_no; i++, mo++) { 3979 printf("\t\t\tsom_type %d\n", mo->somi.som_type); 3980 printf("\t\t\tsom_checksum "); 3981 sr_checksum_print(mo->som_checksum); 3982 printf("\n"); 3983 } 3984 } 3985 3986 void 3987 sr_dump_mem(u_int8_t *p, int len) 3988 { 3989 int i; 3990 3991 for (i = 0; i < len; i++) 3992 printf("%02x ", *p++); 3993 printf("\n"); 3994 } 3995 3996 #endif /* SR_DEBUG */ 3997