/* $OpenBSD: softraid.c,v 1.227 2011/04/14 02:11:23 marco Exp $ */
/*
 * Copyright (c) 2007, 2008, 2009 Marco Peereboom <marco@peereboom.us>
 * Copyright (c) 2008 Chris Kuethe <ckuethe@openbsd.org>
 * Copyright (c) 2009 Joel Sing <jsing@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bio.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/device.h>
#include <sys/ioctl.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/kernel.h>
#include <sys/disk.h>
#include <sys/rwlock.h>
#include <sys/queue.h>
#include <sys/fcntl.h>
#include <sys/disklabel.h>
#include <sys/mount.h>
#include <sys/sensors.h>
#include <sys/stat.h>
#include <sys/conf.h>
#include <sys/uio.h>
#include <sys/workq.h>
#include <sys/kthread.h>
#include <sys/dkio.h>

#ifdef AOE
#include <sys/mbuf.h>
#include <net/if_aoe.h>
#endif /* AOE */

#include <crypto/cryptodev.h>

#include <scsi/scsi_all.h>
#include <scsi/scsiconf.h>
#include <scsi/scsi_disk.h>

#include <dev/softraidvar.h>
#include <dev/rndvar.h>

/* #define SR_FANCY_STATS */

#ifdef SR_DEBUG
#define SR_FANCY_STATS
uint32_t	sr_debug = 0
		    /* | SR_D_CMD */
		    /* | SR_D_MISC */
		    /* | SR_D_INTR */
		    /* | SR_D_IOCTL */
		    /* | SR_D_CCB */
		    /* | SR_D_WU */
		    /* | SR_D_META */
		    /* | SR_D_DIS */
		    /* | SR_D_STATE */
		;
#endif

int		sr_match(struct device *, void *, void *);
void		sr_attach(struct device *, struct device *, void *);
int		sr_detach(struct device *, int);

struct cfattach softraid_ca = {
	sizeof(struct sr_softc), sr_match, sr_attach, sr_detach,
};

struct cfdriver softraid_cd = {
	NULL, "softraid", DV_DULL
};

/* scsi & discipline */
void		sr_scsi_cmd(struct scsi_xfer *);
void		sr_minphys(struct buf *bp, struct scsi_link *sl);
void		sr_copy_internal_data(struct scsi_xfer *,
		    void *, size_t);
int		sr_scsi_ioctl(struct scsi_link *, u_long,
		    caddr_t, int);
int		sr_ioctl(struct device *, u_long, caddr_t);
int		sr_ioctl_inq(struct sr_softc *, struct bioc_inq *);
int		sr_ioctl_vol(struct sr_softc *, struct bioc_vol *);
int		sr_ioctl_disk(struct sr_softc *, struct bioc_disk *);
int		sr_ioctl_setstate(struct sr_softc *,
		    struct bioc_setstate *);
int		sr_ioctl_createraid(struct sr_softc *,
		    struct bioc_createraid *, int);
int		sr_ioctl_deleteraid(struct sr_softc *,
		    struct bioc_deleteraid *);
int		sr_ioctl_discipline(struct sr_softc *,
		    struct bioc_discipline *);
int		sr_ioctl_installboot(struct sr_softc *,
		    struct bioc_installboot *);
void		sr_chunks_unwind(struct sr_softc *,
		    struct sr_chunk_head *);
void		sr_discipline_free(struct sr_discipline *);
void		sr_discipline_shutdown(struct sr_discipline *);
int		sr_discipline_init(struct sr_discipline *, int);

/* utility functions */
void		sr_shutdown(void *);
void		sr_uuid_get(struct sr_uuid *);
void		sr_uuid_print(struct sr_uuid *, int);
void		sr_checksum_print(u_int8_t *);
int		sr_boot_assembly(struct sr_softc *);
int		sr_already_assembled(struct sr_discipline *);
int		sr_hotspare(struct sr_softc *, dev_t);
void		sr_hotspare_rebuild(struct sr_discipline *);
int		sr_rebuild_init(struct sr_discipline *, dev_t, int);
void		sr_rebuild(void *);
void		sr_rebuild_thread(void *);
void		sr_roam_chunks(struct sr_discipline *);
int		sr_chunk_in_use(struct sr_softc *, dev_t);
void		sr_startwu_callback(void *, void *);
int		sr_rw(struct sr_softc *, dev_t, char *, size_t,
		    daddr64_t, long);

/* don't include these on RAMDISK */
#ifndef SMALL_KERNEL
void		sr_sensors_refresh(void *);
int		sr_sensors_create(struct sr_discipline *);
void		sr_sensors_delete(struct sr_discipline *);
#endif

/* metadata */
int		sr_meta_probe(struct sr_discipline *, dev_t *, int);
int		sr_meta_attach(struct sr_discipline *, int, int);
int		sr_meta_rw(struct sr_discipline *, dev_t, void *,
		    size_t, daddr64_t, long);
int		sr_meta_clear(struct sr_discipline *);
void		sr_meta_chunks_create(struct sr_softc *,
		    struct sr_chunk_head *);
void		sr_meta_init(struct sr_discipline *,
		    struct sr_chunk_head *);
void		sr_meta_opt_load(struct sr_discipline *,
		    struct sr_meta_opt *);

/* hotplug magic */
void		sr_disk_attach(struct disk *, int);

struct sr_hotplug_list {
	void			(*sh_hotplug)(struct sr_discipline *,
				    struct disk *, int);
	struct sr_discipline	*sh_sd;

	SLIST_ENTRY(sr_hotplug_list) shl_link;
};
SLIST_HEAD(sr_hotplug_list_head, sr_hotplug_list);

struct sr_hotplug_list_head	sr_hotplug_callbacks;
extern void			(*softraid_disk_attach)(struct disk *, int);

/* scsi glue */
struct scsi_adapter sr_switch = {
	sr_scsi_cmd, sr_minphys, NULL, NULL, sr_scsi_ioctl
};

/* native metadata format */
int		sr_meta_native_bootprobe(struct sr_softc *, dev_t,
		    struct sr_metadata_list_head *);
#define SR_META_NOTCLAIMED	(0)
#define SR_META_CLAIMED		(1)
int		sr_meta_native_probe(struct sr_softc *,
		    struct sr_chunk *);
int		sr_meta_native_attach(struct sr_discipline *, int);
int		sr_meta_native_write(struct sr_discipline *, dev_t,
		    struct sr_metadata *, void *);

#ifdef SR_DEBUG
void		sr_meta_print(struct sr_metadata *);
#else
#define sr_meta_print(m)
#endif

/* the metadata driver should remain stateless */
struct sr_meta_driver {
	daddr64_t		smd_offset;	/* metadata location */
	u_int32_t		smd_size;	/* size of metadata */

	int			(*smd_probe)(struct sr_softc *,
				    struct sr_chunk *);
	int			(*smd_attach)(struct sr_discipline *, int);
	int			(*smd_detach)(struct sr_discipline *);
	int			(*smd_read)(struct sr_discipline *, dev_t,
				    struct sr_metadata *, void *);
	int			(*smd_write)(struct sr_discipline *, dev_t,
				    struct sr_metadata *, void *);
	int			(*smd_validate)(struct sr_discipline *,
				    struct sr_metadata *, void *);
} smd[] = {
	{ SR_META_OFFSET, SR_META_SIZE * 512,
	  sr_meta_native_probe, sr_meta_native_attach, NULL,
	  sr_meta_native_read, sr_meta_native_write, NULL },
	{ 0, 0, NULL, NULL, NULL, NULL }
};

int
sr_meta_attach(struct sr_discipline *sd, int chunk_no, int force)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct sr_chunk_head	*cl;
	struct sr_chunk		*ch_entry, *chunk1, *chunk2;
	int			rv = 1, i = 0;

	DNPRINTF(SR_D_META, "%s: sr_meta_attach(%d)\n",
	    DEVNAME(sc), chunk_no);

	/* in memory copy of metadata */
	sd->sd_meta = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO | M_NOWAIT);
	if (!sd->sd_meta) {
		printf("%s: could not allocate memory for metadata\n",
		    DEVNAME(sc));
		goto bad;
	}

	if (sd->sd_meta_type != SR_META_F_NATIVE) {
		/* in memory copy of foreign metadata */
		sd->sd_meta_foreign = malloc(smd[sd->sd_meta_type].smd_size,
		    M_DEVBUF, M_ZERO | M_NOWAIT);
		if (!sd->sd_meta_foreign) {
			/* unwind frees sd_meta */
			printf("%s: could not allocate memory for foreign "
			    "metadata\n", DEVNAME(sc));
			goto bad;
		}
	}

	/* we have a valid list; now create an array index */
	cl = &sd->sd_vol.sv_chunk_list;
	sd->sd_vol.sv_chunks = malloc(sizeof(struct sr_chunk *) * chunk_no,
	    M_DEVBUF, M_WAITOK | M_ZERO);

	/* fill out chunk array */
	i = 0;
	SLIST_FOREACH(ch_entry, cl, src_link)
		sd->sd_vol.sv_chunks[i++] = ch_entry;

	/* attach metadata */
	if (smd[sd->sd_meta_type].smd_attach(sd, force))
		goto bad;

	/* Force chunks into correct order now that metadata is attached. */
	SLIST_FOREACH(ch_entry, cl, src_link)
		SLIST_REMOVE(cl, ch_entry, sr_chunk, src_link);
	for (i = 0; i < chunk_no; i++) {
		ch_entry = sd->sd_vol.sv_chunks[i];
		chunk2 = NULL;
		SLIST_FOREACH(chunk1, cl, src_link) {
			if (chunk1->src_meta.scmi.scm_chunk_id >
			    ch_entry->src_meta.scmi.scm_chunk_id)
				break;
			chunk2 = chunk1;
		}
		if (chunk2 == NULL)
			SLIST_INSERT_HEAD(cl, ch_entry, src_link);
		else
			SLIST_INSERT_AFTER(chunk2, ch_entry, src_link);
	}
	i = 0;
	SLIST_FOREACH(ch_entry, cl, src_link)
		sd->sd_vol.sv_chunks[i++] = ch_entry;

	rv = 0;
bad:
	return (rv);
}

int
sr_meta_probe(struct sr_discipline *sd, dev_t *dt, int no_chunk)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct vnode		*vn;
	struct sr_chunk		*ch_entry, *ch_prev = NULL;
	struct sr_chunk_head	*cl;
	char			devname[32];
	int			i, d, type, found, prevf, error;
	dev_t			dev;

	DNPRINTF(SR_D_META, "%s: sr_meta_probe(%d)\n", DEVNAME(sc), no_chunk);

	if (no_chunk == 0)
		goto unwind;

	cl = &sd->sd_vol.sv_chunk_list;

	for (d = 0, prevf = SR_META_F_INVALID; d < no_chunk; d++) {
		ch_entry = malloc(sizeof(struct sr_chunk), M_DEVBUF,
		    M_WAITOK | M_ZERO);
		/* keep disks in user supplied order */
		if (ch_prev)
			SLIST_INSERT_AFTER(ch_prev, ch_entry, src_link);
		else
			SLIST_INSERT_HEAD(cl, ch_entry, src_link);
		ch_prev = ch_entry;
		dev = dt[d];
		ch_entry->src_dev_mm = dev;

		if (dev == NODEV) {
			ch_entry->src_meta.scm_status = BIOC_SDOFFLINE;
			continue;
		} else {
			sr_meta_getdevname(sc, dev, devname, sizeof(devname));
			if (bdevvp(dev, &vn)) {
				printf("%s: sr_meta_probe: can't allocate "
				    "vnode\n", DEVNAME(sc));
				goto unwind;
			}

			/*
			 * XXX leaving dev open for now; move this to attach
			 * and figure out the open/close dance for unwind.
327 */ 328 error = VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc); 329 if (error) { 330 DNPRINTF(SR_D_META,"%s: sr_meta_probe can't " 331 "open %s\n", DEVNAME(sc), devname); 332 vput(vn); 333 goto unwind; 334 } 335 336 strlcpy(ch_entry->src_devname, devname, 337 sizeof(ch_entry->src_devname)); 338 ch_entry->src_vn = vn; 339 } 340 341 /* determine if this is a device we understand */ 342 for (i = 0, found = SR_META_F_INVALID; smd[i].smd_probe; i++) { 343 type = smd[i].smd_probe(sc, ch_entry); 344 if (type == SR_META_F_INVALID) 345 continue; 346 else { 347 found = type; 348 break; 349 } 350 } 351 352 if (found == SR_META_F_INVALID) 353 goto unwind; 354 if (prevf == SR_META_F_INVALID) 355 prevf = found; 356 if (prevf != found) { 357 DNPRINTF(SR_D_META, "%s: prevf != found\n", 358 DEVNAME(sc)); 359 goto unwind; 360 } 361 } 362 363 return (prevf); 364 unwind: 365 return (SR_META_F_INVALID); 366 } 367 368 void 369 sr_meta_getdevname(struct sr_softc *sc, dev_t dev, char *buf, int size) 370 { 371 int maj, unit, part; 372 char *name; 373 374 DNPRINTF(SR_D_META, "%s: sr_meta_getdevname(%p, %d)\n", 375 DEVNAME(sc), buf, size); 376 377 if (!buf) 378 return; 379 380 maj = major(dev); 381 part = DISKPART(dev); 382 unit = DISKUNIT(dev); 383 384 name = findblkname(maj); 385 if (name == NULL) 386 return; 387 388 snprintf(buf, size, "%s%d%c", name, unit, part + 'a'); 389 } 390 391 int 392 sr_rw(struct sr_softc *sc, dev_t dev, char *buf, size_t size, daddr64_t offset, 393 long flags) 394 { 395 struct vnode *vp; 396 struct buf b; 397 size_t bufsize, dma_bufsize; 398 int rv = 1; 399 char *dma_buf; 400 401 DNPRINTF(SR_D_MISC, "%s: sr_rw(0x%x, %p, %d, %llu 0x%x)\n", 402 DEVNAME(sc), dev, buf, size, offset, flags); 403 404 dma_bufsize = (size > MAXPHYS) ? MAXPHYS : size; 405 dma_buf = dma_alloc(dma_bufsize, PR_WAITOK); 406 407 if (bdevvp(dev, &vp)) { 408 printf("%s: sr_rw: failed to allocate vnode\n", DEVNAME(sc)); 409 goto done; 410 } 411 412 while (size > 0) { 413 DNPRINTF(SR_D_MISC, "%s: dma_buf %p, size %d, offset %llu)\n", 414 DEVNAME(sc), dma_buf, size, offset); 415 416 bufsize = (size > MAXPHYS) ? 
MAXPHYS : size; 417 if (flags == B_WRITE) 418 bcopy(buf, dma_buf, bufsize); 419 420 bzero(&b, sizeof(b)); 421 b.b_flags = flags | B_PHYS; 422 b.b_proc = curproc; 423 b.b_dev = dev; 424 b.b_iodone = NULL; 425 b.b_error = 0; 426 b.b_blkno = offset; 427 b.b_data = dma_buf; 428 b.b_bcount = bufsize; 429 b.b_bufsize = bufsize; 430 b.b_resid = bufsize; 431 b.b_vp = vp; 432 433 if ((b.b_flags & B_READ) == 0) 434 vp->v_numoutput++; 435 436 LIST_INIT(&b.b_dep); 437 VOP_STRATEGY(&b); 438 biowait(&b); 439 440 if (b.b_flags & B_ERROR) { 441 printf("%s: I/O error %d on dev 0x%x at block %llu\n", 442 DEVNAME(sc), b.b_error, dev, b.b_blkno); 443 goto done; 444 } 445 446 if (flags == B_READ) 447 bcopy(dma_buf, buf, bufsize); 448 449 size -= bufsize; 450 buf += bufsize; 451 offset += howmany(bufsize, DEV_BSIZE); 452 } 453 454 rv = 0; 455 456 done: 457 if (vp) 458 vput(vp); 459 460 dma_free(dma_buf, dma_bufsize); 461 462 return (rv); 463 } 464 465 int 466 sr_meta_rw(struct sr_discipline *sd, dev_t dev, void *md, size_t size, 467 daddr64_t offset, long flags) 468 { 469 int rv = 1; 470 471 DNPRINTF(SR_D_META, "%s: sr_meta_rw(0x%x, %p, %d, %llu 0x%x)\n", 472 DEVNAME(sd->sd_sc), dev, md, size, offset, flags); 473 474 if (md == NULL) { 475 printf("%s: sr_meta_rw: invalid metadata pointer\n", 476 DEVNAME(sd->sd_sc)); 477 goto done; 478 } 479 480 rv = sr_rw(sd->sd_sc, dev, md, size, offset, flags); 481 482 done: 483 return (rv); 484 } 485 486 int 487 sr_meta_clear(struct sr_discipline *sd) 488 { 489 struct sr_softc *sc = sd->sd_sc; 490 struct sr_chunk_head *cl = &sd->sd_vol.sv_chunk_list; 491 struct sr_chunk *ch_entry; 492 void *m; 493 int rv = 1; 494 495 DNPRINTF(SR_D_META, "%s: sr_meta_clear\n", DEVNAME(sc)); 496 497 if (sd->sd_meta_type != SR_META_F_NATIVE) { 498 printf("%s: sr_meta_clear can not clear foreign metadata\n", 499 DEVNAME(sc)); 500 goto done; 501 } 502 503 m = malloc(SR_META_SIZE * 512, M_DEVBUF, M_WAITOK | M_ZERO); 504 SLIST_FOREACH(ch_entry, cl, src_link) { 505 if (sr_meta_native_write(sd, ch_entry->src_dev_mm, m, NULL)) { 506 /* XXX mark disk offline */ 507 DNPRINTF(SR_D_META, "%s: sr_meta_clear failed to " 508 "clear %s\n", ch_entry->src_devname); 509 rv++; 510 continue; 511 } 512 bzero(&ch_entry->src_meta, sizeof(ch_entry->src_meta)); 513 } 514 515 bzero(sd->sd_meta, SR_META_SIZE * 512); 516 517 free(m, M_DEVBUF); 518 rv = 0; 519 done: 520 return (rv); 521 } 522 523 void 524 sr_meta_chunks_create(struct sr_softc *sc, struct sr_chunk_head *cl) 525 { 526 struct sr_chunk *ch_entry; 527 struct sr_uuid uuid; 528 int cid = 0; 529 char *name; 530 u_int64_t max_chunk_sz = 0, min_chunk_sz; 531 532 DNPRINTF(SR_D_META, "%s: sr_meta_chunks_create\n", DEVNAME(sc)); 533 534 sr_uuid_get(&uuid); 535 536 /* fill out stuff and get largest chunk size while looping */ 537 SLIST_FOREACH(ch_entry, cl, src_link) { 538 name = ch_entry->src_devname; 539 ch_entry->src_meta.scmi.scm_size = ch_entry->src_size; 540 ch_entry->src_meta.scmi.scm_chunk_id = cid++; 541 ch_entry->src_meta.scm_status = BIOC_SDONLINE; 542 strlcpy(ch_entry->src_meta.scmi.scm_devname, name, 543 sizeof(ch_entry->src_meta.scmi.scm_devname)); 544 bcopy(&uuid, &ch_entry->src_meta.scmi.scm_uuid, 545 sizeof(ch_entry->src_meta.scmi.scm_uuid)); 546 547 if (ch_entry->src_meta.scmi.scm_size > max_chunk_sz) 548 max_chunk_sz = ch_entry->src_meta.scmi.scm_size; 549 } 550 551 /* get smallest chunk size */ 552 min_chunk_sz = max_chunk_sz; 553 SLIST_FOREACH(ch_entry, cl, src_link) 554 if (ch_entry->src_meta.scmi.scm_size < min_chunk_sz) 555 min_chunk_sz = 
ch_entry->src_meta.scmi.scm_size; 556 557 /* equalize all sizes */ 558 SLIST_FOREACH(ch_entry, cl, src_link) 559 ch_entry->src_meta.scmi.scm_coerced_size = min_chunk_sz; 560 561 /* whine if chunks are not the same size */ 562 if (min_chunk_sz != max_chunk_sz) 563 printf("%s: chunk sizes are not equal; up to %llu blocks " 564 "wasted per chunk\n", 565 DEVNAME(sc), max_chunk_sz - min_chunk_sz); 566 } 567 568 void 569 sr_meta_init(struct sr_discipline *sd, struct sr_chunk_head *cl) 570 { 571 struct sr_softc *sc = sd->sd_sc; 572 struct sr_metadata *sm = sd->sd_meta; 573 struct sr_meta_chunk *im_sc; 574 int i, chunk_no; 575 576 DNPRINTF(SR_D_META, "%s: sr_meta_init\n", DEVNAME(sc)); 577 578 if (!sm) 579 return; 580 581 /* initial metadata */ 582 sm->ssdi.ssd_magic = SR_MAGIC; 583 sm->ssdi.ssd_version = SR_META_VERSION; 584 sm->ssd_ondisk = 0; 585 sm->ssdi.ssd_vol_flags = sd->sd_meta_flags; 586 sm->ssd_data_offset = SR_DATA_OFFSET; 587 588 /* get uuid from chunk 0 */ 589 bcopy(&sd->sd_vol.sv_chunks[0]->src_meta.scmi.scm_uuid, 590 &sm->ssdi.ssd_uuid, 591 sizeof(struct sr_uuid)); 592 593 /* volume is filled in createraid */ 594 595 /* add missing chunk bits */ 596 chunk_no = sm->ssdi.ssd_chunk_no; 597 for (i = 0; i < chunk_no; i++) { 598 im_sc = &sd->sd_vol.sv_chunks[i]->src_meta; 599 im_sc->scmi.scm_volid = sm->ssdi.ssd_volid; 600 sr_checksum(sc, im_sc, &im_sc->scm_checksum, 601 sizeof(struct sr_meta_chunk_invariant)); 602 } 603 } 604 605 void 606 sr_meta_opt_load(struct sr_discipline *sd, struct sr_meta_opt *om) 607 { 608 if (om->somi.som_type == SR_OPT_BOOT) { 609 610 611 } else 612 panic("unknown optional metadata type"); 613 } 614 615 void 616 sr_meta_save_callback(void *arg1, void *arg2) 617 { 618 struct sr_discipline *sd = arg1; 619 int s; 620 621 s = splbio(); 622 623 if (sr_meta_save(arg1, SR_META_DIRTY)) 624 printf("%s: save metadata failed\n", 625 DEVNAME(sd->sd_sc)); 626 627 sd->sd_must_flush = 0; 628 splx(s); 629 } 630 631 int 632 sr_meta_save(struct sr_discipline *sd, u_int32_t flags) 633 { 634 struct sr_softc *sc = sd->sd_sc; 635 struct sr_metadata *sm = sd->sd_meta, *m; 636 struct sr_meta_driver *s; 637 struct sr_chunk *src; 638 struct sr_meta_chunk *cm; 639 struct sr_workunit wu; 640 struct sr_meta_opt_item *omi; 641 struct sr_meta_opt *om; 642 int i; 643 644 DNPRINTF(SR_D_META, "%s: sr_meta_save %s\n", 645 DEVNAME(sc), sd->sd_meta->ssd_devname); 646 647 if (!sm) { 648 printf("%s: no in memory copy of metadata\n", DEVNAME(sc)); 649 goto bad; 650 } 651 652 /* meta scratchpad */ 653 s = &smd[sd->sd_meta_type]; 654 m = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO | M_NOWAIT); 655 if (!m) { 656 printf("%s: could not allocate metadata scratch area\n", 657 DEVNAME(sc)); 658 goto bad; 659 } 660 661 /* from here on out metadata is updated */ 662 restart: 663 sm->ssd_ondisk++; 664 sm->ssd_meta_flags = flags; 665 bcopy(sm, m, sizeof(*m)); 666 667 /* Chunk metadata. */ 668 cm = (struct sr_meta_chunk *)(m + 1); 669 for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) { 670 src = sd->sd_vol.sv_chunks[i]; 671 bcopy(&src->src_meta, cm, sizeof(*cm)); 672 cm++; 673 } 674 675 /* Optional metadata. 
*/ 676 om = (struct sr_meta_opt *)(cm); 677 SLIST_FOREACH(omi, &sd->sd_meta_opt, omi_link) { 678 bcopy(&omi->omi_om, om, sizeof(*om)); 679 sr_checksum(sc, om, &om->som_checksum, 680 sizeof(struct sr_meta_opt_invariant)); 681 om++; 682 } 683 684 for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) { 685 src = sd->sd_vol.sv_chunks[i]; 686 687 /* skip disks that are offline */ 688 if (src->src_meta.scm_status == BIOC_SDOFFLINE) 689 continue; 690 691 /* calculate metadata checksum for correct chunk */ 692 m->ssdi.ssd_chunk_id = i; 693 sr_checksum(sc, m, &m->ssd_checksum, 694 sizeof(struct sr_meta_invariant)); 695 696 #ifdef SR_DEBUG 697 DNPRINTF(SR_D_META, "%s: sr_meta_save %s: volid: %d " 698 "chunkid: %d checksum: ", 699 DEVNAME(sc), src->src_meta.scmi.scm_devname, 700 m->ssdi.ssd_volid, m->ssdi.ssd_chunk_id); 701 702 if (sr_debug & SR_D_META) 703 sr_checksum_print((u_int8_t *)&m->ssd_checksum); 704 DNPRINTF(SR_D_META, "\n"); 705 sr_meta_print(m); 706 #endif 707 708 /* translate and write to disk */ 709 if (s->smd_write(sd, src->src_dev_mm, m, NULL /* XXX */)) { 710 printf("%s: could not write metadata to %s\n", 711 DEVNAME(sc), src->src_devname); 712 /* restart the meta write */ 713 src->src_meta.scm_status = BIOC_SDOFFLINE; 714 /* XXX recalculate volume status */ 715 goto restart; 716 } 717 } 718 719 /* not all disciplines have sync */ 720 if (sd->sd_scsi_sync) { 721 bzero(&wu, sizeof(wu)); 722 wu.swu_fake = 1; 723 wu.swu_dis = sd; 724 sd->sd_scsi_sync(&wu); 725 } 726 free(m, M_DEVBUF); 727 return (0); 728 bad: 729 return (1); 730 } 731 732 int 733 sr_meta_read(struct sr_discipline *sd) 734 { 735 #ifdef SR_DEBUG 736 struct sr_softc *sc = sd->sd_sc; 737 #endif 738 struct sr_chunk_head *cl = &sd->sd_vol.sv_chunk_list; 739 struct sr_metadata *sm; 740 struct sr_chunk *ch_entry; 741 struct sr_meta_chunk *cp; 742 struct sr_meta_driver *s; 743 struct sr_meta_opt_item *omi; 744 struct sr_meta_opt *om; 745 void *fm = NULL; 746 int i, no_disk = 0, got_meta = 0; 747 748 DNPRINTF(SR_D_META, "%s: sr_meta_read\n", DEVNAME(sc)); 749 750 sm = malloc(SR_META_SIZE * 512, M_DEVBUF, M_WAITOK | M_ZERO); 751 s = &smd[sd->sd_meta_type]; 752 if (sd->sd_meta_type != SR_META_F_NATIVE) 753 fm = malloc(s->smd_size, M_DEVBUF, M_WAITOK | M_ZERO); 754 755 cp = (struct sr_meta_chunk *)(sm + 1); 756 SLIST_FOREACH(ch_entry, cl, src_link) { 757 /* skip disks that are offline */ 758 if (ch_entry->src_meta.scm_status == BIOC_SDOFFLINE) { 759 DNPRINTF(SR_D_META, 760 "%s: %s chunk marked offline, spoofing status\n", 761 DEVNAME(sc), ch_entry->src_devname); 762 cp++; /* adjust chunk pointer to match failure */ 763 continue; 764 } else if (s->smd_read(sd, ch_entry->src_dev_mm, sm, fm)) { 765 /* read and translate */ 766 /* XXX mark chunk offline, elsewhere!! 
*/ 767 ch_entry->src_meta.scm_status = BIOC_SDOFFLINE; 768 cp++; /* adjust chunk pointer to match failure */ 769 DNPRINTF(SR_D_META, "%s: sr_meta_read failed\n", 770 DEVNAME(sc)); 771 continue; 772 } 773 774 if (sm->ssdi.ssd_magic != SR_MAGIC) { 775 DNPRINTF(SR_D_META, "%s: sr_meta_read !SR_MAGIC\n", 776 DEVNAME(sc)); 777 continue; 778 } 779 780 /* validate metadata */ 781 if (sr_meta_validate(sd, ch_entry->src_dev_mm, sm, fm)) { 782 DNPRINTF(SR_D_META, "%s: invalid metadata\n", 783 DEVNAME(sc)); 784 no_disk = -1; 785 goto done; 786 } 787 788 /* assume first chunk contains metadata */ 789 if (got_meta == 0) { 790 bcopy(sm, sd->sd_meta, sizeof(*sd->sd_meta)); 791 got_meta = 1; 792 } 793 794 bcopy(cp, &ch_entry->src_meta, sizeof(ch_entry->src_meta)); 795 796 /* Process optional metadata. */ 797 om = (struct sr_meta_opt *) ((u_int8_t *)(sm + 1) + 798 sizeof(struct sr_meta_chunk) * sm->ssdi.ssd_chunk_no); 799 for (i = 0; i < sm->ssdi.ssd_opt_no; i++) { 800 801 omi = malloc(sizeof(struct sr_meta_opt_item), 802 M_DEVBUF, M_WAITOK | M_ZERO); 803 bcopy(om, &omi->omi_om, sizeof(struct sr_meta_opt)); 804 SLIST_INSERT_HEAD(&sd->sd_meta_opt, omi, omi_link); 805 806 /* See if discipline wants to handle it. */ 807 if (sd->sd_meta_opt_load && 808 sd->sd_meta_opt_load(sd, &omi->omi_om) == 0) 809 continue; 810 else 811 sr_meta_opt_load(sd, &omi->omi_om); 812 813 om++; 814 } 815 816 cp++; 817 no_disk++; 818 } 819 820 free(sm, M_DEVBUF); 821 if (fm) 822 free(fm, M_DEVBUF); 823 824 done: 825 DNPRINTF(SR_D_META, "%s: sr_meta_read found %d parts\n", DEVNAME(sc), 826 no_disk); 827 return (no_disk); 828 } 829 830 int 831 sr_meta_validate(struct sr_discipline *sd, dev_t dev, struct sr_metadata *sm, 832 void *fm) 833 { 834 struct sr_softc *sc = sd->sd_sc; 835 struct sr_meta_driver *s; 836 #ifdef SR_DEBUG 837 struct sr_meta_chunk *mc; 838 #endif 839 char devname[32]; 840 int rv = 1; 841 u_int8_t checksum[MD5_DIGEST_LENGTH]; 842 843 DNPRINTF(SR_D_META, "%s: sr_meta_validate(%p)\n", DEVNAME(sc), sm); 844 845 sr_meta_getdevname(sc, dev, devname, sizeof(devname)); 846 847 s = &smd[sd->sd_meta_type]; 848 if (sd->sd_meta_type != SR_META_F_NATIVE) 849 if (s->smd_validate(sd, sm, fm)) { 850 printf("%s: invalid foreign metadata\n", DEVNAME(sc)); 851 goto done; 852 } 853 854 /* 855 * at this point all foreign metadata has been translated to the native 856 * format and will be treated just like the native format 857 */ 858 859 if (sm->ssdi.ssd_magic != SR_MAGIC) { 860 printf("%s: not valid softraid metadata\n", DEVNAME(sc)); 861 goto done; 862 } 863 864 /* Verify metadata checksum. */ 865 sr_checksum(sc, sm, &checksum, sizeof(struct sr_meta_invariant)); 866 if (bcmp(&checksum, &sm->ssd_checksum, sizeof(checksum))) { 867 printf("%s: invalid metadata checksum\n", DEVNAME(sc)); 868 goto done; 869 } 870 871 /* Handle changes between versions. */ 872 if (sm->ssdi.ssd_version == 3) { 873 874 /* 875 * Version 3 - update metadata version and fix up data offset 876 * value since this did not exist in version 3. 877 */ 878 sm->ssdi.ssd_version = SR_META_VERSION; 879 snprintf(sm->ssdi.ssd_revision, sizeof(sm->ssdi.ssd_revision), 880 "%03d", SR_META_VERSION); 881 if (sm->ssd_data_offset == 0) 882 sm->ssd_data_offset = SR_META_V3_DATA_OFFSET; 883 884 } else if (sm->ssdi.ssd_version == SR_META_VERSION) { 885 886 /* 887 * Version 4 - original metadata format did not store 888 * data offset so fix this up if necessary. 
889 */ 890 if (sm->ssd_data_offset == 0) 891 sm->ssd_data_offset = SR_DATA_OFFSET; 892 893 } else { 894 895 printf("%s: %s can not read metadata version %u, expected %u\n", 896 DEVNAME(sc), devname, sm->ssdi.ssd_version, 897 SR_META_VERSION); 898 goto done; 899 900 } 901 902 #ifdef SR_DEBUG 903 /* warn if disk changed order */ 904 mc = (struct sr_meta_chunk *)(sm + 1); 905 if (strncmp(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname, devname, 906 sizeof(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname))) 907 DNPRINTF(SR_D_META, "%s: roaming device %s -> %s\n", 908 DEVNAME(sc), mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname, 909 devname); 910 #endif 911 912 /* we have meta data on disk */ 913 DNPRINTF(SR_D_META, "%s: sr_meta_validate valid metadata %s\n", 914 DEVNAME(sc), devname); 915 916 rv = 0; 917 done: 918 return (rv); 919 } 920 921 int 922 sr_meta_native_bootprobe(struct sr_softc *sc, dev_t devno, 923 struct sr_metadata_list_head *mlh) 924 { 925 struct vnode *vn; 926 struct disklabel label; 927 struct sr_metadata *md = NULL; 928 struct sr_discipline *fake_sd = NULL; 929 struct sr_metadata_list *mle; 930 char devname[32]; 931 dev_t chrdev, rawdev; 932 int error, i; 933 int rv = SR_META_NOTCLAIMED; 934 935 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe\n", DEVNAME(sc)); 936 937 /* 938 * Use character raw device to avoid SCSI complaints about missing 939 * media on removable media devices. 940 */ 941 chrdev = blktochr(devno); 942 rawdev = MAKEDISKDEV(major(chrdev), DISKUNIT(devno), RAW_PART); 943 if (cdevvp(rawdev, &vn)) { 944 printf("%s:, sr_meta_native_bootprobe: can't allocate vnode\n", 945 DEVNAME(sc)); 946 goto done; 947 } 948 949 /* open device */ 950 error = VOP_OPEN(vn, FREAD, NOCRED, curproc); 951 if (error) { 952 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe open " 953 "failed\n", DEVNAME(sc)); 954 vput(vn); 955 goto done; 956 } 957 958 /* get disklabel */ 959 error = VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD, NOCRED, 960 curproc); 961 if (error) { 962 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe ioctl " 963 "failed\n", DEVNAME(sc)); 964 VOP_CLOSE(vn, FREAD, NOCRED, curproc); 965 vput(vn); 966 goto done; 967 } 968 969 /* we are done, close device */ 970 error = VOP_CLOSE(vn, FREAD, NOCRED, curproc); 971 if (error) { 972 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe close " 973 "failed\n", DEVNAME(sc)); 974 vput(vn); 975 goto done; 976 } 977 vput(vn); 978 979 md = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO | M_NOWAIT); 980 if (md == NULL) { 981 printf("%s: not enough memory for metadata buffer\n", 982 DEVNAME(sc)); 983 goto done; 984 } 985 986 /* create fake sd to use utility functions */ 987 fake_sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, 988 M_ZERO | M_NOWAIT); 989 if (fake_sd == NULL) { 990 printf("%s: not enough memory for fake discipline\n", 991 DEVNAME(sc)); 992 goto done; 993 } 994 fake_sd->sd_sc = sc; 995 fake_sd->sd_meta_type = SR_META_F_NATIVE; 996 997 for (i = 0; i < MAXPARTITIONS; i++) { 998 if (label.d_partitions[i].p_fstype != FS_RAID) 999 continue; 1000 1001 /* open partition */ 1002 rawdev = MAKEDISKDEV(major(devno), DISKUNIT(devno), i); 1003 if (bdevvp(rawdev, &vn)) { 1004 printf("%s:, sr_meta_native_bootprobe: can't allocate " 1005 "vnode for partition\n", DEVNAME(sc)); 1006 goto done; 1007 } 1008 error = VOP_OPEN(vn, FREAD, NOCRED, curproc); 1009 if (error) { 1010 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe " 1011 "open failed, partition %d\n", 1012 DEVNAME(sc), i); 1013 vput(vn); 1014 continue; 1015 } 1016 1017 if 
(sr_meta_native_read(fake_sd, rawdev, md, NULL)) { 1018 printf("%s: native bootprobe could not read native " 1019 "metadata\n", DEVNAME(sc)); 1020 VOP_CLOSE(vn, FREAD, NOCRED, curproc); 1021 vput(vn); 1022 continue; 1023 } 1024 1025 /* are we a softraid partition? */ 1026 if (md->ssdi.ssd_magic != SR_MAGIC) { 1027 VOP_CLOSE(vn, FREAD, NOCRED, curproc); 1028 vput(vn); 1029 continue; 1030 } 1031 1032 sr_meta_getdevname(sc, rawdev, devname, sizeof(devname)); 1033 if (sr_meta_validate(fake_sd, rawdev, md, NULL) == 0) { 1034 if (md->ssdi.ssd_vol_flags & BIOC_SCNOAUTOASSEMBLE) { 1035 DNPRINTF(SR_D_META, "%s: don't save %s\n", 1036 DEVNAME(sc), devname); 1037 } else { 1038 /* XXX fix M_WAITOK, this is boot time */ 1039 mle = malloc(sizeof(*mle), M_DEVBUF, 1040 M_WAITOK | M_ZERO); 1041 bcopy(md, &mle->sml_metadata, 1042 SR_META_SIZE * 512); 1043 mle->sml_mm = rawdev; 1044 SLIST_INSERT_HEAD(mlh, mle, sml_link); 1045 rv = SR_META_CLAIMED; 1046 } 1047 } 1048 1049 /* we are done, close partition */ 1050 VOP_CLOSE(vn, FREAD, NOCRED, curproc); 1051 vput(vn); 1052 } 1053 1054 done: 1055 if (fake_sd) 1056 free(fake_sd, M_DEVBUF); 1057 if (md) 1058 free(md, M_DEVBUF); 1059 1060 return (rv); 1061 } 1062 1063 int 1064 sr_boot_assembly(struct sr_softc *sc) 1065 { 1066 struct disk *dk; 1067 struct sr_disk_head sdklist; 1068 struct sr_disk *sdk; 1069 struct bioc_createraid bc; 1070 struct sr_metadata_list_head mlh, kdh; 1071 struct sr_metadata_list *mle, *mlenext, *mle1, *mle2; 1072 struct sr_metadata *metadata; 1073 struct sr_boot_volume_head bvh; 1074 struct sr_boot_volume *vol, *vp1, *vp2; 1075 struct sr_meta_chunk *hm; 1076 struct sr_chunk_head *cl; 1077 struct sr_chunk *hotspare, *chunk, *last; 1078 u_int32_t chunk_id; 1079 u_int64_t *ondisk = NULL; 1080 dev_t *devs = NULL; 1081 char devname[32]; 1082 int rv = 0, i; 1083 1084 DNPRINTF(SR_D_META, "%s: sr_boot_assembly\n", DEVNAME(sc)); 1085 1086 SLIST_INIT(&sdklist); 1087 SLIST_INIT(&mlh); 1088 1089 dk = TAILQ_FIRST(&disklist); 1090 while (dk != TAILQ_END(&disklist)) { 1091 1092 /* See if this disk has been checked. */ 1093 SLIST_FOREACH(sdk, &sdklist, sdk_link) 1094 if (sdk->sdk_devno == dk->dk_devno) 1095 break; 1096 1097 if (sdk != NULL) { 1098 dk = TAILQ_NEXT(dk, dk_link); 1099 continue; 1100 } 1101 1102 /* Add this disk to the list that we've checked. */ 1103 sdk = malloc(sizeof(struct sr_disk), M_DEVBUF, 1104 M_NOWAIT | M_CANFAIL | M_ZERO); 1105 if (sdk == NULL) 1106 goto unwind; 1107 sdk->sdk_devno = dk->dk_devno; 1108 SLIST_INSERT_HEAD(&sdklist, sdk, sdk_link); 1109 1110 /* Only check sd(4) and wd(4) devices. */ 1111 if (strncmp(dk->dk_name, "sd", 2) && 1112 strncmp(dk->dk_name, "wd", 2)) { 1113 dk = TAILQ_NEXT(dk, dk_link); 1114 continue; 1115 } 1116 1117 /* native softraid uses partitions */ 1118 sr_meta_native_bootprobe(sc, dk->dk_devno, &mlh); 1119 1120 /* probe non-native disks if native failed. */ 1121 1122 /* Restart scan since we may have slept. */ 1123 dk = TAILQ_FIRST(&disklist); 1124 } 1125 1126 /* 1127 * Create a list of volumes and associate chunks with each volume. 1128 */ 1129 1130 SLIST_INIT(&bvh); 1131 SLIST_INIT(&kdh); 1132 1133 for (mle = SLIST_FIRST(&mlh); mle != SLIST_END(&mlh); mle = mlenext) { 1134 1135 mlenext = SLIST_NEXT(mle, sml_link); 1136 SLIST_REMOVE(&mlh, mle, sr_metadata_list, sml_link); 1137 1138 metadata = (struct sr_metadata *)&mle->sml_metadata; 1139 mle->sml_chunk_id = metadata->ssdi.ssd_chunk_id; 1140 1141 /* Handle key disks separately. 
*/ 1142 if (metadata->ssdi.ssd_level == SR_KEYDISK_LEVEL) { 1143 SLIST_INSERT_HEAD(&kdh, mle, sml_link); 1144 continue; 1145 } 1146 1147 SLIST_FOREACH(vol, &bvh, sbv_link) { 1148 if (bcmp(&metadata->ssdi.ssd_uuid, &vol->sbv_uuid, 1149 sizeof(metadata->ssdi.ssd_uuid)) == 0) 1150 break; 1151 } 1152 1153 if (vol == NULL) { 1154 vol = malloc(sizeof(struct sr_boot_volume), 1155 M_DEVBUF, M_NOWAIT | M_CANFAIL | M_ZERO); 1156 if (vol == NULL) { 1157 printf("%s: failed to allocate boot volume!\n", 1158 DEVNAME(sc)); 1159 goto unwind; 1160 } 1161 1162 vol->sbv_level = metadata->ssdi.ssd_level; 1163 vol->sbv_volid = metadata->ssdi.ssd_volid; 1164 vol->sbv_chunk_no = metadata->ssdi.ssd_chunk_no; 1165 bcopy(&metadata->ssdi.ssd_uuid, &vol->sbv_uuid, 1166 sizeof(metadata->ssdi.ssd_uuid)); 1167 SLIST_INIT(&vol->sml); 1168 1169 /* Maintain volume order. */ 1170 vp2 = NULL; 1171 SLIST_FOREACH(vp1, &bvh, sbv_link) { 1172 if (vp1->sbv_volid > vol->sbv_volid) 1173 break; 1174 vp2 = vp1; 1175 } 1176 if (vp2 == NULL) { 1177 DNPRINTF(SR_D_META, "%s: insert volume %u " 1178 "at head\n", DEVNAME(sc), vol->sbv_volid); 1179 SLIST_INSERT_HEAD(&bvh, vol, sbv_link); 1180 } else { 1181 DNPRINTF(SR_D_META, "%s: insert volume %u " 1182 "after %u\n", DEVNAME(sc), vol->sbv_volid, 1183 vp2->sbv_volid); 1184 SLIST_INSERT_AFTER(vp2, vol, sbv_link); 1185 } 1186 } 1187 1188 /* Maintain chunk order. */ 1189 mle2 = NULL; 1190 SLIST_FOREACH(mle1, &vol->sml, sml_link) { 1191 if (mle1->sml_chunk_id > mle->sml_chunk_id) 1192 break; 1193 mle2 = mle1; 1194 } 1195 if (mle2 == NULL) { 1196 DNPRINTF(SR_D_META, "%s: volume %u insert chunk %u " 1197 "at head\n", DEVNAME(sc), vol->sbv_volid, 1198 mle->sml_chunk_id); 1199 SLIST_INSERT_HEAD(&vol->sml, mle, sml_link); 1200 } else { 1201 DNPRINTF(SR_D_META, "%s: volume %u insert chunk %u " 1202 "after %u\n", DEVNAME(sc), vol->sbv_volid, 1203 mle->sml_chunk_id, mle2->sml_chunk_id); 1204 SLIST_INSERT_AFTER(mle2, mle, sml_link); 1205 } 1206 1207 vol->sbv_dev_no++; 1208 } 1209 1210 /* Allocate memory for device and ondisk version arrays. */ 1211 devs = malloc(BIOC_CRMAXLEN * sizeof(dev_t), M_DEVBUF, 1212 M_NOWAIT | M_CANFAIL); 1213 if (devs == NULL) { 1214 printf("%s: failed to allocate device array\n", DEVNAME(sc)); 1215 goto unwind; 1216 } 1217 ondisk = malloc(BIOC_CRMAXLEN * sizeof(u_int64_t), M_DEVBUF, 1218 M_NOWAIT | M_CANFAIL); 1219 if (ondisk == NULL) { 1220 printf("%s: failed to allocate ondisk array\n", DEVNAME(sc)); 1221 goto unwind; 1222 } 1223 1224 /* 1225 * Assemble hotspare "volumes". 1226 */ 1227 SLIST_FOREACH(vol, &bvh, sbv_link) { 1228 1229 /* Check if this is a hotspare "volume". */ 1230 if (vol->sbv_level != SR_HOTSPARE_LEVEL || 1231 vol->sbv_chunk_no != 1) 1232 continue; 1233 1234 #ifdef SR_DEBUG 1235 DNPRINTF(SR_D_META, "%s: assembling hotspare volume ", 1236 DEVNAME(sc)); 1237 if (sr_debug & SR_D_META) 1238 sr_uuid_print(&vol->sbv_uuid, 0); 1239 DNPRINTF(SR_D_META, " volid %u with %u chunks\n", 1240 vol->sbv_volid, vol->sbv_chunk_no); 1241 #endif 1242 1243 /* Create hotspare chunk metadata. 
*/ 1244 hotspare = malloc(sizeof(struct sr_chunk), M_DEVBUF, 1245 M_NOWAIT | M_CANFAIL | M_ZERO); 1246 if (hotspare == NULL) { 1247 printf("%s: failed to allocate hotspare\n", 1248 DEVNAME(sc)); 1249 goto unwind; 1250 } 1251 1252 mle = SLIST_FIRST(&vol->sml); 1253 sr_meta_getdevname(sc, mle->sml_mm, devname, sizeof(devname)); 1254 hotspare->src_dev_mm = mle->sml_mm; 1255 strlcpy(hotspare->src_devname, devname, 1256 sizeof(hotspare->src_devname)); 1257 hotspare->src_size = metadata->ssdi.ssd_size; 1258 1259 hm = &hotspare->src_meta; 1260 hm->scmi.scm_volid = SR_HOTSPARE_VOLID; 1261 hm->scmi.scm_chunk_id = 0; 1262 hm->scmi.scm_size = metadata->ssdi.ssd_size; 1263 hm->scmi.scm_coerced_size = metadata->ssdi.ssd_size; 1264 strlcpy(hm->scmi.scm_devname, devname, 1265 sizeof(hm->scmi.scm_devname)); 1266 bcopy(&metadata->ssdi.ssd_uuid, &hm->scmi.scm_uuid, 1267 sizeof(struct sr_uuid)); 1268 1269 sr_checksum(sc, hm, &hm->scm_checksum, 1270 sizeof(struct sr_meta_chunk_invariant)); 1271 1272 hm->scm_status = BIOC_SDHOTSPARE; 1273 1274 /* Add chunk to hotspare list. */ 1275 rw_enter_write(&sc->sc_hs_lock); 1276 cl = &sc->sc_hotspare_list; 1277 if (SLIST_EMPTY(cl)) 1278 SLIST_INSERT_HEAD(cl, hotspare, src_link); 1279 else { 1280 SLIST_FOREACH(chunk, cl, src_link) 1281 last = chunk; 1282 SLIST_INSERT_AFTER(last, hotspare, src_link); 1283 } 1284 sc->sc_hotspare_no++; 1285 rw_exit_write(&sc->sc_hs_lock); 1286 1287 } 1288 1289 /* 1290 * Assemble RAID volumes. 1291 */ 1292 SLIST_FOREACH(vol, &bvh, sbv_link) { 1293 1294 bzero(&bc, sizeof(bc)); 1295 1296 /* Check if this is a hotspare "volume". */ 1297 if (vol->sbv_level == SR_HOTSPARE_LEVEL && 1298 vol->sbv_chunk_no == 1) 1299 continue; 1300 1301 #ifdef SR_DEBUG 1302 DNPRINTF(SR_D_META, "%s: assembling volume ", DEVNAME(sc)); 1303 if (sr_debug & SR_D_META) 1304 sr_uuid_print(&vol->sbv_uuid, 0); 1305 DNPRINTF(SR_D_META, " volid %u with %u chunks\n", 1306 vol->sbv_volid, vol->sbv_chunk_no); 1307 #endif 1308 1309 /* 1310 * If this is a crypto volume, try to find a matching 1311 * key disk... 
1312 */ 1313 bc.bc_key_disk = NODEV; 1314 if (vol->sbv_level == 'C') { 1315 SLIST_FOREACH(mle, &kdh, sml_link) { 1316 metadata = 1317 (struct sr_metadata *)&mle->sml_metadata; 1318 if (bcmp(&metadata->ssdi.ssd_uuid, 1319 &vol->sbv_uuid, 1320 sizeof(metadata->ssdi.ssd_uuid)) == 0) { 1321 bc.bc_key_disk = mle->sml_mm; 1322 } 1323 } 1324 } 1325 1326 for (i = 0; i < BIOC_CRMAXLEN; i++) { 1327 devs[i] = NODEV; /* mark device as illegal */ 1328 ondisk[i] = 0; 1329 } 1330 1331 SLIST_FOREACH(mle, &vol->sml, sml_link) { 1332 metadata = (struct sr_metadata *)&mle->sml_metadata; 1333 chunk_id = metadata->ssdi.ssd_chunk_id; 1334 1335 if (devs[chunk_id] != NODEV) { 1336 vol->sbv_dev_no--; 1337 sr_meta_getdevname(sc, mle->sml_mm, devname, 1338 sizeof(devname)); 1339 printf("%s: found duplicate chunk %u for " 1340 "volume %u on device %s\n", DEVNAME(sc), 1341 chunk_id, vol->sbv_volid, devname); 1342 } 1343 1344 if (devs[chunk_id] == NODEV || 1345 metadata->ssd_ondisk > ondisk[chunk_id]) { 1346 devs[chunk_id] = mle->sml_mm; 1347 ondisk[chunk_id] = metadata->ssd_ondisk; 1348 DNPRINTF(SR_D_META, "%s: using ondisk " 1349 "metadata version %llu for chunk %u\n", 1350 DEVNAME(sc), ondisk[chunk_id], chunk_id); 1351 } 1352 } 1353 1354 if (vol->sbv_chunk_no != vol->sbv_dev_no) { 1355 printf("%s: not all chunks were provided; " 1356 "attempting to bring volume %d online\n", 1357 DEVNAME(sc), vol->sbv_volid); 1358 } 1359 1360 bc.bc_level = vol->sbv_level; 1361 bc.bc_dev_list_len = vol->sbv_chunk_no * sizeof(dev_t); 1362 bc.bc_dev_list = devs; 1363 bc.bc_flags = BIOC_SCDEVT; 1364 1365 rw_enter_write(&sc->sc_lock); 1366 sr_ioctl_createraid(sc, &bc, 0); 1367 rw_exit_write(&sc->sc_lock); 1368 1369 rv++; 1370 } 1371 1372 /* done with metadata */ 1373 unwind: 1374 for (vp1 = SLIST_FIRST(&bvh); vp1 != SLIST_END(&bvh); vp1 = vp2) { 1375 vp2 = SLIST_NEXT(vp1, sbv_link); 1376 for (mle1 = SLIST_FIRST(&vp1->sml); 1377 mle1 != SLIST_END(&vp1->sml); mle1 = mle2) { 1378 mle2 = SLIST_NEXT(mle1, sml_link); 1379 free(mle1, M_DEVBUF); 1380 } 1381 free(vp1, M_DEVBUF); 1382 } 1383 for (mle = SLIST_FIRST(&mlh); mle != SLIST_END(&mlh); mle = mle2) { 1384 mle2 = SLIST_NEXT(mle, sml_link); 1385 free(mle, M_DEVBUF); 1386 } 1387 SLIST_INIT(&mlh); 1388 1389 while (!SLIST_EMPTY(&sdklist)) { 1390 sdk = SLIST_FIRST(&sdklist); 1391 SLIST_REMOVE_HEAD(&sdklist, sdk_link); 1392 free(sdk, M_DEVBUF); 1393 } 1394 1395 if (devs) 1396 free(devs, M_DEVBUF); 1397 if (ondisk) 1398 free(ondisk, M_DEVBUF); 1399 1400 return (rv); 1401 } 1402 1403 int 1404 sr_meta_native_probe(struct sr_softc *sc, struct sr_chunk *ch_entry) 1405 { 1406 struct disklabel label; 1407 char *devname; 1408 int error, part; 1409 daddr64_t size; 1410 1411 DNPRINTF(SR_D_META, "%s: sr_meta_native_probe(%s)\n", 1412 DEVNAME(sc), ch_entry->src_devname); 1413 1414 devname = ch_entry->src_devname; 1415 part = DISKPART(ch_entry->src_dev_mm); 1416 1417 /* get disklabel */ 1418 error = VOP_IOCTL(ch_entry->src_vn, DIOCGDINFO, (caddr_t)&label, FREAD, 1419 NOCRED, curproc); 1420 if (error) { 1421 DNPRINTF(SR_D_META, "%s: %s can't obtain disklabel\n", 1422 DEVNAME(sc), devname); 1423 goto unwind; 1424 } 1425 1426 /* make sure the partition is of the right type */ 1427 if (label.d_partitions[part].p_fstype != FS_RAID) { 1428 DNPRINTF(SR_D_META, 1429 "%s: %s partition not of type RAID (%d)\n", DEVNAME(sc), 1430 devname, 1431 label.d_partitions[part].p_fstype); 1432 goto unwind; 1433 } 1434 1435 size = DL_GETPSIZE(&label.d_partitions[part]) - SR_DATA_OFFSET; 1436 if (size <= 0) { 1437 
DNPRINTF(SR_D_META, "%s: %s partition too small\n", DEVNAME(sc), 1438 devname); 1439 goto unwind; 1440 } 1441 ch_entry->src_size = size; 1442 1443 DNPRINTF(SR_D_META, "%s: probe found %s size %d\n", DEVNAME(sc), 1444 devname, size); 1445 1446 return (SR_META_F_NATIVE); 1447 unwind: 1448 DNPRINTF(SR_D_META, "%s: invalid device: %s\n", DEVNAME(sc), 1449 devname ? devname : "nodev"); 1450 return (SR_META_F_INVALID); 1451 } 1452 1453 int 1454 sr_meta_native_attach(struct sr_discipline *sd, int force) 1455 { 1456 struct sr_softc *sc = sd->sd_sc; 1457 struct sr_chunk_head *cl = &sd->sd_vol.sv_chunk_list; 1458 struct sr_metadata *md = NULL; 1459 struct sr_chunk *ch_entry, *ch_next; 1460 struct sr_uuid uuid; 1461 u_int64_t version = 0; 1462 int sr, not_sr, rv = 1, d, expected = -1, old_meta = 0; 1463 1464 DNPRINTF(SR_D_META, "%s: sr_meta_native_attach\n", DEVNAME(sc)); 1465 1466 md = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO | M_NOWAIT); 1467 if (md == NULL) { 1468 printf("%s: not enough memory for metadata buffer\n", 1469 DEVNAME(sc)); 1470 goto bad; 1471 } 1472 1473 bzero(&uuid, sizeof uuid); 1474 1475 sr = not_sr = d = 0; 1476 SLIST_FOREACH(ch_entry, cl, src_link) { 1477 if (ch_entry->src_dev_mm == NODEV) 1478 continue; 1479 1480 if (sr_meta_native_read(sd, ch_entry->src_dev_mm, md, NULL)) { 1481 printf("%s: could not read native metadata\n", 1482 DEVNAME(sc)); 1483 goto bad; 1484 } 1485 1486 if (md->ssdi.ssd_magic == SR_MAGIC) { 1487 sr++; 1488 ch_entry->src_meta.scmi.scm_chunk_id = 1489 md->ssdi.ssd_chunk_id; 1490 if (d == 0) { 1491 bcopy(&md->ssdi.ssd_uuid, &uuid, sizeof uuid); 1492 expected = md->ssdi.ssd_chunk_no; 1493 version = md->ssd_ondisk; 1494 d++; 1495 continue; 1496 } else if (bcmp(&md->ssdi.ssd_uuid, &uuid, 1497 sizeof uuid)) { 1498 printf("%s: not part of the same volume\n", 1499 DEVNAME(sc)); 1500 goto bad; 1501 } 1502 if (md->ssd_ondisk != version) { 1503 old_meta++; 1504 version = MAX(md->ssd_ondisk, version); 1505 } 1506 } else 1507 not_sr++; 1508 } 1509 1510 if (sr && not_sr) { 1511 printf("%s: not all chunks are of the native metadata format\n", 1512 DEVNAME(sc)); 1513 goto bad; 1514 } 1515 1516 /* mixed metadata versions; mark bad disks offline */ 1517 if (old_meta) { 1518 d = 0; 1519 for (ch_entry = SLIST_FIRST(cl); ch_entry != SLIST_END(cl); 1520 ch_entry = ch_next, d++) { 1521 ch_next = SLIST_NEXT(ch_entry, src_link); 1522 1523 /* XXX do we want to read this again? 
*/ 1524 if (ch_entry->src_dev_mm == NODEV) 1525 panic("src_dev_mm == NODEV"); 1526 if (sr_meta_native_read(sd, ch_entry->src_dev_mm, md, 1527 NULL)) 1528 printf("%s: could not read native metadata\n", 1529 DEVNAME(sc)); 1530 if (md->ssd_ondisk != version) 1531 sd->sd_vol.sv_chunks[d]->src_meta.scm_status = 1532 BIOC_SDOFFLINE; 1533 } 1534 } 1535 1536 if (expected != sr && !force && expected != -1) { 1537 DNPRINTF(SR_D_META, "%s: not all chunks were provided, trying " 1538 "anyway\n", DEVNAME(sc)); 1539 } 1540 1541 rv = 0; 1542 bad: 1543 if (md) 1544 free(md, M_DEVBUF); 1545 return (rv); 1546 } 1547 1548 int 1549 sr_meta_native_read(struct sr_discipline *sd, dev_t dev, 1550 struct sr_metadata *md, void *fm) 1551 { 1552 #ifdef SR_DEBUG 1553 struct sr_softc *sc = sd->sd_sc; 1554 #endif 1555 DNPRINTF(SR_D_META, "%s: sr_meta_native_read(0x%x, %p)\n", 1556 DEVNAME(sc), dev, md); 1557 1558 return (sr_meta_rw(sd, dev, md, SR_META_SIZE * 512, SR_META_OFFSET, 1559 B_READ)); 1560 } 1561 1562 int 1563 sr_meta_native_write(struct sr_discipline *sd, dev_t dev, 1564 struct sr_metadata *md, void *fm) 1565 { 1566 #ifdef SR_DEBUG 1567 struct sr_softc *sc = sd->sd_sc; 1568 #endif 1569 DNPRINTF(SR_D_META, "%s: sr_meta_native_write(0x%x, %p)\n", 1570 DEVNAME(sc), dev, md); 1571 1572 return (sr_meta_rw(sd, dev, md, SR_META_SIZE * 512, SR_META_OFFSET, 1573 B_WRITE)); 1574 } 1575 1576 void 1577 sr_hotplug_register(struct sr_discipline *sd, void *func) 1578 { 1579 struct sr_hotplug_list *mhe; 1580 1581 DNPRINTF(SR_D_MISC, "%s: sr_hotplug_register: %p\n", 1582 DEVNAME(sd->sd_sc), func); 1583 1584 /* make sure we aren't on the list yet */ 1585 SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link) 1586 if (mhe->sh_hotplug == func) 1587 return; 1588 1589 mhe = malloc(sizeof(struct sr_hotplug_list), M_DEVBUF, 1590 M_WAITOK | M_ZERO); 1591 mhe->sh_hotplug = func; 1592 mhe->sh_sd = sd; 1593 SLIST_INSERT_HEAD(&sr_hotplug_callbacks, mhe, shl_link); 1594 } 1595 1596 void 1597 sr_hotplug_unregister(struct sr_discipline *sd, void *func) 1598 { 1599 struct sr_hotplug_list *mhe; 1600 1601 DNPRINTF(SR_D_MISC, "%s: sr_hotplug_unregister: %s %p\n", 1602 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, func); 1603 1604 /* make sure we are on the list yet */ 1605 SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link) 1606 if (mhe->sh_hotplug == func) { 1607 SLIST_REMOVE(&sr_hotplug_callbacks, mhe, 1608 sr_hotplug_list, shl_link); 1609 free(mhe, M_DEVBUF); 1610 if (SLIST_EMPTY(&sr_hotplug_callbacks)) 1611 SLIST_INIT(&sr_hotplug_callbacks); 1612 return; 1613 } 1614 } 1615 1616 void 1617 sr_disk_attach(struct disk *diskp, int action) 1618 { 1619 struct sr_hotplug_list *mhe; 1620 1621 SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link) 1622 if (mhe->sh_sd->sd_ready) 1623 mhe->sh_hotplug(mhe->sh_sd, diskp, action); 1624 } 1625 1626 int 1627 sr_match(struct device *parent, void *match, void *aux) 1628 { 1629 return (1); 1630 } 1631 1632 void 1633 sr_attach(struct device *parent, struct device *self, void *aux) 1634 { 1635 struct sr_softc *sc = (void *)self; 1636 1637 DNPRINTF(SR_D_MISC, "\n%s: sr_attach", DEVNAME(sc)); 1638 1639 rw_init(&sc->sc_lock, "sr_lock"); 1640 rw_init(&sc->sc_hs_lock, "sr_hs_lock"); 1641 1642 SLIST_INIT(&sr_hotplug_callbacks); 1643 SLIST_INIT(&sc->sc_hotspare_list); 1644 1645 #if NBIO > 0 1646 if (bio_register(&sc->sc_dev, sr_ioctl) != 0) 1647 printf("%s: controller registration failed", DEVNAME(sc)); 1648 else 1649 sc->sc_ioctl = sr_ioctl; 1650 #endif /* NBIO > 0 */ 1651 1652 #ifndef SMALL_KERNEL 1653 
strlcpy(sc->sc_sensordev.xname, DEVNAME(sc), 1654 sizeof(sc->sc_sensordev.xname)); 1655 sensordev_install(&sc->sc_sensordev); 1656 #endif /* SMALL_KERNEL */ 1657 1658 printf("\n"); 1659 1660 softraid_disk_attach = sr_disk_attach; 1661 1662 sr_boot_assembly(sc); 1663 } 1664 1665 int 1666 sr_detach(struct device *self, int flags) 1667 { 1668 #ifndef SMALL_KERNEL 1669 struct sr_softc *sc = (void *)self; 1670 1671 sensordev_deinstall(&sc->sc_sensordev); 1672 #endif /* SMALL_KERNEL */ 1673 1674 return (0); 1675 } 1676 1677 void 1678 sr_minphys(struct buf *bp, struct scsi_link *sl) 1679 { 1680 DNPRINTF(SR_D_MISC, "sr_minphys: %d\n", bp->b_bcount); 1681 1682 /* XXX currently using SR_MAXFER = MAXPHYS */ 1683 if (bp->b_bcount > SR_MAXFER) 1684 bp->b_bcount = SR_MAXFER; 1685 minphys(bp); 1686 } 1687 1688 void 1689 sr_copy_internal_data(struct scsi_xfer *xs, void *v, size_t size) 1690 { 1691 size_t copy_cnt; 1692 1693 DNPRINTF(SR_D_MISC, "sr_copy_internal_data xs: %p size: %d\n", 1694 xs, size); 1695 1696 if (xs->datalen) { 1697 copy_cnt = MIN(size, xs->datalen); 1698 bcopy(v, xs->data, copy_cnt); 1699 } 1700 } 1701 1702 int 1703 sr_ccb_alloc(struct sr_discipline *sd) 1704 { 1705 struct sr_ccb *ccb; 1706 int i; 1707 1708 if (!sd) 1709 return (1); 1710 1711 DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc\n", DEVNAME(sd->sd_sc)); 1712 1713 if (sd->sd_ccb) 1714 return (1); 1715 1716 sd->sd_ccb = malloc(sizeof(struct sr_ccb) * 1717 sd->sd_max_wu * sd->sd_max_ccb_per_wu, M_DEVBUF, M_WAITOK | M_ZERO); 1718 TAILQ_INIT(&sd->sd_ccb_freeq); 1719 for (i = 0; i < sd->sd_max_wu * sd->sd_max_ccb_per_wu; i++) { 1720 ccb = &sd->sd_ccb[i]; 1721 ccb->ccb_dis = sd; 1722 sr_ccb_put(ccb); 1723 } 1724 1725 DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc ccb: %d\n", 1726 DEVNAME(sd->sd_sc), sd->sd_max_wu * sd->sd_max_ccb_per_wu); 1727 1728 return (0); 1729 } 1730 1731 void 1732 sr_ccb_free(struct sr_discipline *sd) 1733 { 1734 struct sr_ccb *ccb; 1735 1736 if (!sd) 1737 return; 1738 1739 DNPRINTF(SR_D_CCB, "%s: sr_ccb_free %p\n", DEVNAME(sd->sd_sc), sd); 1740 1741 while ((ccb = TAILQ_FIRST(&sd->sd_ccb_freeq)) != NULL) 1742 TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link); 1743 1744 if (sd->sd_ccb) 1745 free(sd->sd_ccb, M_DEVBUF); 1746 } 1747 1748 struct sr_ccb * 1749 sr_ccb_get(struct sr_discipline *sd) 1750 { 1751 struct sr_ccb *ccb; 1752 int s; 1753 1754 s = splbio(); 1755 1756 ccb = TAILQ_FIRST(&sd->sd_ccb_freeq); 1757 if (ccb) { 1758 TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link); 1759 ccb->ccb_state = SR_CCB_INPROGRESS; 1760 } 1761 1762 splx(s); 1763 1764 DNPRINTF(SR_D_CCB, "%s: sr_ccb_get: %p\n", DEVNAME(sd->sd_sc), 1765 ccb); 1766 1767 return (ccb); 1768 } 1769 1770 void 1771 sr_ccb_put(struct sr_ccb *ccb) 1772 { 1773 struct sr_discipline *sd = ccb->ccb_dis; 1774 int s; 1775 1776 DNPRINTF(SR_D_CCB, "%s: sr_ccb_put: %p\n", DEVNAME(sd->sd_sc), 1777 ccb); 1778 1779 s = splbio(); 1780 1781 ccb->ccb_wu = NULL; 1782 ccb->ccb_state = SR_CCB_FREE; 1783 ccb->ccb_target = -1; 1784 ccb->ccb_opaque = NULL; 1785 1786 TAILQ_INSERT_TAIL(&sd->sd_ccb_freeq, ccb, ccb_link); 1787 1788 splx(s); 1789 } 1790 1791 int 1792 sr_wu_alloc(struct sr_discipline *sd) 1793 { 1794 struct sr_workunit *wu; 1795 int i, no_wu; 1796 1797 if (!sd) 1798 return (1); 1799 1800 DNPRINTF(SR_D_WU, "%s: sr_wu_alloc %p %d\n", DEVNAME(sd->sd_sc), 1801 sd, sd->sd_max_wu); 1802 1803 if (sd->sd_wu) 1804 return (1); 1805 1806 no_wu = sd->sd_max_wu; 1807 sd->sd_wu_pending = no_wu; 1808 1809 sd->sd_wu = malloc(sizeof(struct sr_workunit) * no_wu, 1810 M_DEVBUF, M_WAITOK | M_ZERO); 1811 
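	/* Initialize the free, pending and deferred work unit queues. */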
TAILQ_INIT(&sd->sd_wu_freeq); 1812 TAILQ_INIT(&sd->sd_wu_pendq); 1813 TAILQ_INIT(&sd->sd_wu_defq); 1814 for (i = 0; i < no_wu; i++) { 1815 wu = &sd->sd_wu[i]; 1816 wu->swu_dis = sd; 1817 sr_wu_put(sd, wu); 1818 } 1819 1820 return (0); 1821 } 1822 1823 void 1824 sr_wu_free(struct sr_discipline *sd) 1825 { 1826 struct sr_workunit *wu; 1827 1828 if (!sd) 1829 return; 1830 1831 DNPRINTF(SR_D_WU, "%s: sr_wu_free %p\n", DEVNAME(sd->sd_sc), sd); 1832 1833 while ((wu = TAILQ_FIRST(&sd->sd_wu_freeq)) != NULL) 1834 TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link); 1835 while ((wu = TAILQ_FIRST(&sd->sd_wu_pendq)) != NULL) 1836 TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link); 1837 while ((wu = TAILQ_FIRST(&sd->sd_wu_defq)) != NULL) 1838 TAILQ_REMOVE(&sd->sd_wu_defq, wu, swu_link); 1839 1840 if (sd->sd_wu) 1841 free(sd->sd_wu, M_DEVBUF); 1842 } 1843 1844 void 1845 sr_wu_put(void *xsd, void *xwu) 1846 { 1847 struct sr_discipline *sd = (struct sr_discipline *)xsd; 1848 struct sr_workunit *wu = (struct sr_workunit *)xwu; 1849 struct sr_ccb *ccb; 1850 1851 int s; 1852 1853 DNPRINTF(SR_D_WU, "%s: sr_wu_put: %p\n", DEVNAME(sd->sd_sc), wu); 1854 1855 s = splbio(); 1856 if (wu->swu_cb_active == 1) 1857 panic("%s: sr_wu_put got active wu", DEVNAME(sd->sd_sc)); 1858 while ((ccb = TAILQ_FIRST(&wu->swu_ccb)) != NULL) { 1859 TAILQ_REMOVE(&wu->swu_ccb, ccb, ccb_link); 1860 sr_ccb_put(ccb); 1861 } 1862 splx(s); 1863 1864 bzero(wu, sizeof(*wu)); 1865 TAILQ_INIT(&wu->swu_ccb); 1866 wu->swu_dis = sd; 1867 1868 mtx_enter(&sd->sd_wu_mtx); 1869 TAILQ_INSERT_TAIL(&sd->sd_wu_freeq, wu, swu_link); 1870 sd->sd_wu_pending--; 1871 mtx_leave(&sd->sd_wu_mtx); 1872 } 1873 1874 void * 1875 sr_wu_get(void *xsd) 1876 { 1877 struct sr_discipline *sd = (struct sr_discipline *)xsd; 1878 struct sr_workunit *wu; 1879 1880 mtx_enter(&sd->sd_wu_mtx); 1881 wu = TAILQ_FIRST(&sd->sd_wu_freeq); 1882 if (wu) { 1883 TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link); 1884 sd->sd_wu_pending++; 1885 } 1886 mtx_leave(&sd->sd_wu_mtx); 1887 1888 DNPRINTF(SR_D_WU, "%s: sr_wu_get: %p\n", DEVNAME(sd->sd_sc), wu); 1889 1890 return (wu); 1891 } 1892 1893 void 1894 sr_scsi_done(struct sr_discipline *sd, struct scsi_xfer *xs) 1895 { 1896 DNPRINTF(SR_D_DIS, "%s: sr_scsi_done: xs %p\n", DEVNAME(sd->sd_sc), xs); 1897 1898 scsi_done(xs); 1899 } 1900 1901 void 1902 sr_scsi_cmd(struct scsi_xfer *xs) 1903 { 1904 int s; 1905 struct scsi_link *link = xs->sc_link; 1906 struct sr_softc *sc = link->adapter_softc; 1907 struct sr_workunit *wu = NULL; 1908 struct sr_discipline *sd; 1909 struct sr_ccb *ccb; 1910 1911 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: scsibus%d xs: %p " 1912 "flags: %#x\n", DEVNAME(sc), link->scsibus, xs, xs->flags); 1913 1914 sd = sc->sc_dis[link->scsibus]; 1915 if (sd == NULL) { 1916 s = splhigh(); 1917 sd = sc->sc_attach_dis; 1918 splx(s); 1919 1920 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: attaching %p\n", 1921 DEVNAME(sc), sd); 1922 if (sd == NULL) { 1923 printf("%s: sr_scsi_cmd NULL discipline\n", 1924 DEVNAME(sc)); 1925 goto stuffup; 1926 } 1927 } 1928 1929 if (sd->sd_deleted) { 1930 printf("%s: %s device is being deleted, failing io\n", 1931 DEVNAME(sc), sd->sd_meta->ssd_devname); 1932 goto stuffup; 1933 } 1934 1935 wu = xs->io; 1936 /* scsi layer *can* re-send wu without calling sr_wu_put(). 
*/ 1937 s = splbio(); 1938 if (wu->swu_cb_active == 1) 1939 panic("%s: sr_scsi_cmd got active wu", DEVNAME(sd->sd_sc)); 1940 while ((ccb = TAILQ_FIRST(&wu->swu_ccb)) != NULL) { 1941 TAILQ_REMOVE(&wu->swu_ccb, ccb, ccb_link); 1942 sr_ccb_put(ccb); 1943 } 1944 splx(s); 1945 1946 bzero(wu, sizeof(*wu)); 1947 TAILQ_INIT(&wu->swu_ccb); 1948 wu->swu_state = SR_WU_INPROGRESS; 1949 wu->swu_dis = sd; 1950 wu->swu_xs = xs; 1951 1952 /* the midlayer will query LUNs so report sense to stop scanning */ 1953 if (link->target != 0 || link->lun != 0) { 1954 DNPRINTF(SR_D_CMD, "%s: bad target:lun %d:%d\n", 1955 DEVNAME(sc), link->target, link->lun); 1956 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT | 1957 SSD_ERRCODE_VALID; 1958 sd->sd_scsi_sense.flags = SKEY_ILLEGAL_REQUEST; 1959 sd->sd_scsi_sense.add_sense_code = 0x25; 1960 sd->sd_scsi_sense.add_sense_code_qual = 0x00; 1961 sd->sd_scsi_sense.extra_len = 4; 1962 goto stuffup; 1963 } 1964 1965 switch (xs->cmd->opcode) { 1966 case READ_COMMAND: 1967 case READ_BIG: 1968 case READ_16: 1969 case WRITE_COMMAND: 1970 case WRITE_BIG: 1971 case WRITE_16: 1972 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: READ/WRITE %02x\n", 1973 DEVNAME(sc), xs->cmd->opcode); 1974 if (sd->sd_scsi_rw(wu)) 1975 goto stuffup; 1976 break; 1977 1978 case SYNCHRONIZE_CACHE: 1979 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: SYNCHRONIZE_CACHE\n", 1980 DEVNAME(sc)); 1981 if (sd->sd_scsi_sync(wu)) 1982 goto stuffup; 1983 goto complete; 1984 1985 case TEST_UNIT_READY: 1986 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: TEST_UNIT_READY\n", 1987 DEVNAME(sc)); 1988 if (sd->sd_scsi_tur(wu)) 1989 goto stuffup; 1990 goto complete; 1991 1992 case START_STOP: 1993 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: START_STOP\n", 1994 DEVNAME(sc)); 1995 if (sd->sd_scsi_start_stop(wu)) 1996 goto stuffup; 1997 goto complete; 1998 1999 case INQUIRY: 2000 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: INQUIRY\n", 2001 DEVNAME(sc)); 2002 if (sd->sd_scsi_inquiry(wu)) 2003 goto stuffup; 2004 goto complete; 2005 2006 case READ_CAPACITY: 2007 case READ_CAPACITY_16: 2008 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd READ CAPACITY 0x%02x\n", 2009 DEVNAME(sc), xs->cmd->opcode); 2010 if (sd->sd_scsi_read_cap(wu)) 2011 goto stuffup; 2012 goto complete; 2013 2014 case REQUEST_SENSE: 2015 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd REQUEST SENSE\n", 2016 DEVNAME(sc)); 2017 if (sd->sd_scsi_req_sense(wu)) 2018 goto stuffup; 2019 goto complete; 2020 2021 default: 2022 DNPRINTF(SR_D_CMD, "%s: unsupported scsi command %x\n", 2023 DEVNAME(sc), xs->cmd->opcode); 2024 /* XXX might need to add generic function to handle others */ 2025 goto stuffup; 2026 } 2027 2028 return; 2029 stuffup: 2030 if (sd && sd->sd_scsi_sense.error_code) { 2031 xs->error = XS_SENSE; 2032 bcopy(&sd->sd_scsi_sense, &xs->sense, sizeof(xs->sense)); 2033 bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense)); 2034 } else { 2035 xs->error = XS_DRIVER_STUFFUP; 2036 } 2037 complete: 2038 sr_scsi_done(sd, xs); 2039 } 2040 int 2041 sr_scsi_ioctl(struct scsi_link *link, u_long cmd, caddr_t addr, int flag) 2042 { 2043 DNPRINTF(SR_D_IOCTL, "%s: sr_scsi_ioctl cmd: %#x\n", 2044 DEVNAME((struct sr_softc *)link->adapter_softc), cmd); 2045 2046 switch (cmd) { 2047 case DIOCGCACHE: 2048 case DIOCSCACHE: 2049 return (EOPNOTSUPP); 2050 default: 2051 return (sr_ioctl(link->adapter_softc, cmd, addr)); 2052 } 2053 } 2054 2055 int 2056 sr_ioctl(struct device *dev, u_long cmd, caddr_t addr) 2057 { 2058 struct sr_softc *sc = (struct sr_softc *)dev; 2059 int rv = 0; 2060 2061 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl ", DEVNAME(sc)); 2062 2063 
rw_enter_write(&sc->sc_lock); 2064 2065 switch (cmd) { 2066 case BIOCINQ: 2067 DNPRINTF(SR_D_IOCTL, "inq\n"); 2068 rv = sr_ioctl_inq(sc, (struct bioc_inq *)addr); 2069 break; 2070 2071 case BIOCVOL: 2072 DNPRINTF(SR_D_IOCTL, "vol\n"); 2073 rv = sr_ioctl_vol(sc, (struct bioc_vol *)addr); 2074 break; 2075 2076 case BIOCDISK: 2077 DNPRINTF(SR_D_IOCTL, "disk\n"); 2078 rv = sr_ioctl_disk(sc, (struct bioc_disk *)addr); 2079 break; 2080 2081 case BIOCALARM: 2082 DNPRINTF(SR_D_IOCTL, "alarm\n"); 2083 /*rv = sr_ioctl_alarm(sc, (struct bioc_alarm *)addr); */ 2084 break; 2085 2086 case BIOCBLINK: 2087 DNPRINTF(SR_D_IOCTL, "blink\n"); 2088 /*rv = sr_ioctl_blink(sc, (struct bioc_blink *)addr); */ 2089 break; 2090 2091 case BIOCSETSTATE: 2092 DNPRINTF(SR_D_IOCTL, "setstate\n"); 2093 rv = sr_ioctl_setstate(sc, (struct bioc_setstate *)addr); 2094 break; 2095 2096 case BIOCCREATERAID: 2097 DNPRINTF(SR_D_IOCTL, "createraid\n"); 2098 rv = sr_ioctl_createraid(sc, (struct bioc_createraid *)addr, 1); 2099 break; 2100 2101 case BIOCDELETERAID: 2102 rv = sr_ioctl_deleteraid(sc, (struct bioc_deleteraid *)addr); 2103 break; 2104 2105 case BIOCDISCIPLINE: 2106 rv = sr_ioctl_discipline(sc, (struct bioc_discipline *)addr); 2107 break; 2108 2109 case BIOCINSTALLBOOT: 2110 rv = sr_ioctl_installboot(sc, (struct bioc_installboot *)addr); 2111 break; 2112 2113 default: 2114 DNPRINTF(SR_D_IOCTL, "invalid ioctl\n"); 2115 rv = ENOTTY; 2116 } 2117 2118 rw_exit_write(&sc->sc_lock); 2119 2120 return (rv); 2121 } 2122 2123 int 2124 sr_ioctl_inq(struct sr_softc *sc, struct bioc_inq *bi) 2125 { 2126 int i, vol, disk; 2127 2128 for (i = 0, vol = 0, disk = 0; i < SR_MAXSCSIBUS; i++) 2129 /* XXX this will not work when we stagger disciplines */ 2130 if (sc->sc_dis[i]) { 2131 vol++; 2132 disk += sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no; 2133 } 2134 2135 strlcpy(bi->bi_dev, sc->sc_dev.dv_xname, sizeof(bi->bi_dev)); 2136 bi->bi_novol = vol + sc->sc_hotspare_no; 2137 bi->bi_nodisk = disk + sc->sc_hotspare_no; 2138 2139 return (0); 2140 } 2141 2142 int 2143 sr_ioctl_vol(struct sr_softc *sc, struct bioc_vol *bv) 2144 { 2145 int i, vol, rv = EINVAL; 2146 struct sr_discipline *sd; 2147 struct sr_chunk *hotspare; 2148 daddr64_t rb, sz; 2149 2150 for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { 2151 /* XXX this will not work when we stagger disciplines */ 2152 if (sc->sc_dis[i]) 2153 vol++; 2154 if (vol != bv->bv_volid) 2155 continue; 2156 2157 if (sc->sc_dis[i] == NULL) 2158 goto done; 2159 2160 sd = sc->sc_dis[i]; 2161 bv->bv_status = sd->sd_vol_status; 2162 bv->bv_size = sd->sd_meta->ssdi.ssd_size << DEV_BSHIFT; 2163 bv->bv_level = sd->sd_meta->ssdi.ssd_level; 2164 bv->bv_nodisk = sd->sd_meta->ssdi.ssd_chunk_no; 2165 2166 #ifdef CRYPTO 2167 if (sd->sd_meta->ssdi.ssd_level == 'C' && 2168 sd->mds.mdd_crypto.key_disk != NULL) 2169 bv->bv_nodisk++; 2170 #endif 2171 2172 if (bv->bv_status == BIOC_SVREBUILD) { 2173 sz = sd->sd_meta->ssdi.ssd_size; 2174 rb = sd->sd_meta->ssd_rebuild; 2175 if (rb > 0) 2176 bv->bv_percent = 100 - 2177 ((sz * 100 - rb * 100) / sz) - 1; 2178 else 2179 bv->bv_percent = 0; 2180 } 2181 strlcpy(bv->bv_dev, sd->sd_meta->ssd_devname, 2182 sizeof(bv->bv_dev)); 2183 strlcpy(bv->bv_vendor, sd->sd_meta->ssdi.ssd_vendor, 2184 sizeof(bv->bv_vendor)); 2185 rv = 0; 2186 goto done; 2187 } 2188 2189 /* Check hotspares list. 
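Hotspares are enumerated after the regular disciplines, so their volume IDs simply continue the count from the loop above; each one is reported as a single-chunk pseudo-volume with level -1.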
*/ 2190 SLIST_FOREACH(hotspare, &sc->sc_hotspare_list, src_link) { 2191 vol++; 2192 if (vol != bv->bv_volid) 2193 continue; 2194 2195 bv->bv_status = BIOC_SVONLINE; 2196 bv->bv_size = hotspare->src_meta.scmi.scm_size << DEV_BSHIFT; 2197 bv->bv_level = -1; /* Hotspare. */ 2198 bv->bv_nodisk = 1; 2199 strlcpy(bv->bv_dev, hotspare->src_meta.scmi.scm_devname, 2200 sizeof(bv->bv_dev)); 2201 strlcpy(bv->bv_vendor, hotspare->src_meta.scmi.scm_devname, 2202 sizeof(bv->bv_vendor)); 2203 rv = 0; 2204 goto done; 2205 } 2206 2207 done: 2208 return (rv); 2209 } 2210 2211 int 2212 sr_ioctl_disk(struct sr_softc *sc, struct bioc_disk *bd) 2213 { 2214 int i, vol, rv = EINVAL, id; 2215 struct sr_chunk *src, *hotspare; 2216 2217 for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { 2218 /* XXX this will not work when we stagger disciplines */ 2219 if (sc->sc_dis[i]) 2220 vol++; 2221 if (vol != bd->bd_volid) 2222 continue; 2223 2224 if (sc->sc_dis[i] == NULL) 2225 goto done; 2226 2227 id = bd->bd_diskid; 2228 2229 if (id < sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no) 2230 src = sc->sc_dis[i]->sd_vol.sv_chunks[id]; 2231 #ifdef CRYPTO 2232 else if (id == sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no && 2233 sc->sc_dis[i]->sd_meta->ssdi.ssd_level == 'C' && 2234 sc->sc_dis[i]->mds.mdd_crypto.key_disk != NULL) 2235 src = sc->sc_dis[i]->mds.mdd_crypto.key_disk; 2236 #endif 2237 else 2238 break; 2239 2240 bd->bd_status = src->src_meta.scm_status; 2241 bd->bd_size = src->src_meta.scmi.scm_size << DEV_BSHIFT; 2242 bd->bd_channel = vol; 2243 bd->bd_target = id; 2244 strlcpy(bd->bd_vendor, src->src_meta.scmi.scm_devname, 2245 sizeof(bd->bd_vendor)); 2246 rv = 0; 2247 goto done; 2248 } 2249 2250 /* Check hotspares list. */ 2251 SLIST_FOREACH(hotspare, &sc->sc_hotspare_list, src_link) { 2252 vol++; 2253 if (vol != bd->bd_volid) 2254 continue; 2255 2256 if (bd->bd_diskid != 0) 2257 break; 2258 2259 bd->bd_status = hotspare->src_meta.scm_status; 2260 bd->bd_size = hotspare->src_meta.scmi.scm_size << DEV_BSHIFT; 2261 bd->bd_channel = vol; 2262 bd->bd_target = bd->bd_diskid; 2263 strlcpy(bd->bd_vendor, hotspare->src_meta.scmi.scm_devname, 2264 sizeof(bd->bd_vendor)); 2265 rv = 0; 2266 goto done; 2267 } 2268 2269 done: 2270 return (rv); 2271 } 2272 2273 int 2274 sr_ioctl_setstate(struct sr_softc *sc, struct bioc_setstate *bs) 2275 { 2276 int rv = EINVAL; 2277 int i, vol, found, c; 2278 struct sr_discipline *sd = NULL; 2279 struct sr_chunk *ch_entry; 2280 struct sr_chunk_head *cl; 2281 2282 if (bs->bs_other_id_type == BIOC_SSOTHER_UNUSED) 2283 goto done; 2284 2285 if (bs->bs_status == BIOC_SSHOTSPARE) { 2286 rv = sr_hotspare(sc, (dev_t)bs->bs_other_id); 2287 goto done; 2288 } 2289 2290 for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { 2291 /* XXX this will not work when we stagger disciplines */ 2292 if (sc->sc_dis[i]) 2293 vol++; 2294 if (vol != bs->bs_volid) 2295 continue; 2296 sd = sc->sc_dis[i]; 2297 break; 2298 } 2299 if (sd == NULL) 2300 goto done; 2301 2302 switch (bs->bs_status) { 2303 case BIOC_SSOFFLINE: 2304 /* Take chunk offline */ 2305 found = c = 0; 2306 cl = &sd->sd_vol.sv_chunk_list; 2307 SLIST_FOREACH(ch_entry, cl, src_link) { 2308 if (ch_entry->src_dev_mm == bs->bs_other_id) { 2309 found = 1; 2310 break; 2311 } 2312 c++; 2313 } 2314 if (found == 0) { 2315 printf("%s: chunk not part of array\n", DEVNAME(sc)); 2316 goto done; 2317 } 2318 2319 /* XXX: check current state first */ 2320 sd->sd_set_chunk_state(sd, c, BIOC_SSOFFLINE); 2321 2322 if (sr_meta_save(sd, SR_META_DIRTY)) { 2323 printf("%s: could not save metadata to 
%s\n", 2324 DEVNAME(sc), sd->sd_meta->ssd_devname); 2325 goto done; 2326 } 2327 rv = 0; 2328 break; 2329 2330 case BIOC_SDSCRUB: 2331 break; 2332 2333 case BIOC_SSREBUILD: 2334 rv = sr_rebuild_init(sd, (dev_t)bs->bs_other_id, 0); 2335 break; 2336 2337 default: 2338 printf("%s: unsupported state request %d\n", 2339 DEVNAME(sc), bs->bs_status); 2340 } 2341 2342 done: 2343 return (rv); 2344 } 2345 2346 int 2347 sr_chunk_in_use(struct sr_softc *sc, dev_t dev) 2348 { 2349 struct sr_discipline *sd; 2350 struct sr_chunk *chunk; 2351 int i, c; 2352 2353 /* See if chunk is already in use. */ 2354 for (i = 0; i < SR_MAXSCSIBUS; i++) { 2355 if (sc->sc_dis[i] == NULL) 2356 continue; 2357 sd = sc->sc_dis[i]; 2358 for (c = 0; c < sd->sd_meta->ssdi.ssd_chunk_no; c++) { 2359 chunk = sd->sd_vol.sv_chunks[c]; 2360 if (chunk->src_dev_mm == dev) 2361 return chunk->src_meta.scm_status; 2362 } 2363 } 2364 2365 /* Check hotspares list. */ 2366 SLIST_FOREACH(chunk, &sc->sc_hotspare_list, src_link) 2367 if (chunk->src_dev_mm == dev) 2368 return chunk->src_meta.scm_status; 2369 2370 return BIOC_SDINVALID; 2371 } 2372 2373 int 2374 sr_hotspare(struct sr_softc *sc, dev_t dev) 2375 { 2376 struct sr_discipline *sd = NULL; 2377 struct sr_metadata *sm = NULL; 2378 struct sr_meta_chunk *hm; 2379 struct sr_chunk_head *cl; 2380 struct sr_chunk *chunk, *last, *hotspare = NULL; 2381 struct sr_uuid uuid; 2382 struct disklabel label; 2383 struct vnode *vn; 2384 daddr64_t size; 2385 char devname[32]; 2386 int rv = EINVAL; 2387 int c, part, open = 0; 2388 2389 /* 2390 * Add device to global hotspares list. 2391 */ 2392 2393 sr_meta_getdevname(sc, dev, devname, sizeof(devname)); 2394 2395 /* Make sure chunk is not already in use. */ 2396 c = sr_chunk_in_use(sc, dev); 2397 if (c != BIOC_SDINVALID && c != BIOC_SDOFFLINE) { 2398 if (c == BIOC_SDHOTSPARE) 2399 printf("%s: %s is already a hotspare\n", 2400 DEVNAME(sc), devname); 2401 else 2402 printf("%s: %s is already in use\n", 2403 DEVNAME(sc), devname); 2404 goto done; 2405 } 2406 2407 /* XXX - See if there is an existing degraded volume... */ 2408 2409 /* Open device. */ 2410 if (bdevvp(dev, &vn)) { 2411 printf("%s:, sr_hotspare: can't allocate vnode\n", DEVNAME(sc)); 2412 goto done; 2413 } 2414 if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc)) { 2415 DNPRINTF(SR_D_META,"%s: sr_hotspare cannot open %s\n", 2416 DEVNAME(sc), devname); 2417 vput(vn); 2418 goto fail; 2419 } 2420 open = 1; /* close dev on error */ 2421 2422 /* Get partition details. */ 2423 part = DISKPART(dev); 2424 if (VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD, 2425 NOCRED, curproc)) { 2426 DNPRINTF(SR_D_META, "%s: sr_hotspare ioctl failed\n", 2427 DEVNAME(sc)); 2428 VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, curproc); 2429 vput(vn); 2430 goto fail; 2431 } 2432 if (label.d_partitions[part].p_fstype != FS_RAID) { 2433 printf("%s: %s partition not of type RAID (%d)\n", 2434 DEVNAME(sc), devname, 2435 label.d_partitions[part].p_fstype); 2436 goto fail; 2437 } 2438 2439 /* Calculate partition size. */ 2440 size = DL_GETPSIZE(&label.d_partitions[part]) - SR_DATA_OFFSET; 2441 2442 /* 2443 * Create and populate chunk metadata. 
2444 */ 2445 2446 sr_uuid_get(&uuid); 2447 hotspare = malloc(sizeof(struct sr_chunk), M_DEVBUF, M_WAITOK | M_ZERO); 2448 2449 hotspare->src_dev_mm = dev; 2450 hotspare->src_vn = vn; 2451 strlcpy(hotspare->src_devname, devname, sizeof(hm->scmi.scm_devname)); 2452 hotspare->src_size = size; 2453 2454 hm = &hotspare->src_meta; 2455 hm->scmi.scm_volid = SR_HOTSPARE_VOLID; 2456 hm->scmi.scm_chunk_id = 0; 2457 hm->scmi.scm_size = size; 2458 hm->scmi.scm_coerced_size = size; 2459 strlcpy(hm->scmi.scm_devname, devname, sizeof(hm->scmi.scm_devname)); 2460 bcopy(&uuid, &hm->scmi.scm_uuid, sizeof(struct sr_uuid)); 2461 2462 sr_checksum(sc, hm, &hm->scm_checksum, 2463 sizeof(struct sr_meta_chunk_invariant)); 2464 2465 hm->scm_status = BIOC_SDHOTSPARE; 2466 2467 /* 2468 * Create and populate our own discipline and metadata. 2469 */ 2470 2471 sm = malloc(sizeof(struct sr_metadata), M_DEVBUF, M_WAITOK | M_ZERO); 2472 sm->ssdi.ssd_magic = SR_MAGIC; 2473 sm->ssdi.ssd_version = SR_META_VERSION; 2474 sm->ssd_ondisk = 0; 2475 sm->ssdi.ssd_vol_flags = 0; 2476 bcopy(&uuid, &sm->ssdi.ssd_uuid, sizeof(struct sr_uuid)); 2477 sm->ssdi.ssd_chunk_no = 1; 2478 sm->ssdi.ssd_volid = SR_HOTSPARE_VOLID; 2479 sm->ssdi.ssd_level = SR_HOTSPARE_LEVEL; 2480 sm->ssdi.ssd_size = size; 2481 strlcpy(sm->ssdi.ssd_vendor, "OPENBSD", sizeof(sm->ssdi.ssd_vendor)); 2482 snprintf(sm->ssdi.ssd_product, sizeof(sm->ssdi.ssd_product), 2483 "SR %s", "HOTSPARE"); 2484 snprintf(sm->ssdi.ssd_revision, sizeof(sm->ssdi.ssd_revision), 2485 "%03d", SR_META_VERSION); 2486 2487 sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO); 2488 sd->sd_sc = sc; 2489 sd->sd_meta = sm; 2490 sd->sd_meta_type = SR_META_F_NATIVE; 2491 sd->sd_vol_status = BIOC_SVONLINE; 2492 strlcpy(sd->sd_name, "HOTSPARE", sizeof(sd->sd_name)); 2493 SLIST_INIT(&sd->sd_meta_opt); 2494 2495 /* Add chunk to volume. */ 2496 sd->sd_vol.sv_chunks = malloc(sizeof(struct sr_chunk *), M_DEVBUF, 2497 M_WAITOK | M_ZERO); 2498 sd->sd_vol.sv_chunks[0] = hotspare; 2499 SLIST_INIT(&sd->sd_vol.sv_chunk_list); 2500 SLIST_INSERT_HEAD(&sd->sd_vol.sv_chunk_list, hotspare, src_link); 2501 2502 /* Save metadata. */ 2503 if (sr_meta_save(sd, SR_META_DIRTY)) { 2504 printf("%s: could not save metadata to %s\n", 2505 DEVNAME(sc), devname); 2506 goto fail; 2507 } 2508 2509 /* 2510 * Add chunk to hotspare list. 2511 */ 2512 rw_enter_write(&sc->sc_hs_lock); 2513 cl = &sc->sc_hotspare_list; 2514 if (SLIST_EMPTY(cl)) 2515 SLIST_INSERT_HEAD(cl, hotspare, src_link); 2516 else { 2517 SLIST_FOREACH(chunk, cl, src_link) 2518 last = chunk; 2519 SLIST_INSERT_AFTER(last, hotspare, src_link); 2520 } 2521 sc->sc_hotspare_no++; 2522 rw_exit_write(&sc->sc_hs_lock); 2523 2524 rv = 0; 2525 goto done; 2526 2527 fail: 2528 if (hotspare) 2529 free(hotspare, M_DEVBUF); 2530 2531 done: 2532 if (sd && sd->sd_vol.sv_chunks) 2533 free(sd->sd_vol.sv_chunks, M_DEVBUF); 2534 if (sd) 2535 free(sd, M_DEVBUF); 2536 if (sm) 2537 free(sm, M_DEVBUF); 2538 if (open) { 2539 VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, curproc); 2540 vput(vn); 2541 } 2542 2543 return (rv); 2544 } 2545 2546 void 2547 sr_hotspare_rebuild_callback(void *arg1, void *arg2) 2548 { 2549 sr_hotspare_rebuild((struct sr_discipline *)arg1); 2550 } 2551 2552 void 2553 sr_hotspare_rebuild(struct sr_discipline *sd) 2554 { 2555 struct sr_chunk_head *cl; 2556 struct sr_chunk *hotspare, *chunk = NULL; 2557 struct sr_workunit *wu; 2558 struct sr_ccb *ccb; 2559 int i, s, chunk_no, busy; 2560 2561 /* 2562 * Attempt to locate a hotspare and initiate rebuild. 
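 * An offline chunk is located first, then the first hotspare that is
 * at least as large is selected.  Pending I/O against the failed chunk
 * is allowed to drain (polling roughly once a second for up to two
 * minutes) before the hotspare is handed to sr_rebuild_init() and
 * removed from the hotspare list.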
2563 */ 2564 2565 for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) { 2566 if (sd->sd_vol.sv_chunks[i]->src_meta.scm_status == 2567 BIOC_SDOFFLINE) { 2568 chunk_no = i; 2569 chunk = sd->sd_vol.sv_chunks[i]; 2570 break; 2571 } 2572 } 2573 2574 if (chunk == NULL) { 2575 printf("%s: no offline chunk found on %s!\n", 2576 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname); 2577 return; 2578 } 2579 2580 /* See if we have a suitable hotspare... */ 2581 rw_enter_write(&sd->sd_sc->sc_hs_lock); 2582 cl = &sd->sd_sc->sc_hotspare_list; 2583 SLIST_FOREACH(hotspare, cl, src_link) 2584 if (hotspare->src_size >= chunk->src_size) 2585 break; 2586 2587 if (hotspare != NULL) { 2588 2589 printf("%s: %s volume degraded, will attempt to " 2590 "rebuild on hotspare %s\n", DEVNAME(sd->sd_sc), 2591 sd->sd_meta->ssd_devname, hotspare->src_devname); 2592 2593 /* 2594 * Ensure that all pending I/O completes on the failed chunk 2595 * before trying to initiate a rebuild. 2596 */ 2597 i = 0; 2598 do { 2599 busy = 0; 2600 2601 s = splbio(); 2602 if (wu->swu_cb_active == 1) 2603 panic("%s: sr_hotspare_rebuild", 2604 DEVNAME(sd->sd_sc)); 2605 TAILQ_FOREACH(wu, &sd->sd_wu_pendq, swu_link) { 2606 TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) { 2607 if (ccb->ccb_target == chunk_no) 2608 busy = 1; 2609 } 2610 } 2611 TAILQ_FOREACH(wu, &sd->sd_wu_defq, swu_link) { 2612 TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) { 2613 if (ccb->ccb_target == chunk_no) 2614 busy = 1; 2615 } 2616 } 2617 splx(s); 2618 2619 if (busy) { 2620 tsleep(sd, PRIBIO, "sr_hotspare", hz); 2621 i++; 2622 } 2623 2624 } while (busy && i < 120); 2625 2626 DNPRINTF(SR_D_META, "%s: waited %i seconds for I/O to " 2627 "complete on failed chunk %s\n", DEVNAME(sd->sd_sc), 2628 i, chunk->src_devname); 2629 2630 if (busy) { 2631 printf("%s: pending I/O failed to complete on " 2632 "failed chunk %s, hotspare rebuild aborted...\n", 2633 DEVNAME(sd->sd_sc), chunk->src_devname); 2634 goto done; 2635 } 2636 2637 s = splbio(); 2638 rw_enter_write(&sd->sd_sc->sc_lock); 2639 if (sr_rebuild_init(sd, hotspare->src_dev_mm, 1) == 0) { 2640 2641 /* Remove hotspare from available list. */ 2642 sd->sd_sc->sc_hotspare_no--; 2643 SLIST_REMOVE(cl, hotspare, sr_chunk, src_link); 2644 free(hotspare, M_DEVBUF); 2645 2646 } 2647 rw_exit_write(&sd->sd_sc->sc_lock); 2648 splx(s); 2649 } 2650 done: 2651 rw_exit_write(&sd->sd_sc->sc_hs_lock); 2652 } 2653 2654 int 2655 sr_rebuild_init(struct sr_discipline *sd, dev_t dev, int hotspare) 2656 { 2657 struct sr_softc *sc = sd->sd_sc; 2658 int rv = EINVAL, part; 2659 int c, found, open = 0; 2660 char devname[32]; 2661 struct vnode *vn; 2662 daddr64_t size, csize; 2663 struct disklabel label; 2664 struct sr_meta_chunk *old, *new; 2665 2666 /* 2667 * Attempt to initiate a rebuild onto the specified device. 
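 * The discipline must advertise SR_CAP_REBUILD and the volume must be
 * degraded but not already rebuilding.  The replacement partition has
 * to be of type RAID, large enough to hold the chunk it replaces and
 * not in use elsewhere (unless it is the hotspare being consumed).
 * On success the chunk is marked BIOC_SDREBUILD and the rebuild is
 * started in a kernel thread.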
2668 */ 2669 2670 if (!(sd->sd_capabilities & SR_CAP_REBUILD)) { 2671 printf("%s: discipline does not support rebuild\n", 2672 DEVNAME(sc)); 2673 goto done; 2674 } 2675 2676 /* make sure volume is in the right state */ 2677 if (sd->sd_vol_status == BIOC_SVREBUILD) { 2678 printf("%s: rebuild already in progress\n", DEVNAME(sc)); 2679 goto done; 2680 } 2681 if (sd->sd_vol_status != BIOC_SVDEGRADED) { 2682 printf("%s: %s not degraded\n", DEVNAME(sc), 2683 sd->sd_meta->ssd_devname); 2684 goto done; 2685 } 2686 2687 /* find offline chunk */ 2688 for (c = 0, found = -1; c < sd->sd_meta->ssdi.ssd_chunk_no; c++) 2689 if (sd->sd_vol.sv_chunks[c]->src_meta.scm_status == 2690 BIOC_SDOFFLINE) { 2691 found = c; 2692 new = &sd->sd_vol.sv_chunks[c]->src_meta; 2693 if (c > 0) 2694 break; /* roll at least once over the for */ 2695 } else { 2696 csize = sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_size; 2697 old = &sd->sd_vol.sv_chunks[c]->src_meta; 2698 if (found != -1) 2699 break; 2700 } 2701 if (found == -1) { 2702 printf("%s: no offline chunks available for rebuild\n", 2703 DEVNAME(sc)); 2704 goto done; 2705 } 2706 2707 /* populate meta entry */ 2708 sr_meta_getdevname(sc, dev, devname, sizeof(devname)); 2709 if (bdevvp(dev, &vn)) { 2710 printf("%s:, sr_rebuild_init: can't allocate vnode\n", 2711 DEVNAME(sc)); 2712 goto done; 2713 } 2714 2715 if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc)) { 2716 DNPRINTF(SR_D_META,"%s: sr_ioctl_setstate can't " 2717 "open %s\n", DEVNAME(sc), devname); 2718 vput(vn); 2719 goto done; 2720 } 2721 open = 1; /* close dev on error */ 2722 2723 /* get partition */ 2724 part = DISKPART(dev); 2725 if (VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD, 2726 NOCRED, curproc)) { 2727 DNPRINTF(SR_D_META, "%s: sr_ioctl_setstate ioctl failed\n", 2728 DEVNAME(sc)); 2729 goto done; 2730 } 2731 if (label.d_partitions[part].p_fstype != FS_RAID) { 2732 printf("%s: %s partition not of type RAID (%d)\n", 2733 DEVNAME(sc), devname, 2734 label.d_partitions[part].p_fstype); 2735 goto done; 2736 } 2737 2738 /* is partition large enough? */ 2739 size = DL_GETPSIZE(&label.d_partitions[part]) - SR_DATA_OFFSET; 2740 if (size < csize) { 2741 printf("%s: partition too small, at least %llu B required\n", 2742 DEVNAME(sc), csize << DEV_BSHIFT); 2743 goto done; 2744 } else if (size > csize) 2745 printf("%s: partition too large, wasting %llu B\n", 2746 DEVNAME(sc), (size - csize) << DEV_BSHIFT); 2747 2748 /* make sure we are not stomping on some other partition */ 2749 c = sr_chunk_in_use(sc, dev); 2750 if (c != BIOC_SDINVALID && c != BIOC_SDOFFLINE && 2751 !(hotspare && c == BIOC_SDHOTSPARE)) { 2752 printf("%s: %s is already in use\n", DEVNAME(sc), devname); 2753 goto done; 2754 } 2755 2756 /* Reset rebuild counter since we rebuilding onto a new chunk. 
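A stale ssd_rebuild offset would otherwise make sr_rebuild_thread() resume part way through a chunk that has never been written.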
*/ 2757 sd->sd_meta->ssd_rebuild = 0; 2758 2759 /* recreate metadata */ 2760 open = 0; /* leave dev open from here on out */ 2761 sd->sd_vol.sv_chunks[found]->src_dev_mm = dev; 2762 sd->sd_vol.sv_chunks[found]->src_vn = vn; 2763 new->scmi.scm_volid = old->scmi.scm_volid; 2764 new->scmi.scm_chunk_id = found; 2765 strlcpy(new->scmi.scm_devname, devname, 2766 sizeof new->scmi.scm_devname); 2767 new->scmi.scm_size = size; 2768 new->scmi.scm_coerced_size = old->scmi.scm_coerced_size; 2769 bcopy(&old->scmi.scm_uuid, &new->scmi.scm_uuid, 2770 sizeof new->scmi.scm_uuid); 2771 sr_checksum(sc, new, &new->scm_checksum, 2772 sizeof(struct sr_meta_chunk_invariant)); 2773 sd->sd_set_chunk_state(sd, found, BIOC_SDREBUILD); 2774 if (sr_meta_save(sd, SR_META_DIRTY)) { 2775 printf("%s: could not save metadata to %s\n", 2776 DEVNAME(sc), devname); 2777 open = 1; 2778 goto done; 2779 } 2780 2781 printf("%s: rebuild of %s started on %s\n", DEVNAME(sc), 2782 sd->sd_meta->ssd_devname, devname); 2783 2784 sd->sd_reb_abort = 0; 2785 kthread_create_deferred(sr_rebuild, sd); 2786 2787 rv = 0; 2788 done: 2789 if (open) { 2790 VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, curproc); 2791 vput(vn); 2792 } 2793 2794 return (rv); 2795 } 2796 2797 void 2798 sr_roam_chunks(struct sr_discipline *sd) 2799 { 2800 struct sr_softc *sc = sd->sd_sc; 2801 struct sr_chunk *chunk; 2802 struct sr_meta_chunk *meta; 2803 int roamed = 0; 2804 2805 /* Have any chunks roamed? */ 2806 SLIST_FOREACH(chunk, &sd->sd_vol.sv_chunk_list, src_link) { 2807 meta = &chunk->src_meta; 2808 if (strncmp(meta->scmi.scm_devname, chunk->src_devname, 2809 sizeof(meta->scmi.scm_devname))) { 2810 2811 printf("%s: roaming device %s -> %s\n", DEVNAME(sc), 2812 meta->scmi.scm_devname, chunk->src_devname); 2813 2814 strlcpy(meta->scmi.scm_devname, chunk->src_devname, 2815 sizeof(meta->scmi.scm_devname)); 2816 2817 roamed++; 2818 } 2819 } 2820 2821 if (roamed) 2822 sr_meta_save(sd, SR_META_DIRTY); 2823 } 2824 2825 int 2826 sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user) 2827 { 2828 dev_t *dt; 2829 int i, s, no_chunk, rv = EINVAL, vol; 2830 int no_meta, updatemeta = 0; 2831 struct sr_chunk_head *cl; 2832 struct sr_discipline *sd = NULL; 2833 struct sr_chunk *ch_entry; 2834 struct device *dev, *dev2; 2835 struct scsibus_attach_args saa; 2836 char devname[32]; 2837 2838 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_createraid(%d)\n", 2839 DEVNAME(sc), user); 2840 2841 /* user input */ 2842 if (bc->bc_dev_list_len > BIOC_CRMAXLEN) 2843 goto unwind; 2844 2845 dt = malloc(bc->bc_dev_list_len, M_DEVBUF, M_WAITOK | M_ZERO); 2846 if (user) { 2847 if (copyin(bc->bc_dev_list, dt, bc->bc_dev_list_len) != 0) 2848 goto unwind; 2849 } else 2850 bcopy(bc->bc_dev_list, dt, bc->bc_dev_list_len); 2851 2852 /* Initialise discipline. */ 2853 sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO); 2854 sd->sd_sc = sc; 2855 SLIST_INIT(&sd->sd_meta_opt); 2856 sd->sd_workq = workq_create("srdis", 1, IPL_BIO); 2857 if (sd->sd_workq == NULL) { 2858 printf("%s: could not create workq\n", DEVNAME(sc)); 2859 goto unwind; 2860 } 2861 if (sr_discipline_init(sd, bc->bc_level)) { 2862 printf("%s: could not initialize discipline\n", DEVNAME(sc)); 2863 goto unwind; 2864 } 2865 2866 no_chunk = bc->bc_dev_list_len / sizeof(dev_t); 2867 cl = &sd->sd_vol.sv_chunk_list; 2868 SLIST_INIT(cl); 2869 2870 /* Ensure that chunks are not already in use. 
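sr_chunk_in_use() reports the status of any chunk that already belongs to a volume or sits on the hotspare list; only BIOC_SDINVALID, i.e. a device unknown to softraid, is acceptable here.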
*/ 2871 for (i = 0; i < no_chunk; i++) { 2872 if (sr_chunk_in_use(sc, dt[i]) != BIOC_SDINVALID) { 2873 sr_meta_getdevname(sc, dt[i], devname, sizeof(devname)); 2874 printf("%s: chunk %s already in use\n", 2875 DEVNAME(sc), devname); 2876 goto unwind; 2877 } 2878 } 2879 2880 sd->sd_meta_type = sr_meta_probe(sd, dt, no_chunk); 2881 if (sd->sd_meta_type == SR_META_F_INVALID) { 2882 printf("%s: invalid metadata format\n", DEVNAME(sc)); 2883 goto unwind; 2884 } 2885 2886 if (sr_meta_attach(sd, no_chunk, bc->bc_flags & BIOC_SCFORCE)) { 2887 printf("%s: can't attach metadata type %d\n", DEVNAME(sc), 2888 sd->sd_meta_type); 2889 goto unwind; 2890 } 2891 2892 /* force the raid volume by clearing metadata region */ 2893 if (bc->bc_flags & BIOC_SCFORCE) { 2894 /* make sure disk isn't up and running */ 2895 if (sr_meta_read(sd)) 2896 if (sr_already_assembled(sd)) { 2897 printf("%s: disk ", DEVNAME(sc)); 2898 sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0); 2899 printf(" is currently in use; can't force " 2900 "create\n"); 2901 goto unwind; 2902 } 2903 2904 if (sr_meta_clear(sd)) { 2905 printf("%s: failed to clear metadata\n", DEVNAME(sc)); 2906 goto unwind; 2907 } 2908 } 2909 2910 if ((no_meta = sr_meta_read(sd)) == 0) { 2911 /* fill out all chunk metadata */ 2912 sr_meta_chunks_create(sc, cl); 2913 ch_entry = SLIST_FIRST(cl); 2914 2915 sd->sd_vol_status = BIOC_SVONLINE; 2916 sd->sd_meta->ssdi.ssd_level = bc->bc_level; 2917 sd->sd_meta->ssdi.ssd_chunk_no = no_chunk; 2918 2919 /* Make the volume UUID available. */ 2920 bcopy(&ch_entry->src_meta.scmi.scm_uuid, 2921 &sd->sd_meta->ssdi.ssd_uuid, 2922 sizeof(sd->sd_meta->ssdi.ssd_uuid)); 2923 2924 if (sd->sd_create) { 2925 if ((i = sd->sd_create(sd, bc, no_chunk, 2926 ch_entry->src_meta.scmi.scm_coerced_size))) { 2927 rv = i; 2928 goto unwind; 2929 } 2930 } 2931 2932 /* fill out all volume metadata */ 2933 DNPRINTF(SR_D_IOCTL, 2934 "%s: sr_ioctl_createraid: vol_size: %lld\n", 2935 DEVNAME(sc), sd->sd_meta->ssdi.ssd_size); 2936 strlcpy(sd->sd_meta->ssdi.ssd_vendor, "OPENBSD", 2937 sizeof(sd->sd_meta->ssdi.ssd_vendor)); 2938 snprintf(sd->sd_meta->ssdi.ssd_product, 2939 sizeof(sd->sd_meta->ssdi.ssd_product), "SR %s", 2940 sd->sd_name); 2941 snprintf(sd->sd_meta->ssdi.ssd_revision, 2942 sizeof(sd->sd_meta->ssdi.ssd_revision), "%03d", 2943 SR_META_VERSION); 2944 2945 sd->sd_meta_flags = bc->bc_flags & BIOC_SCNOAUTOASSEMBLE; 2946 updatemeta = 1; 2947 } else if (no_meta == no_chunk) { 2948 if (sd->sd_meta->ssd_meta_flags & SR_META_DIRTY) 2949 printf("%s: %s was not shutdown properly\n", 2950 DEVNAME(sc), sd->sd_meta->ssd_devname); 2951 if (user == 0 && sd->sd_meta_flags & BIOC_SCNOAUTOASSEMBLE) { 2952 DNPRINTF(SR_D_META, "%s: disk not auto assembled from " 2953 "metadata\n", DEVNAME(sc)); 2954 goto unwind; 2955 } 2956 if (sr_already_assembled(sd)) { 2957 printf("%s: disk ", DEVNAME(sc)); 2958 sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0); 2959 printf(" already assembled\n"); 2960 goto unwind; 2961 } 2962 2963 if (sd->sd_assemble) { 2964 if ((i = sd->sd_assemble(sd, bc, no_chunk))) { 2965 rv = i; 2966 goto unwind; 2967 } 2968 } 2969 2970 DNPRINTF(SR_D_META, "%s: disk assembled from metadata\n", 2971 DEVNAME(sc)); 2972 updatemeta = 0; 2973 } else if (no_meta == -1) { 2974 printf("%s: one of the chunks has corrupt metadata; aborting " 2975 "assembly\n", DEVNAME(sc)); 2976 goto unwind; 2977 } else { 2978 if (sr_already_assembled(sd)) { 2979 printf("%s: disk ", DEVNAME(sc)); 2980 sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0); 2981 printf(" already assembled; will not 
partial " 2982 "assemble it\n"); 2983 goto unwind; 2984 } 2985 2986 if (sd->sd_assemble) { 2987 if ((i = sd->sd_assemble(sd, bc, no_chunk))) { 2988 rv = i; 2989 goto unwind; 2990 } 2991 } 2992 2993 printf("%s: trying to bring up %s degraded\n", DEVNAME(sc), 2994 sd->sd_meta->ssd_devname); 2995 } 2996 2997 /* metadata SHALL be fully filled in at this point */ 2998 2999 /* Make sure that metadata level matches assembly level. */ 3000 if (sd->sd_meta->ssdi.ssd_level != bc->bc_level) { 3001 printf("%s: volume level does not match metadata level!\n", 3002 DEVNAME(sc)); 3003 goto unwind; 3004 } 3005 3006 /* allocate all resources */ 3007 if ((rv = sd->sd_alloc_resources(sd))) 3008 goto unwind; 3009 3010 /* Adjust flags if necessary. */ 3011 if ((sd->sd_capabilities & SR_CAP_AUTO_ASSEMBLE) && 3012 (bc->bc_flags & BIOC_SCNOAUTOASSEMBLE) != 3013 (sd->sd_meta->ssdi.ssd_vol_flags & BIOC_SCNOAUTOASSEMBLE)) { 3014 sd->sd_meta->ssdi.ssd_vol_flags &= ~BIOC_SCNOAUTOASSEMBLE; 3015 sd->sd_meta->ssdi.ssd_vol_flags |= 3016 bc->bc_flags & BIOC_SCNOAUTOASSEMBLE; 3017 } 3018 3019 if (sd->sd_capabilities & SR_CAP_SYSTEM_DISK) { 3020 /* set volume status */ 3021 sd->sd_set_vol_state(sd); 3022 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 3023 printf("%s: %s offline, will not be brought online\n", 3024 DEVNAME(sc), sd->sd_meta->ssd_devname); 3025 goto unwind; 3026 } 3027 3028 /* setup scsi midlayer */ 3029 mtx_init(&sd->sd_wu_mtx, IPL_BIO); 3030 scsi_iopool_init(&sd->sd_iopool, sd, sr_wu_get, sr_wu_put); 3031 if (sd->sd_openings) 3032 sd->sd_link.openings = sd->sd_openings(sd); 3033 else 3034 sd->sd_link.openings = sd->sd_max_wu; 3035 sd->sd_link.device_softc = sc; 3036 sd->sd_link.adapter_softc = sc; 3037 sd->sd_link.adapter = &sr_switch; 3038 sd->sd_link.adapter_target = SR_MAX_LD; 3039 sd->sd_link.adapter_buswidth = 1; 3040 sd->sd_link.pool = &sd->sd_iopool; 3041 bzero(&saa, sizeof(saa)); 3042 saa.saa_sc_link = &sd->sd_link; 3043 3044 /* 3045 * we passed all checks return ENXIO if volume can't be created 3046 */ 3047 rv = ENXIO; 3048 3049 /* clear sense data */ 3050 bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense)); 3051 3052 /* use temporary discipline pointer */ 3053 s = splhigh(); 3054 sc->sc_attach_dis = sd; 3055 splx(s); 3056 dev2 = config_found(&sc->sc_dev, &saa, scsiprint); 3057 s = splhigh(); 3058 sc->sc_attach_dis = NULL; 3059 splx(s); 3060 TAILQ_FOREACH(dev, &alldevs, dv_list) 3061 if (dev->dv_parent == dev2) 3062 break; 3063 if (dev == NULL) 3064 goto unwind; 3065 3066 DNPRINTF(SR_D_IOCTL, "%s: sr device added: %s on scsibus%d\n", 3067 DEVNAME(sc), dev->dv_xname, sd->sd_link.scsibus); 3068 3069 sc->sc_dis[sd->sd_link.scsibus] = sd; 3070 for (i = 0, vol = -1; i <= sd->sd_link.scsibus; i++) 3071 if (sc->sc_dis[i]) 3072 vol++; 3073 sd->sd_scsibus_dev = dev2; 3074 3075 rv = 0; 3076 if (updatemeta) { 3077 /* fill out remaining volume metadata */ 3078 sd->sd_meta->ssdi.ssd_volid = vol; 3079 strlcpy(sd->sd_meta->ssd_devname, dev->dv_xname, 3080 sizeof(sd->sd_meta->ssd_devname)); 3081 sr_meta_init(sd, cl); 3082 } else { 3083 if (strncmp(sd->sd_meta->ssd_devname, dev->dv_xname, 3084 sizeof(dev->dv_xname))) { 3085 printf("%s: volume %s is roaming, it used to " 3086 "be %s, updating metadata\n", 3087 DEVNAME(sc), dev->dv_xname, 3088 sd->sd_meta->ssd_devname); 3089 3090 sd->sd_meta->ssdi.ssd_volid = vol; 3091 strlcpy(sd->sd_meta->ssd_devname, dev->dv_xname, 3092 sizeof(sd->sd_meta->ssd_devname)); 3093 } 3094 } 3095 3096 /* Update device name on any chunks which roamed. 
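sr_roam_chunks() compares the device name recorded in each chunk's metadata with the device the chunk was actually found on and rewrites the metadata, marking it dirty, when they differ.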
*/ 3097 sr_roam_chunks(sd); 3098 3099 #ifndef SMALL_KERNEL 3100 if (sr_sensors_create(sd)) 3101 printf("%s: unable to create sensor for %s\n", 3102 DEVNAME(sc), dev->dv_xname); 3103 else 3104 sd->sd_vol.sv_sensor_valid = 1; 3105 #endif /* SMALL_KERNEL */ 3106 } else { 3107 /* we are not an os disk */ 3108 if (updatemeta) { 3109 /* fill out remaining volume metadata */ 3110 sd->sd_meta->ssdi.ssd_volid = 0; 3111 strlcpy(sd->sd_meta->ssd_devname, ch_entry->src_devname, 3112 sizeof(sd->sd_meta->ssd_devname)); 3113 sr_meta_init(sd, cl); 3114 } 3115 if (sd->sd_start_discipline(sd)) 3116 goto unwind; 3117 } 3118 3119 /* save metadata to disk */ 3120 rv = sr_meta_save(sd, SR_META_DIRTY); 3121 sd->sd_shutdownhook = shutdownhook_establish(sr_shutdown, sd); 3122 3123 if (sd->sd_vol_status == BIOC_SVREBUILD) 3124 kthread_create_deferred(sr_rebuild, sd); 3125 3126 sd->sd_ready = 1; 3127 3128 return (rv); 3129 unwind: 3130 sr_discipline_shutdown(sd); 3131 3132 /* XXX - use internal status values! */ 3133 if (rv == EAGAIN) 3134 rv = 0; 3135 3136 return (rv); 3137 } 3138 3139 int 3140 sr_ioctl_deleteraid(struct sr_softc *sc, struct bioc_deleteraid *dr) 3141 { 3142 struct sr_discipline *sd = NULL; 3143 int rv = 1; 3144 int i; 3145 3146 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_deleteraid %s\n", DEVNAME(sc), 3147 dr->bd_dev); 3148 3149 for (i = 0; i < SR_MAXSCSIBUS; i++) 3150 if (sc->sc_dis[i]) { 3151 if (!strncmp(sc->sc_dis[i]->sd_meta->ssd_devname, 3152 dr->bd_dev, 3153 sizeof(sc->sc_dis[i]->sd_meta->ssd_devname))) { 3154 sd = sc->sc_dis[i]; 3155 break; 3156 } 3157 } 3158 3159 if (sd == NULL) 3160 goto bad; 3161 3162 sd->sd_deleted = 1; 3163 sd->sd_meta->ssdi.ssd_vol_flags = BIOC_SCNOAUTOASSEMBLE; 3164 sr_shutdown(sd); 3165 3166 rv = 0; 3167 bad: 3168 return (rv); 3169 } 3170 3171 int 3172 sr_ioctl_discipline(struct sr_softc *sc, struct bioc_discipline *bd) 3173 { 3174 struct sr_discipline *sd = NULL; 3175 int i, rv = 1; 3176 3177 /* Dispatch a discipline specific ioctl. */ 3178 3179 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_discipline %s\n", DEVNAME(sc), 3180 bd->bd_dev); 3181 3182 for (i = 0; i < SR_MAXSCSIBUS; i++) 3183 if (sc->sc_dis[i]) { 3184 if (!strncmp(sc->sc_dis[i]->sd_meta->ssd_devname, 3185 bd->bd_dev, 3186 sizeof(sc->sc_dis[i]->sd_meta->ssd_devname))) { 3187 sd = sc->sc_dis[i]; 3188 break; 3189 } 3190 } 3191 3192 if (sd && sd->sd_ioctl_handler) 3193 rv = sd->sd_ioctl_handler(sd, bd); 3194 3195 return (rv); 3196 } 3197 3198 int 3199 sr_ioctl_installboot(struct sr_softc *sc, struct bioc_installboot *bb) 3200 { 3201 void *bootblk = NULL, *bootldr = NULL; 3202 struct sr_discipline *sd = NULL; 3203 struct sr_chunk *chunk; 3204 u_int32_t bbs, bls; 3205 int rv = EINVAL; 3206 int i; 3207 3208 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_installboot %s\n", DEVNAME(sc), 3209 bb->bb_dev); 3210 3211 for (i = 0; i < SR_MAXSCSIBUS; i++) 3212 if (sc->sc_dis[i]) { 3213 if (!strncmp(sc->sc_dis[i]->sd_meta->ssd_devname, 3214 bb->bb_dev, 3215 sizeof(sc->sc_dis[i]->sd_meta->ssd_devname))) { 3216 sd = sc->sc_dis[i]; 3217 break; 3218 } 3219 } 3220 3221 if (sd == NULL) 3222 goto done; 3223 3224 /* Ensure that boot storage area is large enough. */ 3225 if (sd->sd_meta->ssd_data_offset < (SR_BOOT_OFFSET + SR_BOOT_SIZE)) { 3226 printf("%s: insufficient boot storage!\n", DEVNAME(sd->sd_sc)); 3227 goto done; 3228 } 3229 3230 if (bb->bb_bootblk_size > SR_BOOT_BLOCKS_SIZE * 512) 3231 goto done; 3232 3233 if (bb->bb_bootldr_size > SR_BOOT_LOADER_SIZE * 512) 3234 goto done; 3235 3236 /* Copy in boot block. 
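Both the boot block and the boot loader are rounded up to whole DEV_BSIZE sectors and later written to the reserved boot areas of every chunk in the volume.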
*/ 3237 bbs = howmany(bb->bb_bootblk_size, DEV_BSIZE) * DEV_BSIZE; 3238 bootblk = malloc(bbs, M_DEVBUF, M_WAITOK | M_ZERO); 3239 if (copyin(bb->bb_bootblk, bootblk, bb->bb_bootblk_size) != 0) 3240 goto done; 3241 3242 /* Copy in boot loader. */ 3243 bls = howmany(bb->bb_bootldr_size, DEV_BSIZE) * DEV_BSIZE; 3244 bootldr = malloc(bls, M_DEVBUF, M_WAITOK | M_ZERO); 3245 if (copyin(bb->bb_bootldr, bootldr, bb->bb_bootldr_size) != 0) 3246 goto done; 3247 3248 /* Save boot block and boot loader to each chunk. */ 3249 for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) { 3250 3251 chunk = sd->sd_vol.sv_chunks[i]; 3252 3253 /* Save boot blocks. */ 3254 DNPRINTF(SR_D_IOCTL, 3255 "sr_ioctl_installboot: saving boot block to %s " 3256 "(%u bytes)\n", chunk->src_devname, bbs); 3257 3258 if (sr_rw(sc, chunk->src_dev_mm, bootblk, bbs, 3259 SR_BOOT_BLOCKS_OFFSET, B_WRITE)) { 3260 printf("%s: failed to write boot block\n", DEVNAME(sc)); 3261 goto done; 3262 } 3263 3264 /* Save boot loader.*/ 3265 DNPRINTF(SR_D_IOCTL, 3266 "sr_ioctl_installboot: saving boot loader to %s " 3267 "(%u bytes)\n", chunk->src_devname, bls); 3268 3269 if (sr_rw(sc, chunk->src_dev_mm, bootldr, bls, 3270 SR_BOOT_LOADER_OFFSET, B_WRITE)) { 3271 printf("%s: failed to write boot loader\n", 3272 DEVNAME(sc)); 3273 goto done; 3274 } 3275 3276 } 3277 3278 /* XXX - Install boot block on disk - MD code. */ 3279 3280 /* Save boot details in metadata. */ 3281 sd->sd_meta->ssdi.ssd_vol_flags |= BIOC_SCBOOTABLE; 3282 3283 /* XXX - Store size of boot block/loader in optional metadata. */ 3284 3285 /* Save metadata. */ 3286 if (sr_meta_save(sd, SR_META_DIRTY)) { 3287 printf("%s: could not save metadata to %s\n", 3288 DEVNAME(sc), chunk->src_devname); 3289 goto done; 3290 } 3291 3292 rv = 0; 3293 3294 done: 3295 if (bootblk) 3296 free(bootblk, M_DEVBUF); 3297 if (bootldr) 3298 free(bootldr, M_DEVBUF); 3299 3300 return (rv); 3301 } 3302 3303 void 3304 sr_chunks_unwind(struct sr_softc *sc, struct sr_chunk_head *cl) 3305 { 3306 struct sr_chunk *ch_entry, *ch_next; 3307 3308 DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind\n", DEVNAME(sc)); 3309 3310 if (!cl) 3311 return; 3312 3313 for (ch_entry = SLIST_FIRST(cl); 3314 ch_entry != SLIST_END(cl); ch_entry = ch_next) { 3315 ch_next = SLIST_NEXT(ch_entry, src_link); 3316 3317 DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind closing: %s\n", 3318 DEVNAME(sc), ch_entry->src_devname); 3319 if (ch_entry->src_vn) { 3320 /* 3321 * XXX - explicitly lock the vnode until we can resolve 3322 * the problem introduced by vnode aliasing... specfs 3323 * has no locking, whereas ufs/ffs does! 3324 */ 3325 vn_lock(ch_entry->src_vn, LK_EXCLUSIVE | 3326 LK_RETRY, curproc); 3327 VOP_CLOSE(ch_entry->src_vn, FREAD | FWRITE, NOCRED, 3328 curproc); 3329 vput(ch_entry->src_vn); 3330 } 3331 free(ch_entry, M_DEVBUF); 3332 } 3333 SLIST_INIT(cl); 3334 } 3335 3336 void 3337 sr_discipline_free(struct sr_discipline *sd) 3338 { 3339 struct sr_softc *sc; 3340 struct sr_meta_opt_head *omh; 3341 struct sr_meta_opt_item *omi, *omi_next; 3342 int i; 3343 3344 if (!sd) 3345 return; 3346 3347 sc = sd->sd_sc; 3348 3349 DNPRINTF(SR_D_DIS, "%s: sr_discipline_free %s\n", 3350 DEVNAME(sc), 3351 sd->sd_meta ? 
sd->sd_meta->ssd_devname : "nodev"); 3352 if (sd->sd_free_resources) 3353 sd->sd_free_resources(sd); 3354 if (sd->sd_vol.sv_chunks) 3355 free(sd->sd_vol.sv_chunks, M_DEVBUF); 3356 if (sd->sd_meta) 3357 free(sd->sd_meta, M_DEVBUF); 3358 if (sd->sd_meta_foreign) 3359 free(sd->sd_meta_foreign, M_DEVBUF); 3360 3361 omh = &sd->sd_meta_opt; 3362 for (omi = SLIST_FIRST(omh); omi != SLIST_END(omh); omi = omi_next) { 3363 omi_next = SLIST_NEXT(omi, omi_link); 3364 free(omi, M_DEVBUF); 3365 } 3366 3367 for (i = 0; i < SR_MAXSCSIBUS; i++) 3368 if (sc->sc_dis[i] == sd) { 3369 sc->sc_dis[i] = NULL; 3370 break; 3371 } 3372 3373 explicit_bzero(sd, sizeof *sd); 3374 free(sd, M_DEVBUF); 3375 } 3376 3377 void 3378 sr_discipline_shutdown(struct sr_discipline *sd) 3379 { 3380 struct sr_softc *sc = sd->sd_sc; 3381 int s; 3382 3383 if (!sd || !sc) 3384 return; 3385 3386 DNPRINTF(SR_D_DIS, "%s: sr_discipline_shutdown %s\n", DEVNAME(sc), 3387 sd->sd_meta ? sd->sd_meta->ssd_devname : "nodev"); 3388 3389 s = splbio(); 3390 3391 sd->sd_ready = 0; 3392 3393 if (sd->sd_shutdownhook) 3394 shutdownhook_disestablish(sd->sd_shutdownhook); 3395 3396 /* make sure there isn't a sync pending and yield */ 3397 wakeup(sd); 3398 while (sd->sd_sync || sd->sd_must_flush) 3399 if (tsleep(&sd->sd_sync, MAXPRI, "sr_down", 60 * hz) == 3400 EWOULDBLOCK) 3401 break; 3402 3403 #ifndef SMALL_KERNEL 3404 sr_sensors_delete(sd); 3405 #endif /* SMALL_KERNEL */ 3406 3407 if (sd->sd_scsibus_dev) 3408 config_detach(sd->sd_scsibus_dev, DETACH_FORCE); 3409 3410 sr_chunks_unwind(sc, &sd->sd_vol.sv_chunk_list); 3411 3412 if (sd->sd_workq) 3413 workq_destroy(sd->sd_workq); 3414 3415 if (sd) 3416 sr_discipline_free(sd); 3417 3418 splx(s); 3419 } 3420 3421 int 3422 sr_discipline_init(struct sr_discipline *sd, int level) 3423 { 3424 int rv = 1; 3425 3426 switch (level) { 3427 case 0: 3428 sr_raid0_discipline_init(sd); 3429 break; 3430 case 1: 3431 sr_raid1_discipline_init(sd); 3432 break; 3433 case 4: 3434 sr_raidp_discipline_init(sd, SR_MD_RAID4); 3435 break; 3436 case 5: 3437 sr_raidp_discipline_init(sd, SR_MD_RAID5); 3438 break; 3439 case 6: 3440 sr_raid6_discipline_init(sd); 3441 break; 3442 #ifdef AOE 3443 /* AOE target. */ 3444 case 'A': 3445 sr_aoe_server_discipline_init(sd); 3446 break; 3447 /* AOE initiator. 
*/ 3448 case 'a': 3449 sr_aoe_discipline_init(sd); 3450 break; 3451 #endif 3452 #ifdef CRYPTO 3453 case 'C': 3454 sr_crypto_discipline_init(sd); 3455 break; 3456 #endif 3457 default: 3458 goto bad; 3459 } 3460 3461 rv = 0; 3462 bad: 3463 return (rv); 3464 } 3465 3466 int 3467 sr_raid_inquiry(struct sr_workunit *wu) 3468 { 3469 struct sr_discipline *sd = wu->swu_dis; 3470 struct scsi_xfer *xs = wu->swu_xs; 3471 struct scsi_inquiry_data inq; 3472 3473 DNPRINTF(SR_D_DIS, "%s: sr_raid_inquiry\n", DEVNAME(sd->sd_sc)); 3474 3475 bzero(&inq, sizeof(inq)); 3476 inq.device = T_DIRECT; 3477 inq.dev_qual2 = 0; 3478 inq.version = 2; 3479 inq.response_format = 2; 3480 inq.additional_length = 32; 3481 inq.flags |= SID_CmdQue; 3482 strlcpy(inq.vendor, sd->sd_meta->ssdi.ssd_vendor, 3483 sizeof(inq.vendor)); 3484 strlcpy(inq.product, sd->sd_meta->ssdi.ssd_product, 3485 sizeof(inq.product)); 3486 strlcpy(inq.revision, sd->sd_meta->ssdi.ssd_revision, 3487 sizeof(inq.revision)); 3488 sr_copy_internal_data(xs, &inq, sizeof(inq)); 3489 3490 return (0); 3491 } 3492 3493 int 3494 sr_raid_read_cap(struct sr_workunit *wu) 3495 { 3496 struct sr_discipline *sd = wu->swu_dis; 3497 struct scsi_xfer *xs = wu->swu_xs; 3498 struct scsi_read_cap_data rcd; 3499 struct scsi_read_cap_data_16 rcd16; 3500 daddr64_t addr; 3501 int rv = 1; 3502 3503 DNPRINTF(SR_D_DIS, "%s: sr_raid_read_cap\n", DEVNAME(sd->sd_sc)); 3504 3505 addr = sd->sd_meta->ssdi.ssd_size - 1; 3506 if (xs->cmd->opcode == READ_CAPACITY) { 3507 bzero(&rcd, sizeof(rcd)); 3508 if (addr > 0xffffffffllu) 3509 _lto4b(0xffffffff, rcd.addr); 3510 else 3511 _lto4b(addr, rcd.addr); 3512 _lto4b(512, rcd.length); 3513 sr_copy_internal_data(xs, &rcd, sizeof(rcd)); 3514 rv = 0; 3515 } else if (xs->cmd->opcode == READ_CAPACITY_16) { 3516 bzero(&rcd16, sizeof(rcd16)); 3517 _lto8b(addr, rcd16.addr); 3518 _lto4b(512, rcd16.length); 3519 sr_copy_internal_data(xs, &rcd16, sizeof(rcd16)); 3520 rv = 0; 3521 } 3522 3523 return (rv); 3524 } 3525 3526 int 3527 sr_raid_tur(struct sr_workunit *wu) 3528 { 3529 struct sr_discipline *sd = wu->swu_dis; 3530 3531 DNPRINTF(SR_D_DIS, "%s: sr_raid_tur\n", DEVNAME(sd->sd_sc)); 3532 3533 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 3534 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT; 3535 sd->sd_scsi_sense.flags = SKEY_NOT_READY; 3536 sd->sd_scsi_sense.add_sense_code = 0x04; 3537 sd->sd_scsi_sense.add_sense_code_qual = 0x11; 3538 sd->sd_scsi_sense.extra_len = 4; 3539 return (1); 3540 } else if (sd->sd_vol_status == BIOC_SVINVALID) { 3541 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT; 3542 sd->sd_scsi_sense.flags = SKEY_HARDWARE_ERROR; 3543 sd->sd_scsi_sense.add_sense_code = 0x05; 3544 sd->sd_scsi_sense.add_sense_code_qual = 0x00; 3545 sd->sd_scsi_sense.extra_len = 4; 3546 return (1); 3547 } 3548 3549 return (0); 3550 } 3551 3552 int 3553 sr_raid_request_sense(struct sr_workunit *wu) 3554 { 3555 struct sr_discipline *sd = wu->swu_dis; 3556 struct scsi_xfer *xs = wu->swu_xs; 3557 3558 DNPRINTF(SR_D_DIS, "%s: sr_raid_request_sense\n", 3559 DEVNAME(sd->sd_sc)); 3560 3561 /* use latest sense data */ 3562 bcopy(&sd->sd_scsi_sense, &xs->sense, sizeof(xs->sense)); 3563 3564 /* clear sense data */ 3565 bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense)); 3566 3567 return (0); 3568 } 3569 3570 int 3571 sr_raid_start_stop(struct sr_workunit *wu) 3572 { 3573 struct scsi_xfer *xs = wu->swu_xs; 3574 struct scsi_start_stop *ss = (struct scsi_start_stop *)xs->cmd; 3575 3576 DNPRINTF(SR_D_DIS, "%s: sr_raid_start_stop\n", 3577 DEVNAME(sd->sd_sc)); 3578 3579 
if (!ss) 3580 return (1); 3581 3582 /* 3583 * do nothing! 3584 * a softraid discipline should always reflect correct status 3585 */ 3586 return (0); 3587 } 3588 3589 int 3590 sr_raid_sync(struct sr_workunit *wu) 3591 { 3592 struct sr_discipline *sd = wu->swu_dis; 3593 int s, rv = 0, ios; 3594 3595 DNPRINTF(SR_D_DIS, "%s: sr_raid_sync\n", DEVNAME(sd->sd_sc)); 3596 3597 /* when doing a fake sync don't count the wu */ 3598 ios = wu->swu_fake ? 0 : 1; 3599 3600 s = splbio(); 3601 sd->sd_sync = 1; 3602 3603 while (sd->sd_wu_pending > ios) 3604 if (tsleep(sd, PRIBIO, "sr_sync", 15 * hz) == EWOULDBLOCK) { 3605 DNPRINTF(SR_D_DIS, "%s: sr_raid_sync timeout\n", 3606 DEVNAME(sd->sd_sc)); 3607 rv = 1; 3608 break; 3609 } 3610 3611 sd->sd_sync = 0; 3612 splx(s); 3613 3614 wakeup(&sd->sd_sync); 3615 3616 return (rv); 3617 } 3618 3619 void 3620 sr_startwu_callback(void *arg1, void *arg2) 3621 { 3622 struct sr_discipline *sd = arg1; 3623 struct sr_workunit *wu = arg2; 3624 struct sr_ccb *ccb; 3625 int s; 3626 3627 s = splbio(); 3628 if (wu->swu_cb_active == 1) 3629 panic("%s: sr_startwu_callback", DEVNAME(sd->sd_sc)); 3630 wu->swu_cb_active = 1; 3631 3632 TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) 3633 VOP_STRATEGY(&ccb->ccb_buf); 3634 3635 wu->swu_cb_active = 0; 3636 splx(s); 3637 } 3638 3639 void 3640 sr_raid_startwu(struct sr_workunit *wu) 3641 { 3642 struct sr_discipline *sd = wu->swu_dis; 3643 3644 splassert(IPL_BIO); 3645 3646 if (wu->swu_state == SR_WU_RESTART) 3647 /* 3648 * no need to put the wu on the pending queue since we 3649 * are restarting the io 3650 */ 3651 ; 3652 else 3653 /* move wu to pending queue */ 3654 TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link); 3655 3656 /* start all individual ios */ 3657 workq_queue_task(sd->sd_workq, &wu->swu_wqt, 0, sr_startwu_callback, 3658 sd, wu); 3659 } 3660 3661 void 3662 sr_checksum_print(u_int8_t *md5) 3663 { 3664 int i; 3665 3666 for (i = 0; i < MD5_DIGEST_LENGTH; i++) 3667 printf("%02x", md5[i]); 3668 } 3669 3670 void 3671 sr_checksum(struct sr_softc *sc, void *src, void *md5, u_int32_t len) 3672 { 3673 MD5_CTX ctx; 3674 3675 DNPRINTF(SR_D_MISC, "%s: sr_checksum(%p %p %d)\n", DEVNAME(sc), src, 3676 md5, len); 3677 3678 MD5Init(&ctx); 3679 MD5Update(&ctx, src, len); 3680 MD5Final(md5, &ctx); 3681 } 3682 3683 void 3684 sr_uuid_get(struct sr_uuid *uuid) 3685 { 3686 arc4random_buf(uuid->sui_id, sizeof(uuid->sui_id)); 3687 /* UUID version 4: random */ 3688 uuid->sui_id[6] &= 0x0f; 3689 uuid->sui_id[6] |= 0x40; 3690 /* RFC4122 variant */ 3691 uuid->sui_id[8] &= 0x3f; 3692 uuid->sui_id[8] |= 0x80; 3693 } 3694 3695 void 3696 sr_uuid_print(struct sr_uuid *uuid, int cr) 3697 { 3698 printf("%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-" 3699 "%02x%02x%02x%02x%02x%02x", 3700 uuid->sui_id[0], uuid->sui_id[1], 3701 uuid->sui_id[2], uuid->sui_id[3], 3702 uuid->sui_id[4], uuid->sui_id[5], 3703 uuid->sui_id[6], uuid->sui_id[7], 3704 uuid->sui_id[8], uuid->sui_id[9], 3705 uuid->sui_id[10], uuid->sui_id[11], 3706 uuid->sui_id[12], uuid->sui_id[13], 3707 uuid->sui_id[14], uuid->sui_id[15]); 3708 3709 if (cr) 3710 printf("\n"); 3711 } 3712 3713 int 3714 sr_already_assembled(struct sr_discipline *sd) 3715 { 3716 struct sr_softc *sc = sd->sd_sc; 3717 int i; 3718 3719 for (i = 0; i < SR_MAXSCSIBUS; i++) 3720 if (sc->sc_dis[i]) 3721 if (!bcmp(&sd->sd_meta->ssdi.ssd_uuid, 3722 &sc->sc_dis[i]->sd_meta->ssdi.ssd_uuid, 3723 sizeof(sd->sd_meta->ssdi.ssd_uuid))) 3724 return (1); 3725 3726 return (0); 3727 } 3728 3729 int32_t 3730 sr_validate_stripsize(u_int32_t b) 3731 { 3732 
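/*
 * A valid strip size is a power-of-two multiple of 512 bytes and the
 * return value is its log2, e.g. 65536 yields 16.  A value such as
 * 1536 (512 * 3) passes the modulo test but leaves more than one bit
 * set after the trailing zeroes are shifted out, so it is rejected
 * with -1.
 */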
int s = 0; 3733 3734 if (b % 512) 3735 return (-1); 3736 3737 while ((b & 1) == 0) { 3738 b >>= 1; 3739 s++; 3740 } 3741 3742 /* only multiple of twos */ 3743 b >>= 1; 3744 if (b) 3745 return(-1); 3746 3747 return (s); 3748 } 3749 3750 void 3751 sr_shutdown(void *arg) 3752 { 3753 struct sr_discipline *sd = arg; 3754 #ifdef SR_DEBUG 3755 struct sr_softc *sc = sd->sd_sc; 3756 #endif 3757 DNPRINTF(SR_D_DIS, "%s: sr_shutdown %s\n", 3758 DEVNAME(sc), sd->sd_meta->ssd_devname); 3759 3760 /* abort rebuild and drain io */ 3761 sd->sd_reb_abort = 1; 3762 while (sd->sd_reb_active) 3763 tsleep(sd, PWAIT, "sr_shutdown", 1); 3764 3765 sr_meta_save(sd, 0); 3766 3767 sr_discipline_shutdown(sd); 3768 } 3769 3770 int 3771 sr_validate_io(struct sr_workunit *wu, daddr64_t *blk, char *func) 3772 { 3773 struct sr_discipline *sd = wu->swu_dis; 3774 struct scsi_xfer *xs = wu->swu_xs; 3775 int rv = 1; 3776 3777 DNPRINTF(SR_D_DIS, "%s: %s 0x%02x\n", DEVNAME(sd->sd_sc), func, 3778 xs->cmd->opcode); 3779 3780 if (sd->sd_meta->ssd_data_offset == 0) 3781 panic("invalid data offset"); 3782 3783 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 3784 DNPRINTF(SR_D_DIS, "%s: %s device offline\n", 3785 DEVNAME(sd->sd_sc), func); 3786 goto bad; 3787 } 3788 3789 if (xs->datalen == 0) { 3790 printf("%s: %s: illegal block count for %s\n", 3791 DEVNAME(sd->sd_sc), func, sd->sd_meta->ssd_devname); 3792 goto bad; 3793 } 3794 3795 if (xs->cmdlen == 10) 3796 *blk = _4btol(((struct scsi_rw_big *)xs->cmd)->addr); 3797 else if (xs->cmdlen == 16) 3798 *blk = _8btol(((struct scsi_rw_16 *)xs->cmd)->addr); 3799 else if (xs->cmdlen == 6) 3800 *blk = _3btol(((struct scsi_rw *)xs->cmd)->addr); 3801 else { 3802 printf("%s: %s: illegal cmdlen for %s\n", 3803 DEVNAME(sd->sd_sc), func, sd->sd_meta->ssd_devname); 3804 goto bad; 3805 } 3806 3807 wu->swu_blk_start = *blk; 3808 wu->swu_blk_end = *blk + (xs->datalen >> DEV_BSHIFT) - 1; 3809 3810 if (wu->swu_blk_end > sd->sd_meta->ssdi.ssd_size) { 3811 DNPRINTF(SR_D_DIS, "%s: %s out of bounds start: %lld " 3812 "end: %lld length: %d\n", 3813 DEVNAME(sd->sd_sc), func, wu->swu_blk_start, 3814 wu->swu_blk_end, xs->datalen); 3815 3816 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT | 3817 SSD_ERRCODE_VALID; 3818 sd->sd_scsi_sense.flags = SKEY_ILLEGAL_REQUEST; 3819 sd->sd_scsi_sense.add_sense_code = 0x21; 3820 sd->sd_scsi_sense.add_sense_code_qual = 0x00; 3821 sd->sd_scsi_sense.extra_len = 4; 3822 goto bad; 3823 } 3824 3825 rv = 0; 3826 bad: 3827 return (rv); 3828 } 3829 3830 int 3831 sr_check_io_collision(struct sr_workunit *wu) 3832 { 3833 struct sr_discipline *sd = wu->swu_dis; 3834 struct sr_workunit *wup; 3835 3836 splassert(IPL_BIO); 3837 3838 /* walk queue backwards and fill in collider if we have one */ 3839 TAILQ_FOREACH_REVERSE(wup, &sd->sd_wu_pendq, sr_wu_list, swu_link) { 3840 if (wu->swu_blk_end < wup->swu_blk_start || 3841 wup->swu_blk_end < wu->swu_blk_start) 3842 continue; 3843 3844 /* we have an LBA collision, defer wu */ 3845 wu->swu_state = SR_WU_DEFERRED; 3846 if (wup->swu_collider) 3847 /* wu is on deferred queue, append to last wu */ 3848 while (wup->swu_collider) 3849 wup = wup->swu_collider; 3850 3851 wup->swu_collider = wu; 3852 TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link); 3853 sd->sd_wu_collisions++; 3854 goto queued; 3855 } 3856 3857 return (0); 3858 queued: 3859 return (1); 3860 } 3861 3862 void 3863 sr_rebuild(void *arg) 3864 { 3865 struct sr_discipline *sd = arg; 3866 struct sr_softc *sc = sd->sd_sc; 3867 3868 if (kthread_create(sr_rebuild_thread, sd, &sd->sd_background_proc, 
3869 DEVNAME(sc)) != 0) 3870 printf("%s: unable to start backgound operation\n", 3871 DEVNAME(sc)); 3872 } 3873 3874 void 3875 sr_rebuild_thread(void *arg) 3876 { 3877 struct sr_discipline *sd = arg; 3878 struct sr_softc *sc = sd->sd_sc; 3879 daddr64_t whole_blk, partial_blk, blk, sz, lba; 3880 daddr64_t psz, rb, restart; 3881 uint64_t mysize = 0; 3882 struct sr_workunit *wu_r, *wu_w; 3883 struct scsi_xfer xs_r, xs_w; 3884 struct scsi_rw_16 cr, cw; 3885 int c, s, slept, percent = 0, old_percent = -1; 3886 u_int8_t *buf; 3887 3888 whole_blk = sd->sd_meta->ssdi.ssd_size / SR_REBUILD_IO_SIZE; 3889 partial_blk = sd->sd_meta->ssdi.ssd_size % SR_REBUILD_IO_SIZE; 3890 3891 restart = sd->sd_meta->ssd_rebuild / SR_REBUILD_IO_SIZE; 3892 if (restart > whole_blk) { 3893 printf("%s: bogus rebuild restart offset, starting from 0\n", 3894 DEVNAME(sc)); 3895 restart = 0; 3896 } 3897 if (restart) { 3898 /* 3899 * XXX there is a hole here; there is a posibility that we 3900 * had a restart however the chunk that was supposed to 3901 * be rebuilt is no longer valid; we can reach this situation 3902 * when a rebuild is in progress and the box crashes and 3903 * on reboot the rebuild chunk is different (like zero'd or 3904 * replaced). We need to check the uuid of the chunk that is 3905 * being rebuilt to assert this. 3906 */ 3907 psz = sd->sd_meta->ssdi.ssd_size; 3908 rb = sd->sd_meta->ssd_rebuild; 3909 if (rb > 0) 3910 percent = 100 - ((psz * 100 - rb * 100) / psz) - 1; 3911 else 3912 percent = 0; 3913 printf("%s: resuming rebuild on %s at %llu%%\n", 3914 DEVNAME(sc), sd->sd_meta->ssd_devname, percent); 3915 } 3916 3917 sd->sd_reb_active = 1; 3918 3919 /* currently this is 64k therefore we can use dma_alloc */ 3920 buf = dma_alloc(SR_REBUILD_IO_SIZE << DEV_BSHIFT, PR_WAITOK); 3921 for (blk = restart; blk <= whole_blk; blk++) { 3922 if (blk == whole_blk) 3923 sz = partial_blk; 3924 else 3925 sz = SR_REBUILD_IO_SIZE; 3926 mysize += sz; 3927 lba = blk * sz; 3928 3929 /* get some wu */ 3930 if ((wu_r = scsi_io_get(&sd->sd_iopool, 0)) == NULL) 3931 panic("%s: rebuild exhausted wu_r", DEVNAME(sc)); 3932 if ((wu_w = scsi_io_get(&sd->sd_iopool, 0)) == NULL) 3933 panic("%s: rebuild exhausted wu_w", DEVNAME(sc)); 3934 3935 /* setup read io */ 3936 bzero(&xs_r, sizeof xs_r); 3937 bzero(&cr, sizeof cr); 3938 xs_r.error = XS_NOERROR; 3939 xs_r.flags = SCSI_DATA_IN; 3940 xs_r.datalen = sz << DEV_BSHIFT; 3941 xs_r.data = buf; 3942 xs_r.cmdlen = 16; 3943 cr.opcode = READ_16; 3944 _lto4b(sz, cr.length); 3945 _lto8b(lba, cr.addr); 3946 xs_r.cmd = (struct scsi_generic *)&cr; 3947 wu_r->swu_flags |= SR_WUF_REBUILD; 3948 wu_r->swu_xs = &xs_r; 3949 if (sd->sd_scsi_rw(wu_r)) { 3950 printf("%s: could not create read io\n", 3951 DEVNAME(sc)); 3952 goto fail; 3953 } 3954 3955 /* setup write io */ 3956 bzero(&xs_w, sizeof xs_w); 3957 bzero(&cw, sizeof cw); 3958 xs_w.error = XS_NOERROR; 3959 xs_w.flags = SCSI_DATA_OUT; 3960 xs_w.datalen = sz << DEV_BSHIFT; 3961 xs_w.data = buf; 3962 xs_w.cmdlen = 16; 3963 cw.opcode = WRITE_16; 3964 _lto4b(sz, cw.length); 3965 _lto8b(lba, cw.addr); 3966 xs_w.cmd = (struct scsi_generic *)&cw; 3967 wu_w->swu_flags |= SR_WUF_REBUILD; 3968 wu_w->swu_xs = &xs_w; 3969 if (sd->sd_scsi_rw(wu_w)) { 3970 printf("%s: could not create write io\n", 3971 DEVNAME(sc)); 3972 goto fail; 3973 } 3974 3975 /* 3976 * collide with the read io so that we get automatically 3977 * started when the read is done 3978 */ 3979 wu_w->swu_state = SR_WU_DEFERRED; 3980 wu_r->swu_collider = wu_w; 3981 s = splbio(); 3982 
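/*
 * The deferred write is picked up via swu_collider once the read
 * completes; the wait loop below then sleeps on wu_w until
 * SR_WUF_REBUILDIOCOMP indicates the write has finished as well.
 */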
TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu_w, swu_link); 3983 3984 /* schedule io */ 3985 if (sr_check_io_collision(wu_r)) 3986 goto queued; 3987 3988 sr_raid_startwu(wu_r); 3989 queued: 3990 splx(s); 3991 3992 /* wait for read completion */ 3993 slept = 0; 3994 while ((wu_w->swu_flags & SR_WUF_REBUILDIOCOMP) == 0) { 3995 tsleep(wu_w, PRIBIO, "sr_rebuild", 0); 3996 slept = 1; 3997 } 3998 /* yield if we didn't sleep */ 3999 if (slept == 0) 4000 tsleep(sc, PWAIT, "sr_yield", 1); 4001 4002 scsi_io_put(&sd->sd_iopool, wu_r); 4003 scsi_io_put(&sd->sd_iopool, wu_w); 4004 4005 sd->sd_meta->ssd_rebuild = lba; 4006 4007 /* save metadata every percent */ 4008 psz = sd->sd_meta->ssdi.ssd_size; 4009 rb = sd->sd_meta->ssd_rebuild; 4010 if (rb > 0) 4011 percent = 100 - ((psz * 100 - rb * 100) / psz) - 1; 4012 else 4013 percent = 0; 4014 if (percent != old_percent && blk != whole_blk) { 4015 if (sr_meta_save(sd, SR_META_DIRTY)) 4016 printf("%s: could not save metadata to %s\n", 4017 DEVNAME(sc), sd->sd_meta->ssd_devname); 4018 old_percent = percent; 4019 } 4020 4021 if (sd->sd_reb_abort) 4022 goto abort; 4023 } 4024 4025 /* all done */ 4026 sd->sd_meta->ssd_rebuild = 0; 4027 for (c = 0; c < sd->sd_meta->ssdi.ssd_chunk_no; c++) 4028 if (sd->sd_vol.sv_chunks[c]->src_meta.scm_status == 4029 BIOC_SDREBUILD) { 4030 sd->sd_set_chunk_state(sd, c, BIOC_SDONLINE); 4031 break; 4032 } 4033 4034 abort: 4035 if (sr_meta_save(sd, SR_META_DIRTY)) 4036 printf("%s: could not save metadata to %s\n", 4037 DEVNAME(sc), sd->sd_meta->ssd_devname); 4038 fail: 4039 dma_free(buf, SR_REBUILD_IO_SIZE << DEV_BSHIFT); 4040 sd->sd_reb_active = 0; 4041 kthread_exit(0); 4042 } 4043 4044 #ifndef SMALL_KERNEL 4045 int 4046 sr_sensors_create(struct sr_discipline *sd) 4047 { 4048 struct sr_softc *sc = sd->sd_sc; 4049 int rv = 1; 4050 4051 DNPRINTF(SR_D_STATE, "%s: %s: sr_sensors_create\n", 4052 DEVNAME(sc), sd->sd_meta->ssd_devname); 4053 4054 sd->sd_vol.sv_sensor.type = SENSOR_DRIVE; 4055 sd->sd_vol.sv_sensor.status = SENSOR_S_UNKNOWN; 4056 strlcpy(sd->sd_vol.sv_sensor.desc, sd->sd_meta->ssd_devname, 4057 sizeof(sd->sd_vol.sv_sensor.desc)); 4058 4059 sensor_attach(&sc->sc_sensordev, &sd->sd_vol.sv_sensor); 4060 sd->sd_vol.sv_sensor_attached = 1; 4061 4062 if (sc->sc_sensors_running == 0) { 4063 if (sensor_task_register(sc, sr_sensors_refresh, 10) == NULL) 4064 goto bad; 4065 sc->sc_sensors_running = 1; 4066 } 4067 4068 rv = 0; 4069 bad: 4070 return (rv); 4071 } 4072 4073 void 4074 sr_sensors_delete(struct sr_discipline *sd) 4075 { 4076 DNPRINTF(SR_D_STATE, "%s: sr_sensors_delete\n", DEVNAME(sd->sd_sc)); 4077 4078 if (sd->sd_vol.sv_sensor_attached) 4079 sensor_detach(&sd->sd_sc->sc_sensordev, &sd->sd_vol.sv_sensor); 4080 } 4081 4082 void 4083 sr_sensors_refresh(void *arg) 4084 { 4085 struct sr_softc *sc = arg; 4086 struct sr_volume *sv; 4087 struct sr_discipline *sd; 4088 int i, vol; 4089 4090 DNPRINTF(SR_D_STATE, "%s: sr_sensors_refresh\n", DEVNAME(sc)); 4091 4092 for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { 4093 /* XXX this will not work when we stagger disciplines */ 4094 if (!sc->sc_dis[i]) 4095 continue; 4096 4097 sd = sc->sc_dis[i]; 4098 sv = &sd->sd_vol; 4099 4100 switch(sd->sd_vol_status) { 4101 case BIOC_SVOFFLINE: 4102 sv->sv_sensor.value = SENSOR_DRIVE_FAIL; 4103 sv->sv_sensor.status = SENSOR_S_CRIT; 4104 break; 4105 4106 case BIOC_SVDEGRADED: 4107 sv->sv_sensor.value = SENSOR_DRIVE_PFAIL; 4108 sv->sv_sensor.status = SENSOR_S_WARN; 4109 break; 4110 4111 case BIOC_SVSCRUB: 4112 case BIOC_SVONLINE: 4113 sv->sv_sensor.value = 
SENSOR_DRIVE_ONLINE; 4114 sv->sv_sensor.status = SENSOR_S_OK; 4115 break; 4116 4117 default: 4118 sv->sv_sensor.value = 0; /* unknown */ 4119 sv->sv_sensor.status = SENSOR_S_UNKNOWN; 4120 } 4121 } 4122 } 4123 #endif /* SMALL_KERNEL */ 4124 4125 #ifdef SR_FANCY_STATS 4126 void sr_print_stats(void); 4127 4128 void 4129 sr_print_stats(void) 4130 { 4131 struct sr_softc *sc; 4132 struct sr_discipline *sd; 4133 int i, vol; 4134 4135 for (i = 0; i < softraid_cd.cd_ndevs; i++) 4136 if (softraid_cd.cd_devs[i]) { 4137 sc = softraid_cd.cd_devs[i]; 4138 /* we'll only have one softc */ 4139 break; 4140 } 4141 4142 if (!sc) { 4143 printf("no softraid softc found\n"); 4144 return; 4145 } 4146 4147 for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { 4148 /* XXX this will not work when we stagger disciplines */ 4149 if (!sc->sc_dis[i]) 4150 continue; 4151 4152 sd = sc->sc_dis[i]; 4153 printf("%s: ios pending: %d collisions %llu\n", 4154 sd->sd_meta->ssd_devname, 4155 sd->sd_wu_pending, 4156 sd->sd_wu_collisions); 4157 } 4158 } 4159 #endif /* SR_FANCY_STATS */ 4160 4161 #ifdef SR_DEBUG 4162 void 4163 sr_meta_print(struct sr_metadata *m) 4164 { 4165 int i; 4166 struct sr_meta_chunk *mc; 4167 struct sr_meta_opt *mo; 4168 4169 if (!(sr_debug & SR_D_META)) 4170 return; 4171 4172 printf("\tssd_magic 0x%llx\n", m->ssdi.ssd_magic); 4173 printf("\tssd_version %d\n", m->ssdi.ssd_version); 4174 printf("\tssd_vol_flags 0x%x\n", m->ssdi.ssd_vol_flags); 4175 printf("\tssd_uuid "); 4176 sr_uuid_print(&m->ssdi.ssd_uuid, 1); 4177 printf("\tssd_chunk_no %d\n", m->ssdi.ssd_chunk_no); 4178 printf("\tssd_chunk_id %d\n", m->ssdi.ssd_chunk_id); 4179 printf("\tssd_opt_no %d\n", m->ssdi.ssd_opt_no); 4180 printf("\tssd_volid %d\n", m->ssdi.ssd_volid); 4181 printf("\tssd_level %d\n", m->ssdi.ssd_level); 4182 printf("\tssd_size %lld\n", m->ssdi.ssd_size); 4183 printf("\tssd_devname %s\n", m->ssd_devname); 4184 printf("\tssd_vendor %s\n", m->ssdi.ssd_vendor); 4185 printf("\tssd_product %s\n", m->ssdi.ssd_product); 4186 printf("\tssd_revision %s\n", m->ssdi.ssd_revision); 4187 printf("\tssd_strip_size %d\n", m->ssdi.ssd_strip_size); 4188 printf("\tssd_checksum "); 4189 sr_checksum_print(m->ssd_checksum); 4190 printf("\n"); 4191 printf("\tssd_meta_flags 0x%x\n", m->ssd_meta_flags); 4192 printf("\tssd_ondisk %llu\n", m->ssd_ondisk); 4193 4194 mc = (struct sr_meta_chunk *)(m + 1); 4195 for (i = 0; i < m->ssdi.ssd_chunk_no; i++, mc++) { 4196 printf("\t\tscm_volid %d\n", mc->scmi.scm_volid); 4197 printf("\t\tscm_chunk_id %d\n", mc->scmi.scm_chunk_id); 4198 printf("\t\tscm_devname %s\n", mc->scmi.scm_devname); 4199 printf("\t\tscm_size %lld\n", mc->scmi.scm_size); 4200 printf("\t\tscm_coerced_size %lld\n",mc->scmi.scm_coerced_size); 4201 printf("\t\tscm_uuid "); 4202 sr_uuid_print(&mc->scmi.scm_uuid, 1); 4203 printf("\t\tscm_checksum "); 4204 sr_checksum_print(mc->scm_checksum); 4205 printf("\n"); 4206 printf("\t\tscm_status %d\n", mc->scm_status); 4207 } 4208 4209 mo = (struct sr_meta_opt *)(mc); 4210 for (i = 0; i < m->ssdi.ssd_opt_no; i++, mo++) { 4211 printf("\t\t\tsom_type %d\n", mo->somi.som_type); 4212 printf("\t\t\tsom_checksum "); 4213 sr_checksum_print(mo->som_checksum); 4214 printf("\n"); 4215 } 4216 } 4217 4218 void 4219 sr_dump_mem(u_int8_t *p, int len) 4220 { 4221 int i; 4222 4223 for (i = 0; i < len; i++) 4224 printf("%02x ", *p++); 4225 printf("\n"); 4226 } 4227 4228 #endif /* SR_DEBUG */ 4229