1 /* $OpenBSD: softraid.c,v 1.274 2012/01/30 13:13:03 jsing Exp $ */ 2 /* 3 * Copyright (c) 2007, 2008, 2009 Marco Peereboom <marco@peereboom.us> 4 * Copyright (c) 2008 Chris Kuethe <ckuethe@openbsd.org> 5 * Copyright (c) 2009 Joel Sing <jsing@openbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 20 #include "bio.h" 21 22 #include <sys/param.h> 23 #include <sys/systm.h> 24 #include <sys/buf.h> 25 #include <sys/device.h> 26 #include <sys/ioctl.h> 27 #include <sys/proc.h> 28 #include <sys/malloc.h> 29 #include <sys/pool.h> 30 #include <sys/kernel.h> 31 #include <sys/disk.h> 32 #include <sys/rwlock.h> 33 #include <sys/queue.h> 34 #include <sys/fcntl.h> 35 #include <sys/disklabel.h> 36 #include <sys/mount.h> 37 #include <sys/sensors.h> 38 #include <sys/stat.h> 39 #include <sys/conf.h> 40 #include <sys/uio.h> 41 #include <sys/workq.h> 42 #include <sys/kthread.h> 43 #include <sys/dkio.h> 44 45 #ifdef AOE 46 #include <sys/mbuf.h> 47 #include <net/if_aoe.h> 48 #endif /* AOE */ 49 50 #include <crypto/cryptodev.h> 51 52 #include <scsi/scsi_all.h> 53 #include <scsi/scsiconf.h> 54 #include <scsi/scsi_disk.h> 55 56 #include <dev/softraidvar.h> 57 #include <dev/rndvar.h> 58 59 /* #define SR_FANCY_STATS */ 60 61 #ifdef SR_DEBUG 62 #define SR_FANCY_STATS 63 uint32_t sr_debug = 0 64 /* | SR_D_CMD */ 65 /* | SR_D_MISC */ 66 /* | SR_D_INTR */ 67 /* | SR_D_IOCTL */ 68 /* | SR_D_CCB */ 69 /* | SR_D_WU */ 70 /* | SR_D_META */ 71 /* | SR_D_DIS */ 72 /* | SR_D_STATE */ 73 ; 74 #endif 75 76 struct sr_softc *softraid0; 77 78 int sr_match(struct device *, void *, void *); 79 void sr_attach(struct device *, struct device *, void *); 80 int sr_detach(struct device *, int); 81 void sr_map_root(void); 82 83 struct cfattach softraid_ca = { 84 sizeof(struct sr_softc), sr_match, sr_attach, sr_detach, 85 }; 86 87 struct cfdriver softraid_cd = { 88 NULL, "softraid", DV_DULL 89 }; 90 91 /* scsi & discipline */ 92 void sr_scsi_cmd(struct scsi_xfer *); 93 void sr_minphys(struct buf *, struct scsi_link *); 94 int sr_scsi_probe(struct scsi_link *); 95 void sr_copy_internal_data(struct scsi_xfer *, 96 void *, size_t); 97 int sr_scsi_ioctl(struct scsi_link *, u_long, 98 caddr_t, int); 99 int sr_bio_ioctl(struct device *, u_long, caddr_t); 100 int sr_ioctl_inq(struct sr_softc *, struct bioc_inq *); 101 int sr_ioctl_vol(struct sr_softc *, struct bioc_vol *); 102 int sr_ioctl_disk(struct sr_softc *, struct bioc_disk *); 103 int sr_ioctl_setstate(struct sr_softc *, 104 struct bioc_setstate *); 105 int sr_ioctl_createraid(struct sr_softc *, 106 struct bioc_createraid *, int); 107 int sr_ioctl_deleteraid(struct sr_softc *, 108 struct bioc_deleteraid *); 109 int sr_ioctl_discipline(struct sr_softc *, 110 struct bioc_discipline *); 111 int sr_ioctl_installboot(struct sr_softc *, 112 struct bioc_installboot *); 113 void sr_chunks_unwind(struct sr_softc *, 114 struct sr_chunk_head *); 115 void sr_discipline_free(struct sr_discipline *); 116 void sr_discipline_shutdown(struct sr_discipline *, int); 117 int sr_discipline_init(struct sr_discipline *, int); 118 void sr_set_chunk_state(struct sr_discipline *, int, int); 119 void sr_set_vol_state(struct sr_discipline *); 120 121 /* utility functions */ 122 void sr_shutdown(struct sr_softc *); 123 void sr_shutdownhook(void *); 124 void sr_uuid_generate(struct sr_uuid *); 125 char *sr_uuid_format(struct sr_uuid *); 126 void sr_uuid_print(struct sr_uuid *, int); 127 void sr_checksum_print(u_int8_t *); 128 int sr_boot_assembly(struct sr_softc *); 129 int sr_already_assembled(struct sr_discipline *); 130 int sr_hotspare(struct sr_softc *, dev_t); 131 void sr_hotspare_rebuild(struct sr_discipline *); 132 int sr_rebuild_init(struct sr_discipline *, dev_t, int); 133 void sr_rebuild(void *); 134 void sr_rebuild_thread(void *); 135 void sr_roam_chunks(struct sr_discipline *); 136 int sr_chunk_in_use(struct sr_softc *, dev_t); 137 void sr_startwu_callback(void *, void *); 138 int sr_rw(struct sr_softc *, dev_t, char *, size_t, 139 daddr64_t, long); 140 141 /* don't include these on RAMDISK */ 142 #ifndef SMALL_KERNEL 143 void sr_sensors_refresh(void *); 144 int sr_sensors_create(struct sr_discipline *); 145 void sr_sensors_delete(struct sr_discipline *); 146 #endif 147 148 /* metadata */ 149 int sr_meta_probe(struct sr_discipline *, dev_t *, int); 150 int sr_meta_attach(struct sr_discipline *, int, int); 151 int sr_meta_rw(struct sr_discipline *, dev_t, void *, 152 size_t, daddr64_t, long); 153 int sr_meta_clear(struct sr_discipline *); 154 void sr_meta_init(struct sr_discipline *, int, int); 155 void sr_meta_init_complete(struct sr_discipline *); 156 void sr_meta_opt_handler(struct sr_discipline *, 157 struct sr_meta_opt_hdr *); 158 159 /* hotplug magic */ 160 void sr_disk_attach(struct disk *, int); 161 162 struct sr_hotplug_list { 163 void (*sh_hotplug)(struct sr_discipline *, 164 struct disk *, int); 165 struct sr_discipline *sh_sd; 166 167 SLIST_ENTRY(sr_hotplug_list) shl_link; 168 }; 169 SLIST_HEAD(sr_hotplug_list_head, sr_hotplug_list); 170 171 struct sr_hotplug_list_head sr_hotplug_callbacks; 172 extern void (*softraid_disk_attach)(struct disk *, int); 173 174 /* scsi glue */ 175 struct scsi_adapter sr_switch = { 176 sr_scsi_cmd, sr_minphys, sr_scsi_probe, NULL, sr_scsi_ioctl 177 }; 178 179 /* native metadata format */ 180 int sr_meta_native_bootprobe(struct sr_softc *, dev_t, 181 struct sr_boot_chunk_head *); 182 #define SR_META_NOTCLAIMED (0) 183 #define SR_META_CLAIMED (1) 184 int sr_meta_native_probe(struct sr_softc *, 185 struct sr_chunk *); 186 int sr_meta_native_attach(struct sr_discipline *, int); 187 int sr_meta_native_write(struct sr_discipline *, dev_t, 188 struct sr_metadata *,void *); 189 190 #ifdef SR_DEBUG 191 void sr_meta_print(struct sr_metadata *); 192 #else 193 #define sr_meta_print(m) 194 #endif 195 196 /* the metadata driver should remain stateless */ 197 struct sr_meta_driver { 198 daddr64_t smd_offset; /* metadata location */ 199 u_int32_t smd_size; /* size of metadata */ 200 201 int (*smd_probe)(struct sr_softc *, 202 struct sr_chunk *); 203 int (*smd_attach)(struct sr_discipline *, int); 204 int (*smd_detach)(struct sr_discipline *); 205 int (*smd_read)(struct sr_discipline *, dev_t, 206 struct sr_metadata *, void *); 207 int (*smd_write)(struct sr_discipline *, dev_t, 208 struct sr_metadata *, void *); 209 int (*smd_validate)(struct sr_discipline *, 210 struct sr_metadata *, void *); 211 } smd[] = { 212 { SR_META_OFFSET, SR_META_SIZE * 512, 213 sr_meta_native_probe, sr_meta_native_attach, NULL, 214 sr_meta_native_read, sr_meta_native_write, NULL }, 215 { 0, 0, NULL, NULL, NULL, NULL } 216 }; 217 218 int 219 sr_meta_attach(struct sr_discipline *sd, int chunk_no, int force) 220 { 221 struct sr_softc *sc = sd->sd_sc; 222 struct sr_chunk_head *cl; 223 struct sr_chunk *ch_entry, *chunk1, *chunk2; 224 int rv = 1, i = 0; 225 226 DNPRINTF(SR_D_META, "%s: sr_meta_attach(%d)\n", DEVNAME(sc)); 227 228 /* in memory copy of metadata */ 229 sd->sd_meta = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO | M_NOWAIT); 230 if (!sd->sd_meta) { 231 sr_error(sc, "could not allocate memory for metadata"); 232 goto bad; 233 } 234 235 if (sd->sd_meta_type != SR_META_F_NATIVE) { 236 /* in memory copy of foreign metadata */ 237 sd->sd_meta_foreign = malloc(smd[sd->sd_meta_type].smd_size, 238 M_DEVBUF, M_ZERO | M_NOWAIT); 239 if (!sd->sd_meta_foreign) { 240 /* unwind frees sd_meta */ 241 sr_error(sc, "could not allocate memory for foreign " 242 "metadata"); 243 goto bad; 244 } 245 } 246 247 /* we have a valid list now create an array index */ 248 cl = &sd->sd_vol.sv_chunk_list; 249 sd->sd_vol.sv_chunks = malloc(sizeof(struct sr_chunk *) * chunk_no, 250 M_DEVBUF, M_WAITOK | M_ZERO); 251 252 /* fill out chunk array */ 253 i = 0; 254 SLIST_FOREACH(ch_entry, cl, src_link) 255 sd->sd_vol.sv_chunks[i++] = ch_entry; 256 257 /* attach metadata */ 258 if (smd[sd->sd_meta_type].smd_attach(sd, force)) 259 goto bad; 260 261 /* Force chunks into correct order now that metadata is attached. */ 262 SLIST_FOREACH(ch_entry, cl, src_link) 263 SLIST_REMOVE(cl, ch_entry, sr_chunk, src_link); 264 for (i = 0; i < chunk_no; i++) { 265 ch_entry = sd->sd_vol.sv_chunks[i]; 266 chunk2 = NULL; 267 SLIST_FOREACH(chunk1, cl, src_link) { 268 if (chunk1->src_meta.scmi.scm_chunk_id > 269 ch_entry->src_meta.scmi.scm_chunk_id) 270 break; 271 chunk2 = chunk1; 272 } 273 if (chunk2 == NULL) 274 SLIST_INSERT_HEAD(cl, ch_entry, src_link); 275 else 276 SLIST_INSERT_AFTER(chunk2, ch_entry, src_link); 277 } 278 i = 0; 279 SLIST_FOREACH(ch_entry, cl, src_link) 280 sd->sd_vol.sv_chunks[i++] = ch_entry; 281 282 rv = 0; 283 bad: 284 return (rv); 285 } 286 287 int 288 sr_meta_probe(struct sr_discipline *sd, dev_t *dt, int no_chunk) 289 { 290 struct sr_softc *sc = sd->sd_sc; 291 struct vnode *vn; 292 struct sr_chunk *ch_entry, *ch_prev = NULL; 293 struct sr_chunk_head *cl; 294 char devname[32]; 295 int i, d, type, found, prevf, error; 296 dev_t dev; 297 298 DNPRINTF(SR_D_META, "%s: sr_meta_probe(%d)\n", DEVNAME(sc), no_chunk); 299 300 if (no_chunk == 0) 301 goto unwind; 302 303 cl = &sd->sd_vol.sv_chunk_list; 304 305 for (d = 0, prevf = SR_META_F_INVALID; d < no_chunk; d++) { 306 ch_entry = malloc(sizeof(struct sr_chunk), M_DEVBUF, 307 M_WAITOK | M_ZERO); 308 /* keep disks in user supplied order */ 309 if (ch_prev) 310 SLIST_INSERT_AFTER(ch_prev, ch_entry, src_link); 311 else 312 SLIST_INSERT_HEAD(cl, ch_entry, src_link); 313 ch_prev = ch_entry; 314 dev = dt[d]; 315 ch_entry->src_dev_mm = dev; 316 317 if (dev == NODEV) { 318 ch_entry->src_meta.scm_status = BIOC_SDOFFLINE; 319 continue; 320 } else { 321 sr_meta_getdevname(sc, dev, devname, sizeof(devname)); 322 if (bdevvp(dev, &vn)) { 323 sr_error(sc, "sr_meta_probe: cannot allocate " 324 "vnode"); 325 goto unwind; 326 } 327 328 /* 329 * XXX leaving dev open for now; move this to attach 330 * and figure out the open/close dance for unwind. 331 */ 332 error = VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc); 333 if (error) { 334 DNPRINTF(SR_D_META,"%s: sr_meta_probe can't " 335 "open %s\n", DEVNAME(sc), devname); 336 vput(vn); 337 goto unwind; 338 } 339 340 strlcpy(ch_entry->src_devname, devname, 341 sizeof(ch_entry->src_devname)); 342 ch_entry->src_vn = vn; 343 } 344 345 /* determine if this is a device we understand */ 346 for (i = 0, found = SR_META_F_INVALID; smd[i].smd_probe; i++) { 347 type = smd[i].smd_probe(sc, ch_entry); 348 if (type == SR_META_F_INVALID) 349 continue; 350 else { 351 found = type; 352 break; 353 } 354 } 355 356 if (found == SR_META_F_INVALID) 357 goto unwind; 358 if (prevf == SR_META_F_INVALID) 359 prevf = found; 360 if (prevf != found) { 361 DNPRINTF(SR_D_META, "%s: prevf != found\n", 362 DEVNAME(sc)); 363 goto unwind; 364 } 365 } 366 367 return (prevf); 368 unwind: 369 return (SR_META_F_INVALID); 370 } 371 372 void 373 sr_meta_getdevname(struct sr_softc *sc, dev_t dev, char *buf, int size) 374 { 375 int maj, unit, part; 376 char *name; 377 378 DNPRINTF(SR_D_META, "%s: sr_meta_getdevname(%p, %d)\n", 379 DEVNAME(sc), buf, size); 380 381 if (!buf) 382 return; 383 384 maj = major(dev); 385 part = DISKPART(dev); 386 unit = DISKUNIT(dev); 387 388 name = findblkname(maj); 389 if (name == NULL) 390 return; 391 392 snprintf(buf, size, "%s%d%c", name, unit, part + 'a'); 393 } 394 395 int 396 sr_rw(struct sr_softc *sc, dev_t dev, char *buf, size_t size, daddr64_t offset, 397 long flags) 398 { 399 struct vnode *vp; 400 struct buf b; 401 size_t bufsize, dma_bufsize; 402 int rv = 1; 403 char *dma_buf; 404 405 DNPRINTF(SR_D_MISC, "%s: sr_rw(0x%x, %p, %d, %llu 0x%x)\n", 406 DEVNAME(sc), dev, buf, size, offset, flags); 407 408 dma_bufsize = (size > MAXPHYS) ? MAXPHYS : size; 409 dma_buf = dma_alloc(dma_bufsize, PR_WAITOK); 410 411 if (bdevvp(dev, &vp)) { 412 printf("%s: sr_rw: failed to allocate vnode\n", DEVNAME(sc)); 413 goto done; 414 } 415 416 while (size > 0) { 417 DNPRINTF(SR_D_MISC, "%s: dma_buf %p, size %d, offset %llu)\n", 418 DEVNAME(sc), dma_buf, size, offset); 419 420 bufsize = (size > MAXPHYS) ? MAXPHYS : size; 421 if (flags == B_WRITE) 422 bcopy(buf, dma_buf, bufsize); 423 424 bzero(&b, sizeof(b)); 425 b.b_flags = flags | B_PHYS; 426 b.b_proc = curproc; 427 b.b_dev = dev; 428 b.b_iodone = NULL; 429 b.b_error = 0; 430 b.b_blkno = offset; 431 b.b_data = dma_buf; 432 b.b_bcount = bufsize; 433 b.b_bufsize = bufsize; 434 b.b_resid = bufsize; 435 b.b_vp = vp; 436 437 if ((b.b_flags & B_READ) == 0) 438 vp->v_numoutput++; 439 440 LIST_INIT(&b.b_dep); 441 VOP_STRATEGY(&b); 442 biowait(&b); 443 444 if (b.b_flags & B_ERROR) { 445 printf("%s: I/O error %d on dev 0x%x at block %llu\n", 446 DEVNAME(sc), b.b_error, dev, b.b_blkno); 447 goto done; 448 } 449 450 if (flags == B_READ) 451 bcopy(dma_buf, buf, bufsize); 452 453 size -= bufsize; 454 buf += bufsize; 455 offset += howmany(bufsize, DEV_BSIZE); 456 } 457 458 rv = 0; 459 460 done: 461 if (vp) 462 vput(vp); 463 464 dma_free(dma_buf, dma_bufsize); 465 466 return (rv); 467 } 468 469 int 470 sr_meta_rw(struct sr_discipline *sd, dev_t dev, void *md, size_t size, 471 daddr64_t offset, long flags) 472 { 473 int rv = 1; 474 475 DNPRINTF(SR_D_META, "%s: sr_meta_rw(0x%x, %p, %d, %llu 0x%x)\n", 476 DEVNAME(sd->sd_sc), dev, md, size, offset, flags); 477 478 if (md == NULL) { 479 printf("%s: sr_meta_rw: invalid metadata pointer\n", 480 DEVNAME(sd->sd_sc)); 481 goto done; 482 } 483 484 rv = sr_rw(sd->sd_sc, dev, md, size, offset, flags); 485 486 done: 487 return (rv); 488 } 489 490 int 491 sr_meta_clear(struct sr_discipline *sd) 492 { 493 struct sr_softc *sc = sd->sd_sc; 494 struct sr_chunk_head *cl = &sd->sd_vol.sv_chunk_list; 495 struct sr_chunk *ch_entry; 496 void *m; 497 int rv = 1; 498 499 DNPRINTF(SR_D_META, "%s: sr_meta_clear\n", DEVNAME(sc)); 500 501 if (sd->sd_meta_type != SR_META_F_NATIVE) { 502 sr_error(sc, "cannot clear foreign metadata"); 503 goto done; 504 } 505 506 m = malloc(SR_META_SIZE * 512, M_DEVBUF, M_WAITOK | M_ZERO); 507 SLIST_FOREACH(ch_entry, cl, src_link) { 508 if (sr_meta_native_write(sd, ch_entry->src_dev_mm, m, NULL)) { 509 /* XXX mark disk offline */ 510 DNPRINTF(SR_D_META, "%s: sr_meta_clear failed to " 511 "clear %s\n", ch_entry->src_devname); 512 rv++; 513 continue; 514 } 515 bzero(&ch_entry->src_meta, sizeof(ch_entry->src_meta)); 516 } 517 518 bzero(sd->sd_meta, SR_META_SIZE * 512); 519 520 free(m, M_DEVBUF); 521 rv = 0; 522 done: 523 return (rv); 524 } 525 526 void 527 sr_meta_init(struct sr_discipline *sd, int level, int no_chunk) 528 { 529 struct sr_softc *sc = sd->sd_sc; 530 struct sr_metadata *sm = sd->sd_meta; 531 struct sr_chunk_head *cl = &sd->sd_vol.sv_chunk_list; 532 struct sr_meta_chunk *scm; 533 struct sr_chunk *chunk; 534 int cid = 0; 535 u_int64_t max_chunk_sz = 0, min_chunk_sz = 0; 536 537 DNPRINTF(SR_D_META, "%s: sr_meta_init\n", DEVNAME(sc)); 538 539 if (!sm) 540 return; 541 542 /* Initialise volume metadata. */ 543 sm->ssdi.ssd_magic = SR_MAGIC; 544 sm->ssdi.ssd_version = SR_META_VERSION; 545 sm->ssdi.ssd_vol_flags = sd->sd_meta_flags; 546 sm->ssdi.ssd_volid = 0; 547 sm->ssdi.ssd_chunk_no = no_chunk; 548 sm->ssdi.ssd_level = level; 549 550 sm->ssd_data_offset = SR_DATA_OFFSET; 551 sm->ssd_ondisk = 0; 552 553 sr_uuid_generate(&sm->ssdi.ssd_uuid); 554 555 /* Initialise chunk metadata and get min/max chunk sizes. */ 556 SLIST_FOREACH(chunk, cl, src_link) { 557 scm = &chunk->src_meta; 558 scm->scmi.scm_size = chunk->src_size; 559 scm->scmi.scm_chunk_id = cid++; 560 scm->scm_status = BIOC_SDONLINE; 561 scm->scmi.scm_volid = 0; 562 strlcpy(scm->scmi.scm_devname, chunk->src_devname, 563 sizeof(scm->scmi.scm_devname)); 564 bcopy(&sm->ssdi.ssd_uuid, &scm->scmi.scm_uuid, 565 sizeof(scm->scmi.scm_uuid)); 566 sr_checksum(sc, scm, &scm->scm_checksum, 567 sizeof(scm->scm_checksum)); 568 569 if (min_chunk_sz == 0) 570 min_chunk_sz = scm->scmi.scm_size; 571 min_chunk_sz = MIN(min_chunk_sz, scm->scmi.scm_size); 572 max_chunk_sz = MAX(max_chunk_sz, scm->scmi.scm_size); 573 } 574 575 /* Equalize chunk sizes. */ 576 SLIST_FOREACH(chunk, cl, src_link) 577 chunk->src_meta.scmi.scm_coerced_size = min_chunk_sz; 578 579 sd->sd_vol.sv_chunk_minsz = min_chunk_sz; 580 sd->sd_vol.sv_chunk_maxsz = max_chunk_sz; 581 } 582 583 void 584 sr_meta_init_complete(struct sr_discipline *sd) 585 { 586 #ifdef SR_DEBUG 587 struct sr_softc *sc = sd->sd_sc; 588 #endif 589 struct sr_metadata *sm = sd->sd_meta; 590 591 DNPRINTF(SR_D_META, "%s: sr_meta_complete\n", DEVNAME(sc)); 592 593 /* Complete initialisation of volume metadata. */ 594 strlcpy(sm->ssdi.ssd_vendor, "OPENBSD", sizeof(sm->ssdi.ssd_vendor)); 595 snprintf(sm->ssdi.ssd_product, sizeof(sm->ssdi.ssd_product), 596 "SR %s", sd->sd_name); 597 snprintf(sm->ssdi.ssd_revision, sizeof(sm->ssdi.ssd_revision), 598 "%03d", sm->ssdi.ssd_version); 599 } 600 601 void 602 sr_meta_opt_handler(struct sr_discipline *sd, struct sr_meta_opt_hdr *om) 603 { 604 if (om->som_type != SR_OPT_BOOT) 605 panic("unknown optional metadata type"); 606 } 607 608 void 609 sr_meta_save_callback(void *arg1, void *arg2) 610 { 611 struct sr_discipline *sd = arg1; 612 int s; 613 614 s = splbio(); 615 616 if (sr_meta_save(arg1, SR_META_DIRTY)) 617 printf("%s: save metadata failed\n", DEVNAME(sd->sd_sc)); 618 619 sd->sd_must_flush = 0; 620 splx(s); 621 } 622 623 int 624 sr_meta_save(struct sr_discipline *sd, u_int32_t flags) 625 { 626 struct sr_softc *sc = sd->sd_sc; 627 struct sr_metadata *sm = sd->sd_meta, *m; 628 struct sr_meta_driver *s; 629 struct sr_chunk *src; 630 struct sr_meta_chunk *cm; 631 struct sr_workunit wu; 632 struct sr_meta_opt_hdr *omh; 633 struct sr_meta_opt_item *omi; 634 int i; 635 636 DNPRINTF(SR_D_META, "%s: sr_meta_save %s\n", 637 DEVNAME(sc), sd->sd_meta->ssd_devname); 638 639 if (!sm) { 640 printf("%s: no in memory copy of metadata\n", DEVNAME(sc)); 641 goto bad; 642 } 643 644 /* meta scratchpad */ 645 s = &smd[sd->sd_meta_type]; 646 m = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO | M_NOWAIT); 647 if (!m) { 648 printf("%s: could not allocate metadata scratch area\n", 649 DEVNAME(sc)); 650 goto bad; 651 } 652 653 /* from here on out metadata is updated */ 654 restart: 655 sm->ssd_ondisk++; 656 sm->ssd_meta_flags = flags; 657 bcopy(sm, m, sizeof(*m)); 658 659 /* Chunk metadata. */ 660 cm = (struct sr_meta_chunk *)(m + 1); 661 for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) { 662 src = sd->sd_vol.sv_chunks[i]; 663 bcopy(&src->src_meta, cm, sizeof(*cm)); 664 cm++; 665 } 666 667 /* Optional metadata. */ 668 omh = (struct sr_meta_opt_hdr *)(cm); 669 SLIST_FOREACH(omi, &sd->sd_meta_opt, omi_link) { 670 DNPRINTF(SR_D_META, "%s: saving optional metadata type %u with " 671 "length %u\n", DEVNAME(sc), omi->omi_som->som_type, 672 omi->omi_som->som_length); 673 bzero(&omi->omi_som->som_checksum, MD5_DIGEST_LENGTH); 674 sr_checksum(sc, omi->omi_som, &omi->omi_som->som_checksum, 675 omi->omi_som->som_length); 676 bcopy(omi->omi_som, omh, omi->omi_som->som_length); 677 omh = (struct sr_meta_opt_hdr *)((u_int8_t *)omh + 678 omi->omi_som->som_length); 679 } 680 681 for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) { 682 src = sd->sd_vol.sv_chunks[i]; 683 684 /* skip disks that are offline */ 685 if (src->src_meta.scm_status == BIOC_SDOFFLINE) 686 continue; 687 688 /* calculate metadata checksum for correct chunk */ 689 m->ssdi.ssd_chunk_id = i; 690 sr_checksum(sc, m, &m->ssd_checksum, 691 sizeof(struct sr_meta_invariant)); 692 693 #ifdef SR_DEBUG 694 DNPRINTF(SR_D_META, "%s: sr_meta_save %s: volid: %d " 695 "chunkid: %d checksum: ", 696 DEVNAME(sc), src->src_meta.scmi.scm_devname, 697 m->ssdi.ssd_volid, m->ssdi.ssd_chunk_id); 698 699 if (sr_debug & SR_D_META) 700 sr_checksum_print((u_int8_t *)&m->ssd_checksum); 701 DNPRINTF(SR_D_META, "\n"); 702 sr_meta_print(m); 703 #endif 704 705 /* translate and write to disk */ 706 if (s->smd_write(sd, src->src_dev_mm, m, NULL /* XXX */)) { 707 printf("%s: could not write metadata to %s\n", 708 DEVNAME(sc), src->src_devname); 709 /* restart the meta write */ 710 src->src_meta.scm_status = BIOC_SDOFFLINE; 711 /* XXX recalculate volume status */ 712 goto restart; 713 } 714 } 715 716 /* not all disciplines have sync */ 717 if (sd->sd_scsi_sync) { 718 bzero(&wu, sizeof(wu)); 719 wu.swu_fake = 1; 720 wu.swu_dis = sd; 721 sd->sd_scsi_sync(&wu); 722 } 723 free(m, M_DEVBUF); 724 return (0); 725 bad: 726 return (1); 727 } 728 729 int 730 sr_meta_read(struct sr_discipline *sd) 731 { 732 struct sr_softc *sc = sd->sd_sc; 733 struct sr_chunk_head *cl = &sd->sd_vol.sv_chunk_list; 734 struct sr_metadata *sm; 735 struct sr_chunk *ch_entry; 736 struct sr_meta_chunk *cp; 737 struct sr_meta_driver *s; 738 void *fm = NULL; 739 int no_disk = 0, got_meta = 0; 740 741 DNPRINTF(SR_D_META, "%s: sr_meta_read\n", DEVNAME(sc)); 742 743 sm = malloc(SR_META_SIZE * 512, M_DEVBUF, M_WAITOK | M_ZERO); 744 s = &smd[sd->sd_meta_type]; 745 if (sd->sd_meta_type != SR_META_F_NATIVE) 746 fm = malloc(s->smd_size, M_DEVBUF, M_WAITOK | M_ZERO); 747 748 cp = (struct sr_meta_chunk *)(sm + 1); 749 SLIST_FOREACH(ch_entry, cl, src_link) { 750 /* skip disks that are offline */ 751 if (ch_entry->src_meta.scm_status == BIOC_SDOFFLINE) { 752 DNPRINTF(SR_D_META, 753 "%s: %s chunk marked offline, spoofing status\n", 754 DEVNAME(sc), ch_entry->src_devname); 755 cp++; /* adjust chunk pointer to match failure */ 756 continue; 757 } else if (s->smd_read(sd, ch_entry->src_dev_mm, sm, fm)) { 758 /* read and translate */ 759 /* XXX mark chunk offline, elsewhere!! */ 760 ch_entry->src_meta.scm_status = BIOC_SDOFFLINE; 761 cp++; /* adjust chunk pointer to match failure */ 762 DNPRINTF(SR_D_META, "%s: sr_meta_read failed\n", 763 DEVNAME(sc)); 764 continue; 765 } 766 767 if (sm->ssdi.ssd_magic != SR_MAGIC) { 768 DNPRINTF(SR_D_META, "%s: sr_meta_read !SR_MAGIC\n", 769 DEVNAME(sc)); 770 continue; 771 } 772 773 /* validate metadata */ 774 if (sr_meta_validate(sd, ch_entry->src_dev_mm, sm, fm)) { 775 DNPRINTF(SR_D_META, "%s: invalid metadata\n", 776 DEVNAME(sc)); 777 no_disk = -1; 778 goto done; 779 } 780 781 /* assume first chunk contains metadata */ 782 if (got_meta == 0) { 783 sr_meta_opt_load(sc, sm, &sd->sd_meta_opt); 784 bcopy(sm, sd->sd_meta, sizeof(*sd->sd_meta)); 785 got_meta = 1; 786 } 787 788 bcopy(cp, &ch_entry->src_meta, sizeof(ch_entry->src_meta)); 789 790 no_disk++; 791 cp++; 792 } 793 794 free(sm, M_DEVBUF); 795 if (fm) 796 free(fm, M_DEVBUF); 797 798 done: 799 DNPRINTF(SR_D_META, "%s: sr_meta_read found %d parts\n", DEVNAME(sc), 800 no_disk); 801 return (no_disk); 802 } 803 804 void 805 sr_meta_opt_load(struct sr_softc *sc, struct sr_metadata *sm, 806 struct sr_meta_opt_head *som) 807 { 808 struct sr_meta_opt_hdr *omh; 809 struct sr_meta_opt_item *omi; 810 u_int8_t checksum[MD5_DIGEST_LENGTH]; 811 int i; 812 813 /* Process optional metadata. */ 814 omh = (struct sr_meta_opt_hdr *)((u_int8_t *)(sm + 1) + 815 sizeof(struct sr_meta_chunk) * sm->ssdi.ssd_chunk_no); 816 for (i = 0; i < sm->ssdi.ssd_opt_no; i++) { 817 818 omi = malloc(sizeof(struct sr_meta_opt_item), M_DEVBUF, 819 M_WAITOK | M_ZERO); 820 SLIST_INSERT_HEAD(som, omi, omi_link); 821 822 if (omh->som_length == 0) { 823 824 /* Load old fixed length optional metadata. */ 825 DNPRINTF(SR_D_META, "%s: old optional metadata of type " 826 "%u\n", DEVNAME(sc), omh->som_type); 827 828 /* Validate checksum. */ 829 sr_checksum(sc, (void *)omh, &checksum, 830 SR_OLD_META_OPT_SIZE - MD5_DIGEST_LENGTH); 831 if (bcmp(&checksum, (void *)omh + SR_OLD_META_OPT_MD5, 832 sizeof(checksum))) 833 panic("%s: invalid optional metadata " 834 "checksum", DEVNAME(sc)); 835 836 /* Determine correct length. */ 837 switch (omh->som_type) { 838 case SR_OPT_CRYPTO: 839 omh->som_length = sizeof(struct sr_meta_crypto); 840 break; 841 case SR_OPT_BOOT: 842 omh->som_length = sizeof(struct sr_meta_boot); 843 break; 844 case SR_OPT_KEYDISK: 845 omh->som_length = 846 sizeof(struct sr_meta_keydisk); 847 break; 848 default: 849 panic("unknown old optional metadata " 850 "type %u\n", omh->som_type); 851 } 852 853 omi->omi_som = malloc(omh->som_length, M_DEVBUF, 854 M_WAITOK | M_ZERO); 855 bcopy((u_int8_t *)omh + SR_OLD_META_OPT_OFFSET, 856 (u_int8_t *)omi->omi_som + sizeof(*omi->omi_som), 857 omh->som_length - sizeof(*omi->omi_som)); 858 omi->omi_som->som_type = omh->som_type; 859 omi->omi_som->som_length = omh->som_length; 860 861 omh = (struct sr_meta_opt_hdr *)((void *)omh + 862 SR_OLD_META_OPT_SIZE); 863 } else { 864 865 /* Load variable length optional metadata. */ 866 DNPRINTF(SR_D_META, "%s: optional metadata of type %u, " 867 "length %u\n", DEVNAME(sc), omh->som_type, 868 omh->som_length); 869 omi->omi_som = malloc(omh->som_length, M_DEVBUF, 870 M_WAITOK | M_ZERO); 871 bcopy(omh, omi->omi_som, omh->som_length); 872 873 /* Validate checksum. */ 874 bcopy(&omi->omi_som->som_checksum, &checksum, 875 MD5_DIGEST_LENGTH); 876 bzero(&omi->omi_som->som_checksum, MD5_DIGEST_LENGTH); 877 sr_checksum(sc, omi->omi_som, 878 &omi->omi_som->som_checksum, omh->som_length); 879 if (bcmp(&checksum, &omi->omi_som->som_checksum, 880 sizeof(checksum))) 881 panic("%s: invalid optional metadata checksum", 882 DEVNAME(sc)); 883 884 omh = (struct sr_meta_opt_hdr *)((void *)omh + 885 omh->som_length); 886 } 887 } 888 } 889 890 int 891 sr_meta_validate(struct sr_discipline *sd, dev_t dev, struct sr_metadata *sm, 892 void *fm) 893 { 894 struct sr_softc *sc = sd->sd_sc; 895 struct sr_meta_driver *s; 896 #ifdef SR_DEBUG 897 struct sr_meta_chunk *mc; 898 #endif 899 u_int8_t checksum[MD5_DIGEST_LENGTH]; 900 char devname[32]; 901 int rv = 1; 902 903 DNPRINTF(SR_D_META, "%s: sr_meta_validate(%p)\n", DEVNAME(sc), sm); 904 905 sr_meta_getdevname(sc, dev, devname, sizeof(devname)); 906 907 s = &smd[sd->sd_meta_type]; 908 if (sd->sd_meta_type != SR_META_F_NATIVE) 909 if (s->smd_validate(sd, sm, fm)) { 910 sr_error(sc, "invalid foreign metadata"); 911 goto done; 912 } 913 914 /* 915 * at this point all foreign metadata has been translated to the native 916 * format and will be treated just like the native format 917 */ 918 919 if (sm->ssdi.ssd_magic != SR_MAGIC) { 920 sr_error(sc, "not valid softraid metadata"); 921 goto done; 922 } 923 924 /* Verify metadata checksum. */ 925 sr_checksum(sc, sm, &checksum, sizeof(struct sr_meta_invariant)); 926 if (bcmp(&checksum, &sm->ssd_checksum, sizeof(checksum))) { 927 sr_error(sc, "invalid metadata checksum"); 928 goto done; 929 } 930 931 /* Handle changes between versions. */ 932 if (sm->ssdi.ssd_version == 3) { 933 934 /* 935 * Version 3 - update metadata version and fix up data offset 936 * value since this did not exist in version 3. 937 */ 938 if (sm->ssd_data_offset == 0) 939 sm->ssd_data_offset = SR_META_V3_DATA_OFFSET; 940 941 } else if (sm->ssdi.ssd_version == 4) { 942 943 /* 944 * Version 4 - original metadata format did not store 945 * data offset so fix this up if necessary. 946 */ 947 if (sm->ssd_data_offset == 0) 948 sm->ssd_data_offset = SR_DATA_OFFSET; 949 950 } else if (sm->ssdi.ssd_version == SR_META_VERSION) { 951 952 /* 953 * Version 5 - variable length optional metadata. Migration 954 * from earlier fixed length optional metadata is handled 955 * in sr_meta_read(). 956 */ 957 958 } else { 959 960 sr_error(sc, "cannot read metadata version %u on %s, " 961 "expected version %u or earlier", 962 sm->ssdi.ssd_version, devname, SR_META_VERSION); 963 goto done; 964 965 } 966 967 /* Update version number and revision string. */ 968 sm->ssdi.ssd_version = SR_META_VERSION; 969 snprintf(sm->ssdi.ssd_revision, sizeof(sm->ssdi.ssd_revision), 970 "%03d", SR_META_VERSION); 971 972 #ifdef SR_DEBUG 973 /* warn if disk changed order */ 974 mc = (struct sr_meta_chunk *)(sm + 1); 975 if (strncmp(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname, devname, 976 sizeof(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname))) 977 DNPRINTF(SR_D_META, "%s: roaming device %s -> %s\n", 978 DEVNAME(sc), mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname, 979 devname); 980 #endif 981 982 /* we have meta data on disk */ 983 DNPRINTF(SR_D_META, "%s: sr_meta_validate valid metadata %s\n", 984 DEVNAME(sc), devname); 985 986 rv = 0; 987 done: 988 return (rv); 989 } 990 991 int 992 sr_meta_native_bootprobe(struct sr_softc *sc, dev_t devno, 993 struct sr_boot_chunk_head *bch) 994 { 995 struct vnode *vn; 996 struct disklabel label; 997 struct sr_metadata *md = NULL; 998 struct sr_discipline *fake_sd = NULL; 999 struct sr_boot_chunk *bc; 1000 char devname[32]; 1001 dev_t chrdev, rawdev; 1002 int error, i; 1003 int rv = SR_META_NOTCLAIMED; 1004 1005 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe\n", DEVNAME(sc)); 1006 1007 /* 1008 * Use character raw device to avoid SCSI complaints about missing 1009 * media on removable media devices. 1010 */ 1011 chrdev = blktochr(devno); 1012 rawdev = MAKEDISKDEV(major(chrdev), DISKUNIT(devno), RAW_PART); 1013 if (cdevvp(rawdev, &vn)) { 1014 sr_error(sc, "sr_meta_native_bootprobe: cannot allocate vnode"); 1015 goto done; 1016 } 1017 1018 /* open device */ 1019 error = VOP_OPEN(vn, FREAD, NOCRED, curproc); 1020 if (error) { 1021 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe open " 1022 "failed\n", DEVNAME(sc)); 1023 vput(vn); 1024 goto done; 1025 } 1026 1027 /* get disklabel */ 1028 error = VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD, NOCRED, 1029 curproc); 1030 if (error) { 1031 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe ioctl " 1032 "failed\n", DEVNAME(sc)); 1033 VOP_CLOSE(vn, FREAD, NOCRED, curproc); 1034 vput(vn); 1035 goto done; 1036 } 1037 1038 /* we are done, close device */ 1039 error = VOP_CLOSE(vn, FREAD, NOCRED, curproc); 1040 if (error) { 1041 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe close " 1042 "failed\n", DEVNAME(sc)); 1043 vput(vn); 1044 goto done; 1045 } 1046 vput(vn); 1047 1048 /* Make sure this is a 512-byte/sector device. */ 1049 if (label.d_secsize != DEV_BSIZE) { 1050 DNPRINTF(SR_D_META, "%s: %s has unsupported sector size (%d)", 1051 DEVNAME(sc), devname, label.d_secsize); 1052 goto done; 1053 } 1054 1055 md = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO | M_NOWAIT); 1056 if (md == NULL) { 1057 sr_error(sc, "not enough memory for metadata buffer"); 1058 goto done; 1059 } 1060 1061 /* create fake sd to use utility functions */ 1062 fake_sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, 1063 M_ZERO | M_NOWAIT); 1064 if (fake_sd == NULL) { 1065 sr_error(sc, "not enough memory for fake discipline"); 1066 goto done; 1067 } 1068 fake_sd->sd_sc = sc; 1069 fake_sd->sd_meta_type = SR_META_F_NATIVE; 1070 1071 for (i = 0; i < MAXPARTITIONS; i++) { 1072 if (label.d_partitions[i].p_fstype != FS_RAID) 1073 continue; 1074 1075 /* open partition */ 1076 rawdev = MAKEDISKDEV(major(devno), DISKUNIT(devno), i); 1077 if (bdevvp(rawdev, &vn)) { 1078 sr_error(sc, "sr_meta_native_bootprobe: cannot " 1079 "allocate vnode for partition"); 1080 goto done; 1081 } 1082 error = VOP_OPEN(vn, FREAD, NOCRED, curproc); 1083 if (error) { 1084 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe " 1085 "open failed, partition %d\n", 1086 DEVNAME(sc), i); 1087 vput(vn); 1088 continue; 1089 } 1090 1091 if (sr_meta_native_read(fake_sd, rawdev, md, NULL)) { 1092 sr_error(sc, "native bootprobe could not read native " 1093 "metadata"); 1094 VOP_CLOSE(vn, FREAD, NOCRED, curproc); 1095 vput(vn); 1096 continue; 1097 } 1098 1099 /* are we a softraid partition? */ 1100 if (md->ssdi.ssd_magic != SR_MAGIC) { 1101 VOP_CLOSE(vn, FREAD, NOCRED, curproc); 1102 vput(vn); 1103 continue; 1104 } 1105 1106 sr_meta_getdevname(sc, rawdev, devname, sizeof(devname)); 1107 if (sr_meta_validate(fake_sd, rawdev, md, NULL) == 0) { 1108 if (md->ssdi.ssd_vol_flags & BIOC_SCNOAUTOASSEMBLE) { 1109 DNPRINTF(SR_D_META, "%s: don't save %s\n", 1110 DEVNAME(sc), devname); 1111 } else { 1112 /* XXX fix M_WAITOK, this is boot time */ 1113 bc = malloc(sizeof(struct sr_boot_chunk), 1114 M_DEVBUF, M_WAITOK | M_ZERO); 1115 bc->sbc_metadata = 1116 malloc(sizeof(struct sr_metadata), 1117 M_DEVBUF, M_WAITOK | M_ZERO); 1118 bcopy(md, bc->sbc_metadata, 1119 sizeof(struct sr_metadata)); 1120 bc->sbc_mm = rawdev; 1121 SLIST_INSERT_HEAD(bch, bc, sbc_link); 1122 rv = SR_META_CLAIMED; 1123 } 1124 } 1125 1126 /* we are done, close partition */ 1127 VOP_CLOSE(vn, FREAD, NOCRED, curproc); 1128 vput(vn); 1129 } 1130 1131 done: 1132 if (fake_sd) 1133 free(fake_sd, M_DEVBUF); 1134 if (md) 1135 free(md, M_DEVBUF); 1136 1137 return (rv); 1138 } 1139 1140 int 1141 sr_boot_assembly(struct sr_softc *sc) 1142 { 1143 struct sr_boot_volume_head bvh; 1144 struct sr_boot_chunk_head bch, kdh; 1145 struct sr_boot_volume *bv, *bv1, *bv2; 1146 struct sr_boot_chunk *bc, *bcnext, *bc1, *bc2; 1147 struct sr_disk_head sdklist; 1148 struct sr_disk *sdk; 1149 struct disk *dk; 1150 struct bioc_createraid bcr; 1151 struct sr_meta_chunk *hm; 1152 struct sr_chunk_head *cl; 1153 struct sr_chunk *hotspare, *chunk, *last; 1154 u_int64_t *ondisk = NULL; 1155 dev_t *devs = NULL; 1156 char devname[32]; 1157 int rv = 0, i; 1158 1159 DNPRINTF(SR_D_META, "%s: sr_boot_assembly\n", DEVNAME(sc)); 1160 1161 SLIST_INIT(&sdklist); 1162 SLIST_INIT(&bvh); 1163 SLIST_INIT(&bch); 1164 SLIST_INIT(&kdh); 1165 1166 dk = TAILQ_FIRST(&disklist); 1167 while (dk != TAILQ_END(&disklist)) { 1168 1169 /* See if this disk has been checked. */ 1170 SLIST_FOREACH(sdk, &sdklist, sdk_link) 1171 if (sdk->sdk_devno == dk->dk_devno) 1172 break; 1173 1174 if (sdk != NULL || dk->dk_devno == NODEV) { 1175 dk = TAILQ_NEXT(dk, dk_link); 1176 continue; 1177 } 1178 1179 /* Add this disk to the list that we've checked. */ 1180 sdk = malloc(sizeof(struct sr_disk), M_DEVBUF, 1181 M_NOWAIT | M_CANFAIL | M_ZERO); 1182 if (sdk == NULL) 1183 goto unwind; 1184 sdk->sdk_devno = dk->dk_devno; 1185 SLIST_INSERT_HEAD(&sdklist, sdk, sdk_link); 1186 1187 /* Only check sd(4) and wd(4) devices. */ 1188 if (strncmp(dk->dk_name, "sd", 2) && 1189 strncmp(dk->dk_name, "wd", 2)) { 1190 dk = TAILQ_NEXT(dk, dk_link); 1191 continue; 1192 } 1193 1194 /* native softraid uses partitions */ 1195 sr_meta_native_bootprobe(sc, dk->dk_devno, &bch); 1196 1197 /* probe non-native disks if native failed. */ 1198 1199 /* Restart scan since we may have slept. */ 1200 dk = TAILQ_FIRST(&disklist); 1201 } 1202 1203 /* 1204 * Create a list of volumes and associate chunks with each volume. 1205 */ 1206 for (bc = SLIST_FIRST(&bch); bc != SLIST_END(&bch); bc = bcnext) { 1207 1208 bcnext = SLIST_NEXT(bc, sbc_link); 1209 SLIST_REMOVE(&bch, bc, sr_boot_chunk, sbc_link); 1210 bc->sbc_chunk_id = bc->sbc_metadata->ssdi.ssd_chunk_id; 1211 1212 /* Handle key disks separately. */ 1213 if (bc->sbc_metadata->ssdi.ssd_level == SR_KEYDISK_LEVEL) { 1214 SLIST_INSERT_HEAD(&kdh, bc, sbc_link); 1215 continue; 1216 } 1217 1218 SLIST_FOREACH(bv, &bvh, sbv_link) { 1219 if (bcmp(&bc->sbc_metadata->ssdi.ssd_uuid, 1220 &bv->sbv_uuid, 1221 sizeof(bc->sbc_metadata->ssdi.ssd_uuid)) == 0) 1222 break; 1223 } 1224 1225 if (bv == NULL) { 1226 bv = malloc(sizeof(struct sr_boot_volume), 1227 M_DEVBUF, M_NOWAIT | M_CANFAIL | M_ZERO); 1228 if (bv == NULL) { 1229 sr_error(sc, "failed to allocate boot volume"); 1230 goto unwind; 1231 } 1232 1233 bv->sbv_level = bc->sbc_metadata->ssdi.ssd_level; 1234 bv->sbv_volid = bc->sbc_metadata->ssdi.ssd_volid; 1235 bv->sbv_chunk_no = bc->sbc_metadata->ssdi.ssd_chunk_no; 1236 bcopy(&bc->sbc_metadata->ssdi.ssd_uuid, &bv->sbv_uuid, 1237 sizeof(bc->sbc_metadata->ssdi.ssd_uuid)); 1238 SLIST_INIT(&bv->sbv_chunks); 1239 1240 /* Maintain volume order. */ 1241 bv2 = NULL; 1242 SLIST_FOREACH(bv1, &bvh, sbv_link) { 1243 if (bv1->sbv_volid > bv->sbv_volid) 1244 break; 1245 bv2 = bv1; 1246 } 1247 if (bv2 == NULL) { 1248 DNPRINTF(SR_D_META, "%s: insert volume %u " 1249 "at head\n", DEVNAME(sc), bv->sbv_volid); 1250 SLIST_INSERT_HEAD(&bvh, bv, sbv_link); 1251 } else { 1252 DNPRINTF(SR_D_META, "%s: insert volume %u " 1253 "after %u\n", DEVNAME(sc), bv->sbv_volid, 1254 bv2->sbv_volid); 1255 SLIST_INSERT_AFTER(bv2, bv, sbv_link); 1256 } 1257 } 1258 1259 /* Maintain chunk order. */ 1260 bc2 = NULL; 1261 SLIST_FOREACH(bc1, &bv->sbv_chunks, sbc_link) { 1262 if (bc1->sbc_chunk_id > bc->sbc_chunk_id) 1263 break; 1264 bc2 = bc1; 1265 } 1266 if (bc2 == NULL) { 1267 DNPRINTF(SR_D_META, "%s: volume %u insert chunk %u " 1268 "at head\n", DEVNAME(sc), bv->sbv_volid, 1269 bc->sbc_chunk_id); 1270 SLIST_INSERT_HEAD(&bv->sbv_chunks, bc, sbc_link); 1271 } else { 1272 DNPRINTF(SR_D_META, "%s: volume %u insert chunk %u " 1273 "after %u\n", DEVNAME(sc), bv->sbv_volid, 1274 bc->sbc_chunk_id, bc2->sbc_chunk_id); 1275 SLIST_INSERT_AFTER(bc2, bc, sbc_link); 1276 } 1277 1278 bv->sbv_chunks_found++; 1279 } 1280 1281 /* Allocate memory for device and ondisk version arrays. */ 1282 devs = malloc(BIOC_CRMAXLEN * sizeof(dev_t), M_DEVBUF, 1283 M_NOWAIT | M_CANFAIL); 1284 if (devs == NULL) { 1285 printf("%s: failed to allocate device array\n", DEVNAME(sc)); 1286 goto unwind; 1287 } 1288 ondisk = malloc(BIOC_CRMAXLEN * sizeof(u_int64_t), M_DEVBUF, 1289 M_NOWAIT | M_CANFAIL); 1290 if (ondisk == NULL) { 1291 printf("%s: failed to allocate ondisk array\n", DEVNAME(sc)); 1292 goto unwind; 1293 } 1294 1295 /* 1296 * Assemble hotspare "volumes". 1297 */ 1298 SLIST_FOREACH(bv, &bvh, sbv_link) { 1299 1300 /* Check if this is a hotspare "volume". */ 1301 if (bv->sbv_level != SR_HOTSPARE_LEVEL || 1302 bv->sbv_chunk_no != 1) 1303 continue; 1304 1305 #ifdef SR_DEBUG 1306 DNPRINTF(SR_D_META, "%s: assembling hotspare volume ", 1307 DEVNAME(sc)); 1308 if (sr_debug & SR_D_META) 1309 sr_uuid_print(&bv->sbv_uuid, 0); 1310 DNPRINTF(SR_D_META, " volid %u with %u chunks\n", 1311 bv->sbv_volid, bv->sbv_chunk_no); 1312 #endif 1313 1314 /* Create hotspare chunk metadata. */ 1315 hotspare = malloc(sizeof(struct sr_chunk), M_DEVBUF, 1316 M_NOWAIT | M_CANFAIL | M_ZERO); 1317 if (hotspare == NULL) { 1318 printf("%s: failed to allocate hotspare\n", 1319 DEVNAME(sc)); 1320 goto unwind; 1321 } 1322 1323 bc = SLIST_FIRST(&bv->sbv_chunks); 1324 sr_meta_getdevname(sc, bc->sbc_mm, devname, sizeof(devname)); 1325 hotspare->src_dev_mm = bc->sbc_mm; 1326 strlcpy(hotspare->src_devname, devname, 1327 sizeof(hotspare->src_devname)); 1328 hotspare->src_size = bc->sbc_metadata->ssdi.ssd_size; 1329 1330 hm = &hotspare->src_meta; 1331 hm->scmi.scm_volid = SR_HOTSPARE_VOLID; 1332 hm->scmi.scm_chunk_id = 0; 1333 hm->scmi.scm_size = bc->sbc_metadata->ssdi.ssd_size; 1334 hm->scmi.scm_coerced_size = bc->sbc_metadata->ssdi.ssd_size; 1335 strlcpy(hm->scmi.scm_devname, devname, 1336 sizeof(hm->scmi.scm_devname)); 1337 bcopy(&bc->sbc_metadata->ssdi.ssd_uuid, &hm->scmi.scm_uuid, 1338 sizeof(struct sr_uuid)); 1339 1340 sr_checksum(sc, hm, &hm->scm_checksum, 1341 sizeof(struct sr_meta_chunk_invariant)); 1342 1343 hm->scm_status = BIOC_SDHOTSPARE; 1344 1345 /* Add chunk to hotspare list. */ 1346 rw_enter_write(&sc->sc_hs_lock); 1347 cl = &sc->sc_hotspare_list; 1348 if (SLIST_EMPTY(cl)) 1349 SLIST_INSERT_HEAD(cl, hotspare, src_link); 1350 else { 1351 SLIST_FOREACH(chunk, cl, src_link) 1352 last = chunk; 1353 SLIST_INSERT_AFTER(last, hotspare, src_link); 1354 } 1355 sc->sc_hotspare_no++; 1356 rw_exit_write(&sc->sc_hs_lock); 1357 1358 } 1359 1360 /* 1361 * Assemble RAID volumes. 1362 */ 1363 SLIST_FOREACH(bv, &bvh, sbv_link) { 1364 1365 bzero(&bc, sizeof(bc)); 1366 1367 /* Check if this is a hotspare "volume". */ 1368 if (bv->sbv_level == SR_HOTSPARE_LEVEL && 1369 bv->sbv_chunk_no == 1) 1370 continue; 1371 1372 #ifdef SR_DEBUG 1373 DNPRINTF(SR_D_META, "%s: assembling volume ", DEVNAME(sc)); 1374 if (sr_debug & SR_D_META) 1375 sr_uuid_print(&bv->sbv_uuid, 0); 1376 DNPRINTF(SR_D_META, " volid %u with %u chunks\n", 1377 bv->sbv_volid, bv->sbv_chunk_no); 1378 #endif 1379 1380 /* 1381 * If this is a crypto volume, try to find a matching 1382 * key disk... 1383 */ 1384 bcr.bc_key_disk = NODEV; 1385 if (bv->sbv_level == 'C') { 1386 SLIST_FOREACH(bc, &kdh, sbc_link) { 1387 if (bcmp(&bc->sbc_metadata->ssdi.ssd_uuid, 1388 &bv->sbv_uuid, 1389 sizeof(bc->sbc_metadata->ssdi.ssd_uuid)) 1390 == 0) 1391 bcr.bc_key_disk = bc->sbc_mm; 1392 } 1393 } 1394 1395 for (i = 0; i < BIOC_CRMAXLEN; i++) { 1396 devs[i] = NODEV; /* mark device as illegal */ 1397 ondisk[i] = 0; 1398 } 1399 1400 SLIST_FOREACH(bc, &bv->sbv_chunks, sbc_link) { 1401 if (devs[bc->sbc_chunk_id] != NODEV) { 1402 bv->sbv_chunks_found--; 1403 sr_meta_getdevname(sc, bc->sbc_mm, devname, 1404 sizeof(devname)); 1405 printf("%s: found duplicate chunk %u for " 1406 "volume %u on device %s\n", DEVNAME(sc), 1407 bc->sbc_chunk_id, bv->sbv_volid, devname); 1408 } 1409 1410 if (devs[bc->sbc_chunk_id] == NODEV || 1411 bc->sbc_metadata->ssd_ondisk > 1412 ondisk[bc->sbc_chunk_id]) { 1413 devs[bc->sbc_chunk_id] = bc->sbc_mm; 1414 ondisk[bc->sbc_chunk_id] = 1415 bc->sbc_metadata->ssd_ondisk; 1416 DNPRINTF(SR_D_META, "%s: using ondisk " 1417 "metadata version %llu for chunk %u\n", 1418 DEVNAME(sc), ondisk[bc->sbc_chunk_id], 1419 bc->sbc_chunk_id); 1420 } 1421 } 1422 1423 if (bv->sbv_chunk_no != bv->sbv_chunks_found) { 1424 printf("%s: not all chunks were provided; " 1425 "attempting to bring volume %d online\n", 1426 DEVNAME(sc), bv->sbv_volid); 1427 } 1428 1429 bcr.bc_level = bv->sbv_level; 1430 bcr.bc_dev_list_len = bv->sbv_chunk_no * sizeof(dev_t); 1431 bcr.bc_dev_list = devs; 1432 bcr.bc_flags = BIOC_SCDEVT; 1433 1434 rw_enter_write(&sc->sc_lock); 1435 bio_status_init(&sc->sc_status, &sc->sc_dev); 1436 sr_ioctl_createraid(sc, &bcr, 0); 1437 rw_exit_write(&sc->sc_lock); 1438 1439 rv++; 1440 } 1441 1442 /* done with metadata */ 1443 unwind: 1444 /* Free boot volumes and associated chunks. */ 1445 for (bv1 = SLIST_FIRST(&bvh); bv1 != SLIST_END(&bvh); bv1 = bv2) { 1446 bv2 = SLIST_NEXT(bv1, sbv_link); 1447 for (bc1 = SLIST_FIRST(&bv1->sbv_chunks); 1448 bc1 != SLIST_END(&bv1->sbv_chunks); bc1 = bc2) { 1449 bc2 = SLIST_NEXT(bc1, sbc_link); 1450 if (bc1->sbc_metadata) 1451 free(bc1->sbc_metadata, M_DEVBUF); 1452 free(bc1, M_DEVBUF); 1453 } 1454 free(bv1, M_DEVBUF); 1455 } 1456 /* Free keydisks chunks. */ 1457 for (bc1 = SLIST_FIRST(&kdh); bc1 != SLIST_END(&kdh); bc1 = bc2) { 1458 bc2 = SLIST_NEXT(bc1, sbc_link); 1459 if (bc1->sbc_metadata) 1460 free(bc1->sbc_metadata, M_DEVBUF); 1461 free(bc1, M_DEVBUF); 1462 } 1463 /* Free unallocated chunks. */ 1464 for (bc1 = SLIST_FIRST(&bch); bc1 != SLIST_END(&bch); bc1 = bc2) { 1465 bc2 = SLIST_NEXT(bc1, sbc_link); 1466 if (bc1->sbc_metadata) 1467 free(bc1->sbc_metadata, M_DEVBUF); 1468 free(bc1, M_DEVBUF); 1469 } 1470 1471 while (!SLIST_EMPTY(&sdklist)) { 1472 sdk = SLIST_FIRST(&sdklist); 1473 SLIST_REMOVE_HEAD(&sdklist, sdk_link); 1474 free(sdk, M_DEVBUF); 1475 } 1476 1477 if (devs) 1478 free(devs, M_DEVBUF); 1479 if (ondisk) 1480 free(ondisk, M_DEVBUF); 1481 1482 return (rv); 1483 } 1484 1485 void 1486 sr_map_root(void) 1487 { 1488 struct sr_softc *sc = softraid0; 1489 struct sr_meta_opt_item *omi; 1490 struct sr_meta_boot *sbm; 1491 u_char duid[8]; 1492 int i, j; 1493 1494 if (sc == NULL) 1495 return; 1496 1497 DNPRINTF(SR_D_MISC, "%s: sr_map_root\n", DEVNAME(sc)); 1498 bzero(duid, sizeof(duid)); 1499 if (bcmp(rootduid, duid, sizeof(duid)) == 0) { 1500 DNPRINTF(SR_D_MISC, "%s: root duid is zero\n", DEVNAME(sc)); 1501 return; 1502 } 1503 1504 for (i = 0; i < SR_MAX_LD; i++) { 1505 if (sc->sc_dis[i] == NULL) 1506 continue; 1507 SLIST_FOREACH(omi, &sc->sc_dis[i]->sd_meta_opt, omi_link) { 1508 if (omi->omi_som->som_type != SR_OPT_BOOT) 1509 continue; 1510 sbm = (struct sr_meta_boot *)omi->omi_som; 1511 for (j = 0; j < SR_MAX_BOOT_DISKS; j++) { 1512 if (bcmp(rootduid, sbm->sbm_boot_duid[j], 1513 sizeof(rootduid)) == 0) { 1514 bcopy(sbm->sbm_root_duid, rootduid, 1515 sizeof(rootduid)); 1516 DNPRINTF(SR_D_MISC, "%s: root duid " 1517 "mapped to %02hx%02hx%02hx%02hx" 1518 "%02hx%02hx%02hx%02hx\n", 1519 DEVNAME(sc), rootduid[0], 1520 rootduid[1], rootduid[2], 1521 rootduid[3], rootduid[4], 1522 rootduid[5], rootduid[6], 1523 rootduid[7]); 1524 return; 1525 } 1526 } 1527 } 1528 } 1529 } 1530 1531 int 1532 sr_meta_native_probe(struct sr_softc *sc, struct sr_chunk *ch_entry) 1533 { 1534 struct disklabel label; 1535 char *devname; 1536 int error, part; 1537 daddr64_t size; 1538 1539 DNPRINTF(SR_D_META, "%s: sr_meta_native_probe(%s)\n", 1540 DEVNAME(sc), ch_entry->src_devname); 1541 1542 devname = ch_entry->src_devname; 1543 part = DISKPART(ch_entry->src_dev_mm); 1544 1545 /* get disklabel */ 1546 error = VOP_IOCTL(ch_entry->src_vn, DIOCGDINFO, (caddr_t)&label, FREAD, 1547 NOCRED, curproc); 1548 if (error) { 1549 DNPRINTF(SR_D_META, "%s: %s can't obtain disklabel\n", 1550 DEVNAME(sc), devname); 1551 goto unwind; 1552 } 1553 bcopy(label.d_uid, ch_entry->src_duid, sizeof(ch_entry->src_duid)); 1554 1555 /* Make sure this is a 512-byte/sector device. */ 1556 if (label.d_secsize != DEV_BSIZE) { 1557 sr_error(sc, "%s has unsupported sector size (%d)", 1558 devname, label.d_secsize); 1559 goto unwind; 1560 } 1561 1562 /* make sure the partition is of the right type */ 1563 if (label.d_partitions[part].p_fstype != FS_RAID) { 1564 DNPRINTF(SR_D_META, 1565 "%s: %s partition not of type RAID (%d)\n", DEVNAME(sc), 1566 devname, 1567 label.d_partitions[part].p_fstype); 1568 goto unwind; 1569 } 1570 1571 size = DL_GETPSIZE(&label.d_partitions[part]) - SR_DATA_OFFSET; 1572 if (size <= 0) { 1573 DNPRINTF(SR_D_META, "%s: %s partition too small\n", DEVNAME(sc), 1574 devname); 1575 goto unwind; 1576 } 1577 ch_entry->src_size = size; 1578 1579 DNPRINTF(SR_D_META, "%s: probe found %s size %d\n", DEVNAME(sc), 1580 devname, size); 1581 1582 return (SR_META_F_NATIVE); 1583 unwind: 1584 DNPRINTF(SR_D_META, "%s: invalid device: %s\n", DEVNAME(sc), 1585 devname ? devname : "nodev"); 1586 return (SR_META_F_INVALID); 1587 } 1588 1589 int 1590 sr_meta_native_attach(struct sr_discipline *sd, int force) 1591 { 1592 struct sr_softc *sc = sd->sd_sc; 1593 struct sr_chunk_head *cl = &sd->sd_vol.sv_chunk_list; 1594 struct sr_metadata *md = NULL; 1595 struct sr_chunk *ch_entry, *ch_next; 1596 struct sr_uuid uuid; 1597 u_int64_t version = 0; 1598 int sr, not_sr, rv = 1, d, expected = -1, old_meta = 0; 1599 1600 DNPRINTF(SR_D_META, "%s: sr_meta_native_attach\n", DEVNAME(sc)); 1601 1602 md = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO | M_NOWAIT); 1603 if (md == NULL) { 1604 sr_error(sc, "not enough memory for metadata buffer"); 1605 goto bad; 1606 } 1607 1608 bzero(&uuid, sizeof uuid); 1609 1610 sr = not_sr = d = 0; 1611 SLIST_FOREACH(ch_entry, cl, src_link) { 1612 if (ch_entry->src_dev_mm == NODEV) 1613 continue; 1614 1615 if (sr_meta_native_read(sd, ch_entry->src_dev_mm, md, NULL)) { 1616 sr_error(sc, "could not read native metadata"); 1617 goto bad; 1618 } 1619 1620 if (md->ssdi.ssd_magic == SR_MAGIC) { 1621 sr++; 1622 ch_entry->src_meta.scmi.scm_chunk_id = 1623 md->ssdi.ssd_chunk_id; 1624 if (d == 0) { 1625 bcopy(&md->ssdi.ssd_uuid, &uuid, sizeof uuid); 1626 expected = md->ssdi.ssd_chunk_no; 1627 version = md->ssd_ondisk; 1628 d++; 1629 continue; 1630 } else if (bcmp(&md->ssdi.ssd_uuid, &uuid, 1631 sizeof uuid)) { 1632 sr_error(sc, "not part of the same volume"); 1633 goto bad; 1634 } 1635 if (md->ssd_ondisk != version) { 1636 old_meta++; 1637 version = MAX(md->ssd_ondisk, version); 1638 } 1639 } else 1640 not_sr++; 1641 } 1642 1643 if (sr && not_sr) { 1644 sr_error(sc, "not all chunks are of the native metadata " 1645 "format"); 1646 goto bad; 1647 } 1648 1649 /* mixed metadata versions; mark bad disks offline */ 1650 if (old_meta) { 1651 d = 0; 1652 for (ch_entry = SLIST_FIRST(cl); ch_entry != SLIST_END(cl); 1653 ch_entry = ch_next, d++) { 1654 ch_next = SLIST_NEXT(ch_entry, src_link); 1655 1656 /* XXX do we want to read this again? */ 1657 if (ch_entry->src_dev_mm == NODEV) 1658 panic("src_dev_mm == NODEV"); 1659 if (sr_meta_native_read(sd, ch_entry->src_dev_mm, md, 1660 NULL)) 1661 sr_warn(sc, "could not read native metadata"); 1662 if (md->ssd_ondisk != version) 1663 sd->sd_vol.sv_chunks[d]->src_meta.scm_status = 1664 BIOC_SDOFFLINE; 1665 } 1666 } 1667 1668 if (expected != sr && !force && expected != -1) { 1669 DNPRINTF(SR_D_META, "%s: not all chunks were provided, trying " 1670 "anyway\n", DEVNAME(sc)); 1671 } 1672 1673 rv = 0; 1674 bad: 1675 if (md) 1676 free(md, M_DEVBUF); 1677 return (rv); 1678 } 1679 1680 int 1681 sr_meta_native_read(struct sr_discipline *sd, dev_t dev, 1682 struct sr_metadata *md, void *fm) 1683 { 1684 #ifdef SR_DEBUG 1685 struct sr_softc *sc = sd->sd_sc; 1686 #endif 1687 DNPRINTF(SR_D_META, "%s: sr_meta_native_read(0x%x, %p)\n", 1688 DEVNAME(sc), dev, md); 1689 1690 return (sr_meta_rw(sd, dev, md, SR_META_SIZE * 512, SR_META_OFFSET, 1691 B_READ)); 1692 } 1693 1694 int 1695 sr_meta_native_write(struct sr_discipline *sd, dev_t dev, 1696 struct sr_metadata *md, void *fm) 1697 { 1698 #ifdef SR_DEBUG 1699 struct sr_softc *sc = sd->sd_sc; 1700 #endif 1701 DNPRINTF(SR_D_META, "%s: sr_meta_native_write(0x%x, %p)\n", 1702 DEVNAME(sc), dev, md); 1703 1704 return (sr_meta_rw(sd, dev, md, SR_META_SIZE * 512, SR_META_OFFSET, 1705 B_WRITE)); 1706 } 1707 1708 void 1709 sr_hotplug_register(struct sr_discipline *sd, void *func) 1710 { 1711 struct sr_hotplug_list *mhe; 1712 1713 DNPRINTF(SR_D_MISC, "%s: sr_hotplug_register: %p\n", 1714 DEVNAME(sd->sd_sc), func); 1715 1716 /* make sure we aren't on the list yet */ 1717 SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link) 1718 if (mhe->sh_hotplug == func) 1719 return; 1720 1721 mhe = malloc(sizeof(struct sr_hotplug_list), M_DEVBUF, 1722 M_WAITOK | M_ZERO); 1723 mhe->sh_hotplug = func; 1724 mhe->sh_sd = sd; 1725 SLIST_INSERT_HEAD(&sr_hotplug_callbacks, mhe, shl_link); 1726 } 1727 1728 void 1729 sr_hotplug_unregister(struct sr_discipline *sd, void *func) 1730 { 1731 struct sr_hotplug_list *mhe; 1732 1733 DNPRINTF(SR_D_MISC, "%s: sr_hotplug_unregister: %s %p\n", 1734 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, func); 1735 1736 /* make sure we are on the list yet */ 1737 SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link) 1738 if (mhe->sh_hotplug == func) { 1739 SLIST_REMOVE(&sr_hotplug_callbacks, mhe, 1740 sr_hotplug_list, shl_link); 1741 free(mhe, M_DEVBUF); 1742 if (SLIST_EMPTY(&sr_hotplug_callbacks)) 1743 SLIST_INIT(&sr_hotplug_callbacks); 1744 return; 1745 } 1746 } 1747 1748 void 1749 sr_disk_attach(struct disk *diskp, int action) 1750 { 1751 struct sr_hotplug_list *mhe; 1752 1753 SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link) 1754 if (mhe->sh_sd->sd_ready) 1755 mhe->sh_hotplug(mhe->sh_sd, diskp, action); 1756 } 1757 1758 int 1759 sr_match(struct device *parent, void *match, void *aux) 1760 { 1761 return (1); 1762 } 1763 1764 void 1765 sr_attach(struct device *parent, struct device *self, void *aux) 1766 { 1767 struct sr_softc *sc = (void *)self; 1768 struct scsibus_attach_args saa; 1769 1770 DNPRINTF(SR_D_MISC, "\n%s: sr_attach", DEVNAME(sc)); 1771 1772 if (softraid0 == NULL) 1773 softraid0 = sc; 1774 1775 rw_init(&sc->sc_lock, "sr_lock"); 1776 rw_init(&sc->sc_hs_lock, "sr_hs_lock"); 1777 1778 SLIST_INIT(&sr_hotplug_callbacks); 1779 SLIST_INIT(&sc->sc_hotspare_list); 1780 1781 #if NBIO > 0 1782 if (bio_register(&sc->sc_dev, sr_bio_ioctl) != 0) 1783 printf("%s: controller registration failed", DEVNAME(sc)); 1784 #endif /* NBIO > 0 */ 1785 1786 #ifndef SMALL_KERNEL 1787 strlcpy(sc->sc_sensordev.xname, DEVNAME(sc), 1788 sizeof(sc->sc_sensordev.xname)); 1789 sensordev_install(&sc->sc_sensordev); 1790 #endif /* SMALL_KERNEL */ 1791 1792 printf("\n"); 1793 1794 sc->sc_link.adapter_softc = sc; 1795 sc->sc_link.adapter = &sr_switch; 1796 sc->sc_link.adapter_target = SR_MAX_LD; 1797 sc->sc_link.adapter_buswidth = SR_MAX_LD; 1798 sc->sc_link.luns = 1; 1799 1800 bzero(&saa, sizeof(saa)); 1801 saa.saa_sc_link = &sc->sc_link; 1802 1803 sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev, 1804 &saa, scsiprint); 1805 1806 softraid_disk_attach = sr_disk_attach; 1807 1808 sc->sc_shutdownhook = shutdownhook_establish(sr_shutdownhook, sc); 1809 1810 sr_boot_assembly(sc); 1811 } 1812 1813 int 1814 sr_detach(struct device *self, int flags) 1815 { 1816 struct sr_softc *sc = (void *)self; 1817 int rv; 1818 1819 DNPRINTF(SR_D_MISC, "%s: sr_detach\n", DEVNAME(sc)); 1820 1821 if (sc->sc_shutdownhook) 1822 shutdownhook_disestablish(sc->sc_shutdownhook); 1823 1824 softraid_disk_attach = NULL; 1825 1826 sr_shutdown(sc); 1827 1828 #ifndef SMALL_KERNEL 1829 if (sc->sc_sensor_task != NULL) 1830 sensor_task_unregister(sc->sc_sensor_task); 1831 sensordev_deinstall(&sc->sc_sensordev); 1832 #endif /* SMALL_KERNEL */ 1833 1834 if (sc->sc_scsibus != NULL) { 1835 rv = config_detach((struct device *)sc->sc_scsibus, flags); 1836 if (rv != 0) 1837 return (rv); 1838 sc->sc_scsibus = NULL; 1839 } 1840 1841 return (rv); 1842 } 1843 1844 void 1845 sr_info(struct sr_softc *sc, const char *fmt, ...) 1846 { 1847 va_list ap; 1848 1849 rw_assert_wrlock(&sc->sc_lock); 1850 1851 va_start(ap, fmt); 1852 bio_status(&sc->sc_status, 0, BIO_MSG_INFO, fmt, &ap); 1853 va_end(ap); 1854 } 1855 1856 void 1857 sr_warn(struct sr_softc *sc, const char *fmt, ...) 1858 { 1859 va_list ap; 1860 1861 rw_assert_wrlock(&sc->sc_lock); 1862 1863 va_start(ap, fmt); 1864 bio_status(&sc->sc_status, 1, BIO_MSG_WARN, fmt, &ap); 1865 va_end(ap); 1866 } 1867 1868 void 1869 sr_error(struct sr_softc *sc, const char *fmt, ...) 1870 { 1871 va_list ap; 1872 1873 rw_assert_wrlock(&sc->sc_lock); 1874 1875 va_start(ap, fmt); 1876 bio_status(&sc->sc_status, 1, BIO_MSG_ERROR, fmt, &ap); 1877 va_end(ap); 1878 } 1879 1880 void 1881 sr_minphys(struct buf *bp, struct scsi_link *sl) 1882 { 1883 DNPRINTF(SR_D_MISC, "sr_minphys: %d\n", bp->b_bcount); 1884 1885 /* XXX currently using SR_MAXFER = MAXPHYS */ 1886 if (bp->b_bcount > SR_MAXFER) 1887 bp->b_bcount = SR_MAXFER; 1888 minphys(bp); 1889 } 1890 1891 void 1892 sr_copy_internal_data(struct scsi_xfer *xs, void *v, size_t size) 1893 { 1894 size_t copy_cnt; 1895 1896 DNPRINTF(SR_D_MISC, "sr_copy_internal_data xs: %p size: %d\n", 1897 xs, size); 1898 1899 if (xs->datalen) { 1900 copy_cnt = MIN(size, xs->datalen); 1901 bcopy(v, xs->data, copy_cnt); 1902 } 1903 } 1904 1905 int 1906 sr_ccb_alloc(struct sr_discipline *sd) 1907 { 1908 struct sr_ccb *ccb; 1909 int i; 1910 1911 if (!sd) 1912 return (1); 1913 1914 DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc\n", DEVNAME(sd->sd_sc)); 1915 1916 if (sd->sd_ccb) 1917 return (1); 1918 1919 sd->sd_ccb = malloc(sizeof(struct sr_ccb) * 1920 sd->sd_max_wu * sd->sd_max_ccb_per_wu, M_DEVBUF, M_WAITOK | M_ZERO); 1921 TAILQ_INIT(&sd->sd_ccb_freeq); 1922 for (i = 0; i < sd->sd_max_wu * sd->sd_max_ccb_per_wu; i++) { 1923 ccb = &sd->sd_ccb[i]; 1924 ccb->ccb_dis = sd; 1925 sr_ccb_put(ccb); 1926 } 1927 1928 DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc ccb: %d\n", 1929 DEVNAME(sd->sd_sc), sd->sd_max_wu * sd->sd_max_ccb_per_wu); 1930 1931 return (0); 1932 } 1933 1934 void 1935 sr_ccb_free(struct sr_discipline *sd) 1936 { 1937 struct sr_ccb *ccb; 1938 1939 if (!sd) 1940 return; 1941 1942 DNPRINTF(SR_D_CCB, "%s: sr_ccb_free %p\n", DEVNAME(sd->sd_sc), sd); 1943 1944 while ((ccb = TAILQ_FIRST(&sd->sd_ccb_freeq)) != NULL) 1945 TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link); 1946 1947 if (sd->sd_ccb) 1948 free(sd->sd_ccb, M_DEVBUF); 1949 } 1950 1951 struct sr_ccb * 1952 sr_ccb_get(struct sr_discipline *sd) 1953 { 1954 struct sr_ccb *ccb; 1955 int s; 1956 1957 s = splbio(); 1958 1959 ccb = TAILQ_FIRST(&sd->sd_ccb_freeq); 1960 if (ccb) { 1961 TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link); 1962 ccb->ccb_state = SR_CCB_INPROGRESS; 1963 } 1964 1965 splx(s); 1966 1967 DNPRINTF(SR_D_CCB, "%s: sr_ccb_get: %p\n", DEVNAME(sd->sd_sc), 1968 ccb); 1969 1970 return (ccb); 1971 } 1972 1973 void 1974 sr_ccb_put(struct sr_ccb *ccb) 1975 { 1976 struct sr_discipline *sd = ccb->ccb_dis; 1977 int s; 1978 1979 DNPRINTF(SR_D_CCB, "%s: sr_ccb_put: %p\n", DEVNAME(sd->sd_sc), 1980 ccb); 1981 1982 s = splbio(); 1983 1984 ccb->ccb_wu = NULL; 1985 ccb->ccb_state = SR_CCB_FREE; 1986 ccb->ccb_target = -1; 1987 ccb->ccb_opaque = NULL; 1988 1989 TAILQ_INSERT_TAIL(&sd->sd_ccb_freeq, ccb, ccb_link); 1990 1991 splx(s); 1992 } 1993 1994 int 1995 sr_wu_alloc(struct sr_discipline *sd) 1996 { 1997 struct sr_workunit *wu; 1998 int i, no_wu; 1999 2000 if (!sd) 2001 return (1); 2002 2003 DNPRINTF(SR_D_WU, "%s: sr_wu_alloc %p %d\n", DEVNAME(sd->sd_sc), 2004 sd, sd->sd_max_wu); 2005 2006 if (sd->sd_wu) 2007 return (1); 2008 2009 no_wu = sd->sd_max_wu; 2010 sd->sd_wu_pending = no_wu; 2011 2012 sd->sd_wu = malloc(sizeof(struct sr_workunit) * no_wu, 2013 M_DEVBUF, M_WAITOK | M_ZERO); 2014 TAILQ_INIT(&sd->sd_wu_freeq); 2015 TAILQ_INIT(&sd->sd_wu_pendq); 2016 TAILQ_INIT(&sd->sd_wu_defq); 2017 for (i = 0; i < no_wu; i++) { 2018 wu = &sd->sd_wu[i]; 2019 wu->swu_dis = sd; 2020 sr_wu_put(sd, wu); 2021 } 2022 2023 return (0); 2024 } 2025 2026 void 2027 sr_wu_free(struct sr_discipline *sd) 2028 { 2029 struct sr_workunit *wu; 2030 2031 if (!sd) 2032 return; 2033 2034 DNPRINTF(SR_D_WU, "%s: sr_wu_free %p\n", DEVNAME(sd->sd_sc), sd); 2035 2036 while ((wu = TAILQ_FIRST(&sd->sd_wu_freeq)) != NULL) 2037 TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link); 2038 while ((wu = TAILQ_FIRST(&sd->sd_wu_pendq)) != NULL) 2039 TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link); 2040 while ((wu = TAILQ_FIRST(&sd->sd_wu_defq)) != NULL) 2041 TAILQ_REMOVE(&sd->sd_wu_defq, wu, swu_link); 2042 2043 if (sd->sd_wu) 2044 free(sd->sd_wu, M_DEVBUF); 2045 } 2046 2047 void 2048 sr_wu_put(void *xsd, void *xwu) 2049 { 2050 struct sr_discipline *sd = (struct sr_discipline *)xsd; 2051 struct sr_workunit *wu = (struct sr_workunit *)xwu; 2052 struct sr_ccb *ccb; 2053 2054 int s; 2055 2056 DNPRINTF(SR_D_WU, "%s: sr_wu_put: %p\n", DEVNAME(sd->sd_sc), wu); 2057 2058 s = splbio(); 2059 if (wu->swu_cb_active == 1) 2060 panic("%s: sr_wu_put got active wu", DEVNAME(sd->sd_sc)); 2061 while ((ccb = TAILQ_FIRST(&wu->swu_ccb)) != NULL) { 2062 TAILQ_REMOVE(&wu->swu_ccb, ccb, ccb_link); 2063 sr_ccb_put(ccb); 2064 } 2065 splx(s); 2066 2067 bzero(wu, sizeof(*wu)); 2068 TAILQ_INIT(&wu->swu_ccb); 2069 wu->swu_dis = sd; 2070 2071 mtx_enter(&sd->sd_wu_mtx); 2072 TAILQ_INSERT_TAIL(&sd->sd_wu_freeq, wu, swu_link); 2073 sd->sd_wu_pending--; 2074 mtx_leave(&sd->sd_wu_mtx); 2075 } 2076 2077 void * 2078 sr_wu_get(void *xsd) 2079 { 2080 struct sr_discipline *sd = (struct sr_discipline *)xsd; 2081 struct sr_workunit *wu; 2082 2083 mtx_enter(&sd->sd_wu_mtx); 2084 wu = TAILQ_FIRST(&sd->sd_wu_freeq); 2085 if (wu) { 2086 TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link); 2087 sd->sd_wu_pending++; 2088 } 2089 mtx_leave(&sd->sd_wu_mtx); 2090 2091 DNPRINTF(SR_D_WU, "%s: sr_wu_get: %p\n", DEVNAME(sd->sd_sc), wu); 2092 2093 return (wu); 2094 } 2095 2096 void 2097 sr_scsi_done(struct sr_discipline *sd, struct scsi_xfer *xs) 2098 { 2099 DNPRINTF(SR_D_DIS, "%s: sr_scsi_done: xs %p\n", DEVNAME(sd->sd_sc), xs); 2100 2101 scsi_done(xs); 2102 } 2103 2104 void 2105 sr_scsi_cmd(struct scsi_xfer *xs) 2106 { 2107 int s; 2108 struct scsi_link *link = xs->sc_link; 2109 struct sr_softc *sc = link->adapter_softc; 2110 struct sr_workunit *wu = NULL; 2111 struct sr_discipline *sd; 2112 struct sr_ccb *ccb; 2113 2114 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: target %d xs: %p " 2115 "flags: %#x\n", DEVNAME(sc), link->target, xs, xs->flags); 2116 2117 sd = sc->sc_dis[link->target]; 2118 if (sd == NULL) { 2119 printf("%s: sr_scsi_cmd NULL discipline\n", DEVNAME(sc)); 2120 goto stuffup; 2121 } 2122 2123 if (sd->sd_deleted) { 2124 printf("%s: %s device is being deleted, failing io\n", 2125 DEVNAME(sc), sd->sd_meta->ssd_devname); 2126 goto stuffup; 2127 } 2128 2129 wu = xs->io; 2130 /* scsi layer *can* re-send wu without calling sr_wu_put(). */ 2131 s = splbio(); 2132 if (wu->swu_cb_active == 1) 2133 panic("%s: sr_scsi_cmd got active wu", DEVNAME(sd->sd_sc)); 2134 while ((ccb = TAILQ_FIRST(&wu->swu_ccb)) != NULL) { 2135 TAILQ_REMOVE(&wu->swu_ccb, ccb, ccb_link); 2136 sr_ccb_put(ccb); 2137 } 2138 splx(s); 2139 2140 bzero(wu, sizeof(*wu)); 2141 TAILQ_INIT(&wu->swu_ccb); 2142 wu->swu_state = SR_WU_INPROGRESS; 2143 wu->swu_dis = sd; 2144 wu->swu_xs = xs; 2145 2146 switch (xs->cmd->opcode) { 2147 case READ_COMMAND: 2148 case READ_BIG: 2149 case READ_16: 2150 case WRITE_COMMAND: 2151 case WRITE_BIG: 2152 case WRITE_16: 2153 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: READ/WRITE %02x\n", 2154 DEVNAME(sc), xs->cmd->opcode); 2155 if (sd->sd_scsi_rw(wu)) 2156 goto stuffup; 2157 break; 2158 2159 case SYNCHRONIZE_CACHE: 2160 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: SYNCHRONIZE_CACHE\n", 2161 DEVNAME(sc)); 2162 if (sd->sd_scsi_sync(wu)) 2163 goto stuffup; 2164 goto complete; 2165 2166 case TEST_UNIT_READY: 2167 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: TEST_UNIT_READY\n", 2168 DEVNAME(sc)); 2169 if (sd->sd_scsi_tur(wu)) 2170 goto stuffup; 2171 goto complete; 2172 2173 case START_STOP: 2174 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: START_STOP\n", 2175 DEVNAME(sc)); 2176 if (sd->sd_scsi_start_stop(wu)) 2177 goto stuffup; 2178 goto complete; 2179 2180 case INQUIRY: 2181 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: INQUIRY\n", 2182 DEVNAME(sc)); 2183 if (sd->sd_scsi_inquiry(wu)) 2184 goto stuffup; 2185 goto complete; 2186 2187 case READ_CAPACITY: 2188 case READ_CAPACITY_16: 2189 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd READ CAPACITY 0x%02x\n", 2190 DEVNAME(sc), xs->cmd->opcode); 2191 if (sd->sd_scsi_read_cap(wu)) 2192 goto stuffup; 2193 goto complete; 2194 2195 case REQUEST_SENSE: 2196 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd REQUEST SENSE\n", 2197 DEVNAME(sc)); 2198 if (sd->sd_scsi_req_sense(wu)) 2199 goto stuffup; 2200 goto complete; 2201 2202 default: 2203 DNPRINTF(SR_D_CMD, "%s: unsupported scsi command %x\n", 2204 DEVNAME(sc), xs->cmd->opcode); 2205 /* XXX might need to add generic function to handle others */ 2206 goto stuffup; 2207 } 2208 2209 return; 2210 stuffup: 2211 if (sd && sd->sd_scsi_sense.error_code) { 2212 xs->error = XS_SENSE; 2213 bcopy(&sd->sd_scsi_sense, &xs->sense, sizeof(xs->sense)); 2214 bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense)); 2215 } else { 2216 xs->error = XS_DRIVER_STUFFUP; 2217 } 2218 complete: 2219 sr_scsi_done(sd, xs); 2220 } 2221 2222 int 2223 sr_scsi_probe(struct scsi_link *link) 2224 { 2225 struct sr_softc *sc = link->adapter_softc; 2226 struct sr_discipline *sd; 2227 2228 KASSERT(link->target < SR_MAX_LD && link->lun == 0); 2229 2230 sd = sc->sc_dis[link->target]; 2231 if (sd == NULL) 2232 return (ENODEV); 2233 2234 link->pool = &sd->sd_iopool; 2235 if (sd->sd_openings) 2236 link->openings = sd->sd_openings(sd); 2237 else 2238 link->openings = sd->sd_max_wu; 2239 2240 return (0); 2241 } 2242 2243 int 2244 sr_scsi_ioctl(struct scsi_link *link, u_long cmd, caddr_t addr, int flag) 2245 { 2246 DNPRINTF(SR_D_IOCTL, "%s: sr_scsi_ioctl cmd: %#x\n", 2247 DEVNAME((struct sr_softc *)link->adapter_softc), cmd); 2248 2249 /* Pass bio ioctls through to bio handler. */ 2250 if (IOCGROUP(cmd) == 'B') 2251 return (sr_bio_ioctl(link->adapter_softc, cmd, addr)); 2252 2253 switch (cmd) { 2254 case DIOCGCACHE: 2255 case DIOCSCACHE: 2256 return (EOPNOTSUPP); 2257 default: 2258 return (ENOTTY); 2259 } 2260 } 2261 2262 int 2263 sr_bio_ioctl(struct device *dev, u_long cmd, caddr_t addr) 2264 { 2265 struct sr_softc *sc = (struct sr_softc *)dev; 2266 struct bio *bio = (struct bio *)addr; 2267 int rv = 0; 2268 2269 DNPRINTF(SR_D_IOCTL, "%s: sr_bio_ioctl ", DEVNAME(sc)); 2270 2271 rw_enter_write(&sc->sc_lock); 2272 2273 bio_status_init(&sc->sc_status, &sc->sc_dev); 2274 2275 switch (cmd) { 2276 case BIOCINQ: 2277 DNPRINTF(SR_D_IOCTL, "inq\n"); 2278 rv = sr_ioctl_inq(sc, (struct bioc_inq *)addr); 2279 break; 2280 2281 case BIOCVOL: 2282 DNPRINTF(SR_D_IOCTL, "vol\n"); 2283 rv = sr_ioctl_vol(sc, (struct bioc_vol *)addr); 2284 break; 2285 2286 case BIOCDISK: 2287 DNPRINTF(SR_D_IOCTL, "disk\n"); 2288 rv = sr_ioctl_disk(sc, (struct bioc_disk *)addr); 2289 break; 2290 2291 case BIOCALARM: 2292 DNPRINTF(SR_D_IOCTL, "alarm\n"); 2293 /*rv = sr_ioctl_alarm(sc, (struct bioc_alarm *)addr); */ 2294 break; 2295 2296 case BIOCBLINK: 2297 DNPRINTF(SR_D_IOCTL, "blink\n"); 2298 /*rv = sr_ioctl_blink(sc, (struct bioc_blink *)addr); */ 2299 break; 2300 2301 case BIOCSETSTATE: 2302 DNPRINTF(SR_D_IOCTL, "setstate\n"); 2303 rv = sr_ioctl_setstate(sc, (struct bioc_setstate *)addr); 2304 break; 2305 2306 case BIOCCREATERAID: 2307 DNPRINTF(SR_D_IOCTL, "createraid\n"); 2308 rv = sr_ioctl_createraid(sc, (struct bioc_createraid *)addr, 1); 2309 break; 2310 2311 case BIOCDELETERAID: 2312 DNPRINTF(SR_D_IOCTL, "deleteraid\n"); 2313 rv = sr_ioctl_deleteraid(sc, (struct bioc_deleteraid *)addr); 2314 break; 2315 2316 case BIOCDISCIPLINE: 2317 DNPRINTF(SR_D_IOCTL, "discipline\n"); 2318 rv = sr_ioctl_discipline(sc, (struct bioc_discipline *)addr); 2319 break; 2320 2321 case BIOCINSTALLBOOT: 2322 DNPRINTF(SR_D_IOCTL, "installboot\n"); 2323 rv = sr_ioctl_installboot(sc, (struct bioc_installboot *)addr); 2324 break; 2325 2326 default: 2327 DNPRINTF(SR_D_IOCTL, "invalid ioctl\n"); 2328 rv = ENOTTY; 2329 } 2330 2331 sc->sc_status.bs_status = (rv ? BIO_STATUS_ERROR : BIO_STATUS_SUCCESS); 2332 2333 bcopy(&sc->sc_status, &bio->bio_status, sizeof(struct bio_status)); 2334 2335 rw_exit_write(&sc->sc_lock); 2336 2337 return (0); 2338 } 2339 2340 int 2341 sr_ioctl_inq(struct sr_softc *sc, struct bioc_inq *bi) 2342 { 2343 int i, vol, disk; 2344 2345 for (i = 0, vol = 0, disk = 0; i < SR_MAX_LD; i++) 2346 /* XXX this will not work when we stagger disciplines */ 2347 if (sc->sc_dis[i]) { 2348 vol++; 2349 disk += sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no; 2350 } 2351 2352 strlcpy(bi->bi_dev, sc->sc_dev.dv_xname, sizeof(bi->bi_dev)); 2353 bi->bi_novol = vol + sc->sc_hotspare_no; 2354 bi->bi_nodisk = disk + sc->sc_hotspare_no; 2355 2356 return (0); 2357 } 2358 2359 int 2360 sr_ioctl_vol(struct sr_softc *sc, struct bioc_vol *bv) 2361 { 2362 int i, vol, rv = EINVAL; 2363 struct sr_discipline *sd; 2364 struct sr_chunk *hotspare; 2365 daddr64_t rb, sz; 2366 2367 for (i = 0, vol = -1; i < SR_MAX_LD; i++) { 2368 /* XXX this will not work when we stagger disciplines */ 2369 if (sc->sc_dis[i]) 2370 vol++; 2371 if (vol != bv->bv_volid) 2372 continue; 2373 2374 if (sc->sc_dis[i] == NULL) 2375 goto done; 2376 2377 sd = sc->sc_dis[i]; 2378 bv->bv_status = sd->sd_vol_status; 2379 bv->bv_size = sd->sd_meta->ssdi.ssd_size << DEV_BSHIFT; 2380 bv->bv_level = sd->sd_meta->ssdi.ssd_level; 2381 bv->bv_nodisk = sd->sd_meta->ssdi.ssd_chunk_no; 2382 2383 #ifdef CRYPTO 2384 if (sd->sd_meta->ssdi.ssd_level == 'C' && 2385 sd->mds.mdd_crypto.key_disk != NULL) 2386 bv->bv_nodisk++; 2387 #endif 2388 2389 if (bv->bv_status == BIOC_SVREBUILD) { 2390 sz = sd->sd_meta->ssdi.ssd_size; 2391 rb = sd->sd_meta->ssd_rebuild; 2392 if (rb > 0) 2393 bv->bv_percent = 100 - 2394 ((sz * 100 - rb * 100) / sz) - 1; 2395 else 2396 bv->bv_percent = 0; 2397 } 2398 strlcpy(bv->bv_dev, sd->sd_meta->ssd_devname, 2399 sizeof(bv->bv_dev)); 2400 strlcpy(bv->bv_vendor, sd->sd_meta->ssdi.ssd_vendor, 2401 sizeof(bv->bv_vendor)); 2402 rv = 0; 2403 goto done; 2404 } 2405 2406 /* Check hotspares list. */ 2407 SLIST_FOREACH(hotspare, &sc->sc_hotspare_list, src_link) { 2408 vol++; 2409 if (vol != bv->bv_volid) 2410 continue; 2411 2412 bv->bv_status = BIOC_SVONLINE; 2413 bv->bv_size = hotspare->src_meta.scmi.scm_size << DEV_BSHIFT; 2414 bv->bv_level = -1; /* Hotspare. */ 2415 bv->bv_nodisk = 1; 2416 strlcpy(bv->bv_dev, hotspare->src_meta.scmi.scm_devname, 2417 sizeof(bv->bv_dev)); 2418 strlcpy(bv->bv_vendor, hotspare->src_meta.scmi.scm_devname, 2419 sizeof(bv->bv_vendor)); 2420 rv = 0; 2421 goto done; 2422 } 2423 2424 done: 2425 return (rv); 2426 } 2427 2428 int 2429 sr_ioctl_disk(struct sr_softc *sc, struct bioc_disk *bd) 2430 { 2431 int i, vol, rv = EINVAL, id; 2432 struct sr_chunk *src, *hotspare; 2433 2434 for (i = 0, vol = -1; i < SR_MAX_LD; i++) { 2435 /* XXX this will not work when we stagger disciplines */ 2436 if (sc->sc_dis[i]) 2437 vol++; 2438 if (vol != bd->bd_volid) 2439 continue; 2440 2441 if (sc->sc_dis[i] == NULL) 2442 goto done; 2443 2444 id = bd->bd_diskid; 2445 2446 if (id < sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no) 2447 src = sc->sc_dis[i]->sd_vol.sv_chunks[id]; 2448 #ifdef CRYPTO 2449 else if (id == sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no && 2450 sc->sc_dis[i]->sd_meta->ssdi.ssd_level == 'C' && 2451 sc->sc_dis[i]->mds.mdd_crypto.key_disk != NULL) 2452 src = sc->sc_dis[i]->mds.mdd_crypto.key_disk; 2453 #endif 2454 else 2455 break; 2456 2457 bd->bd_status = src->src_meta.scm_status; 2458 bd->bd_size = src->src_meta.scmi.scm_size << DEV_BSHIFT; 2459 bd->bd_channel = vol; 2460 bd->bd_target = id; 2461 strlcpy(bd->bd_vendor, src->src_meta.scmi.scm_devname, 2462 sizeof(bd->bd_vendor)); 2463 rv = 0; 2464 goto done; 2465 } 2466 2467 /* Check hotspares list. */ 2468 SLIST_FOREACH(hotspare, &sc->sc_hotspare_list, src_link) { 2469 vol++; 2470 if (vol != bd->bd_volid) 2471 continue; 2472 2473 if (bd->bd_diskid != 0) 2474 break; 2475 2476 bd->bd_status = hotspare->src_meta.scm_status; 2477 bd->bd_size = hotspare->src_meta.scmi.scm_size << DEV_BSHIFT; 2478 bd->bd_channel = vol; 2479 bd->bd_target = bd->bd_diskid; 2480 strlcpy(bd->bd_vendor, hotspare->src_meta.scmi.scm_devname, 2481 sizeof(bd->bd_vendor)); 2482 rv = 0; 2483 goto done; 2484 } 2485 2486 done: 2487 return (rv); 2488 } 2489 2490 int 2491 sr_ioctl_setstate(struct sr_softc *sc, struct bioc_setstate *bs) 2492 { 2493 int rv = EINVAL; 2494 int i, vol, found, c; 2495 struct sr_discipline *sd = NULL; 2496 struct sr_chunk *ch_entry; 2497 struct sr_chunk_head *cl; 2498 2499 if (bs->bs_other_id_type == BIOC_SSOTHER_UNUSED) 2500 goto done; 2501 2502 if (bs->bs_status == BIOC_SSHOTSPARE) { 2503 rv = sr_hotspare(sc, (dev_t)bs->bs_other_id); 2504 goto done; 2505 } 2506 2507 for (i = 0, vol = -1; i < SR_MAX_LD; i++) { 2508 /* XXX this will not work when we stagger disciplines */ 2509 if (sc->sc_dis[i]) 2510 vol++; 2511 if (vol != bs->bs_volid) 2512 continue; 2513 sd = sc->sc_dis[i]; 2514 break; 2515 } 2516 if (sd == NULL) 2517 goto done; 2518 2519 switch (bs->bs_status) { 2520 case BIOC_SSOFFLINE: 2521 /* Take chunk offline */ 2522 found = c = 0; 2523 cl = &sd->sd_vol.sv_chunk_list; 2524 SLIST_FOREACH(ch_entry, cl, src_link) { 2525 if (ch_entry->src_dev_mm == bs->bs_other_id) { 2526 found = 1; 2527 break; 2528 } 2529 c++; 2530 } 2531 if (found == 0) { 2532 sr_error(sc, "chunk not part of array"); 2533 goto done; 2534 } 2535 2536 /* XXX: check current state first */ 2537 sd->sd_set_chunk_state(sd, c, BIOC_SDOFFLINE); 2538 2539 if (sr_meta_save(sd, SR_META_DIRTY)) { 2540 sr_error(sc, "could not save metadata for %s", 2541 sd->sd_meta->ssd_devname); 2542 goto done; 2543 } 2544 rv = 0; 2545 break; 2546 2547 case BIOC_SDSCRUB: 2548 break; 2549 2550 case BIOC_SSREBUILD: 2551 rv = sr_rebuild_init(sd, (dev_t)bs->bs_other_id, 0); 2552 break; 2553 2554 default: 2555 sr_error(sc, "unsupported state request %d", bs->bs_status); 2556 } 2557 2558 done: 2559 return (rv); 2560 } 2561 2562 int 2563 sr_chunk_in_use(struct sr_softc *sc, dev_t dev) 2564 { 2565 struct sr_discipline *sd; 2566 struct sr_chunk *chunk; 2567 int i, c; 2568 2569 DNPRINTF(SR_D_MISC, "%s: sr_chunk_in_use(%d)\n", DEVNAME(sc), dev); 2570 2571 if (dev == NODEV) 2572 return BIOC_SDINVALID; 2573 2574 /* See if chunk is already in use. */ 2575 for (i = 0; i < SR_MAX_LD; i++) { 2576 if (sc->sc_dis[i] == NULL) 2577 continue; 2578 sd = sc->sc_dis[i]; 2579 for (c = 0; c < sd->sd_meta->ssdi.ssd_chunk_no; c++) { 2580 chunk = sd->sd_vol.sv_chunks[c]; 2581 if (chunk->src_dev_mm == dev) 2582 return chunk->src_meta.scm_status; 2583 } 2584 } 2585 2586 /* Check hotspares list. */ 2587 SLIST_FOREACH(chunk, &sc->sc_hotspare_list, src_link) 2588 if (chunk->src_dev_mm == dev) 2589 return chunk->src_meta.scm_status; 2590 2591 return BIOC_SDINVALID; 2592 } 2593 2594 int 2595 sr_hotspare(struct sr_softc *sc, dev_t dev) 2596 { 2597 struct sr_discipline *sd = NULL; 2598 struct sr_metadata *sm = NULL; 2599 struct sr_meta_chunk *hm; 2600 struct sr_chunk_head *cl; 2601 struct sr_chunk *chunk, *last, *hotspare = NULL; 2602 struct sr_uuid uuid; 2603 struct disklabel label; 2604 struct vnode *vn; 2605 daddr64_t size; 2606 char devname[32]; 2607 int rv = EINVAL; 2608 int c, part, open = 0; 2609 2610 /* 2611 * Add device to global hotspares list. 2612 */ 2613 2614 sr_meta_getdevname(sc, dev, devname, sizeof(devname)); 2615 2616 /* Make sure chunk is not already in use. */ 2617 c = sr_chunk_in_use(sc, dev); 2618 if (c != BIOC_SDINVALID && c != BIOC_SDOFFLINE) { 2619 if (c == BIOC_SDHOTSPARE) 2620 sr_error(sc, "%s is already a hotspare", devname); 2621 else 2622 sr_error(sc, "%s is already in use", devname); 2623 goto done; 2624 } 2625 2626 /* XXX - See if there is an existing degraded volume... */ 2627 2628 /* Open device. */ 2629 if (bdevvp(dev, &vn)) { 2630 sr_error(sc, "sr_hotspare: cannot allocate vnode"); 2631 goto done; 2632 } 2633 if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc)) { 2634 DNPRINTF(SR_D_META,"%s: sr_hotspare cannot open %s\n", 2635 DEVNAME(sc), devname); 2636 vput(vn); 2637 goto fail; 2638 } 2639 open = 1; /* close dev on error */ 2640 2641 /* Get partition details. */ 2642 part = DISKPART(dev); 2643 if (VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD, 2644 NOCRED, curproc)) { 2645 DNPRINTF(SR_D_META, "%s: sr_hotspare ioctl failed\n", 2646 DEVNAME(sc)); 2647 VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, curproc); 2648 vput(vn); 2649 goto fail; 2650 } 2651 if (label.d_secsize != DEV_BSIZE) { 2652 sr_error(sc, "%s has unsupported sector size (%d)", 2653 devname, label.d_secsize); 2654 goto fail; 2655 } 2656 if (label.d_partitions[part].p_fstype != FS_RAID) { 2657 sr_error(sc, "%s partition not of type RAID (%d)", 2658 devname, label.d_partitions[part].p_fstype); 2659 goto fail; 2660 } 2661 2662 /* Calculate partition size. */ 2663 size = DL_GETPSIZE(&label.d_partitions[part]) - SR_DATA_OFFSET; 2664 2665 /* 2666 * Create and populate chunk metadata. 2667 */ 2668 2669 sr_uuid_generate(&uuid); 2670 hotspare = malloc(sizeof(struct sr_chunk), M_DEVBUF, M_WAITOK | M_ZERO); 2671 2672 hotspare->src_dev_mm = dev; 2673 hotspare->src_vn = vn; 2674 strlcpy(hotspare->src_devname, devname, sizeof(hm->scmi.scm_devname)); 2675 hotspare->src_size = size; 2676 2677 hm = &hotspare->src_meta; 2678 hm->scmi.scm_volid = SR_HOTSPARE_VOLID; 2679 hm->scmi.scm_chunk_id = 0; 2680 hm->scmi.scm_size = size; 2681 hm->scmi.scm_coerced_size = size; 2682 strlcpy(hm->scmi.scm_devname, devname, sizeof(hm->scmi.scm_devname)); 2683 bcopy(&uuid, &hm->scmi.scm_uuid, sizeof(struct sr_uuid)); 2684 2685 sr_checksum(sc, hm, &hm->scm_checksum, 2686 sizeof(struct sr_meta_chunk_invariant)); 2687 2688 hm->scm_status = BIOC_SDHOTSPARE; 2689 2690 /* 2691 * Create and populate our own discipline and metadata. 2692 */ 2693 2694 sm = malloc(sizeof(struct sr_metadata), M_DEVBUF, M_WAITOK | M_ZERO); 2695 sm->ssdi.ssd_magic = SR_MAGIC; 2696 sm->ssdi.ssd_version = SR_META_VERSION; 2697 sm->ssd_ondisk = 0; 2698 sm->ssdi.ssd_vol_flags = 0; 2699 bcopy(&uuid, &sm->ssdi.ssd_uuid, sizeof(struct sr_uuid)); 2700 sm->ssdi.ssd_chunk_no = 1; 2701 sm->ssdi.ssd_volid = SR_HOTSPARE_VOLID; 2702 sm->ssdi.ssd_level = SR_HOTSPARE_LEVEL; 2703 sm->ssdi.ssd_size = size; 2704 strlcpy(sm->ssdi.ssd_vendor, "OPENBSD", sizeof(sm->ssdi.ssd_vendor)); 2705 snprintf(sm->ssdi.ssd_product, sizeof(sm->ssdi.ssd_product), 2706 "SR %s", "HOTSPARE"); 2707 snprintf(sm->ssdi.ssd_revision, sizeof(sm->ssdi.ssd_revision), 2708 "%03d", SR_META_VERSION); 2709 2710 sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO); 2711 sd->sd_sc = sc; 2712 sd->sd_meta = sm; 2713 sd->sd_meta_type = SR_META_F_NATIVE; 2714 sd->sd_vol_status = BIOC_SVONLINE; 2715 strlcpy(sd->sd_name, "HOTSPARE", sizeof(sd->sd_name)); 2716 SLIST_INIT(&sd->sd_meta_opt); 2717 2718 /* Add chunk to volume. */ 2719 sd->sd_vol.sv_chunks = malloc(sizeof(struct sr_chunk *), M_DEVBUF, 2720 M_WAITOK | M_ZERO); 2721 sd->sd_vol.sv_chunks[0] = hotspare; 2722 SLIST_INIT(&sd->sd_vol.sv_chunk_list); 2723 SLIST_INSERT_HEAD(&sd->sd_vol.sv_chunk_list, hotspare, src_link); 2724 2725 /* Save metadata. */ 2726 if (sr_meta_save(sd, SR_META_DIRTY)) { 2727 sr_error(sc, "could not save metadata to %s", devname); 2728 goto fail; 2729 } 2730 2731 /* 2732 * Add chunk to hotspare list. 2733 */ 2734 rw_enter_write(&sc->sc_hs_lock); 2735 cl = &sc->sc_hotspare_list; 2736 if (SLIST_EMPTY(cl)) 2737 SLIST_INSERT_HEAD(cl, hotspare, src_link); 2738 else { 2739 SLIST_FOREACH(chunk, cl, src_link) 2740 last = chunk; 2741 SLIST_INSERT_AFTER(last, hotspare, src_link); 2742 } 2743 sc->sc_hotspare_no++; 2744 rw_exit_write(&sc->sc_hs_lock); 2745 2746 rv = 0; 2747 goto done; 2748 2749 fail: 2750 if (hotspare) 2751 free(hotspare, M_DEVBUF); 2752 2753 done: 2754 if (sd && sd->sd_vol.sv_chunks) 2755 free(sd->sd_vol.sv_chunks, M_DEVBUF); 2756 if (sd) 2757 free(sd, M_DEVBUF); 2758 if (sm) 2759 free(sm, M_DEVBUF); 2760 if (open) { 2761 VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, curproc); 2762 vput(vn); 2763 } 2764 2765 return (rv); 2766 } 2767 2768 void 2769 sr_hotspare_rebuild_callback(void *arg1, void *arg2) 2770 { 2771 sr_hotspare_rebuild((struct sr_discipline *)arg1); 2772 } 2773 2774 void 2775 sr_hotspare_rebuild(struct sr_discipline *sd) 2776 { 2777 struct sr_softc *sc = sd->sd_sc; 2778 struct sr_chunk_head *cl; 2779 struct sr_chunk *hotspare, *chunk = NULL; 2780 struct sr_workunit *wu; 2781 struct sr_ccb *ccb; 2782 int i, s, chunk_no, busy; 2783 2784 /* 2785 * Attempt to locate a hotspare and initiate rebuild. 2786 */ 2787 2788 for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) { 2789 if (sd->sd_vol.sv_chunks[i]->src_meta.scm_status == 2790 BIOC_SDOFFLINE) { 2791 chunk_no = i; 2792 chunk = sd->sd_vol.sv_chunks[i]; 2793 break; 2794 } 2795 } 2796 2797 if (chunk == NULL) { 2798 printf("%s: no offline chunk found on %s!\n", 2799 DEVNAME(sc), sd->sd_meta->ssd_devname); 2800 return; 2801 } 2802 2803 /* See if we have a suitable hotspare... */ 2804 rw_enter_write(&sc->sc_hs_lock); 2805 cl = &sc->sc_hotspare_list; 2806 SLIST_FOREACH(hotspare, cl, src_link) 2807 if (hotspare->src_size >= chunk->src_size) 2808 break; 2809 2810 if (hotspare != NULL) { 2811 2812 printf("%s: %s volume degraded, will attempt to " 2813 "rebuild on hotspare %s\n", DEVNAME(sc), 2814 sd->sd_meta->ssd_devname, hotspare->src_devname); 2815 2816 /* 2817 * Ensure that all pending I/O completes on the failed chunk 2818 * before trying to initiate a rebuild. 2819 */ 2820 i = 0; 2821 do { 2822 busy = 0; 2823 2824 s = splbio(); 2825 TAILQ_FOREACH(wu, &sd->sd_wu_pendq, swu_link) { 2826 TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) { 2827 if (ccb->ccb_target == chunk_no) 2828 busy = 1; 2829 } 2830 } 2831 TAILQ_FOREACH(wu, &sd->sd_wu_defq, swu_link) { 2832 TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) { 2833 if (ccb->ccb_target == chunk_no) 2834 busy = 1; 2835 } 2836 } 2837 splx(s); 2838 2839 if (busy) { 2840 tsleep(sd, PRIBIO, "sr_hotspare", hz); 2841 i++; 2842 } 2843 2844 } while (busy && i < 120); 2845 2846 DNPRINTF(SR_D_META, "%s: waited %i seconds for I/O to " 2847 "complete on failed chunk %s\n", DEVNAME(sc), 2848 i, chunk->src_devname); 2849 2850 if (busy) { 2851 printf("%s: pending I/O failed to complete on " 2852 "failed chunk %s, hotspare rebuild aborted...\n", 2853 DEVNAME(sc), chunk->src_devname); 2854 goto done; 2855 } 2856 2857 s = splbio(); 2858 rw_enter_write(&sc->sc_lock); 2859 bio_status_init(&sc->sc_status, &sc->sc_dev); 2860 if (sr_rebuild_init(sd, hotspare->src_dev_mm, 1) == 0) { 2861 2862 /* Remove hotspare from available list. */ 2863 sc->sc_hotspare_no--; 2864 SLIST_REMOVE(cl, hotspare, sr_chunk, src_link); 2865 free(hotspare, M_DEVBUF); 2866 2867 } 2868 rw_exit_write(&sc->sc_lock); 2869 splx(s); 2870 } 2871 done: 2872 rw_exit_write(&sc->sc_hs_lock); 2873 } 2874 2875 int 2876 sr_rebuild_init(struct sr_discipline *sd, dev_t dev, int hotspare) 2877 { 2878 struct sr_softc *sc = sd->sd_sc; 2879 struct sr_chunk *chunk = NULL; 2880 struct sr_meta_chunk *meta; 2881 struct disklabel label; 2882 struct vnode *vn; 2883 daddr64_t size, csize; 2884 char devname[32]; 2885 int rv = EINVAL, open = 0; 2886 int cid, i, part, status; 2887 2888 /* 2889 * Attempt to initiate a rebuild onto the specified device. 2890 */ 2891 2892 if (!(sd->sd_capabilities & SR_CAP_REBUILD)) { 2893 sr_error(sc, "discipline does not support rebuild"); 2894 goto done; 2895 } 2896 2897 /* make sure volume is in the right state */ 2898 if (sd->sd_vol_status == BIOC_SVREBUILD) { 2899 sr_error(sc, "rebuild already in progress"); 2900 goto done; 2901 } 2902 if (sd->sd_vol_status != BIOC_SVDEGRADED) { 2903 sr_error(sc, "volume not degraded"); 2904 goto done; 2905 } 2906 2907 /* Find first offline chunk. */ 2908 for (cid = 0; cid < sd->sd_meta->ssdi.ssd_chunk_no; cid++) { 2909 if (sd->sd_vol.sv_chunks[cid]->src_meta.scm_status == 2910 BIOC_SDOFFLINE) { 2911 chunk = sd->sd_vol.sv_chunks[cid]; 2912 break; 2913 } 2914 } 2915 if (chunk == NULL) { 2916 sr_error(sc, "no offline chunks available to rebuild"); 2917 goto done; 2918 } 2919 2920 /* Get coerced size from another online chunk. */ 2921 for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) { 2922 if (sd->sd_vol.sv_chunks[i]->src_meta.scm_status == 2923 BIOC_SDONLINE) { 2924 meta = &sd->sd_vol.sv_chunks[i]->src_meta; 2925 csize = meta->scmi.scm_coerced_size; 2926 break; 2927 } 2928 } 2929 2930 sr_meta_getdevname(sc, dev, devname, sizeof(devname)); 2931 if (bdevvp(dev, &vn)) { 2932 printf("%s: sr_rebuild_init: can't allocate vnode\n", 2933 DEVNAME(sc)); 2934 goto done; 2935 } 2936 if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc)) { 2937 DNPRINTF(SR_D_META,"%s: sr_ioctl_setstate can't " 2938 "open %s\n", DEVNAME(sc), devname); 2939 vput(vn); 2940 goto done; 2941 } 2942 open = 1; /* close dev on error */ 2943 2944 /* Get disklabel and check partition. */ 2945 part = DISKPART(dev); 2946 if (VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD, 2947 NOCRED, curproc)) { 2948 DNPRINTF(SR_D_META, "%s: sr_ioctl_setstate ioctl failed\n", 2949 DEVNAME(sc)); 2950 goto done; 2951 } 2952 if (label.d_secsize != DEV_BSIZE) { 2953 sr_error(sc, "%s has unsupported sector size (%d)", 2954 devname, label.d_secsize); 2955 goto done; 2956 } 2957 if (label.d_partitions[part].p_fstype != FS_RAID) { 2958 sr_error(sc, "%s partition not of type RAID (%d)", 2959 devname, label.d_partitions[part].p_fstype); 2960 goto done; 2961 } 2962 2963 /* Is the partition large enough? */ 2964 size = DL_GETPSIZE(&label.d_partitions[part]) - SR_DATA_OFFSET; 2965 if (size < csize) { 2966 sr_error(sc, "%s partition too small, at least %llu bytes " 2967 "required", devname, csize << DEV_BSHIFT); 2968 goto done; 2969 } else if (size > csize) 2970 sr_warn(sc, "%s partition too large, wasting %llu bytes", 2971 devname, (size - csize) << DEV_BSHIFT); 2972 2973 /* Ensure that this chunk is not already in use. */ 2974 status = sr_chunk_in_use(sc, dev); 2975 if (status != BIOC_SDINVALID && status != BIOC_SDOFFLINE && 2976 !(hotspare && status == BIOC_SDHOTSPARE)) { 2977 sr_error(sc, "%s is already in use", devname); 2978 goto done; 2979 } 2980 2981 /* Reset rebuild counter since we rebuilding onto a new chunk. */ 2982 sd->sd_meta->ssd_rebuild = 0; 2983 2984 open = 0; /* leave dev open from here on out */ 2985 2986 /* Fix up chunk. */ 2987 bcopy(label.d_uid, chunk->src_duid, sizeof(chunk->src_duid)); 2988 chunk->src_dev_mm = dev; 2989 chunk->src_vn = vn; 2990 2991 /* Reconstruct metadata. */ 2992 meta = &chunk->src_meta; 2993 meta->scmi.scm_volid = sd->sd_meta->ssdi.ssd_volid; 2994 meta->scmi.scm_chunk_id = cid; 2995 strlcpy(meta->scmi.scm_devname, devname, 2996 sizeof(meta->scmi.scm_devname)); 2997 meta->scmi.scm_size = size; 2998 meta->scmi.scm_coerced_size = csize; 2999 bcopy(&sd->sd_meta->ssdi.ssd_uuid, &meta->scmi.scm_uuid, 3000 sizeof(meta->scmi.scm_uuid)); 3001 sr_checksum(sc, meta, &meta->scm_checksum, 3002 sizeof(struct sr_meta_chunk_invariant)); 3003 3004 sd->sd_set_chunk_state(sd, cid, BIOC_SDREBUILD); 3005 3006 if (sr_meta_save(sd, SR_META_DIRTY)) { 3007 sr_error(sc, "could not save metadata to %s", devname); 3008 open = 1; 3009 goto done; 3010 } 3011 3012 sr_warn(sc, "rebuild of %s started on %s", 3013 sd->sd_meta->ssd_devname, devname); 3014 3015 sd->sd_reb_abort = 0; 3016 kthread_create_deferred(sr_rebuild, sd); 3017 3018 rv = 0; 3019 done: 3020 if (open) { 3021 VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, curproc); 3022 vput(vn); 3023 } 3024 3025 return (rv); 3026 } 3027 3028 void 3029 sr_roam_chunks(struct sr_discipline *sd) 3030 { 3031 struct sr_softc *sc = sd->sd_sc; 3032 struct sr_chunk *chunk; 3033 struct sr_meta_chunk *meta; 3034 int roamed = 0; 3035 3036 /* Have any chunks roamed? */ 3037 SLIST_FOREACH(chunk, &sd->sd_vol.sv_chunk_list, src_link) { 3038 meta = &chunk->src_meta; 3039 if (strncmp(meta->scmi.scm_devname, chunk->src_devname, 3040 sizeof(meta->scmi.scm_devname))) { 3041 3042 printf("%s: roaming device %s -> %s\n", DEVNAME(sc), 3043 meta->scmi.scm_devname, chunk->src_devname); 3044 3045 strlcpy(meta->scmi.scm_devname, chunk->src_devname, 3046 sizeof(meta->scmi.scm_devname)); 3047 3048 roamed++; 3049 } 3050 } 3051 3052 if (roamed) 3053 sr_meta_save(sd, SR_META_DIRTY); 3054 } 3055 3056 int 3057 sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user) 3058 { 3059 struct sr_meta_opt_item *omi; 3060 struct sr_chunk_head *cl; 3061 struct sr_discipline *sd = NULL; 3062 struct sr_chunk *ch_entry; 3063 struct scsi_link *link; 3064 struct device *dev; 3065 char *uuid, devname[32]; 3066 dev_t *dt; 3067 int i, no_chunk, rv = EINVAL, target, vol; 3068 int no_meta; 3069 3070 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_createraid(%d)\n", 3071 DEVNAME(sc), user); 3072 3073 /* user input */ 3074 if (bc->bc_dev_list_len > BIOC_CRMAXLEN) 3075 goto unwind; 3076 3077 dt = malloc(bc->bc_dev_list_len, M_DEVBUF, M_WAITOK | M_ZERO); 3078 if (user) { 3079 if (copyin(bc->bc_dev_list, dt, bc->bc_dev_list_len) != 0) 3080 goto unwind; 3081 } else 3082 bcopy(bc->bc_dev_list, dt, bc->bc_dev_list_len); 3083 3084 /* Initialise discipline. */ 3085 sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO); 3086 sd->sd_sc = sc; 3087 SLIST_INIT(&sd->sd_meta_opt); 3088 sd->sd_workq = workq_create("srdis", 1, IPL_BIO); 3089 if (sd->sd_workq == NULL) { 3090 sr_error(sc, "could not create discipline workq"); 3091 goto unwind; 3092 } 3093 if (sr_discipline_init(sd, bc->bc_level)) { 3094 sr_error(sc, "could not initialize discipline"); 3095 goto unwind; 3096 } 3097 3098 no_chunk = bc->bc_dev_list_len / sizeof(dev_t); 3099 cl = &sd->sd_vol.sv_chunk_list; 3100 SLIST_INIT(cl); 3101 3102 /* Ensure that chunks are not already in use. */ 3103 for (i = 0; i < no_chunk; i++) { 3104 if (sr_chunk_in_use(sc, dt[i]) != BIOC_SDINVALID) { 3105 sr_meta_getdevname(sc, dt[i], devname, sizeof(devname)); 3106 sr_error(sc, "chunk %s already in use", devname); 3107 goto unwind; 3108 } 3109 } 3110 3111 sd->sd_meta_type = sr_meta_probe(sd, dt, no_chunk); 3112 if (sd->sd_meta_type == SR_META_F_INVALID) { 3113 sr_error(sc, "invalid metadata format"); 3114 goto unwind; 3115 } 3116 3117 if (sr_meta_attach(sd, no_chunk, bc->bc_flags & BIOC_SCFORCE)) 3118 goto unwind; 3119 3120 /* force the raid volume by clearing metadata region */ 3121 if (bc->bc_flags & BIOC_SCFORCE) { 3122 /* make sure disk isn't up and running */ 3123 if (sr_meta_read(sd)) 3124 if (sr_already_assembled(sd)) { 3125 uuid = sr_uuid_format( 3126 &sd->sd_meta->ssdi.ssd_uuid); 3127 sr_error(sc, "disk %s is currently in use; " 3128 "cannot force create", uuid); 3129 free(uuid, M_DEVBUF); 3130 goto unwind; 3131 } 3132 3133 if (sr_meta_clear(sd)) { 3134 sr_error(sc, "failed to clear metadata"); 3135 goto unwind; 3136 } 3137 } 3138 3139 no_meta = sr_meta_read(sd); 3140 if (no_meta == -1) { 3141 3142 /* Corrupt metadata on one or more chunks. */ 3143 sr_error(sc, "one of the chunks has corrupt metadata; " 3144 "aborting assembly"); 3145 goto unwind; 3146 3147 } else if (no_meta == 0) { 3148 3149 /* Initialise volume and chunk metadata. */ 3150 sr_meta_init(sd, bc->bc_level, no_chunk); 3151 sd->sd_vol_status = BIOC_SVONLINE; 3152 sd->sd_meta_flags = bc->bc_flags & BIOC_SCNOAUTOASSEMBLE; 3153 if (sd->sd_create) { 3154 if ((i = sd->sd_create(sd, bc, no_chunk, 3155 sd->sd_vol.sv_chunk_minsz))) { 3156 rv = i; 3157 goto unwind; 3158 } 3159 } 3160 sr_meta_init_complete(sd); 3161 3162 DNPRINTF(SR_D_IOCTL, 3163 "%s: sr_ioctl_createraid: vol_size: %lld\n", 3164 DEVNAME(sc), sd->sd_meta->ssdi.ssd_size); 3165 3166 /* Warn if we've wasted chunk space due to coercing. */ 3167 if ((sd->sd_capabilities & SR_CAP_NON_COERCED) == 0 && 3168 sd->sd_vol.sv_chunk_minsz != sd->sd_vol.sv_chunk_maxsz) 3169 sr_warn(sc, "chunk sizes are not equal; up to %llu " 3170 "blocks wasted per chunk", 3171 sd->sd_vol.sv_chunk_maxsz - 3172 sd->sd_vol.sv_chunk_minsz); 3173 3174 } else { 3175 3176 /* Ensure metadata level matches requested assembly level. */ 3177 if (sd->sd_meta->ssdi.ssd_level != bc->bc_level) { 3178 sr_error(sc, "volume level does not match metadata " 3179 "level"); 3180 goto unwind; 3181 } 3182 3183 if (sr_already_assembled(sd)) { 3184 uuid = sr_uuid_format(&sd->sd_meta->ssdi.ssd_uuid); 3185 sr_error(sc, "disk %s already assembled", uuid); 3186 free(uuid, M_DEVBUF); 3187 goto unwind; 3188 } 3189 3190 if (user == 0 && sd->sd_meta_flags & BIOC_SCNOAUTOASSEMBLE) { 3191 DNPRINTF(SR_D_META, "%s: disk not auto assembled from " 3192 "metadata\n", DEVNAME(sc)); 3193 goto unwind; 3194 } 3195 3196 if (no_meta != no_chunk) 3197 sr_warn(sc, "trying to bring up %s degraded", 3198 sd->sd_meta->ssd_devname); 3199 3200 if (sd->sd_meta->ssd_meta_flags & SR_META_DIRTY) 3201 sr_warn(sc, "%s was not shutdown properly", 3202 sd->sd_meta->ssd_devname); 3203 3204 SLIST_FOREACH(omi, &sd->sd_meta_opt, omi_link) 3205 if (sd->sd_meta_opt_handler == NULL || 3206 sd->sd_meta_opt_handler(sd, omi->omi_som) != 0) 3207 sr_meta_opt_handler(sd, omi->omi_som); 3208 3209 if (sd->sd_assemble) { 3210 if ((i = sd->sd_assemble(sd, bc, no_chunk))) { 3211 rv = i; 3212 goto unwind; 3213 } 3214 } 3215 3216 DNPRINTF(SR_D_META, "%s: disk assembled from metadata\n", 3217 DEVNAME(sc)); 3218 3219 } 3220 3221 /* Metadata MUST be fully populated by this point. */ 3222 3223 /* Allocate all resources. */ 3224 if ((rv = sd->sd_alloc_resources(sd))) 3225 goto unwind; 3226 3227 /* Adjust flags if necessary. */ 3228 if ((sd->sd_capabilities & SR_CAP_AUTO_ASSEMBLE) && 3229 (bc->bc_flags & BIOC_SCNOAUTOASSEMBLE) != 3230 (sd->sd_meta->ssdi.ssd_vol_flags & BIOC_SCNOAUTOASSEMBLE)) { 3231 sd->sd_meta->ssdi.ssd_vol_flags &= ~BIOC_SCNOAUTOASSEMBLE; 3232 sd->sd_meta->ssdi.ssd_vol_flags |= 3233 bc->bc_flags & BIOC_SCNOAUTOASSEMBLE; 3234 } 3235 3236 if (sd->sd_capabilities & SR_CAP_SYSTEM_DISK) { 3237 3238 /* Initialise volume state. */ 3239 sd->sd_set_vol_state(sd); 3240 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 3241 sr_error(sc, "%s is offline, will not be brought " 3242 "online", sd->sd_meta->ssd_devname); 3243 goto unwind; 3244 } 3245 3246 /* Setup SCSI iopool. */ 3247 mtx_init(&sd->sd_wu_mtx, IPL_BIO); 3248 scsi_iopool_init(&sd->sd_iopool, sd, sr_wu_get, sr_wu_put); 3249 3250 /* 3251 * All checks passed - return ENXIO if volume cannot be created. 3252 */ 3253 rv = ENXIO; 3254 3255 /* 3256 * Find a free target. 3257 * 3258 * XXX: We reserve sd_target == 0 to indicate the 3259 * discipline is not linked into sc->sc_dis, so begin 3260 * the search with target = 1. 3261 */ 3262 for (target = 1; target < SR_MAX_LD; target++) 3263 if (sc->sc_dis[target] == NULL) 3264 break; 3265 if (target == SR_MAX_LD) { 3266 sr_error(sc, "no free target for %s", 3267 sd->sd_meta->ssd_devname); 3268 goto unwind; 3269 } 3270 3271 /* Clear sense data. */ 3272 bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense)); 3273 3274 /* Attach discipline and get midlayer to probe it. */ 3275 sd->sd_target = target; 3276 sc->sc_dis[target] = sd; 3277 if (scsi_probe_lun(sc->sc_scsibus, target, 0) != 0) { 3278 sr_error(sc, "scsi_probe_lun failed"); 3279 sc->sc_dis[target] = NULL; 3280 sd->sd_target = 0; 3281 goto unwind; 3282 } 3283 3284 link = scsi_get_link(sc->sc_scsibus, target, 0); 3285 dev = link->device_softc; 3286 DNPRINTF(SR_D_IOCTL, "%s: sr device added: %s at target %d\n", 3287 DEVNAME(sc), dev->dv_xname, sd->sd_target); 3288 3289 for (i = 0, vol = -1; i <= sd->sd_target; i++) 3290 if (sc->sc_dis[i]) 3291 vol++; 3292 3293 rv = 0; 3294 3295 if (sd->sd_meta->ssd_devname[0] != '\0' && 3296 strncmp(sd->sd_meta->ssd_devname, dev->dv_xname, 3297 sizeof(dev->dv_xname))) 3298 sr_warn(sc, "volume %s is roaming, it used to be %s, " 3299 "updating metadata", dev->dv_xname, 3300 sd->sd_meta->ssd_devname); 3301 3302 /* Populate remaining volume metadata. */ 3303 sd->sd_meta->ssdi.ssd_volid = vol; 3304 strlcpy(sd->sd_meta->ssd_devname, dev->dv_xname, 3305 sizeof(sd->sd_meta->ssd_devname)); 3306 3307 sr_info(sc, "%s volume attached as %s", 3308 sd->sd_meta->ssdi.ssd_product, sd->sd_meta->ssd_devname); 3309 3310 /* Update device name on any roaming chunks. */ 3311 sr_roam_chunks(sd); 3312 3313 #ifndef SMALL_KERNEL 3314 if (sr_sensors_create(sd)) 3315 sr_warn(sc, "unable to create sensor for %s", 3316 dev->dv_xname); 3317 #endif /* SMALL_KERNEL */ 3318 } else { 3319 /* This volume does not attach as a system disk. */ 3320 ch_entry = SLIST_FIRST(cl); /* XXX */ 3321 strlcpy(sd->sd_meta->ssd_devname, ch_entry->src_devname, 3322 sizeof(sd->sd_meta->ssd_devname)); 3323 3324 if (sd->sd_start_discipline(sd)) 3325 goto unwind; 3326 } 3327 3328 /* Save current metadata to disk. */ 3329 rv = sr_meta_save(sd, SR_META_DIRTY); 3330 3331 if (sd->sd_vol_status == BIOC_SVREBUILD) 3332 kthread_create_deferred(sr_rebuild, sd); 3333 3334 sd->sd_ready = 1; 3335 3336 return (rv); 3337 3338 unwind: 3339 sr_discipline_shutdown(sd, 0); 3340 3341 if (rv == EAGAIN) 3342 rv = 0; 3343 3344 return (rv); 3345 } 3346 3347 int 3348 sr_ioctl_deleteraid(struct sr_softc *sc, struct bioc_deleteraid *dr) 3349 { 3350 struct sr_discipline *sd = NULL; 3351 int rv = 1; 3352 int i; 3353 3354 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_deleteraid %s\n", DEVNAME(sc), 3355 dr->bd_dev); 3356 3357 for (i = 0; i < SR_MAX_LD; i++) 3358 if (sc->sc_dis[i]) { 3359 if (!strncmp(sc->sc_dis[i]->sd_meta->ssd_devname, 3360 dr->bd_dev, 3361 sizeof(sc->sc_dis[i]->sd_meta->ssd_devname))) { 3362 sd = sc->sc_dis[i]; 3363 break; 3364 } 3365 } 3366 3367 if (sd == NULL) { 3368 sr_error(sc, "volume %s not found", dr->bd_dev); 3369 goto bad; 3370 } 3371 3372 sd->sd_deleted = 1; 3373 sd->sd_meta->ssdi.ssd_vol_flags = BIOC_SCNOAUTOASSEMBLE; 3374 sr_discipline_shutdown(sd, 1); 3375 3376 rv = 0; 3377 bad: 3378 return (rv); 3379 } 3380 3381 int 3382 sr_ioctl_discipline(struct sr_softc *sc, struct bioc_discipline *bd) 3383 { 3384 struct sr_discipline *sd = NULL; 3385 int i, rv = 1; 3386 3387 /* Dispatch a discipline specific ioctl. */ 3388 3389 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_discipline %s\n", DEVNAME(sc), 3390 bd->bd_dev); 3391 3392 for (i = 0; i < SR_MAX_LD; i++) 3393 if (sc->sc_dis[i]) { 3394 if (!strncmp(sc->sc_dis[i]->sd_meta->ssd_devname, 3395 bd->bd_dev, 3396 sizeof(sc->sc_dis[i]->sd_meta->ssd_devname))) { 3397 sd = sc->sc_dis[i]; 3398 break; 3399 } 3400 } 3401 3402 if (sd && sd->sd_ioctl_handler) 3403 rv = sd->sd_ioctl_handler(sd, bd); 3404 3405 return (rv); 3406 } 3407 3408 int 3409 sr_ioctl_installboot(struct sr_softc *sc, struct bioc_installboot *bb) 3410 { 3411 void *bootblk = NULL, *bootldr = NULL; 3412 struct sr_discipline *sd = NULL; 3413 struct sr_chunk *chunk; 3414 struct sr_meta_opt_item *omi; 3415 struct sr_meta_boot *sbm; 3416 struct disk *dk; 3417 u_int32_t bbs, bls; 3418 u_char duid[8]; 3419 int rv = EINVAL; 3420 int i; 3421 3422 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_installboot %s\n", DEVNAME(sc), 3423 bb->bb_dev); 3424 3425 for (i = 0; i < SR_MAX_LD; i++) 3426 if (sc->sc_dis[i]) { 3427 if (!strncmp(sc->sc_dis[i]->sd_meta->ssd_devname, 3428 bb->bb_dev, 3429 sizeof(sc->sc_dis[i]->sd_meta->ssd_devname))) { 3430 sd = sc->sc_dis[i]; 3431 break; 3432 } 3433 } 3434 3435 if (sd == NULL) 3436 goto done; 3437 3438 bzero(duid, sizeof(duid)); 3439 TAILQ_FOREACH(dk, &disklist, dk_link) 3440 if (!strncmp(dk->dk_name, bb->bb_dev, sizeof(bb->bb_dev))) 3441 break; 3442 if (dk == NULL || dk->dk_label == NULL || 3443 (dk->dk_flags & DKF_LABELVALID) == 0 || 3444 bcmp(dk->dk_label->d_uid, &duid, sizeof(duid)) == 0) { 3445 sr_error(sc, "failed to get DUID for softraid volume"); 3446 goto done; 3447 } 3448 bcopy(dk->dk_label->d_uid, duid, sizeof(duid)); 3449 3450 /* Ensure that boot storage area is large enough. */ 3451 if (sd->sd_meta->ssd_data_offset < (SR_BOOT_OFFSET + SR_BOOT_SIZE)) { 3452 sr_error(sc, "insufficient boot storage"); 3453 goto done; 3454 } 3455 3456 if (bb->bb_bootblk_size > SR_BOOT_BLOCKS_SIZE * 512) 3457 goto done; 3458 3459 if (bb->bb_bootldr_size > SR_BOOT_LOADER_SIZE * 512) 3460 goto done; 3461 3462 /* Copy in boot block. */ 3463 bbs = howmany(bb->bb_bootblk_size, DEV_BSIZE) * DEV_BSIZE; 3464 bootblk = malloc(bbs, M_DEVBUF, M_WAITOK | M_ZERO); 3465 if (copyin(bb->bb_bootblk, bootblk, bb->bb_bootblk_size) != 0) 3466 goto done; 3467 3468 /* Copy in boot loader. */ 3469 bls = howmany(bb->bb_bootldr_size, DEV_BSIZE) * DEV_BSIZE; 3470 bootldr = malloc(bls, M_DEVBUF, M_WAITOK | M_ZERO); 3471 if (copyin(bb->bb_bootldr, bootldr, bb->bb_bootldr_size) != 0) 3472 goto done; 3473 3474 /* Create or update optional meta for bootable volumes. */ 3475 SLIST_FOREACH(omi, &sd->sd_meta_opt, omi_link) 3476 if (omi->omi_som->som_type == SR_OPT_BOOT) 3477 break; 3478 if (omi == NULL) { 3479 omi = malloc(sizeof(struct sr_meta_opt_item), M_DEVBUF, 3480 M_WAITOK | M_ZERO); 3481 omi->omi_som = malloc(sizeof(struct sr_meta_crypto), M_DEVBUF, 3482 M_WAITOK | M_ZERO); 3483 omi->omi_som->som_type = SR_OPT_BOOT; 3484 omi->omi_som->som_length = sizeof(struct sr_meta_boot); 3485 SLIST_INSERT_HEAD(&sd->sd_meta_opt, omi, omi_link); 3486 sd->sd_meta->ssdi.ssd_opt_no++; 3487 } 3488 sbm = (struct sr_meta_boot *)omi->omi_som; 3489 3490 bcopy(duid, sbm->sbm_root_duid, sizeof(sbm->sbm_root_duid)); 3491 bzero(&sbm->sbm_boot_duid, sizeof(sbm->sbm_boot_duid)); 3492 sbm->sbm_bootblk_size = bbs; 3493 sbm->sbm_bootldr_size = bls; 3494 3495 DNPRINTF(SR_D_IOCTL, "sr_ioctl_installboot: root duid is " 3496 "%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx\n", 3497 sbm->sbm_root_duid[0], sbm->sbm_root_duid[1], 3498 sbm->sbm_root_duid[2], sbm->sbm_root_duid[3], 3499 sbm->sbm_root_duid[4], sbm->sbm_root_duid[5], 3500 sbm->sbm_root_duid[6], sbm->sbm_root_duid[7]); 3501 3502 /* Save boot block and boot loader to each chunk. */ 3503 for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) { 3504 3505 chunk = sd->sd_vol.sv_chunks[i]; 3506 if (chunk->src_meta.scm_status != BIOC_SDONLINE && 3507 chunk->src_meta.scm_status != BIOC_SDREBUILD) 3508 continue; 3509 3510 if (i < SR_MAX_BOOT_DISKS) 3511 bcopy(chunk->src_duid, &sbm->sbm_boot_duid[i], 3512 sizeof(sbm->sbm_boot_duid[i])); 3513 3514 /* Save boot blocks. */ 3515 DNPRINTF(SR_D_IOCTL, 3516 "sr_ioctl_installboot: saving boot block to %s " 3517 "(%u bytes)\n", chunk->src_devname, bbs); 3518 3519 if (sr_rw(sc, chunk->src_dev_mm, bootblk, bbs, 3520 SR_BOOT_BLOCKS_OFFSET, B_WRITE)) { 3521 sr_error(sc, "failed to write boot block", DEVNAME(sc)); 3522 goto done; 3523 } 3524 3525 /* Save boot loader.*/ 3526 DNPRINTF(SR_D_IOCTL, 3527 "sr_ioctl_installboot: saving boot loader to %s " 3528 "(%u bytes)\n", chunk->src_devname, bls); 3529 3530 if (sr_rw(sc, chunk->src_dev_mm, bootldr, bls, 3531 SR_BOOT_LOADER_OFFSET, B_WRITE)) { 3532 sr_error(sc, "failed to write boot loader"); 3533 goto done; 3534 } 3535 3536 } 3537 3538 /* XXX - Install boot block on disk - MD code. */ 3539 3540 /* Mark volume as bootable and save metadata. */ 3541 sd->sd_meta->ssdi.ssd_vol_flags |= BIOC_SCBOOTABLE; 3542 if (sr_meta_save(sd, SR_META_DIRTY)) { 3543 sr_error(sc, "could not save metadata to %s", 3544 chunk->src_devname); 3545 goto done; 3546 } 3547 3548 rv = 0; 3549 3550 done: 3551 if (bootblk) 3552 free(bootblk, M_DEVBUF); 3553 if (bootldr) 3554 free(bootldr, M_DEVBUF); 3555 3556 return (rv); 3557 } 3558 3559 void 3560 sr_chunks_unwind(struct sr_softc *sc, struct sr_chunk_head *cl) 3561 { 3562 struct sr_chunk *ch_entry, *ch_next; 3563 3564 DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind\n", DEVNAME(sc)); 3565 3566 if (!cl) 3567 return; 3568 3569 for (ch_entry = SLIST_FIRST(cl); 3570 ch_entry != SLIST_END(cl); ch_entry = ch_next) { 3571 ch_next = SLIST_NEXT(ch_entry, src_link); 3572 3573 DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind closing: %s\n", 3574 DEVNAME(sc), ch_entry->src_devname); 3575 if (ch_entry->src_vn) { 3576 /* 3577 * XXX - explicitly lock the vnode until we can resolve 3578 * the problem introduced by vnode aliasing... specfs 3579 * has no locking, whereas ufs/ffs does! 3580 */ 3581 vn_lock(ch_entry->src_vn, LK_EXCLUSIVE | 3582 LK_RETRY, curproc); 3583 VOP_CLOSE(ch_entry->src_vn, FREAD | FWRITE, NOCRED, 3584 curproc); 3585 vput(ch_entry->src_vn); 3586 } 3587 free(ch_entry, M_DEVBUF); 3588 } 3589 SLIST_INIT(cl); 3590 } 3591 3592 void 3593 sr_discipline_free(struct sr_discipline *sd) 3594 { 3595 struct sr_softc *sc; 3596 struct sr_meta_opt_head *som; 3597 struct sr_meta_opt_item *omi, *omi_next; 3598 3599 if (!sd) 3600 return; 3601 3602 sc = sd->sd_sc; 3603 3604 DNPRINTF(SR_D_DIS, "%s: sr_discipline_free %s\n", 3605 DEVNAME(sc), 3606 sd->sd_meta ? sd->sd_meta->ssd_devname : "nodev"); 3607 if (sd->sd_free_resources) 3608 sd->sd_free_resources(sd); 3609 if (sd->sd_vol.sv_chunks) 3610 free(sd->sd_vol.sv_chunks, M_DEVBUF); 3611 if (sd->sd_meta) 3612 free(sd->sd_meta, M_DEVBUF); 3613 if (sd->sd_meta_foreign) 3614 free(sd->sd_meta_foreign, M_DEVBUF); 3615 3616 som = &sd->sd_meta_opt; 3617 for (omi = SLIST_FIRST(som); omi != SLIST_END(som); omi = omi_next) { 3618 omi_next = SLIST_NEXT(omi, omi_link); 3619 if (omi->omi_som) 3620 free(omi->omi_som, M_DEVBUF); 3621 free(omi, M_DEVBUF); 3622 } 3623 3624 if (sd->sd_target != 0) { 3625 KASSERT(sc->sc_dis[sd->sd_target] == sd); 3626 sc->sc_dis[sd->sd_target] = NULL; 3627 } 3628 3629 explicit_bzero(sd, sizeof *sd); 3630 free(sd, M_DEVBUF); 3631 } 3632 3633 void 3634 sr_discipline_shutdown(struct sr_discipline *sd, int meta_save) 3635 { 3636 struct sr_softc *sc; 3637 int s; 3638 3639 if (!sd) 3640 return; 3641 sc = sd->sd_sc; 3642 3643 DNPRINTF(SR_D_DIS, "%s: sr_discipline_shutdown %s\n", DEVNAME(sc), 3644 sd->sd_meta ? sd->sd_meta->ssd_devname : "nodev"); 3645 3646 /* If rebuilding, abort rebuild and drain I/O. */ 3647 if (sd->sd_reb_active) { 3648 sd->sd_reb_abort = 1; 3649 while (sd->sd_reb_active) 3650 tsleep(sd, PWAIT, "sr_shutdown", 1); 3651 } 3652 3653 if (meta_save) 3654 sr_meta_save(sd, 0); 3655 3656 s = splbio(); 3657 3658 sd->sd_ready = 0; 3659 3660 /* make sure there isn't a sync pending and yield */ 3661 wakeup(sd); 3662 while (sd->sd_sync || sd->sd_must_flush) 3663 if (tsleep(&sd->sd_sync, MAXPRI, "sr_down", 60 * hz) == 3664 EWOULDBLOCK) 3665 break; 3666 3667 #ifndef SMALL_KERNEL 3668 sr_sensors_delete(sd); 3669 #endif /* SMALL_KERNEL */ 3670 3671 if (sd->sd_target != 0) 3672 scsi_detach_lun(sc->sc_scsibus, sd->sd_target, 0, DETACH_FORCE); 3673 3674 sr_chunks_unwind(sc, &sd->sd_vol.sv_chunk_list); 3675 3676 if (sd->sd_workq) 3677 workq_destroy(sd->sd_workq); 3678 3679 if (sd) 3680 sr_discipline_free(sd); 3681 3682 splx(s); 3683 } 3684 3685 int 3686 sr_discipline_init(struct sr_discipline *sd, int level) 3687 { 3688 int rv = 1; 3689 3690 /* Initialise discipline function pointers with defaults. */ 3691 sd->sd_alloc_resources = NULL; 3692 sd->sd_assemble = NULL; 3693 sd->sd_create = NULL; 3694 sd->sd_free_resources = NULL; 3695 sd->sd_ioctl_handler = NULL; 3696 sd->sd_openings = NULL; 3697 sd->sd_meta_opt_handler = NULL; 3698 sd->sd_scsi_inquiry = sr_raid_inquiry; 3699 sd->sd_scsi_read_cap = sr_raid_read_cap; 3700 sd->sd_scsi_tur = sr_raid_tur; 3701 sd->sd_scsi_req_sense = sr_raid_request_sense; 3702 sd->sd_scsi_start_stop = sr_raid_start_stop; 3703 sd->sd_scsi_sync = sr_raid_sync; 3704 sd->sd_scsi_rw = NULL; 3705 sd->sd_set_chunk_state = sr_set_chunk_state; 3706 sd->sd_set_vol_state = sr_set_vol_state; 3707 sd->sd_start_discipline = NULL; 3708 3709 switch (level) { 3710 case 0: 3711 sr_raid0_discipline_init(sd); 3712 break; 3713 case 1: 3714 sr_raid1_discipline_init(sd); 3715 break; 3716 case 4: 3717 sr_raidp_discipline_init(sd, SR_MD_RAID4); 3718 break; 3719 case 5: 3720 sr_raidp_discipline_init(sd, SR_MD_RAID5); 3721 break; 3722 case 6: 3723 sr_raid6_discipline_init(sd); 3724 break; 3725 #ifdef AOE 3726 /* AOE target. */ 3727 case 'A': 3728 sr_aoe_server_discipline_init(sd); 3729 break; 3730 /* AOE initiator. */ 3731 case 'a': 3732 sr_aoe_discipline_init(sd); 3733 break; 3734 #endif 3735 #ifdef CRYPTO 3736 case 'C': 3737 sr_crypto_discipline_init(sd); 3738 break; 3739 #endif 3740 case 'c': 3741 sr_concat_discipline_init(sd); 3742 break; 3743 default: 3744 goto bad; 3745 } 3746 3747 rv = 0; 3748 bad: 3749 return (rv); 3750 } 3751 3752 int 3753 sr_raid_inquiry(struct sr_workunit *wu) 3754 { 3755 struct sr_discipline *sd = wu->swu_dis; 3756 struct scsi_xfer *xs = wu->swu_xs; 3757 struct scsi_inquiry *cdb = (struct scsi_inquiry *)xs->cmd; 3758 struct scsi_inquiry_data inq; 3759 3760 DNPRINTF(SR_D_DIS, "%s: sr_raid_inquiry\n", DEVNAME(sd->sd_sc)); 3761 3762 if (xs->cmdlen != sizeof(*cdb)) 3763 return (EINVAL); 3764 3765 if (ISSET(cdb->flags, SI_EVPD)) 3766 return (EOPNOTSUPP); 3767 3768 bzero(&inq, sizeof(inq)); 3769 inq.device = T_DIRECT; 3770 inq.dev_qual2 = 0; 3771 inq.version = 2; 3772 inq.response_format = 2; 3773 inq.additional_length = 32; 3774 inq.flags |= SID_CmdQue; 3775 strlcpy(inq.vendor, sd->sd_meta->ssdi.ssd_vendor, 3776 sizeof(inq.vendor)); 3777 strlcpy(inq.product, sd->sd_meta->ssdi.ssd_product, 3778 sizeof(inq.product)); 3779 strlcpy(inq.revision, sd->sd_meta->ssdi.ssd_revision, 3780 sizeof(inq.revision)); 3781 sr_copy_internal_data(xs, &inq, sizeof(inq)); 3782 3783 return (0); 3784 } 3785 3786 int 3787 sr_raid_read_cap(struct sr_workunit *wu) 3788 { 3789 struct sr_discipline *sd = wu->swu_dis; 3790 struct scsi_xfer *xs = wu->swu_xs; 3791 struct scsi_read_cap_data rcd; 3792 struct scsi_read_cap_data_16 rcd16; 3793 daddr64_t addr; 3794 int rv = 1; 3795 3796 DNPRINTF(SR_D_DIS, "%s: sr_raid_read_cap\n", DEVNAME(sd->sd_sc)); 3797 3798 addr = sd->sd_meta->ssdi.ssd_size - 1; 3799 if (xs->cmd->opcode == READ_CAPACITY) { 3800 bzero(&rcd, sizeof(rcd)); 3801 if (addr > 0xffffffffllu) 3802 _lto4b(0xffffffff, rcd.addr); 3803 else 3804 _lto4b(addr, rcd.addr); 3805 _lto4b(512, rcd.length); 3806 sr_copy_internal_data(xs, &rcd, sizeof(rcd)); 3807 rv = 0; 3808 } else if (xs->cmd->opcode == READ_CAPACITY_16) { 3809 bzero(&rcd16, sizeof(rcd16)); 3810 _lto8b(addr, rcd16.addr); 3811 _lto4b(512, rcd16.length); 3812 sr_copy_internal_data(xs, &rcd16, sizeof(rcd16)); 3813 rv = 0; 3814 } 3815 3816 return (rv); 3817 } 3818 3819 int 3820 sr_raid_tur(struct sr_workunit *wu) 3821 { 3822 struct sr_discipline *sd = wu->swu_dis; 3823 3824 DNPRINTF(SR_D_DIS, "%s: sr_raid_tur\n", DEVNAME(sd->sd_sc)); 3825 3826 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 3827 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT; 3828 sd->sd_scsi_sense.flags = SKEY_NOT_READY; 3829 sd->sd_scsi_sense.add_sense_code = 0x04; 3830 sd->sd_scsi_sense.add_sense_code_qual = 0x11; 3831 sd->sd_scsi_sense.extra_len = 4; 3832 return (1); 3833 } else if (sd->sd_vol_status == BIOC_SVINVALID) { 3834 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT; 3835 sd->sd_scsi_sense.flags = SKEY_HARDWARE_ERROR; 3836 sd->sd_scsi_sense.add_sense_code = 0x05; 3837 sd->sd_scsi_sense.add_sense_code_qual = 0x00; 3838 sd->sd_scsi_sense.extra_len = 4; 3839 return (1); 3840 } 3841 3842 return (0); 3843 } 3844 3845 int 3846 sr_raid_request_sense(struct sr_workunit *wu) 3847 { 3848 struct sr_discipline *sd = wu->swu_dis; 3849 struct scsi_xfer *xs = wu->swu_xs; 3850 3851 DNPRINTF(SR_D_DIS, "%s: sr_raid_request_sense\n", 3852 DEVNAME(sd->sd_sc)); 3853 3854 /* use latest sense data */ 3855 bcopy(&sd->sd_scsi_sense, &xs->sense, sizeof(xs->sense)); 3856 3857 /* clear sense data */ 3858 bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense)); 3859 3860 return (0); 3861 } 3862 3863 int 3864 sr_raid_start_stop(struct sr_workunit *wu) 3865 { 3866 struct scsi_xfer *xs = wu->swu_xs; 3867 struct scsi_start_stop *ss = (struct scsi_start_stop *)xs->cmd; 3868 3869 DNPRINTF(SR_D_DIS, "%s: sr_raid_start_stop\n", 3870 DEVNAME(wu->swu_dis->sd_sc)); 3871 3872 if (!ss) 3873 return (1); 3874 3875 /* 3876 * do nothing! 3877 * a softraid discipline should always reflect correct status 3878 */ 3879 return (0); 3880 } 3881 3882 int 3883 sr_raid_sync(struct sr_workunit *wu) 3884 { 3885 struct sr_discipline *sd = wu->swu_dis; 3886 int s, rv = 0, ios; 3887 3888 DNPRINTF(SR_D_DIS, "%s: sr_raid_sync\n", DEVNAME(sd->sd_sc)); 3889 3890 /* when doing a fake sync don't count the wu */ 3891 ios = wu->swu_fake ? 0 : 1; 3892 3893 s = splbio(); 3894 sd->sd_sync = 1; 3895 3896 while (sd->sd_wu_pending > ios) 3897 if (tsleep(sd, PRIBIO, "sr_sync", 15 * hz) == EWOULDBLOCK) { 3898 DNPRINTF(SR_D_DIS, "%s: sr_raid_sync timeout\n", 3899 DEVNAME(sd->sd_sc)); 3900 rv = 1; 3901 break; 3902 } 3903 3904 sd->sd_sync = 0; 3905 splx(s); 3906 3907 wakeup(&sd->sd_sync); 3908 3909 return (rv); 3910 } 3911 3912 void 3913 sr_startwu_callback(void *arg1, void *arg2) 3914 { 3915 struct sr_discipline *sd = arg1; 3916 struct sr_workunit *wu = arg2; 3917 struct sr_ccb *ccb; 3918 int s; 3919 3920 s = splbio(); 3921 if (wu->swu_cb_active == 1) 3922 panic("%s: sr_startwu_callback", DEVNAME(sd->sd_sc)); 3923 wu->swu_cb_active = 1; 3924 3925 TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) 3926 VOP_STRATEGY(&ccb->ccb_buf); 3927 3928 wu->swu_cb_active = 0; 3929 splx(s); 3930 } 3931 3932 void 3933 sr_raid_startwu(struct sr_workunit *wu) 3934 { 3935 struct sr_discipline *sd = wu->swu_dis; 3936 3937 splassert(IPL_BIO); 3938 3939 if (wu->swu_state == SR_WU_RESTART) 3940 /* 3941 * no need to put the wu on the pending queue since we 3942 * are restarting the io 3943 */ 3944 ; 3945 else 3946 /* move wu to pending queue */ 3947 TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link); 3948 3949 /* start all individual ios */ 3950 workq_queue_task(sd->sd_workq, &wu->swu_wqt, 0, sr_startwu_callback, 3951 sd, wu); 3952 } 3953 3954 void 3955 sr_set_chunk_state(struct sr_discipline *sd, int c, int new_state) 3956 { 3957 int old_state, s; 3958 3959 DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_set_chunk_state %d -> %d\n", 3960 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 3961 sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state); 3962 3963 /* ok to go to splbio since this only happens in error path */ 3964 s = splbio(); 3965 old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status; 3966 3967 /* multiple IOs to the same chunk that fail will come through here */ 3968 if (old_state == new_state) 3969 goto done; 3970 3971 switch (old_state) { 3972 case BIOC_SDONLINE: 3973 if (new_state == BIOC_SDOFFLINE) 3974 break; 3975 else 3976 goto die; 3977 break; 3978 3979 case BIOC_SDOFFLINE: 3980 goto die; 3981 3982 default: 3983 die: 3984 splx(s); /* XXX */ 3985 panic("%s: %s: %s: invalid chunk state transition " 3986 "%d -> %d\n", DEVNAME(sd->sd_sc), 3987 sd->sd_meta->ssd_devname, 3988 sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, 3989 old_state, new_state); 3990 /* NOTREACHED */ 3991 } 3992 3993 sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state; 3994 sd->sd_set_vol_state(sd); 3995 3996 sd->sd_must_flush = 1; 3997 workq_add_task(NULL, 0, sr_meta_save_callback, sd, NULL); 3998 done: 3999 splx(s); 4000 } 4001 4002 void 4003 sr_set_vol_state(struct sr_discipline *sd) 4004 { 4005 int states[SR_MAX_STATES]; 4006 int new_state, i, s, nd; 4007 int old_state = sd->sd_vol_status; 4008 4009 DNPRINTF(SR_D_STATE, "%s: %s: sr_set_vol_state\n", 4010 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname); 4011 4012 nd = sd->sd_meta->ssdi.ssd_chunk_no; 4013 4014 for (i = 0; i < SR_MAX_STATES; i++) 4015 states[i] = 0; 4016 4017 for (i = 0; i < nd; i++) { 4018 s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status; 4019 if (s >= SR_MAX_STATES) 4020 panic("%s: %s: %s: invalid chunk state", 4021 DEVNAME(sd->sd_sc), 4022 sd->sd_meta->ssd_devname, 4023 sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname); 4024 states[s]++; 4025 } 4026 4027 if (states[BIOC_SDONLINE] == nd) 4028 new_state = BIOC_SVONLINE; 4029 else 4030 new_state = BIOC_SVOFFLINE; 4031 4032 DNPRINTF(SR_D_STATE, "%s: %s: sr_set_vol_state %d -> %d\n", 4033 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 4034 old_state, new_state); 4035 4036 switch (old_state) { 4037 case BIOC_SVONLINE: 4038 if (new_state == BIOC_SVOFFLINE || new_state == BIOC_SVONLINE) 4039 break; 4040 else 4041 goto die; 4042 break; 4043 4044 case BIOC_SVOFFLINE: 4045 /* XXX this might be a little too much */ 4046 goto die; 4047 4048 default: 4049 die: 4050 panic("%s: %s: invalid volume state transition " 4051 "%d -> %d\n", DEVNAME(sd->sd_sc), 4052 sd->sd_meta->ssd_devname, 4053 old_state, new_state); 4054 /* NOTREACHED */ 4055 } 4056 4057 sd->sd_vol_status = new_state; 4058 } 4059 4060 void 4061 sr_checksum_print(u_int8_t *md5) 4062 { 4063 int i; 4064 4065 for (i = 0; i < MD5_DIGEST_LENGTH; i++) 4066 printf("%02x", md5[i]); 4067 } 4068 4069 void 4070 sr_checksum(struct sr_softc *sc, void *src, void *md5, u_int32_t len) 4071 { 4072 MD5_CTX ctx; 4073 4074 DNPRINTF(SR_D_MISC, "%s: sr_checksum(%p %p %d)\n", DEVNAME(sc), src, 4075 md5, len); 4076 4077 MD5Init(&ctx); 4078 MD5Update(&ctx, src, len); 4079 MD5Final(md5, &ctx); 4080 } 4081 4082 void 4083 sr_uuid_generate(struct sr_uuid *uuid) 4084 { 4085 arc4random_buf(uuid->sui_id, sizeof(uuid->sui_id)); 4086 /* UUID version 4: random */ 4087 uuid->sui_id[6] &= 0x0f; 4088 uuid->sui_id[6] |= 0x40; 4089 /* RFC4122 variant */ 4090 uuid->sui_id[8] &= 0x3f; 4091 uuid->sui_id[8] |= 0x80; 4092 } 4093 4094 char * 4095 sr_uuid_format(struct sr_uuid *uuid) 4096 { 4097 char *uuidstr; 4098 4099 uuidstr = malloc(37, M_DEVBUF, M_WAITOK); 4100 4101 snprintf(uuidstr, 37, 4102 "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-" 4103 "%02x%02x%02x%02x%02x%02x", 4104 uuid->sui_id[0], uuid->sui_id[1], 4105 uuid->sui_id[2], uuid->sui_id[3], 4106 uuid->sui_id[4], uuid->sui_id[5], 4107 uuid->sui_id[6], uuid->sui_id[7], 4108 uuid->sui_id[8], uuid->sui_id[9], 4109 uuid->sui_id[10], uuid->sui_id[11], 4110 uuid->sui_id[12], uuid->sui_id[13], 4111 uuid->sui_id[14], uuid->sui_id[15]); 4112 4113 return uuidstr; 4114 } 4115 4116 void 4117 sr_uuid_print(struct sr_uuid *uuid, int cr) 4118 { 4119 char *uuidstr; 4120 4121 uuidstr = sr_uuid_format(uuid); 4122 printf("%s%s", uuidstr, (cr ? "\n" : "")); 4123 free(uuidstr, M_DEVBUF); 4124 } 4125 4126 int 4127 sr_already_assembled(struct sr_discipline *sd) 4128 { 4129 struct sr_softc *sc = sd->sd_sc; 4130 int i; 4131 4132 for (i = 0; i < SR_MAX_LD; i++) 4133 if (sc->sc_dis[i]) 4134 if (!bcmp(&sd->sd_meta->ssdi.ssd_uuid, 4135 &sc->sc_dis[i]->sd_meta->ssdi.ssd_uuid, 4136 sizeof(sd->sd_meta->ssdi.ssd_uuid))) 4137 return (1); 4138 4139 return (0); 4140 } 4141 4142 int32_t 4143 sr_validate_stripsize(u_int32_t b) 4144 { 4145 int s = 0; 4146 4147 if (b % 512) 4148 return (-1); 4149 4150 while ((b & 1) == 0) { 4151 b >>= 1; 4152 s++; 4153 } 4154 4155 /* only multiple of twos */ 4156 b >>= 1; 4157 if (b) 4158 return(-1); 4159 4160 return (s); 4161 } 4162 4163 void 4164 sr_shutdownhook(void *arg) 4165 { 4166 sr_shutdown((struct sr_softc *)arg); 4167 } 4168 4169 void 4170 sr_shutdown(struct sr_softc *sc) 4171 { 4172 int i; 4173 4174 DNPRINTF(SR_D_MISC, "%s: sr_shutdown\n", DEVNAME(sc)); 4175 4176 /* XXX this will not work when we stagger disciplines */ 4177 for (i = 0; i < SR_MAX_LD; i++) 4178 if (sc->sc_dis[i]) 4179 sr_discipline_shutdown(sc->sc_dis[i], 1); 4180 } 4181 4182 int 4183 sr_validate_io(struct sr_workunit *wu, daddr64_t *blk, char *func) 4184 { 4185 struct sr_discipline *sd = wu->swu_dis; 4186 struct scsi_xfer *xs = wu->swu_xs; 4187 int rv = 1; 4188 4189 DNPRINTF(SR_D_DIS, "%s: %s 0x%02x\n", DEVNAME(sd->sd_sc), func, 4190 xs->cmd->opcode); 4191 4192 if (sd->sd_meta->ssd_data_offset == 0) 4193 panic("invalid data offset"); 4194 4195 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 4196 DNPRINTF(SR_D_DIS, "%s: %s device offline\n", 4197 DEVNAME(sd->sd_sc), func); 4198 goto bad; 4199 } 4200 4201 if (xs->datalen == 0) { 4202 printf("%s: %s: illegal block count for %s\n", 4203 DEVNAME(sd->sd_sc), func, sd->sd_meta->ssd_devname); 4204 goto bad; 4205 } 4206 4207 if (xs->cmdlen == 10) 4208 *blk = _4btol(((struct scsi_rw_big *)xs->cmd)->addr); 4209 else if (xs->cmdlen == 16) 4210 *blk = _8btol(((struct scsi_rw_16 *)xs->cmd)->addr); 4211 else if (xs->cmdlen == 6) 4212 *blk = _3btol(((struct scsi_rw *)xs->cmd)->addr); 4213 else { 4214 printf("%s: %s: illegal cmdlen for %s\n", 4215 DEVNAME(sd->sd_sc), func, sd->sd_meta->ssd_devname); 4216 goto bad; 4217 } 4218 4219 wu->swu_blk_start = *blk; 4220 wu->swu_blk_end = *blk + (xs->datalen >> DEV_BSHIFT) - 1; 4221 4222 if (wu->swu_blk_end > sd->sd_meta->ssdi.ssd_size) { 4223 DNPRINTF(SR_D_DIS, "%s: %s out of bounds start: %lld " 4224 "end: %lld length: %d\n", 4225 DEVNAME(sd->sd_sc), func, wu->swu_blk_start, 4226 wu->swu_blk_end, xs->datalen); 4227 4228 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT | 4229 SSD_ERRCODE_VALID; 4230 sd->sd_scsi_sense.flags = SKEY_ILLEGAL_REQUEST; 4231 sd->sd_scsi_sense.add_sense_code = 0x21; 4232 sd->sd_scsi_sense.add_sense_code_qual = 0x00; 4233 sd->sd_scsi_sense.extra_len = 4; 4234 goto bad; 4235 } 4236 4237 rv = 0; 4238 bad: 4239 return (rv); 4240 } 4241 4242 int 4243 sr_check_io_collision(struct sr_workunit *wu) 4244 { 4245 struct sr_discipline *sd = wu->swu_dis; 4246 struct sr_workunit *wup; 4247 4248 splassert(IPL_BIO); 4249 4250 /* walk queue backwards and fill in collider if we have one */ 4251 TAILQ_FOREACH_REVERSE(wup, &sd->sd_wu_pendq, sr_wu_list, swu_link) { 4252 if (wu->swu_blk_end < wup->swu_blk_start || 4253 wup->swu_blk_end < wu->swu_blk_start) 4254 continue; 4255 4256 /* we have an LBA collision, defer wu */ 4257 wu->swu_state = SR_WU_DEFERRED; 4258 if (wup->swu_collider) 4259 /* wu is on deferred queue, append to last wu */ 4260 while (wup->swu_collider) 4261 wup = wup->swu_collider; 4262 4263 wup->swu_collider = wu; 4264 TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link); 4265 sd->sd_wu_collisions++; 4266 goto queued; 4267 } 4268 4269 return (0); 4270 queued: 4271 return (1); 4272 } 4273 4274 void 4275 sr_rebuild(void *arg) 4276 { 4277 struct sr_discipline *sd = arg; 4278 struct sr_softc *sc = sd->sd_sc; 4279 4280 if (kthread_create(sr_rebuild_thread, sd, &sd->sd_background_proc, 4281 DEVNAME(sc)) != 0) 4282 printf("%s: unable to start background operation\n", 4283 DEVNAME(sc)); 4284 } 4285 4286 void 4287 sr_rebuild_thread(void *arg) 4288 { 4289 struct sr_discipline *sd = arg; 4290 struct sr_softc *sc = sd->sd_sc; 4291 daddr64_t whole_blk, partial_blk, blk, sz, lba; 4292 daddr64_t psz, rb, restart; 4293 struct sr_workunit *wu_r, *wu_w; 4294 struct scsi_xfer xs_r, xs_w; 4295 struct scsi_rw_16 *cr, *cw; 4296 int c, s, slept, percent = 0, old_percent = -1; 4297 u_int8_t *buf; 4298 4299 whole_blk = sd->sd_meta->ssdi.ssd_size / SR_REBUILD_IO_SIZE; 4300 partial_blk = sd->sd_meta->ssdi.ssd_size % SR_REBUILD_IO_SIZE; 4301 4302 restart = sd->sd_meta->ssd_rebuild / SR_REBUILD_IO_SIZE; 4303 if (restart > whole_blk) { 4304 printf("%s: bogus rebuild restart offset, starting from 0\n", 4305 DEVNAME(sc)); 4306 restart = 0; 4307 } 4308 if (restart) { 4309 /* 4310 * XXX there is a hole here; there is a posibility that we 4311 * had a restart however the chunk that was supposed to 4312 * be rebuilt is no longer valid; we can reach this situation 4313 * when a rebuild is in progress and the box crashes and 4314 * on reboot the rebuild chunk is different (like zero'd or 4315 * replaced). We need to check the uuid of the chunk that is 4316 * being rebuilt to assert this. 4317 */ 4318 psz = sd->sd_meta->ssdi.ssd_size; 4319 rb = sd->sd_meta->ssd_rebuild; 4320 if (rb > 0) 4321 percent = 100 - ((psz * 100 - rb * 100) / psz) - 1; 4322 else 4323 percent = 0; 4324 printf("%s: resuming rebuild on %s at %d%%\n", 4325 DEVNAME(sc), sd->sd_meta->ssd_devname, percent); 4326 } 4327 4328 sd->sd_reb_active = 1; 4329 4330 /* currently this is 64k therefore we can use dma_alloc */ 4331 buf = dma_alloc(SR_REBUILD_IO_SIZE << DEV_BSHIFT, PR_WAITOK); 4332 for (blk = restart; blk <= whole_blk; blk++) { 4333 lba = blk * SR_REBUILD_IO_SIZE; 4334 sz = SR_REBUILD_IO_SIZE; 4335 if (blk == whole_blk) { 4336 if (partial_blk == 0) 4337 break; 4338 sz = partial_blk; 4339 } 4340 4341 /* get some wu */ 4342 if ((wu_r = scsi_io_get(&sd->sd_iopool, 0)) == NULL) 4343 panic("%s: rebuild exhausted wu_r", DEVNAME(sc)); 4344 if ((wu_w = scsi_io_get(&sd->sd_iopool, 0)) == NULL) 4345 panic("%s: rebuild exhausted wu_w", DEVNAME(sc)); 4346 4347 /* setup read io */ 4348 bzero(&xs_r, sizeof xs_r); 4349 xs_r.error = XS_NOERROR; 4350 xs_r.flags = SCSI_DATA_IN; 4351 xs_r.datalen = sz << DEV_BSHIFT; 4352 xs_r.data = buf; 4353 xs_r.cmdlen = sizeof(*cr); 4354 xs_r.cmd = &xs_r.cmdstore; 4355 cr = (struct scsi_rw_16 *)xs_r.cmd; 4356 cr->opcode = READ_16; 4357 _lto4b(sz, cr->length); 4358 _lto8b(lba, cr->addr); 4359 wu_r->swu_flags |= SR_WUF_REBUILD; 4360 wu_r->swu_xs = &xs_r; 4361 if (sd->sd_scsi_rw(wu_r)) { 4362 printf("%s: could not create read io\n", 4363 DEVNAME(sc)); 4364 goto fail; 4365 } 4366 4367 /* setup write io */ 4368 bzero(&xs_w, sizeof xs_w); 4369 xs_w.error = XS_NOERROR; 4370 xs_w.flags = SCSI_DATA_OUT; 4371 xs_w.datalen = sz << DEV_BSHIFT; 4372 xs_w.data = buf; 4373 xs_w.cmdlen = sizeof(*cw); 4374 xs_w.cmd = &xs_w.cmdstore; 4375 cw = (struct scsi_rw_16 *)xs_w.cmd; 4376 cw->opcode = WRITE_16; 4377 _lto4b(sz, cw->length); 4378 _lto8b(lba, cw->addr); 4379 wu_w->swu_flags |= SR_WUF_REBUILD; 4380 wu_w->swu_xs = &xs_w; 4381 if (sd->sd_scsi_rw(wu_w)) { 4382 printf("%s: could not create write io\n", 4383 DEVNAME(sc)); 4384 goto fail; 4385 } 4386 4387 /* 4388 * collide with the read io so that we get automatically 4389 * started when the read is done 4390 */ 4391 wu_w->swu_state = SR_WU_DEFERRED; 4392 wu_r->swu_collider = wu_w; 4393 s = splbio(); 4394 TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu_w, swu_link); 4395 4396 /* schedule io */ 4397 if (sr_check_io_collision(wu_r)) 4398 goto queued; 4399 4400 sr_raid_startwu(wu_r); 4401 queued: 4402 splx(s); 4403 4404 /* wait for read completion */ 4405 slept = 0; 4406 while ((wu_w->swu_flags & SR_WUF_REBUILDIOCOMP) == 0) { 4407 tsleep(wu_w, PRIBIO, "sr_rebuild", 0); 4408 slept = 1; 4409 } 4410 /* yield if we didn't sleep */ 4411 if (slept == 0) 4412 tsleep(sc, PWAIT, "sr_yield", 1); 4413 4414 scsi_io_put(&sd->sd_iopool, wu_r); 4415 scsi_io_put(&sd->sd_iopool, wu_w); 4416 4417 sd->sd_meta->ssd_rebuild = lba; 4418 4419 /* save metadata every percent */ 4420 psz = sd->sd_meta->ssdi.ssd_size; 4421 rb = sd->sd_meta->ssd_rebuild; 4422 if (rb > 0) 4423 percent = 100 - ((psz * 100 - rb * 100) / psz) - 1; 4424 else 4425 percent = 0; 4426 if (percent != old_percent && blk != whole_blk) { 4427 if (sr_meta_save(sd, SR_META_DIRTY)) 4428 printf("%s: could not save metadata to %s\n", 4429 DEVNAME(sc), sd->sd_meta->ssd_devname); 4430 old_percent = percent; 4431 } 4432 4433 if (sd->sd_reb_abort) 4434 goto abort; 4435 } 4436 4437 /* all done */ 4438 sd->sd_meta->ssd_rebuild = 0; 4439 for (c = 0; c < sd->sd_meta->ssdi.ssd_chunk_no; c++) 4440 if (sd->sd_vol.sv_chunks[c]->src_meta.scm_status == 4441 BIOC_SDREBUILD) { 4442 sd->sd_set_chunk_state(sd, c, BIOC_SDONLINE); 4443 break; 4444 } 4445 4446 abort: 4447 if (sr_meta_save(sd, SR_META_DIRTY)) 4448 printf("%s: could not save metadata to %s\n", 4449 DEVNAME(sc), sd->sd_meta->ssd_devname); 4450 fail: 4451 dma_free(buf, SR_REBUILD_IO_SIZE << DEV_BSHIFT); 4452 sd->sd_reb_active = 0; 4453 kthread_exit(0); 4454 } 4455 4456 #ifndef SMALL_KERNEL 4457 int 4458 sr_sensors_create(struct sr_discipline *sd) 4459 { 4460 struct sr_softc *sc = sd->sd_sc; 4461 int rv = 1; 4462 4463 DNPRINTF(SR_D_STATE, "%s: %s: sr_sensors_create\n", 4464 DEVNAME(sc), sd->sd_meta->ssd_devname); 4465 4466 sd->sd_vol.sv_sensor.type = SENSOR_DRIVE; 4467 sd->sd_vol.sv_sensor.status = SENSOR_S_UNKNOWN; 4468 strlcpy(sd->sd_vol.sv_sensor.desc, sd->sd_meta->ssd_devname, 4469 sizeof(sd->sd_vol.sv_sensor.desc)); 4470 4471 sensor_attach(&sc->sc_sensordev, &sd->sd_vol.sv_sensor); 4472 sd->sd_vol.sv_sensor_attached = 1; 4473 4474 if (sc->sc_sensor_task == NULL) { 4475 sc->sc_sensor_task = sensor_task_register(sc, 4476 sr_sensors_refresh, 10); 4477 if (sc->sc_sensor_task == NULL) 4478 goto bad; 4479 } 4480 4481 rv = 0; 4482 bad: 4483 return (rv); 4484 } 4485 4486 void 4487 sr_sensors_delete(struct sr_discipline *sd) 4488 { 4489 DNPRINTF(SR_D_STATE, "%s: sr_sensors_delete\n", DEVNAME(sd->sd_sc)); 4490 4491 if (sd->sd_vol.sv_sensor_attached) 4492 sensor_detach(&sd->sd_sc->sc_sensordev, &sd->sd_vol.sv_sensor); 4493 } 4494 4495 void 4496 sr_sensors_refresh(void *arg) 4497 { 4498 struct sr_softc *sc = arg; 4499 struct sr_volume *sv; 4500 struct sr_discipline *sd; 4501 int i, vol; 4502 4503 DNPRINTF(SR_D_STATE, "%s: sr_sensors_refresh\n", DEVNAME(sc)); 4504 4505 for (i = 0, vol = -1; i < SR_MAX_LD; i++) { 4506 /* XXX this will not work when we stagger disciplines */ 4507 if (!sc->sc_dis[i]) 4508 continue; 4509 4510 sd = sc->sc_dis[i]; 4511 sv = &sd->sd_vol; 4512 4513 switch(sd->sd_vol_status) { 4514 case BIOC_SVOFFLINE: 4515 sv->sv_sensor.value = SENSOR_DRIVE_FAIL; 4516 sv->sv_sensor.status = SENSOR_S_CRIT; 4517 break; 4518 4519 case BIOC_SVDEGRADED: 4520 sv->sv_sensor.value = SENSOR_DRIVE_PFAIL; 4521 sv->sv_sensor.status = SENSOR_S_WARN; 4522 break; 4523 4524 case BIOC_SVSCRUB: 4525 case BIOC_SVONLINE: 4526 sv->sv_sensor.value = SENSOR_DRIVE_ONLINE; 4527 sv->sv_sensor.status = SENSOR_S_OK; 4528 break; 4529 4530 default: 4531 sv->sv_sensor.value = 0; /* unknown */ 4532 sv->sv_sensor.status = SENSOR_S_UNKNOWN; 4533 } 4534 } 4535 } 4536 #endif /* SMALL_KERNEL */ 4537 4538 #ifdef SR_FANCY_STATS 4539 void sr_print_stats(void); 4540 4541 void 4542 sr_print_stats(void) 4543 { 4544 struct sr_softc *sc; 4545 struct sr_discipline *sd; 4546 int i, vol; 4547 4548 for (i = 0; i < softraid_cd.cd_ndevs; i++) 4549 if (softraid_cd.cd_devs[i]) { 4550 sc = softraid_cd.cd_devs[i]; 4551 /* we'll only have one softc */ 4552 break; 4553 } 4554 4555 if (!sc) { 4556 printf("no softraid softc found\n"); 4557 return; 4558 } 4559 4560 for (i = 0, vol = -1; i < SR_MAX_LD; i++) { 4561 /* XXX this will not work when we stagger disciplines */ 4562 if (!sc->sc_dis[i]) 4563 continue; 4564 4565 sd = sc->sc_dis[i]; 4566 printf("%s: ios pending: %d collisions %llu\n", 4567 sd->sd_meta->ssd_devname, 4568 sd->sd_wu_pending, 4569 sd->sd_wu_collisions); 4570 } 4571 } 4572 #endif /* SR_FANCY_STATS */ 4573 4574 #ifdef SR_DEBUG 4575 void 4576 sr_meta_print(struct sr_metadata *m) 4577 { 4578 int i; 4579 struct sr_meta_chunk *mc; 4580 struct sr_meta_opt_hdr *omh; 4581 4582 if (!(sr_debug & SR_D_META)) 4583 return; 4584 4585 printf("\tssd_magic 0x%llx\n", m->ssdi.ssd_magic); 4586 printf("\tssd_version %d\n", m->ssdi.ssd_version); 4587 printf("\tssd_vol_flags 0x%x\n", m->ssdi.ssd_vol_flags); 4588 printf("\tssd_uuid "); 4589 sr_uuid_print(&m->ssdi.ssd_uuid, 1); 4590 printf("\tssd_chunk_no %d\n", m->ssdi.ssd_chunk_no); 4591 printf("\tssd_chunk_id %d\n", m->ssdi.ssd_chunk_id); 4592 printf("\tssd_opt_no %d\n", m->ssdi.ssd_opt_no); 4593 printf("\tssd_volid %d\n", m->ssdi.ssd_volid); 4594 printf("\tssd_level %d\n", m->ssdi.ssd_level); 4595 printf("\tssd_size %lld\n", m->ssdi.ssd_size); 4596 printf("\tssd_devname %s\n", m->ssd_devname); 4597 printf("\tssd_vendor %s\n", m->ssdi.ssd_vendor); 4598 printf("\tssd_product %s\n", m->ssdi.ssd_product); 4599 printf("\tssd_revision %s\n", m->ssdi.ssd_revision); 4600 printf("\tssd_strip_size %d\n", m->ssdi.ssd_strip_size); 4601 printf("\tssd_checksum "); 4602 sr_checksum_print(m->ssd_checksum); 4603 printf("\n"); 4604 printf("\tssd_meta_flags 0x%x\n", m->ssd_meta_flags); 4605 printf("\tssd_ondisk %llu\n", m->ssd_ondisk); 4606 4607 mc = (struct sr_meta_chunk *)(m + 1); 4608 for (i = 0; i < m->ssdi.ssd_chunk_no; i++, mc++) { 4609 printf("\t\tscm_volid %d\n", mc->scmi.scm_volid); 4610 printf("\t\tscm_chunk_id %d\n", mc->scmi.scm_chunk_id); 4611 printf("\t\tscm_devname %s\n", mc->scmi.scm_devname); 4612 printf("\t\tscm_size %lld\n", mc->scmi.scm_size); 4613 printf("\t\tscm_coerced_size %lld\n",mc->scmi.scm_coerced_size); 4614 printf("\t\tscm_uuid "); 4615 sr_uuid_print(&mc->scmi.scm_uuid, 1); 4616 printf("\t\tscm_checksum "); 4617 sr_checksum_print(mc->scm_checksum); 4618 printf("\n"); 4619 printf("\t\tscm_status %d\n", mc->scm_status); 4620 } 4621 4622 omh = (struct sr_meta_opt_hdr *)((u_int8_t *)(m + 1) + 4623 sizeof(struct sr_meta_chunk) * m->ssdi.ssd_chunk_no); 4624 for (i = 0; i < m->ssdi.ssd_opt_no; i++) { 4625 printf("\t\t\tsom_type %d\n", omh->som_type); 4626 printf("\t\t\tsom_checksum "); 4627 sr_checksum_print(omh->som_checksum); 4628 printf("\n"); 4629 omh = (struct sr_meta_opt_hdr *)((void *)omh + 4630 omh->som_length); 4631 } 4632 } 4633 4634 void 4635 sr_dump_mem(u_int8_t *p, int len) 4636 { 4637 int i; 4638 4639 for (i = 0; i < len; i++) 4640 printf("%02x ", *p++); 4641 printf("\n"); 4642 } 4643 4644 #endif /* SR_DEBUG */ 4645