1 /* $OpenBSD: softraid.c,v 1.364 2015/08/19 19:05:24 krw Exp $ */ 2 /* 3 * Copyright (c) 2007, 2008, 2009 Marco Peereboom <marco@peereboom.us> 4 * Copyright (c) 2008 Chris Kuethe <ckuethe@openbsd.org> 5 * Copyright (c) 2009 Joel Sing <jsing@openbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 20 #include "bio.h" 21 22 #include <sys/param.h> 23 #include <sys/systm.h> 24 #include <sys/buf.h> 25 #include <sys/device.h> 26 #include <sys/ioctl.h> 27 #include <sys/malloc.h> 28 #include <sys/pool.h> 29 #include <sys/kernel.h> 30 #include <sys/disk.h> 31 #include <sys/rwlock.h> 32 #include <sys/queue.h> 33 #include <sys/fcntl.h> 34 #include <sys/disklabel.h> 35 #include <sys/vnode.h> 36 #include <sys/lock.h> 37 #include <sys/mount.h> 38 #include <sys/sensors.h> 39 #include <sys/stat.h> 40 #include <sys/conf.h> 41 #include <sys/uio.h> 42 #include <sys/task.h> 43 #include <sys/kthread.h> 44 #include <sys/dkio.h> 45 #include <sys/stdint.h> 46 47 #include <scsi/scsi_all.h> 48 #include <scsi/scsiconf.h> 49 #include <scsi/scsi_disk.h> 50 51 #include <dev/softraidvar.h> 52 53 #ifdef HIBERNATE 54 #include <lib/libsa/aes_xts.h> 55 #include <sys/hibernate.h> 56 #include <scsi/sdvar.h> 57 #endif /* HIBERNATE */ 58 59 /* #define SR_FANCY_STATS */ 60 61 #ifdef SR_DEBUG 62 #define SR_FANCY_STATS 63 uint32_t sr_debug = 0 64 /* | SR_D_CMD */ 65 /* | SR_D_MISC */ 66 /* | SR_D_INTR */ 67 /* | SR_D_IOCTL */ 68 /* | SR_D_CCB */ 69 /* | SR_D_WU */ 70 /* | SR_D_META */ 71 /* | SR_D_DIS */ 72 /* | SR_D_STATE */ 73 /* | SR_D_REBUILD */ 74 ; 75 #endif 76 77 struct sr_softc *softraid0; 78 struct sr_uuid sr_bootuuid; 79 u_int8_t sr_bootkey[SR_CRYPTO_MAXKEYBYTES]; 80 81 int sr_match(struct device *, void *, void *); 82 void sr_attach(struct device *, struct device *, void *); 83 int sr_detach(struct device *, int); 84 void sr_map_root(void); 85 86 struct cfattach softraid_ca = { 87 sizeof(struct sr_softc), sr_match, sr_attach, sr_detach, 88 }; 89 90 struct cfdriver softraid_cd = { 91 NULL, "softraid", DV_DULL 92 }; 93 94 /* scsi & discipline */ 95 void sr_scsi_cmd(struct scsi_xfer *); 96 void sr_minphys(struct buf *, struct scsi_link *); 97 int sr_scsi_probe(struct scsi_link *); 98 void sr_copy_internal_data(struct scsi_xfer *, 99 void *, size_t); 100 int sr_scsi_ioctl(struct scsi_link *, u_long, 101 caddr_t, int); 102 int sr_bio_ioctl(struct device *, u_long, caddr_t); 103 int sr_bio_handler(struct sr_softc *, 104 struct sr_discipline *, u_long, struct bio *); 105 int sr_ioctl_inq(struct sr_softc *, struct bioc_inq *); 106 int sr_ioctl_vol(struct sr_softc *, struct bioc_vol *); 107 int sr_ioctl_disk(struct sr_softc *, struct bioc_disk *); 108 int sr_ioctl_setstate(struct sr_softc *, 109 struct bioc_setstate *); 110 int sr_ioctl_createraid(struct sr_softc *, 111 struct bioc_createraid *, int, void *); 112 int sr_ioctl_deleteraid(struct sr_softc *, 113 struct sr_discipline *, struct bioc_deleteraid *); 114 int sr_ioctl_discipline(struct sr_softc *, 115 struct sr_discipline *, struct bioc_discipline *); 116 int sr_ioctl_installboot(struct sr_softc *, 117 struct sr_discipline *, struct bioc_installboot *); 118 void sr_chunks_unwind(struct sr_softc *, 119 struct sr_chunk_head *); 120 void sr_discipline_free(struct sr_discipline *); 121 void sr_discipline_shutdown(struct sr_discipline *, int); 122 int sr_discipline_init(struct sr_discipline *, int); 123 int sr_alloc_resources(struct sr_discipline *); 124 void sr_free_resources(struct sr_discipline *); 125 void sr_set_chunk_state(struct sr_discipline *, int, int); 126 void sr_set_vol_state(struct sr_discipline *); 127 128 /* utility functions */ 129 void sr_shutdown(void); 130 void sr_uuid_generate(struct sr_uuid *); 131 char *sr_uuid_format(struct sr_uuid *); 132 void sr_uuid_print(struct sr_uuid *, int); 133 void sr_checksum_print(u_int8_t *); 134 int sr_boot_assembly(struct sr_softc *); 135 int sr_already_assembled(struct sr_discipline *); 136 int sr_hotspare(struct sr_softc *, dev_t); 137 void sr_hotspare_rebuild(struct sr_discipline *); 138 int sr_rebuild_init(struct sr_discipline *, dev_t, int); 139 void sr_rebuild_start(void *); 140 void sr_rebuild_thread(void *); 141 void sr_rebuild(struct sr_discipline *); 142 void sr_roam_chunks(struct sr_discipline *); 143 int sr_chunk_in_use(struct sr_softc *, dev_t); 144 int sr_rw(struct sr_softc *, dev_t, char *, size_t, 145 daddr_t, long); 146 void sr_wu_done_callback(void *); 147 148 /* don't include these on RAMDISK */ 149 #ifndef SMALL_KERNEL 150 void sr_sensors_refresh(void *); 151 int sr_sensors_create(struct sr_discipline *); 152 void sr_sensors_delete(struct sr_discipline *); 153 #endif 154 155 /* metadata */ 156 int sr_meta_probe(struct sr_discipline *, dev_t *, int); 157 int sr_meta_attach(struct sr_discipline *, int, int); 158 int sr_meta_rw(struct sr_discipline *, dev_t, void *, long); 159 int sr_meta_clear(struct sr_discipline *); 160 void sr_meta_init(struct sr_discipline *, int, int); 161 void sr_meta_init_complete(struct sr_discipline *); 162 void sr_meta_opt_handler(struct sr_discipline *, 163 struct sr_meta_opt_hdr *); 164 165 /* hotplug magic */ 166 void sr_disk_attach(struct disk *, int); 167 168 struct sr_hotplug_list { 169 void (*sh_hotplug)(struct sr_discipline *, 170 struct disk *, int); 171 struct sr_discipline *sh_sd; 172 173 SLIST_ENTRY(sr_hotplug_list) shl_link; 174 }; 175 SLIST_HEAD(sr_hotplug_list_head, sr_hotplug_list); 176 177 struct sr_hotplug_list_head sr_hotplug_callbacks; 178 extern void (*softraid_disk_attach)(struct disk *, int); 179 180 /* scsi glue */ 181 struct scsi_adapter sr_switch = { 182 sr_scsi_cmd, sr_minphys, sr_scsi_probe, NULL, sr_scsi_ioctl 183 }; 184 185 /* native metadata format */ 186 int sr_meta_native_bootprobe(struct sr_softc *, dev_t, 187 struct sr_boot_chunk_head *); 188 #define SR_META_NOTCLAIMED (0) 189 #define SR_META_CLAIMED (1) 190 int sr_meta_native_probe(struct sr_softc *, 191 struct sr_chunk *); 192 int sr_meta_native_attach(struct sr_discipline *, int); 193 int sr_meta_native_write(struct sr_discipline *, dev_t, 194 struct sr_metadata *,void *); 195 196 #ifdef SR_DEBUG 197 void sr_meta_print(struct sr_metadata *); 198 #else 199 #define sr_meta_print(m) 200 #endif 201 202 /* the metadata driver should remain stateless */ 203 struct sr_meta_driver { 204 daddr_t smd_offset; /* metadata location */ 205 u_int32_t smd_size; /* size of metadata */ 206 207 int (*smd_probe)(struct sr_softc *, 208 struct sr_chunk *); 209 int (*smd_attach)(struct sr_discipline *, int); 210 int (*smd_detach)(struct sr_discipline *); 211 int (*smd_read)(struct sr_discipline *, dev_t, 212 struct sr_metadata *, void *); 213 int (*smd_write)(struct sr_discipline *, dev_t, 214 struct sr_metadata *, void *); 215 int (*smd_validate)(struct sr_discipline *, 216 struct sr_metadata *, void *); 217 } smd[] = { 218 { SR_META_OFFSET, SR_META_SIZE * DEV_BSIZE, 219 sr_meta_native_probe, sr_meta_native_attach, NULL, 220 sr_meta_native_read, sr_meta_native_write, NULL }, 221 { 0, 0, NULL, NULL, NULL, NULL } 222 }; 223 224 int 225 sr_meta_attach(struct sr_discipline *sd, int chunk_no, int force) 226 { 227 struct sr_softc *sc = sd->sd_sc; 228 struct sr_chunk_head *cl; 229 struct sr_chunk *ch_entry, *chunk1, *chunk2; 230 int rv = 1, i = 0; 231 232 DNPRINTF(SR_D_META, "%s: sr_meta_attach(%d)\n", DEVNAME(sc), chunk_no); 233 234 /* in memory copy of metadata */ 235 sd->sd_meta = malloc(SR_META_SIZE * DEV_BSIZE, M_DEVBUF, 236 M_ZERO | M_NOWAIT); 237 if (!sd->sd_meta) { 238 sr_error(sc, "could not allocate memory for metadata"); 239 goto bad; 240 } 241 242 if (sd->sd_meta_type != SR_META_F_NATIVE) { 243 /* in memory copy of foreign metadata */ 244 sd->sd_meta_foreign = malloc(smd[sd->sd_meta_type].smd_size, 245 M_DEVBUF, M_ZERO | M_NOWAIT); 246 if (!sd->sd_meta_foreign) { 247 /* unwind frees sd_meta */ 248 sr_error(sc, "could not allocate memory for foreign " 249 "metadata"); 250 goto bad; 251 } 252 } 253 254 /* we have a valid list now create an array index */ 255 cl = &sd->sd_vol.sv_chunk_list; 256 sd->sd_vol.sv_chunks = mallocarray(chunk_no, sizeof(struct sr_chunk *), 257 M_DEVBUF, M_WAITOK | M_ZERO); 258 259 /* fill out chunk array */ 260 i = 0; 261 SLIST_FOREACH(ch_entry, cl, src_link) 262 sd->sd_vol.sv_chunks[i++] = ch_entry; 263 264 /* attach metadata */ 265 if (smd[sd->sd_meta_type].smd_attach(sd, force)) 266 goto bad; 267 268 /* Force chunks into correct order now that metadata is attached. */ 269 SLIST_INIT(cl); 270 for (i = 0; i < chunk_no; i++) { 271 ch_entry = sd->sd_vol.sv_chunks[i]; 272 chunk2 = NULL; 273 SLIST_FOREACH(chunk1, cl, src_link) { 274 if (chunk1->src_meta.scmi.scm_chunk_id > 275 ch_entry->src_meta.scmi.scm_chunk_id) 276 break; 277 chunk2 = chunk1; 278 } 279 if (chunk2 == NULL) 280 SLIST_INSERT_HEAD(cl, ch_entry, src_link); 281 else 282 SLIST_INSERT_AFTER(chunk2, ch_entry, src_link); 283 } 284 i = 0; 285 SLIST_FOREACH(ch_entry, cl, src_link) 286 sd->sd_vol.sv_chunks[i++] = ch_entry; 287 288 rv = 0; 289 bad: 290 return (rv); 291 } 292 293 int 294 sr_meta_probe(struct sr_discipline *sd, dev_t *dt, int no_chunk) 295 { 296 struct sr_softc *sc = sd->sd_sc; 297 struct vnode *vn; 298 struct sr_chunk *ch_entry, *ch_prev = NULL; 299 struct sr_chunk_head *cl; 300 char devname[32]; 301 int i, d, type, found, prevf, error; 302 dev_t dev; 303 304 DNPRINTF(SR_D_META, "%s: sr_meta_probe(%d)\n", DEVNAME(sc), no_chunk); 305 306 if (no_chunk == 0) 307 goto unwind; 308 309 cl = &sd->sd_vol.sv_chunk_list; 310 311 for (d = 0, prevf = SR_META_F_INVALID; d < no_chunk; d++) { 312 ch_entry = malloc(sizeof(struct sr_chunk), M_DEVBUF, 313 M_WAITOK | M_ZERO); 314 /* keep disks in user supplied order */ 315 if (ch_prev) 316 SLIST_INSERT_AFTER(ch_prev, ch_entry, src_link); 317 else 318 SLIST_INSERT_HEAD(cl, ch_entry, src_link); 319 ch_prev = ch_entry; 320 dev = dt[d]; 321 ch_entry->src_dev_mm = dev; 322 323 if (dev == NODEV) { 324 ch_entry->src_meta.scm_status = BIOC_SDOFFLINE; 325 continue; 326 } else { 327 sr_meta_getdevname(sc, dev, devname, sizeof(devname)); 328 if (bdevvp(dev, &vn)) { 329 sr_error(sc, "sr_meta_probe: cannot allocate " 330 "vnode"); 331 goto unwind; 332 } 333 334 /* 335 * XXX leaving dev open for now; move this to attach 336 * and figure out the open/close dance for unwind. 337 */ 338 error = VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc); 339 if (error) { 340 DNPRINTF(SR_D_META,"%s: sr_meta_probe can't " 341 "open %s\n", DEVNAME(sc), devname); 342 vput(vn); 343 goto unwind; 344 } 345 346 strlcpy(ch_entry->src_devname, devname, 347 sizeof(ch_entry->src_devname)); 348 ch_entry->src_vn = vn; 349 } 350 351 /* determine if this is a device we understand */ 352 for (i = 0, found = SR_META_F_INVALID; smd[i].smd_probe; i++) { 353 type = smd[i].smd_probe(sc, ch_entry); 354 if (type == SR_META_F_INVALID) 355 continue; 356 else { 357 found = type; 358 break; 359 } 360 } 361 362 if (found == SR_META_F_INVALID) 363 goto unwind; 364 if (prevf == SR_META_F_INVALID) 365 prevf = found; 366 if (prevf != found) { 367 DNPRINTF(SR_D_META, "%s: prevf != found\n", 368 DEVNAME(sc)); 369 goto unwind; 370 } 371 } 372 373 return (prevf); 374 unwind: 375 return (SR_META_F_INVALID); 376 } 377 378 void 379 sr_meta_getdevname(struct sr_softc *sc, dev_t dev, char *buf, int size) 380 { 381 int maj, unit, part; 382 char *name; 383 384 DNPRINTF(SR_D_META, "%s: sr_meta_getdevname(%p, %d)\n", 385 DEVNAME(sc), buf, size); 386 387 if (!buf) 388 return; 389 390 maj = major(dev); 391 part = DISKPART(dev); 392 unit = DISKUNIT(dev); 393 394 name = findblkname(maj); 395 if (name == NULL) 396 return; 397 398 snprintf(buf, size, "%s%d%c", name, unit, part + 'a'); 399 } 400 401 int 402 sr_rw(struct sr_softc *sc, dev_t dev, char *buf, size_t size, daddr_t blkno, 403 long flags) 404 { 405 struct vnode *vp; 406 struct buf b; 407 size_t bufsize, dma_bufsize; 408 int rv = 1; 409 char *dma_buf; 410 411 DNPRINTF(SR_D_MISC, "%s: sr_rw(0x%x, %p, %zu, %lld 0x%lx)\n", 412 DEVNAME(sc), dev, buf, size, (long long)blkno, flags); 413 414 dma_bufsize = (size > MAXPHYS) ? MAXPHYS : size; 415 dma_buf = dma_alloc(dma_bufsize, PR_WAITOK); 416 417 if (bdevvp(dev, &vp)) { 418 printf("%s: sr_rw: failed to allocate vnode\n", DEVNAME(sc)); 419 goto done; 420 } 421 422 while (size > 0) { 423 DNPRINTF(SR_D_MISC, "%s: dma_buf %p, size %zu, blkno %lld)\n", 424 DEVNAME(sc), dma_buf, size, (long long)blkno); 425 426 bufsize = (size > MAXPHYS) ? MAXPHYS : size; 427 if (flags == B_WRITE) 428 memcpy(dma_buf, buf, bufsize); 429 430 bzero(&b, sizeof(b)); 431 b.b_flags = flags | B_PHYS; 432 b.b_proc = curproc; 433 b.b_dev = dev; 434 b.b_iodone = NULL; 435 b.b_error = 0; 436 b.b_blkno = blkno; 437 b.b_data = dma_buf; 438 b.b_bcount = bufsize; 439 b.b_bufsize = bufsize; 440 b.b_resid = bufsize; 441 b.b_vp = vp; 442 443 if ((b.b_flags & B_READ) == 0) 444 vp->v_numoutput++; 445 446 LIST_INIT(&b.b_dep); 447 VOP_STRATEGY(&b); 448 biowait(&b); 449 450 if (b.b_flags & B_ERROR) { 451 printf("%s: I/O error %d on dev 0x%x at block %llu\n", 452 DEVNAME(sc), b.b_error, dev, b.b_blkno); 453 goto done; 454 } 455 456 if (flags == B_READ) 457 memcpy(buf, dma_buf, bufsize); 458 459 size -= bufsize; 460 buf += bufsize; 461 blkno += howmany(bufsize, DEV_BSIZE); 462 } 463 464 rv = 0; 465 466 done: 467 if (vp) 468 vput(vp); 469 470 dma_free(dma_buf, dma_bufsize); 471 472 return (rv); 473 } 474 475 int 476 sr_meta_rw(struct sr_discipline *sd, dev_t dev, void *md, long flags) 477 { 478 int rv = 1; 479 480 DNPRINTF(SR_D_META, "%s: sr_meta_rw(0x%x, %p, 0x%lx)\n", 481 DEVNAME(sd->sd_sc), dev, md, flags); 482 483 if (md == NULL) { 484 printf("%s: sr_meta_rw: invalid metadata pointer\n", 485 DEVNAME(sd->sd_sc)); 486 goto done; 487 } 488 489 rv = sr_rw(sd->sd_sc, dev, md, SR_META_SIZE * DEV_BSIZE, 490 SR_META_OFFSET, flags); 491 492 done: 493 return (rv); 494 } 495 496 int 497 sr_meta_clear(struct sr_discipline *sd) 498 { 499 struct sr_softc *sc = sd->sd_sc; 500 struct sr_chunk_head *cl = &sd->sd_vol.sv_chunk_list; 501 struct sr_chunk *ch_entry; 502 void *m; 503 int rv = 1; 504 505 DNPRINTF(SR_D_META, "%s: sr_meta_clear\n", DEVNAME(sc)); 506 507 if (sd->sd_meta_type != SR_META_F_NATIVE) { 508 sr_error(sc, "cannot clear foreign metadata"); 509 goto done; 510 } 511 512 m = malloc(SR_META_SIZE * DEV_BSIZE, M_DEVBUF, M_WAITOK | M_ZERO); 513 SLIST_FOREACH(ch_entry, cl, src_link) { 514 if (sr_meta_native_write(sd, ch_entry->src_dev_mm, m, NULL)) { 515 /* XXX mark disk offline */ 516 DNPRINTF(SR_D_META, "%s: sr_meta_clear failed to " 517 "clear %s\n", DEVNAME(sc), ch_entry->src_devname); 518 rv++; 519 continue; 520 } 521 bzero(&ch_entry->src_meta, sizeof(ch_entry->src_meta)); 522 } 523 524 bzero(sd->sd_meta, SR_META_SIZE * DEV_BSIZE); 525 526 free(m, M_DEVBUF, SR_META_SIZE * DEV_BSIZE); 527 rv = 0; 528 done: 529 return (rv); 530 } 531 532 void 533 sr_meta_init(struct sr_discipline *sd, int level, int no_chunk) 534 { 535 struct sr_softc *sc = sd->sd_sc; 536 struct sr_metadata *sm = sd->sd_meta; 537 struct sr_chunk_head *cl = &sd->sd_vol.sv_chunk_list; 538 struct sr_meta_chunk *scm; 539 struct sr_chunk *chunk; 540 int cid = 0; 541 u_int64_t max_chunk_sz = 0, min_chunk_sz = 0; 542 543 DNPRINTF(SR_D_META, "%s: sr_meta_init\n", DEVNAME(sc)); 544 545 if (!sm) 546 return; 547 548 /* Initialise volume metadata. */ 549 sm->ssdi.ssd_magic = SR_MAGIC; 550 sm->ssdi.ssd_version = SR_META_VERSION; 551 sm->ssdi.ssd_vol_flags = sd->sd_meta_flags; 552 sm->ssdi.ssd_volid = 0; 553 sm->ssdi.ssd_chunk_no = no_chunk; 554 sm->ssdi.ssd_level = level; 555 556 sm->ssd_data_blkno = SR_DATA_OFFSET; 557 sm->ssd_ondisk = 0; 558 559 sr_uuid_generate(&sm->ssdi.ssd_uuid); 560 561 /* Initialise chunk metadata and get min/max chunk sizes. */ 562 SLIST_FOREACH(chunk, cl, src_link) { 563 scm = &chunk->src_meta; 564 scm->scmi.scm_size = chunk->src_size; 565 scm->scmi.scm_chunk_id = cid++; 566 scm->scm_status = BIOC_SDONLINE; 567 scm->scmi.scm_volid = 0; 568 strlcpy(scm->scmi.scm_devname, chunk->src_devname, 569 sizeof(scm->scmi.scm_devname)); 570 memcpy(&scm->scmi.scm_uuid, &sm->ssdi.ssd_uuid, 571 sizeof(scm->scmi.scm_uuid)); 572 sr_checksum(sc, scm, &scm->scm_checksum, 573 sizeof(scm->scm_checksum)); 574 575 if (min_chunk_sz == 0) 576 min_chunk_sz = scm->scmi.scm_size; 577 min_chunk_sz = MIN(min_chunk_sz, scm->scmi.scm_size); 578 max_chunk_sz = MAX(max_chunk_sz, scm->scmi.scm_size); 579 } 580 581 /* Equalize chunk sizes. */ 582 SLIST_FOREACH(chunk, cl, src_link) 583 chunk->src_meta.scmi.scm_coerced_size = min_chunk_sz; 584 585 sd->sd_vol.sv_chunk_minsz = min_chunk_sz; 586 sd->sd_vol.sv_chunk_maxsz = max_chunk_sz; 587 } 588 589 void 590 sr_meta_init_complete(struct sr_discipline *sd) 591 { 592 #ifdef SR_DEBUG 593 struct sr_softc *sc = sd->sd_sc; 594 #endif 595 struct sr_metadata *sm = sd->sd_meta; 596 597 DNPRINTF(SR_D_META, "%s: sr_meta_complete\n", DEVNAME(sc)); 598 599 /* Complete initialisation of volume metadata. */ 600 strlcpy(sm->ssdi.ssd_vendor, "OPENBSD", sizeof(sm->ssdi.ssd_vendor)); 601 snprintf(sm->ssdi.ssd_product, sizeof(sm->ssdi.ssd_product), 602 "SR %s", sd->sd_name); 603 snprintf(sm->ssdi.ssd_revision, sizeof(sm->ssdi.ssd_revision), 604 "%03d", sm->ssdi.ssd_version); 605 } 606 607 void 608 sr_meta_opt_handler(struct sr_discipline *sd, struct sr_meta_opt_hdr *om) 609 { 610 if (om->som_type != SR_OPT_BOOT) 611 panic("unknown optional metadata type"); 612 } 613 614 void 615 sr_meta_save_callback(void *xsd) 616 { 617 struct sr_discipline *sd = xsd; 618 int s; 619 620 s = splbio(); 621 622 if (sr_meta_save(sd, SR_META_DIRTY)) 623 printf("%s: save metadata failed\n", DEVNAME(sd->sd_sc)); 624 625 sd->sd_must_flush = 0; 626 splx(s); 627 } 628 629 int 630 sr_meta_save(struct sr_discipline *sd, u_int32_t flags) 631 { 632 struct sr_softc *sc = sd->sd_sc; 633 struct sr_metadata *sm = sd->sd_meta, *m; 634 struct sr_meta_driver *s; 635 struct sr_chunk *src; 636 struct sr_meta_chunk *cm; 637 struct sr_workunit wu; 638 struct sr_meta_opt_hdr *omh; 639 struct sr_meta_opt_item *omi; 640 int i; 641 642 DNPRINTF(SR_D_META, "%s: sr_meta_save %s\n", 643 DEVNAME(sc), sd->sd_meta->ssd_devname); 644 645 if (!sm) { 646 printf("%s: no in memory copy of metadata\n", DEVNAME(sc)); 647 goto bad; 648 } 649 650 /* meta scratchpad */ 651 s = &smd[sd->sd_meta_type]; 652 m = malloc(SR_META_SIZE * DEV_BSIZE, M_DEVBUF, M_ZERO | M_NOWAIT); 653 if (!m) { 654 printf("%s: could not allocate metadata scratch area\n", 655 DEVNAME(sc)); 656 goto bad; 657 } 658 659 /* from here on out metadata is updated */ 660 restart: 661 sm->ssd_ondisk++; 662 sm->ssd_meta_flags = flags; 663 memcpy(m, sm, sizeof(*m)); 664 665 /* Chunk metadata. */ 666 cm = (struct sr_meta_chunk *)(m + 1); 667 for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) { 668 src = sd->sd_vol.sv_chunks[i]; 669 memcpy(cm, &src->src_meta, sizeof(*cm)); 670 cm++; 671 } 672 673 /* Optional metadata. */ 674 omh = (struct sr_meta_opt_hdr *)(cm); 675 SLIST_FOREACH(omi, &sd->sd_meta_opt, omi_link) { 676 DNPRINTF(SR_D_META, "%s: saving optional metadata type %u with " 677 "length %u\n", DEVNAME(sc), omi->omi_som->som_type, 678 omi->omi_som->som_length); 679 bzero(&omi->omi_som->som_checksum, MD5_DIGEST_LENGTH); 680 sr_checksum(sc, omi->omi_som, &omi->omi_som->som_checksum, 681 omi->omi_som->som_length); 682 memcpy(omh, omi->omi_som, omi->omi_som->som_length); 683 omh = (struct sr_meta_opt_hdr *)((u_int8_t *)omh + 684 omi->omi_som->som_length); 685 } 686 687 for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) { 688 src = sd->sd_vol.sv_chunks[i]; 689 690 /* skip disks that are offline */ 691 if (src->src_meta.scm_status == BIOC_SDOFFLINE) 692 continue; 693 694 /* calculate metadata checksum for correct chunk */ 695 m->ssdi.ssd_chunk_id = i; 696 sr_checksum(sc, m, &m->ssd_checksum, 697 sizeof(struct sr_meta_invariant)); 698 699 #ifdef SR_DEBUG 700 DNPRINTF(SR_D_META, "%s: sr_meta_save %s: volid: %d " 701 "chunkid: %d checksum: ", 702 DEVNAME(sc), src->src_meta.scmi.scm_devname, 703 m->ssdi.ssd_volid, m->ssdi.ssd_chunk_id); 704 705 if (sr_debug & SR_D_META) 706 sr_checksum_print((u_int8_t *)&m->ssd_checksum); 707 DNPRINTF(SR_D_META, "\n"); 708 sr_meta_print(m); 709 #endif 710 711 /* translate and write to disk */ 712 if (s->smd_write(sd, src->src_dev_mm, m, NULL /* XXX */)) { 713 printf("%s: could not write metadata to %s\n", 714 DEVNAME(sc), src->src_devname); 715 /* restart the meta write */ 716 src->src_meta.scm_status = BIOC_SDOFFLINE; 717 /* XXX recalculate volume status */ 718 goto restart; 719 } 720 } 721 722 /* not all disciplines have sync */ 723 if (sd->sd_scsi_sync) { 724 bzero(&wu, sizeof(wu)); 725 wu.swu_flags |= SR_WUF_FAKE; 726 wu.swu_dis = sd; 727 sd->sd_scsi_sync(&wu); 728 } 729 free(m, M_DEVBUF, SR_META_SIZE * DEV_BSIZE); 730 return (0); 731 bad: 732 return (1); 733 } 734 735 int 736 sr_meta_read(struct sr_discipline *sd) 737 { 738 struct sr_softc *sc = sd->sd_sc; 739 struct sr_chunk_head *cl = &sd->sd_vol.sv_chunk_list; 740 struct sr_metadata *sm; 741 struct sr_chunk *ch_entry; 742 struct sr_meta_chunk *cp; 743 struct sr_meta_driver *s; 744 void *fm = NULL; 745 int no_disk = 0, got_meta = 0; 746 747 DNPRINTF(SR_D_META, "%s: sr_meta_read\n", DEVNAME(sc)); 748 749 sm = malloc(SR_META_SIZE * DEV_BSIZE, M_DEVBUF, M_WAITOK | M_ZERO); 750 s = &smd[sd->sd_meta_type]; 751 if (sd->sd_meta_type != SR_META_F_NATIVE) 752 fm = malloc(s->smd_size, M_DEVBUF, M_WAITOK | M_ZERO); 753 754 cp = (struct sr_meta_chunk *)(sm + 1); 755 SLIST_FOREACH(ch_entry, cl, src_link) { 756 /* skip disks that are offline */ 757 if (ch_entry->src_meta.scm_status == BIOC_SDOFFLINE) { 758 DNPRINTF(SR_D_META, 759 "%s: %s chunk marked offline, spoofing status\n", 760 DEVNAME(sc), ch_entry->src_devname); 761 cp++; /* adjust chunk pointer to match failure */ 762 continue; 763 } else if (s->smd_read(sd, ch_entry->src_dev_mm, sm, fm)) { 764 /* read and translate */ 765 /* XXX mark chunk offline, elsewhere!! */ 766 ch_entry->src_meta.scm_status = BIOC_SDOFFLINE; 767 cp++; /* adjust chunk pointer to match failure */ 768 DNPRINTF(SR_D_META, "%s: sr_meta_read failed\n", 769 DEVNAME(sc)); 770 continue; 771 } 772 773 if (sm->ssdi.ssd_magic != SR_MAGIC) { 774 DNPRINTF(SR_D_META, "%s: sr_meta_read !SR_MAGIC\n", 775 DEVNAME(sc)); 776 continue; 777 } 778 779 /* validate metadata */ 780 if (sr_meta_validate(sd, ch_entry->src_dev_mm, sm, fm)) { 781 DNPRINTF(SR_D_META, "%s: invalid metadata\n", 782 DEVNAME(sc)); 783 no_disk = -1; 784 goto done; 785 } 786 787 /* assume first chunk contains metadata */ 788 if (got_meta == 0) { 789 sr_meta_opt_load(sc, sm, &sd->sd_meta_opt); 790 memcpy(sd->sd_meta, sm, sizeof(*sd->sd_meta)); 791 got_meta = 1; 792 } 793 794 memcpy(&ch_entry->src_meta, cp, sizeof(ch_entry->src_meta)); 795 796 no_disk++; 797 cp++; 798 } 799 800 free(sm, M_DEVBUF, SR_META_SIZE * DEV_BSIZE); 801 free(fm, M_DEVBUF, s->smd_size); 802 803 done: 804 DNPRINTF(SR_D_META, "%s: sr_meta_read found %d parts\n", DEVNAME(sc), 805 no_disk); 806 return (no_disk); 807 } 808 809 void 810 sr_meta_opt_load(struct sr_softc *sc, struct sr_metadata *sm, 811 struct sr_meta_opt_head *som) 812 { 813 struct sr_meta_opt_hdr *omh; 814 struct sr_meta_opt_item *omi; 815 u_int8_t checksum[MD5_DIGEST_LENGTH]; 816 int i; 817 818 /* Process optional metadata. */ 819 omh = (struct sr_meta_opt_hdr *)((u_int8_t *)(sm + 1) + 820 sizeof(struct sr_meta_chunk) * sm->ssdi.ssd_chunk_no); 821 for (i = 0; i < sm->ssdi.ssd_opt_no; i++) { 822 823 omi = malloc(sizeof(struct sr_meta_opt_item), M_DEVBUF, 824 M_WAITOK | M_ZERO); 825 SLIST_INSERT_HEAD(som, omi, omi_link); 826 827 if (omh->som_length == 0) { 828 829 /* Load old fixed length optional metadata. */ 830 DNPRINTF(SR_D_META, "%s: old optional metadata of type " 831 "%u\n", DEVNAME(sc), omh->som_type); 832 833 /* Validate checksum. */ 834 sr_checksum(sc, (void *)omh, &checksum, 835 SR_OLD_META_OPT_SIZE - MD5_DIGEST_LENGTH); 836 if (bcmp(&checksum, (void *)omh + SR_OLD_META_OPT_MD5, 837 sizeof(checksum))) 838 panic("%s: invalid optional metadata " 839 "checksum", DEVNAME(sc)); 840 841 /* Determine correct length. */ 842 switch (omh->som_type) { 843 case SR_OPT_CRYPTO: 844 omh->som_length = sizeof(struct sr_meta_crypto); 845 break; 846 case SR_OPT_BOOT: 847 omh->som_length = sizeof(struct sr_meta_boot); 848 break; 849 case SR_OPT_KEYDISK: 850 omh->som_length = 851 sizeof(struct sr_meta_keydisk); 852 break; 853 default: 854 panic("unknown old optional metadata " 855 "type %u\n", omh->som_type); 856 } 857 858 omi->omi_som = malloc(omh->som_length, M_DEVBUF, 859 M_WAITOK | M_ZERO); 860 memcpy((u_int8_t *)omi->omi_som + sizeof(*omi->omi_som), 861 (u_int8_t *)omh + SR_OLD_META_OPT_OFFSET, 862 omh->som_length - sizeof(*omi->omi_som)); 863 omi->omi_som->som_type = omh->som_type; 864 omi->omi_som->som_length = omh->som_length; 865 866 omh = (struct sr_meta_opt_hdr *)((void *)omh + 867 SR_OLD_META_OPT_SIZE); 868 } else { 869 870 /* Load variable length optional metadata. */ 871 DNPRINTF(SR_D_META, "%s: optional metadata of type %u, " 872 "length %u\n", DEVNAME(sc), omh->som_type, 873 omh->som_length); 874 omi->omi_som = malloc(omh->som_length, M_DEVBUF, 875 M_WAITOK | M_ZERO); 876 memcpy(omi->omi_som, omh, omh->som_length); 877 878 /* Validate checksum. */ 879 memcpy(&checksum, &omi->omi_som->som_checksum, 880 MD5_DIGEST_LENGTH); 881 bzero(&omi->omi_som->som_checksum, MD5_DIGEST_LENGTH); 882 sr_checksum(sc, omi->omi_som, 883 &omi->omi_som->som_checksum, omh->som_length); 884 if (bcmp(&checksum, &omi->omi_som->som_checksum, 885 sizeof(checksum))) 886 panic("%s: invalid optional metadata checksum", 887 DEVNAME(sc)); 888 889 omh = (struct sr_meta_opt_hdr *)((void *)omh + 890 omh->som_length); 891 } 892 } 893 } 894 895 int 896 sr_meta_validate(struct sr_discipline *sd, dev_t dev, struct sr_metadata *sm, 897 void *fm) 898 { 899 struct sr_softc *sc = sd->sd_sc; 900 struct sr_meta_driver *s; 901 #ifdef SR_DEBUG 902 struct sr_meta_chunk *mc; 903 #endif 904 u_int8_t checksum[MD5_DIGEST_LENGTH]; 905 char devname[32]; 906 int rv = 1; 907 908 DNPRINTF(SR_D_META, "%s: sr_meta_validate(%p)\n", DEVNAME(sc), sm); 909 910 sr_meta_getdevname(sc, dev, devname, sizeof(devname)); 911 912 s = &smd[sd->sd_meta_type]; 913 if (sd->sd_meta_type != SR_META_F_NATIVE) 914 if (s->smd_validate(sd, sm, fm)) { 915 sr_error(sc, "invalid foreign metadata"); 916 goto done; 917 } 918 919 /* 920 * at this point all foreign metadata has been translated to the native 921 * format and will be treated just like the native format 922 */ 923 924 if (sm->ssdi.ssd_magic != SR_MAGIC) { 925 sr_error(sc, "not valid softraid metadata"); 926 goto done; 927 } 928 929 /* Verify metadata checksum. */ 930 sr_checksum(sc, sm, &checksum, sizeof(struct sr_meta_invariant)); 931 if (bcmp(&checksum, &sm->ssd_checksum, sizeof(checksum))) { 932 sr_error(sc, "invalid metadata checksum"); 933 goto done; 934 } 935 936 /* Handle changes between versions. */ 937 if (sm->ssdi.ssd_version == 3) { 938 939 /* 940 * Version 3 - update metadata version and fix up data blkno 941 * value since this did not exist in version 3. 942 */ 943 if (sm->ssd_data_blkno == 0) 944 sm->ssd_data_blkno = SR_META_V3_DATA_OFFSET; 945 946 } else if (sm->ssdi.ssd_version == 4) { 947 948 /* 949 * Version 4 - original metadata format did not store 950 * data blkno so fix this up if necessary. 951 */ 952 if (sm->ssd_data_blkno == 0) 953 sm->ssd_data_blkno = SR_DATA_OFFSET; 954 955 } else if (sm->ssdi.ssd_version == SR_META_VERSION) { 956 957 /* 958 * Version 5 - variable length optional metadata. Migration 959 * from earlier fixed length optional metadata is handled 960 * in sr_meta_read(). 961 */ 962 963 } else { 964 965 sr_error(sc, "cannot read metadata version %u on %s, " 966 "expected version %u or earlier", 967 sm->ssdi.ssd_version, devname, SR_META_VERSION); 968 goto done; 969 970 } 971 972 /* Update version number and revision string. */ 973 sm->ssdi.ssd_version = SR_META_VERSION; 974 snprintf(sm->ssdi.ssd_revision, sizeof(sm->ssdi.ssd_revision), 975 "%03d", SR_META_VERSION); 976 977 #ifdef SR_DEBUG 978 /* warn if disk changed order */ 979 mc = (struct sr_meta_chunk *)(sm + 1); 980 if (strncmp(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname, devname, 981 sizeof(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname))) 982 DNPRINTF(SR_D_META, "%s: roaming device %s -> %s\n", 983 DEVNAME(sc), mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname, 984 devname); 985 #endif 986 987 /* we have meta data on disk */ 988 DNPRINTF(SR_D_META, "%s: sr_meta_validate valid metadata %s\n", 989 DEVNAME(sc), devname); 990 991 rv = 0; 992 done: 993 return (rv); 994 } 995 996 int 997 sr_meta_native_bootprobe(struct sr_softc *sc, dev_t devno, 998 struct sr_boot_chunk_head *bch) 999 { 1000 struct vnode *vn; 1001 struct disklabel label; 1002 struct sr_metadata *md = NULL; 1003 struct sr_discipline *fake_sd = NULL; 1004 struct sr_boot_chunk *bc; 1005 char devname[32]; 1006 dev_t chrdev, rawdev; 1007 int error, i; 1008 int rv = SR_META_NOTCLAIMED; 1009 1010 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe\n", DEVNAME(sc)); 1011 1012 /* 1013 * Use character raw device to avoid SCSI complaints about missing 1014 * media on removable media devices. 1015 */ 1016 chrdev = blktochr(devno); 1017 rawdev = MAKEDISKDEV(major(chrdev), DISKUNIT(devno), RAW_PART); 1018 if (cdevvp(rawdev, &vn)) { 1019 sr_error(sc, "sr_meta_native_bootprobe: cannot allocate vnode"); 1020 goto done; 1021 } 1022 1023 /* open device */ 1024 error = VOP_OPEN(vn, FREAD, NOCRED, curproc); 1025 if (error) { 1026 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe open " 1027 "failed\n", DEVNAME(sc)); 1028 vput(vn); 1029 goto done; 1030 } 1031 1032 /* get disklabel */ 1033 error = VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD, NOCRED, 1034 curproc); 1035 if (error) { 1036 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe ioctl " 1037 "failed\n", DEVNAME(sc)); 1038 VOP_CLOSE(vn, FREAD, NOCRED, curproc); 1039 vput(vn); 1040 goto done; 1041 } 1042 1043 /* we are done, close device */ 1044 error = VOP_CLOSE(vn, FREAD, NOCRED, curproc); 1045 if (error) { 1046 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe close " 1047 "failed\n", DEVNAME(sc)); 1048 vput(vn); 1049 goto done; 1050 } 1051 vput(vn); 1052 1053 /* Make sure this is a DEV_BSIZE byte/sector device. */ 1054 if (label.d_secsize != DEV_BSIZE) { 1055 DNPRINTF(SR_D_META, "%s: %s has unsupported sector size (%d)", 1056 DEVNAME(sc), devname, label.d_secsize); 1057 goto done; 1058 } 1059 1060 md = malloc(SR_META_SIZE * DEV_BSIZE, M_DEVBUF, M_ZERO | M_NOWAIT); 1061 if (md == NULL) { 1062 sr_error(sc, "not enough memory for metadata buffer"); 1063 goto done; 1064 } 1065 1066 /* create fake sd to use utility functions */ 1067 fake_sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, 1068 M_ZERO | M_NOWAIT); 1069 if (fake_sd == NULL) { 1070 sr_error(sc, "not enough memory for fake discipline"); 1071 goto done; 1072 } 1073 fake_sd->sd_sc = sc; 1074 fake_sd->sd_meta_type = SR_META_F_NATIVE; 1075 1076 for (i = 0; i < MAXPARTITIONS; i++) { 1077 if (label.d_partitions[i].p_fstype != FS_RAID) 1078 continue; 1079 1080 /* open partition */ 1081 rawdev = MAKEDISKDEV(major(devno), DISKUNIT(devno), i); 1082 if (bdevvp(rawdev, &vn)) { 1083 sr_error(sc, "sr_meta_native_bootprobe: cannot " 1084 "allocate vnode for partition"); 1085 goto done; 1086 } 1087 error = VOP_OPEN(vn, FREAD, NOCRED, curproc); 1088 if (error) { 1089 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe " 1090 "open failed, partition %d\n", 1091 DEVNAME(sc), i); 1092 vput(vn); 1093 continue; 1094 } 1095 1096 if (sr_meta_native_read(fake_sd, rawdev, md, NULL)) { 1097 sr_error(sc, "native bootprobe could not read native " 1098 "metadata"); 1099 VOP_CLOSE(vn, FREAD, NOCRED, curproc); 1100 vput(vn); 1101 continue; 1102 } 1103 1104 /* are we a softraid partition? */ 1105 if (md->ssdi.ssd_magic != SR_MAGIC) { 1106 VOP_CLOSE(vn, FREAD, NOCRED, curproc); 1107 vput(vn); 1108 continue; 1109 } 1110 1111 sr_meta_getdevname(sc, rawdev, devname, sizeof(devname)); 1112 if (sr_meta_validate(fake_sd, rawdev, md, NULL) == 0) { 1113 /* XXX fix M_WAITOK, this is boot time */ 1114 bc = malloc(sizeof(struct sr_boot_chunk), 1115 M_DEVBUF, M_WAITOK | M_ZERO); 1116 bc->sbc_metadata = malloc(sizeof(struct sr_metadata), 1117 M_DEVBUF, M_WAITOK | M_ZERO); 1118 memcpy(bc->sbc_metadata, md, sizeof(struct sr_metadata)); 1119 bc->sbc_mm = rawdev; 1120 SLIST_INSERT_HEAD(bch, bc, sbc_link); 1121 rv = SR_META_CLAIMED; 1122 } 1123 1124 /* we are done, close partition */ 1125 VOP_CLOSE(vn, FREAD, NOCRED, curproc); 1126 vput(vn); 1127 } 1128 1129 done: 1130 free(fake_sd, M_DEVBUF, sizeof(struct sr_discipline)); 1131 free(md, M_DEVBUF, SR_META_SIZE * DEV_BSIZE); 1132 1133 return (rv); 1134 } 1135 1136 int 1137 sr_boot_assembly(struct sr_softc *sc) 1138 { 1139 struct sr_boot_volume_head bvh; 1140 struct sr_boot_chunk_head bch, kdh; 1141 struct sr_boot_volume *bv, *bv1, *bv2; 1142 struct sr_boot_chunk *bc, *bcnext, *bc1, *bc2; 1143 struct sr_disk_head sdklist; 1144 struct sr_disk *sdk; 1145 struct disk *dk; 1146 struct bioc_createraid bcr; 1147 struct sr_meta_chunk *hm; 1148 struct sr_chunk_head *cl; 1149 struct sr_chunk *hotspare, *chunk, *last; 1150 u_int64_t *ondisk = NULL; 1151 dev_t *devs = NULL; 1152 void *data; 1153 char devname[32]; 1154 int rv = 0, i; 1155 1156 DNPRINTF(SR_D_META, "%s: sr_boot_assembly\n", DEVNAME(sc)); 1157 1158 SLIST_INIT(&sdklist); 1159 SLIST_INIT(&bvh); 1160 SLIST_INIT(&bch); 1161 SLIST_INIT(&kdh); 1162 1163 dk = TAILQ_FIRST(&disklist); 1164 while (dk != NULL) { 1165 1166 /* See if this disk has been checked. */ 1167 SLIST_FOREACH(sdk, &sdklist, sdk_link) 1168 if (sdk->sdk_devno == dk->dk_devno) 1169 break; 1170 1171 if (sdk != NULL || dk->dk_devno == NODEV) { 1172 dk = TAILQ_NEXT(dk, dk_link); 1173 continue; 1174 } 1175 1176 /* Add this disk to the list that we've checked. */ 1177 sdk = malloc(sizeof(struct sr_disk), M_DEVBUF, 1178 M_NOWAIT | M_CANFAIL | M_ZERO); 1179 if (sdk == NULL) 1180 goto unwind; 1181 sdk->sdk_devno = dk->dk_devno; 1182 SLIST_INSERT_HEAD(&sdklist, sdk, sdk_link); 1183 1184 /* Only check sd(4) and wd(4) devices. */ 1185 if (strncmp(dk->dk_name, "sd", 2) && 1186 strncmp(dk->dk_name, "wd", 2)) { 1187 dk = TAILQ_NEXT(dk, dk_link); 1188 continue; 1189 } 1190 1191 /* native softraid uses partitions */ 1192 rw_enter_write(&sc->sc_lock); 1193 bio_status_init(&sc->sc_status, &sc->sc_dev); 1194 sr_meta_native_bootprobe(sc, dk->dk_devno, &bch); 1195 rw_exit_write(&sc->sc_lock); 1196 1197 /* probe non-native disks if native failed. */ 1198 1199 /* Restart scan since we may have slept. */ 1200 dk = TAILQ_FIRST(&disklist); 1201 } 1202 1203 /* 1204 * Create a list of volumes and associate chunks with each volume. 1205 */ 1206 for (bc = SLIST_FIRST(&bch); bc != NULL; bc = bcnext) { 1207 1208 bcnext = SLIST_NEXT(bc, sbc_link); 1209 SLIST_REMOVE(&bch, bc, sr_boot_chunk, sbc_link); 1210 bc->sbc_chunk_id = bc->sbc_metadata->ssdi.ssd_chunk_id; 1211 1212 /* Handle key disks separately. */ 1213 if (bc->sbc_metadata->ssdi.ssd_level == SR_KEYDISK_LEVEL) { 1214 SLIST_INSERT_HEAD(&kdh, bc, sbc_link); 1215 continue; 1216 } 1217 1218 SLIST_FOREACH(bv, &bvh, sbv_link) { 1219 if (bcmp(&bc->sbc_metadata->ssdi.ssd_uuid, 1220 &bv->sbv_uuid, 1221 sizeof(bc->sbc_metadata->ssdi.ssd_uuid)) == 0) 1222 break; 1223 } 1224 1225 if (bv == NULL) { 1226 bv = malloc(sizeof(struct sr_boot_volume), 1227 M_DEVBUF, M_NOWAIT | M_CANFAIL | M_ZERO); 1228 if (bv == NULL) { 1229 printf("%s: failed to allocate boot volume\n", 1230 DEVNAME(sc)); 1231 goto unwind; 1232 } 1233 1234 bv->sbv_level = bc->sbc_metadata->ssdi.ssd_level; 1235 bv->sbv_volid = bc->sbc_metadata->ssdi.ssd_volid; 1236 bv->sbv_chunk_no = bc->sbc_metadata->ssdi.ssd_chunk_no; 1237 bv->sbv_flags = bc->sbc_metadata->ssdi.ssd_vol_flags; 1238 memcpy(&bv->sbv_uuid, &bc->sbc_metadata->ssdi.ssd_uuid, 1239 sizeof(bc->sbc_metadata->ssdi.ssd_uuid)); 1240 SLIST_INIT(&bv->sbv_chunks); 1241 1242 /* Maintain volume order. */ 1243 bv2 = NULL; 1244 SLIST_FOREACH(bv1, &bvh, sbv_link) { 1245 if (bv1->sbv_volid > bv->sbv_volid) 1246 break; 1247 bv2 = bv1; 1248 } 1249 if (bv2 == NULL) { 1250 DNPRINTF(SR_D_META, "%s: insert volume %u " 1251 "at head\n", DEVNAME(sc), bv->sbv_volid); 1252 SLIST_INSERT_HEAD(&bvh, bv, sbv_link); 1253 } else { 1254 DNPRINTF(SR_D_META, "%s: insert volume %u " 1255 "after %u\n", DEVNAME(sc), bv->sbv_volid, 1256 bv2->sbv_volid); 1257 SLIST_INSERT_AFTER(bv2, bv, sbv_link); 1258 } 1259 } 1260 1261 /* Maintain chunk order. */ 1262 bc2 = NULL; 1263 SLIST_FOREACH(bc1, &bv->sbv_chunks, sbc_link) { 1264 if (bc1->sbc_chunk_id > bc->sbc_chunk_id) 1265 break; 1266 bc2 = bc1; 1267 } 1268 if (bc2 == NULL) { 1269 DNPRINTF(SR_D_META, "%s: volume %u insert chunk %u " 1270 "at head\n", DEVNAME(sc), bv->sbv_volid, 1271 bc->sbc_chunk_id); 1272 SLIST_INSERT_HEAD(&bv->sbv_chunks, bc, sbc_link); 1273 } else { 1274 DNPRINTF(SR_D_META, "%s: volume %u insert chunk %u " 1275 "after %u\n", DEVNAME(sc), bv->sbv_volid, 1276 bc->sbc_chunk_id, bc2->sbc_chunk_id); 1277 SLIST_INSERT_AFTER(bc2, bc, sbc_link); 1278 } 1279 1280 bv->sbv_chunks_found++; 1281 } 1282 1283 /* Allocate memory for device and ondisk version arrays. */ 1284 devs = mallocarray(BIOC_CRMAXLEN, sizeof(dev_t), M_DEVBUF, 1285 M_NOWAIT | M_CANFAIL); 1286 if (devs == NULL) { 1287 printf("%s: failed to allocate device array\n", DEVNAME(sc)); 1288 goto unwind; 1289 } 1290 ondisk = mallocarray(BIOC_CRMAXLEN, sizeof(u_int64_t), M_DEVBUF, 1291 M_NOWAIT | M_CANFAIL); 1292 if (ondisk == NULL) { 1293 printf("%s: failed to allocate ondisk array\n", DEVNAME(sc)); 1294 goto unwind; 1295 } 1296 1297 /* 1298 * Assemble hotspare "volumes". 1299 */ 1300 SLIST_FOREACH(bv, &bvh, sbv_link) { 1301 1302 /* Check if this is a hotspare "volume". */ 1303 if (bv->sbv_level != SR_HOTSPARE_LEVEL || 1304 bv->sbv_chunk_no != 1) 1305 continue; 1306 1307 #ifdef SR_DEBUG 1308 DNPRINTF(SR_D_META, "%s: assembling hotspare volume ", 1309 DEVNAME(sc)); 1310 if (sr_debug & SR_D_META) 1311 sr_uuid_print(&bv->sbv_uuid, 0); 1312 DNPRINTF(SR_D_META, " volid %u with %u chunks\n", 1313 bv->sbv_volid, bv->sbv_chunk_no); 1314 #endif 1315 1316 /* Create hotspare chunk metadata. */ 1317 hotspare = malloc(sizeof(struct sr_chunk), M_DEVBUF, 1318 M_NOWAIT | M_CANFAIL | M_ZERO); 1319 if (hotspare == NULL) { 1320 printf("%s: failed to allocate hotspare\n", 1321 DEVNAME(sc)); 1322 goto unwind; 1323 } 1324 1325 bc = SLIST_FIRST(&bv->sbv_chunks); 1326 sr_meta_getdevname(sc, bc->sbc_mm, devname, sizeof(devname)); 1327 hotspare->src_dev_mm = bc->sbc_mm; 1328 strlcpy(hotspare->src_devname, devname, 1329 sizeof(hotspare->src_devname)); 1330 hotspare->src_size = bc->sbc_metadata->ssdi.ssd_size; 1331 1332 hm = &hotspare->src_meta; 1333 hm->scmi.scm_volid = SR_HOTSPARE_VOLID; 1334 hm->scmi.scm_chunk_id = 0; 1335 hm->scmi.scm_size = bc->sbc_metadata->ssdi.ssd_size; 1336 hm->scmi.scm_coerced_size = bc->sbc_metadata->ssdi.ssd_size; 1337 strlcpy(hm->scmi.scm_devname, devname, 1338 sizeof(hm->scmi.scm_devname)); 1339 memcpy(&hm->scmi.scm_uuid, &bc->sbc_metadata->ssdi.ssd_uuid, 1340 sizeof(struct sr_uuid)); 1341 1342 sr_checksum(sc, hm, &hm->scm_checksum, 1343 sizeof(struct sr_meta_chunk_invariant)); 1344 1345 hm->scm_status = BIOC_SDHOTSPARE; 1346 1347 /* Add chunk to hotspare list. */ 1348 rw_enter_write(&sc->sc_hs_lock); 1349 cl = &sc->sc_hotspare_list; 1350 if (SLIST_EMPTY(cl)) 1351 SLIST_INSERT_HEAD(cl, hotspare, src_link); 1352 else { 1353 SLIST_FOREACH(chunk, cl, src_link) 1354 last = chunk; 1355 SLIST_INSERT_AFTER(last, hotspare, src_link); 1356 } 1357 sc->sc_hotspare_no++; 1358 rw_exit_write(&sc->sc_hs_lock); 1359 1360 } 1361 1362 /* 1363 * Assemble RAID volumes. 1364 */ 1365 SLIST_FOREACH(bv, &bvh, sbv_link) { 1366 1367 bzero(&bcr, sizeof(bcr)); 1368 data = NULL; 1369 1370 /* Check if this is a hotspare "volume". */ 1371 if (bv->sbv_level == SR_HOTSPARE_LEVEL && 1372 bv->sbv_chunk_no == 1) 1373 continue; 1374 1375 /* 1376 * Skip volumes that are marked as no auto assemble, unless 1377 * this was the volume which we actually booted from. 1378 */ 1379 if (bcmp(&sr_bootuuid, &bv->sbv_uuid, sizeof(sr_bootuuid)) != 0) 1380 if (bv->sbv_flags & BIOC_SCNOAUTOASSEMBLE) 1381 continue; 1382 1383 #ifdef SR_DEBUG 1384 DNPRINTF(SR_D_META, "%s: assembling volume ", DEVNAME(sc)); 1385 if (sr_debug & SR_D_META) 1386 sr_uuid_print(&bv->sbv_uuid, 0); 1387 DNPRINTF(SR_D_META, " volid %u with %u chunks\n", 1388 bv->sbv_volid, bv->sbv_chunk_no); 1389 #endif 1390 1391 /* 1392 * If this is a crypto volume, try to find a matching 1393 * key disk... 1394 */ 1395 bcr.bc_key_disk = NODEV; 1396 if (bv->sbv_level == 'C') { 1397 SLIST_FOREACH(bc, &kdh, sbc_link) { 1398 if (bcmp(&bc->sbc_metadata->ssdi.ssd_uuid, 1399 &bv->sbv_uuid, 1400 sizeof(bc->sbc_metadata->ssdi.ssd_uuid)) 1401 == 0) 1402 bcr.bc_key_disk = bc->sbc_mm; 1403 } 1404 } 1405 1406 for (i = 0; i < BIOC_CRMAXLEN; i++) { 1407 devs[i] = NODEV; /* mark device as illegal */ 1408 ondisk[i] = 0; 1409 } 1410 1411 SLIST_FOREACH(bc, &bv->sbv_chunks, sbc_link) { 1412 if (devs[bc->sbc_chunk_id] != NODEV) { 1413 bv->sbv_chunks_found--; 1414 sr_meta_getdevname(sc, bc->sbc_mm, devname, 1415 sizeof(devname)); 1416 printf("%s: found duplicate chunk %u for " 1417 "volume %u on device %s\n", DEVNAME(sc), 1418 bc->sbc_chunk_id, bv->sbv_volid, devname); 1419 } 1420 1421 if (devs[bc->sbc_chunk_id] == NODEV || 1422 bc->sbc_metadata->ssd_ondisk > 1423 ondisk[bc->sbc_chunk_id]) { 1424 devs[bc->sbc_chunk_id] = bc->sbc_mm; 1425 ondisk[bc->sbc_chunk_id] = 1426 bc->sbc_metadata->ssd_ondisk; 1427 DNPRINTF(SR_D_META, "%s: using ondisk " 1428 "metadata version %llu for chunk %u\n", 1429 DEVNAME(sc), ondisk[bc->sbc_chunk_id], 1430 bc->sbc_chunk_id); 1431 } 1432 } 1433 1434 if (bv->sbv_chunk_no != bv->sbv_chunks_found) { 1435 printf("%s: not all chunks were provided; " 1436 "attempting to bring volume %d online\n", 1437 DEVNAME(sc), bv->sbv_volid); 1438 } 1439 1440 bcr.bc_level = bv->sbv_level; 1441 bcr.bc_dev_list_len = bv->sbv_chunk_no * sizeof(dev_t); 1442 bcr.bc_dev_list = devs; 1443 bcr.bc_flags = BIOC_SCDEVT | 1444 (bv->sbv_flags & BIOC_SCNOAUTOASSEMBLE); 1445 1446 if (bv->sbv_level == 'C' && 1447 bcmp(&sr_bootuuid, &bv->sbv_uuid, sizeof(sr_bootuuid)) == 0) 1448 data = sr_bootkey; 1449 1450 rw_enter_write(&sc->sc_lock); 1451 bio_status_init(&sc->sc_status, &sc->sc_dev); 1452 sr_ioctl_createraid(sc, &bcr, 0, data); 1453 rw_exit_write(&sc->sc_lock); 1454 1455 rv++; 1456 } 1457 1458 /* done with metadata */ 1459 unwind: 1460 /* Free boot volumes and associated chunks. */ 1461 for (bv1 = SLIST_FIRST(&bvh); bv1 != NULL; bv1 = bv2) { 1462 bv2 = SLIST_NEXT(bv1, sbv_link); 1463 for (bc1 = SLIST_FIRST(&bv1->sbv_chunks); bc1 != NULL; 1464 bc1 = bc2) { 1465 bc2 = SLIST_NEXT(bc1, sbc_link); 1466 if (bc1->sbc_metadata) 1467 free(bc1->sbc_metadata, M_DEVBUF, 0); 1468 free(bc1, M_DEVBUF, 0); 1469 } 1470 free(bv1, M_DEVBUF, 0); 1471 } 1472 /* Free keydisks chunks. */ 1473 for (bc1 = SLIST_FIRST(&kdh); bc1 != NULL; bc1 = bc2) { 1474 bc2 = SLIST_NEXT(bc1, sbc_link); 1475 if (bc1->sbc_metadata) 1476 free(bc1->sbc_metadata, M_DEVBUF, 0); 1477 free(bc1, M_DEVBUF, 0); 1478 } 1479 /* Free unallocated chunks. */ 1480 for (bc1 = SLIST_FIRST(&bch); bc1 != NULL; bc1 = bc2) { 1481 bc2 = SLIST_NEXT(bc1, sbc_link); 1482 if (bc1->sbc_metadata) 1483 free(bc1->sbc_metadata, M_DEVBUF, 0); 1484 free(bc1, M_DEVBUF, 0); 1485 } 1486 1487 while (!SLIST_EMPTY(&sdklist)) { 1488 sdk = SLIST_FIRST(&sdklist); 1489 SLIST_REMOVE_HEAD(&sdklist, sdk_link); 1490 free(sdk, M_DEVBUF, 0); 1491 } 1492 1493 free(devs, M_DEVBUF, BIOC_CRMAXLEN * sizeof(dev_t)); 1494 free(ondisk, M_DEVBUF, BIOC_CRMAXLEN * sizeof(u_int64_t)); 1495 1496 return (rv); 1497 } 1498 1499 void 1500 sr_map_root(void) 1501 { 1502 struct sr_softc *sc = softraid0; 1503 struct sr_discipline *sd; 1504 struct sr_meta_opt_item *omi; 1505 struct sr_meta_boot *sbm; 1506 u_char duid[8]; 1507 int i; 1508 1509 DNPRINTF(SR_D_MISC, "%s: sr_map_root\n", DEVNAME(sc)); 1510 1511 if (sc == NULL) 1512 return; 1513 1514 bzero(duid, sizeof(duid)); 1515 if (bcmp(rootduid, duid, sizeof(duid)) == 0) { 1516 DNPRINTF(SR_D_MISC, "%s: root duid is zero\n", DEVNAME(sc)); 1517 return; 1518 } 1519 1520 TAILQ_FOREACH(sd, &sc->sc_dis_list, sd_link) { 1521 SLIST_FOREACH(omi, &sd->sd_meta_opt, omi_link) { 1522 if (omi->omi_som->som_type != SR_OPT_BOOT) 1523 continue; 1524 sbm = (struct sr_meta_boot *)omi->omi_som; 1525 for (i = 0; i < SR_MAX_BOOT_DISKS; i++) { 1526 if (bcmp(rootduid, sbm->sbm_boot_duid[i], 1527 sizeof(rootduid)) == 0) { 1528 memcpy(rootduid, sbm->sbm_root_duid, 1529 sizeof(rootduid)); 1530 DNPRINTF(SR_D_MISC, "%s: root duid " 1531 "mapped to %02hx%02hx%02hx%02hx" 1532 "%02hx%02hx%02hx%02hx\n", 1533 DEVNAME(sc), rootduid[0], 1534 rootduid[1], rootduid[2], 1535 rootduid[3], rootduid[4], 1536 rootduid[5], rootduid[6], 1537 rootduid[7]); 1538 return; 1539 } 1540 } 1541 } 1542 } 1543 } 1544 1545 int 1546 sr_meta_native_probe(struct sr_softc *sc, struct sr_chunk *ch_entry) 1547 { 1548 struct disklabel label; 1549 char *devname; 1550 int error, part; 1551 u_int64_t size; 1552 1553 DNPRINTF(SR_D_META, "%s: sr_meta_native_probe(%s)\n", 1554 DEVNAME(sc), ch_entry->src_devname); 1555 1556 devname = ch_entry->src_devname; 1557 part = DISKPART(ch_entry->src_dev_mm); 1558 1559 /* get disklabel */ 1560 error = VOP_IOCTL(ch_entry->src_vn, DIOCGDINFO, (caddr_t)&label, FREAD, 1561 NOCRED, curproc); 1562 if (error) { 1563 DNPRINTF(SR_D_META, "%s: %s can't obtain disklabel\n", 1564 DEVNAME(sc), devname); 1565 goto unwind; 1566 } 1567 memcpy(ch_entry->src_duid, label.d_uid, sizeof(ch_entry->src_duid)); 1568 1569 /* Make sure this is a DEV_BSIZE byte/sector device. */ 1570 if (label.d_secsize != DEV_BSIZE) { 1571 sr_error(sc, "%s has unsupported sector size (%u)", 1572 devname, label.d_secsize); 1573 goto unwind; 1574 } 1575 1576 /* make sure the partition is of the right type */ 1577 if (label.d_partitions[part].p_fstype != FS_RAID) { 1578 DNPRINTF(SR_D_META, 1579 "%s: %s partition not of type RAID (%d)\n", DEVNAME(sc), 1580 devname, 1581 label.d_partitions[part].p_fstype); 1582 goto unwind; 1583 } 1584 1585 size = DL_SECTOBLK(&label, DL_GETPSIZE(&label.d_partitions[part])); 1586 if (size <= SR_DATA_OFFSET) { 1587 DNPRINTF(SR_D_META, "%s: %s partition too small\n", DEVNAME(sc), 1588 devname); 1589 goto unwind; 1590 } 1591 size -= SR_DATA_OFFSET; 1592 if (size > INT64_MAX) { 1593 DNPRINTF(SR_D_META, "%s: %s partition too large\n", DEVNAME(sc), 1594 devname); 1595 goto unwind; 1596 } 1597 ch_entry->src_size = size; 1598 1599 DNPRINTF(SR_D_META, "%s: probe found %s size %lld\n", DEVNAME(sc), 1600 devname, (long long)size); 1601 1602 return (SR_META_F_NATIVE); 1603 unwind: 1604 DNPRINTF(SR_D_META, "%s: invalid device: %s\n", DEVNAME(sc), 1605 devname ? devname : "nodev"); 1606 return (SR_META_F_INVALID); 1607 } 1608 1609 int 1610 sr_meta_native_attach(struct sr_discipline *sd, int force) 1611 { 1612 struct sr_softc *sc = sd->sd_sc; 1613 struct sr_chunk_head *cl = &sd->sd_vol.sv_chunk_list; 1614 struct sr_metadata *md = NULL; 1615 struct sr_chunk *ch_entry, *ch_next; 1616 struct sr_uuid uuid; 1617 u_int64_t version = 0; 1618 int sr, not_sr, rv = 1, d, expected = -1, old_meta = 0; 1619 1620 DNPRINTF(SR_D_META, "%s: sr_meta_native_attach\n", DEVNAME(sc)); 1621 1622 md = malloc(SR_META_SIZE * DEV_BSIZE, M_DEVBUF, M_ZERO | M_NOWAIT); 1623 if (md == NULL) { 1624 sr_error(sc, "not enough memory for metadata buffer"); 1625 goto bad; 1626 } 1627 1628 bzero(&uuid, sizeof uuid); 1629 1630 sr = not_sr = d = 0; 1631 SLIST_FOREACH(ch_entry, cl, src_link) { 1632 if (ch_entry->src_dev_mm == NODEV) 1633 continue; 1634 1635 if (sr_meta_native_read(sd, ch_entry->src_dev_mm, md, NULL)) { 1636 sr_error(sc, "could not read native metadata"); 1637 goto bad; 1638 } 1639 1640 if (md->ssdi.ssd_magic == SR_MAGIC) { 1641 sr++; 1642 ch_entry->src_meta.scmi.scm_chunk_id = 1643 md->ssdi.ssd_chunk_id; 1644 if (d == 0) { 1645 memcpy(&uuid, &md->ssdi.ssd_uuid, sizeof uuid); 1646 expected = md->ssdi.ssd_chunk_no; 1647 version = md->ssd_ondisk; 1648 d++; 1649 continue; 1650 } else if (bcmp(&md->ssdi.ssd_uuid, &uuid, 1651 sizeof uuid)) { 1652 sr_error(sc, "not part of the same volume"); 1653 goto bad; 1654 } 1655 if (md->ssd_ondisk != version) { 1656 old_meta++; 1657 version = MAX(md->ssd_ondisk, version); 1658 } 1659 } else 1660 not_sr++; 1661 } 1662 1663 if (sr && not_sr) { 1664 sr_error(sc, "not all chunks are of the native metadata " 1665 "format"); 1666 goto bad; 1667 } 1668 1669 /* mixed metadata versions; mark bad disks offline */ 1670 if (old_meta) { 1671 d = 0; 1672 for (ch_entry = SLIST_FIRST(cl); ch_entry != NULL; 1673 ch_entry = ch_next, d++) { 1674 ch_next = SLIST_NEXT(ch_entry, src_link); 1675 1676 /* XXX do we want to read this again? */ 1677 if (ch_entry->src_dev_mm == NODEV) 1678 panic("src_dev_mm == NODEV"); 1679 if (sr_meta_native_read(sd, ch_entry->src_dev_mm, md, 1680 NULL)) 1681 sr_warn(sc, "could not read native metadata"); 1682 if (md->ssd_ondisk != version) 1683 sd->sd_vol.sv_chunks[d]->src_meta.scm_status = 1684 BIOC_SDOFFLINE; 1685 } 1686 } 1687 1688 if (expected != sr && !force && expected != -1) { 1689 DNPRINTF(SR_D_META, "%s: not all chunks were provided, trying " 1690 "anyway\n", DEVNAME(sc)); 1691 } 1692 1693 rv = 0; 1694 bad: 1695 free(md, M_DEVBUF, SR_META_SIZE * DEV_BSIZE); 1696 return (rv); 1697 } 1698 1699 int 1700 sr_meta_native_read(struct sr_discipline *sd, dev_t dev, 1701 struct sr_metadata *md, void *fm) 1702 { 1703 #ifdef SR_DEBUG 1704 struct sr_softc *sc = sd->sd_sc; 1705 #endif 1706 DNPRINTF(SR_D_META, "%s: sr_meta_native_read(0x%x, %p)\n", 1707 DEVNAME(sc), dev, md); 1708 1709 return (sr_meta_rw(sd, dev, md, B_READ)); 1710 } 1711 1712 int 1713 sr_meta_native_write(struct sr_discipline *sd, dev_t dev, 1714 struct sr_metadata *md, void *fm) 1715 { 1716 #ifdef SR_DEBUG 1717 struct sr_softc *sc = sd->sd_sc; 1718 #endif 1719 DNPRINTF(SR_D_META, "%s: sr_meta_native_write(0x%x, %p)\n", 1720 DEVNAME(sc), dev, md); 1721 1722 return (sr_meta_rw(sd, dev, md, B_WRITE)); 1723 } 1724 1725 void 1726 sr_hotplug_register(struct sr_discipline *sd, void *func) 1727 { 1728 struct sr_hotplug_list *mhe; 1729 1730 DNPRINTF(SR_D_MISC, "%s: sr_hotplug_register: %p\n", 1731 DEVNAME(sd->sd_sc), func); 1732 1733 /* make sure we aren't on the list yet */ 1734 SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link) 1735 if (mhe->sh_hotplug == func) 1736 return; 1737 1738 mhe = malloc(sizeof(struct sr_hotplug_list), M_DEVBUF, 1739 M_WAITOK | M_ZERO); 1740 mhe->sh_hotplug = func; 1741 mhe->sh_sd = sd; 1742 SLIST_INSERT_HEAD(&sr_hotplug_callbacks, mhe, shl_link); 1743 } 1744 1745 void 1746 sr_hotplug_unregister(struct sr_discipline *sd, void *func) 1747 { 1748 struct sr_hotplug_list *mhe; 1749 1750 DNPRINTF(SR_D_MISC, "%s: sr_hotplug_unregister: %s %p\n", 1751 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, func); 1752 1753 /* make sure we are on the list yet */ 1754 SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link) { 1755 if (mhe->sh_hotplug == func) 1756 break; 1757 } 1758 if (mhe != NULL) { 1759 SLIST_REMOVE(&sr_hotplug_callbacks, mhe, 1760 sr_hotplug_list, shl_link); 1761 free(mhe, M_DEVBUF, 0); 1762 } 1763 } 1764 1765 void 1766 sr_disk_attach(struct disk *diskp, int action) 1767 { 1768 struct sr_hotplug_list *mhe; 1769 1770 SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link) 1771 if (mhe->sh_sd->sd_ready) 1772 mhe->sh_hotplug(mhe->sh_sd, diskp, action); 1773 } 1774 1775 int 1776 sr_match(struct device *parent, void *match, void *aux) 1777 { 1778 return (1); 1779 } 1780 1781 void 1782 sr_attach(struct device *parent, struct device *self, void *aux) 1783 { 1784 struct sr_softc *sc = (void *)self; 1785 struct scsibus_attach_args saa; 1786 1787 DNPRINTF(SR_D_MISC, "\n%s: sr_attach", DEVNAME(sc)); 1788 1789 if (softraid0 == NULL) 1790 softraid0 = sc; 1791 1792 rw_init(&sc->sc_lock, "sr_lock"); 1793 rw_init(&sc->sc_hs_lock, "sr_hs_lock"); 1794 1795 SLIST_INIT(&sr_hotplug_callbacks); 1796 TAILQ_INIT(&sc->sc_dis_list); 1797 SLIST_INIT(&sc->sc_hotspare_list); 1798 1799 #if NBIO > 0 1800 if (bio_register(&sc->sc_dev, sr_bio_ioctl) != 0) 1801 printf("%s: controller registration failed", DEVNAME(sc)); 1802 #endif /* NBIO > 0 */ 1803 1804 #ifndef SMALL_KERNEL 1805 strlcpy(sc->sc_sensordev.xname, DEVNAME(sc), 1806 sizeof(sc->sc_sensordev.xname)); 1807 sensordev_install(&sc->sc_sensordev); 1808 #endif /* SMALL_KERNEL */ 1809 1810 printf("\n"); 1811 1812 sc->sc_link.adapter_softc = sc; 1813 sc->sc_link.adapter = &sr_switch; 1814 sc->sc_link.adapter_target = SR_MAX_LD; 1815 sc->sc_link.adapter_buswidth = SR_MAX_LD; 1816 sc->sc_link.luns = 1; 1817 1818 bzero(&saa, sizeof(saa)); 1819 saa.saa_sc_link = &sc->sc_link; 1820 1821 sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev, 1822 &saa, scsiprint); 1823 1824 softraid_disk_attach = sr_disk_attach; 1825 1826 sr_boot_assembly(sc); 1827 1828 explicit_bzero(sr_bootkey, sizeof(sr_bootkey)); 1829 } 1830 1831 int 1832 sr_detach(struct device *self, int flags) 1833 { 1834 struct sr_softc *sc = (void *)self; 1835 int rv; 1836 1837 DNPRINTF(SR_D_MISC, "%s: sr_detach\n", DEVNAME(sc)); 1838 1839 softraid_disk_attach = NULL; 1840 1841 sr_shutdown(); 1842 1843 #ifndef SMALL_KERNEL 1844 if (sc->sc_sensor_task != NULL) 1845 sensor_task_unregister(sc->sc_sensor_task); 1846 sensordev_deinstall(&sc->sc_sensordev); 1847 #endif /* SMALL_KERNEL */ 1848 1849 if (sc->sc_scsibus != NULL) { 1850 rv = config_detach((struct device *)sc->sc_scsibus, flags); 1851 if (rv != 0) 1852 return (rv); 1853 sc->sc_scsibus = NULL; 1854 } 1855 1856 return (0); 1857 } 1858 1859 void 1860 sr_info(struct sr_softc *sc, const char *fmt, ...) 1861 { 1862 va_list ap; 1863 1864 rw_assert_wrlock(&sc->sc_lock); 1865 1866 va_start(ap, fmt); 1867 bio_status(&sc->sc_status, 0, BIO_MSG_INFO, fmt, &ap); 1868 va_end(ap); 1869 } 1870 1871 void 1872 sr_warn(struct sr_softc *sc, const char *fmt, ...) 1873 { 1874 va_list ap; 1875 1876 rw_assert_wrlock(&sc->sc_lock); 1877 1878 va_start(ap, fmt); 1879 bio_status(&sc->sc_status, 1, BIO_MSG_WARN, fmt, &ap); 1880 va_end(ap); 1881 } 1882 1883 void 1884 sr_error(struct sr_softc *sc, const char *fmt, ...) 1885 { 1886 va_list ap; 1887 1888 rw_assert_wrlock(&sc->sc_lock); 1889 1890 va_start(ap, fmt); 1891 bio_status(&sc->sc_status, 1, BIO_MSG_ERROR, fmt, &ap); 1892 va_end(ap); 1893 } 1894 1895 void 1896 sr_minphys(struct buf *bp, struct scsi_link *sl) 1897 { 1898 DNPRINTF(SR_D_MISC, "sr_minphys: %ld\n", bp->b_bcount); 1899 1900 /* XXX currently using SR_MAXFER = MAXPHYS */ 1901 if (bp->b_bcount > SR_MAXFER) 1902 bp->b_bcount = SR_MAXFER; 1903 minphys(bp); 1904 } 1905 1906 void 1907 sr_copy_internal_data(struct scsi_xfer *xs, void *v, size_t size) 1908 { 1909 size_t copy_cnt; 1910 1911 DNPRINTF(SR_D_MISC, "sr_copy_internal_data xs: %p size: %zu\n", 1912 xs, size); 1913 1914 if (xs->datalen) { 1915 copy_cnt = MIN(size, xs->datalen); 1916 memcpy(xs->data, v, copy_cnt); 1917 } 1918 } 1919 1920 int 1921 sr_ccb_alloc(struct sr_discipline *sd) 1922 { 1923 struct sr_ccb *ccb; 1924 int i; 1925 1926 if (!sd) 1927 return (1); 1928 1929 DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc\n", DEVNAME(sd->sd_sc)); 1930 1931 if (sd->sd_ccb) 1932 return (1); 1933 1934 sd->sd_ccb = mallocarray(sd->sd_max_wu, 1935 sd->sd_max_ccb_per_wu * sizeof(struct sr_ccb), 1936 M_DEVBUF, M_WAITOK | M_ZERO); 1937 TAILQ_INIT(&sd->sd_ccb_freeq); 1938 for (i = 0; i < sd->sd_max_wu * sd->sd_max_ccb_per_wu; i++) { 1939 ccb = &sd->sd_ccb[i]; 1940 ccb->ccb_dis = sd; 1941 sr_ccb_put(ccb); 1942 } 1943 1944 DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc ccb: %d\n", 1945 DEVNAME(sd->sd_sc), sd->sd_max_wu * sd->sd_max_ccb_per_wu); 1946 1947 return (0); 1948 } 1949 1950 void 1951 sr_ccb_free(struct sr_discipline *sd) 1952 { 1953 struct sr_ccb *ccb; 1954 1955 if (!sd) 1956 return; 1957 1958 DNPRINTF(SR_D_CCB, "%s: sr_ccb_free %p\n", DEVNAME(sd->sd_sc), sd); 1959 1960 while ((ccb = TAILQ_FIRST(&sd->sd_ccb_freeq)) != NULL) 1961 TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link); 1962 1963 if (sd->sd_ccb) 1964 free(sd->sd_ccb, M_DEVBUF, 0); 1965 } 1966 1967 struct sr_ccb * 1968 sr_ccb_get(struct sr_discipline *sd) 1969 { 1970 struct sr_ccb *ccb; 1971 int s; 1972 1973 s = splbio(); 1974 1975 ccb = TAILQ_FIRST(&sd->sd_ccb_freeq); 1976 if (ccb) { 1977 TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link); 1978 ccb->ccb_state = SR_CCB_INPROGRESS; 1979 } 1980 1981 splx(s); 1982 1983 DNPRINTF(SR_D_CCB, "%s: sr_ccb_get: %p\n", DEVNAME(sd->sd_sc), 1984 ccb); 1985 1986 return (ccb); 1987 } 1988 1989 void 1990 sr_ccb_put(struct sr_ccb *ccb) 1991 { 1992 struct sr_discipline *sd = ccb->ccb_dis; 1993 int s; 1994 1995 DNPRINTF(SR_D_CCB, "%s: sr_ccb_put: %p\n", DEVNAME(sd->sd_sc), 1996 ccb); 1997 1998 s = splbio(); 1999 2000 ccb->ccb_wu = NULL; 2001 ccb->ccb_state = SR_CCB_FREE; 2002 ccb->ccb_target = -1; 2003 ccb->ccb_opaque = NULL; 2004 2005 TAILQ_INSERT_TAIL(&sd->sd_ccb_freeq, ccb, ccb_link); 2006 2007 splx(s); 2008 } 2009 2010 struct sr_ccb * 2011 sr_ccb_rw(struct sr_discipline *sd, int chunk, daddr_t blkno, 2012 long len, u_int8_t *data, int xsflags, int ccbflags) 2013 { 2014 struct sr_chunk *sc = sd->sd_vol.sv_chunks[chunk]; 2015 struct sr_ccb *ccb = NULL; 2016 2017 ccb = sr_ccb_get(sd); 2018 if (ccb == NULL) 2019 goto out; 2020 2021 ccb->ccb_flags = ccbflags; 2022 ccb->ccb_target = chunk; 2023 2024 ccb->ccb_buf.b_flags = B_PHYS | B_CALL; 2025 if (ISSET(xsflags, SCSI_DATA_IN)) 2026 ccb->ccb_buf.b_flags |= B_READ; 2027 else 2028 ccb->ccb_buf.b_flags |= B_WRITE; 2029 2030 ccb->ccb_buf.b_blkno = blkno + sd->sd_meta->ssd_data_blkno; 2031 ccb->ccb_buf.b_bcount = len; 2032 ccb->ccb_buf.b_bufsize = len; 2033 ccb->ccb_buf.b_resid = len; 2034 ccb->ccb_buf.b_data = data; 2035 ccb->ccb_buf.b_error = 0; 2036 ccb->ccb_buf.b_iodone = sd->sd_scsi_intr; 2037 ccb->ccb_buf.b_proc = curproc; 2038 ccb->ccb_buf.b_dev = sc->src_dev_mm; 2039 ccb->ccb_buf.b_vp = sc->src_vn; 2040 ccb->ccb_buf.b_bq = NULL; 2041 2042 if (!ISSET(ccb->ccb_buf.b_flags, B_READ)) 2043 ccb->ccb_buf.b_vp->v_numoutput++; 2044 2045 LIST_INIT(&ccb->ccb_buf.b_dep); 2046 2047 DNPRINTF(SR_D_DIS, "%s: %s %s ccb " 2048 "b_bcount %ld b_blkno %lld b_flags 0x%0lx b_data %p\n", 2049 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, sd->sd_name, 2050 ccb->ccb_buf.b_bcount, (long long)ccb->ccb_buf.b_blkno, 2051 ccb->ccb_buf.b_flags, ccb->ccb_buf.b_data); 2052 2053 out: 2054 return ccb; 2055 } 2056 2057 void 2058 sr_ccb_done(struct sr_ccb *ccb) 2059 { 2060 struct sr_workunit *wu = ccb->ccb_wu; 2061 struct sr_discipline *sd = wu->swu_dis; 2062 struct sr_softc *sc = sd->sd_sc; 2063 2064 DNPRINTF(SR_D_INTR, "%s: %s %s ccb done b_bcount %ld b_resid %zu" 2065 " b_flags 0x%0lx block %lld target %d\n", 2066 DEVNAME(sc), sd->sd_meta->ssd_devname, sd->sd_name, 2067 ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_resid, ccb->ccb_buf.b_flags, 2068 (long long)ccb->ccb_buf.b_blkno, ccb->ccb_target); 2069 2070 splassert(IPL_BIO); 2071 2072 if (ccb->ccb_target == -1) 2073 panic("%s: invalid target on wu: %p", DEVNAME(sc), wu); 2074 2075 if (ccb->ccb_buf.b_flags & B_ERROR) { 2076 DNPRINTF(SR_D_INTR, "%s: i/o error on block %lld target %d\n", 2077 DEVNAME(sc), (long long)ccb->ccb_buf.b_blkno, 2078 ccb->ccb_target); 2079 if (ISSET(sd->sd_capabilities, SR_CAP_REDUNDANT)) 2080 sd->sd_set_chunk_state(sd, ccb->ccb_target, 2081 BIOC_SDOFFLINE); 2082 else 2083 printf("%s: i/o error on block %lld target %d " 2084 "b_error %d\n", DEVNAME(sc), 2085 (long long)ccb->ccb_buf.b_blkno, ccb->ccb_target, 2086 ccb->ccb_buf.b_error); 2087 ccb->ccb_state = SR_CCB_FAILED; 2088 wu->swu_ios_failed++; 2089 } else { 2090 ccb->ccb_state = SR_CCB_OK; 2091 wu->swu_ios_succeeded++; 2092 } 2093 2094 wu->swu_ios_complete++; 2095 } 2096 2097 int 2098 sr_wu_alloc(struct sr_discipline *sd, int wu_size) 2099 { 2100 struct sr_workunit *wu; 2101 int i, no_wu; 2102 2103 DNPRINTF(SR_D_WU, "%s: sr_wu_alloc %p %d\n", DEVNAME(sd->sd_sc), 2104 sd, sd->sd_max_wu); 2105 2106 no_wu = sd->sd_max_wu; 2107 sd->sd_wu_pending = no_wu; 2108 2109 mtx_init(&sd->sd_wu_mtx, IPL_BIO); 2110 TAILQ_INIT(&sd->sd_wu); 2111 TAILQ_INIT(&sd->sd_wu_freeq); 2112 TAILQ_INIT(&sd->sd_wu_pendq); 2113 TAILQ_INIT(&sd->sd_wu_defq); 2114 2115 for (i = 0; i < no_wu; i++) { 2116 wu = malloc(wu_size, M_DEVBUF, M_WAITOK | M_ZERO); 2117 TAILQ_INSERT_TAIL(&sd->sd_wu, wu, swu_next); 2118 TAILQ_INIT(&wu->swu_ccb); 2119 wu->swu_dis = sd; 2120 task_set(&wu->swu_task, sr_wu_done_callback, wu); 2121 sr_wu_put(sd, wu); 2122 } 2123 2124 return (0); 2125 } 2126 2127 void 2128 sr_wu_free(struct sr_discipline *sd) 2129 { 2130 struct sr_workunit *wu; 2131 2132 DNPRINTF(SR_D_WU, "%s: sr_wu_free %p\n", DEVNAME(sd->sd_sc), sd); 2133 2134 while ((wu = TAILQ_FIRST(&sd->sd_wu_freeq)) != NULL) 2135 TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link); 2136 while ((wu = TAILQ_FIRST(&sd->sd_wu_pendq)) != NULL) 2137 TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link); 2138 while ((wu = TAILQ_FIRST(&sd->sd_wu_defq)) != NULL) 2139 TAILQ_REMOVE(&sd->sd_wu_defq, wu, swu_link); 2140 2141 while ((wu = TAILQ_FIRST(&sd->sd_wu)) != NULL) { 2142 TAILQ_REMOVE(&sd->sd_wu, wu, swu_next); 2143 free(wu, M_DEVBUF, 0); 2144 } 2145 } 2146 2147 void * 2148 sr_wu_get(void *xsd) 2149 { 2150 struct sr_discipline *sd = (struct sr_discipline *)xsd; 2151 struct sr_workunit *wu; 2152 2153 mtx_enter(&sd->sd_wu_mtx); 2154 wu = TAILQ_FIRST(&sd->sd_wu_freeq); 2155 if (wu) { 2156 TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link); 2157 sd->sd_wu_pending++; 2158 } 2159 mtx_leave(&sd->sd_wu_mtx); 2160 2161 DNPRINTF(SR_D_WU, "%s: sr_wu_get: %p\n", DEVNAME(sd->sd_sc), wu); 2162 2163 return (wu); 2164 } 2165 2166 void 2167 sr_wu_put(void *xsd, void *xwu) 2168 { 2169 struct sr_discipline *sd = (struct sr_discipline *)xsd; 2170 struct sr_workunit *wu = (struct sr_workunit *)xwu; 2171 2172 DNPRINTF(SR_D_WU, "%s: sr_wu_put: %p\n", DEVNAME(sd->sd_sc), wu); 2173 2174 sr_wu_release_ccbs(wu); 2175 sr_wu_init(sd, wu); 2176 2177 mtx_enter(&sd->sd_wu_mtx); 2178 TAILQ_INSERT_TAIL(&sd->sd_wu_freeq, wu, swu_link); 2179 sd->sd_wu_pending--; 2180 mtx_leave(&sd->sd_wu_mtx); 2181 } 2182 2183 void 2184 sr_wu_init(struct sr_discipline *sd, struct sr_workunit *wu) 2185 { 2186 int s; 2187 2188 s = splbio(); 2189 if (wu->swu_cb_active == 1) 2190 panic("%s: sr_wu_init got active wu", DEVNAME(sd->sd_sc)); 2191 splx(s); 2192 2193 wu->swu_xs = NULL; 2194 wu->swu_state = SR_WU_FREE; 2195 wu->swu_flags = 0; 2196 wu->swu_blk_start = 0; 2197 wu->swu_blk_end = 0; 2198 wu->swu_collider = NULL; 2199 } 2200 2201 void 2202 sr_wu_enqueue_ccb(struct sr_workunit *wu, struct sr_ccb *ccb) 2203 { 2204 struct sr_discipline *sd = wu->swu_dis; 2205 int s; 2206 2207 s = splbio(); 2208 if (wu->swu_cb_active == 1) 2209 panic("%s: sr_wu_enqueue_ccb got active wu", 2210 DEVNAME(sd->sd_sc)); 2211 ccb->ccb_wu = wu; 2212 wu->swu_io_count++; 2213 TAILQ_INSERT_TAIL(&wu->swu_ccb, ccb, ccb_link); 2214 splx(s); 2215 } 2216 2217 void 2218 sr_wu_release_ccbs(struct sr_workunit *wu) 2219 { 2220 struct sr_ccb *ccb; 2221 2222 /* Return all ccbs that are associated with this workunit. */ 2223 while ((ccb = TAILQ_FIRST(&wu->swu_ccb)) != NULL) { 2224 TAILQ_REMOVE(&wu->swu_ccb, ccb, ccb_link); 2225 sr_ccb_put(ccb); 2226 } 2227 2228 wu->swu_io_count = 0; 2229 wu->swu_ios_complete = 0; 2230 wu->swu_ios_failed = 0; 2231 wu->swu_ios_succeeded = 0; 2232 } 2233 2234 void 2235 sr_wu_done(struct sr_workunit *wu) 2236 { 2237 struct sr_discipline *sd = wu->swu_dis; 2238 2239 DNPRINTF(SR_D_INTR, "%s: sr_wu_done count %d completed %d failed %d\n", 2240 DEVNAME(sd->sd_sc), wu->swu_io_count, wu->swu_ios_complete, 2241 wu->swu_ios_failed); 2242 2243 if (wu->swu_ios_complete < wu->swu_io_count) 2244 return; 2245 2246 task_add(sd->sd_taskq, &wu->swu_task); 2247 } 2248 2249 void 2250 sr_wu_done_callback(void *xwu) 2251 { 2252 struct sr_workunit *wu = xwu; 2253 struct sr_discipline *sd = wu->swu_dis; 2254 struct scsi_xfer *xs = wu->swu_xs; 2255 struct sr_workunit *wup; 2256 int s; 2257 2258 /* 2259 * The SR_WUF_DISCIPLINE or SR_WUF_REBUILD flag must be set if 2260 * the work unit is not associated with a scsi_xfer. 2261 */ 2262 KASSERT(xs != NULL || 2263 (wu->swu_flags & (SR_WUF_DISCIPLINE|SR_WUF_REBUILD))); 2264 2265 s = splbio(); 2266 2267 if (xs != NULL) { 2268 if (wu->swu_ios_failed) 2269 xs->error = XS_DRIVER_STUFFUP; 2270 else 2271 xs->error = XS_NOERROR; 2272 } 2273 2274 if (sd->sd_scsi_wu_done) { 2275 if (sd->sd_scsi_wu_done(wu) == SR_WU_RESTART) 2276 goto done; 2277 } 2278 2279 /* Remove work unit from pending queue. */ 2280 TAILQ_FOREACH(wup, &sd->sd_wu_pendq, swu_link) 2281 if (wup == wu) 2282 break; 2283 if (wup == NULL) 2284 panic("%s: wu %p not on pending queue", 2285 DEVNAME(sd->sd_sc), wu); 2286 TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link); 2287 2288 if (wu->swu_collider) { 2289 if (wu->swu_ios_failed) 2290 sr_raid_recreate_wu(wu->swu_collider); 2291 2292 /* XXX Should the collider be failed if this xs failed? */ 2293 sr_raid_startwu(wu->swu_collider); 2294 } 2295 2296 /* 2297 * If a discipline provides its own sd_scsi_done function, then it 2298 * is responsible for calling sr_scsi_done() once I/O is complete. 2299 */ 2300 if (wu->swu_flags & SR_WUF_REBUILD) 2301 wu->swu_flags |= SR_WUF_REBUILDIOCOMP; 2302 if (wu->swu_flags & SR_WUF_WAKEUP) 2303 wakeup(wu); 2304 if (sd->sd_scsi_done) 2305 sd->sd_scsi_done(wu); 2306 else if (wu->swu_flags & SR_WUF_DISCIPLINE) 2307 sr_scsi_wu_put(sd, wu); 2308 else if (!(wu->swu_flags & SR_WUF_REBUILD)) 2309 sr_scsi_done(sd, xs); 2310 2311 done: 2312 splx(s); 2313 } 2314 2315 struct sr_workunit * 2316 sr_scsi_wu_get(struct sr_discipline *sd, int flags) 2317 { 2318 return scsi_io_get(&sd->sd_iopool, flags); 2319 } 2320 2321 void 2322 sr_scsi_wu_put(struct sr_discipline *sd, struct sr_workunit *wu) 2323 { 2324 scsi_io_put(&sd->sd_iopool, wu); 2325 2326 if (sd->sd_sync && sd->sd_wu_pending == 0) 2327 wakeup(sd); 2328 } 2329 2330 void 2331 sr_scsi_done(struct sr_discipline *sd, struct scsi_xfer *xs) 2332 { 2333 DNPRINTF(SR_D_DIS, "%s: sr_scsi_done: xs %p\n", DEVNAME(sd->sd_sc), xs); 2334 2335 if (xs->error == XS_NOERROR) 2336 xs->resid = 0; 2337 2338 scsi_done(xs); 2339 2340 if (sd->sd_sync && sd->sd_wu_pending == 0) 2341 wakeup(sd); 2342 } 2343 2344 void 2345 sr_scsi_cmd(struct scsi_xfer *xs) 2346 { 2347 struct scsi_link *link = xs->sc_link; 2348 struct sr_softc *sc = link->adapter_softc; 2349 struct sr_workunit *wu = xs->io; 2350 struct sr_discipline *sd; 2351 2352 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd target %d xs %p flags %#x\n", 2353 DEVNAME(sc), link->target, xs, xs->flags); 2354 2355 sd = sc->sc_targets[link->target]; 2356 if (sd == NULL) { 2357 printf("%s: sr_scsi_cmd NULL discipline\n", DEVNAME(sc)); 2358 goto stuffup; 2359 } 2360 2361 if (sd->sd_deleted) { 2362 printf("%s: %s device is being deleted, failing io\n", 2363 DEVNAME(sc), sd->sd_meta->ssd_devname); 2364 goto stuffup; 2365 } 2366 2367 /* scsi layer *can* re-send wu without calling sr_wu_put(). */ 2368 sr_wu_release_ccbs(wu); 2369 sr_wu_init(sd, wu); 2370 wu->swu_state = SR_WU_INPROGRESS; 2371 wu->swu_xs = xs; 2372 2373 switch (xs->cmd->opcode) { 2374 case READ_COMMAND: 2375 case READ_BIG: 2376 case READ_16: 2377 case WRITE_COMMAND: 2378 case WRITE_BIG: 2379 case WRITE_16: 2380 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: READ/WRITE %02x\n", 2381 DEVNAME(sc), xs->cmd->opcode); 2382 if (sd->sd_scsi_rw(wu)) 2383 goto stuffup; 2384 break; 2385 2386 case SYNCHRONIZE_CACHE: 2387 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: SYNCHRONIZE_CACHE\n", 2388 DEVNAME(sc)); 2389 if (sd->sd_scsi_sync(wu)) 2390 goto stuffup; 2391 goto complete; 2392 2393 case TEST_UNIT_READY: 2394 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: TEST_UNIT_READY\n", 2395 DEVNAME(sc)); 2396 if (sd->sd_scsi_tur(wu)) 2397 goto stuffup; 2398 goto complete; 2399 2400 case START_STOP: 2401 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: START_STOP\n", 2402 DEVNAME(sc)); 2403 if (sd->sd_scsi_start_stop(wu)) 2404 goto stuffup; 2405 goto complete; 2406 2407 case INQUIRY: 2408 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: INQUIRY\n", 2409 DEVNAME(sc)); 2410 if (sd->sd_scsi_inquiry(wu)) 2411 goto stuffup; 2412 goto complete; 2413 2414 case READ_CAPACITY: 2415 case READ_CAPACITY_16: 2416 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd READ CAPACITY 0x%02x\n", 2417 DEVNAME(sc), xs->cmd->opcode); 2418 if (sd->sd_scsi_read_cap(wu)) 2419 goto stuffup; 2420 goto complete; 2421 2422 case REQUEST_SENSE: 2423 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd REQUEST SENSE\n", 2424 DEVNAME(sc)); 2425 if (sd->sd_scsi_req_sense(wu)) 2426 goto stuffup; 2427 goto complete; 2428 2429 default: 2430 DNPRINTF(SR_D_CMD, "%s: unsupported scsi command %x\n", 2431 DEVNAME(sc), xs->cmd->opcode); 2432 /* XXX might need to add generic function to handle others */ 2433 goto stuffup; 2434 } 2435 2436 return; 2437 stuffup: 2438 if (sd && sd->sd_scsi_sense.error_code) { 2439 xs->error = XS_SENSE; 2440 memcpy(&xs->sense, &sd->sd_scsi_sense, sizeof(xs->sense)); 2441 bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense)); 2442 } else { 2443 xs->error = XS_DRIVER_STUFFUP; 2444 } 2445 complete: 2446 sr_scsi_done(sd, xs); 2447 } 2448 2449 int 2450 sr_scsi_probe(struct scsi_link *link) 2451 { 2452 struct sr_softc *sc = link->adapter_softc; 2453 struct sr_discipline *sd; 2454 2455 KASSERT(link->target < SR_MAX_LD && link->lun == 0); 2456 2457 sd = sc->sc_targets[link->target]; 2458 if (sd == NULL) 2459 return (ENODEV); 2460 2461 link->pool = &sd->sd_iopool; 2462 if (sd->sd_openings) 2463 link->openings = sd->sd_openings(sd); 2464 else 2465 link->openings = sd->sd_max_wu; 2466 2467 return (0); 2468 } 2469 2470 int 2471 sr_scsi_ioctl(struct scsi_link *link, u_long cmd, caddr_t addr, int flag) 2472 { 2473 struct sr_softc *sc = link->adapter_softc; 2474 struct sr_discipline *sd; 2475 2476 sd = sc->sc_targets[link->target]; 2477 if (sd == NULL) 2478 return (ENODEV); 2479 2480 DNPRINTF(SR_D_IOCTL, "%s: %s sr_scsi_ioctl cmd: %#lx\n", 2481 DEVNAME(sc), sd->sd_meta->ssd_devname, cmd); 2482 2483 /* Pass bio ioctls through to the bio handler. */ 2484 if (IOCGROUP(cmd) == 'B') 2485 return (sr_bio_handler(sc, sd, cmd, (struct bio *)addr)); 2486 2487 switch (cmd) { 2488 case DIOCGCACHE: 2489 case DIOCSCACHE: 2490 return (EOPNOTSUPP); 2491 default: 2492 return (ENOTTY); 2493 } 2494 } 2495 2496 int 2497 sr_bio_ioctl(struct device *dev, u_long cmd, caddr_t addr) 2498 { 2499 struct sr_softc *sc = (struct sr_softc *) dev; 2500 DNPRINTF(SR_D_IOCTL, "%s: sr_bio_ioctl\n", DEVNAME(sc)); 2501 2502 return sr_bio_handler(sc, NULL, cmd, (struct bio *)addr); 2503 } 2504 2505 int 2506 sr_bio_handler(struct sr_softc *sc, struct sr_discipline *sd, u_long cmd, 2507 struct bio *bio) 2508 { 2509 int rv = 0; 2510 2511 DNPRINTF(SR_D_IOCTL, "%s: sr_bio_handler ", DEVNAME(sc)); 2512 2513 rw_enter_write(&sc->sc_lock); 2514 2515 bio_status_init(&sc->sc_status, &sc->sc_dev); 2516 2517 switch (cmd) { 2518 case BIOCINQ: 2519 DNPRINTF(SR_D_IOCTL, "inq\n"); 2520 rv = sr_ioctl_inq(sc, (struct bioc_inq *)bio); 2521 break; 2522 2523 case BIOCVOL: 2524 DNPRINTF(SR_D_IOCTL, "vol\n"); 2525 rv = sr_ioctl_vol(sc, (struct bioc_vol *)bio); 2526 break; 2527 2528 case BIOCDISK: 2529 DNPRINTF(SR_D_IOCTL, "disk\n"); 2530 rv = sr_ioctl_disk(sc, (struct bioc_disk *)bio); 2531 break; 2532 2533 case BIOCALARM: 2534 DNPRINTF(SR_D_IOCTL, "alarm\n"); 2535 /*rv = sr_ioctl_alarm(sc, (struct bioc_alarm *)bio); */ 2536 break; 2537 2538 case BIOCBLINK: 2539 DNPRINTF(SR_D_IOCTL, "blink\n"); 2540 /*rv = sr_ioctl_blink(sc, (struct bioc_blink *)bio); */ 2541 break; 2542 2543 case BIOCSETSTATE: 2544 DNPRINTF(SR_D_IOCTL, "setstate\n"); 2545 rv = sr_ioctl_setstate(sc, (struct bioc_setstate *)bio); 2546 break; 2547 2548 case BIOCCREATERAID: 2549 DNPRINTF(SR_D_IOCTL, "createraid\n"); 2550 rv = sr_ioctl_createraid(sc, (struct bioc_createraid *)bio, 2551 1, NULL); 2552 break; 2553 2554 case BIOCDELETERAID: 2555 DNPRINTF(SR_D_IOCTL, "deleteraid\n"); 2556 rv = sr_ioctl_deleteraid(sc, sd, (struct bioc_deleteraid *)bio); 2557 break; 2558 2559 case BIOCDISCIPLINE: 2560 DNPRINTF(SR_D_IOCTL, "discipline\n"); 2561 rv = sr_ioctl_discipline(sc, sd, (struct bioc_discipline *)bio); 2562 break; 2563 2564 case BIOCINSTALLBOOT: 2565 DNPRINTF(SR_D_IOCTL, "installboot\n"); 2566 rv = sr_ioctl_installboot(sc, sd, 2567 (struct bioc_installboot *)bio); 2568 break; 2569 2570 default: 2571 DNPRINTF(SR_D_IOCTL, "invalid ioctl\n"); 2572 rv = ENOTTY; 2573 } 2574 2575 sc->sc_status.bs_status = (rv ? BIO_STATUS_ERROR : BIO_STATUS_SUCCESS); 2576 2577 memcpy(&bio->bio_status, &sc->sc_status, sizeof(struct bio_status)); 2578 2579 rw_exit_write(&sc->sc_lock); 2580 2581 return (0); 2582 } 2583 2584 int 2585 sr_ioctl_inq(struct sr_softc *sc, struct bioc_inq *bi) 2586 { 2587 struct sr_discipline *sd; 2588 int vol = 0, disk = 0; 2589 2590 TAILQ_FOREACH(sd, &sc->sc_dis_list, sd_link) { 2591 vol++; 2592 disk += sd->sd_meta->ssdi.ssd_chunk_no; 2593 } 2594 2595 strlcpy(bi->bi_dev, sc->sc_dev.dv_xname, sizeof(bi->bi_dev)); 2596 bi->bi_novol = vol + sc->sc_hotspare_no; 2597 bi->bi_nodisk = disk + sc->sc_hotspare_no; 2598 2599 return (0); 2600 } 2601 2602 int 2603 sr_ioctl_vol(struct sr_softc *sc, struct bioc_vol *bv) 2604 { 2605 int vol = -1, rv = EINVAL; 2606 struct sr_discipline *sd; 2607 struct sr_chunk *hotspare; 2608 int64_t rb, sz; 2609 2610 TAILQ_FOREACH(sd, &sc->sc_dis_list, sd_link) { 2611 vol++; 2612 if (vol != bv->bv_volid) 2613 continue; 2614 2615 bv->bv_status = sd->sd_vol_status; 2616 bv->bv_size = sd->sd_meta->ssdi.ssd_size << DEV_BSHIFT; 2617 bv->bv_level = sd->sd_meta->ssdi.ssd_level; 2618 bv->bv_nodisk = sd->sd_meta->ssdi.ssd_chunk_no; 2619 2620 #ifdef CRYPTO 2621 if (sd->sd_meta->ssdi.ssd_level == 'C' && 2622 sd->mds.mdd_crypto.key_disk != NULL) 2623 bv->bv_nodisk++; 2624 #endif 2625 2626 if (bv->bv_status == BIOC_SVREBUILD) { 2627 sz = sd->sd_meta->ssdi.ssd_size; 2628 rb = sd->sd_meta->ssd_rebuild; 2629 if (rb > 0) 2630 bv->bv_percent = 100 - 2631 ((sz * 100 - rb * 100) / sz) - 1; 2632 else 2633 bv->bv_percent = 0; 2634 } 2635 strlcpy(bv->bv_dev, sd->sd_meta->ssd_devname, 2636 sizeof(bv->bv_dev)); 2637 strlcpy(bv->bv_vendor, sd->sd_meta->ssdi.ssd_vendor, 2638 sizeof(bv->bv_vendor)); 2639 rv = 0; 2640 goto done; 2641 } 2642 2643 /* Check hotspares list. */ 2644 SLIST_FOREACH(hotspare, &sc->sc_hotspare_list, src_link) { 2645 vol++; 2646 if (vol != bv->bv_volid) 2647 continue; 2648 2649 bv->bv_status = BIOC_SVONLINE; 2650 bv->bv_size = hotspare->src_meta.scmi.scm_size << DEV_BSHIFT; 2651 bv->bv_level = -1; /* Hotspare. */ 2652 bv->bv_nodisk = 1; 2653 strlcpy(bv->bv_dev, hotspare->src_meta.scmi.scm_devname, 2654 sizeof(bv->bv_dev)); 2655 strlcpy(bv->bv_vendor, hotspare->src_meta.scmi.scm_devname, 2656 sizeof(bv->bv_vendor)); 2657 rv = 0; 2658 goto done; 2659 } 2660 2661 done: 2662 return (rv); 2663 } 2664 2665 int 2666 sr_ioctl_disk(struct sr_softc *sc, struct bioc_disk *bd) 2667 { 2668 struct sr_discipline *sd; 2669 struct sr_chunk *src, *hotspare; 2670 int vol = -1, rv = EINVAL; 2671 2672 if (bd->bd_diskid < 0) 2673 goto done; 2674 2675 TAILQ_FOREACH(sd, &sc->sc_dis_list, sd_link) { 2676 vol++; 2677 if (vol != bd->bd_volid) 2678 continue; 2679 2680 if (bd->bd_diskid < sd->sd_meta->ssdi.ssd_chunk_no) 2681 src = sd->sd_vol.sv_chunks[bd->bd_diskid]; 2682 #ifdef CRYPTO 2683 else if (bd->bd_diskid == sd->sd_meta->ssdi.ssd_chunk_no && 2684 sd->sd_meta->ssdi.ssd_level == 'C' && 2685 sd->mds.mdd_crypto.key_disk != NULL) 2686 src = sd->mds.mdd_crypto.key_disk; 2687 #endif 2688 else 2689 break; 2690 2691 bd->bd_status = src->src_meta.scm_status; 2692 bd->bd_size = src->src_meta.scmi.scm_size << DEV_BSHIFT; 2693 bd->bd_channel = vol; 2694 bd->bd_target = bd->bd_diskid; 2695 strlcpy(bd->bd_vendor, src->src_meta.scmi.scm_devname, 2696 sizeof(bd->bd_vendor)); 2697 rv = 0; 2698 goto done; 2699 } 2700 2701 /* Check hotspares list. */ 2702 SLIST_FOREACH(hotspare, &sc->sc_hotspare_list, src_link) { 2703 vol++; 2704 if (vol != bd->bd_volid) 2705 continue; 2706 2707 if (bd->bd_diskid != 0) 2708 break; 2709 2710 bd->bd_status = hotspare->src_meta.scm_status; 2711 bd->bd_size = hotspare->src_meta.scmi.scm_size << DEV_BSHIFT; 2712 bd->bd_channel = vol; 2713 bd->bd_target = bd->bd_diskid; 2714 strlcpy(bd->bd_vendor, hotspare->src_meta.scmi.scm_devname, 2715 sizeof(bd->bd_vendor)); 2716 rv = 0; 2717 goto done; 2718 } 2719 2720 done: 2721 return (rv); 2722 } 2723 2724 int 2725 sr_ioctl_setstate(struct sr_softc *sc, struct bioc_setstate *bs) 2726 { 2727 int rv = EINVAL; 2728 int vol = -1, found, c; 2729 struct sr_discipline *sd; 2730 struct sr_chunk *ch_entry; 2731 struct sr_chunk_head *cl; 2732 2733 if (bs->bs_other_id_type == BIOC_SSOTHER_UNUSED) 2734 goto done; 2735 2736 if (bs->bs_status == BIOC_SSHOTSPARE) { 2737 rv = sr_hotspare(sc, (dev_t)bs->bs_other_id); 2738 goto done; 2739 } 2740 2741 TAILQ_FOREACH(sd, &sc->sc_dis_list, sd_link) { 2742 vol++; 2743 if (vol == bs->bs_volid) 2744 break; 2745 } 2746 if (sd == NULL) 2747 goto done; 2748 2749 switch (bs->bs_status) { 2750 case BIOC_SSOFFLINE: 2751 /* Take chunk offline */ 2752 found = c = 0; 2753 cl = &sd->sd_vol.sv_chunk_list; 2754 SLIST_FOREACH(ch_entry, cl, src_link) { 2755 if (ch_entry->src_dev_mm == bs->bs_other_id) { 2756 found = 1; 2757 break; 2758 } 2759 c++; 2760 } 2761 if (found == 0) { 2762 sr_error(sc, "chunk not part of array"); 2763 goto done; 2764 } 2765 2766 /* XXX: check current state first */ 2767 sd->sd_set_chunk_state(sd, c, BIOC_SDOFFLINE); 2768 2769 if (sr_meta_save(sd, SR_META_DIRTY)) { 2770 sr_error(sc, "could not save metadata for %s", 2771 sd->sd_meta->ssd_devname); 2772 goto done; 2773 } 2774 rv = 0; 2775 break; 2776 2777 case BIOC_SDSCRUB: 2778 break; 2779 2780 case BIOC_SSREBUILD: 2781 rv = sr_rebuild_init(sd, (dev_t)bs->bs_other_id, 0); 2782 break; 2783 2784 default: 2785 sr_error(sc, "unsupported state request %d", bs->bs_status); 2786 } 2787 2788 done: 2789 return (rv); 2790 } 2791 2792 int 2793 sr_chunk_in_use(struct sr_softc *sc, dev_t dev) 2794 { 2795 struct sr_discipline *sd; 2796 struct sr_chunk *chunk; 2797 int i; 2798 2799 DNPRINTF(SR_D_MISC, "%s: sr_chunk_in_use(%d)\n", DEVNAME(sc), dev); 2800 2801 if (dev == NODEV) 2802 return BIOC_SDINVALID; 2803 2804 /* See if chunk is already in use. */ 2805 TAILQ_FOREACH(sd, &sc->sc_dis_list, sd_link) { 2806 for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) { 2807 chunk = sd->sd_vol.sv_chunks[i]; 2808 if (chunk->src_dev_mm == dev) 2809 return chunk->src_meta.scm_status; 2810 } 2811 } 2812 2813 /* Check hotspares list. */ 2814 SLIST_FOREACH(chunk, &sc->sc_hotspare_list, src_link) 2815 if (chunk->src_dev_mm == dev) 2816 return chunk->src_meta.scm_status; 2817 2818 return BIOC_SDINVALID; 2819 } 2820 2821 int 2822 sr_hotspare(struct sr_softc *sc, dev_t dev) 2823 { 2824 struct sr_discipline *sd = NULL; 2825 struct sr_metadata *sm = NULL; 2826 struct sr_meta_chunk *hm; 2827 struct sr_chunk_head *cl; 2828 struct sr_chunk *chunk, *last, *hotspare = NULL; 2829 struct sr_uuid uuid; 2830 struct disklabel label; 2831 struct vnode *vn; 2832 u_int64_t size; 2833 char devname[32]; 2834 int rv = EINVAL; 2835 int c, part, open = 0; 2836 2837 /* 2838 * Add device to global hotspares list. 2839 */ 2840 2841 sr_meta_getdevname(sc, dev, devname, sizeof(devname)); 2842 2843 /* Make sure chunk is not already in use. */ 2844 c = sr_chunk_in_use(sc, dev); 2845 if (c != BIOC_SDINVALID && c != BIOC_SDOFFLINE) { 2846 if (c == BIOC_SDHOTSPARE) 2847 sr_error(sc, "%s is already a hotspare", devname); 2848 else 2849 sr_error(sc, "%s is already in use", devname); 2850 goto done; 2851 } 2852 2853 /* XXX - See if there is an existing degraded volume... */ 2854 2855 /* Open device. */ 2856 if (bdevvp(dev, &vn)) { 2857 sr_error(sc, "sr_hotspare: cannot allocate vnode"); 2858 goto done; 2859 } 2860 if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc)) { 2861 DNPRINTF(SR_D_META,"%s: sr_hotspare cannot open %s\n", 2862 DEVNAME(sc), devname); 2863 vput(vn); 2864 goto fail; 2865 } 2866 open = 1; /* close dev on error */ 2867 2868 /* Get partition details. */ 2869 part = DISKPART(dev); 2870 if (VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD, 2871 NOCRED, curproc)) { 2872 DNPRINTF(SR_D_META, "%s: sr_hotspare ioctl failed\n", 2873 DEVNAME(sc)); 2874 VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, curproc); 2875 vput(vn); 2876 goto fail; 2877 } 2878 if (label.d_secsize != DEV_BSIZE) { 2879 sr_error(sc, "%s has unsupported sector size (%u)", 2880 devname, label.d_secsize); 2881 goto fail; 2882 } 2883 if (label.d_partitions[part].p_fstype != FS_RAID) { 2884 sr_error(sc, "%s partition not of type RAID (%d)", 2885 devname, label.d_partitions[part].p_fstype); 2886 goto fail; 2887 } 2888 2889 /* Calculate partition size. */ 2890 size = DL_SECTOBLK(&label, DL_GETPSIZE(&label.d_partitions[part])); 2891 if (size <= SR_DATA_OFFSET) { 2892 DNPRINTF(SR_D_META, "%s: %s partition too small\n", DEVNAME(sc), 2893 devname); 2894 goto fail; 2895 } 2896 size -= SR_DATA_OFFSET; 2897 if (size > INT64_MAX) { 2898 DNPRINTF(SR_D_META, "%s: %s partition too large\n", DEVNAME(sc), 2899 devname); 2900 goto fail; 2901 } 2902 2903 /* 2904 * Create and populate chunk metadata. 2905 */ 2906 2907 sr_uuid_generate(&uuid); 2908 hotspare = malloc(sizeof(struct sr_chunk), M_DEVBUF, M_WAITOK | M_ZERO); 2909 2910 hotspare->src_dev_mm = dev; 2911 hotspare->src_vn = vn; 2912 strlcpy(hotspare->src_devname, devname, sizeof(hm->scmi.scm_devname)); 2913 hotspare->src_size = size; 2914 2915 hm = &hotspare->src_meta; 2916 hm->scmi.scm_volid = SR_HOTSPARE_VOLID; 2917 hm->scmi.scm_chunk_id = 0; 2918 hm->scmi.scm_size = size; 2919 hm->scmi.scm_coerced_size = size; 2920 strlcpy(hm->scmi.scm_devname, devname, sizeof(hm->scmi.scm_devname)); 2921 memcpy(&hm->scmi.scm_uuid, &uuid, sizeof(struct sr_uuid)); 2922 2923 sr_checksum(sc, hm, &hm->scm_checksum, 2924 sizeof(struct sr_meta_chunk_invariant)); 2925 2926 hm->scm_status = BIOC_SDHOTSPARE; 2927 2928 /* 2929 * Create and populate our own discipline and metadata. 2930 */ 2931 2932 sm = malloc(sizeof(struct sr_metadata), M_DEVBUF, M_WAITOK | M_ZERO); 2933 sm->ssdi.ssd_magic = SR_MAGIC; 2934 sm->ssdi.ssd_version = SR_META_VERSION; 2935 sm->ssd_ondisk = 0; 2936 sm->ssdi.ssd_vol_flags = 0; 2937 memcpy(&sm->ssdi.ssd_uuid, &uuid, sizeof(struct sr_uuid)); 2938 sm->ssdi.ssd_chunk_no = 1; 2939 sm->ssdi.ssd_volid = SR_HOTSPARE_VOLID; 2940 sm->ssdi.ssd_level = SR_HOTSPARE_LEVEL; 2941 sm->ssdi.ssd_size = size; 2942 strlcpy(sm->ssdi.ssd_vendor, "OPENBSD", sizeof(sm->ssdi.ssd_vendor)); 2943 snprintf(sm->ssdi.ssd_product, sizeof(sm->ssdi.ssd_product), 2944 "SR %s", "HOTSPARE"); 2945 snprintf(sm->ssdi.ssd_revision, sizeof(sm->ssdi.ssd_revision), 2946 "%03d", SR_META_VERSION); 2947 2948 sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO); 2949 sd->sd_sc = sc; 2950 sd->sd_meta = sm; 2951 sd->sd_meta_type = SR_META_F_NATIVE; 2952 sd->sd_vol_status = BIOC_SVONLINE; 2953 strlcpy(sd->sd_name, "HOTSPARE", sizeof(sd->sd_name)); 2954 SLIST_INIT(&sd->sd_meta_opt); 2955 2956 /* Add chunk to volume. */ 2957 sd->sd_vol.sv_chunks = malloc(sizeof(struct sr_chunk *), M_DEVBUF, 2958 M_WAITOK | M_ZERO); 2959 sd->sd_vol.sv_chunks[0] = hotspare; 2960 SLIST_INIT(&sd->sd_vol.sv_chunk_list); 2961 SLIST_INSERT_HEAD(&sd->sd_vol.sv_chunk_list, hotspare, src_link); 2962 2963 /* Save metadata. */ 2964 if (sr_meta_save(sd, SR_META_DIRTY)) { 2965 sr_error(sc, "could not save metadata to %s", devname); 2966 goto fail; 2967 } 2968 2969 /* 2970 * Add chunk to hotspare list. 2971 */ 2972 rw_enter_write(&sc->sc_hs_lock); 2973 cl = &sc->sc_hotspare_list; 2974 if (SLIST_EMPTY(cl)) 2975 SLIST_INSERT_HEAD(cl, hotspare, src_link); 2976 else { 2977 SLIST_FOREACH(chunk, cl, src_link) 2978 last = chunk; 2979 SLIST_INSERT_AFTER(last, hotspare, src_link); 2980 } 2981 sc->sc_hotspare_no++; 2982 rw_exit_write(&sc->sc_hs_lock); 2983 2984 rv = 0; 2985 goto done; 2986 2987 fail: 2988 if (hotspare) 2989 free(hotspare, M_DEVBUF, 0); 2990 2991 done: 2992 if (sd && sd->sd_vol.sv_chunks) 2993 free(sd->sd_vol.sv_chunks, M_DEVBUF, 0); 2994 if (sd) 2995 free(sd, M_DEVBUF, 0); 2996 if (sm) 2997 free(sm, M_DEVBUF, 0); 2998 if (open) { 2999 VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, curproc); 3000 vput(vn); 3001 } 3002 3003 return (rv); 3004 } 3005 3006 void 3007 sr_hotspare_rebuild_callback(void *xsd) 3008 { 3009 struct sr_discipline *sd = xsd; 3010 sr_hotspare_rebuild(sd); 3011 } 3012 3013 void 3014 sr_hotspare_rebuild(struct sr_discipline *sd) 3015 { 3016 struct sr_softc *sc = sd->sd_sc; 3017 struct sr_chunk_head *cl; 3018 struct sr_chunk *hotspare, *chunk = NULL; 3019 struct sr_workunit *wu; 3020 struct sr_ccb *ccb; 3021 int i, s, chunk_no, busy; 3022 3023 /* 3024 * Attempt to locate a hotspare and initiate rebuild. 3025 */ 3026 3027 for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) { 3028 if (sd->sd_vol.sv_chunks[i]->src_meta.scm_status == 3029 BIOC_SDOFFLINE) { 3030 chunk_no = i; 3031 chunk = sd->sd_vol.sv_chunks[i]; 3032 break; 3033 } 3034 } 3035 3036 if (chunk == NULL) { 3037 printf("%s: no offline chunk found on %s!\n", 3038 DEVNAME(sc), sd->sd_meta->ssd_devname); 3039 return; 3040 } 3041 3042 /* See if we have a suitable hotspare... */ 3043 rw_enter_write(&sc->sc_hs_lock); 3044 cl = &sc->sc_hotspare_list; 3045 SLIST_FOREACH(hotspare, cl, src_link) 3046 if (hotspare->src_size >= chunk->src_size) 3047 break; 3048 3049 if (hotspare != NULL) { 3050 3051 printf("%s: %s volume degraded, will attempt to " 3052 "rebuild on hotspare %s\n", DEVNAME(sc), 3053 sd->sd_meta->ssd_devname, hotspare->src_devname); 3054 3055 /* 3056 * Ensure that all pending I/O completes on the failed chunk 3057 * before trying to initiate a rebuild. 3058 */ 3059 i = 0; 3060 do { 3061 busy = 0; 3062 3063 s = splbio(); 3064 TAILQ_FOREACH(wu, &sd->sd_wu_pendq, swu_link) { 3065 TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) { 3066 if (ccb->ccb_target == chunk_no) 3067 busy = 1; 3068 } 3069 } 3070 TAILQ_FOREACH(wu, &sd->sd_wu_defq, swu_link) { 3071 TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) { 3072 if (ccb->ccb_target == chunk_no) 3073 busy = 1; 3074 } 3075 } 3076 splx(s); 3077 3078 if (busy) { 3079 tsleep(sd, PRIBIO, "sr_hotspare", hz); 3080 i++; 3081 } 3082 3083 } while (busy && i < 120); 3084 3085 DNPRINTF(SR_D_META, "%s: waited %i seconds for I/O to " 3086 "complete on failed chunk %s\n", DEVNAME(sc), 3087 i, chunk->src_devname); 3088 3089 if (busy) { 3090 printf("%s: pending I/O failed to complete on " 3091 "failed chunk %s, hotspare rebuild aborted...\n", 3092 DEVNAME(sc), chunk->src_devname); 3093 goto done; 3094 } 3095 3096 s = splbio(); 3097 rw_enter_write(&sc->sc_lock); 3098 bio_status_init(&sc->sc_status, &sc->sc_dev); 3099 if (sr_rebuild_init(sd, hotspare->src_dev_mm, 1) == 0) { 3100 3101 /* Remove hotspare from available list. */ 3102 sc->sc_hotspare_no--; 3103 SLIST_REMOVE(cl, hotspare, sr_chunk, src_link); 3104 free(hotspare, M_DEVBUF, 0); 3105 3106 } 3107 rw_exit_write(&sc->sc_lock); 3108 splx(s); 3109 } 3110 done: 3111 rw_exit_write(&sc->sc_hs_lock); 3112 } 3113 3114 int 3115 sr_rebuild_init(struct sr_discipline *sd, dev_t dev, int hotspare) 3116 { 3117 struct sr_softc *sc = sd->sd_sc; 3118 struct sr_chunk *chunk = NULL; 3119 struct sr_meta_chunk *meta; 3120 struct disklabel label; 3121 struct vnode *vn; 3122 u_int64_t size; 3123 int64_t csize; 3124 char devname[32]; 3125 int rv = EINVAL, open = 0; 3126 int cid, i, part, status; 3127 3128 /* 3129 * Attempt to initiate a rebuild onto the specified device. 3130 */ 3131 3132 if (!(sd->sd_capabilities & SR_CAP_REBUILD)) { 3133 sr_error(sc, "discipline does not support rebuild"); 3134 goto done; 3135 } 3136 3137 /* make sure volume is in the right state */ 3138 if (sd->sd_vol_status == BIOC_SVREBUILD) { 3139 sr_error(sc, "rebuild already in progress"); 3140 goto done; 3141 } 3142 if (sd->sd_vol_status != BIOC_SVDEGRADED) { 3143 sr_error(sc, "volume not degraded"); 3144 goto done; 3145 } 3146 3147 /* Find first offline chunk. */ 3148 for (cid = 0; cid < sd->sd_meta->ssdi.ssd_chunk_no; cid++) { 3149 if (sd->sd_vol.sv_chunks[cid]->src_meta.scm_status == 3150 BIOC_SDOFFLINE) { 3151 chunk = sd->sd_vol.sv_chunks[cid]; 3152 break; 3153 } 3154 } 3155 if (chunk == NULL) { 3156 sr_error(sc, "no offline chunks available to rebuild"); 3157 goto done; 3158 } 3159 3160 /* Get coerced size from another online chunk. */ 3161 for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) { 3162 if (sd->sd_vol.sv_chunks[i]->src_meta.scm_status == 3163 BIOC_SDONLINE) { 3164 meta = &sd->sd_vol.sv_chunks[i]->src_meta; 3165 csize = meta->scmi.scm_coerced_size; 3166 break; 3167 } 3168 } 3169 3170 sr_meta_getdevname(sc, dev, devname, sizeof(devname)); 3171 if (bdevvp(dev, &vn)) { 3172 printf("%s: sr_rebuild_init: can't allocate vnode\n", 3173 DEVNAME(sc)); 3174 goto done; 3175 } 3176 if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc)) { 3177 DNPRINTF(SR_D_META,"%s: sr_ioctl_setstate can't " 3178 "open %s\n", DEVNAME(sc), devname); 3179 vput(vn); 3180 goto done; 3181 } 3182 open = 1; /* close dev on error */ 3183 3184 /* Get disklabel and check partition. */ 3185 part = DISKPART(dev); 3186 if (VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD, 3187 NOCRED, curproc)) { 3188 DNPRINTF(SR_D_META, "%s: sr_ioctl_setstate ioctl failed\n", 3189 DEVNAME(sc)); 3190 goto done; 3191 } 3192 if (label.d_secsize != DEV_BSIZE) { 3193 sr_error(sc, "%s has unsupported sector size (%u)", 3194 devname, label.d_secsize); 3195 goto done; 3196 } 3197 if (label.d_partitions[part].p_fstype != FS_RAID) { 3198 sr_error(sc, "%s partition not of type RAID (%d)", 3199 devname, label.d_partitions[part].p_fstype); 3200 goto done; 3201 } 3202 3203 /* Is the partition large enough? */ 3204 size = DL_SECTOBLK(&label, DL_GETPSIZE(&label.d_partitions[part])); 3205 if (size <= sd->sd_meta->ssd_data_blkno) { 3206 sr_error(sc, "%s: %s partition too small", DEVNAME(sc), 3207 devname); 3208 goto done; 3209 } 3210 size -= sd->sd_meta->ssd_data_blkno; 3211 if (size > INT64_MAX) { 3212 sr_error(sc, "%s: %s partition too large", DEVNAME(sc), 3213 devname); 3214 goto done; 3215 } 3216 if (size < csize) { 3217 sr_error(sc, "%s partition too small, at least %lld bytes " 3218 "required", devname, (long long)(csize << DEV_BSHIFT)); 3219 goto done; 3220 } else if (size > csize) 3221 sr_warn(sc, "%s partition too large, wasting %lld bytes", 3222 devname, (long long)((size - csize) << DEV_BSHIFT)); 3223 3224 /* Ensure that this chunk is not already in use. */ 3225 status = sr_chunk_in_use(sc, dev); 3226 if (status != BIOC_SDINVALID && status != BIOC_SDOFFLINE && 3227 !(hotspare && status == BIOC_SDHOTSPARE)) { 3228 sr_error(sc, "%s is already in use", devname); 3229 goto done; 3230 } 3231 3232 /* Reset rebuild counter since we rebuilding onto a new chunk. */ 3233 sd->sd_meta->ssd_rebuild = 0; 3234 3235 open = 0; /* leave dev open from here on out */ 3236 3237 /* Fix up chunk. */ 3238 memcpy(chunk->src_duid, label.d_uid, sizeof(chunk->src_duid)); 3239 chunk->src_dev_mm = dev; 3240 chunk->src_vn = vn; 3241 3242 /* Reconstruct metadata. */ 3243 meta = &chunk->src_meta; 3244 meta->scmi.scm_volid = sd->sd_meta->ssdi.ssd_volid; 3245 meta->scmi.scm_chunk_id = cid; 3246 strlcpy(meta->scmi.scm_devname, devname, 3247 sizeof(meta->scmi.scm_devname)); 3248 meta->scmi.scm_size = size; 3249 meta->scmi.scm_coerced_size = csize; 3250 memcpy(&meta->scmi.scm_uuid, &sd->sd_meta->ssdi.ssd_uuid, 3251 sizeof(meta->scmi.scm_uuid)); 3252 sr_checksum(sc, meta, &meta->scm_checksum, 3253 sizeof(struct sr_meta_chunk_invariant)); 3254 3255 sd->sd_set_chunk_state(sd, cid, BIOC_SDREBUILD); 3256 3257 if (sr_meta_save(sd, SR_META_DIRTY)) { 3258 sr_error(sc, "could not save metadata to %s", devname); 3259 open = 1; 3260 goto done; 3261 } 3262 3263 sr_warn(sc, "rebuild of %s started on %s", 3264 sd->sd_meta->ssd_devname, devname); 3265 3266 sd->sd_reb_abort = 0; 3267 kthread_create_deferred(sr_rebuild_start, sd); 3268 3269 rv = 0; 3270 done: 3271 if (open) { 3272 VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, curproc); 3273 vput(vn); 3274 } 3275 3276 return (rv); 3277 } 3278 3279 void 3280 sr_roam_chunks(struct sr_discipline *sd) 3281 { 3282 struct sr_softc *sc = sd->sd_sc; 3283 struct sr_chunk *chunk; 3284 struct sr_meta_chunk *meta; 3285 int roamed = 0; 3286 3287 /* Have any chunks roamed? */ 3288 SLIST_FOREACH(chunk, &sd->sd_vol.sv_chunk_list, src_link) { 3289 meta = &chunk->src_meta; 3290 if (strncmp(meta->scmi.scm_devname, chunk->src_devname, 3291 sizeof(meta->scmi.scm_devname))) { 3292 3293 printf("%s: roaming device %s -> %s\n", DEVNAME(sc), 3294 meta->scmi.scm_devname, chunk->src_devname); 3295 3296 strlcpy(meta->scmi.scm_devname, chunk->src_devname, 3297 sizeof(meta->scmi.scm_devname)); 3298 3299 roamed++; 3300 } 3301 } 3302 3303 if (roamed) 3304 sr_meta_save(sd, SR_META_DIRTY); 3305 } 3306 3307 int 3308 sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, 3309 int user, void *data) 3310 { 3311 struct sr_meta_opt_item *omi; 3312 struct sr_chunk_head *cl; 3313 struct sr_discipline *sd = NULL; 3314 struct sr_chunk *ch_entry; 3315 struct scsi_link *link; 3316 struct device *dev; 3317 char *uuid, devname[32]; 3318 dev_t *dt; 3319 int i, no_chunk, rv = EINVAL, target, vol; 3320 int no_meta; 3321 3322 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_createraid(%d)\n", 3323 DEVNAME(sc), user); 3324 3325 /* user input */ 3326 if (bc->bc_dev_list_len > BIOC_CRMAXLEN) 3327 goto unwind; 3328 3329 dt = malloc(bc->bc_dev_list_len, M_DEVBUF, M_WAITOK | M_ZERO); 3330 if (user) { 3331 if (copyin(bc->bc_dev_list, dt, bc->bc_dev_list_len) != 0) 3332 goto unwind; 3333 } else 3334 memcpy(dt, bc->bc_dev_list, bc->bc_dev_list_len); 3335 3336 /* Initialise discipline. */ 3337 sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO); 3338 sd->sd_sc = sc; 3339 SLIST_INIT(&sd->sd_meta_opt); 3340 sd->sd_taskq = taskq_create("srdis", 1, IPL_BIO, 0); 3341 if (sd->sd_taskq == NULL) { 3342 sr_error(sc, "could not create discipline taskq"); 3343 goto unwind; 3344 } 3345 if (sr_discipline_init(sd, bc->bc_level)) { 3346 sr_error(sc, "could not initialize discipline"); 3347 goto unwind; 3348 } 3349 3350 no_chunk = bc->bc_dev_list_len / sizeof(dev_t); 3351 cl = &sd->sd_vol.sv_chunk_list; 3352 SLIST_INIT(cl); 3353 3354 /* Ensure that chunks are not already in use. */ 3355 for (i = 0; i < no_chunk; i++) { 3356 if (sr_chunk_in_use(sc, dt[i]) != BIOC_SDINVALID) { 3357 sr_meta_getdevname(sc, dt[i], devname, sizeof(devname)); 3358 sr_error(sc, "chunk %s already in use", devname); 3359 goto unwind; 3360 } 3361 } 3362 3363 sd->sd_meta_type = sr_meta_probe(sd, dt, no_chunk); 3364 if (sd->sd_meta_type == SR_META_F_INVALID) { 3365 sr_error(sc, "invalid metadata format"); 3366 goto unwind; 3367 } 3368 3369 if (sr_meta_attach(sd, no_chunk, bc->bc_flags & BIOC_SCFORCE)) 3370 goto unwind; 3371 3372 /* force the raid volume by clearing metadata region */ 3373 if (bc->bc_flags & BIOC_SCFORCE) { 3374 /* make sure disk isn't up and running */ 3375 if (sr_meta_read(sd)) 3376 if (sr_already_assembled(sd)) { 3377 uuid = sr_uuid_format( 3378 &sd->sd_meta->ssdi.ssd_uuid); 3379 sr_error(sc, "disk %s is currently in use; " 3380 "cannot force create", uuid); 3381 free(uuid, M_DEVBUF, 0); 3382 goto unwind; 3383 } 3384 3385 if (sr_meta_clear(sd)) { 3386 sr_error(sc, "failed to clear metadata"); 3387 goto unwind; 3388 } 3389 } 3390 3391 no_meta = sr_meta_read(sd); 3392 if (no_meta == -1) { 3393 3394 /* Corrupt metadata on one or more chunks. */ 3395 sr_error(sc, "one of the chunks has corrupt metadata; " 3396 "aborting assembly"); 3397 goto unwind; 3398 3399 } else if (no_meta == 0) { 3400 3401 /* Initialise volume and chunk metadata. */ 3402 sr_meta_init(sd, bc->bc_level, no_chunk); 3403 sd->sd_vol_status = BIOC_SVONLINE; 3404 sd->sd_meta_flags = bc->bc_flags & BIOC_SCNOAUTOASSEMBLE; 3405 if (sd->sd_create) { 3406 if ((i = sd->sd_create(sd, bc, no_chunk, 3407 sd->sd_vol.sv_chunk_minsz))) { 3408 rv = i; 3409 goto unwind; 3410 } 3411 } 3412 sr_meta_init_complete(sd); 3413 3414 DNPRINTF(SR_D_IOCTL, 3415 "%s: sr_ioctl_createraid: vol_size: %lld\n", 3416 DEVNAME(sc), sd->sd_meta->ssdi.ssd_size); 3417 3418 /* Warn if we've wasted chunk space due to coercing. */ 3419 if ((sd->sd_capabilities & SR_CAP_NON_COERCED) == 0 && 3420 sd->sd_vol.sv_chunk_minsz != sd->sd_vol.sv_chunk_maxsz) 3421 sr_warn(sc, "chunk sizes are not equal; up to %llu " 3422 "blocks wasted per chunk", 3423 sd->sd_vol.sv_chunk_maxsz - 3424 sd->sd_vol.sv_chunk_minsz); 3425 3426 } else { 3427 3428 /* Ensure metadata level matches requested assembly level. */ 3429 if (sd->sd_meta->ssdi.ssd_level != bc->bc_level) { 3430 sr_error(sc, "volume level does not match metadata " 3431 "level"); 3432 goto unwind; 3433 } 3434 3435 if (sr_already_assembled(sd)) { 3436 uuid = sr_uuid_format(&sd->sd_meta->ssdi.ssd_uuid); 3437 sr_error(sc, "disk %s already assembled", uuid); 3438 free(uuid, M_DEVBUF, 0); 3439 goto unwind; 3440 } 3441 3442 if (user == 0 && sd->sd_meta_flags & BIOC_SCNOAUTOASSEMBLE) { 3443 DNPRINTF(SR_D_META, "%s: disk not auto assembled from " 3444 "metadata\n", DEVNAME(sc)); 3445 goto unwind; 3446 } 3447 3448 if (no_meta != no_chunk) 3449 sr_warn(sc, "trying to bring up %s degraded", 3450 sd->sd_meta->ssd_devname); 3451 3452 if (sd->sd_meta->ssd_meta_flags & SR_META_DIRTY) 3453 sr_warn(sc, "%s was not shutdown properly", 3454 sd->sd_meta->ssd_devname); 3455 3456 SLIST_FOREACH(omi, &sd->sd_meta_opt, omi_link) 3457 if (sd->sd_meta_opt_handler == NULL || 3458 sd->sd_meta_opt_handler(sd, omi->omi_som) != 0) 3459 sr_meta_opt_handler(sd, omi->omi_som); 3460 3461 if (sd->sd_assemble) { 3462 if ((i = sd->sd_assemble(sd, bc, no_chunk, data))) { 3463 rv = i; 3464 goto unwind; 3465 } 3466 } 3467 3468 DNPRINTF(SR_D_META, "%s: disk assembled from metadata\n", 3469 DEVNAME(sc)); 3470 3471 } 3472 3473 /* Metadata MUST be fully populated by this point. */ 3474 TAILQ_INSERT_TAIL(&sc->sc_dis_list, sd, sd_link); 3475 3476 /* Allocate all resources. */ 3477 if ((rv = sd->sd_alloc_resources(sd))) 3478 goto unwind; 3479 3480 /* Adjust flags if necessary. */ 3481 if ((sd->sd_capabilities & SR_CAP_AUTO_ASSEMBLE) && 3482 (bc->bc_flags & BIOC_SCNOAUTOASSEMBLE) != 3483 (sd->sd_meta->ssdi.ssd_vol_flags & BIOC_SCNOAUTOASSEMBLE)) { 3484 sd->sd_meta->ssdi.ssd_vol_flags &= ~BIOC_SCNOAUTOASSEMBLE; 3485 sd->sd_meta->ssdi.ssd_vol_flags |= 3486 bc->bc_flags & BIOC_SCNOAUTOASSEMBLE; 3487 } 3488 3489 if (sd->sd_capabilities & SR_CAP_SYSTEM_DISK) { 3490 /* Initialise volume state. */ 3491 sd->sd_set_vol_state(sd); 3492 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 3493 sr_error(sc, "%s is offline, will not be brought " 3494 "online", sd->sd_meta->ssd_devname); 3495 goto unwind; 3496 } 3497 3498 /* Setup SCSI iopool. */ 3499 scsi_iopool_init(&sd->sd_iopool, sd, sr_wu_get, sr_wu_put); 3500 3501 /* 3502 * All checks passed - return ENXIO if volume cannot be created. 3503 */ 3504 rv = ENXIO; 3505 3506 /* 3507 * Find a free target. 3508 * 3509 * XXX: We reserve sd_target == 0 to indicate the 3510 * discipline is not linked into sc->sc_targets, so begin 3511 * the search with target = 1. 3512 */ 3513 for (target = 1; target < SR_MAX_LD; target++) 3514 if (sc->sc_targets[target] == NULL) 3515 break; 3516 if (target == SR_MAX_LD) { 3517 sr_error(sc, "no free target for %s", 3518 sd->sd_meta->ssd_devname); 3519 goto unwind; 3520 } 3521 3522 /* Clear sense data. */ 3523 bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense)); 3524 3525 /* Attach discipline and get midlayer to probe it. */ 3526 sd->sd_target = target; 3527 sc->sc_targets[target] = sd; 3528 if (scsi_probe_lun(sc->sc_scsibus, target, 0) != 0) { 3529 sr_error(sc, "scsi_probe_lun failed"); 3530 sc->sc_targets[target] = NULL; 3531 sd->sd_target = 0; 3532 goto unwind; 3533 } 3534 3535 link = scsi_get_link(sc->sc_scsibus, target, 0); 3536 if (link == NULL) 3537 goto unwind; 3538 3539 dev = link->device_softc; 3540 DNPRINTF(SR_D_IOCTL, "%s: sr device added: %s at target %d\n", 3541 DEVNAME(sc), dev->dv_xname, sd->sd_target); 3542 3543 /* XXX - Count volumes, not targets. */ 3544 for (i = 0, vol = -1; i <= sd->sd_target; i++) 3545 if (sc->sc_targets[i]) 3546 vol++; 3547 3548 rv = 0; 3549 3550 if (sd->sd_meta->ssd_devname[0] != '\0' && 3551 strncmp(sd->sd_meta->ssd_devname, dev->dv_xname, 3552 sizeof(dev->dv_xname))) 3553 sr_warn(sc, "volume %s is roaming, it used to be %s, " 3554 "updating metadata", dev->dv_xname, 3555 sd->sd_meta->ssd_devname); 3556 3557 /* Populate remaining volume metadata. */ 3558 sd->sd_meta->ssdi.ssd_volid = vol; 3559 strlcpy(sd->sd_meta->ssd_devname, dev->dv_xname, 3560 sizeof(sd->sd_meta->ssd_devname)); 3561 3562 sr_info(sc, "%s volume attached as %s", 3563 sd->sd_name, sd->sd_meta->ssd_devname); 3564 3565 /* Update device name on any roaming chunks. */ 3566 sr_roam_chunks(sd); 3567 3568 #ifndef SMALL_KERNEL 3569 if (sr_sensors_create(sd)) 3570 sr_warn(sc, "unable to create sensor for %s", 3571 dev->dv_xname); 3572 #endif /* SMALL_KERNEL */ 3573 } else { 3574 /* This volume does not attach as a system disk. */ 3575 ch_entry = SLIST_FIRST(cl); /* XXX */ 3576 strlcpy(sd->sd_meta->ssd_devname, ch_entry->src_devname, 3577 sizeof(sd->sd_meta->ssd_devname)); 3578 3579 if (sd->sd_start_discipline(sd)) 3580 goto unwind; 3581 } 3582 3583 /* Save current metadata to disk. */ 3584 rv = sr_meta_save(sd, SR_META_DIRTY); 3585 3586 if (sd->sd_vol_status == BIOC_SVREBUILD) 3587 kthread_create_deferred(sr_rebuild_start, sd); 3588 3589 sd->sd_ready = 1; 3590 3591 return (rv); 3592 3593 unwind: 3594 sr_discipline_shutdown(sd, 0); 3595 3596 if (rv == EAGAIN) 3597 rv = 0; 3598 3599 return (rv); 3600 } 3601 3602 int 3603 sr_ioctl_deleteraid(struct sr_softc *sc, struct sr_discipline *sd, 3604 struct bioc_deleteraid *bd) 3605 { 3606 int rv = 1; 3607 3608 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_deleteraid %s\n", 3609 DEVNAME(sc), bd->bd_dev); 3610 3611 if (sd == NULL) { 3612 TAILQ_FOREACH(sd, &sc->sc_dis_list, sd_link) { 3613 if (!strncmp(sd->sd_meta->ssd_devname, bd->bd_dev, 3614 sizeof(sd->sd_meta->ssd_devname))) 3615 break; 3616 } 3617 if (sd == NULL) { 3618 sr_error(sc, "volume %s not found", bd->bd_dev); 3619 goto bad; 3620 } 3621 } 3622 3623 sd->sd_deleted = 1; 3624 sd->sd_meta->ssdi.ssd_vol_flags = BIOC_SCNOAUTOASSEMBLE; 3625 sr_discipline_shutdown(sd, 1); 3626 3627 rv = 0; 3628 bad: 3629 return (rv); 3630 } 3631 3632 int 3633 sr_ioctl_discipline(struct sr_softc *sc, struct sr_discipline *sd, 3634 struct bioc_discipline *bd) 3635 { 3636 int rv = 1; 3637 3638 /* Dispatch a discipline specific ioctl. */ 3639 3640 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_discipline %s\n", DEVNAME(sc), 3641 bd->bd_dev); 3642 3643 if (sd == NULL) { 3644 TAILQ_FOREACH(sd, &sc->sc_dis_list, sd_link) { 3645 if (!strncmp(sd->sd_meta->ssd_devname, bd->bd_dev, 3646 sizeof(sd->sd_meta->ssd_devname))) 3647 break; 3648 } 3649 if (sd == NULL) { 3650 sr_error(sc, "volume %s not found", bd->bd_dev); 3651 goto bad; 3652 } 3653 } 3654 3655 if (sd->sd_ioctl_handler) 3656 rv = sd->sd_ioctl_handler(sd, bd); 3657 3658 bad: 3659 return (rv); 3660 } 3661 3662 int 3663 sr_ioctl_installboot(struct sr_softc *sc, struct sr_discipline *sd, 3664 struct bioc_installboot *bb) 3665 { 3666 void *bootblk = NULL, *bootldr = NULL; 3667 struct sr_chunk *chunk; 3668 struct sr_meta_opt_item *omi; 3669 struct sr_meta_boot *sbm; 3670 struct disk *dk; 3671 u_int32_t bbs, bls; 3672 u_char duid[8]; 3673 int rv = EINVAL; 3674 int i; 3675 3676 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_installboot %s\n", DEVNAME(sc), 3677 bb->bb_dev); 3678 3679 if (sd == NULL) { 3680 TAILQ_FOREACH(sd, &sc->sc_dis_list, sd_link) { 3681 if (!strncmp(sd->sd_meta->ssd_devname, bb->bb_dev, 3682 sizeof(sd->sd_meta->ssd_devname))) 3683 break; 3684 } 3685 if (sd == NULL) { 3686 sr_error(sc, "volume %s not found", bb->bb_dev); 3687 goto done; 3688 } 3689 } 3690 3691 bzero(duid, sizeof(duid)); 3692 TAILQ_FOREACH(dk, &disklist, dk_link) 3693 if (!strncmp(dk->dk_name, bb->bb_dev, sizeof(bb->bb_dev))) 3694 break; 3695 if (dk == NULL || dk->dk_label == NULL || 3696 (dk->dk_flags & DKF_LABELVALID) == 0 || 3697 bcmp(dk->dk_label->d_uid, &duid, sizeof(duid)) == 0) { 3698 sr_error(sc, "failed to get DUID for softraid volume"); 3699 goto done; 3700 } 3701 memcpy(duid, dk->dk_label->d_uid, sizeof(duid)); 3702 3703 /* Ensure that boot storage area is large enough. */ 3704 if (sd->sd_meta->ssd_data_blkno < (SR_BOOT_OFFSET + SR_BOOT_SIZE)) { 3705 sr_error(sc, "insufficient boot storage"); 3706 goto done; 3707 } 3708 3709 if (bb->bb_bootblk_size > SR_BOOT_BLOCKS_SIZE * DEV_BSIZE) 3710 goto done; 3711 3712 if (bb->bb_bootldr_size > SR_BOOT_LOADER_SIZE * DEV_BSIZE) 3713 goto done; 3714 3715 /* Copy in boot block. */ 3716 bbs = howmany(bb->bb_bootblk_size, DEV_BSIZE) * DEV_BSIZE; 3717 bootblk = malloc(bbs, M_DEVBUF, M_WAITOK | M_ZERO); 3718 if (copyin(bb->bb_bootblk, bootblk, bb->bb_bootblk_size) != 0) 3719 goto done; 3720 3721 /* Copy in boot loader. */ 3722 bls = howmany(bb->bb_bootldr_size, DEV_BSIZE) * DEV_BSIZE; 3723 bootldr = malloc(bls, M_DEVBUF, M_WAITOK | M_ZERO); 3724 if (copyin(bb->bb_bootldr, bootldr, bb->bb_bootldr_size) != 0) 3725 goto done; 3726 3727 /* Create or update optional meta for bootable volumes. */ 3728 SLIST_FOREACH(omi, &sd->sd_meta_opt, omi_link) 3729 if (omi->omi_som->som_type == SR_OPT_BOOT) 3730 break; 3731 if (omi == NULL) { 3732 omi = malloc(sizeof(struct sr_meta_opt_item), M_DEVBUF, 3733 M_WAITOK | M_ZERO); 3734 omi->omi_som = malloc(sizeof(struct sr_meta_crypto), M_DEVBUF, 3735 M_WAITOK | M_ZERO); 3736 omi->omi_som->som_type = SR_OPT_BOOT; 3737 omi->omi_som->som_length = sizeof(struct sr_meta_boot); 3738 SLIST_INSERT_HEAD(&sd->sd_meta_opt, omi, omi_link); 3739 sd->sd_meta->ssdi.ssd_opt_no++; 3740 } 3741 sbm = (struct sr_meta_boot *)omi->omi_som; 3742 3743 memcpy(sbm->sbm_root_duid, duid, sizeof(sbm->sbm_root_duid)); 3744 bzero(&sbm->sbm_boot_duid, sizeof(sbm->sbm_boot_duid)); 3745 sbm->sbm_bootblk_size = bbs; 3746 sbm->sbm_bootldr_size = bls; 3747 3748 DNPRINTF(SR_D_IOCTL, "sr_ioctl_installboot: root duid is " 3749 "%02x%02x%02x%02x%02x%02x%02x%02x\n", sbm->sbm_root_duid[0], 3750 sbm->sbm_root_duid[1], sbm->sbm_root_duid[2], sbm->sbm_root_duid[3], 3751 sbm->sbm_root_duid[4], sbm->sbm_root_duid[5], sbm->sbm_root_duid[6], 3752 sbm->sbm_root_duid[7]); 3753 3754 /* Save boot block and boot loader to each chunk. */ 3755 for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) { 3756 3757 chunk = sd->sd_vol.sv_chunks[i]; 3758 if (chunk->src_meta.scm_status != BIOC_SDONLINE && 3759 chunk->src_meta.scm_status != BIOC_SDREBUILD) 3760 continue; 3761 3762 if (i < SR_MAX_BOOT_DISKS) 3763 memcpy(&sbm->sbm_boot_duid[i], chunk->src_duid, 3764 sizeof(sbm->sbm_boot_duid[i])); 3765 3766 /* Save boot blocks. */ 3767 DNPRINTF(SR_D_IOCTL, 3768 "sr_ioctl_installboot: saving boot block to %s " 3769 "(%u bytes)\n", chunk->src_devname, bbs); 3770 3771 if (sr_rw(sc, chunk->src_dev_mm, bootblk, bbs, 3772 SR_BOOT_BLOCKS_OFFSET, B_WRITE)) { 3773 sr_error(sc, "failed to write boot block", DEVNAME(sc)); 3774 goto done; 3775 } 3776 3777 /* Save boot loader.*/ 3778 DNPRINTF(SR_D_IOCTL, 3779 "sr_ioctl_installboot: saving boot loader to %s " 3780 "(%u bytes)\n", chunk->src_devname, bls); 3781 3782 if (sr_rw(sc, chunk->src_dev_mm, bootldr, bls, 3783 SR_BOOT_LOADER_OFFSET, B_WRITE)) { 3784 sr_error(sc, "failed to write boot loader"); 3785 goto done; 3786 } 3787 3788 } 3789 3790 /* XXX - Install boot block on disk - MD code. */ 3791 3792 /* Mark volume as bootable and save metadata. */ 3793 sd->sd_meta->ssdi.ssd_vol_flags |= BIOC_SCBOOTABLE; 3794 if (sr_meta_save(sd, SR_META_DIRTY)) { 3795 sr_error(sc, "could not save metadata to %s", 3796 chunk->src_devname); 3797 goto done; 3798 } 3799 3800 rv = 0; 3801 3802 done: 3803 if (bootblk) 3804 free(bootblk, M_DEVBUF, 0); 3805 if (bootldr) 3806 free(bootldr, M_DEVBUF, 0); 3807 3808 return (rv); 3809 } 3810 3811 void 3812 sr_chunks_unwind(struct sr_softc *sc, struct sr_chunk_head *cl) 3813 { 3814 struct sr_chunk *ch_entry, *ch_next; 3815 3816 DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind\n", DEVNAME(sc)); 3817 3818 if (!cl) 3819 return; 3820 3821 for (ch_entry = SLIST_FIRST(cl); ch_entry != NULL; ch_entry = ch_next) { 3822 ch_next = SLIST_NEXT(ch_entry, src_link); 3823 3824 DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind closing: %s\n", 3825 DEVNAME(sc), ch_entry->src_devname); 3826 if (ch_entry->src_vn) { 3827 /* 3828 * XXX - explicitly lock the vnode until we can resolve 3829 * the problem introduced by vnode aliasing... specfs 3830 * has no locking, whereas ufs/ffs does! 3831 */ 3832 vn_lock(ch_entry->src_vn, LK_EXCLUSIVE | 3833 LK_RETRY, curproc); 3834 VOP_CLOSE(ch_entry->src_vn, FREAD | FWRITE, NOCRED, 3835 curproc); 3836 vput(ch_entry->src_vn); 3837 } 3838 free(ch_entry, M_DEVBUF, 0); 3839 } 3840 SLIST_INIT(cl); 3841 } 3842 3843 void 3844 sr_discipline_free(struct sr_discipline *sd) 3845 { 3846 struct sr_softc *sc; 3847 struct sr_discipline *sdtmp1; 3848 struct sr_meta_opt_head *som; 3849 struct sr_meta_opt_item *omi, *omi_next; 3850 3851 if (!sd) 3852 return; 3853 3854 sc = sd->sd_sc; 3855 3856 DNPRINTF(SR_D_DIS, "%s: sr_discipline_free %s\n", 3857 DEVNAME(sc), 3858 sd->sd_meta ? sd->sd_meta->ssd_devname : "nodev"); 3859 if (sd->sd_free_resources) 3860 sd->sd_free_resources(sd); 3861 if (sd->sd_vol.sv_chunks) 3862 free(sd->sd_vol.sv_chunks, M_DEVBUF, 0); 3863 if (sd->sd_meta) 3864 free(sd->sd_meta, M_DEVBUF, 0); 3865 if (sd->sd_meta_foreign) 3866 free(sd->sd_meta_foreign, M_DEVBUF, 0); 3867 3868 som = &sd->sd_meta_opt; 3869 for (omi = SLIST_FIRST(som); omi != NULL; omi = omi_next) { 3870 omi_next = SLIST_NEXT(omi, omi_link); 3871 if (omi->omi_som) 3872 free(omi->omi_som, M_DEVBUF, 0); 3873 free(omi, M_DEVBUF, 0); 3874 } 3875 3876 if (sd->sd_target != 0) { 3877 KASSERT(sc->sc_targets[sd->sd_target] == sd); 3878 sc->sc_targets[sd->sd_target] = NULL; 3879 } 3880 3881 TAILQ_FOREACH(sdtmp1, &sc->sc_dis_list, sd_link) { 3882 if (sdtmp1 == sd) 3883 break; 3884 } 3885 if (sdtmp1 != NULL) 3886 TAILQ_REMOVE(&sc->sc_dis_list, sd, sd_link); 3887 3888 explicit_bzero(sd, sizeof *sd); 3889 free(sd, M_DEVBUF, 0); 3890 } 3891 3892 void 3893 sr_discipline_shutdown(struct sr_discipline *sd, int meta_save) 3894 { 3895 struct sr_softc *sc; 3896 int s; 3897 3898 if (!sd) 3899 return; 3900 sc = sd->sd_sc; 3901 3902 DNPRINTF(SR_D_DIS, "%s: sr_discipline_shutdown %s\n", DEVNAME(sc), 3903 sd->sd_meta ? sd->sd_meta->ssd_devname : "nodev"); 3904 3905 /* If rebuilding, abort rebuild and drain I/O. */ 3906 if (sd->sd_reb_active) { 3907 sd->sd_reb_abort = 1; 3908 while (sd->sd_reb_active) 3909 tsleep(sd, PWAIT, "sr_shutdown", 1); 3910 } 3911 3912 if (meta_save) 3913 sr_meta_save(sd, 0); 3914 3915 s = splbio(); 3916 3917 sd->sd_ready = 0; 3918 3919 /* make sure there isn't a sync pending and yield */ 3920 wakeup(sd); 3921 while (sd->sd_sync || sd->sd_must_flush) 3922 if (tsleep(&sd->sd_sync, MAXPRI, "sr_down", 60 * hz) == 3923 EWOULDBLOCK) 3924 break; 3925 3926 #ifndef SMALL_KERNEL 3927 sr_sensors_delete(sd); 3928 #endif /* SMALL_KERNEL */ 3929 3930 if (sd->sd_target != 0) 3931 scsi_detach_lun(sc->sc_scsibus, sd->sd_target, 0, DETACH_FORCE); 3932 3933 sr_chunks_unwind(sc, &sd->sd_vol.sv_chunk_list); 3934 3935 if (sd->sd_taskq) 3936 taskq_destroy(sd->sd_taskq); 3937 3938 sr_discipline_free(sd); 3939 3940 splx(s); 3941 } 3942 3943 int 3944 sr_discipline_init(struct sr_discipline *sd, int level) 3945 { 3946 int rv = 1; 3947 3948 /* Initialise discipline function pointers with defaults. */ 3949 sd->sd_alloc_resources = sr_alloc_resources; 3950 sd->sd_assemble = NULL; 3951 sd->sd_create = NULL; 3952 sd->sd_free_resources = sr_free_resources; 3953 sd->sd_ioctl_handler = NULL; 3954 sd->sd_openings = NULL; 3955 sd->sd_meta_opt_handler = NULL; 3956 sd->sd_rebuild = sr_rebuild; 3957 sd->sd_scsi_inquiry = sr_raid_inquiry; 3958 sd->sd_scsi_read_cap = sr_raid_read_cap; 3959 sd->sd_scsi_tur = sr_raid_tur; 3960 sd->sd_scsi_req_sense = sr_raid_request_sense; 3961 sd->sd_scsi_start_stop = sr_raid_start_stop; 3962 sd->sd_scsi_sync = sr_raid_sync; 3963 sd->sd_scsi_rw = NULL; 3964 sd->sd_scsi_intr = sr_raid_intr; 3965 sd->sd_scsi_wu_done = NULL; 3966 sd->sd_scsi_done = NULL; 3967 sd->sd_set_chunk_state = sr_set_chunk_state; 3968 sd->sd_set_vol_state = sr_set_vol_state; 3969 sd->sd_start_discipline = NULL; 3970 3971 task_set(&sd->sd_meta_save_task, sr_meta_save_callback, sd); 3972 task_set(&sd->sd_hotspare_rebuild_task, sr_hotspare_rebuild_callback, 3973 sd); 3974 3975 switch (level) { 3976 case 0: 3977 sr_raid0_discipline_init(sd); 3978 break; 3979 case 1: 3980 sr_raid1_discipline_init(sd); 3981 break; 3982 case 5: 3983 sr_raid5_discipline_init(sd); 3984 break; 3985 case 6: 3986 sr_raid6_discipline_init(sd); 3987 break; 3988 #ifdef CRYPTO 3989 case 'C': 3990 sr_crypto_discipline_init(sd); 3991 break; 3992 #endif 3993 case 'c': 3994 sr_concat_discipline_init(sd); 3995 break; 3996 default: 3997 goto bad; 3998 } 3999 4000 rv = 0; 4001 bad: 4002 return (rv); 4003 } 4004 4005 int 4006 sr_raid_inquiry(struct sr_workunit *wu) 4007 { 4008 struct sr_discipline *sd = wu->swu_dis; 4009 struct scsi_xfer *xs = wu->swu_xs; 4010 struct scsi_inquiry *cdb = (struct scsi_inquiry *)xs->cmd; 4011 struct scsi_inquiry_data inq; 4012 4013 DNPRINTF(SR_D_DIS, "%s: sr_raid_inquiry\n", DEVNAME(sd->sd_sc)); 4014 4015 if (xs->cmdlen != sizeof(*cdb)) 4016 return (EINVAL); 4017 4018 if (ISSET(cdb->flags, SI_EVPD)) 4019 return (EOPNOTSUPP); 4020 4021 bzero(&inq, sizeof(inq)); 4022 inq.device = T_DIRECT; 4023 inq.dev_qual2 = 0; 4024 inq.version = 2; 4025 inq.response_format = 2; 4026 inq.additional_length = 32; 4027 inq.flags |= SID_CmdQue; 4028 strlcpy(inq.vendor, sd->sd_meta->ssdi.ssd_vendor, 4029 sizeof(inq.vendor)); 4030 strlcpy(inq.product, sd->sd_meta->ssdi.ssd_product, 4031 sizeof(inq.product)); 4032 strlcpy(inq.revision, sd->sd_meta->ssdi.ssd_revision, 4033 sizeof(inq.revision)); 4034 sr_copy_internal_data(xs, &inq, sizeof(inq)); 4035 4036 return (0); 4037 } 4038 4039 int 4040 sr_raid_read_cap(struct sr_workunit *wu) 4041 { 4042 struct sr_discipline *sd = wu->swu_dis; 4043 struct scsi_xfer *xs = wu->swu_xs; 4044 struct scsi_read_cap_data rcd; 4045 struct scsi_read_cap_data_16 rcd16; 4046 int64_t addr; 4047 int rv = 1; 4048 4049 DNPRINTF(SR_D_DIS, "%s: sr_raid_read_cap\n", DEVNAME(sd->sd_sc)); 4050 4051 addr = sd->sd_meta->ssdi.ssd_size - 1; 4052 if (xs->cmd->opcode == READ_CAPACITY) { 4053 bzero(&rcd, sizeof(rcd)); 4054 if (addr > 0xffffffffllu) 4055 _lto4b(0xffffffff, rcd.addr); 4056 else 4057 _lto4b(addr, rcd.addr); 4058 _lto4b(DEV_BSIZE, rcd.length); 4059 sr_copy_internal_data(xs, &rcd, sizeof(rcd)); 4060 rv = 0; 4061 } else if (xs->cmd->opcode == READ_CAPACITY_16) { 4062 bzero(&rcd16, sizeof(rcd16)); 4063 _lto8b(addr, rcd16.addr); 4064 _lto4b(DEV_BSIZE, rcd16.length); 4065 sr_copy_internal_data(xs, &rcd16, sizeof(rcd16)); 4066 rv = 0; 4067 } 4068 4069 return (rv); 4070 } 4071 4072 int 4073 sr_raid_tur(struct sr_workunit *wu) 4074 { 4075 struct sr_discipline *sd = wu->swu_dis; 4076 4077 DNPRINTF(SR_D_DIS, "%s: sr_raid_tur\n", DEVNAME(sd->sd_sc)); 4078 4079 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 4080 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT; 4081 sd->sd_scsi_sense.flags = SKEY_NOT_READY; 4082 sd->sd_scsi_sense.add_sense_code = 0x04; 4083 sd->sd_scsi_sense.add_sense_code_qual = 0x11; 4084 sd->sd_scsi_sense.extra_len = 4; 4085 return (1); 4086 } else if (sd->sd_vol_status == BIOC_SVINVALID) { 4087 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT; 4088 sd->sd_scsi_sense.flags = SKEY_HARDWARE_ERROR; 4089 sd->sd_scsi_sense.add_sense_code = 0x05; 4090 sd->sd_scsi_sense.add_sense_code_qual = 0x00; 4091 sd->sd_scsi_sense.extra_len = 4; 4092 return (1); 4093 } 4094 4095 return (0); 4096 } 4097 4098 int 4099 sr_raid_request_sense(struct sr_workunit *wu) 4100 { 4101 struct sr_discipline *sd = wu->swu_dis; 4102 struct scsi_xfer *xs = wu->swu_xs; 4103 4104 DNPRINTF(SR_D_DIS, "%s: sr_raid_request_sense\n", 4105 DEVNAME(sd->sd_sc)); 4106 4107 /* use latest sense data */ 4108 memcpy(&xs->sense, &sd->sd_scsi_sense, sizeof(xs->sense)); 4109 4110 /* clear sense data */ 4111 bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense)); 4112 4113 return (0); 4114 } 4115 4116 int 4117 sr_raid_start_stop(struct sr_workunit *wu) 4118 { 4119 struct scsi_xfer *xs = wu->swu_xs; 4120 struct scsi_start_stop *ss = (struct scsi_start_stop *)xs->cmd; 4121 4122 DNPRINTF(SR_D_DIS, "%s: sr_raid_start_stop\n", 4123 DEVNAME(wu->swu_dis->sd_sc)); 4124 4125 if (!ss) 4126 return (1); 4127 4128 /* 4129 * do nothing! 4130 * a softraid discipline should always reflect correct status 4131 */ 4132 return (0); 4133 } 4134 4135 int 4136 sr_raid_sync(struct sr_workunit *wu) 4137 { 4138 struct sr_discipline *sd = wu->swu_dis; 4139 int s, rv = 0, ios; 4140 4141 DNPRINTF(SR_D_DIS, "%s: sr_raid_sync\n", DEVNAME(sd->sd_sc)); 4142 4143 /* when doing a fake sync don't count the wu */ 4144 ios = (wu->swu_flags & SR_WUF_FAKE) ? 0 : 1; 4145 4146 s = splbio(); 4147 sd->sd_sync = 1; 4148 while (sd->sd_wu_pending > ios) { 4149 if (tsleep(sd, PRIBIO, "sr_sync", 15 * hz) == EWOULDBLOCK) { 4150 DNPRINTF(SR_D_DIS, "%s: sr_raid_sync timeout\n", 4151 DEVNAME(sd->sd_sc)); 4152 rv = 1; 4153 break; 4154 } 4155 } 4156 sd->sd_sync = 0; 4157 splx(s); 4158 4159 wakeup(&sd->sd_sync); 4160 4161 return (rv); 4162 } 4163 4164 void 4165 sr_raid_intr(struct buf *bp) 4166 { 4167 struct sr_ccb *ccb = (struct sr_ccb *)bp; 4168 struct sr_workunit *wu = ccb->ccb_wu; 4169 #ifdef SR_DEBUG 4170 struct sr_discipline *sd = wu->swu_dis; 4171 struct scsi_xfer *xs = wu->swu_xs; 4172 #endif 4173 int s; 4174 4175 DNPRINTF(SR_D_INTR, "%s: %s %s intr bp %p xs %p\n", 4176 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, sd->sd_name, bp, xs); 4177 4178 s = splbio(); 4179 sr_ccb_done(ccb); 4180 sr_wu_done(wu); 4181 splx(s); 4182 } 4183 4184 void 4185 sr_schedule_wu(struct sr_workunit *wu) 4186 { 4187 struct sr_discipline *sd = wu->swu_dis; 4188 struct sr_workunit *wup; 4189 int s; 4190 4191 DNPRINTF(SR_D_WU, "sr_schedule_wu: schedule wu %p state %i " 4192 "flags 0x%x\n", wu, wu->swu_state, wu->swu_flags); 4193 4194 KASSERT(wu->swu_io_count > 0); 4195 4196 s = splbio(); 4197 4198 /* Construct the work unit, do not schedule it. */ 4199 if (wu->swu_state == SR_WU_CONSTRUCT) 4200 goto queued; 4201 4202 /* Deferred work unit being reconstructed, do not start. */ 4203 if (wu->swu_state == SR_WU_REQUEUE) 4204 goto queued; 4205 4206 /* Current work unit failed, restart. */ 4207 if (wu->swu_state == SR_WU_RESTART) 4208 goto start; 4209 4210 if (wu->swu_state != SR_WU_INPROGRESS) 4211 panic("sr_schedule_wu: work unit not in progress (state %i)\n", 4212 wu->swu_state); 4213 4214 /* Walk queue backwards and fill in collider if we have one. */ 4215 TAILQ_FOREACH_REVERSE(wup, &sd->sd_wu_pendq, sr_wu_list, swu_link) { 4216 if (wu->swu_blk_end < wup->swu_blk_start || 4217 wup->swu_blk_end < wu->swu_blk_start) 4218 continue; 4219 4220 /* Defer work unit due to LBA collision. */ 4221 DNPRINTF(SR_D_WU, "sr_schedule_wu: deferring work unit %p\n", 4222 wu); 4223 wu->swu_state = SR_WU_DEFERRED; 4224 while (wup->swu_collider) 4225 wup = wup->swu_collider; 4226 wup->swu_collider = wu; 4227 TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link); 4228 sd->sd_wu_collisions++; 4229 goto queued; 4230 } 4231 4232 start: 4233 sr_raid_startwu(wu); 4234 4235 queued: 4236 splx(s); 4237 } 4238 4239 void 4240 sr_raid_startwu(struct sr_workunit *wu) 4241 { 4242 struct sr_discipline *sd = wu->swu_dis; 4243 struct sr_ccb *ccb; 4244 4245 DNPRINTF(SR_D_WU, "sr_raid_startwu: start wu %p\n", wu); 4246 4247 splassert(IPL_BIO); 4248 4249 if (wu->swu_state == SR_WU_DEFERRED) { 4250 TAILQ_REMOVE(&sd->sd_wu_defq, wu, swu_link); 4251 wu->swu_state = SR_WU_INPROGRESS; 4252 } 4253 4254 if (wu->swu_state != SR_WU_RESTART) 4255 TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link); 4256 4257 /* Start all of the individual I/Os. */ 4258 if (wu->swu_cb_active == 1) 4259 panic("%s: sr_startwu_callback", DEVNAME(sd->sd_sc)); 4260 wu->swu_cb_active = 1; 4261 4262 TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) 4263 VOP_STRATEGY(&ccb->ccb_buf); 4264 4265 wu->swu_cb_active = 0; 4266 } 4267 4268 void 4269 sr_raid_recreate_wu(struct sr_workunit *wu) 4270 { 4271 struct sr_discipline *sd = wu->swu_dis; 4272 struct sr_workunit *wup = wu; 4273 4274 /* 4275 * Recreate a work unit by releasing the associated CCBs and reissuing 4276 * the SCSI I/O request. This process is then repeated for all of the 4277 * colliding work units. 4278 */ 4279 do { 4280 sr_wu_release_ccbs(wup); 4281 4282 wup->swu_state = SR_WU_REQUEUE; 4283 if (sd->sd_scsi_rw(wup)) 4284 panic("could not requeue I/O"); 4285 4286 wup = wup->swu_collider; 4287 } while (wup); 4288 } 4289 4290 int 4291 sr_alloc_resources(struct sr_discipline *sd) 4292 { 4293 if (sr_wu_alloc(sd, sizeof(struct sr_workunit))) { 4294 sr_error(sd->sd_sc, "unable to allocate work units"); 4295 return (ENOMEM); 4296 } 4297 if (sr_ccb_alloc(sd)) { 4298 sr_error(sd->sd_sc, "unable to allocate ccbs"); 4299 return (ENOMEM); 4300 } 4301 4302 return (0); 4303 } 4304 4305 void 4306 sr_free_resources(struct sr_discipline *sd) 4307 { 4308 sr_wu_free(sd); 4309 sr_ccb_free(sd); 4310 } 4311 4312 void 4313 sr_set_chunk_state(struct sr_discipline *sd, int c, int new_state) 4314 { 4315 int old_state, s; 4316 4317 DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_set_chunk_state %d -> %d\n", 4318 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 4319 sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state); 4320 4321 /* ok to go to splbio since this only happens in error path */ 4322 s = splbio(); 4323 old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status; 4324 4325 /* multiple IOs to the same chunk that fail will come through here */ 4326 if (old_state == new_state) 4327 goto done; 4328 4329 switch (old_state) { 4330 case BIOC_SDONLINE: 4331 if (new_state == BIOC_SDOFFLINE) 4332 break; 4333 else 4334 goto die; 4335 break; 4336 4337 case BIOC_SDOFFLINE: 4338 goto die; 4339 4340 default: 4341 die: 4342 splx(s); /* XXX */ 4343 panic("%s: %s: %s: invalid chunk state transition " 4344 "%d -> %d", DEVNAME(sd->sd_sc), 4345 sd->sd_meta->ssd_devname, 4346 sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, 4347 old_state, new_state); 4348 /* NOTREACHED */ 4349 } 4350 4351 sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state; 4352 sd->sd_set_vol_state(sd); 4353 4354 sd->sd_must_flush = 1; 4355 task_add(systq, &sd->sd_meta_save_task); 4356 done: 4357 splx(s); 4358 } 4359 4360 void 4361 sr_set_vol_state(struct sr_discipline *sd) 4362 { 4363 int states[SR_MAX_STATES]; 4364 int new_state, i, nd; 4365 int old_state = sd->sd_vol_status; 4366 u_int32_t s; 4367 4368 DNPRINTF(SR_D_STATE, "%s: %s: sr_set_vol_state\n", 4369 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname); 4370 4371 nd = sd->sd_meta->ssdi.ssd_chunk_no; 4372 4373 for (i = 0; i < SR_MAX_STATES; i++) 4374 states[i] = 0; 4375 4376 for (i = 0; i < nd; i++) { 4377 s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status; 4378 if (s >= SR_MAX_STATES) 4379 panic("%s: %s: %s: invalid chunk state", 4380 DEVNAME(sd->sd_sc), 4381 sd->sd_meta->ssd_devname, 4382 sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname); 4383 states[s]++; 4384 } 4385 4386 if (states[BIOC_SDONLINE] == nd) 4387 new_state = BIOC_SVONLINE; 4388 else 4389 new_state = BIOC_SVOFFLINE; 4390 4391 DNPRINTF(SR_D_STATE, "%s: %s: sr_set_vol_state %d -> %d\n", 4392 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 4393 old_state, new_state); 4394 4395 switch (old_state) { 4396 case BIOC_SVONLINE: 4397 if (new_state == BIOC_SVOFFLINE || new_state == BIOC_SVONLINE) 4398 break; 4399 else 4400 goto die; 4401 break; 4402 4403 case BIOC_SVOFFLINE: 4404 /* XXX this might be a little too much */ 4405 goto die; 4406 4407 default: 4408 die: 4409 panic("%s: %s: invalid volume state transition " 4410 "%d -> %d", DEVNAME(sd->sd_sc), 4411 sd->sd_meta->ssd_devname, 4412 old_state, new_state); 4413 /* NOTREACHED */ 4414 } 4415 4416 sd->sd_vol_status = new_state; 4417 } 4418 4419 void * 4420 sr_block_get(struct sr_discipline *sd, long length) 4421 { 4422 return dma_alloc(length, PR_NOWAIT | PR_ZERO); 4423 } 4424 4425 void 4426 sr_block_put(struct sr_discipline *sd, void *ptr, int length) 4427 { 4428 dma_free(ptr, length); 4429 } 4430 4431 void 4432 sr_checksum_print(u_int8_t *md5) 4433 { 4434 int i; 4435 4436 for (i = 0; i < MD5_DIGEST_LENGTH; i++) 4437 printf("%02x", md5[i]); 4438 } 4439 4440 void 4441 sr_checksum(struct sr_softc *sc, void *src, void *md5, u_int32_t len) 4442 { 4443 MD5_CTX ctx; 4444 4445 DNPRINTF(SR_D_MISC, "%s: sr_checksum(%p %p %d)\n", DEVNAME(sc), src, 4446 md5, len); 4447 4448 MD5Init(&ctx); 4449 MD5Update(&ctx, src, len); 4450 MD5Final(md5, &ctx); 4451 } 4452 4453 void 4454 sr_uuid_generate(struct sr_uuid *uuid) 4455 { 4456 arc4random_buf(uuid->sui_id, sizeof(uuid->sui_id)); 4457 /* UUID version 4: random */ 4458 uuid->sui_id[6] &= 0x0f; 4459 uuid->sui_id[6] |= 0x40; 4460 /* RFC4122 variant */ 4461 uuid->sui_id[8] &= 0x3f; 4462 uuid->sui_id[8] |= 0x80; 4463 } 4464 4465 char * 4466 sr_uuid_format(struct sr_uuid *uuid) 4467 { 4468 char *uuidstr; 4469 4470 uuidstr = malloc(37, M_DEVBUF, M_WAITOK); 4471 4472 snprintf(uuidstr, 37, 4473 "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-" 4474 "%02x%02x%02x%02x%02x%02x", 4475 uuid->sui_id[0], uuid->sui_id[1], 4476 uuid->sui_id[2], uuid->sui_id[3], 4477 uuid->sui_id[4], uuid->sui_id[5], 4478 uuid->sui_id[6], uuid->sui_id[7], 4479 uuid->sui_id[8], uuid->sui_id[9], 4480 uuid->sui_id[10], uuid->sui_id[11], 4481 uuid->sui_id[12], uuid->sui_id[13], 4482 uuid->sui_id[14], uuid->sui_id[15]); 4483 4484 return uuidstr; 4485 } 4486 4487 void 4488 sr_uuid_print(struct sr_uuid *uuid, int cr) 4489 { 4490 char *uuidstr; 4491 4492 uuidstr = sr_uuid_format(uuid); 4493 printf("%s%s", uuidstr, (cr ? "\n" : "")); 4494 free(uuidstr, M_DEVBUF, 37); 4495 } 4496 4497 int 4498 sr_already_assembled(struct sr_discipline *sd) 4499 { 4500 struct sr_softc *sc = sd->sd_sc; 4501 struct sr_discipline *sdtmp; 4502 4503 TAILQ_FOREACH(sdtmp, &sc->sc_dis_list, sd_link) { 4504 if (!bcmp(&sd->sd_meta->ssdi.ssd_uuid, 4505 &sdtmp->sd_meta->ssdi.ssd_uuid, 4506 sizeof(sd->sd_meta->ssdi.ssd_uuid))) 4507 return (1); 4508 } 4509 4510 return (0); 4511 } 4512 4513 int32_t 4514 sr_validate_stripsize(u_int32_t b) 4515 { 4516 int s = 0; 4517 4518 if (b % DEV_BSIZE) 4519 return (-1); 4520 4521 while ((b & 1) == 0) { 4522 b >>= 1; 4523 s++; 4524 } 4525 4526 /* only multiple of twos */ 4527 b >>= 1; 4528 if (b) 4529 return(-1); 4530 4531 return (s); 4532 } 4533 4534 void 4535 sr_shutdown(void) 4536 { 4537 struct sr_softc *sc = softraid0; 4538 struct sr_discipline *sd; 4539 4540 DNPRINTF(SR_D_MISC, "%s: sr_shutdown\n", DEVNAME(sc)); 4541 4542 /* 4543 * Since softraid is not under mainbus, we have to explicitly 4544 * notify its children that the power is going down, so they 4545 * can execute their shutdown hooks. 4546 */ 4547 config_suspend((struct device *)sc, DVACT_POWERDOWN); 4548 4549 /* Shutdown disciplines in reverse attach order. */ 4550 while ((sd = TAILQ_LAST(&sc->sc_dis_list, sr_discipline_list)) != NULL) 4551 sr_discipline_shutdown(sd, 1); 4552 } 4553 4554 int 4555 sr_validate_io(struct sr_workunit *wu, daddr_t *blkno, char *func) 4556 { 4557 struct sr_discipline *sd = wu->swu_dis; 4558 struct scsi_xfer *xs = wu->swu_xs; 4559 int rv = 1; 4560 4561 DNPRINTF(SR_D_DIS, "%s: %s 0x%02x\n", DEVNAME(sd->sd_sc), func, 4562 xs->cmd->opcode); 4563 4564 if (sd->sd_meta->ssd_data_blkno == 0) 4565 panic("invalid data blkno"); 4566 4567 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 4568 DNPRINTF(SR_D_DIS, "%s: %s device offline\n", 4569 DEVNAME(sd->sd_sc), func); 4570 goto bad; 4571 } 4572 4573 if (xs->datalen == 0) { 4574 printf("%s: %s: illegal block count for %s\n", 4575 DEVNAME(sd->sd_sc), func, sd->sd_meta->ssd_devname); 4576 goto bad; 4577 } 4578 4579 if (xs->cmdlen == 10) 4580 *blkno = _4btol(((struct scsi_rw_big *)xs->cmd)->addr); 4581 else if (xs->cmdlen == 16) 4582 *blkno = _8btol(((struct scsi_rw_16 *)xs->cmd)->addr); 4583 else if (xs->cmdlen == 6) 4584 *blkno = _3btol(((struct scsi_rw *)xs->cmd)->addr); 4585 else { 4586 printf("%s: %s: illegal cmdlen for %s\n", 4587 DEVNAME(sd->sd_sc), func, sd->sd_meta->ssd_devname); 4588 goto bad; 4589 } 4590 4591 wu->swu_blk_start = *blkno; 4592 wu->swu_blk_end = *blkno + (xs->datalen >> DEV_BSHIFT) - 1; 4593 4594 if (wu->swu_blk_end > sd->sd_meta->ssdi.ssd_size) { 4595 DNPRINTF(SR_D_DIS, "%s: %s out of bounds start: %lld " 4596 "end: %lld length: %d\n", 4597 DEVNAME(sd->sd_sc), func, (long long)wu->swu_blk_start, 4598 (long long)wu->swu_blk_end, xs->datalen); 4599 4600 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT | 4601 SSD_ERRCODE_VALID; 4602 sd->sd_scsi_sense.flags = SKEY_ILLEGAL_REQUEST; 4603 sd->sd_scsi_sense.add_sense_code = 0x21; 4604 sd->sd_scsi_sense.add_sense_code_qual = 0x00; 4605 sd->sd_scsi_sense.extra_len = 4; 4606 goto bad; 4607 } 4608 4609 rv = 0; 4610 bad: 4611 return (rv); 4612 } 4613 4614 void 4615 sr_rebuild_start(void *arg) 4616 { 4617 struct sr_discipline *sd = arg; 4618 struct sr_softc *sc = sd->sd_sc; 4619 4620 DNPRINTF(SR_D_REBUILD, "%s: %s starting rebuild thread\n", 4621 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname); 4622 4623 if (kthread_create(sr_rebuild_thread, sd, &sd->sd_background_proc, 4624 DEVNAME(sc)) != 0) 4625 printf("%s: unable to start background operation\n", 4626 DEVNAME(sc)); 4627 } 4628 4629 void 4630 sr_rebuild_thread(void *arg) 4631 { 4632 struct sr_discipline *sd = arg; 4633 4634 DNPRINTF(SR_D_REBUILD, "%s: %s rebuild thread started\n", 4635 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname); 4636 4637 sd->sd_reb_active = 1; 4638 sd->sd_rebuild(sd); 4639 sd->sd_reb_active = 0; 4640 4641 kthread_exit(0); 4642 } 4643 4644 void 4645 sr_rebuild(struct sr_discipline *sd) 4646 { 4647 struct sr_softc *sc = sd->sd_sc; 4648 u_int64_t sz, psz, whole_blk, partial_blk, blk, restart; 4649 daddr_t lba; 4650 int64_t rb; 4651 struct sr_workunit *wu_r, *wu_w; 4652 struct scsi_xfer xs_r, xs_w; 4653 struct scsi_rw_16 *cr, *cw; 4654 int c, s, slept, percent = 0, old_percent = -1; 4655 u_int8_t *buf; 4656 4657 whole_blk = sd->sd_meta->ssdi.ssd_size / SR_REBUILD_IO_SIZE; 4658 partial_blk = sd->sd_meta->ssdi.ssd_size % SR_REBUILD_IO_SIZE; 4659 4660 restart = sd->sd_meta->ssd_rebuild / SR_REBUILD_IO_SIZE; 4661 if (restart > whole_blk) { 4662 printf("%s: bogus rebuild restart offset, starting from 0\n", 4663 DEVNAME(sc)); 4664 restart = 0; 4665 } 4666 if (restart) { 4667 /* 4668 * XXX there is a hole here; there is a posibility that we 4669 * had a restart however the chunk that was supposed to 4670 * be rebuilt is no longer valid; we can reach this situation 4671 * when a rebuild is in progress and the box crashes and 4672 * on reboot the rebuild chunk is different (like zero'd or 4673 * replaced). We need to check the uuid of the chunk that is 4674 * being rebuilt to assert this. 4675 */ 4676 psz = sd->sd_meta->ssdi.ssd_size; 4677 rb = sd->sd_meta->ssd_rebuild; 4678 if (rb > 0) 4679 percent = 100 - ((psz * 100 - rb * 100) / psz) - 1; 4680 else 4681 percent = 0; 4682 printf("%s: resuming rebuild on %s at %d%%\n", 4683 DEVNAME(sc), sd->sd_meta->ssd_devname, percent); 4684 } 4685 4686 /* currently this is 64k therefore we can use dma_alloc */ 4687 buf = dma_alloc(SR_REBUILD_IO_SIZE << DEV_BSHIFT, PR_WAITOK); 4688 for (blk = restart; blk <= whole_blk; blk++) { 4689 lba = blk * SR_REBUILD_IO_SIZE; 4690 sz = SR_REBUILD_IO_SIZE; 4691 if (blk == whole_blk) { 4692 if (partial_blk == 0) 4693 break; 4694 sz = partial_blk; 4695 } 4696 4697 /* get some wu */ 4698 wu_r = sr_scsi_wu_get(sd, 0); 4699 wu_w = sr_scsi_wu_get(sd, 0); 4700 4701 DNPRINTF(SR_D_REBUILD, "%s: %s rebuild wu_r %p, wu_w %p\n", 4702 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, wu_r, wu_w); 4703 4704 /* setup read io */ 4705 bzero(&xs_r, sizeof xs_r); 4706 xs_r.error = XS_NOERROR; 4707 xs_r.flags = SCSI_DATA_IN; 4708 xs_r.datalen = sz << DEV_BSHIFT; 4709 xs_r.data = buf; 4710 xs_r.cmdlen = sizeof(*cr); 4711 xs_r.cmd = &xs_r.cmdstore; 4712 cr = (struct scsi_rw_16 *)xs_r.cmd; 4713 cr->opcode = READ_16; 4714 _lto4b(sz, cr->length); 4715 _lto8b(lba, cr->addr); 4716 wu_r->swu_state = SR_WU_CONSTRUCT; 4717 wu_r->swu_flags |= SR_WUF_REBUILD; 4718 wu_r->swu_xs = &xs_r; 4719 if (sd->sd_scsi_rw(wu_r)) { 4720 printf("%s: could not create read io\n", 4721 DEVNAME(sc)); 4722 goto fail; 4723 } 4724 4725 /* setup write io */ 4726 bzero(&xs_w, sizeof xs_w); 4727 xs_w.error = XS_NOERROR; 4728 xs_w.flags = SCSI_DATA_OUT; 4729 xs_w.datalen = sz << DEV_BSHIFT; 4730 xs_w.data = buf; 4731 xs_w.cmdlen = sizeof(*cw); 4732 xs_w.cmd = &xs_w.cmdstore; 4733 cw = (struct scsi_rw_16 *)xs_w.cmd; 4734 cw->opcode = WRITE_16; 4735 _lto4b(sz, cw->length); 4736 _lto8b(lba, cw->addr); 4737 wu_w->swu_state = SR_WU_CONSTRUCT; 4738 wu_w->swu_flags |= SR_WUF_REBUILD | SR_WUF_WAKEUP; 4739 wu_w->swu_xs = &xs_w; 4740 if (sd->sd_scsi_rw(wu_w)) { 4741 printf("%s: could not create write io\n", 4742 DEVNAME(sc)); 4743 goto fail; 4744 } 4745 4746 /* 4747 * collide with the read io so that we get automatically 4748 * started when the read is done 4749 */ 4750 wu_w->swu_state = SR_WU_DEFERRED; 4751 wu_r->swu_collider = wu_w; 4752 s = splbio(); 4753 TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu_w, swu_link); 4754 splx(s); 4755 4756 DNPRINTF(SR_D_REBUILD, "%s: %s rebuild scheduling wu_r %p\n", 4757 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, wu_r); 4758 4759 wu_r->swu_state = SR_WU_INPROGRESS; 4760 sr_schedule_wu(wu_r); 4761 4762 /* wait for write completion */ 4763 slept = 0; 4764 while ((wu_w->swu_flags & SR_WUF_REBUILDIOCOMP) == 0) { 4765 tsleep(wu_w, PRIBIO, "sr_rebuild", 0); 4766 slept = 1; 4767 } 4768 /* yield if we didn't sleep */ 4769 if (slept == 0) 4770 tsleep(sc, PWAIT, "sr_yield", 1); 4771 4772 sr_scsi_wu_put(sd, wu_r); 4773 sr_scsi_wu_put(sd, wu_w); 4774 4775 sd->sd_meta->ssd_rebuild = lba; 4776 4777 /* XXX - this should be based on size, not percentage. */ 4778 /* save metadata every percent */ 4779 psz = sd->sd_meta->ssdi.ssd_size; 4780 rb = sd->sd_meta->ssd_rebuild; 4781 if (rb > 0) 4782 percent = 100 - ((psz * 100 - rb * 100) / psz) - 1; 4783 else 4784 percent = 0; 4785 if (percent != old_percent && blk != whole_blk) { 4786 if (sr_meta_save(sd, SR_META_DIRTY)) 4787 printf("%s: could not save metadata to %s\n", 4788 DEVNAME(sc), sd->sd_meta->ssd_devname); 4789 old_percent = percent; 4790 } 4791 4792 if (sd->sd_reb_abort) 4793 goto abort; 4794 } 4795 4796 /* all done */ 4797 sd->sd_meta->ssd_rebuild = 0; 4798 for (c = 0; c < sd->sd_meta->ssdi.ssd_chunk_no; c++) { 4799 if (sd->sd_vol.sv_chunks[c]->src_meta.scm_status == 4800 BIOC_SDREBUILD) { 4801 sd->sd_set_chunk_state(sd, c, BIOC_SDONLINE); 4802 break; 4803 } 4804 } 4805 4806 abort: 4807 if (sr_meta_save(sd, SR_META_DIRTY)) 4808 printf("%s: could not save metadata to %s\n", 4809 DEVNAME(sc), sd->sd_meta->ssd_devname); 4810 fail: 4811 dma_free(buf, SR_REBUILD_IO_SIZE << DEV_BSHIFT); 4812 } 4813 4814 #ifndef SMALL_KERNEL 4815 int 4816 sr_sensors_create(struct sr_discipline *sd) 4817 { 4818 struct sr_softc *sc = sd->sd_sc; 4819 int rv = 1; 4820 4821 DNPRINTF(SR_D_STATE, "%s: %s: sr_sensors_create\n", 4822 DEVNAME(sc), sd->sd_meta->ssd_devname); 4823 4824 sd->sd_vol.sv_sensor.type = SENSOR_DRIVE; 4825 sd->sd_vol.sv_sensor.status = SENSOR_S_UNKNOWN; 4826 strlcpy(sd->sd_vol.sv_sensor.desc, sd->sd_meta->ssd_devname, 4827 sizeof(sd->sd_vol.sv_sensor.desc)); 4828 4829 sensor_attach(&sc->sc_sensordev, &sd->sd_vol.sv_sensor); 4830 sd->sd_vol.sv_sensor_attached = 1; 4831 4832 if (sc->sc_sensor_task == NULL) { 4833 sc->sc_sensor_task = sensor_task_register(sc, 4834 sr_sensors_refresh, 10); 4835 if (sc->sc_sensor_task == NULL) 4836 goto bad; 4837 } 4838 4839 rv = 0; 4840 bad: 4841 return (rv); 4842 } 4843 4844 void 4845 sr_sensors_delete(struct sr_discipline *sd) 4846 { 4847 DNPRINTF(SR_D_STATE, "%s: sr_sensors_delete\n", DEVNAME(sd->sd_sc)); 4848 4849 if (sd->sd_vol.sv_sensor_attached) 4850 sensor_detach(&sd->sd_sc->sc_sensordev, &sd->sd_vol.sv_sensor); 4851 } 4852 4853 void 4854 sr_sensors_refresh(void *arg) 4855 { 4856 struct sr_softc *sc = arg; 4857 struct sr_volume *sv; 4858 struct sr_discipline *sd; 4859 4860 DNPRINTF(SR_D_STATE, "%s: sr_sensors_refresh\n", DEVNAME(sc)); 4861 4862 TAILQ_FOREACH(sd, &sc->sc_dis_list, sd_link) { 4863 sv = &sd->sd_vol; 4864 4865 switch(sd->sd_vol_status) { 4866 case BIOC_SVOFFLINE: 4867 sv->sv_sensor.value = SENSOR_DRIVE_FAIL; 4868 sv->sv_sensor.status = SENSOR_S_CRIT; 4869 break; 4870 4871 case BIOC_SVDEGRADED: 4872 sv->sv_sensor.value = SENSOR_DRIVE_PFAIL; 4873 sv->sv_sensor.status = SENSOR_S_WARN; 4874 break; 4875 4876 case BIOC_SVSCRUB: 4877 case BIOC_SVONLINE: 4878 sv->sv_sensor.value = SENSOR_DRIVE_ONLINE; 4879 sv->sv_sensor.status = SENSOR_S_OK; 4880 break; 4881 4882 default: 4883 sv->sv_sensor.value = 0; /* unknown */ 4884 sv->sv_sensor.status = SENSOR_S_UNKNOWN; 4885 } 4886 } 4887 } 4888 #endif /* SMALL_KERNEL */ 4889 4890 #ifdef SR_FANCY_STATS 4891 void sr_print_stats(void); 4892 4893 void 4894 sr_print_stats(void) 4895 { 4896 struct sr_softc *sc = softraid0; 4897 struct sr_discipline *sd; 4898 4899 if (sc == NULL) { 4900 printf("no softraid softc found\n"); 4901 return; 4902 } 4903 4904 TAILQ_FOREACH(sd, &sc->sc_dis_list, sd_link) { 4905 printf("%s: ios pending %d, collisions %llu\n", 4906 sd->sd_meta->ssd_devname, 4907 sd->sd_wu_pending, 4908 sd->sd_wu_collisions); 4909 } 4910 } 4911 #endif /* SR_FANCY_STATS */ 4912 4913 #ifdef SR_DEBUG 4914 void 4915 sr_meta_print(struct sr_metadata *m) 4916 { 4917 int i; 4918 struct sr_meta_chunk *mc; 4919 struct sr_meta_opt_hdr *omh; 4920 4921 if (!(sr_debug & SR_D_META)) 4922 return; 4923 4924 printf("\tssd_magic 0x%llx\n", m->ssdi.ssd_magic); 4925 printf("\tssd_version %d\n", m->ssdi.ssd_version); 4926 printf("\tssd_vol_flags 0x%x\n", m->ssdi.ssd_vol_flags); 4927 printf("\tssd_uuid "); 4928 sr_uuid_print(&m->ssdi.ssd_uuid, 1); 4929 printf("\tssd_chunk_no %d\n", m->ssdi.ssd_chunk_no); 4930 printf("\tssd_chunk_id %d\n", m->ssdi.ssd_chunk_id); 4931 printf("\tssd_opt_no %d\n", m->ssdi.ssd_opt_no); 4932 printf("\tssd_volid %d\n", m->ssdi.ssd_volid); 4933 printf("\tssd_level %d\n", m->ssdi.ssd_level); 4934 printf("\tssd_size %lld\n", m->ssdi.ssd_size); 4935 printf("\tssd_devname %s\n", m->ssd_devname); 4936 printf("\tssd_vendor %s\n", m->ssdi.ssd_vendor); 4937 printf("\tssd_product %s\n", m->ssdi.ssd_product); 4938 printf("\tssd_revision %s\n", m->ssdi.ssd_revision); 4939 printf("\tssd_strip_size %d\n", m->ssdi.ssd_strip_size); 4940 printf("\tssd_checksum "); 4941 sr_checksum_print(m->ssd_checksum); 4942 printf("\n"); 4943 printf("\tssd_meta_flags 0x%x\n", m->ssd_meta_flags); 4944 printf("\tssd_ondisk %llu\n", m->ssd_ondisk); 4945 4946 mc = (struct sr_meta_chunk *)(m + 1); 4947 for (i = 0; i < m->ssdi.ssd_chunk_no; i++, mc++) { 4948 printf("\t\tscm_volid %d\n", mc->scmi.scm_volid); 4949 printf("\t\tscm_chunk_id %d\n", mc->scmi.scm_chunk_id); 4950 printf("\t\tscm_devname %s\n", mc->scmi.scm_devname); 4951 printf("\t\tscm_size %lld\n", mc->scmi.scm_size); 4952 printf("\t\tscm_coerced_size %lld\n",mc->scmi.scm_coerced_size); 4953 printf("\t\tscm_uuid "); 4954 sr_uuid_print(&mc->scmi.scm_uuid, 1); 4955 printf("\t\tscm_checksum "); 4956 sr_checksum_print(mc->scm_checksum); 4957 printf("\n"); 4958 printf("\t\tscm_status %d\n", mc->scm_status); 4959 } 4960 4961 omh = (struct sr_meta_opt_hdr *)((u_int8_t *)(m + 1) + 4962 sizeof(struct sr_meta_chunk) * m->ssdi.ssd_chunk_no); 4963 for (i = 0; i < m->ssdi.ssd_opt_no; i++) { 4964 printf("\t\t\tsom_type %d\n", omh->som_type); 4965 printf("\t\t\tsom_checksum "); 4966 sr_checksum_print(omh->som_checksum); 4967 printf("\n"); 4968 omh = (struct sr_meta_opt_hdr *)((void *)omh + 4969 omh->som_length); 4970 } 4971 } 4972 4973 void 4974 sr_dump_block(void *blk, int len) 4975 { 4976 uint8_t *b = blk; 4977 int i, j, c; 4978 4979 for (i = 0; i < len; i += 16) { 4980 for (j = 0; j < 16; j++) 4981 printf("%.2x ", b[i + j]); 4982 printf(" "); 4983 for (j = 0; j < 16; j++) { 4984 c = b[i + j]; 4985 if (c < ' ' || c > 'z' || i + j > len) 4986 c = '.'; 4987 printf("%c", c); 4988 } 4989 printf("\n"); 4990 } 4991 } 4992 4993 void 4994 sr_dump_mem(u_int8_t *p, int len) 4995 { 4996 int i; 4997 4998 for (i = 0; i < len; i++) 4999 printf("%02x ", *p++); 5000 printf("\n"); 5001 } 5002 5003 #endif /* SR_DEBUG */ 5004 5005 #ifdef HIBERNATE 5006 /* 5007 * Side-effect free (no malloc, printf, pool, splx) softraid crypto writer. 5008 * 5009 * This function must perform the following: 5010 * 1. Determine the underlying device's own side-effect free I/O function 5011 * (eg, ahci_hibernate_io, wd_hibernate_io, etc). 5012 * 2. Store enough information in the provided page argument for subsequent 5013 * I/O calls (such as the crypto discipline structure for the keys, the 5014 * offset of the softraid partition on the underlying disk, as well as 5015 * the offset of the swap partition within the crypto volume. 5016 * 3. Encrypt the incoming data using the sr_discipline keys, then pass 5017 * the request to the underlying device's own I/O function. 5018 */ 5019 int 5020 sr_hibernate_io(dev_t dev, daddr_t blkno, vaddr_t addr, size_t size, int op, void *page) 5021 { 5022 /* Struct for stashing data obtained on HIB_INIT. 5023 * XXX 5024 * We share the page with the underlying device's own 5025 * side-effect free I/O function, so we pad our data to 5026 * the end of the page. Presently this does not overlap 5027 * with either of the two other side-effect free i/o 5028 * functions (ahci/wd). 5029 */ 5030 struct { 5031 char pad[3072]; 5032 struct sr_discipline *srd; 5033 hibio_fn subfn; /* underlying device i/o fn */ 5034 dev_t subdev; /* underlying device dev_t */ 5035 daddr_t sr_swapoff; /* ofs of swap part in sr volume */ 5036 char buf[DEV_BSIZE]; /* encryption performed into this buf */ 5037 } *my = page; 5038 extern struct cfdriver sd_cd; 5039 char errstr[128], *dl_ret; 5040 struct sr_chunk *schunk; 5041 struct sd_softc *sd; 5042 struct aes_xts_ctx ctx; 5043 struct sr_softc *sc; 5044 struct device *dv; 5045 daddr_t key_blkno; 5046 uint32_t sub_raidoff; /* ofs of sr part in underlying dev */ 5047 struct disklabel dl; 5048 struct partition *pp; 5049 size_t i, j; 5050 u_char iv[8]; 5051 5052 /* 5053 * In HIB_INIT, we are passed the swap partition size and offset 5054 * in 'size' and 'blkno' respectively. These are relative to the 5055 * start of the softraid partition, and we need to save these 5056 * for later translation to the underlying device's layout. 5057 */ 5058 if (op == HIB_INIT) { 5059 dv = disk_lookup(&sd_cd, DISKUNIT(dev)); 5060 sd = (struct sd_softc *)dv; 5061 sc = (struct sr_softc *)dv->dv_parent->dv_parent; 5062 5063 /* 5064 * Look up the sr discipline. This is used to determine 5065 * if we are SR crypto and what the underlying device is. 5066 */ 5067 my->srd = sc->sc_targets[sd->sc_link->target]; 5068 DNPRINTF(SR_D_MISC, "sr_hibernate_io: discipline is %s\n", 5069 my->srd->sd_name); 5070 if (strncmp(my->srd->sd_name, "CRYPTO", 10)) 5071 return (ENOTSUP); 5072 5073 /* Find the underlying device */ 5074 schunk = my->srd->sd_vol.sv_chunks[0]; 5075 my->subdev = schunk->src_dev_mm; 5076 5077 /* 5078 * Find the appropriate underlying device side effect free 5079 * I/O function, based on the type of device it is. 5080 */ 5081 my->subfn = get_hibernate_io_function(my->subdev); 5082 5083 /* 5084 * Find blkno where this raid partition starts on 5085 * the underlying disk. 5086 */ 5087 dl_ret = disk_readlabel(&dl, my->subdev, errstr, 5088 sizeof(errstr)); 5089 if (dl_ret) { 5090 printf("Hibernate error reading disklabel: %s\n", dl_ret); 5091 return (ENOTSUP); 5092 } 5093 5094 pp = &dl.d_partitions[DISKPART(my->subdev)]; 5095 if (pp->p_fstype != FS_RAID || DL_GETPSIZE(pp) == 0) 5096 return (ENOTSUP); 5097 5098 /* Find the blkno of the SR part in the underlying device */ 5099 sub_raidoff = my->srd->sd_meta->ssd_data_blkno + 5100 DL_SECTOBLK(&dl, DL_GETPOFFSET(pp)); 5101 DNPRINTF(SR_D_MISC,"sr_hibernate_io: blk trans ofs: %d blks\n", 5102 sub_raidoff); 5103 5104 /* Save the blkno of the swap partition in the SR disk */ 5105 my->sr_swapoff = blkno; 5106 5107 /* Initialize the sub-device */ 5108 return my->subfn(my->subdev, sub_raidoff + blkno, 5109 addr, size, op, page); 5110 } 5111 5112 /* Hibernate only uses (and we only support) writes */ 5113 if (op != HIB_W) 5114 return (ENOTSUP); 5115 5116 /* 5117 * Blocks act as the IV for the encryption. These block numbers 5118 * are relative to the start of the sr partition, but the 'blkno' 5119 * passed above is relative to the start of the swap partition 5120 * inside the sr partition, so bias appropriately. 5121 */ 5122 key_blkno = my->sr_swapoff + blkno; 5123 5124 /* Process each disk block one at a time. */ 5125 for (i = 0; i < size; i += DEV_BSIZE) { 5126 int res; 5127 5128 bzero(&ctx, sizeof(ctx)); 5129 5130 /* 5131 * Set encryption key (from the sr discipline stashed 5132 * during HIB_INIT. This code is based on the softraid 5133 * bootblock code. 5134 */ 5135 aes_xts_setkey(&ctx, my->srd->mds.mdd_crypto.scr_key[0], 64); 5136 /* We encrypt DEV_BSIZE bytes at a time in my->buf */ 5137 memcpy(my->buf, ((char *)addr) + i, DEV_BSIZE); 5138 5139 /* Block number is the IV */ 5140 memcpy(&iv, &key_blkno, sizeof(key_blkno)); 5141 aes_xts_reinit(&ctx, iv); 5142 5143 /* Encrypt DEV_BSIZE bytes, AES_XTS_BLOCKSIZE bytes at a time */ 5144 for (j = 0; j < DEV_BSIZE; j += AES_XTS_BLOCKSIZE) 5145 aes_xts_encrypt(&ctx, my->buf + j); 5146 5147 /* 5148 * Write one block out from my->buf to the underlying device 5149 * using its own side-effect free I/O function. 5150 */ 5151 res = my->subfn(my->subdev, blkno + (i / DEV_BSIZE), 5152 (vaddr_t)(my->buf), DEV_BSIZE, op, page); 5153 if (res != 0) 5154 return (res); 5155 key_blkno++; 5156 } 5157 return (0); 5158 } 5159 #endif /* HIBERNATE */ 5160