/* $OpenBSD: softraid.c,v 1.201 2010/03/28 16:38:57 jsing Exp $ */
/*
 * Copyright (c) 2007, 2008, 2009 Marco Peereboom <marco@peereboom.us>
 * Copyright (c) 2008 Chris Kuethe <ckuethe@openbsd.org>
 * Copyright (c) 2009 Joel Sing <jsing@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bio.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/device.h>
#include <sys/ioctl.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/kernel.h>
#include <sys/disk.h>
#include <sys/rwlock.h>
#include <sys/queue.h>
#include <sys/fcntl.h>
#include <sys/disklabel.h>
#include <sys/mount.h>
#include <sys/sensors.h>
#include <sys/stat.h>
#include <sys/conf.h>
#include <sys/uio.h>
#include <sys/workq.h>
#include <sys/kthread.h>

#ifdef AOE
#include <sys/mbuf.h>
#include <net/if_aoe.h>
#endif /* AOE */

#include <crypto/cryptodev.h>

#include <scsi/scsi_all.h>
#include <scsi/scsiconf.h>
#include <scsi/scsi_disk.h>

#include <dev/softraidvar.h>
#include <dev/rndvar.h>

/* #define SR_FANCY_STATS */

#ifdef SR_DEBUG
#define SR_FANCY_STATS
uint32_t	sr_debug = 0
		    /* | SR_D_CMD */
		    /* | SR_D_MISC */
		    /* | SR_D_INTR */
		    /* | SR_D_IOCTL */
		    /* | SR_D_CCB */
		    /* | SR_D_WU */
		    /* | SR_D_META */
		    /* | SR_D_DIS */
		    /* | SR_D_STATE */
		;
#endif

int		sr_match(struct device *, void *, void *);
void		sr_attach(struct device *, struct device *, void *);
int		sr_detach(struct device *, int);
int		sr_activate(struct device *, int);

struct cfattach softraid_ca = {
	sizeof(struct sr_softc), sr_match, sr_attach, sr_detach,
	sr_activate
};

struct cfdriver softraid_cd = {
	NULL, "softraid", DV_DULL
};

/* scsi & discipline */
void		sr_scsi_cmd(struct scsi_xfer *);
void		sr_minphys(struct buf *bp, struct scsi_link *sl);
void		sr_copy_internal_data(struct scsi_xfer *,
		    void *, size_t);
int		sr_scsi_ioctl(struct scsi_link *, u_long,
		    caddr_t, int, struct proc *);
int		sr_ioctl(struct device *, u_long, caddr_t);
int		sr_ioctl_inq(struct sr_softc *, struct bioc_inq *);
int		sr_ioctl_vol(struct sr_softc *, struct bioc_vol *);
int		sr_ioctl_disk(struct sr_softc *, struct bioc_disk *);
int		sr_ioctl_setstate(struct sr_softc *,
		    struct bioc_setstate *);
int		sr_ioctl_createraid(struct sr_softc *,
		    struct bioc_createraid *, int);
int		sr_ioctl_deleteraid(struct sr_softc *,
		    struct bioc_deleteraid *);
int		sr_ioctl_discipline(struct sr_softc *,
		    struct bioc_discipline *);
int		sr_ioctl_installboot(struct sr_softc *,
		    struct bioc_installboot *);
void		sr_chunks_unwind(struct sr_softc *,
		    struct sr_chunk_head *);
void		sr_discipline_free(struct sr_discipline *);
void		sr_discipline_shutdown(struct sr_discipline *);
int		sr_discipline_init(struct sr_discipline *, int);

/* utility functions */
void		sr_shutdown(void *);
void		sr_uuid_get(struct sr_uuid *);
void		sr_uuid_print(struct sr_uuid *, int);
void		sr_checksum_print(u_int8_t *);
int		sr_boot_assembly(struct sr_softc *);
int		sr_already_assembled(struct sr_discipline *);
int		sr_hotspare(struct sr_softc *, dev_t);
void		sr_hotspare_rebuild(struct sr_discipline *);
int		sr_rebuild_init(struct sr_discipline *, dev_t, int);
void		sr_rebuild(void *);
void		sr_rebuild_thread(void *);
void		sr_roam_chunks(struct sr_discipline *);
int		sr_chunk_in_use(struct sr_softc *, dev_t);

/* don't include these on RAMDISK */
#ifndef SMALL_KERNEL
void		sr_sensors_refresh(void *);
int		sr_sensors_create(struct sr_discipline *);
void		sr_sensors_delete(struct sr_discipline *);
#endif

/* metadata */
int		sr_meta_probe(struct sr_discipline *, dev_t *, int);
int		sr_meta_attach(struct sr_discipline *, int, int);
int		sr_meta_rw(struct sr_discipline *, dev_t, void *,
		    size_t, daddr64_t, long);
int		sr_meta_clear(struct sr_discipline *);
void		sr_meta_chunks_create(struct sr_softc *,
		    struct sr_chunk_head *);
void		sr_meta_init(struct sr_discipline *,
		    struct sr_chunk_head *);
void		sr_meta_opt_load(struct sr_discipline *,
		    struct sr_meta_opt *);

/* hotplug magic */
void		sr_disk_attach(struct disk *, int);

struct sr_hotplug_list {
	void			(*sh_hotplug)(struct sr_discipline *,
				    struct disk *, int);
	struct sr_discipline	*sh_sd;

	SLIST_ENTRY(sr_hotplug_list) shl_link;
};
SLIST_HEAD(sr_hotplug_list_head, sr_hotplug_list);

struct sr_hotplug_list_head	sr_hotplug_callbacks;
extern void			(*softraid_disk_attach)(struct disk *, int);

/* scsi glue */
struct scsi_adapter sr_switch = {
	sr_scsi_cmd, sr_minphys, NULL, NULL, sr_scsi_ioctl
};

struct scsi_device sr_dev = {
	NULL, NULL, NULL, NULL
};

/* native metadata format */
int		sr_meta_native_bootprobe(struct sr_softc *,
		    struct device *, struct sr_metadata_list_head *);
#define SR_META_NOTCLAIMED	(0)
#define SR_META_CLAIMED		(1)
int		sr_meta_native_probe(struct sr_softc *,
		    struct sr_chunk *);
int		sr_meta_native_attach(struct sr_discipline *, int);
int		sr_meta_native_write(struct sr_discipline *, dev_t,
		    struct sr_metadata *, void *);

#ifdef SR_DEBUG
void		sr_meta_print(struct sr_metadata *);
#else
#define sr_meta_print(m)
#endif

/* the metadata driver should remain stateless */
struct sr_meta_driver {
	daddr64_t		smd_offset;	/* metadata location */
	u_int32_t		smd_size;	/* size of metadata */

	int			(*smd_probe)(struct sr_softc *,
				    struct sr_chunk *);
	int			(*smd_attach)(struct sr_discipline *, int);
	int			(*smd_detach)(struct sr_discipline *);
	int			(*smd_read)(struct sr_discipline *, dev_t,
				    struct sr_metadata *, void *);
	int			(*smd_write)(struct sr_discipline *, dev_t,
				    struct sr_metadata *, void *);
	int			(*smd_validate)(struct sr_discipline *,
				    struct sr_metadata *, void *);
} smd[] = {
	{ SR_META_OFFSET, SR_META_SIZE * 512,
	  sr_meta_native_probe, sr_meta_native_attach, NULL,
	  sr_meta_native_read, sr_meta_native_write, NULL },
	{ 0, 0, NULL, NULL, NULL, NULL }
};

int
sr_meta_attach(struct sr_discipline *sd, int chunk_no, int force)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct sr_chunk_head	*cl;
	struct sr_chunk		*ch_entry, *chunk1, *chunk2;
	int			rv = 1, i = 0;

	DNPRINTF(SR_D_META, "%s: sr_meta_attach(%d)\n", DEVNAME(sc), chunk_no);

	/* in memory copy of metadata */
	sd->sd_meta = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO);
	if (!sd->sd_meta) {
		printf("%s: could not allocate memory for metadata\n",
		    DEVNAME(sc));
		goto bad;
	}

	if (sd->sd_meta_type != SR_META_F_NATIVE) {
		/* in memory copy of foreign metadata */
		sd->sd_meta_foreign = malloc(smd[sd->sd_meta_type].smd_size,
		    M_DEVBUF, M_ZERO);
		if (!sd->sd_meta_foreign) {
			/* unwind frees sd_meta */
			printf("%s: could not allocate memory for foreign "
			    "metadata\n", DEVNAME(sc));
			goto bad;
		}
	}

	/* we have a valid list now create an array index */
	cl = &sd->sd_vol.sv_chunk_list;
	sd->sd_vol.sv_chunks = malloc(sizeof(struct sr_chunk *) * chunk_no,
	    M_DEVBUF, M_WAITOK | M_ZERO);

	/* fill out chunk array */
	i = 0;
	SLIST_FOREACH(ch_entry, cl, src_link)
		sd->sd_vol.sv_chunks[i++] = ch_entry;

	/* attach metadata */
	if (smd[sd->sd_meta_type].smd_attach(sd, force))
		goto bad;

	/* Force chunks into correct order now that metadata is attached. */
	SLIST_FOREACH(ch_entry, cl, src_link)
		SLIST_REMOVE(cl, ch_entry, sr_chunk, src_link);
	for (i = 0; i < chunk_no; i++) {
		ch_entry = sd->sd_vol.sv_chunks[i];
		chunk2 = NULL;
		SLIST_FOREACH(chunk1, cl, src_link) {
			if (chunk1->src_meta.scmi.scm_chunk_id >
			    ch_entry->src_meta.scmi.scm_chunk_id)
				break;
			chunk2 = chunk1;
		}
		if (chunk2 == NULL)
			SLIST_INSERT_HEAD(cl, ch_entry, src_link);
		else
			SLIST_INSERT_AFTER(chunk2, ch_entry, src_link);
	}
	i = 0;
	SLIST_FOREACH(ch_entry, cl, src_link)
		sd->sd_vol.sv_chunks[i++] = ch_entry;

	rv = 0;
bad:
	return (rv);
}

int
sr_meta_probe(struct sr_discipline *sd, dev_t *dt, int no_chunk)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct vnode		*vn;
	struct sr_chunk		*ch_entry, *ch_prev = NULL;
	struct sr_chunk_head	*cl;
	char			devname[32];
	int			i, d, type, found, prevf, error;
	dev_t			dev;

	DNPRINTF(SR_D_META, "%s: sr_meta_probe(%d)\n", DEVNAME(sc), no_chunk);

	if (no_chunk == 0)
		goto unwind;

	cl = &sd->sd_vol.sv_chunk_list;

	for (d = 0, prevf = SR_META_F_INVALID; d < no_chunk; d++) {
		ch_entry = malloc(sizeof(struct sr_chunk), M_DEVBUF,
		    M_WAITOK | M_ZERO);
		/* keep disks in user supplied order */
		if (ch_prev)
			SLIST_INSERT_AFTER(ch_prev, ch_entry, src_link);
		else
			SLIST_INSERT_HEAD(cl, ch_entry, src_link);
		ch_prev = ch_entry;
		dev = dt[d];
		ch_entry->src_dev_mm = dev;

		if (dev == NODEV) {
			ch_entry->src_meta.scm_status = BIOC_SDOFFLINE;
			continue;
		} else {
			sr_meta_getdevname(sc, dev, devname, sizeof(devname));
			if (bdevvp(dev, &vn)) {
				printf("%s: sr_meta_probe: can't allocate "
				    "vnode\n", DEVNAME(sc));
				goto unwind;
			}

			/*
			 * XXX leaving dev open for now; move this to attach
			 * and figure out the open/close dance for unwind.
			 */
			error = VOP_OPEN(vn, FREAD | FWRITE, NOCRED, 0);
			if (error) {
				DNPRINTF(SR_D_META,"%s: sr_meta_probe can't "
				    "open %s\n", DEVNAME(sc), devname);
				vput(vn);
				goto unwind;
			}

			strlcpy(ch_entry->src_devname, devname,
			    sizeof(ch_entry->src_devname));
			ch_entry->src_vn = vn;
		}

		/* determine if this is a device we understand */
		for (i = 0, found = SR_META_F_INVALID; smd[i].smd_probe; i++) {
			type = smd[i].smd_probe(sc, ch_entry);
			if (type == SR_META_F_INVALID)
				continue;
			else {
				found = type;
				break;
			}
		}

		if (found == SR_META_F_INVALID)
			goto unwind;
		if (prevf == SR_META_F_INVALID)
			prevf = found;
		if (prevf != found) {
			DNPRINTF(SR_D_META, "%s: prevf != found\n",
			    DEVNAME(sc));
			goto unwind;
		}
	}

	return (prevf);
unwind:
	return (SR_META_F_INVALID);
}

void
sr_meta_getdevname(struct sr_softc *sc, dev_t dev, char *buf, int size)
{
	int			maj, unit, part;
	char			*name;

	DNPRINTF(SR_D_META, "%s: sr_meta_getdevname(%p, %d)\n",
	    DEVNAME(sc), buf, size);

	if (!buf)
		return;

	maj = major(dev);
	part = DISKPART(dev);
	unit = DISKUNIT(dev);

	name = findblkname(maj);
	if (name == NULL)
		return;

	snprintf(buf, size, "%s%d%c", name, unit, part + 'a');
}

int
sr_meta_rw(struct sr_discipline *sd, dev_t dev, void *md, size_t sz,
    daddr64_t ofs, long flags)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct buf		b;
	int			rv = 1;

	DNPRINTF(SR_D_META, "%s: sr_meta_rw(0x%x, %p, %d, %llu 0x%x)\n",
	    DEVNAME(sc), dev, md, sz, ofs, flags);

	bzero(&b, sizeof(b));

	if (md == NULL) {
		printf("%s: read invalid metadata pointer\n", DEVNAME(sc));
		goto done;
	}
	b.b_flags = flags | B_PHYS;
	b.b_blkno = ofs;
	b.b_bcount = sz;
	b.b_bufsize = sz;
	b.b_resid = sz;
	b.b_data = md;
	b.b_error = 0;
	b.b_proc = curproc;
	b.b_dev = dev;
	b.b_iodone = NULL;
	if (bdevvp(dev, &b.b_vp)) {
		printf("%s: sr_meta_rw: can't allocate vnode\n", DEVNAME(sc));
		goto done;
	}
	if ((b.b_flags & B_READ) == 0)
		b.b_vp->v_numoutput++;

	LIST_INIT(&b.b_dep);
	VOP_STRATEGY(&b);
	biowait(&b);

	if (b.b_flags & B_ERROR) {
		printf("%s: 0x%x i/o error on block %llu while reading "
		    "metadata %d\n", DEVNAME(sc), dev, b.b_blkno, b.b_error);
		goto done;
	}
	rv = 0;
done:
	if (b.b_vp)
		vput(b.b_vp);

	return (rv);
}

int
sr_meta_clear(struct sr_discipline *sd)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct sr_chunk_head	*cl = &sd->sd_vol.sv_chunk_list;
	struct sr_chunk		*ch_entry;
	void			*m;
	int			rv = 1;

	DNPRINTF(SR_D_META, "%s: sr_meta_clear\n", DEVNAME(sc));

	if (sd->sd_meta_type != SR_META_F_NATIVE) {
		printf("%s: sr_meta_clear can not clear foreign metadata\n",
		    DEVNAME(sc));
		goto done;
	}

	m = malloc(SR_META_SIZE * 512, M_DEVBUF, M_WAITOK | M_ZERO);
	SLIST_FOREACH(ch_entry, cl, src_link) {
		if (sr_meta_native_write(sd, ch_entry->src_dev_mm, m, NULL)) {
			/* XXX mark disk offline */
			DNPRINTF(SR_D_META, "%s: sr_meta_clear failed to "
			    "clear %s\n", DEVNAME(sc), ch_entry->src_devname);
			rv++;
			continue;
		}
		bzero(&ch_entry->src_meta, sizeof(ch_entry->src_meta));
	}

	bzero(sd->sd_meta, SR_META_SIZE * 512);

	free(m, M_DEVBUF);
	rv = 0;
done:
	return (rv);
}

void
sr_meta_chunks_create(struct sr_softc *sc, struct sr_chunk_head *cl)
{
	struct sr_chunk		*ch_entry;
	struct sr_uuid		uuid;
	int			cid = 0;
	char			*name;
	u_int64_t		max_chunk_sz = 0, min_chunk_sz;

	DNPRINTF(SR_D_META, "%s: sr_meta_chunks_create\n", DEVNAME(sc));

	sr_uuid_get(&uuid);

	/* fill out stuff and get largest chunk size while looping */
	SLIST_FOREACH(ch_entry, cl, src_link) {
		name = ch_entry->src_devname;
		ch_entry->src_meta.scmi.scm_size = ch_entry->src_size;
		ch_entry->src_meta.scmi.scm_chunk_id = cid++;
		ch_entry->src_meta.scm_status = BIOC_SDONLINE;
		strlcpy(ch_entry->src_meta.scmi.scm_devname, name,
		    sizeof(ch_entry->src_meta.scmi.scm_devname));
		bcopy(&uuid, &ch_entry->src_meta.scmi.scm_uuid,
		    sizeof(ch_entry->src_meta.scmi.scm_uuid));

		if (ch_entry->src_meta.scmi.scm_size > max_chunk_sz)
			max_chunk_sz = ch_entry->src_meta.scmi.scm_size;
	}

	/* get smallest chunk size */
	min_chunk_sz = max_chunk_sz;
	SLIST_FOREACH(ch_entry, cl, src_link)
		if (ch_entry->src_meta.scmi.scm_size < min_chunk_sz)
			min_chunk_sz = ch_entry->src_meta.scmi.scm_size;

	/* equalize all sizes */
	SLIST_FOREACH(ch_entry, cl, src_link)
		ch_entry->src_meta.scmi.scm_coerced_size = min_chunk_sz;

	/* whine if chunks are not the same size */
	if (min_chunk_sz != max_chunk_sz)
		printf("%s: chunk sizes are not equal; up to %llu blocks "
		    "wasted per chunk\n",
		    DEVNAME(sc), max_chunk_sz - min_chunk_sz);
}

void
sr_meta_init(struct sr_discipline *sd, struct sr_chunk_head *cl)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct sr_metadata	*sm = sd->sd_meta;
	struct sr_meta_chunk	*im_sc;
	int			i, chunk_no;

	DNPRINTF(SR_D_META, "%s: sr_meta_init\n", DEVNAME(sc));

	if (!sm)
		return;

	/* initial metadata */
	sm->ssdi.ssd_magic = SR_MAGIC;
	sm->ssdi.ssd_version = SR_META_VERSION;
	sm->ssd_ondisk = 0;
	sm->ssdi.ssd_flags = sd->sd_meta_flags;

	/* get uuid from chunk 0 */
	bcopy(&sd->sd_vol.sv_chunks[0]->src_meta.scmi.scm_uuid,
	    &sm->ssdi.ssd_uuid,
	    sizeof(struct sr_uuid));

	/* volume is filled in createraid */

	/* add missing chunk bits */
	chunk_no = sm->ssdi.ssd_chunk_no;
	for (i = 0; i < chunk_no; i++) {
		im_sc = &sd->sd_vol.sv_chunks[i]->src_meta;
		im_sc->scmi.scm_volid = sm->ssdi.ssd_volid;
		sr_checksum(sc, im_sc, &im_sc->scm_checksum,
		    sizeof(struct sr_meta_chunk_invariant));
	}
}

void
sr_meta_opt_load(struct sr_discipline *sd, struct sr_meta_opt *om)
{
	if (om->somi.som_type == SR_OPT_BOOT) {

	} else
		panic("unknown optional metadata type");
}

void
sr_meta_save_callback(void *arg1, void *arg2)
{
	struct sr_discipline	*sd = arg1;
	int			s;

	s = splbio();

	if (sr_meta_save(arg1, SR_META_DIRTY))
		printf("%s: save metadata failed\n",
		    DEVNAME(sd->sd_sc));

	sd->sd_must_flush = 0;
	splx(s);
}

int
sr_meta_save(struct sr_discipline *sd, u_int32_t flags)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct sr_metadata	*sm = sd->sd_meta, *m;
	struct sr_meta_driver	*s;
	struct sr_chunk		*src;
	struct sr_meta_chunk	*cm;
	struct sr_workunit	wu;
	struct sr_meta_opt_item *omi;
	struct sr_meta_opt	*om;
	int			i;

	DNPRINTF(SR_D_META, "%s: sr_meta_save %s\n",
	    DEVNAME(sc), sd->sd_meta->ssd_devname);

	if (!sm) {
		printf("%s: no in memory copy of metadata\n",
		    DEVNAME(sc));
		goto bad;
	}

	/* meta scratchpad */
	s = &smd[sd->sd_meta_type];
	m = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO);
	if (!m) {
		printf("%s: could not allocate metadata scratch area\n",
		    DEVNAME(sc));
		goto bad;
	}

	/* from here on out metadata is updated */
restart:
	sm->ssd_ondisk++;
	sm->ssd_meta_flags = flags;
	bcopy(sm, m, sizeof(*m));

	/* Chunk metadata. */
	cm = (struct sr_meta_chunk *)(m + 1);
	for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) {
		src = sd->sd_vol.sv_chunks[i];
		bcopy(&src->src_meta, cm, sizeof(*cm));
		cm++;
	}

	/* Optional metadata. */
	om = (struct sr_meta_opt *)(cm);
	SLIST_FOREACH(omi, &sd->sd_meta_opt, omi_link) {
		bcopy(&omi->omi_om, om, sizeof(*om));
		sr_checksum(sc, om, &om->som_checksum,
		    sizeof(struct sr_meta_opt_invariant));
		om++;
	}

	for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) {
		src = sd->sd_vol.sv_chunks[i];

		/* skip disks that are offline */
		if (src->src_meta.scm_status == BIOC_SDOFFLINE)
			continue;

		/* calculate metadata checksum for correct chunk */
		m->ssdi.ssd_chunk_id = i;
		sr_checksum(sc, m, &m->ssd_checksum,
		    sizeof(struct sr_meta_invariant));

#ifdef SR_DEBUG
		DNPRINTF(SR_D_META, "%s: sr_meta_save %s: volid: %d "
		    "chunkid: %d checksum: ",
		    DEVNAME(sc), src->src_meta.scmi.scm_devname,
		    m->ssdi.ssd_volid, m->ssdi.ssd_chunk_id);

		if (sr_debug & SR_D_META)
			sr_checksum_print((u_int8_t *)&m->ssd_checksum);
		DNPRINTF(SR_D_META, "\n");
		sr_meta_print(m);
#endif

		/* translate and write to disk */
		if (s->smd_write(sd, src->src_dev_mm, m, NULL /* XXX */)) {
			printf("%s: could not write metadata to %s\n",
			    DEVNAME(sc), src->src_devname);
			/* restart the meta write */
			src->src_meta.scm_status = BIOC_SDOFFLINE;
			/* XXX recalculate volume status */
			goto restart;
		}
	}

	/* not all disciplines have sync */
	if (sd->sd_scsi_sync) {
		bzero(&wu, sizeof(wu));
		wu.swu_fake = 1;
		wu.swu_dis = sd;
		sd->sd_scsi_sync(&wu);
	}
	free(m, M_DEVBUF);
	return (0);
bad:
	return (1);
}

int
sr_meta_read(struct sr_discipline *sd)
{
#ifdef SR_DEBUG
	struct sr_softc		*sc = sd->sd_sc;
#endif
	struct sr_chunk_head	*cl = &sd->sd_vol.sv_chunk_list;
	struct sr_metadata	*sm;
	struct sr_chunk		*ch_entry;
	struct sr_meta_chunk	*cp;
	struct sr_meta_driver	*s;
	struct sr_meta_opt_item *omi;
	struct sr_meta_opt	*om;
	void			*fm = NULL;
	int			i, no_disk = 0, got_meta = 0;

	DNPRINTF(SR_D_META, "%s: sr_meta_read\n", DEVNAME(sc));

	sm = malloc(SR_META_SIZE * 512, M_DEVBUF, M_WAITOK | M_ZERO);
	s = &smd[sd->sd_meta_type];
	if (sd->sd_meta_type != SR_META_F_NATIVE)
		fm = malloc(s->smd_size, M_DEVBUF, M_WAITOK | M_ZERO);

	cp = (struct sr_meta_chunk *)(sm + 1);
	SLIST_FOREACH(ch_entry, cl, src_link) {
		/* skip disks that are offline */
		if (ch_entry->src_meta.scm_status == BIOC_SDOFFLINE) {
			DNPRINTF(SR_D_META,
			    "%s: %s chunk marked offline, spoofing status\n",
			    DEVNAME(sc), ch_entry->src_devname);
			cp++; /* adjust chunk pointer to match failure */
			continue;
		} else if (s->smd_read(sd, ch_entry->src_dev_mm, sm, fm)) {
			/* read and translate */
			/* XXX mark chunk offline, elsewhere!! */
			ch_entry->src_meta.scm_status = BIOC_SDOFFLINE;
			cp++; /* adjust chunk pointer to match failure */
			DNPRINTF(SR_D_META, "%s: sr_meta_read failed\n",
			    DEVNAME(sc));
			continue;
		}

		if (sm->ssdi.ssd_magic != SR_MAGIC) {
			DNPRINTF(SR_D_META, "%s: sr_meta_read !SR_MAGIC\n",
			    DEVNAME(sc));
			continue;
		}

		/* validate metadata */
		if (sr_meta_validate(sd, ch_entry->src_dev_mm, sm, fm)) {
			DNPRINTF(SR_D_META, "%s: invalid metadata\n",
			    DEVNAME(sc));
			no_disk = -1;
			goto done;
		}

		/* assume first chunk contains metadata */
		if (got_meta == 0) {
			bcopy(sm, sd->sd_meta, sizeof(*sd->sd_meta));
			got_meta = 1;
		}

		bcopy(cp, &ch_entry->src_meta, sizeof(ch_entry->src_meta));

		/* Process optional metadata. */
		om = (struct sr_meta_opt *) ((u_int8_t *)(sm + 1) +
		    sizeof(struct sr_meta_chunk) * sm->ssdi.ssd_chunk_no);
		for (i = 0; i < sm->ssdi.ssd_opt_no; i++) {

			omi = malloc(sizeof(struct sr_meta_opt_item),
			    M_DEVBUF, M_WAITOK | M_ZERO);
			bcopy(om, &omi->omi_om, sizeof(struct sr_meta_opt));
			SLIST_INSERT_HEAD(&sd->sd_meta_opt, omi, omi_link);

			/* See if discipline wants to handle it. */
			if (sd->sd_meta_opt_load &&
			    sd->sd_meta_opt_load(sd, &omi->omi_om) == 0)
				continue;
			else
				sr_meta_opt_load(sd, &omi->omi_om);

			om++;
		}

		cp++;
		no_disk++;
	}

	free(sm, M_DEVBUF);
	if (fm)
		free(fm, M_DEVBUF);

done:
	DNPRINTF(SR_D_META, "%s: sr_meta_read found %d parts\n", DEVNAME(sc),
	    no_disk);
	return (no_disk);
}

int
sr_meta_validate(struct sr_discipline *sd, dev_t dev, struct sr_metadata *sm,
    void *fm)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct sr_meta_driver	*s;
#ifdef SR_DEBUG
	struct sr_meta_chunk	*mc;
#endif
	char			devname[32];
	int			rv = 1;
	u_int8_t		checksum[MD5_DIGEST_LENGTH];

	DNPRINTF(SR_D_META, "%s: sr_meta_validate(%p)\n", DEVNAME(sc), sm);

	sr_meta_getdevname(sc, dev, devname, sizeof(devname));

	s = &smd[sd->sd_meta_type];
	if (sd->sd_meta_type != SR_META_F_NATIVE)
		if (s->smd_validate(sd, sm, fm)) {
			printf("%s: invalid foreign metadata\n", DEVNAME(sc));
			goto done;
		}

	/*
	 * at this point all foreign metadata has been translated to the native
	 * format and will be treated just like the native format
	 */

	if (sm->ssdi.ssd_magic != SR_MAGIC) {
		printf("%s: not valid softraid metadata\n", DEVNAME(sc));
		goto done;
	}

	if (sm->ssdi.ssd_version != SR_META_VERSION) {
		printf("%s: %s can not read metadata version %u, expected %u\n",
		    DEVNAME(sc), devname, sm->ssdi.ssd_version,
		    SR_META_VERSION);
		goto done;
	}

	sr_checksum(sc, sm, &checksum, sizeof(struct sr_meta_invariant));
	if (bcmp(&checksum, &sm->ssd_checksum, sizeof(checksum))) {
		printf("%s: invalid metadata checksum\n", DEVNAME(sc));
		goto done;
	}

	/* XXX do other checksums */

#ifdef SR_DEBUG
	/* warn if disk changed order */
	mc = (struct sr_meta_chunk *)(sm + 1);
	if (strncmp(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname, devname,
	    sizeof(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname)))
		DNPRINTF(SR_D_META, "%s: roaming device %s -> %s\n",
		    DEVNAME(sc), mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname,
		    devname);
#endif

	/* we have meta data on disk */
	DNPRINTF(SR_D_META, "%s: sr_meta_validate valid metadata %s\n",
	    DEVNAME(sc), devname);

	rv = 0;
done: 852 return (rv); 853 } 854 855 int 856 sr_meta_native_bootprobe(struct sr_softc *sc, struct device *dv, 857 struct sr_metadata_list_head *mlh) 858 { 859 struct vnode *vn; 860 struct disklabel label; 861 struct sr_metadata *md = NULL; 862 struct sr_discipline *fake_sd = NULL; 863 struct sr_metadata_list *mle; 864 char devname[32]; 865 dev_t dev, devr; 866 int error, i, majdev; 867 int rv = SR_META_NOTCLAIMED; 868 869 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe\n", DEVNAME(sc)); 870 871 majdev = findblkmajor(dv); 872 if (majdev == -1) 873 goto done; 874 dev = MAKEDISKDEV(majdev, dv->dv_unit, RAW_PART); 875 876 /* 877 * Use character raw device to avoid SCSI complaints about missing 878 * media on removable media devices. 879 */ 880 dev = MAKEDISKDEV(major(blktochr(dev)), dv->dv_unit, RAW_PART); 881 if (cdevvp(dev, &vn)) { 882 printf("%s:, sr_meta_native_bootprobe: can't allocate vnode\n", 883 DEVNAME(sc)); 884 goto done; 885 } 886 887 /* open device */ 888 error = VOP_OPEN(vn, FREAD, NOCRED, 0); 889 if (error) { 890 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe open " 891 "failed\n", DEVNAME(sc)); 892 vput(vn); 893 goto done; 894 } 895 896 /* get disklabel */ 897 error = VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD, NOCRED, 0); 898 if (error) { 899 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe ioctl " 900 "failed\n", DEVNAME(sc)); 901 VOP_CLOSE(vn, FREAD, NOCRED, 0); 902 vput(vn); 903 goto done; 904 } 905 906 /* we are done, close device */ 907 error = VOP_CLOSE(vn, FREAD, NOCRED, 0); 908 if (error) { 909 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe close " 910 "failed\n", DEVNAME(sc)); 911 vput(vn); 912 goto done; 913 } 914 vput(vn); 915 916 md = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO); 917 if (md == NULL) { 918 printf("%s: not enough memory for metadata buffer\n", 919 DEVNAME(sc)); 920 goto done; 921 } 922 923 /* create fake sd to use utility functions */ 924 fake_sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_ZERO); 925 if (fake_sd == NULL) { 926 printf("%s: not enough memory for fake discipline\n", 927 DEVNAME(sc)); 928 goto done; 929 } 930 fake_sd->sd_sc = sc; 931 fake_sd->sd_meta_type = SR_META_F_NATIVE; 932 933 for (i = 0; i < MAXPARTITIONS; i++) { 934 if (label.d_partitions[i].p_fstype != FS_RAID) 935 continue; 936 937 /* open partition */ 938 devr = MAKEDISKDEV(majdev, dv->dv_unit, i); 939 if (bdevvp(devr, &vn)) { 940 printf("%s:, sr_meta_native_bootprobe: can't allocate " 941 "vnode for partition\n", DEVNAME(sc)); 942 goto done; 943 } 944 error = VOP_OPEN(vn, FREAD, NOCRED, 0); 945 if (error) { 946 DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe " 947 "open failed, partition %d\n", 948 DEVNAME(sc), i); 949 vput(vn); 950 continue; 951 } 952 953 if (sr_meta_native_read(fake_sd, devr, md, NULL)) { 954 printf("%s: native bootprobe could not read native " 955 "metadata\n", DEVNAME(sc)); 956 VOP_CLOSE(vn, FREAD, NOCRED, 0); 957 vput(vn); 958 continue; 959 } 960 961 /* are we a softraid partition? 
*/ 962 if (md->ssdi.ssd_magic != SR_MAGIC) { 963 VOP_CLOSE(vn, FREAD, NOCRED, 0); 964 vput(vn); 965 continue; 966 } 967 968 sr_meta_getdevname(sc, devr, devname, sizeof(devname)); 969 if (sr_meta_validate(fake_sd, devr, md, NULL) == 0) { 970 if (md->ssdi.ssd_flags & BIOC_SCNOAUTOASSEMBLE) { 971 DNPRINTF(SR_D_META, "%s: don't save %s\n", 972 DEVNAME(sc), devname); 973 } else { 974 /* XXX fix M_WAITOK, this is boot time */ 975 mle = malloc(sizeof(*mle), M_DEVBUF, 976 M_WAITOK | M_ZERO); 977 bcopy(md, &mle->sml_metadata, 978 SR_META_SIZE * 512); 979 mle->sml_mm = devr; 980 SLIST_INSERT_HEAD(mlh, mle, sml_link); 981 rv = SR_META_CLAIMED; 982 } 983 } 984 985 /* we are done, close partition */ 986 VOP_CLOSE(vn, FREAD, NOCRED, 0); 987 vput(vn); 988 } 989 990 done: 991 if (fake_sd) 992 free(fake_sd, M_DEVBUF); 993 if (md) 994 free(md, M_DEVBUF); 995 996 return (rv); 997 } 998 999 int 1000 sr_boot_assembly(struct sr_softc *sc) 1001 { 1002 struct device *dv; 1003 struct bioc_createraid bc; 1004 struct sr_metadata_list_head mlh, kdh; 1005 struct sr_metadata_list *mle, *mlenext, *mle1, *mle2; 1006 struct sr_metadata *metadata; 1007 struct sr_boot_volume_head bvh; 1008 struct sr_boot_volume *vol, *vp1, *vp2; 1009 struct sr_meta_chunk *hm; 1010 struct sr_chunk_head *cl; 1011 struct sr_chunk *hotspare, *chunk, *last; 1012 u_int32_t chunk_id; 1013 u_int64_t *ondisk = NULL; 1014 dev_t *devs = NULL; 1015 char devname[32]; 1016 int rv = 0, i; 1017 1018 DNPRINTF(SR_D_META, "%s: sr_boot_assembly\n", DEVNAME(sc)); 1019 1020 SLIST_INIT(&mlh); 1021 1022 TAILQ_FOREACH(dv, &alldevs, dv_list) { 1023 if (dv->dv_class != DV_DISK) 1024 continue; 1025 1026 /* Only check sd(4) and wd(4) devices. */ 1027 if (strcmp(dv->dv_cfdata->cf_driver->cd_name, "sd") && 1028 strcmp(dv->dv_cfdata->cf_driver->cd_name, "wd")) 1029 continue; 1030 1031 /* native softraid uses partitions */ 1032 if (sr_meta_native_bootprobe(sc, dv, &mlh) == SR_META_CLAIMED) 1033 continue; 1034 1035 /* probe non-native disks */ 1036 } 1037 1038 /* 1039 * Create a list of volumes and associate chunks with each volume. 1040 */ 1041 1042 SLIST_INIT(&bvh); 1043 SLIST_INIT(&kdh); 1044 1045 for (mle = SLIST_FIRST(&mlh); mle != SLIST_END(&mlh); mle = mlenext) { 1046 1047 mlenext = SLIST_NEXT(mle, sml_link); 1048 SLIST_REMOVE(&mlh, mle, sr_metadata_list, sml_link); 1049 1050 metadata = (struct sr_metadata *)&mle->sml_metadata; 1051 mle->sml_chunk_id = metadata->ssdi.ssd_chunk_id; 1052 1053 /* Handle key disks separately. */ 1054 if (metadata->ssdi.ssd_level == SR_KEYDISK_LEVEL) { 1055 SLIST_INSERT_HEAD(&kdh, mle, sml_link); 1056 continue; 1057 } 1058 1059 SLIST_FOREACH(vol, &bvh, sbv_link) { 1060 if (bcmp(&metadata->ssdi.ssd_uuid, &vol->sbv_uuid, 1061 sizeof(metadata->ssdi.ssd_uuid)) == 0) 1062 break; 1063 } 1064 1065 if (vol == NULL) { 1066 vol = malloc(sizeof(struct sr_boot_volume), 1067 M_DEVBUF, M_NOWAIT | M_CANFAIL | M_ZERO); 1068 if (vol == NULL) { 1069 printf("%s: failed to allocate boot volume!\n", 1070 DEVNAME(sc)); 1071 goto unwind; 1072 } 1073 1074 vol->sbv_level = metadata->ssdi.ssd_level; 1075 vol->sbv_volid = metadata->ssdi.ssd_volid; 1076 vol->sbv_chunk_no = metadata->ssdi.ssd_chunk_no; 1077 bcopy(&metadata->ssdi.ssd_uuid, &vol->sbv_uuid, 1078 sizeof(metadata->ssdi.ssd_uuid)); 1079 SLIST_INIT(&vol->sml); 1080 1081 /* Maintain volume order. 
*/ 1082 vp2 = NULL; 1083 SLIST_FOREACH(vp1, &bvh, sbv_link) { 1084 if (vp1->sbv_volid > vol->sbv_volid) 1085 break; 1086 vp2 = vp1; 1087 } 1088 if (vp2 == NULL) { 1089 DNPRINTF(SR_D_META, "%s: insert volume %u " 1090 "at head\n", DEVNAME(sc), vol->sbv_volid); 1091 SLIST_INSERT_HEAD(&bvh, vol, sbv_link); 1092 } else { 1093 DNPRINTF(SR_D_META, "%s: insert volume %u " 1094 "after %u\n", DEVNAME(sc), vol->sbv_volid, 1095 vp2->sbv_volid); 1096 SLIST_INSERT_AFTER(vp2, vol, sbv_link); 1097 } 1098 } 1099 1100 /* Maintain chunk order. */ 1101 mle2 = NULL; 1102 SLIST_FOREACH(mle1, &vol->sml, sml_link) { 1103 if (mle1->sml_chunk_id > mle->sml_chunk_id) 1104 break; 1105 mle2 = mle1; 1106 } 1107 if (mle2 == NULL) { 1108 DNPRINTF(SR_D_META, "%s: volume %u insert chunk %u " 1109 "at head\n", DEVNAME(sc), vol->sbv_volid, 1110 mle->sml_chunk_id); 1111 SLIST_INSERT_HEAD(&vol->sml, mle, sml_link); 1112 } else { 1113 DNPRINTF(SR_D_META, "%s: volume %u insert chunk %u " 1114 "after %u\n", DEVNAME(sc), vol->sbv_volid, 1115 mle->sml_chunk_id, mle2->sml_chunk_id); 1116 SLIST_INSERT_AFTER(mle2, mle, sml_link); 1117 } 1118 1119 vol->sbv_dev_no++; 1120 } 1121 1122 /* Allocate memory for device and ondisk version arrays. */ 1123 devs = malloc(BIOC_CRMAXLEN * sizeof(dev_t), M_DEVBUF, 1124 M_NOWAIT | M_CANFAIL); 1125 if (devs == NULL) { 1126 printf("%s: failed to allocate device array\n", DEVNAME(sc)); 1127 goto unwind; 1128 } 1129 ondisk = malloc(BIOC_CRMAXLEN * sizeof(u_int64_t), M_DEVBUF, 1130 M_NOWAIT | M_CANFAIL); 1131 if (ondisk == NULL) { 1132 printf("%s: failed to allocate ondisk array\n", DEVNAME(sc)); 1133 goto unwind; 1134 } 1135 1136 /* 1137 * Assemble hotspare "volumes". 1138 */ 1139 SLIST_FOREACH(vol, &bvh, sbv_link) { 1140 1141 /* Check if this is a hotspare "volume". */ 1142 if (vol->sbv_level != SR_HOTSPARE_LEVEL || 1143 vol->sbv_chunk_no != 1) 1144 continue; 1145 1146 #ifdef SR_DEBUG 1147 DNPRINTF(SR_D_META, "%s: assembling hotspare volume ", 1148 DEVNAME(sc)); 1149 if (sr_debug & SR_D_META) 1150 sr_uuid_print(&vol->sbv_uuid, 0); 1151 DNPRINTF(SR_D_META, " volid %u with %u chunks\n", 1152 vol->sbv_volid, vol->sbv_chunk_no); 1153 #endif 1154 1155 /* Create hotspare chunk metadata. */ 1156 hotspare = malloc(sizeof(struct sr_chunk), M_DEVBUF, 1157 M_NOWAIT | M_CANFAIL | M_ZERO); 1158 if (hotspare == NULL) { 1159 printf("%s: failed to allocate hotspare\n", 1160 DEVNAME(sc)); 1161 goto unwind; 1162 } 1163 1164 mle = SLIST_FIRST(&vol->sml); 1165 sr_meta_getdevname(sc, mle->sml_mm, devname, sizeof(devname)); 1166 hotspare->src_dev_mm = mle->sml_mm; 1167 strlcpy(hotspare->src_devname, devname, 1168 sizeof(hotspare->src_devname)); 1169 hotspare->src_size = metadata->ssdi.ssd_size; 1170 1171 hm = &hotspare->src_meta; 1172 hm->scmi.scm_volid = SR_HOTSPARE_VOLID; 1173 hm->scmi.scm_chunk_id = 0; 1174 hm->scmi.scm_size = metadata->ssdi.ssd_size; 1175 hm->scmi.scm_coerced_size = metadata->ssdi.ssd_size; 1176 strlcpy(hm->scmi.scm_devname, devname, 1177 sizeof(hm->scmi.scm_devname)); 1178 bcopy(&metadata->ssdi.ssd_uuid, &hm->scmi.scm_uuid, 1179 sizeof(struct sr_uuid)); 1180 1181 sr_checksum(sc, hm, &hm->scm_checksum, 1182 sizeof(struct sr_meta_chunk_invariant)); 1183 1184 hm->scm_status = BIOC_SDHOTSPARE; 1185 1186 /* Add chunk to hotspare list. 
*/ 1187 rw_enter_write(&sc->sc_hs_lock); 1188 cl = &sc->sc_hotspare_list; 1189 if (SLIST_EMPTY(cl)) 1190 SLIST_INSERT_HEAD(cl, hotspare, src_link); 1191 else { 1192 SLIST_FOREACH(chunk, cl, src_link) 1193 last = chunk; 1194 SLIST_INSERT_AFTER(last, hotspare, src_link); 1195 } 1196 sc->sc_hotspare_no++; 1197 rw_exit_write(&sc->sc_hs_lock); 1198 1199 } 1200 1201 /* 1202 * Assemble RAID volumes. 1203 */ 1204 SLIST_FOREACH(vol, &bvh, sbv_link) { 1205 1206 bzero(&bc, sizeof(bc)); 1207 1208 /* Check if this is a hotspare "volume". */ 1209 if (vol->sbv_level == SR_HOTSPARE_LEVEL && 1210 vol->sbv_chunk_no == 1) 1211 continue; 1212 1213 #ifdef SR_DEBUG 1214 DNPRINTF(SR_D_META, "%s: assembling volume ", DEVNAME(sc)); 1215 if (sr_debug & SR_D_META) 1216 sr_uuid_print(&vol->sbv_uuid, 0); 1217 DNPRINTF(SR_D_META, " volid %u with %u chunks\n", 1218 vol->sbv_volid, vol->sbv_chunk_no); 1219 #endif 1220 1221 /* 1222 * If this is a crypto volume, try to find a matching 1223 * key disk... 1224 */ 1225 bc.bc_key_disk = NODEV; 1226 if (vol->sbv_level == 'C') { 1227 SLIST_FOREACH(mle, &kdh, sml_link) { 1228 metadata = 1229 (struct sr_metadata *)&mle->sml_metadata; 1230 if (bcmp(&metadata->ssdi.ssd_uuid, 1231 &vol->sbv_uuid, 1232 sizeof(metadata->ssdi.ssd_uuid)) == 0) { 1233 bc.bc_key_disk = mle->sml_mm; 1234 } 1235 } 1236 } 1237 1238 for (i = 0; i < BIOC_CRMAXLEN; i++) { 1239 devs[i] = NODEV; /* mark device as illegal */ 1240 ondisk[i] = 0; 1241 } 1242 1243 SLIST_FOREACH(mle, &vol->sml, sml_link) { 1244 metadata = (struct sr_metadata *)&mle->sml_metadata; 1245 chunk_id = metadata->ssdi.ssd_chunk_id; 1246 1247 if (devs[chunk_id] != NODEV) { 1248 vol->sbv_dev_no--; 1249 sr_meta_getdevname(sc, mle->sml_mm, devname, 1250 sizeof(devname)); 1251 printf("%s: found duplicate chunk %u for " 1252 "volume %u on device %s\n", DEVNAME(sc), 1253 chunk_id, vol->sbv_volid, devname); 1254 } 1255 1256 if (devs[chunk_id] == NODEV || 1257 metadata->ssd_ondisk > ondisk[chunk_id]) { 1258 devs[chunk_id] = mle->sml_mm; 1259 ondisk[chunk_id] = metadata->ssd_ondisk; 1260 DNPRINTF(SR_D_META, "%s: using ondisk " 1261 "metadata version %llu for chunk %u\n", 1262 DEVNAME(sc), ondisk[chunk_id], chunk_id); 1263 } 1264 } 1265 1266 if (vol->sbv_chunk_no != vol->sbv_dev_no) { 1267 printf("%s: not all chunks were provided; " 1268 "attempting to bring volume %d online\n", 1269 DEVNAME(sc), vol->sbv_volid); 1270 } 1271 1272 bc.bc_level = vol->sbv_level; 1273 bc.bc_dev_list_len = vol->sbv_chunk_no * sizeof(dev_t); 1274 bc.bc_dev_list = devs; 1275 bc.bc_flags = BIOC_SCDEVT; 1276 1277 rw_enter_write(&sc->sc_lock); 1278 sr_ioctl_createraid(sc, &bc, 0); 1279 rw_exit_write(&sc->sc_lock); 1280 1281 rv++; 1282 } 1283 1284 /* done with metadata */ 1285 unwind: 1286 for (vp1 = SLIST_FIRST(&bvh); vp1 != SLIST_END(&bvh); vp1 = vp2) { 1287 vp2 = SLIST_NEXT(vp1, sbv_link); 1288 for (mle1 = SLIST_FIRST(&vp1->sml); 1289 mle1 != SLIST_END(&vp1->sml); mle1 = mle2) { 1290 mle2 = SLIST_NEXT(mle1, sml_link); 1291 free(mle1, M_DEVBUF); 1292 } 1293 free(vp1, M_DEVBUF); 1294 } 1295 for (mle = SLIST_FIRST(&mlh); mle != SLIST_END(&mlh); mle = mle2) { 1296 mle2 = SLIST_NEXT(mle, sml_link); 1297 free(mle, M_DEVBUF); 1298 } 1299 SLIST_INIT(&mlh); 1300 1301 if (devs) 1302 free(devs, M_DEVBUF); 1303 if (ondisk) 1304 free(ondisk, M_DEVBUF); 1305 1306 return (rv); 1307 } 1308 1309 int 1310 sr_meta_native_probe(struct sr_softc *sc, struct sr_chunk *ch_entry) 1311 { 1312 struct disklabel label; 1313 char *devname; 1314 int error, part; 1315 daddr64_t size; 1316 1317 
DNPRINTF(SR_D_META, "%s: sr_meta_native_probe(%s)\n", 1318 DEVNAME(sc), ch_entry->src_devname); 1319 1320 devname = ch_entry->src_devname; 1321 part = DISKPART(ch_entry->src_dev_mm); 1322 1323 /* get disklabel */ 1324 error = VOP_IOCTL(ch_entry->src_vn, DIOCGDINFO, (caddr_t)&label, FREAD, 1325 NOCRED, 0); 1326 if (error) { 1327 DNPRINTF(SR_D_META, "%s: %s can't obtain disklabel\n", 1328 DEVNAME(sc), devname); 1329 goto unwind; 1330 } 1331 1332 /* make sure the partition is of the right type */ 1333 if (label.d_partitions[part].p_fstype != FS_RAID) { 1334 DNPRINTF(SR_D_META, 1335 "%s: %s partition not of type RAID (%d)\n", DEVNAME(sc), 1336 devname, 1337 label.d_partitions[part].p_fstype); 1338 goto unwind; 1339 } 1340 1341 size = DL_GETPSIZE(&label.d_partitions[part]) - SR_DATA_OFFSET; 1342 if (size <= 0) { 1343 DNPRINTF(SR_D_META, "%s: %s partition too small\n", DEVNAME(sc), 1344 devname); 1345 goto unwind; 1346 } 1347 ch_entry->src_size = size; 1348 1349 DNPRINTF(SR_D_META, "%s: probe found %s size %d\n", DEVNAME(sc), 1350 devname, size); 1351 1352 return (SR_META_F_NATIVE); 1353 unwind: 1354 DNPRINTF(SR_D_META, "%s: invalid device: %s\n", DEVNAME(sc), 1355 devname ? devname : "nodev"); 1356 return (SR_META_F_INVALID); 1357 } 1358 1359 int 1360 sr_meta_native_attach(struct sr_discipline *sd, int force) 1361 { 1362 struct sr_softc *sc = sd->sd_sc; 1363 struct sr_chunk_head *cl = &sd->sd_vol.sv_chunk_list; 1364 struct sr_metadata *md = NULL; 1365 struct sr_chunk *ch_entry, *ch_next; 1366 struct sr_uuid uuid; 1367 u_int64_t version = 0; 1368 int sr, not_sr, rv = 1, d, expected = -1, old_meta = 0; 1369 1370 DNPRINTF(SR_D_META, "%s: sr_meta_native_attach\n", DEVNAME(sc)); 1371 1372 md = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO); 1373 if (md == NULL) { 1374 printf("%s: not enough memory for metadata buffer\n", 1375 DEVNAME(sc)); 1376 goto bad; 1377 } 1378 1379 bzero(&uuid, sizeof uuid); 1380 1381 sr = not_sr = d = 0; 1382 SLIST_FOREACH(ch_entry, cl, src_link) { 1383 if (ch_entry->src_dev_mm == NODEV) 1384 continue; 1385 1386 if (sr_meta_native_read(sd, ch_entry->src_dev_mm, md, NULL)) { 1387 printf("%s: could not read native metadata\n", 1388 DEVNAME(sc)); 1389 goto bad; 1390 } 1391 1392 if (md->ssdi.ssd_magic == SR_MAGIC) { 1393 sr++; 1394 ch_entry->src_meta.scmi.scm_chunk_id = 1395 md->ssdi.ssd_chunk_id; 1396 if (d == 0) { 1397 bcopy(&md->ssdi.ssd_uuid, &uuid, sizeof uuid); 1398 expected = md->ssdi.ssd_chunk_no; 1399 version = md->ssd_ondisk; 1400 d++; 1401 continue; 1402 } else if (bcmp(&md->ssdi.ssd_uuid, &uuid, 1403 sizeof uuid)) { 1404 printf("%s: not part of the same volume\n", 1405 DEVNAME(sc)); 1406 goto bad; 1407 } 1408 if (md->ssd_ondisk != version) { 1409 old_meta++; 1410 version = MAX(md->ssd_ondisk, version); 1411 } 1412 } else 1413 not_sr++; 1414 } 1415 1416 if (sr && not_sr) { 1417 printf("%s: not all chunks are of the native metadata format\n", 1418 DEVNAME(sc)); 1419 goto bad; 1420 } 1421 1422 /* mixed metadata versions; mark bad disks offline */ 1423 if (old_meta) { 1424 d = 0; 1425 for (ch_entry = SLIST_FIRST(cl); ch_entry != SLIST_END(cl); 1426 ch_entry = ch_next, d++) { 1427 ch_next = SLIST_NEXT(ch_entry, src_link); 1428 1429 /* XXX do we want to read this again? 
*/ 1430 if (ch_entry->src_dev_mm == NODEV) 1431 panic("src_dev_mm == NODEV"); 1432 if (sr_meta_native_read(sd, ch_entry->src_dev_mm, md, 1433 NULL)) 1434 printf("%s: could not read native metadata\n", 1435 DEVNAME(sc)); 1436 if (md->ssd_ondisk != version) 1437 sd->sd_vol.sv_chunks[d]->src_meta.scm_status = 1438 BIOC_SDOFFLINE; 1439 } 1440 } 1441 1442 if (expected != sr && !force && expected != -1) { 1443 DNPRINTF(SR_D_META, "%s: not all chunks were provided, trying " 1444 "anyway\n", DEVNAME(sc)); 1445 } 1446 1447 rv = 0; 1448 bad: 1449 if (md) 1450 free(md, M_DEVBUF); 1451 return (rv); 1452 } 1453 1454 int 1455 sr_meta_native_read(struct sr_discipline *sd, dev_t dev, 1456 struct sr_metadata *md, void *fm) 1457 { 1458 #ifdef SR_DEBUG 1459 struct sr_softc *sc = sd->sd_sc; 1460 #endif 1461 DNPRINTF(SR_D_META, "%s: sr_meta_native_read(0x%x, %p)\n", 1462 DEVNAME(sc), dev, md); 1463 1464 return (sr_meta_rw(sd, dev, md, SR_META_SIZE * 512, SR_META_OFFSET, 1465 B_READ)); 1466 } 1467 1468 int 1469 sr_meta_native_write(struct sr_discipline *sd, dev_t dev, 1470 struct sr_metadata *md, void *fm) 1471 { 1472 #ifdef SR_DEBUG 1473 struct sr_softc *sc = sd->sd_sc; 1474 #endif 1475 DNPRINTF(SR_D_META, "%s: sr_meta_native_write(0x%x, %p)\n", 1476 DEVNAME(sc), dev, md); 1477 1478 return (sr_meta_rw(sd, dev, md, SR_META_SIZE * 512, SR_META_OFFSET, 1479 B_WRITE)); 1480 } 1481 1482 void 1483 sr_hotplug_register(struct sr_discipline *sd, void *func) 1484 { 1485 struct sr_hotplug_list *mhe; 1486 1487 DNPRINTF(SR_D_MISC, "%s: sr_hotplug_register: %p\n", 1488 DEVNAME(sd->sd_sc), func); 1489 1490 /* make sure we aren't on the list yet */ 1491 SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link) 1492 if (mhe->sh_hotplug == func) 1493 return; 1494 1495 mhe = malloc(sizeof(struct sr_hotplug_list), M_DEVBUF, 1496 M_WAITOK | M_ZERO); 1497 mhe->sh_hotplug = func; 1498 mhe->sh_sd = sd; 1499 SLIST_INSERT_HEAD(&sr_hotplug_callbacks, mhe, shl_link); 1500 } 1501 1502 void 1503 sr_hotplug_unregister(struct sr_discipline *sd, void *func) 1504 { 1505 struct sr_hotplug_list *mhe; 1506 1507 DNPRINTF(SR_D_MISC, "%s: sr_hotplug_unregister: %s %p\n", 1508 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, func); 1509 1510 /* make sure we are on the list yet */ 1511 SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link) 1512 if (mhe->sh_hotplug == func) { 1513 SLIST_REMOVE(&sr_hotplug_callbacks, mhe, 1514 sr_hotplug_list, shl_link); 1515 free(mhe, M_DEVBUF); 1516 if (SLIST_EMPTY(&sr_hotplug_callbacks)) 1517 SLIST_INIT(&sr_hotplug_callbacks); 1518 return; 1519 } 1520 } 1521 1522 void 1523 sr_disk_attach(struct disk *diskp, int action) 1524 { 1525 struct sr_hotplug_list *mhe; 1526 1527 SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link) 1528 if (mhe->sh_sd->sd_ready) 1529 mhe->sh_hotplug(mhe->sh_sd, diskp, action); 1530 } 1531 1532 int 1533 sr_match(struct device *parent, void *match, void *aux) 1534 { 1535 return (1); 1536 } 1537 1538 void 1539 sr_attach(struct device *parent, struct device *self, void *aux) 1540 { 1541 struct sr_softc *sc = (void *)self; 1542 1543 DNPRINTF(SR_D_MISC, "\n%s: sr_attach", DEVNAME(sc)); 1544 1545 rw_init(&sc->sc_lock, "sr_lock"); 1546 rw_init(&sc->sc_hs_lock, "sr_hs_lock"); 1547 1548 SLIST_INIT(&sr_hotplug_callbacks); 1549 SLIST_INIT(&sc->sc_hotspare_list); 1550 1551 if (bio_register(&sc->sc_dev, sr_ioctl) != 0) 1552 printf("%s: controller registration failed", DEVNAME(sc)); 1553 else 1554 sc->sc_ioctl = sr_ioctl; 1555 1556 printf("\n"); 1557 1558 softraid_disk_attach = sr_disk_attach; 1559 1560 
sr_boot_assembly(sc); 1561 } 1562 1563 int 1564 sr_detach(struct device *self, int flags) 1565 { 1566 return (0); 1567 } 1568 1569 int 1570 sr_activate(struct device *self, int act) 1571 { 1572 return (1); 1573 } 1574 1575 void 1576 sr_minphys(struct buf *bp, struct scsi_link *sl) 1577 { 1578 DNPRINTF(SR_D_MISC, "sr_minphys: %d\n", bp->b_bcount); 1579 1580 /* XXX currently using SR_MAXFER = MAXPHYS */ 1581 if (bp->b_bcount > SR_MAXFER) 1582 bp->b_bcount = SR_MAXFER; 1583 minphys(bp); 1584 } 1585 1586 void 1587 sr_copy_internal_data(struct scsi_xfer *xs, void *v, size_t size) 1588 { 1589 size_t copy_cnt; 1590 1591 DNPRINTF(SR_D_MISC, "sr_copy_internal_data xs: %p size: %d\n", 1592 xs, size); 1593 1594 if (xs->datalen) { 1595 copy_cnt = MIN(size, xs->datalen); 1596 bcopy(v, xs->data, copy_cnt); 1597 } 1598 } 1599 1600 int 1601 sr_ccb_alloc(struct sr_discipline *sd) 1602 { 1603 struct sr_ccb *ccb; 1604 int i; 1605 1606 if (!sd) 1607 return (1); 1608 1609 DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc\n", DEVNAME(sd->sd_sc)); 1610 1611 if (sd->sd_ccb) 1612 return (1); 1613 1614 sd->sd_ccb = malloc(sizeof(struct sr_ccb) * 1615 sd->sd_max_wu * sd->sd_max_ccb_per_wu, M_DEVBUF, M_WAITOK | M_ZERO); 1616 TAILQ_INIT(&sd->sd_ccb_freeq); 1617 for (i = 0; i < sd->sd_max_wu * sd->sd_max_ccb_per_wu; i++) { 1618 ccb = &sd->sd_ccb[i]; 1619 ccb->ccb_dis = sd; 1620 sr_ccb_put(ccb); 1621 } 1622 1623 DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc ccb: %d\n", 1624 DEVNAME(sd->sd_sc), sd->sd_max_wu * sd->sd_max_ccb_per_wu); 1625 1626 return (0); 1627 } 1628 1629 void 1630 sr_ccb_free(struct sr_discipline *sd) 1631 { 1632 struct sr_ccb *ccb; 1633 1634 if (!sd) 1635 return; 1636 1637 DNPRINTF(SR_D_CCB, "%s: sr_ccb_free %p\n", DEVNAME(sd->sd_sc), sd); 1638 1639 while ((ccb = TAILQ_FIRST(&sd->sd_ccb_freeq)) != NULL) 1640 TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link); 1641 1642 if (sd->sd_ccb) 1643 free(sd->sd_ccb, M_DEVBUF); 1644 } 1645 1646 struct sr_ccb * 1647 sr_ccb_get(struct sr_discipline *sd) 1648 { 1649 struct sr_ccb *ccb; 1650 int s; 1651 1652 s = splbio(); 1653 1654 ccb = TAILQ_FIRST(&sd->sd_ccb_freeq); 1655 if (ccb) { 1656 TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link); 1657 ccb->ccb_state = SR_CCB_INPROGRESS; 1658 } 1659 1660 splx(s); 1661 1662 DNPRINTF(SR_D_CCB, "%s: sr_ccb_get: %p\n", DEVNAME(sd->sd_sc), 1663 ccb); 1664 1665 return (ccb); 1666 } 1667 1668 void 1669 sr_ccb_put(struct sr_ccb *ccb) 1670 { 1671 struct sr_discipline *sd = ccb->ccb_dis; 1672 int s; 1673 1674 DNPRINTF(SR_D_CCB, "%s: sr_ccb_put: %p\n", DEVNAME(sd->sd_sc), 1675 ccb); 1676 1677 s = splbio(); 1678 1679 ccb->ccb_wu = NULL; 1680 ccb->ccb_state = SR_CCB_FREE; 1681 ccb->ccb_target = -1; 1682 ccb->ccb_opaque = NULL; 1683 1684 TAILQ_INSERT_TAIL(&sd->sd_ccb_freeq, ccb, ccb_link); 1685 1686 splx(s); 1687 } 1688 1689 int 1690 sr_wu_alloc(struct sr_discipline *sd) 1691 { 1692 struct sr_workunit *wu; 1693 int i, no_wu; 1694 1695 if (!sd) 1696 return (1); 1697 1698 DNPRINTF(SR_D_WU, "%s: sr_wu_alloc %p %d\n", DEVNAME(sd->sd_sc), 1699 sd, sd->sd_max_wu); 1700 1701 if (sd->sd_wu) 1702 return (1); 1703 1704 no_wu = sd->sd_max_wu; 1705 sd->sd_wu_pending = no_wu; 1706 1707 sd->sd_wu = malloc(sizeof(struct sr_workunit) * no_wu, 1708 M_DEVBUF, M_WAITOK | M_ZERO); 1709 TAILQ_INIT(&sd->sd_wu_freeq); 1710 TAILQ_INIT(&sd->sd_wu_pendq); 1711 TAILQ_INIT(&sd->sd_wu_defq); 1712 for (i = 0; i < no_wu; i++) { 1713 wu = &sd->sd_wu[i]; 1714 wu->swu_dis = sd; 1715 sr_wu_put(wu); 1716 } 1717 1718 return (0); 1719 } 1720 1721 void 1722 sr_wu_free(struct sr_discipline *sd) 1723 { 1724 
struct sr_workunit *wu; 1725 1726 if (!sd) 1727 return; 1728 1729 DNPRINTF(SR_D_WU, "%s: sr_wu_free %p\n", DEVNAME(sd->sd_sc), sd); 1730 1731 while ((wu = TAILQ_FIRST(&sd->sd_wu_freeq)) != NULL) 1732 TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link); 1733 while ((wu = TAILQ_FIRST(&sd->sd_wu_pendq)) != NULL) 1734 TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link); 1735 while ((wu = TAILQ_FIRST(&sd->sd_wu_defq)) != NULL) 1736 TAILQ_REMOVE(&sd->sd_wu_defq, wu, swu_link); 1737 1738 if (sd->sd_wu) 1739 free(sd->sd_wu, M_DEVBUF); 1740 } 1741 1742 void 1743 sr_wu_put(struct sr_workunit *wu) 1744 { 1745 struct sr_discipline *sd = wu->swu_dis; 1746 struct sr_ccb *ccb; 1747 1748 int s; 1749 1750 DNPRINTF(SR_D_WU, "%s: sr_wu_put: %p\n", DEVNAME(sd->sd_sc), wu); 1751 1752 s = splbio(); 1753 1754 wu->swu_xs = NULL; 1755 wu->swu_state = SR_WU_FREE; 1756 wu->swu_ios_complete = 0; 1757 wu->swu_ios_failed = 0; 1758 wu->swu_ios_succeeded = 0; 1759 wu->swu_io_count = 0; 1760 wu->swu_blk_start = 0; 1761 wu->swu_blk_end = 0; 1762 wu->swu_collider = NULL; 1763 wu->swu_fake = 0; 1764 wu->swu_flags = 0; 1765 1766 while ((ccb = TAILQ_FIRST(&wu->swu_ccb)) != NULL) { 1767 TAILQ_REMOVE(&wu->swu_ccb, ccb, ccb_link); 1768 sr_ccb_put(ccb); 1769 } 1770 TAILQ_INIT(&wu->swu_ccb); 1771 1772 TAILQ_INSERT_TAIL(&sd->sd_wu_freeq, wu, swu_link); 1773 sd->sd_wu_pending--; 1774 1775 /* wake up sleepers */ 1776 #ifdef DIAGNOSTIC 1777 if (sd->sd_wu_sleep < 0) 1778 panic("negative wu sleepers"); 1779 #endif /* DIAGNOSTIC */ 1780 if (sd->sd_wu_sleep) 1781 wakeup(&sd->sd_wu_sleep); 1782 1783 splx(s); 1784 } 1785 1786 struct sr_workunit * 1787 sr_wu_get(struct sr_discipline *sd, int canwait) 1788 { 1789 struct sr_workunit *wu; 1790 int s; 1791 1792 s = splbio(); 1793 1794 for (;;) { 1795 wu = TAILQ_FIRST(&sd->sd_wu_freeq); 1796 if (wu) { 1797 TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link); 1798 wu->swu_state = SR_WU_INPROGRESS; 1799 sd->sd_wu_pending++; 1800 break; 1801 } else if (wu == NULL && canwait) { 1802 sd->sd_wu_sleep++; 1803 tsleep(&sd->sd_wu_sleep, PRIBIO, "sr_wu_get", 0); 1804 sd->sd_wu_sleep--; 1805 } else 1806 break; 1807 } 1808 1809 splx(s); 1810 1811 DNPRINTF(SR_D_WU, "%s: sr_wu_get: %p\n", DEVNAME(sd->sd_sc), wu); 1812 1813 return (wu); 1814 } 1815 1816 void 1817 sr_scsi_done(struct sr_discipline *sd, struct scsi_xfer *xs) 1818 { 1819 int s; 1820 1821 DNPRINTF(SR_D_DIS, "%s: sr_scsi_done: xs %p\n", DEVNAME(sd->sd_sc), xs); 1822 1823 s = splbio(); 1824 scsi_done(xs); 1825 splx(s); 1826 } 1827 1828 void 1829 sr_scsi_cmd(struct scsi_xfer *xs) 1830 { 1831 int s; 1832 struct scsi_link *link = xs->sc_link; 1833 struct sr_softc *sc = link->adapter_softc; 1834 struct sr_workunit *wu = NULL; 1835 struct sr_discipline *sd; 1836 1837 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: scsibus%d xs: %p " 1838 "flags: %#x\n", DEVNAME(sc), link->scsibus, xs, xs->flags); 1839 1840 sd = sc->sc_dis[link->scsibus]; 1841 if (sd == NULL) { 1842 s = splhigh(); 1843 sd = sc->sc_attach_dis; 1844 splx(s); 1845 1846 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: attaching %p\n", 1847 DEVNAME(sc), sd); 1848 if (sd == NULL) { 1849 printf("%s: sr_scsi_cmd NULL discipline\n", 1850 DEVNAME(sc)); 1851 goto stuffup; 1852 } 1853 } 1854 1855 if (sd->sd_deleted) { 1856 printf("%s: %s device is being deleted, failing io\n", 1857 DEVNAME(sc), sd->sd_meta->ssd_devname); 1858 goto stuffup; 1859 } 1860 1861 /* 1862 * we'll let the midlayer deal with stalls instead of being clever 1863 * and sending sr_wu_get !(xs->flags & SCSI_NOSLEEP) in cansleep 1864 */ 1865 if ((wu = sr_wu_get(sd, 0)) == NULL) 
{ 1866 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd no wu\n", DEVNAME(sc)); 1867 xs->error = XS_NO_CCB; 1868 sr_scsi_done(sd, xs); 1869 return; 1870 } 1871 1872 xs->error = XS_NOERROR; 1873 wu->swu_xs = xs; 1874 1875 /* the midlayer will query LUNs so report sense to stop scanning */ 1876 if (link->target != 0 || link->lun != 0) { 1877 DNPRINTF(SR_D_CMD, "%s: bad target:lun %d:%d\n", 1878 DEVNAME(sc), link->target, link->lun); 1879 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT | 1880 SSD_ERRCODE_VALID; 1881 sd->sd_scsi_sense.flags = SKEY_ILLEGAL_REQUEST; 1882 sd->sd_scsi_sense.add_sense_code = 0x25; 1883 sd->sd_scsi_sense.add_sense_code_qual = 0x00; 1884 sd->sd_scsi_sense.extra_len = 4; 1885 goto stuffup; 1886 } 1887 1888 switch (xs->cmd->opcode) { 1889 case READ_COMMAND: 1890 case READ_BIG: 1891 case READ_16: 1892 case WRITE_COMMAND: 1893 case WRITE_BIG: 1894 case WRITE_16: 1895 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: READ/WRITE %02x\n", 1896 DEVNAME(sc), xs->cmd->opcode); 1897 if (sd->sd_scsi_rw(wu)) 1898 goto stuffup; 1899 break; 1900 1901 case SYNCHRONIZE_CACHE: 1902 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: SYNCHRONIZE_CACHE\n", 1903 DEVNAME(sc)); 1904 if (sd->sd_scsi_sync(wu)) 1905 goto stuffup; 1906 goto complete; 1907 1908 case TEST_UNIT_READY: 1909 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: TEST_UNIT_READY\n", 1910 DEVNAME(sc)); 1911 if (sd->sd_scsi_tur(wu)) 1912 goto stuffup; 1913 goto complete; 1914 1915 case START_STOP: 1916 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: START_STOP\n", 1917 DEVNAME(sc)); 1918 if (sd->sd_scsi_start_stop(wu)) 1919 goto stuffup; 1920 goto complete; 1921 1922 case INQUIRY: 1923 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: INQUIRY\n", 1924 DEVNAME(sc)); 1925 if (sd->sd_scsi_inquiry(wu)) 1926 goto stuffup; 1927 goto complete; 1928 1929 case READ_CAPACITY: 1930 case READ_CAPACITY_16: 1931 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd READ CAPACITY 0x%02x\n", 1932 DEVNAME(sc), xs->cmd->opcode); 1933 if (sd->sd_scsi_read_cap(wu)) 1934 goto stuffup; 1935 goto complete; 1936 1937 case REQUEST_SENSE: 1938 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd REQUEST SENSE\n", 1939 DEVNAME(sc)); 1940 if (sd->sd_scsi_req_sense(wu)) 1941 goto stuffup; 1942 goto complete; 1943 1944 default: 1945 DNPRINTF(SR_D_CMD, "%s: unsupported scsi command %x\n", 1946 DEVNAME(sc), xs->cmd->opcode); 1947 /* XXX might need to add generic function to handle others */ 1948 goto stuffup; 1949 } 1950 1951 return; 1952 stuffup: 1953 if (sd && sd->sd_scsi_sense.error_code) { 1954 xs->error = XS_SENSE; 1955 bcopy(&sd->sd_scsi_sense, &xs->sense, sizeof(xs->sense)); 1956 bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense)); 1957 } else { 1958 xs->error = XS_DRIVER_STUFFUP; 1959 } 1960 complete: 1961 if (wu) 1962 sr_wu_put(wu); 1963 sr_scsi_done(sd, xs); 1964 } 1965 int 1966 sr_scsi_ioctl(struct scsi_link *link, u_long cmd, caddr_t addr, int flag, 1967 struct proc *p) 1968 { 1969 DNPRINTF(SR_D_IOCTL, "%s: sr_scsi_ioctl cmd: %#x\n", 1970 DEVNAME((struct sr_softc *)link->adapter_softc), cmd); 1971 1972 return (sr_ioctl(link->adapter_softc, cmd, addr)); 1973 } 1974 1975 int 1976 sr_ioctl(struct device *dev, u_long cmd, caddr_t addr) 1977 { 1978 struct sr_softc *sc = (struct sr_softc *)dev; 1979 int rv = 0; 1980 1981 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl ", DEVNAME(sc)); 1982 1983 rw_enter_write(&sc->sc_lock); 1984 1985 switch (cmd) { 1986 case BIOCINQ: 1987 DNPRINTF(SR_D_IOCTL, "inq\n"); 1988 rv = sr_ioctl_inq(sc, (struct bioc_inq *)addr); 1989 break; 1990 1991 case BIOCVOL: 1992 DNPRINTF(SR_D_IOCTL, "vol\n"); 1993 rv = sr_ioctl_vol(sc, (struct bioc_vol 
*)addr); 1994 break; 1995 1996 case BIOCDISK: 1997 DNPRINTF(SR_D_IOCTL, "disk\n"); 1998 rv = sr_ioctl_disk(sc, (struct bioc_disk *)addr); 1999 break; 2000 2001 case BIOCALARM: 2002 DNPRINTF(SR_D_IOCTL, "alarm\n"); 2003 /*rv = sr_ioctl_alarm(sc, (struct bioc_alarm *)addr); */ 2004 break; 2005 2006 case BIOCBLINK: 2007 DNPRINTF(SR_D_IOCTL, "blink\n"); 2008 /*rv = sr_ioctl_blink(sc, (struct bioc_blink *)addr); */ 2009 break; 2010 2011 case BIOCSETSTATE: 2012 DNPRINTF(SR_D_IOCTL, "setstate\n"); 2013 rv = sr_ioctl_setstate(sc, (struct bioc_setstate *)addr); 2014 break; 2015 2016 case BIOCCREATERAID: 2017 DNPRINTF(SR_D_IOCTL, "createraid\n"); 2018 rv = sr_ioctl_createraid(sc, (struct bioc_createraid *)addr, 1); 2019 break; 2020 2021 case BIOCDELETERAID: 2022 rv = sr_ioctl_deleteraid(sc, (struct bioc_deleteraid *)addr); 2023 break; 2024 2025 case BIOCDISCIPLINE: 2026 rv = sr_ioctl_discipline(sc, (struct bioc_discipline *)addr); 2027 break; 2028 2029 case BIOCINSTALLBOOT: 2030 rv = sr_ioctl_installboot(sc, (struct bioc_installboot *)addr); 2031 break; 2032 2033 default: 2034 DNPRINTF(SR_D_IOCTL, "invalid ioctl\n"); 2035 rv = ENOTTY; 2036 } 2037 2038 rw_exit_write(&sc->sc_lock); 2039 2040 return (rv); 2041 } 2042 2043 int 2044 sr_ioctl_inq(struct sr_softc *sc, struct bioc_inq *bi) 2045 { 2046 int i, vol, disk; 2047 2048 for (i = 0, vol = 0, disk = 0; i < SR_MAXSCSIBUS; i++) 2049 /* XXX this will not work when we stagger disciplines */ 2050 if (sc->sc_dis[i]) { 2051 vol++; 2052 disk += sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no; 2053 } 2054 2055 strlcpy(bi->bi_dev, sc->sc_dev.dv_xname, sizeof(bi->bi_dev)); 2056 bi->bi_novol = vol + sc->sc_hotspare_no; 2057 bi->bi_nodisk = disk + sc->sc_hotspare_no; 2058 2059 return (0); 2060 } 2061 2062 int 2063 sr_ioctl_vol(struct sr_softc *sc, struct bioc_vol *bv) 2064 { 2065 int i, vol, rv = EINVAL; 2066 struct sr_discipline *sd; 2067 struct sr_chunk *hotspare; 2068 daddr64_t rb, sz; 2069 2070 for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { 2071 /* XXX this will not work when we stagger disciplines */ 2072 if (sc->sc_dis[i]) 2073 vol++; 2074 if (vol != bv->bv_volid) 2075 continue; 2076 2077 if (sc->sc_dis[i] == NULL) 2078 goto done; 2079 2080 sd = sc->sc_dis[i]; 2081 bv->bv_status = sd->sd_vol_status; 2082 bv->bv_size = sd->sd_meta->ssdi.ssd_size << DEV_BSHIFT; 2083 bv->bv_level = sd->sd_meta->ssdi.ssd_level; 2084 bv->bv_nodisk = sd->sd_meta->ssdi.ssd_chunk_no; 2085 2086 #ifdef CRYPTO 2087 if (sd->sd_meta->ssdi.ssd_level == 'C' && 2088 sd->mds.mdd_crypto.key_disk != NULL) 2089 bv->bv_nodisk++; 2090 #endif 2091 2092 if (bv->bv_status == BIOC_SVREBUILD) { 2093 sz = sd->sd_meta->ssdi.ssd_size; 2094 rb = sd->sd_meta->ssd_rebuild; 2095 if (rb > 0) 2096 bv->bv_percent = 100 - 2097 ((sz * 100 - rb * 100) / sz) - 1; 2098 else 2099 bv->bv_percent = 0; 2100 } 2101 strlcpy(bv->bv_dev, sd->sd_meta->ssd_devname, 2102 sizeof(bv->bv_dev)); 2103 strlcpy(bv->bv_vendor, sd->sd_meta->ssdi.ssd_vendor, 2104 sizeof(bv->bv_vendor)); 2105 rv = 0; 2106 goto done; 2107 } 2108 2109 /* Check hotspares list. */ 2110 SLIST_FOREACH(hotspare, &sc->sc_hotspare_list, src_link) { 2111 vol++; 2112 if (vol != bv->bv_volid) 2113 continue; 2114 2115 bv->bv_status = BIOC_SVONLINE; 2116 bv->bv_size = hotspare->src_meta.scmi.scm_size << DEV_BSHIFT; 2117 bv->bv_level = -1; /* Hotspare. 
*/ 2118 bv->bv_nodisk = 1; 2119 strlcpy(bv->bv_dev, hotspare->src_meta.scmi.scm_devname, 2120 sizeof(bv->bv_dev)); 2121 strlcpy(bv->bv_vendor, hotspare->src_meta.scmi.scm_devname, 2122 sizeof(bv->bv_vendor)); 2123 rv = 0; 2124 goto done; 2125 } 2126 2127 done: 2128 return (rv); 2129 } 2130 2131 int 2132 sr_ioctl_disk(struct sr_softc *sc, struct bioc_disk *bd) 2133 { 2134 int i, vol, rv = EINVAL, id; 2135 struct sr_chunk *src, *hotspare; 2136 2137 for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { 2138 /* XXX this will not work when we stagger disciplines */ 2139 if (sc->sc_dis[i]) 2140 vol++; 2141 if (vol != bd->bd_volid) 2142 continue; 2143 2144 if (sc->sc_dis[i] == NULL) 2145 goto done; 2146 2147 id = bd->bd_diskid; 2148 2149 if (id < sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no) 2150 src = sc->sc_dis[i]->sd_vol.sv_chunks[id]; 2151 #ifdef CRYPTO 2152 else if (id == sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no && 2153 sc->sc_dis[i]->sd_meta->ssdi.ssd_level == 'C' && 2154 sc->sc_dis[i]->mds.mdd_crypto.key_disk != NULL) 2155 src = sc->sc_dis[i]->mds.mdd_crypto.key_disk; 2156 #endif 2157 else 2158 break; 2159 2160 bd->bd_status = src->src_meta.scm_status; 2161 bd->bd_size = src->src_meta.scmi.scm_size << DEV_BSHIFT; 2162 bd->bd_channel = vol; 2163 bd->bd_target = id; 2164 strlcpy(bd->bd_vendor, src->src_meta.scmi.scm_devname, 2165 sizeof(bd->bd_vendor)); 2166 rv = 0; 2167 goto done; 2168 } 2169 2170 /* Check hotspares list. */ 2171 SLIST_FOREACH(hotspare, &sc->sc_hotspare_list, src_link) { 2172 vol++; 2173 if (vol != bd->bd_volid) 2174 continue; 2175 2176 if (bd->bd_diskid != 0) 2177 break; 2178 2179 bd->bd_status = hotspare->src_meta.scm_status; 2180 bd->bd_size = hotspare->src_meta.scmi.scm_size << DEV_BSHIFT; 2181 bd->bd_channel = vol; 2182 bd->bd_target = bd->bd_diskid; 2183 strlcpy(bd->bd_vendor, hotspare->src_meta.scmi.scm_devname, 2184 sizeof(bd->bd_vendor)); 2185 rv = 0; 2186 goto done; 2187 } 2188 2189 done: 2190 return (rv); 2191 } 2192 2193 int 2194 sr_ioctl_setstate(struct sr_softc *sc, struct bioc_setstate *bs) 2195 { 2196 int rv = EINVAL; 2197 int i, vol, found, c; 2198 struct sr_discipline *sd = NULL; 2199 struct sr_chunk *ch_entry; 2200 struct sr_chunk_head *cl; 2201 2202 if (bs->bs_other_id_type == BIOC_SSOTHER_UNUSED) 2203 goto done; 2204 2205 if (bs->bs_status == BIOC_SSHOTSPARE) { 2206 rv = sr_hotspare(sc, (dev_t)bs->bs_other_id); 2207 goto done; 2208 } 2209 2210 for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { 2211 /* XXX this will not work when we stagger disciplines */ 2212 if (sc->sc_dis[i]) 2213 vol++; 2214 if (vol != bs->bs_volid) 2215 continue; 2216 sd = sc->sc_dis[i]; 2217 break; 2218 } 2219 if (sd == NULL) 2220 goto done; 2221 2222 switch (bs->bs_status) { 2223 case BIOC_SSOFFLINE: 2224 /* Take chunk offline */ 2225 found = c = 0; 2226 cl = &sd->sd_vol.sv_chunk_list; 2227 SLIST_FOREACH(ch_entry, cl, src_link) { 2228 if (ch_entry->src_dev_mm == bs->bs_other_id) { 2229 found = 1; 2230 break; 2231 } 2232 c++; 2233 } 2234 if (found == 0) { 2235 printf("%s: chunk not part of array\n", DEVNAME(sc)); 2236 goto done; 2237 } 2238 2239 /* XXX: check current state first */ 2240 sd->sd_set_chunk_state(sd, c, BIOC_SSOFFLINE); 2241 2242 if (sr_meta_save(sd, SR_META_DIRTY)) { 2243 printf("%s: could not save metadata to %s\n", 2244 DEVNAME(sc), sd->sd_meta->ssd_devname); 2245 goto done; 2246 } 2247 rv = 0; 2248 break; 2249 2250 case BIOC_SDSCRUB: 2251 break; 2252 2253 case BIOC_SSREBUILD: 2254 rv = sr_rebuild_init(sd, (dev_t)bs->bs_other_id, 0); 2255 break; 2256 2257 default: 2258 
printf("%s: unsupported state request %d\n", 2259 DEVNAME(sc), bs->bs_status); 2260 } 2261 2262 done: 2263 return (rv); 2264 } 2265 2266 int 2267 sr_chunk_in_use(struct sr_softc *sc, dev_t dev) 2268 { 2269 struct sr_discipline *sd; 2270 struct sr_chunk *chunk; 2271 int i, c; 2272 2273 /* See if chunk is already in use. */ 2274 for (i = 0; i < SR_MAXSCSIBUS; i++) { 2275 if (sc->sc_dis[i] == NULL) 2276 continue; 2277 sd = sc->sc_dis[i]; 2278 for (c = 0; c < sd->sd_meta->ssdi.ssd_chunk_no; c++) { 2279 chunk = sd->sd_vol.sv_chunks[c]; 2280 if (chunk->src_dev_mm == dev) 2281 return chunk->src_meta.scm_status; 2282 } 2283 } 2284 2285 /* Check hotspares list. */ 2286 SLIST_FOREACH(chunk, &sc->sc_hotspare_list, src_link) 2287 if (chunk->src_dev_mm == dev) 2288 return chunk->src_meta.scm_status; 2289 2290 return BIOC_SDINVALID; 2291 } 2292 2293 int 2294 sr_hotspare(struct sr_softc *sc, dev_t dev) 2295 { 2296 struct sr_discipline *sd = NULL; 2297 struct sr_metadata *sm = NULL; 2298 struct sr_meta_chunk *hm; 2299 struct sr_chunk_head *cl; 2300 struct sr_chunk *chunk, *last, *hotspare = NULL; 2301 struct sr_uuid uuid; 2302 struct disklabel label; 2303 struct vnode *vn; 2304 daddr64_t size; 2305 char devname[32]; 2306 int rv = EINVAL; 2307 int c, part, open = 0; 2308 2309 /* 2310 * Add device to global hotspares list. 2311 */ 2312 2313 sr_meta_getdevname(sc, dev, devname, sizeof(devname)); 2314 2315 /* Make sure chunk is not already in use. */ 2316 c = sr_chunk_in_use(sc, dev); 2317 if (c != BIOC_SDINVALID && c != BIOC_SDOFFLINE) { 2318 if (c == BIOC_SDHOTSPARE) 2319 printf("%s: %s is already a hotspare\n", 2320 DEVNAME(sc), devname); 2321 else 2322 printf("%s: %s is already in use\n", 2323 DEVNAME(sc), devname); 2324 goto done; 2325 } 2326 2327 /* XXX - See if there is an existing degraded volume... */ 2328 2329 /* Open device. */ 2330 if (bdevvp(dev, &vn)) { 2331 printf("%s: sr_hotspare: can't allocate vnode\n", DEVNAME(sc)); 2332 goto done; 2333 } 2334 if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, 0)) { 2335 DNPRINTF(SR_D_META,"%s: sr_hotspare cannot open %s\n", 2336 DEVNAME(sc), devname); 2337 vput(vn); 2338 goto fail; 2339 } 2340 open = 1; /* close dev on error */ 2341 2342 /* Get partition details. */ 2343 part = DISKPART(dev); 2344 if (VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD, NOCRED, 0)) { 2345 DNPRINTF(SR_D_META, "%s: sr_hotspare ioctl failed\n", 2346 DEVNAME(sc)); 2347 VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, 0); 2348 vput(vn); 2349 goto fail; 2350 } 2351 if (label.d_partitions[part].p_fstype != FS_RAID) { 2352 printf("%s: %s partition not of type RAID (%d)\n", 2353 DEVNAME(sc), devname, 2354 label.d_partitions[part].p_fstype); 2355 goto fail; 2356 } 2357 2358 /* Calculate partition size. */ 2359 size = DL_GETPSIZE(&label.d_partitions[part]) - SR_DATA_OFFSET; 2360 2361 /* 2362 * Create and populate chunk metadata.
2363 */ 2364 2365 sr_uuid_get(&uuid); 2366 hotspare = malloc(sizeof(struct sr_chunk), M_DEVBUF, M_WAITOK | M_ZERO); 2367 2368 hotspare->src_dev_mm = dev; 2369 hotspare->src_vn = vn; 2370 strlcpy(hotspare->src_devname, devname, sizeof(hm->scmi.scm_devname)); 2371 hotspare->src_size = size; 2372 2373 hm = &hotspare->src_meta; 2374 hm->scmi.scm_volid = SR_HOTSPARE_VOLID; 2375 hm->scmi.scm_chunk_id = 0; 2376 hm->scmi.scm_size = size; 2377 hm->scmi.scm_coerced_size = size; 2378 strlcpy(hm->scmi.scm_devname, devname, sizeof(hm->scmi.scm_devname)); 2379 bcopy(&uuid, &hm->scmi.scm_uuid, sizeof(struct sr_uuid)); 2380 2381 sr_checksum(sc, hm, &hm->scm_checksum, 2382 sizeof(struct sr_meta_chunk_invariant)); 2383 2384 hm->scm_status = BIOC_SDHOTSPARE; 2385 2386 /* 2387 * Create and populate our own discipline and metadata. 2388 */ 2389 2390 sm = malloc(sizeof(struct sr_metadata), M_DEVBUF, M_WAITOK | M_ZERO); 2391 sm->ssdi.ssd_magic = SR_MAGIC; 2392 sm->ssdi.ssd_version = SR_META_VERSION; 2393 sm->ssd_ondisk = 0; 2394 sm->ssdi.ssd_flags = 0; 2395 bcopy(&uuid, &sm->ssdi.ssd_uuid, sizeof(struct sr_uuid)); 2396 sm->ssdi.ssd_chunk_no = 1; 2397 sm->ssdi.ssd_volid = SR_HOTSPARE_VOLID; 2398 sm->ssdi.ssd_level = SR_HOTSPARE_LEVEL; 2399 sm->ssdi.ssd_size = size; 2400 strlcpy(sm->ssdi.ssd_vendor, "OPENBSD", sizeof(sm->ssdi.ssd_vendor)); 2401 snprintf(sm->ssdi.ssd_product, sizeof(sm->ssdi.ssd_product), 2402 "SR %s", "HOTSPARE"); 2403 snprintf(sm->ssdi.ssd_revision, sizeof(sm->ssdi.ssd_revision), 2404 "%03d", SR_META_VERSION); 2405 2406 sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO); 2407 sd->sd_sc = sc; 2408 sd->sd_meta = sm; 2409 sd->sd_meta_type = SR_META_F_NATIVE; 2410 sd->sd_vol_status = BIOC_SVONLINE; 2411 strlcpy(sd->sd_name, "HOTSPARE", sizeof(sd->sd_name)); 2412 SLIST_INIT(&sd->sd_meta_opt); 2413 2414 /* Add chunk to volume. */ 2415 sd->sd_vol.sv_chunks = malloc(sizeof(struct sr_chunk *), M_DEVBUF, 2416 M_WAITOK | M_ZERO); 2417 sd->sd_vol.sv_chunks[0] = hotspare; 2418 SLIST_INIT(&sd->sd_vol.sv_chunk_list); 2419 SLIST_INSERT_HEAD(&sd->sd_vol.sv_chunk_list, hotspare, src_link); 2420 2421 /* Save metadata. */ 2422 if (sr_meta_save(sd, SR_META_DIRTY)) { 2423 printf("%s: could not save metadata to %s\n", 2424 DEVNAME(sc), devname); 2425 goto fail; 2426 } 2427 2428 /* 2429 * Add chunk to hotspare list. 2430 */ 2431 rw_enter_write(&sc->sc_hs_lock); 2432 cl = &sc->sc_hotspare_list; 2433 if (SLIST_EMPTY(cl)) 2434 SLIST_INSERT_HEAD(cl, hotspare, src_link); 2435 else { 2436 SLIST_FOREACH(chunk, cl, src_link) 2437 last = chunk; 2438 SLIST_INSERT_AFTER(last, hotspare, src_link); 2439 } 2440 sc->sc_hotspare_no++; 2441 rw_exit_write(&sc->sc_hs_lock); 2442 2443 rv = 0; 2444 goto done; 2445 2446 fail: 2447 if (hotspare) 2448 free(hotspare, M_DEVBUF); 2449 2450 done: 2451 if (sd && sd->sd_vol.sv_chunks) 2452 free(sd->sd_vol.sv_chunks, M_DEVBUF); 2453 if (sd) 2454 free(sd, M_DEVBUF); 2455 if (sm) 2456 free(sm, M_DEVBUF); 2457 if (open) { 2458 VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, 0); 2459 vput(vn); 2460 } 2461 2462 return (rv); 2463 } 2464 2465 void 2466 sr_hotspare_rebuild_callback(void *arg1, void *arg2) 2467 { 2468 sr_hotspare_rebuild((struct sr_discipline *)arg1); 2469 } 2470 2471 void 2472 sr_hotspare_rebuild(struct sr_discipline *sd) 2473 { 2474 struct sr_chunk_head *cl; 2475 struct sr_chunk *hotspare, *chunk = NULL; 2476 struct sr_workunit *wu; 2477 struct sr_ccb *ccb; 2478 int i, s, chunk_no, busy; 2479 2480 /* 2481 * Attempt to locate a hotspare and initiate rebuild. 
2482 */ 2483 2484 for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) { 2485 if (sd->sd_vol.sv_chunks[i]->src_meta.scm_status == 2486 BIOC_SDOFFLINE) { 2487 chunk_no = i; 2488 chunk = sd->sd_vol.sv_chunks[i]; 2489 break; 2490 } 2491 } 2492 2493 if (chunk == NULL) { 2494 printf("%s: no offline chunk found on %s!\n", 2495 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname); 2496 return; 2497 } 2498 2499 /* See if we have a suitable hotspare... */ 2500 rw_enter_write(&sd->sd_sc->sc_hs_lock); 2501 cl = &sd->sd_sc->sc_hotspare_list; 2502 SLIST_FOREACH(hotspare, cl, src_link) 2503 if (hotspare->src_size >= chunk->src_size) 2504 break; 2505 2506 if (hotspare != NULL) { 2507 2508 printf("%s: %s volume degraded, will attempt to " 2509 "rebuild on hotspare %s\n", DEVNAME(sd->sd_sc), 2510 sd->sd_meta->ssd_devname, hotspare->src_devname); 2511 2512 /* 2513 * Ensure that all pending I/O completes on the failed chunk 2514 * before trying to initiate a rebuild. 2515 */ 2516 i = 0; 2517 do { 2518 busy = 0; 2519 2520 s = splbio(); 2521 TAILQ_FOREACH(wu, &sd->sd_wu_pendq, swu_link) { 2522 TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) { 2523 if (ccb->ccb_target == chunk_no) 2524 busy = 1; 2525 } 2526 } 2527 TAILQ_FOREACH(wu, &sd->sd_wu_defq, swu_link) { 2528 TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) { 2529 if (ccb->ccb_target == chunk_no) 2530 busy = 1; 2531 } 2532 } 2533 splx(s); 2534 2535 if (busy) { 2536 tsleep(sd, PRIBIO, "sr_hotspare", hz); 2537 i++; 2538 } 2539 2540 } while (busy && i < 120); 2541 2542 DNPRINTF(SR_D_META, "%s: waited %i seconds for I/O to " 2543 "complete on failed chunk %s\n", DEVNAME(sd->sd_sc), 2544 i, chunk->src_devname); 2545 2546 if (busy) { 2547 printf("%s: pending I/O failed to complete on " 2548 "failed chunk %s, hotspare rebuild aborted...\n", 2549 DEVNAME(sd->sd_sc), chunk->src_devname); 2550 goto done; 2551 } 2552 2553 s = splbio(); 2554 rw_enter_write(&sd->sd_sc->sc_lock); 2555 if (sr_rebuild_init(sd, hotspare->src_dev_mm, 1) == 0) { 2556 2557 /* Remove hotspare from available list. */ 2558 sd->sd_sc->sc_hotspare_no--; 2559 SLIST_REMOVE(cl, hotspare, sr_chunk, src_link); 2560 free(hotspare, M_DEVBUF); 2561 2562 } 2563 rw_exit_write(&sd->sd_sc->sc_lock); 2564 splx(s); 2565 } 2566 done: 2567 rw_exit_write(&sd->sd_sc->sc_hs_lock); 2568 } 2569 2570 int 2571 sr_rebuild_init(struct sr_discipline *sd, dev_t dev, int hotspare) 2572 { 2573 struct sr_softc *sc = sd->sd_sc; 2574 int rv = EINVAL, part; 2575 int c, found, open = 0; 2576 char devname[32]; 2577 struct vnode *vn; 2578 daddr64_t size, csize; 2579 struct disklabel label; 2580 struct sr_meta_chunk *old, *new; 2581 2582 /* 2583 * Attempt to initiate a rebuild onto the specified device. 
2584 */ 2585 2586 if (!(sd->sd_capabilities & SR_CAP_REBUILD)) { 2587 printf("%s: discipline does not support rebuild\n", 2588 DEVNAME(sc)); 2589 goto done; 2590 } 2591 2592 /* make sure volume is in the right state */ 2593 if (sd->sd_vol_status == BIOC_SVREBUILD) { 2594 printf("%s: rebuild already in progress\n", DEVNAME(sc)); 2595 goto done; 2596 } 2597 if (sd->sd_vol_status != BIOC_SVDEGRADED) { 2598 printf("%s: %s not degraded\n", DEVNAME(sc), 2599 sd->sd_meta->ssd_devname); 2600 goto done; 2601 } 2602 2603 /* find offline chunk */ 2604 for (c = 0, found = -1; c < sd->sd_meta->ssdi.ssd_chunk_no; c++) 2605 if (sd->sd_vol.sv_chunks[c]->src_meta.scm_status == 2606 BIOC_SDOFFLINE) { 2607 found = c; 2608 new = &sd->sd_vol.sv_chunks[c]->src_meta; 2609 if (c > 0) 2610 break; /* roll at least once over the for */ 2611 } else { 2612 csize = sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_size; 2613 old = &sd->sd_vol.sv_chunks[c]->src_meta; 2614 if (found != -1) 2615 break; 2616 } 2617 if (found == -1) { 2618 printf("%s: no offline chunks available for rebuild\n", 2619 DEVNAME(sc)); 2620 goto done; 2621 } 2622 2623 /* populate meta entry */ 2624 sr_meta_getdevname(sc, dev, devname, sizeof(devname)); 2625 if (bdevvp(dev, &vn)) { 2626 printf("%s:, sr_rebuild_init: can't allocate vnode\n", 2627 DEVNAME(sc)); 2628 goto done; 2629 } 2630 2631 if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, 0)) { 2632 DNPRINTF(SR_D_META,"%s: sr_ioctl_setstate can't " 2633 "open %s\n", DEVNAME(sc), devname); 2634 vput(vn); 2635 goto done; 2636 } 2637 open = 1; /* close dev on error */ 2638 2639 /* get partition */ 2640 part = DISKPART(dev); 2641 if (VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD, NOCRED, 0)) { 2642 DNPRINTF(SR_D_META, "%s: sr_ioctl_setstate ioctl failed\n", 2643 DEVNAME(sc)); 2644 goto done; 2645 } 2646 if (label.d_partitions[part].p_fstype != FS_RAID) { 2647 printf("%s: %s partition not of type RAID (%d)\n", 2648 DEVNAME(sc), devname, 2649 label.d_partitions[part].p_fstype); 2650 goto done; 2651 } 2652 2653 /* is partition large enough? */ 2654 size = DL_GETPSIZE(&label.d_partitions[part]) - SR_DATA_OFFSET; 2655 if (size < csize) { 2656 printf("%s: partition too small, at least %llu B required\n", 2657 DEVNAME(sc), csize << DEV_BSHIFT); 2658 goto done; 2659 } else if (size > csize) 2660 printf("%s: partition too large, wasting %llu B\n", 2661 DEVNAME(sc), (size - csize) << DEV_BSHIFT); 2662 2663 /* make sure we are not stomping on some other partition */ 2664 c = sr_chunk_in_use(sc, dev); 2665 if (c != BIOC_SDINVALID && c != BIOC_SDOFFLINE && 2666 !(hotspare && c == BIOC_SDHOTSPARE)) { 2667 printf("%s: %s is already in use\n", DEVNAME(sc), devname); 2668 goto done; 2669 } 2670 2671 /* Reset rebuild counter since we rebuilding onto a new chunk. 
*/ 2672 sd->sd_meta->ssd_rebuild = 0; 2673 2674 /* recreate metadata */ 2675 open = 0; /* leave dev open from here on out */ 2676 sd->sd_vol.sv_chunks[found]->src_dev_mm = dev; 2677 sd->sd_vol.sv_chunks[found]->src_vn = vn; 2678 new->scmi.scm_volid = old->scmi.scm_volid; 2679 new->scmi.scm_chunk_id = found; 2680 strlcpy(new->scmi.scm_devname, devname, 2681 sizeof new->scmi.scm_devname); 2682 new->scmi.scm_size = size; 2683 new->scmi.scm_coerced_size = old->scmi.scm_coerced_size; 2684 bcopy(&old->scmi.scm_uuid, &new->scmi.scm_uuid, 2685 sizeof new->scmi.scm_uuid); 2686 sr_checksum(sc, new, &new->scm_checksum, 2687 sizeof(struct sr_meta_chunk_invariant)); 2688 sd->sd_set_chunk_state(sd, found, BIOC_SDREBUILD); 2689 if (sr_meta_save(sd, SR_META_DIRTY)) { 2690 printf("%s: could not save metadata to %s\n", 2691 DEVNAME(sc), devname); 2692 open = 1; 2693 goto done; 2694 } 2695 2696 printf("%s: rebuild of %s started on %s\n", DEVNAME(sc), 2697 sd->sd_meta->ssd_devname, devname); 2698 2699 sd->sd_reb_abort = 0; 2700 kthread_create_deferred(sr_rebuild, sd); 2701 2702 rv = 0; 2703 done: 2704 if (open) { 2705 VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, 0); 2706 vput(vn); 2707 } 2708 2709 return (rv); 2710 } 2711 2712 void 2713 sr_roam_chunks(struct sr_discipline *sd) 2714 { 2715 struct sr_softc *sc = sd->sd_sc; 2716 struct sr_chunk *chunk; 2717 struct sr_meta_chunk *meta; 2718 int roamed = 0; 2719 2720 /* Have any chunks roamed? */ 2721 SLIST_FOREACH(chunk, &sd->sd_vol.sv_chunk_list, src_link) { 2722 2723 meta = &chunk->src_meta; 2724 2725 if (strncmp(meta->scmi.scm_devname, chunk->src_devname, 2726 sizeof(meta->scmi.scm_devname))) { 2727 2728 printf("%s: roaming device %s -> %s\n", DEVNAME(sc), 2729 meta->scmi.scm_devname, chunk->src_devname); 2730 2731 strlcpy(meta->scmi.scm_devname, chunk->src_devname, 2732 sizeof(meta->scmi.scm_devname)); 2733 2734 roamed++; 2735 } 2736 } 2737 2738 if (roamed) 2739 sr_meta_save(sd, SR_META_DIRTY); 2740 } 2741 2742 int 2743 sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user) 2744 { 2745 dev_t *dt; 2746 int i, s, no_chunk, rv = EINVAL, vol; 2747 int no_meta, updatemeta = 0; 2748 struct sr_chunk_head *cl; 2749 struct sr_discipline *sd = NULL; 2750 struct sr_chunk *ch_entry; 2751 struct device *dev, *dev2; 2752 struct scsibus_attach_args saa; 2753 char devname[32]; 2754 2755 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_createraid(%d)\n", 2756 DEVNAME(sc), user); 2757 2758 /* user input */ 2759 if (bc->bc_dev_list_len > BIOC_CRMAXLEN) 2760 goto unwind; 2761 2762 dt = malloc(bc->bc_dev_list_len, M_DEVBUF, M_WAITOK | M_ZERO); 2763 if (user) { 2764 if (copyin(bc->bc_dev_list, dt, bc->bc_dev_list_len) != 0) 2765 goto unwind; 2766 } else 2767 bcopy(bc->bc_dev_list, dt, bc->bc_dev_list_len); 2768 2769 /* Initialise discipline. */ 2770 sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO); 2771 sd->sd_sc = sc; 2772 SLIST_INIT(&sd->sd_meta_opt); 2773 if (sr_discipline_init(sd, bc->bc_level)) { 2774 printf("%s: could not initialize discipline\n", DEVNAME(sc)); 2775 goto unwind; 2776 } 2777 2778 no_chunk = bc->bc_dev_list_len / sizeof(dev_t); 2779 cl = &sd->sd_vol.sv_chunk_list; 2780 SLIST_INIT(cl); 2781 2782 /* Ensure that chunks are not already in use. 
*/ 2783 for (i = 0; i < no_chunk; i++) { 2784 if (sr_chunk_in_use(sc, dt[i]) != BIOC_SDINVALID) { 2785 sr_meta_getdevname(sc, dt[i], devname, sizeof(devname)); 2786 printf("%s: chunk %s already in use\n", 2787 DEVNAME(sc), devname); 2788 goto unwind; 2789 } 2790 } 2791 2792 sd->sd_meta_type = sr_meta_probe(sd, dt, no_chunk); 2793 if (sd->sd_meta_type == SR_META_F_INVALID) { 2794 printf("%s: invalid metadata format\n", DEVNAME(sc)); 2795 goto unwind; 2796 } 2797 2798 if (sr_meta_attach(sd, no_chunk, bc->bc_flags & BIOC_SCFORCE)) { 2799 printf("%s: can't attach metadata type %d\n", DEVNAME(sc), 2800 sd->sd_meta_type); 2801 goto unwind; 2802 } 2803 2804 /* force the raid volume by clearing metadata region */ 2805 if (bc->bc_flags & BIOC_SCFORCE) { 2806 /* make sure disk isn't up and running */ 2807 if (sr_meta_read(sd)) 2808 if (sr_already_assembled(sd)) { 2809 printf("%s: disk ", DEVNAME(sc)); 2810 sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0); 2811 printf(" is currently in use; can't force " 2812 "create\n"); 2813 goto unwind; 2814 } 2815 2816 if (sr_meta_clear(sd)) { 2817 printf("%s: failed to clear metadata\n", DEVNAME(sc)); 2818 goto unwind; 2819 } 2820 } 2821 2822 if ((no_meta = sr_meta_read(sd)) == 0) { 2823 /* fill out all chunk metadata */ 2824 sr_meta_chunks_create(sc, cl); 2825 ch_entry = SLIST_FIRST(cl); 2826 2827 sd->sd_vol_status = BIOC_SVONLINE; 2828 sd->sd_meta->ssdi.ssd_level = bc->bc_level; 2829 sd->sd_meta->ssdi.ssd_chunk_no = no_chunk; 2830 2831 /* Make the volume UUID available. */ 2832 bcopy(&ch_entry->src_meta.scmi.scm_uuid, 2833 &sd->sd_meta->ssdi.ssd_uuid, 2834 sizeof(sd->sd_meta->ssdi.ssd_uuid)); 2835 2836 if (sd->sd_create) { 2837 if ((i = sd->sd_create(sd, bc, no_chunk, 2838 ch_entry->src_meta.scmi.scm_coerced_size))) { 2839 rv = i; 2840 goto unwind; 2841 } 2842 } 2843 2844 /* fill out all volume metadata */ 2845 DNPRINTF(SR_D_IOCTL, 2846 "%s: sr_ioctl_createraid: vol_size: %lld\n", 2847 DEVNAME(sc), sd->sd_meta->ssdi.ssd_size); 2848 strlcpy(sd->sd_meta->ssdi.ssd_vendor, "OPENBSD", 2849 sizeof(sd->sd_meta->ssdi.ssd_vendor)); 2850 snprintf(sd->sd_meta->ssdi.ssd_product, 2851 sizeof(sd->sd_meta->ssdi.ssd_product), "SR %s", 2852 sd->sd_name); 2853 snprintf(sd->sd_meta->ssdi.ssd_revision, 2854 sizeof(sd->sd_meta->ssdi.ssd_revision), "%03d", 2855 SR_META_VERSION); 2856 2857 sd->sd_meta_flags = bc->bc_flags & BIOC_SCNOAUTOASSEMBLE; 2858 updatemeta = 1; 2859 } else if (no_meta == no_chunk) { 2860 if (sd->sd_meta->ssd_meta_flags & SR_META_DIRTY) 2861 printf("%s: %s was not shutdown properly\n", 2862 DEVNAME(sc), sd->sd_meta->ssd_devname); 2863 if (user == 0 && sd->sd_meta_flags & BIOC_SCNOAUTOASSEMBLE) { 2864 DNPRINTF(SR_D_META, "%s: disk not auto assembled from " 2865 "metadata\n", DEVNAME(sc)); 2866 goto unwind; 2867 } 2868 if (sr_already_assembled(sd)) { 2869 printf("%s: disk ", DEVNAME(sc)); 2870 sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0); 2871 printf(" already assembled\n"); 2872 goto unwind; 2873 } 2874 2875 if (sd->sd_assemble) { 2876 if ((i = sd->sd_assemble(sd, bc, no_chunk))) { 2877 rv = i; 2878 goto unwind; 2879 } 2880 } 2881 2882 DNPRINTF(SR_D_META, "%s: disk assembled from metadata\n", 2883 DEVNAME(sc)); 2884 updatemeta = 0; 2885 } else if (no_meta == -1) { 2886 printf("%s: one of the chunks has corrupt metadata; aborting " 2887 "assembly\n", DEVNAME(sc)); 2888 goto unwind; 2889 } else { 2890 if (sr_already_assembled(sd)) { 2891 printf("%s: disk ", DEVNAME(sc)); 2892 sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0); 2893 printf(" already assembled; will not 
partial " 2894 "assemble it\n"); 2895 goto unwind; 2896 } 2897 2898 if (sd->sd_assemble) { 2899 if ((i = sd->sd_assemble(sd, bc, no_chunk))) { 2900 rv = i; 2901 goto unwind; 2902 } 2903 } 2904 2905 printf("%s: trying to bring up %s degraded\n", DEVNAME(sc), 2906 sd->sd_meta->ssd_devname); 2907 } 2908 2909 /* metadata SHALL be fully filled in at this point */ 2910 2911 /* Make sure that metadata level matches assembly level. */ 2912 if (sd->sd_meta->ssdi.ssd_level != bc->bc_level) { 2913 printf("%s: volume level does not match metadata level!\n", 2914 DEVNAME(sc)); 2915 goto unwind; 2916 } 2917 2918 /* allocate all resources */ 2919 if ((rv = sd->sd_alloc_resources(sd))) 2920 goto unwind; 2921 2922 /* Adjust flags if necessary. */ 2923 if ((sd->sd_capabilities & SR_CAP_AUTO_ASSEMBLE) && 2924 (bc->bc_flags & BIOC_SCNOAUTOASSEMBLE) != 2925 (sd->sd_meta->ssdi.ssd_flags & BIOC_SCNOAUTOASSEMBLE)) { 2926 sd->sd_meta->ssdi.ssd_flags &= ~BIOC_SCNOAUTOASSEMBLE; 2927 sd->sd_meta->ssdi.ssd_flags |= 2928 bc->bc_flags & BIOC_SCNOAUTOASSEMBLE; 2929 } 2930 2931 if (sd->sd_capabilities & SR_CAP_SYSTEM_DISK) { 2932 /* set volume status */ 2933 sd->sd_set_vol_state(sd); 2934 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 2935 printf("%s: %s offline, will not be brought online\n", 2936 DEVNAME(sc), sd->sd_meta->ssd_devname); 2937 goto unwind; 2938 } 2939 2940 /* setup scsi midlayer */ 2941 if (sd->sd_openings) 2942 sd->sd_link.openings = sd->sd_openings(sd); 2943 else 2944 sd->sd_link.openings = sd->sd_max_wu; 2945 sd->sd_link.device = &sr_dev; 2946 sd->sd_link.device_softc = sc; 2947 sd->sd_link.adapter_softc = sc; 2948 sd->sd_link.adapter = &sr_switch; 2949 sd->sd_link.adapter_target = SR_MAX_LD; 2950 sd->sd_link.adapter_buswidth = 1; 2951 bzero(&saa, sizeof(saa)); 2952 saa.saa_sc_link = &sd->sd_link; 2953 2954 /* 2955 * we passed all checks return ENXIO if volume can't be created 2956 */ 2957 rv = ENXIO; 2958 2959 /* clear sense data */ 2960 bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense)); 2961 2962 /* use temporary discipline pointer */ 2963 s = splhigh(); 2964 sc->sc_attach_dis = sd; 2965 splx(s); 2966 dev2 = config_found(&sc->sc_dev, &saa, scsiprint); 2967 s = splhigh(); 2968 sc->sc_attach_dis = NULL; 2969 splx(s); 2970 TAILQ_FOREACH(dev, &alldevs, dv_list) 2971 if (dev->dv_parent == dev2) 2972 break; 2973 if (dev == NULL) 2974 goto unwind; 2975 2976 DNPRINTF(SR_D_IOCTL, "%s: sr device added: %s on scsibus%d\n", 2977 DEVNAME(sc), dev->dv_xname, sd->sd_link.scsibus); 2978 2979 sc->sc_dis[sd->sd_link.scsibus] = sd; 2980 for (i = 0, vol = -1; i <= sd->sd_link.scsibus; i++) 2981 if (sc->sc_dis[i]) 2982 vol++; 2983 sd->sd_scsibus_dev = dev2; 2984 2985 rv = 0; 2986 if (updatemeta) { 2987 /* fill out remaining volume metadata */ 2988 sd->sd_meta->ssdi.ssd_volid = vol; 2989 strlcpy(sd->sd_meta->ssd_devname, dev->dv_xname, 2990 sizeof(sd->sd_meta->ssd_devname)); 2991 sr_meta_init(sd, cl); 2992 } else { 2993 if (strncmp(sd->sd_meta->ssd_devname, dev->dv_xname, 2994 sizeof(dev->dv_xname))) { 2995 printf("%s: volume %s is roaming, it used to " 2996 "be %s, updating metadata\n", 2997 DEVNAME(sc), dev->dv_xname, 2998 sd->sd_meta->ssd_devname); 2999 3000 sd->sd_meta->ssdi.ssd_volid = vol; 3001 strlcpy(sd->sd_meta->ssd_devname, dev->dv_xname, 3002 sizeof(sd->sd_meta->ssd_devname)); 3003 } 3004 } 3005 3006 /* Update device name on any chunks which roamed. 
*/ 3007 sr_roam_chunks(sd); 3008 3009 #ifndef SMALL_KERNEL 3010 if (sr_sensors_create(sd)) 3011 printf("%s: unable to create sensor for %s\n", 3012 DEVNAME(sc), dev->dv_xname); 3013 else 3014 sd->sd_vol.sv_sensor_valid = 1; 3015 #endif /* SMALL_KERNEL */ 3016 } else { 3017 /* we are not an os disk */ 3018 if (updatemeta) { 3019 /* fill out remaining volume metadata */ 3020 sd->sd_meta->ssdi.ssd_volid = 0; 3021 strlcpy(sd->sd_meta->ssd_devname, ch_entry->src_devname, 3022 sizeof(sd->sd_meta->ssd_devname)); 3023 sr_meta_init(sd, cl); 3024 } 3025 if (sd->sd_start_discipline(sd)) 3026 goto unwind; 3027 } 3028 3029 /* save metadata to disk */ 3030 rv = sr_meta_save(sd, SR_META_DIRTY); 3031 sd->sd_shutdownhook = shutdownhook_establish(sr_shutdown, sd); 3032 3033 if (sd->sd_vol_status == BIOC_SVREBUILD) 3034 kthread_create_deferred(sr_rebuild, sd); 3035 3036 sd->sd_ready = 1; 3037 3038 return (rv); 3039 unwind: 3040 sr_discipline_shutdown(sd); 3041 3042 /* XXX - use internal status values! */ 3043 if (rv == EAGAIN) 3044 rv = 0; 3045 3046 return (rv); 3047 } 3048 3049 int 3050 sr_ioctl_deleteraid(struct sr_softc *sc, struct bioc_deleteraid *dr) 3051 { 3052 struct sr_discipline *sd = NULL; 3053 int rv = 1; 3054 int i; 3055 3056 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_deleteraid %s\n", DEVNAME(sc), 3057 dr->bd_dev); 3058 3059 for (i = 0; i < SR_MAXSCSIBUS; i++) 3060 if (sc->sc_dis[i]) { 3061 if (!strncmp(sc->sc_dis[i]->sd_meta->ssd_devname, 3062 dr->bd_dev, 3063 sizeof(sc->sc_dis[i]->sd_meta->ssd_devname))) { 3064 sd = sc->sc_dis[i]; 3065 break; 3066 } 3067 } 3068 3069 if (sd == NULL) 3070 goto bad; 3071 3072 sd->sd_deleted = 1; 3073 sd->sd_meta->ssdi.ssd_flags = BIOC_SCNOAUTOASSEMBLE; 3074 sr_shutdown(sd); 3075 3076 rv = 0; 3077 bad: 3078 return (rv); 3079 } 3080 3081 int 3082 sr_ioctl_discipline(struct sr_softc *sc, struct bioc_discipline *bd) 3083 { 3084 struct sr_discipline *sd = NULL; 3085 int i, rv = 1; 3086 3087 /* Dispatch a discipline specific ioctl. */ 3088 3089 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_discipline %s\n", DEVNAME(sc), 3090 bd->bd_dev); 3091 3092 for (i = 0; i < SR_MAXSCSIBUS; i++) 3093 if (sc->sc_dis[i]) { 3094 if (!strncmp(sc->sc_dis[i]->sd_meta->ssd_devname, 3095 bd->bd_dev, 3096 sizeof(sc->sc_dis[i]->sd_meta->ssd_devname))) { 3097 sd = sc->sc_dis[i]; 3098 break; 3099 } 3100 } 3101 3102 if (sd && sd->sd_ioctl_handler) 3103 rv = sd->sd_ioctl_handler(sd, bd); 3104 3105 return (rv); 3106 } 3107 3108 int 3109 sr_ioctl_installboot(struct sr_softc *sc, struct bioc_installboot *bb) 3110 { 3111 void *bootblk = NULL, *bootldr = NULL; 3112 struct sr_discipline *sd = NULL; 3113 struct sr_chunk *chunk; 3114 struct buf b; 3115 u_int32_t bbs, bls; 3116 int rv = EINVAL; 3117 int i; 3118 3119 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_installboot %s\n", DEVNAME(sc), 3120 bb->bb_dev); 3121 3122 for (i = 0; i < SR_MAXSCSIBUS; i++) 3123 if (sc->sc_dis[i]) { 3124 if (!strncmp(sc->sc_dis[i]->sd_meta->ssd_devname, 3125 bb->bb_dev, 3126 sizeof(sc->sc_dis[i]->sd_meta->ssd_devname))) { 3127 sd = sc->sc_dis[i]; 3128 break; 3129 } 3130 } 3131 3132 if (sd == NULL) 3133 goto done; 3134 3135 if (bb->bb_bootblk_size > SR_BOOT_BLOCKS_SIZE * 512) 3136 goto done; 3137 3138 if (bb->bb_bootldr_size > SR_BOOT_LOADER_SIZE * 512) 3139 goto done; 3140 3141 /* Copy in boot block. */ 3142 bbs = howmany(bb->bb_bootblk_size, DEV_BSIZE) * DEV_BSIZE; 3143 bootblk = malloc(bbs, M_DEVBUF, M_WAITOK | M_ZERO); 3144 if (copyin(bb->bb_bootblk, bootblk, bb->bb_bootblk_size) != 0) 3145 goto done; 3146 3147 /* Copy in boot loader. 
*/ 3148 bls = howmany(bb->bb_bootldr_size, DEV_BSIZE) * DEV_BSIZE; 3149 bootldr = malloc(bls, M_DEVBUF, M_WAITOK | M_ZERO); 3150 if (copyin(bb->bb_bootldr, bootldr, bb->bb_bootldr_size) != 0) 3151 goto done; 3152 3153 /* Save boot block and boot loader to each chunk. */ 3154 for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) { 3155 3156 chunk = sd->sd_vol.sv_chunks[i]; 3157 3158 /* Save boot blocks. */ 3159 DNPRINTF(SR_D_IOCTL, 3160 "sr_ioctl_installboot: saving boot block to %s " 3161 "(%u bytes)\n", chunk->src_devname, bbs); 3162 3163 bzero(&b, sizeof(b)); 3164 b.b_flags = B_WRITE | B_PHYS; 3165 b.b_blkno = SR_BOOT_BLOCKS_OFFSET; 3166 b.b_bcount = bbs; 3167 b.b_bufsize = bbs; 3168 b.b_resid = bbs; 3169 b.b_data = bootblk; 3170 b.b_error = 0; 3171 b.b_proc = curproc; 3172 b.b_dev = chunk->src_dev_mm; 3173 b.b_vp = NULL; 3174 b.b_iodone = NULL; 3175 if (bdevvp(chunk->src_dev_mm, &b.b_vp)) { 3176 printf("%s: sr_ioctl_installboot: vnode allocation " 3177 "failed\n", DEVNAME(sc)); 3178 goto done; 3179 } 3180 if ((b.b_flags & B_READ) == 0) 3181 b.b_vp->v_numoutput++; 3182 LIST_INIT(&b.b_dep); 3183 VOP_STRATEGY(&b); 3184 biowait(&b); 3185 vput(b.b_vp); 3186 3187 if (b.b_flags & B_ERROR) { 3188 printf("%s: 0x%x i/o error on block %llu while " 3189 "writing boot block %d\n", DEVNAME(sc), 3190 chunk->src_dev_mm, b.b_blkno, b.b_error); 3191 goto done; 3192 } 3193 3194 /* Save boot loader. */ 3195 DNPRINTF(SR_D_IOCTL, 3196 "sr_ioctl_installboot: saving boot loader to %s " 3197 "(%u bytes)\n", chunk->src_devname, bls); 3198 3199 bzero(&b, sizeof(b)); 3200 b.b_flags = B_WRITE | B_PHYS; 3201 b.b_blkno = SR_BOOT_LOADER_OFFSET; 3202 b.b_bcount = bls; 3203 b.b_bufsize = bls; 3204 b.b_resid = bls; 3205 b.b_data = bootldr; 3206 b.b_error = 0; 3207 b.b_proc = curproc; 3208 b.b_dev = chunk->src_dev_mm; 3209 b.b_vp = NULL; 3210 b.b_iodone = NULL; 3211 if (bdevvp(chunk->src_dev_mm, &b.b_vp)) { 3212 printf("%s: sr_ioctl_installboot: vnode allocation " 3213 "failed\n", DEVNAME(sc)); 3214 goto done; 3215 } 3216 if ((b.b_flags & B_READ) == 0) 3217 b.b_vp->v_numoutput++; 3218 LIST_INIT(&b.b_dep); 3219 VOP_STRATEGY(&b); 3220 biowait(&b); 3221 vput(b.b_vp); 3222 3223 if (b.b_flags & B_ERROR) { 3224 printf("%s: 0x%x i/o error on block %llu while " 3225 "writing boot blocks %d\n", DEVNAME(sc), 3226 chunk->src_dev_mm, b.b_blkno, b.b_error); 3227 goto done; 3228 } 3229 3230 } 3231 3232 /* XXX - Install boot block on disk - MD code. */ 3233 3234 /* Save boot details in metadata. */ 3235 sd->sd_meta->ssdi.ssd_flags |= BIOC_SCBOOTABLE; 3236 3237 /* XXX - Store size of boot block/loader in optional metadata. */ 3238 3239 /* Save metadata.
*/ 3240 if (sr_meta_save(sd, SR_META_DIRTY)) { 3241 printf("%s: could not save metadata to %s\n", 3242 DEVNAME(sc), chunk->src_devname); 3243 goto done; 3244 } 3245 3246 rv = 0; 3247 3248 done: 3249 if (bootblk) 3250 free(bootblk, M_DEVBUF); 3251 if (bootldr) 3252 free(bootldr, M_DEVBUF); 3253 3254 return (rv); 3255 } 3256 3257 void 3258 sr_chunks_unwind(struct sr_softc *sc, struct sr_chunk_head *cl) 3259 { 3260 struct sr_chunk *ch_entry, *ch_next; 3261 3262 DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind\n", DEVNAME(sc)); 3263 3264 if (!cl) 3265 return; 3266 3267 for (ch_entry = SLIST_FIRST(cl); 3268 ch_entry != SLIST_END(cl); ch_entry = ch_next) { 3269 ch_next = SLIST_NEXT(ch_entry, src_link); 3270 3271 DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind closing: %s\n", 3272 DEVNAME(sc), ch_entry->src_devname); 3273 if (ch_entry->src_vn) { 3274 /* 3275 * XXX - explicitly lock the vnode until we can resolve 3276 * the problem introduced by vnode aliasing... specfs 3277 * has no locking, whereas ufs/ffs does! 3278 */ 3279 vn_lock(ch_entry->src_vn, LK_EXCLUSIVE | LK_RETRY, 0); 3280 VOP_CLOSE(ch_entry->src_vn, FREAD | FWRITE, NOCRED, 0); 3281 vput(ch_entry->src_vn); 3282 } 3283 free(ch_entry, M_DEVBUF); 3284 } 3285 SLIST_INIT(cl); 3286 } 3287 3288 void 3289 sr_discipline_free(struct sr_discipline *sd) 3290 { 3291 struct sr_softc *sc; 3292 struct sr_meta_opt_head *omh; 3293 struct sr_meta_opt_item *omi, *omi_next; 3294 int i; 3295 3296 if (!sd) 3297 return; 3298 3299 sc = sd->sd_sc; 3300 3301 DNPRINTF(SR_D_DIS, "%s: sr_discipline_free %s\n", 3302 DEVNAME(sc), 3303 sd->sd_meta ? sd->sd_meta->ssd_devname : "nodev"); 3304 if (sd->sd_free_resources) 3305 sd->sd_free_resources(sd); 3306 if (sd->sd_vol.sv_chunks) 3307 free(sd->sd_vol.sv_chunks, M_DEVBUF); 3308 if (sd->sd_meta) 3309 free(sd->sd_meta, M_DEVBUF); 3310 if (sd->sd_meta_foreign) 3311 free(sd->sd_meta_foreign, M_DEVBUF); 3312 3313 omh = &sd->sd_meta_opt; 3314 for (omi = SLIST_FIRST(omh); omi != SLIST_END(omh); omi = omi_next) { 3315 omi_next = SLIST_NEXT(omi, omi_link); 3316 free(omi, M_DEVBUF); 3317 } 3318 3319 for (i = 0; i < SR_MAXSCSIBUS; i++) 3320 if (sc->sc_dis[i] == sd) { 3321 sc->sc_dis[i] = NULL; 3322 break; 3323 } 3324 3325 free(sd, M_DEVBUF); 3326 } 3327 3328 void 3329 sr_discipline_shutdown(struct sr_discipline *sd) 3330 { 3331 struct sr_softc *sc = sd->sd_sc; 3332 int s; 3333 3334 if (!sd || !sc) 3335 return; 3336 3337 DNPRINTF(SR_D_DIS, "%s: sr_discipline_shutdown %s\n", DEVNAME(sc), 3338 sd->sd_meta ? 
sd->sd_meta->ssd_devname : "nodev"); 3339 3340 s = splbio(); 3341 3342 sd->sd_ready = 0; 3343 3344 if (sd->sd_shutdownhook) 3345 shutdownhook_disestablish(sd->sd_shutdownhook); 3346 3347 /* make sure there isn't a sync pending and yield */ 3348 wakeup(sd); 3349 while (sd->sd_sync || sd->sd_must_flush) 3350 if (tsleep(&sd->sd_sync, MAXPRI, "sr_down", 60 * hz) == 3351 EWOULDBLOCK) 3352 break; 3353 3354 #ifndef SMALL_KERNEL 3355 sr_sensors_delete(sd); 3356 #endif /* SMALL_KERNEL */ 3357 3358 if (sd->sd_scsibus_dev) 3359 config_detach(sd->sd_scsibus_dev, DETACH_FORCE); 3360 3361 sr_chunks_unwind(sc, &sd->sd_vol.sv_chunk_list); 3362 3363 if (sd) 3364 sr_discipline_free(sd); 3365 3366 splx(s); 3367 } 3368 3369 int 3370 sr_discipline_init(struct sr_discipline *sd, int level) 3371 { 3372 int rv = 1; 3373 3374 switch (level) { 3375 case 0: 3376 sr_raid0_discipline_init(sd); 3377 break; 3378 case 1: 3379 sr_raid1_discipline_init(sd); 3380 break; 3381 case 4: 3382 sr_raidp_discipline_init(sd, SR_MD_RAID4); 3383 break; 3384 case 5: 3385 sr_raidp_discipline_init(sd, SR_MD_RAID5); 3386 break; 3387 case 6: 3388 sr_raid6_discipline_init(sd); 3389 break; 3390 #ifdef AOE 3391 /* AOE target. */ 3392 case 'A': 3393 sr_aoe_server_discipline_init(sd); 3394 break; 3395 /* AOE initiator. */ 3396 case 'a': 3397 sr_aoe_discipline_init(sd); 3398 break; 3399 #endif 3400 #ifdef CRYPTO 3401 case 'C': 3402 sr_crypto_discipline_init(sd); 3403 break; 3404 #endif 3405 default: 3406 goto bad; 3407 } 3408 3409 rv = 0; 3410 bad: 3411 return (rv); 3412 } 3413 3414 int 3415 sr_raid_inquiry(struct sr_workunit *wu) 3416 { 3417 struct sr_discipline *sd = wu->swu_dis; 3418 struct scsi_xfer *xs = wu->swu_xs; 3419 struct scsi_inquiry_data inq; 3420 3421 DNPRINTF(SR_D_DIS, "%s: sr_raid_inquiry\n", DEVNAME(sd->sd_sc)); 3422 3423 bzero(&inq, sizeof(inq)); 3424 inq.device = T_DIRECT; 3425 inq.dev_qual2 = 0; 3426 inq.version = 2; 3427 inq.response_format = 2; 3428 inq.additional_length = 32; 3429 strlcpy(inq.vendor, sd->sd_meta->ssdi.ssd_vendor, 3430 sizeof(inq.vendor)); 3431 strlcpy(inq.product, sd->sd_meta->ssdi.ssd_product, 3432 sizeof(inq.product)); 3433 strlcpy(inq.revision, sd->sd_meta->ssdi.ssd_revision, 3434 sizeof(inq.revision)); 3435 sr_copy_internal_data(xs, &inq, sizeof(inq)); 3436 3437 return (0); 3438 } 3439 3440 int 3441 sr_raid_read_cap(struct sr_workunit *wu) 3442 { 3443 struct sr_discipline *sd = wu->swu_dis; 3444 struct scsi_xfer *xs = wu->swu_xs; 3445 struct scsi_read_cap_data rcd; 3446 struct scsi_read_cap_data_16 rcd16; 3447 int rv = 1; 3448 3449 DNPRINTF(SR_D_DIS, "%s: sr_raid_read_cap\n", DEVNAME(sd->sd_sc)); 3450 3451 if (xs->cmd->opcode == READ_CAPACITY) { 3452 bzero(&rcd, sizeof(rcd)); 3453 if (sd->sd_meta->ssdi.ssd_size > 0xffffffffllu) 3454 _lto4b(0xffffffff, rcd.addr); 3455 else 3456 _lto4b(sd->sd_meta->ssdi.ssd_size, rcd.addr); 3457 _lto4b(512, rcd.length); 3458 sr_copy_internal_data(xs, &rcd, sizeof(rcd)); 3459 rv = 0; 3460 } else if (xs->cmd->opcode == READ_CAPACITY_16) { 3461 bzero(&rcd16, sizeof(rcd16)); 3462 _lto8b(sd->sd_meta->ssdi.ssd_size, rcd16.addr); 3463 _lto4b(512, rcd16.length); 3464 sr_copy_internal_data(xs, &rcd16, sizeof(rcd16)); 3465 rv = 0; 3466 } 3467 3468 return (rv); 3469 } 3470 3471 int 3472 sr_raid_tur(struct sr_workunit *wu) 3473 { 3474 struct sr_discipline *sd = wu->swu_dis; 3475 3476 DNPRINTF(SR_D_DIS, "%s: sr_raid_tur\n", DEVNAME(sd->sd_sc)); 3477 3478 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 3479 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT; 3480 sd->sd_scsi_sense.flags = 
SKEY_NOT_READY; 3481 sd->sd_scsi_sense.add_sense_code = 0x04; 3482 sd->sd_scsi_sense.add_sense_code_qual = 0x11; 3483 sd->sd_scsi_sense.extra_len = 4; 3484 return (1); 3485 } else if (sd->sd_vol_status == BIOC_SVINVALID) { 3486 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT; 3487 sd->sd_scsi_sense.flags = SKEY_HARDWARE_ERROR; 3488 sd->sd_scsi_sense.add_sense_code = 0x05; 3489 sd->sd_scsi_sense.add_sense_code_qual = 0x00; 3490 sd->sd_scsi_sense.extra_len = 4; 3491 return (1); 3492 } 3493 3494 return (0); 3495 } 3496 3497 int 3498 sr_raid_request_sense(struct sr_workunit *wu) 3499 { 3500 struct sr_discipline *sd = wu->swu_dis; 3501 struct scsi_xfer *xs = wu->swu_xs; 3502 3503 DNPRINTF(SR_D_DIS, "%s: sr_raid_request_sense\n", 3504 DEVNAME(sd->sd_sc)); 3505 3506 /* use latest sense data */ 3507 bcopy(&sd->sd_scsi_sense, &xs->sense, sizeof(xs->sense)); 3508 3509 /* clear sense data */ 3510 bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense)); 3511 3512 return (0); 3513 } 3514 3515 int 3516 sr_raid_start_stop(struct sr_workunit *wu) 3517 { 3518 struct sr_discipline *sd = wu->swu_dis; 3519 struct scsi_xfer *xs = wu->swu_xs; 3520 struct scsi_start_stop *ss = (struct scsi_start_stop *)xs->cmd; 3521 int rv = 1; 3522 3523 DNPRINTF(SR_D_DIS, "%s: sr_raid_start_stop\n", 3524 DEVNAME(sd->sd_sc)); 3525 3526 if (!ss) 3527 return (rv); 3528 3529 if (ss->byte2 == 0x00) { 3530 /* START */ 3531 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 3532 /* bring volume online */ 3533 /* XXX check to see if volume can be brought online */ 3534 sd->sd_vol_status = BIOC_SVONLINE; 3535 } 3536 rv = 0; 3537 } else /* XXX is this the check? if (byte == 0x01) */ { 3538 /* STOP */ 3539 if (sd->sd_vol_status == BIOC_SVONLINE) { 3540 /* bring volume offline */ 3541 sd->sd_vol_status = BIOC_SVOFFLINE; 3542 } 3543 rv = 0; 3544 } 3545 3546 return (rv); 3547 } 3548 3549 int 3550 sr_raid_sync(struct sr_workunit *wu) 3551 { 3552 struct sr_discipline *sd = wu->swu_dis; 3553 int s, rv = 0, ios; 3554 3555 DNPRINTF(SR_D_DIS, "%s: sr_raid_sync\n", DEVNAME(sd->sd_sc)); 3556 3557 /* when doing a fake sync don't count the wu */ 3558 ios = wu->swu_fake ? 
0 : 1; 3559 3560 s = splbio(); 3561 sd->sd_sync = 1; 3562 3563 while (sd->sd_wu_pending > ios) 3564 if (tsleep(sd, PRIBIO, "sr_sync", 15 * hz) == EWOULDBLOCK) { 3565 DNPRINTF(SR_D_DIS, "%s: sr_raid_sync timeout\n", 3566 DEVNAME(sd->sd_sc)); 3567 rv = 1; 3568 break; 3569 } 3570 3571 sd->sd_sync = 0; 3572 splx(s); 3573 3574 wakeup(&sd->sd_sync); 3575 3576 return (rv); 3577 } 3578 3579 void 3580 sr_raid_startwu(struct sr_workunit *wu) 3581 { 3582 struct sr_discipline *sd = wu->swu_dis; 3583 struct sr_ccb *ccb; 3584 3585 splassert(IPL_BIO); 3586 3587 if (wu->swu_state == SR_WU_RESTART) 3588 /* 3589 * no need to put the wu on the pending queue since we 3590 * are restarting the io 3591 */ 3592 ; 3593 else 3594 /* move wu to pending queue */ 3595 TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link); 3596 3597 /* start all individual ios */ 3598 TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) { 3599 VOP_STRATEGY(&ccb->ccb_buf); 3600 } 3601 } 3602 3603 void 3604 sr_checksum_print(u_int8_t *md5) 3605 { 3606 int i; 3607 3608 for (i = 0; i < MD5_DIGEST_LENGTH; i++) 3609 printf("%02x", md5[i]); 3610 } 3611 3612 void 3613 sr_checksum(struct sr_softc *sc, void *src, void *md5, u_int32_t len) 3614 { 3615 MD5_CTX ctx; 3616 3617 DNPRINTF(SR_D_MISC, "%s: sr_checksum(%p %p %d)\n", DEVNAME(sc), src, 3618 md5, len); 3619 3620 MD5Init(&ctx); 3621 MD5Update(&ctx, src, len); 3622 MD5Final(md5, &ctx); 3623 } 3624 3625 void 3626 sr_uuid_get(struct sr_uuid *uuid) 3627 { 3628 arc4random_buf(uuid->sui_id, sizeof(uuid->sui_id)); 3629 /* UUID version 4: random */ 3630 uuid->sui_id[6] &= 0x0f; 3631 uuid->sui_id[6] |= 0x40; 3632 /* RFC4122 variant */ 3633 uuid->sui_id[8] &= 0x3f; 3634 uuid->sui_id[8] |= 0x80; 3635 } 3636 3637 void 3638 sr_uuid_print(struct sr_uuid *uuid, int cr) 3639 { 3640 printf("%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-" 3641 "%02x%02x%02x%02x%02x%02x", 3642 uuid->sui_id[0], uuid->sui_id[1], 3643 uuid->sui_id[2], uuid->sui_id[3], 3644 uuid->sui_id[4], uuid->sui_id[5], 3645 uuid->sui_id[6], uuid->sui_id[7], 3646 uuid->sui_id[8], uuid->sui_id[9], 3647 uuid->sui_id[10], uuid->sui_id[11], 3648 uuid->sui_id[12], uuid->sui_id[13], 3649 uuid->sui_id[14], uuid->sui_id[15]); 3650 3651 if (cr) 3652 printf("\n"); 3653 } 3654 3655 int 3656 sr_already_assembled(struct sr_discipline *sd) 3657 { 3658 struct sr_softc *sc = sd->sd_sc; 3659 int i; 3660 3661 for (i = 0; i < SR_MAXSCSIBUS; i++) 3662 if (sc->sc_dis[i]) 3663 if (!bcmp(&sd->sd_meta->ssdi.ssd_uuid, 3664 &sc->sc_dis[i]->sd_meta->ssdi.ssd_uuid, 3665 sizeof(sd->sd_meta->ssdi.ssd_uuid))) 3666 return (1); 3667 3668 return (0); 3669 } 3670 3671 int32_t 3672 sr_validate_stripsize(u_int32_t b) 3673 { 3674 int s = 0; 3675 3676 if (b % 512) 3677 return (-1); 3678 3679 while ((b & 1) == 0) { 3680 b >>= 1; 3681 s++; 3682 } 3683 3684 /* only multiple of twos */ 3685 b >>= 1; 3686 if (b) 3687 return(-1); 3688 3689 return (s); 3690 } 3691 3692 void 3693 sr_shutdown(void *arg) 3694 { 3695 struct sr_discipline *sd = arg; 3696 #ifdef SR_DEBUG 3697 struct sr_softc *sc = sd->sd_sc; 3698 #endif 3699 DNPRINTF(SR_D_DIS, "%s: sr_shutdown %s\n", 3700 DEVNAME(sc), sd->sd_meta->ssd_devname); 3701 3702 /* abort rebuild and drain io */ 3703 sd->sd_reb_abort = 1; 3704 while (sd->sd_reb_active) 3705 tsleep(sd, PWAIT, "sr_shutdown", 1); 3706 3707 sr_meta_save(sd, 0); 3708 3709 sr_discipline_shutdown(sd); 3710 } 3711 3712 int 3713 sr_validate_io(struct sr_workunit *wu, daddr64_t *blk, char *func) 3714 { 3715 struct sr_discipline *sd = wu->swu_dis; 3716 struct scsi_xfer *xs = wu->swu_xs; 3717 int 
rv = 1; 3718 3719 DNPRINTF(SR_D_DIS, "%s: %s 0x%02x\n", DEVNAME(sd->sd_sc), func, 3720 xs->cmd->opcode); 3721 3722 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 3723 DNPRINTF(SR_D_DIS, "%s: %s device offline\n", 3724 DEVNAME(sd->sd_sc), func); 3725 goto bad; 3726 } 3727 3728 if (xs->datalen == 0) { 3729 printf("%s: %s: illegal block count for %s\n", 3730 DEVNAME(sd->sd_sc), func, sd->sd_meta->ssd_devname); 3731 goto bad; 3732 } 3733 3734 if (xs->cmdlen == 10) 3735 *blk = _4btol(((struct scsi_rw_big *)xs->cmd)->addr); 3736 else if (xs->cmdlen == 16) 3737 *blk = _8btol(((struct scsi_rw_16 *)xs->cmd)->addr); 3738 else if (xs->cmdlen == 6) 3739 *blk = _3btol(((struct scsi_rw *)xs->cmd)->addr); 3740 else { 3741 printf("%s: %s: illegal cmdlen for %s\n", 3742 DEVNAME(sd->sd_sc), func, sd->sd_meta->ssd_devname); 3743 goto bad; 3744 } 3745 3746 wu->swu_blk_start = *blk; 3747 wu->swu_blk_end = *blk + (xs->datalen >> DEV_BSHIFT) - 1; 3748 3749 if (wu->swu_blk_end > sd->sd_meta->ssdi.ssd_size) { 3750 DNPRINTF(SR_D_DIS, "%s: %s out of bounds start: %lld " 3751 "end: %lld length: %d\n", 3752 DEVNAME(sd->sd_sc), func, wu->swu_blk_start, 3753 wu->swu_blk_end, xs->datalen); 3754 3755 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT | 3756 SSD_ERRCODE_VALID; 3757 sd->sd_scsi_sense.flags = SKEY_ILLEGAL_REQUEST; 3758 sd->sd_scsi_sense.add_sense_code = 0x21; 3759 sd->sd_scsi_sense.add_sense_code_qual = 0x00; 3760 sd->sd_scsi_sense.extra_len = 4; 3761 goto bad; 3762 } 3763 3764 rv = 0; 3765 bad: 3766 return (rv); 3767 } 3768 3769 int 3770 sr_check_io_collision(struct sr_workunit *wu) 3771 { 3772 struct sr_discipline *sd = wu->swu_dis; 3773 struct sr_workunit *wup; 3774 3775 splassert(IPL_BIO); 3776 3777 /* walk queue backwards and fill in collider if we have one */ 3778 TAILQ_FOREACH_REVERSE(wup, &sd->sd_wu_pendq, sr_wu_list, swu_link) { 3779 if (wu->swu_blk_end < wup->swu_blk_start || 3780 wup->swu_blk_end < wu->swu_blk_start) 3781 continue; 3782 3783 /* we have an LBA collision, defer wu */ 3784 wu->swu_state = SR_WU_DEFERRED; 3785 if (wup->swu_collider) 3786 /* wu is on deferred queue, append to last wu */ 3787 while (wup->swu_collider) 3788 wup = wup->swu_collider; 3789 3790 wup->swu_collider = wu; 3791 TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link); 3792 sd->sd_wu_collisions++; 3793 goto queued; 3794 } 3795 3796 return (0); 3797 queued: 3798 return (1); 3799 } 3800 3801 void 3802 sr_rebuild(void *arg) 3803 { 3804 struct sr_discipline *sd = arg; 3805 struct sr_softc *sc = sd->sd_sc; 3806 3807 if (kthread_create(sr_rebuild_thread, sd, &sd->sd_background_proc, 3808 DEVNAME(sc)) != 0) 3809 printf("%s: unable to start background operation\n", 3810 DEVNAME(sc)); 3811 } 3812 3813 void 3814 sr_rebuild_thread(void *arg) 3815 { 3816 struct sr_discipline *sd = arg; 3817 struct sr_softc *sc = sd->sd_sc; 3818 daddr64_t whole_blk, partial_blk, blk, sz, lba; 3819 daddr64_t psz, rb, restart; 3820 uint64_t mysize = 0; 3821 struct sr_workunit *wu_r, *wu_w; 3822 struct scsi_xfer xs_r, xs_w; 3823 struct scsi_rw_16 cr, cw; 3824 int c, s, slept, percent = 0, old_percent = -1; 3825 u_int8_t *buf; 3826 3827 whole_blk = sd->sd_meta->ssdi.ssd_size / SR_REBUILD_IO_SIZE; 3828 partial_blk = sd->sd_meta->ssdi.ssd_size % SR_REBUILD_IO_SIZE; 3829 3830 restart = sd->sd_meta->ssd_rebuild / SR_REBUILD_IO_SIZE; 3831 if (restart > whole_blk) { 3832 printf("%s: bogus rebuild restart offset, starting from 0\n", 3833 DEVNAME(sc)); 3834 restart = 0; 3835 } 3836 if (restart) { 3837 /* 3838 * XXX there is a hole here; there is a possibility that we
3839 * had a restart however the chunk that was supposed to 3840 * be rebuilt is no longer valid; we can reach this situation 3841 * when a rebuild is in progress and the box crashes and 3842 * on reboot the rebuild chunk is different (like zero'd or 3843 * replaced). We need to check the uuid of the chunk that is 3844 * being rebuilt to assert this. 3845 */ 3846 psz = sd->sd_meta->ssdi.ssd_size; 3847 rb = sd->sd_meta->ssd_rebuild; 3848 if (rb > 0) 3849 percent = 100 - ((psz * 100 - rb * 100) / psz) - 1; 3850 else 3851 percent = 0; 3852 printf("%s: resuming rebuild on %s at %llu%%\n", 3853 DEVNAME(sc), sd->sd_meta->ssd_devname, percent); 3854 } 3855 3856 sd->sd_reb_active = 1; 3857 3858 buf = malloc(SR_REBUILD_IO_SIZE << DEV_BSHIFT, M_DEVBUF, M_WAITOK); 3859 for (blk = restart; blk <= whole_blk; blk++) { 3860 if (blk == whole_blk) 3861 sz = partial_blk; 3862 else 3863 sz = SR_REBUILD_IO_SIZE; 3864 mysize += sz; 3865 lba = blk * sz; 3866 3867 /* get some wu */ 3868 if ((wu_r = sr_wu_get(sd, 1)) == NULL) 3869 panic("%s: rebuild exhausted wu_r", DEVNAME(sc)); 3870 if ((wu_w = sr_wu_get(sd, 1)) == NULL) 3871 panic("%s: rebuild exhausted wu_w", DEVNAME(sc)); 3872 3873 /* setup read io */ 3874 bzero(&xs_r, sizeof xs_r); 3875 bzero(&cr, sizeof cr); 3876 xs_r.error = XS_NOERROR; 3877 xs_r.flags = SCSI_DATA_IN; 3878 xs_r.datalen = sz << DEV_BSHIFT; 3879 xs_r.data = buf; 3880 xs_r.cmdlen = 16; 3881 cr.opcode = READ_16; 3882 _lto4b(sz, cr.length); 3883 _lto8b(lba, cr.addr); 3884 xs_r.cmd = (struct scsi_generic *)&cr; 3885 wu_r->swu_flags |= SR_WUF_REBUILD; 3886 wu_r->swu_xs = &xs_r; 3887 if (sd->sd_scsi_rw(wu_r)) { 3888 printf("%s: could not create read io\n", 3889 DEVNAME(sc)); 3890 goto fail; 3891 } 3892 3893 /* setup write io */ 3894 bzero(&xs_w, sizeof xs_w); 3895 bzero(&cw, sizeof cw); 3896 xs_w.error = XS_NOERROR; 3897 xs_w.flags = SCSI_DATA_OUT; 3898 xs_w.datalen = sz << DEV_BSHIFT; 3899 xs_w.data = buf; 3900 xs_w.cmdlen = 16; 3901 cw.opcode = WRITE_16; 3902 _lto4b(sz, cw.length); 3903 _lto8b(lba, cw.addr); 3904 xs_w.cmd = (struct scsi_generic *)&cw; 3905 wu_w->swu_flags |= SR_WUF_REBUILD; 3906 wu_w->swu_xs = &xs_w; 3907 if (sd->sd_scsi_rw(wu_w)) { 3908 printf("%s: could not create write io\n", 3909 DEVNAME(sc)); 3910 goto fail; 3911 } 3912 3913 /* 3914 * collide with the read io so that we get automatically 3915 * started when the read is done 3916 */ 3917 wu_w->swu_state = SR_WU_DEFERRED; 3918 wu_r->swu_collider = wu_w; 3919 s = splbio(); 3920 TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu_w, swu_link); 3921 3922 /* schedule io */ 3923 if (sr_check_io_collision(wu_r)) 3924 goto queued; 3925 3926 sr_raid_startwu(wu_r); 3927 queued: 3928 splx(s); 3929 3930 /* wait for read completion */ 3931 slept = 0; 3932 while ((wu_w->swu_flags & SR_WUF_REBUILDIOCOMP) == 0) { 3933 tsleep(wu_w, PRIBIO, "sr_rebuild", 0); 3934 slept = 1; 3935 } 3936 /* yield if we didn't sleep */ 3937 if (slept == 0) 3938 tsleep(sc, PWAIT, "sr_yield", 1); 3939 3940 sr_wu_put(wu_r); 3941 sr_wu_put(wu_w); 3942 3943 sd->sd_meta->ssd_rebuild = lba; 3944 3945 /* save metadata every percent */ 3946 psz = sd->sd_meta->ssdi.ssd_size; 3947 rb = sd->sd_meta->ssd_rebuild; 3948 if (rb > 0) 3949 percent = 100 - ((psz * 100 - rb * 100) / psz) - 1; 3950 else 3951 percent = 0; 3952 if (percent != old_percent && blk != whole_blk) { 3953 if (sr_meta_save(sd, SR_META_DIRTY)) 3954 printf("%s: could not save metadata to %s\n", 3955 DEVNAME(sc), sd->sd_meta->ssd_devname); 3956 old_percent = percent; 3957 } 3958 3959 if (sd->sd_reb_abort) 3960 goto abort; 
3961 } 3962 3963 /* all done */ 3964 sd->sd_meta->ssd_rebuild = 0; 3965 for (c = 0; c < sd->sd_meta->ssdi.ssd_chunk_no; c++) 3966 if (sd->sd_vol.sv_chunks[c]->src_meta.scm_status == 3967 BIOC_SDREBUILD) { 3968 sd->sd_set_chunk_state(sd, c, BIOC_SDONLINE); 3969 break; 3970 } 3971 3972 abort: 3973 if (sr_meta_save(sd, SR_META_DIRTY)) 3974 printf("%s: could not save metadata to %s\n", 3975 DEVNAME(sc), sd->sd_meta->ssd_devname); 3976 fail: 3977 free(buf, M_DEVBUF); 3978 sd->sd_reb_active = 0; 3979 kthread_exit(0); 3980 } 3981 3982 #ifndef SMALL_KERNEL 3983 int 3984 sr_sensors_create(struct sr_discipline *sd) 3985 { 3986 struct sr_softc *sc = sd->sd_sc; 3987 int rv = 1; 3988 3989 DNPRINTF(SR_D_STATE, "%s: %s: sr_sensors_create\n", 3990 DEVNAME(sc), sd->sd_meta->ssd_devname); 3991 3992 strlcpy(sd->sd_vol.sv_sensordev.xname, DEVNAME(sc), 3993 sizeof(sd->sd_vol.sv_sensordev.xname)); 3994 3995 sd->sd_vol.sv_sensor.type = SENSOR_DRIVE; 3996 sd->sd_vol.sv_sensor.status = SENSOR_S_UNKNOWN; 3997 strlcpy(sd->sd_vol.sv_sensor.desc, sd->sd_meta->ssd_devname, 3998 sizeof(sd->sd_vol.sv_sensor.desc)); 3999 4000 sensor_attach(&sd->sd_vol.sv_sensordev, &sd->sd_vol.sv_sensor); 4001 4002 if (sc->sc_sensors_running == 0) { 4003 if (sensor_task_register(sc, sr_sensors_refresh, 10) == NULL) 4004 goto bad; 4005 sc->sc_sensors_running = 1; 4006 } 4007 sensordev_install(&sd->sd_vol.sv_sensordev); 4008 4009 rv = 0; 4010 bad: 4011 return (rv); 4012 } 4013 4014 void 4015 sr_sensors_delete(struct sr_discipline *sd) 4016 { 4017 DNPRINTF(SR_D_STATE, "%s: sr_sensors_delete\n", DEVNAME(sd->sd_sc)); 4018 4019 if (sd->sd_vol.sv_sensor_valid) 4020 sensordev_deinstall(&sd->sd_vol.sv_sensordev); 4021 } 4022 4023 void 4024 sr_sensors_refresh(void *arg) 4025 { 4026 struct sr_softc *sc = arg; 4027 struct sr_volume *sv; 4028 struct sr_discipline *sd; 4029 int i, vol; 4030 4031 DNPRINTF(SR_D_STATE, "%s: sr_sensors_refresh\n", DEVNAME(sc)); 4032 4033 for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { 4034 /* XXX this will not work when we stagger disciplines */ 4035 if (!sc->sc_dis[i]) 4036 continue; 4037 4038 sd = sc->sc_dis[i]; 4039 sv = &sd->sd_vol; 4040 4041 switch(sd->sd_vol_status) { 4042 case BIOC_SVOFFLINE: 4043 sv->sv_sensor.value = SENSOR_DRIVE_FAIL; 4044 sv->sv_sensor.status = SENSOR_S_CRIT; 4045 break; 4046 4047 case BIOC_SVDEGRADED: 4048 sv->sv_sensor.value = SENSOR_DRIVE_PFAIL; 4049 sv->sv_sensor.status = SENSOR_S_WARN; 4050 break; 4051 4052 case BIOC_SVSCRUB: 4053 case BIOC_SVONLINE: 4054 sv->sv_sensor.value = SENSOR_DRIVE_ONLINE; 4055 sv->sv_sensor.status = SENSOR_S_OK; 4056 break; 4057 4058 default: 4059 sv->sv_sensor.value = 0; /* unknown */ 4060 sv->sv_sensor.status = SENSOR_S_UNKNOWN; 4061 } 4062 } 4063 } 4064 #endif /* SMALL_KERNEL */ 4065 4066 #ifdef SR_FANCY_STATS 4067 void sr_print_stats(void); 4068 4069 void 4070 sr_print_stats(void) 4071 { 4072 struct sr_softc *sc; 4073 struct sr_discipline *sd; 4074 int i, vol; 4075 4076 for (i = 0; i < softraid_cd.cd_ndevs; i++) 4077 if (softraid_cd.cd_devs[i]) { 4078 sc = softraid_cd.cd_devs[i]; 4079 /* we'll only have one softc */ 4080 break; 4081 } 4082 4083 if (!sc) { 4084 printf("no softraid softc found\n"); 4085 return; 4086 } 4087 4088 for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { 4089 /* XXX this will not work when we stagger disciplines */ 4090 if (!sc->sc_dis[i]) 4091 continue; 4092 4093 sd = sc->sc_dis[i]; 4094 printf("%s: ios pending: %d collisions %llu\n", 4095 sd->sd_meta->ssd_devname, 4096 sd->sd_wu_pending, 4097 sd->sd_wu_collisions); 4098 } 4099 } 4100 #endif 
/* SR_FANCY_STATS */ 4101 4102 #ifdef SR_DEBUG 4103 void 4104 sr_meta_print(struct sr_metadata *m) 4105 { 4106 int i; 4107 struct sr_meta_chunk *mc; 4108 struct sr_meta_opt *mo; 4109 4110 if (!(sr_debug & SR_D_META)) 4111 return; 4112 4113 printf("\tssd_magic 0x%llx\n", m->ssdi.ssd_magic); 4114 printf("\tssd_version %d\n", m->ssdi.ssd_version); 4115 printf("\tssd_flags 0x%x\n", m->ssdi.ssd_flags); 4116 printf("\tssd_uuid "); 4117 sr_uuid_print(&m->ssdi.ssd_uuid, 1); 4118 printf("\tssd_chunk_no %d\n", m->ssdi.ssd_chunk_no); 4119 printf("\tssd_chunk_id %d\n", m->ssdi.ssd_chunk_id); 4120 printf("\tssd_opt_no %d\n", m->ssdi.ssd_opt_no); 4121 printf("\tssd_volid %d\n", m->ssdi.ssd_volid); 4122 printf("\tssd_level %d\n", m->ssdi.ssd_level); 4123 printf("\tssd_size %lld\n", m->ssdi.ssd_size); 4124 printf("\tssd_devname %s\n", m->ssd_devname); 4125 printf("\tssd_vendor %s\n", m->ssdi.ssd_vendor); 4126 printf("\tssd_product %s\n", m->ssdi.ssd_product); 4127 printf("\tssd_revision %s\n", m->ssdi.ssd_revision); 4128 printf("\tssd_strip_size %d\n", m->ssdi.ssd_strip_size); 4129 printf("\tssd_checksum "); 4130 sr_checksum_print(m->ssd_checksum); 4131 printf("\n"); 4132 printf("\tssd_meta_flags 0x%x\n", m->ssd_meta_flags); 4133 printf("\tssd_ondisk %llu\n", m->ssd_ondisk); 4134 4135 mc = (struct sr_meta_chunk *)(m + 1); 4136 for (i = 0; i < m->ssdi.ssd_chunk_no; i++, mc++) { 4137 printf("\t\tscm_volid %d\n", mc->scmi.scm_volid); 4138 printf("\t\tscm_chunk_id %d\n", mc->scmi.scm_chunk_id); 4139 printf("\t\tscm_devname %s\n", mc->scmi.scm_devname); 4140 printf("\t\tscm_size %lld\n", mc->scmi.scm_size); 4141 printf("\t\tscm_coerced_size %lld\n",mc->scmi.scm_coerced_size); 4142 printf("\t\tscm_uuid "); 4143 sr_uuid_print(&mc->scmi.scm_uuid, 1); 4144 printf("\t\tscm_checksum "); 4145 sr_checksum_print(mc->scm_checksum); 4146 printf("\n"); 4147 printf("\t\tscm_status %d\n", mc->scm_status); 4148 } 4149 4150 mo = (struct sr_meta_opt *)(mc); 4151 for (i = 0; i < m->ssdi.ssd_opt_no; i++, mo++) { 4152 printf("\t\t\tsom_type %d\n", mo->somi.som_type); 4153 printf("\t\t\tsom_checksum "); 4154 sr_checksum_print(mo->som_checksum); 4155 printf("\n"); 4156 } 4157 } 4158 4159 void 4160 sr_dump_mem(u_int8_t *p, int len) 4161 { 4162 int i; 4163 4164 for (i = 0; i < len; i++) 4165 printf("%02x ", *p++); 4166 printf("\n"); 4167 } 4168 4169 #endif /* SR_DEBUG */ 4170