/* $OpenBSD: softraid.c,v 1.184 2009/12/07 14:33:38 jsing Exp $ */
/*
 * Copyright (c) 2007, 2008, 2009 Marco Peereboom <marco@peereboom.us>
 * Copyright (c) 2008 Chris Kuethe <ckuethe@openbsd.org>
 * Copyright (c) 2009 Joel Sing <jsing@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bio.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/device.h>
#include <sys/ioctl.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/kernel.h>
#include <sys/disk.h>
#include <sys/rwlock.h>
#include <sys/queue.h>
#include <sys/fcntl.h>
#include <sys/disklabel.h>
#include <sys/mount.h>
#include <sys/sensors.h>
#include <sys/stat.h>
#include <sys/conf.h>
#include <sys/uio.h>
#include <sys/workq.h>
#include <sys/kthread.h>

#ifdef AOE
#include <sys/mbuf.h>
#include <net/if_aoe.h>
#endif /* AOE */

#include <crypto/cryptodev.h>

#include <scsi/scsi_all.h>
#include <scsi/scsiconf.h>
#include <scsi/scsi_disk.h>

#include <dev/softraidvar.h>
#include <dev/rndvar.h>

/* #define SR_FANCY_STATS */

#ifdef SR_DEBUG
#define SR_FANCY_STATS
uint32_t	sr_debug = 0
		    /* | SR_D_CMD */
		    /* | SR_D_MISC */
		    /* | SR_D_INTR */
		    /* | SR_D_IOCTL */
		    /* | SR_D_CCB */
		    /* | SR_D_WU */
		    /* | SR_D_META */
		    /* | SR_D_DIS */
		    /* | SR_D_STATE */
		;
#endif

int		sr_match(struct device *, void *, void *);
void		sr_attach(struct device *, struct device *, void *);
int		sr_detach(struct device *, int);
int		sr_activate(struct device *, int);

struct cfattach softraid_ca = {
	sizeof(struct sr_softc), sr_match, sr_attach, sr_detach,
	sr_activate
};

struct cfdriver softraid_cd = {
	NULL, "softraid", DV_DULL
};

/* scsi & discipline */
int		sr_scsi_cmd(struct scsi_xfer *);
void		sr_minphys(struct buf *bp, struct scsi_link *sl);
void		sr_copy_internal_data(struct scsi_xfer *,
		    void *, size_t);
int		sr_scsi_ioctl(struct scsi_link *, u_long,
		    caddr_t, int, struct proc *);
int		sr_ioctl(struct device *, u_long, caddr_t);
int		sr_ioctl_inq(struct sr_softc *, struct bioc_inq *);
int		sr_ioctl_vol(struct sr_softc *, struct bioc_vol *);
int		sr_ioctl_disk(struct sr_softc *, struct bioc_disk *);
int		sr_ioctl_setstate(struct sr_softc *,
		    struct bioc_setstate *);
int		sr_ioctl_createraid(struct sr_softc *,
		    struct bioc_createraid *, int);
int		sr_ioctl_deleteraid(struct sr_softc *,
		    struct bioc_deleteraid *);
int		sr_ioctl_discipline(struct sr_softc *,
		    struct bioc_discipline *);
void		sr_chunks_unwind(struct sr_softc *,
		    struct sr_chunk_head *);
void		sr_discipline_free(struct sr_discipline *);
void		sr_discipline_shutdown(struct sr_discipline *);
int		sr_discipline_init(struct sr_discipline *, int);

/* utility functions */
void		sr_shutdown(void *);
void		sr_uuid_get(struct sr_uuid *);
void		sr_uuid_print(struct sr_uuid *, int);
void		sr_checksum_print(u_int8_t *);
int		sr_boot_assembly(struct sr_softc *);
int		sr_already_assembled(struct sr_discipline *);
int		sr_hotspare(struct sr_softc *, dev_t);
void		sr_hotspare_rebuild(struct sr_discipline *);
int		sr_rebuild_init(struct sr_discipline *, dev_t);
void		sr_rebuild(void *);
void		sr_rebuild_thread(void *);
void		sr_roam_chunks(struct sr_discipline *);
int		sr_chunk_in_use(struct sr_softc *, dev_t);

/* don't include these on RAMDISK */
#ifndef SMALL_KERNEL
void		sr_sensors_refresh(void *);
int		sr_sensors_create(struct sr_discipline *);
void		sr_sensors_delete(struct sr_discipline *);
#endif

/* metadata */
int		sr_meta_probe(struct sr_discipline *, dev_t *, int);
int		sr_meta_attach(struct sr_discipline *, int);
void		sr_meta_getdevname(struct sr_softc *, dev_t, char *,
		    int);
int		sr_meta_rw(struct sr_discipline *, dev_t, void *,
		    size_t, daddr64_t, long);
int		sr_meta_clear(struct sr_discipline *);
int		sr_meta_read(struct sr_discipline *);
int		sr_meta_validate(struct sr_discipline *, dev_t,
		    struct sr_metadata *, void *);
void		sr_meta_chunks_create(struct sr_softc *,
		    struct sr_chunk_head *);
void		sr_meta_init(struct sr_discipline *,
		    struct sr_chunk_head *);

/* hotplug magic */
void		sr_disk_attach(struct disk *, int);

struct sr_hotplug_list {
	void			(*sh_hotplug)(struct sr_discipline *,
				    struct disk *, int);
	struct sr_discipline	*sh_sd;

	SLIST_ENTRY(sr_hotplug_list) shl_link;
};
SLIST_HEAD(sr_hotplug_list_head, sr_hotplug_list);

struct sr_hotplug_list_head	sr_hotplug_callbacks;
extern void			(*softraid_disk_attach)(struct disk *, int);

/* scsi glue */
struct scsi_adapter sr_switch = {
	sr_scsi_cmd, sr_minphys, NULL, NULL, sr_scsi_ioctl
};

struct scsi_device sr_dev = {
	NULL, NULL, NULL, NULL
};

/* native metadata format */
int		sr_meta_native_bootprobe(struct sr_softc *,
		    struct device *, struct sr_metadata_list_head *);
#define SR_META_NOTCLAIMED	(0)
#define SR_META_CLAIMED		(1)
int		sr_meta_native_probe(struct sr_softc *,
		    struct sr_chunk *);
int		sr_meta_native_attach(struct sr_discipline *, int);
int		sr_meta_native_read(struct sr_discipline *, dev_t,
		    struct sr_metadata *, void *);
int		sr_meta_native_write(struct sr_discipline *, dev_t,
		    struct sr_metadata *, void *);

#ifdef SR_DEBUG
void		sr_meta_print(struct sr_metadata *);
#else
#define sr_meta_print(m)
#endif

/* the metadata driver should remain stateless */
struct sr_meta_driver {
	daddr64_t		smd_offset;	/* metadata location */
	u_int32_t		smd_size;	/* size of metadata */

	int			(*smd_probe)(struct sr_softc *,
				    struct sr_chunk *);
	int			(*smd_attach)(struct sr_discipline *, int);
	int			(*smd_detach)(struct sr_discipline *);
	int			(*smd_read)(struct sr_discipline *, dev_t,
				    struct sr_metadata *, void *);
	int			(*smd_write)(struct sr_discipline *, dev_t,
				    struct sr_metadata *, void *);
	int			(*smd_validate)(struct sr_discipline *,
				    struct sr_metadata *, void *);
} smd[] = {
	{ SR_META_OFFSET, SR_META_SIZE * 512,
	    sr_meta_native_probe, sr_meta_native_attach, NULL,
	    sr_meta_native_read, sr_meta_native_write, NULL },
#define SR_META_F_NATIVE	0
	{ 0, 0, NULL, NULL, NULL, NULL }
#define SR_META_F_INVALID	-1
};

int
sr_meta_attach(struct sr_discipline *sd, int force)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct sr_chunk_head	*cl;
	struct sr_chunk		*ch_entry;
	int			rv = 1, i = 0;

	DNPRINTF(SR_D_META, "%s: sr_meta_attach(%d)\n", DEVNAME(sc), force);

	/* in memory copy of metadata */
	sd->sd_meta = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO);
	if (!sd->sd_meta) {
		printf("%s: could not allocate memory for metadata\n",
		    DEVNAME(sc));
		goto bad;
	}

	if (sd->sd_meta_type != SR_META_F_NATIVE) {
		/* in memory copy of foreign metadata */
		sd->sd_meta_foreign = malloc(smd[sd->sd_meta_type].smd_size,
		    M_DEVBUF, M_ZERO);
		if (!sd->sd_meta_foreign) {
			/* unwind frees sd_meta */
			printf("%s: could not allocate memory for foreign "
			    "metadata\n", DEVNAME(sc));
			goto bad;
		}
	}

	/* we have a valid list now create an array index */
	cl = &sd->sd_vol.sv_chunk_list;
	SLIST_FOREACH(ch_entry, cl, src_link) {
		i++;
	}
	sd->sd_vol.sv_chunks = malloc(sizeof(struct sr_chunk *) * i,
	    M_DEVBUF, M_WAITOK | M_ZERO);

	/* fill out chunk array */
	i = 0;
	SLIST_FOREACH(ch_entry, cl, src_link)
		sd->sd_vol.sv_chunks[i++] = ch_entry;

	/* attach metadata */
	if (smd[sd->sd_meta_type].smd_attach(sd, force))
		goto bad;

	rv = 0;
bad:
	return (rv);
}

int
sr_meta_probe(struct sr_discipline *sd, dev_t *dt, int no_chunk)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct vnode		*vn;
	struct sr_chunk		*ch_entry, *ch_prev = NULL;
	struct sr_chunk_head	*cl;
	char			devname[32];
	int			i, d, type, found, prevf, error;
	dev_t			dev;

	DNPRINTF(SR_D_META, "%s: sr_meta_probe(%d)\n", DEVNAME(sc), no_chunk);

	if (no_chunk == 0)
		goto unwind;

	cl = &sd->sd_vol.sv_chunk_list;

	for (d = 0, prevf = SR_META_F_INVALID; d < no_chunk; d++) {
		ch_entry = malloc(sizeof(struct sr_chunk), M_DEVBUF,
		    M_WAITOK | M_ZERO);
		/* keep disks in user supplied order */
		if (ch_prev)
			SLIST_INSERT_AFTER(ch_prev, ch_entry, src_link);
		else
			SLIST_INSERT_HEAD(cl, ch_entry, src_link);
		ch_prev = ch_entry;
		dev = dt[d];
		ch_entry->src_dev_mm = dev;

		if (dev == NODEV) {
			ch_entry->src_meta.scm_status = BIOC_SDOFFLINE;
			continue;
		} else {
			sr_meta_getdevname(sc, dev, devname, sizeof(devname));
			if (bdevvp(dev, &vn)) {
				printf("%s: sr_meta_probe: can't allocate "
				    "vnode\n", DEVNAME(sc));
				goto unwind;
			}

			/*
			 * XXX leaving dev open for now; move this to attach
			 * and figure out the open/close dance for unwind.
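			 * (Presumably sr_chunks_unwind() is where these
			 * vnodes eventually get closed again.)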
			 */
			error = VOP_OPEN(vn, FREAD | FWRITE, NOCRED, 0);
			if (error) {
				DNPRINTF(SR_D_META, "%s: sr_meta_probe can't "
				    "open %s\n", DEVNAME(sc), devname);
				vput(vn);
				goto unwind;
			}

			strlcpy(ch_entry->src_devname, devname,
			    sizeof(ch_entry->src_devname));
			ch_entry->src_vn = vn;
		}

		/* determine if this is a device we understand */
		for (i = 0, found = SR_META_F_INVALID; smd[i].smd_probe; i++) {
			type = smd[i].smd_probe(sc, ch_entry);
			if (type == SR_META_F_INVALID)
				continue;
			else {
				found = type;
				break;
			}
		}

		if (found == SR_META_F_INVALID)
			goto unwind;
		if (prevf == SR_META_F_INVALID)
			prevf = found;
		if (prevf != found) {
			DNPRINTF(SR_D_META, "%s: prevf != found\n",
			    DEVNAME(sc));
			goto unwind;
		}
	}

	return (prevf);
unwind:
	return (SR_META_F_INVALID);
}

void
sr_meta_getdevname(struct sr_softc *sc, dev_t dev, char *buf, int size)
{
	int			maj, unit, part;
	char			*name;

	DNPRINTF(SR_D_META, "%s: sr_meta_getdevname(%p, %d)\n",
	    DEVNAME(sc), buf, size);

	if (!buf)
		return;

	maj = major(dev);
	part = DISKPART(dev);
	unit = DISKUNIT(dev);

	name = findblkname(maj);
	if (name == NULL)
		return;

	snprintf(buf, size, "%s%d%c", name, unit, part + 'a');
}

int
sr_meta_rw(struct sr_discipline *sd, dev_t dev, void *md, size_t sz,
    daddr64_t ofs, long flags)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct buf		b;
	int			rv = 1;

	DNPRINTF(SR_D_META, "%s: sr_meta_rw(0x%x, %p, %d, %llu 0x%x)\n",
	    DEVNAME(sc), dev, md, sz, ofs, flags);

	bzero(&b, sizeof(b));

	if (md == NULL) {
		printf("%s: read invalid metadata pointer\n", DEVNAME(sc));
		goto done;
	}
	b.b_flags = flags | B_PHYS;
	b.b_blkno = ofs;
	b.b_bcount = sz;
	b.b_bufsize = sz;
	b.b_resid = sz;
	b.b_data = md;
	b.b_error = 0;
	b.b_proc = curproc;
	b.b_dev = dev;
	b.b_iodone = NULL;
	if (bdevvp(dev, &b.b_vp)) {
		printf("%s: sr_meta_rw: can't allocate vnode\n", DEVNAME(sc));
		goto done;
	}
	if ((b.b_flags & B_READ) == 0)
		b.b_vp->v_numoutput++;

	LIST_INIT(&b.b_dep);
	VOP_STRATEGY(&b);
	biowait(&b);

	if (b.b_flags & B_ERROR) {
		printf("%s: 0x%x i/o error on block %llu while reading "
		    "metadata %d\n", DEVNAME(sc), dev, b.b_blkno, b.b_error);
		goto done;
	}
	rv = 0;
done:
	if (b.b_vp)
		vput(b.b_vp);

	return (rv);
}

int
sr_meta_clear(struct sr_discipline *sd)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct sr_chunk_head	*cl = &sd->sd_vol.sv_chunk_list;
	struct sr_chunk		*ch_entry;
	void			*m;
	int			rv = 1;

	DNPRINTF(SR_D_META, "%s: sr_meta_clear\n", DEVNAME(sc));

	if (sd->sd_meta_type != SR_META_F_NATIVE) {
		printf("%s: sr_meta_clear can not clear foreign metadata\n",
		    DEVNAME(sc));
		goto done;
	}

	m = malloc(SR_META_SIZE * 512, M_DEVBUF, M_WAITOK | M_ZERO);
	SLIST_FOREACH(ch_entry, cl, src_link) {
		if (sr_meta_native_write(sd, ch_entry->src_dev_mm, m, NULL)) {
			/* XXX mark disk offline */
			DNPRINTF(SR_D_META, "%s: sr_meta_clear failed to "
			    "clear %s\n", DEVNAME(sc), ch_entry->src_devname);
			rv++;
			continue;
		}
		bzero(&ch_entry->src_meta, sizeof(ch_entry->src_meta));
		bzero(&ch_entry->src_opt, sizeof(ch_entry->src_opt));
	}

	bzero(sd->sd_meta, SR_META_SIZE * 512);

	free(m, M_DEVBUF);
	rv = 0;
done:
	return (rv);
}

void
sr_meta_chunks_create(struct sr_softc *sc, struct sr_chunk_head *cl)
{
	struct sr_chunk		*ch_entry;
	struct sr_uuid		uuid;
	int			cid = 0;
	char			*name;
	u_int64_t		max_chunk_sz = 0, min_chunk_sz;

	DNPRINTF(SR_D_META, "%s: sr_meta_chunks_create\n", DEVNAME(sc));

	sr_uuid_get(&uuid);

	/* fill out stuff and get largest chunk size while looping */
	SLIST_FOREACH(ch_entry, cl, src_link) {
		name = ch_entry->src_devname;
		ch_entry->src_meta.scmi.scm_size = ch_entry->src_size;
		ch_entry->src_meta.scmi.scm_chunk_id = cid++;
		ch_entry->src_meta.scm_status = BIOC_SDONLINE;
		strlcpy(ch_entry->src_meta.scmi.scm_devname, name,
		    sizeof(ch_entry->src_meta.scmi.scm_devname));
		bcopy(&uuid, &ch_entry->src_meta.scmi.scm_uuid,
		    sizeof(ch_entry->src_meta.scmi.scm_uuid));

		if (ch_entry->src_meta.scmi.scm_size > max_chunk_sz)
			max_chunk_sz = ch_entry->src_meta.scmi.scm_size;
	}

	/* get smallest chunk size */
	min_chunk_sz = max_chunk_sz;
	SLIST_FOREACH(ch_entry, cl, src_link)
		if (ch_entry->src_meta.scmi.scm_size < min_chunk_sz)
			min_chunk_sz = ch_entry->src_meta.scmi.scm_size;

	/* equalize all sizes */
	SLIST_FOREACH(ch_entry, cl, src_link)
		ch_entry->src_meta.scmi.scm_coerced_size = min_chunk_sz;

	/* whine if chunks are not the same size */
	if (min_chunk_sz != max_chunk_sz)
		printf("%s: chunk sizes are not equal; up to %llu blocks "
		    "wasted per chunk\n",
		    DEVNAME(sc), max_chunk_sz - min_chunk_sz);
}

void
sr_meta_init(struct sr_discipline *sd, struct sr_chunk_head *cl)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct sr_metadata	*sm = sd->sd_meta;
	struct sr_meta_chunk	*im_sc;
	struct sr_meta_opt	*im_so;
	int			i, chunk_no;

	DNPRINTF(SR_D_META, "%s: sr_meta_init\n", DEVNAME(sc));

	if (!sm)
		return;

	/* initial metadata */
	sm->ssdi.ssd_magic = SR_MAGIC;
	sm->ssdi.ssd_version = SR_META_VERSION;
	sm->ssd_ondisk = 0;
	sm->ssdi.ssd_flags = sd->sd_meta_flags;
	/* get uuid from chunk 0 */
	bcopy(&sd->sd_vol.sv_chunks[0]->src_meta.scmi.scm_uuid,
	    &sm->ssdi.ssd_uuid,
	    sizeof(struct sr_uuid));

	/* volume is filled in createraid */

	/* add missing chunk bits */
	chunk_no = sm->ssdi.ssd_chunk_no;
	for (i = 0; i < chunk_no; i++) {
		im_sc = &sd->sd_vol.sv_chunks[i]->src_meta;
		im_sc->scmi.scm_volid = sm->ssdi.ssd_volid;
		sr_checksum(sc, im_sc, &im_sc->scm_checksum,
		    sizeof(struct sr_meta_chunk_invariant));

		/* carry optional meta also in chunk area */
		im_so = &sd->sd_vol.sv_chunks[i]->src_opt;
		bzero(im_so, sizeof(*im_so));
		if (sd->sd_type == SR_MD_CRYPTO) {
			sm->ssdi.ssd_opt_no = 1;
			im_so->somi.som_type = SR_OPT_CRYPTO;

			/*
			 * copy encrypted key / passphrase into optional
			 * metadata area
			 */
			bcopy(&sd->mds.mdd_crypto.scr_meta,
			    &im_so->somi.som_meta.smm_crypto,
			    sizeof(im_so->somi.som_meta.smm_crypto));

			sr_checksum(sc, im_so, im_so->som_checksum,
			    sizeof(struct sr_meta_opt_invariant));
		}
	}
}

void
sr_meta_save_callback(void *arg1, void *arg2)
{
	struct sr_discipline	*sd = arg1;
	int			s;

	s = splbio();

	if (sr_meta_save(arg1, SR_META_DIRTY))
		printf("%s: save metadata failed\n",
		    DEVNAME(sd->sd_sc));

	sd->sd_must_flush = 0;
	splx(s);
}

int
sr_meta_save(struct sr_discipline *sd, u_int32_t flags)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct sr_metadata	*sm = sd->sd_meta, *m;
	struct sr_meta_driver	*s;
	struct sr_chunk		*src;
	struct sr_meta_chunk	*cm;
	struct sr_workunit	wu;
	struct sr_meta_opt	*om;
	int			i;

	DNPRINTF(SR_D_META, "%s: sr_meta_save %s\n",
	    DEVNAME(sc), sd->sd_meta->ssd_devname);

	if (!sm) {
		printf("%s: no in memory copy of metadata\n", DEVNAME(sc));
		goto bad;
	}

	/* meta scratchpad */
	s = &smd[sd->sd_meta_type];
	m = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO);
	if (!m) {
		printf("%s: could not allocate metadata scratch area\n",
		    DEVNAME(sc));
		goto bad;
	}

	if (sm->ssdi.ssd_opt_no > 1)
		panic("not yet save > 1 optional metadata members");

	/* from here on out metadata is updated */
restart:
	sm->ssd_ondisk++;
	sm->ssd_meta_flags = flags;
	bcopy(sm, m, sizeof(*m));

	for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) {
		src = sd->sd_vol.sv_chunks[i];
		cm = (struct sr_meta_chunk *)(m + 1);
		bcopy(&src->src_meta, cm + i, sizeof(*cm));
	}

	/* optional metadata */
	om = (struct sr_meta_opt *)(cm + i);
	for (i = 0; i < sm->ssdi.ssd_opt_no; i++) {
		bcopy(&src->src_opt, om + i, sizeof(*om));
		sr_checksum(sc, om, &om->som_checksum,
		    sizeof(struct sr_meta_opt_invariant));
	}

	for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) {
		src = sd->sd_vol.sv_chunks[i];

		/* skip disks that are offline */
		if (src->src_meta.scm_status == BIOC_SDOFFLINE)
			continue;

		/* calculate metadata checksum for correct chunk */
		m->ssdi.ssd_chunk_id = i;
		sr_checksum(sc, m, &m->ssd_checksum,
		    sizeof(struct sr_meta_invariant));

#ifdef SR_DEBUG
		DNPRINTF(SR_D_META, "%s: sr_meta_save %s: volid: %d "
		    "chunkid: %d checksum: ",
		    DEVNAME(sc), src->src_meta.scmi.scm_devname,
		    m->ssdi.ssd_volid, m->ssdi.ssd_chunk_id);

		if (sr_debug & SR_D_META)
			sr_checksum_print((u_int8_t *)&m->ssd_checksum);
		DNPRINTF(SR_D_META, "\n");
		sr_meta_print(m);
#endif

		/* translate and write to disk */
		if (s->smd_write(sd, src->src_dev_mm, m, NULL /* XXX */)) {
			printf("%s: could not write metadata to %s\n",
			    DEVNAME(sc), src->src_devname);
			/* restart the meta write */
			src->src_meta.scm_status = BIOC_SDOFFLINE;
			/* XXX recalculate volume status */
			goto restart;
		}
	}

	/* not all disciplines have sync */
	if (sd->sd_scsi_sync) {
		bzero(&wu, sizeof(wu));
		wu.swu_fake = 1;
		wu.swu_dis = sd;
		sd->sd_scsi_sync(&wu);
	}
	free(m, M_DEVBUF);
	return (0);
bad:
	return (1);
}

int
sr_meta_read(struct sr_discipline *sd)
{
#ifdef SR_DEBUG
	struct sr_softc		*sc = sd->sd_sc;
#endif
	struct sr_chunk_head	*cl = &sd->sd_vol.sv_chunk_list;
	struct sr_metadata	*sm;
	struct sr_chunk		*ch_entry;
	struct sr_meta_chunk	*cp;
	struct sr_meta_driver	*s;
	struct sr_meta_opt	*om;
	void			*fm = NULL;
	int			no_disk = 0, got_meta = 0;

	DNPRINTF(SR_D_META, "%s: sr_meta_read\n", DEVNAME(sc));

	sm = malloc(SR_META_SIZE * 512, M_DEVBUF, M_WAITOK | M_ZERO);
	s = &smd[sd->sd_meta_type];
	if (sd->sd_meta_type != SR_META_F_NATIVE)
		fm = malloc(s->smd_size, M_DEVBUF, M_WAITOK | M_ZERO);

	cp = (struct sr_meta_chunk *)(sm + 1);
	SLIST_FOREACH(ch_entry, cl, src_link) {
		/* skip disks that are offline */
		if (ch_entry->src_meta.scm_status == BIOC_SDOFFLINE) {
			DNPRINTF(SR_D_META,
			    "%s: %s chunk marked offline, spoofing status\n",
			    DEVNAME(sc), ch_entry->src_devname);
			cp++;	/* adjust chunk pointer to match failure */
			continue;
		} else if (s->smd_read(sd, ch_entry->src_dev_mm, sm, fm)) {
			/* read and translate */
			/* XXX mark chunk offline, elsewhere!! */
			ch_entry->src_meta.scm_status = BIOC_SDOFFLINE;
			cp++;	/* adjust chunk pointer to match failure */
			DNPRINTF(SR_D_META, "%s: sr_meta_read failed\n",
			    DEVNAME(sc));
			continue;
		}

		if (sm->ssdi.ssd_magic != SR_MAGIC) {
			DNPRINTF(SR_D_META, "%s: sr_meta_read !SR_MAGIC\n",
			    DEVNAME(sc));
			continue;
		}

		/* validate metadata */
		if (sr_meta_validate(sd, ch_entry->src_dev_mm, sm, fm)) {
			DNPRINTF(SR_D_META, "%s: invalid metadata\n",
			    DEVNAME(sc));
			no_disk = -1;
			goto done;
		}

		/* assume first chunk contains metadata */
		if (got_meta == 0) {
			bcopy(sm, sd->sd_meta, sizeof(*sd->sd_meta));
			got_meta = 1;
		}

		bcopy(cp, &ch_entry->src_meta, sizeof(ch_entry->src_meta));

		if (sm->ssdi.ssd_opt_no > 1)
			panic("not yet read > 1 optional metadata members");

		if (sm->ssdi.ssd_opt_no) {
			om = (struct sr_meta_opt *)((u_int8_t *)(sm + 1) +
			    sizeof(struct sr_meta_chunk) *
			    sm->ssdi.ssd_chunk_no);
			bcopy(om, &ch_entry->src_opt,
			    sizeof(ch_entry->src_opt));

			if (om->somi.som_type == SR_OPT_CRYPTO) {
				bcopy(
				    &ch_entry->src_opt.somi.som_meta.smm_crypto,
				    &sd->mds.mdd_crypto.scr_meta,
				    sizeof(sd->mds.mdd_crypto.scr_meta));
			}
		}

		cp++;
		no_disk++;
	}

	free(sm, M_DEVBUF);
	if (fm)
		free(fm, M_DEVBUF);

done:
	DNPRINTF(SR_D_META, "%s: sr_meta_read found %d parts\n", DEVNAME(sc),
	    no_disk);
	return (no_disk);
}

int
sr_meta_validate(struct sr_discipline *sd, dev_t dev, struct sr_metadata *sm,
    void *fm)
{
	struct sr_softc		*sc = sd->sd_sc;
	struct sr_meta_driver	*s;
#ifdef SR_DEBUG
	struct sr_meta_chunk	*mc;
#endif
	char			devname[32];
	int			rv = 1;
	u_int8_t		checksum[MD5_DIGEST_LENGTH];

	DNPRINTF(SR_D_META, "%s: sr_meta_validate(%p)\n", DEVNAME(sc), sm);

	sr_meta_getdevname(sc, dev, devname, sizeof(devname));

	s = &smd[sd->sd_meta_type];
	if (sd->sd_meta_type != SR_META_F_NATIVE)
		if (s->smd_validate(sd, sm, fm)) {
			printf("%s: invalid foreign metadata\n", DEVNAME(sc));
			goto done;
		}

	/*
	 * at this point all foreign metadata has been translated to the native
	 * format and will be treated just like the native format
	 */

	if (sm->ssdi.ssd_magic != SR_MAGIC) {
		printf("%s: not valid softraid metadata\n", DEVNAME(sc));
		goto done;
	}

	if (sm->ssdi.ssd_version != SR_META_VERSION) {
		printf("%s: %s can not read metadata version %u, expected %u\n",
		    DEVNAME(sc), devname, sm->ssdi.ssd_version,
		    SR_META_VERSION);
		goto done;
	}

	sr_checksum(sc, sm, &checksum, sizeof(struct sr_meta_invariant));
	if (bcmp(&checksum, &sm->ssd_checksum, sizeof(checksum))) {
		printf("%s: invalid metadata checksum\n", DEVNAME(sc));
		goto done;
	}

	/* XXX do other checksums */

#ifdef SR_DEBUG
	/* warn if disk changed order */
	mc = (struct sr_meta_chunk *)(sm + 1);
	if (strncmp(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname, devname,
	    sizeof(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname)))
		DNPRINTF(SR_D_META, "%s: roaming device %s -> %s\n",
		    DEVNAME(sc), mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname,
		    devname);
#endif

	/* we have meta data on disk */
	DNPRINTF(SR_D_META, "%s: sr_meta_validate valid metadata %s\n",
	    DEVNAME(sc), devname);

	rv = 0;
done:
	return (rv);
}

int
sr_meta_native_bootprobe(struct sr_softc *sc, struct device *dv,
    struct sr_metadata_list_head *mlh)
{
	struct vnode		*vn;
	struct disklabel	label;
	struct sr_metadata	*md = NULL;
	struct sr_discipline	*fake_sd = NULL;
	struct sr_metadata_list *mle;
	char			devname[32];
	dev_t			dev, devr;
	int			error, i, majdev;
	int			rv = SR_META_NOTCLAIMED;

	DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe\n", DEVNAME(sc));

	majdev = findblkmajor(dv);
	if (majdev == -1)
		goto done;
	dev = MAKEDISKDEV(majdev, dv->dv_unit, RAW_PART);
	if (bdevvp(dev, &vn)) {
		printf("%s: sr_meta_native_bootprobe: can't allocate vnode\n",
		    DEVNAME(sc));
		goto done;
	}

	/* open device */
	error = VOP_OPEN(vn, FREAD, NOCRED, 0);
	if (error) {
		DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe open "
		    "failed\n", DEVNAME(sc));
		vput(vn);
		goto done;
	}

	/* get disklabel */
	error = VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD, NOCRED, 0);
	if (error) {
		DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe ioctl "
		    "failed\n", DEVNAME(sc));
		VOP_CLOSE(vn, FREAD, NOCRED, 0);
		vput(vn);
		goto done;
	}

	/* we are done, close device */
	error = VOP_CLOSE(vn, FREAD, NOCRED, 0);
	if (error) {
		DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe close "
		    "failed\n", DEVNAME(sc));
		vput(vn);
		goto done;
	}
	vput(vn);

	md = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO);
	if (md == NULL) {
		printf("%s: not enough memory for metadata buffer\n",
		    DEVNAME(sc));
		goto done;
	}

	/* create fake sd to use utility functions */
	fake_sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_ZERO);
	if (fake_sd == NULL) {
		printf("%s: not enough memory for fake discipline\n",
		    DEVNAME(sc));
		goto done;
	}
	fake_sd->sd_sc = sc;
	fake_sd->sd_meta_type = SR_META_F_NATIVE;

	for (i = 0; i < MAXPARTITIONS; i++) {
		if (label.d_partitions[i].p_fstype != FS_RAID)
			continue;

		/* open partition */
		devr = MAKEDISKDEV(majdev, dv->dv_unit, i);
		if (bdevvp(devr, &vn)) {
			printf("%s: sr_meta_native_bootprobe: can't allocate "
			    "vnode for partition\n", DEVNAME(sc));
			goto done;
		}
		error = VOP_OPEN(vn, FREAD, NOCRED, 0);
		if (error) {
			DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe "
			    "open failed, partition %d\n",
			    DEVNAME(sc), i);
			vput(vn);
			continue;
		}

		if (sr_meta_native_read(fake_sd, devr, md, NULL)) {
			printf("%s: native bootprobe could not read native "
			    "metadata\n", DEVNAME(sc));
			VOP_CLOSE(vn, FREAD, NOCRED, 0);
			vput(vn);
			continue;
		}

		/* are we a softraid partition? */
		if (md->ssdi.ssd_magic != SR_MAGIC) {
			VOP_CLOSE(vn, FREAD, NOCRED, 0);
			vput(vn);
			continue;
		}

		sr_meta_getdevname(sc, devr, devname, sizeof(devname));
		if (sr_meta_validate(fake_sd, devr, md, NULL) == 0) {
			if (md->ssdi.ssd_flags & BIOC_SCNOAUTOASSEMBLE) {
				DNPRINTF(SR_D_META, "%s: don't save %s\n",
				    DEVNAME(sc), devname);
			} else {
				/* XXX fix M_WAITOK, this is boot time */
				mle = malloc(sizeof(*mle), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				bcopy(md, &mle->sml_metadata,
				    SR_META_SIZE * 512);
				mle->sml_mm = devr;
				mle->sml_vn = vn;
				SLIST_INSERT_HEAD(mlh, mle, sml_link);
				rv = SR_META_CLAIMED;
			}
		}

		/* we are done, close partition */
		VOP_CLOSE(vn, FREAD, NOCRED, 0);
		vput(vn);
	}

done:
	if (fake_sd)
		free(fake_sd, M_DEVBUF);
	if (md)
		free(md, M_DEVBUF);

	return (rv);
}

int
sr_boot_assembly(struct sr_softc *sc)
{
	struct device		*dv;
	struct bioc_createraid	bc;
	struct sr_metadata_list_head mlh;
	struct sr_metadata_list *mle, *mlenext, *mle1, *mle2;
	struct sr_metadata	*metadata;
	struct sr_boot_volume_head bvh;
	struct sr_boot_volume	*vol, *vp1, *vp2;
	struct sr_meta_chunk	*hm;
	struct sr_chunk_head	*cl;
	struct sr_chunk		*hotspare, *chunk, *last;
	u_int32_t		chunk_id;
	u_int64_t		*ondisk = NULL;
	dev_t			*devs = NULL;
	char			devname[32];
	int			rv = 0, i;

	DNPRINTF(SR_D_META, "%s: sr_boot_assembly\n", DEVNAME(sc));

	SLIST_INIT(&mlh);

	TAILQ_FOREACH(dv, &alldevs, dv_list) {
		if (dv->dv_class != DV_DISK)
			continue;

		/* Only check sd(4) and wd(4) devices. */
		if (strcmp(dv->dv_cfdata->cf_driver->cd_name, "sd") &&
		    strcmp(dv->dv_cfdata->cf_driver->cd_name, "wd"))
			continue;

		/* native softraid uses partitions */
		if (sr_meta_native_bootprobe(sc, dv, &mlh) == SR_META_CLAIMED)
			continue;

		/* probe non-native disks */
	}

	/*
	 * Create a list of volumes and associate chunks with each volume.
	 */
	SLIST_INIT(&bvh);
	for (mle = SLIST_FIRST(&mlh); mle != SLIST_END(&mlh); mle = mlenext) {

		mlenext = SLIST_NEXT(mle, sml_link);
		SLIST_REMOVE(&mlh, mle, sr_metadata_list, sml_link);

		metadata = (struct sr_metadata *)&mle->sml_metadata;
		mle->sml_chunk_id = metadata->ssdi.ssd_chunk_id;

		SLIST_FOREACH(vol, &bvh, sbv_link) {
			if (bcmp(&metadata->ssdi.ssd_uuid, &vol->sbv_uuid,
			    sizeof(metadata->ssdi.ssd_uuid)) == 0)
				break;
		}

		if (vol == NULL) {
			vol = malloc(sizeof(struct sr_boot_volume),
			    M_DEVBUF, M_NOWAIT | M_CANFAIL | M_ZERO);
			if (vol == NULL) {
				printf("%s: failed to allocate boot volume!\n",
				    DEVNAME(sc));
				goto unwind;
			}

			vol->sbv_level = metadata->ssdi.ssd_level;
			vol->sbv_volid = metadata->ssdi.ssd_volid;
			vol->sbv_chunk_no = metadata->ssdi.ssd_chunk_no;
			bcopy(&metadata->ssdi.ssd_uuid, &vol->sbv_uuid,
			    sizeof(metadata->ssdi.ssd_uuid));
			SLIST_INIT(&vol->sml);

			/*
			 * Maintain volume order.
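			 * (Keep the boot volume list sorted by volume id so
			 * that volumes are assembled in a stable order.)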
			 */
			vp2 = NULL;
			SLIST_FOREACH(vp1, &bvh, sbv_link) {
				if (vp1->sbv_volid > vol->sbv_volid)
					break;
				vp2 = vp1;
			}
			if (vp2 == NULL) {
				DNPRINTF(SR_D_META, "%s: insert volume %u "
				    "at head\n", DEVNAME(sc), vol->sbv_volid);
				SLIST_INSERT_HEAD(&bvh, vol, sbv_link);
			} else {
				DNPRINTF(SR_D_META, "%s: insert volume %u "
				    "after %u\n", DEVNAME(sc), vol->sbv_volid,
				    vp2->sbv_volid);
				SLIST_INSERT_AFTER(vp2, vol, sbv_link);
			}
		}

		/* Maintain chunk order. */
		mle2 = NULL;
		SLIST_FOREACH(mle1, &vol->sml, sml_link) {
			if (mle1->sml_chunk_id > mle->sml_chunk_id)
				break;
			mle2 = mle1;
		}
		if (mle2 == NULL) {
			DNPRINTF(SR_D_META, "%s: volume %u insert chunk %u "
			    "at head\n", DEVNAME(sc), vol->sbv_volid,
			    mle->sml_chunk_id);
			SLIST_INSERT_HEAD(&vol->sml, mle, sml_link);
		} else {
			DNPRINTF(SR_D_META, "%s: volume %u insert chunk %u "
			    "after %u\n", DEVNAME(sc), vol->sbv_volid,
			    mle->sml_chunk_id, mle2->sml_chunk_id);
			SLIST_INSERT_AFTER(mle2, mle, sml_link);
		}

		vol->sbv_dev_no++;
	}

	/* Allocate memory for device and ondisk version arrays. */
	devs = malloc(BIOC_CRMAXLEN * sizeof(dev_t), M_DEVBUF,
	    M_NOWAIT | M_CANFAIL);
	if (devs == NULL) {
		printf("%s: failed to allocate device array\n", DEVNAME(sc));
		goto unwind;
	}
	ondisk = malloc(BIOC_CRMAXLEN * sizeof(u_int64_t), M_DEVBUF,
	    M_NOWAIT | M_CANFAIL);
	if (ondisk == NULL) {
		printf("%s: failed to allocate ondisk array\n", DEVNAME(sc));
		goto unwind;
	}

	/*
	 * Assemble hotspare "volumes".
	 */
	SLIST_FOREACH(vol, &bvh, sbv_link) {

		/* Check if this is a hotspare "volume". */
		if (vol->sbv_level != SR_HOTSPARE_LEVEL ||
		    vol->sbv_chunk_no != 1)
			continue;

#ifdef SR_DEBUG
		DNPRINTF(SR_D_META, "%s: assembling hotspare volume ",
		    DEVNAME(sc));
		if (sr_debug & SR_D_META)
			sr_uuid_print(&vol->sbv_uuid, 0);
		DNPRINTF(SR_D_META, " volid %u with %u chunks\n",
		    vol->sbv_volid, vol->sbv_chunk_no);
#endif

		/* Create hotspare chunk metadata. */
		hotspare = malloc(sizeof(struct sr_chunk), M_DEVBUF,
		    M_NOWAIT | M_CANFAIL | M_ZERO);
		if (hotspare == NULL) {
			printf("%s: failed to allocate hotspare\n",
			    DEVNAME(sc));
			goto unwind;
		}

		mle = SLIST_FIRST(&vol->sml);
		sr_meta_getdevname(sc, mle->sml_mm, devname, sizeof(devname));
		hotspare->src_dev_mm = mle->sml_mm;
		hotspare->src_vn = mle->sml_vn;
		strlcpy(hotspare->src_devname, devname,
		    sizeof(hotspare->src_devname));
		hotspare->src_size = metadata->ssdi.ssd_size;

		hm = &hotspare->src_meta;
		hm->scmi.scm_volid = SR_HOTSPARE_VOLID;
		hm->scmi.scm_chunk_id = 0;
		hm->scmi.scm_size = metadata->ssdi.ssd_size;
		hm->scmi.scm_coerced_size = metadata->ssdi.ssd_size;
		strlcpy(hm->scmi.scm_devname, devname,
		    sizeof(hm->scmi.scm_devname));
		bcopy(&metadata->ssdi.ssd_uuid, &hm->scmi.scm_uuid,
		    sizeof(struct sr_uuid));

		sr_checksum(sc, hm, &hm->scm_checksum,
		    sizeof(struct sr_meta_chunk_invariant));

		hm->scm_status = BIOC_SDHOTSPARE;

		/*
		 * Add chunk to hotspare list.
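		 * (The global list lives in sc_hotspare_list, is protected
		 * by sc_hs_lock, and new spares are appended at the tail.)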
*/ 1167 rw_enter_write(&sc->sc_hs_lock); 1168 cl = &sc->sc_hotspare_list; 1169 if (SLIST_EMPTY(cl)) 1170 SLIST_INSERT_HEAD(cl, hotspare, src_link); 1171 else { 1172 SLIST_FOREACH(chunk, cl, src_link) 1173 last = chunk; 1174 SLIST_INSERT_AFTER(last, hotspare, src_link); 1175 } 1176 sc->sc_hotspare_no++; 1177 rw_exit_write(&sc->sc_hs_lock); 1178 1179 } 1180 1181 /* 1182 * Assemble RAID volumes. 1183 */ 1184 SLIST_FOREACH(vol, &bvh, sbv_link) { 1185 1186 /* Check if this is a hotspare "volume". */ 1187 if (vol->sbv_level == SR_HOTSPARE_LEVEL && 1188 vol->sbv_chunk_no == 1) 1189 continue; 1190 1191 #ifdef SR_DEBUG 1192 DNPRINTF(SR_D_META, "%s: assembling volume ", DEVNAME(sc)); 1193 if (sr_debug & SR_D_META) 1194 sr_uuid_print(&vol->sbv_uuid, 0); 1195 DNPRINTF(SR_D_META, " volid %u with %u chunks\n", 1196 vol->sbv_volid, vol->sbv_chunk_no); 1197 #endif 1198 1199 for (i = 0; i < BIOC_CRMAXLEN; i++) { 1200 devs[i] = NODEV; /* mark device as illegal */ 1201 ondisk[i] = 0; 1202 } 1203 1204 SLIST_FOREACH(mle, &vol->sml, sml_link) { 1205 metadata = (struct sr_metadata *)&mle->sml_metadata; 1206 chunk_id = metadata->ssdi.ssd_chunk_id; 1207 1208 if (devs[chunk_id] != NODEV) { 1209 vol->sbv_dev_no--; 1210 sr_meta_getdevname(sc, mle->sml_mm, devname, 1211 sizeof(devname)); 1212 printf("%s: found duplicate chunk %u for " 1213 "volume %u on device %s\n", DEVNAME(sc), 1214 chunk_id, vol->sbv_volid, devname); 1215 } 1216 1217 if (devs[chunk_id] == NODEV || 1218 metadata->ssd_ondisk > ondisk[chunk_id]) { 1219 devs[chunk_id] = mle->sml_mm; 1220 ondisk[chunk_id] = metadata->ssd_ondisk; 1221 DNPRINTF(SR_D_META, "%s: using ondisk " 1222 "metadata version %llu for chunk %u\n", 1223 DEVNAME(sc), ondisk[chunk_id], chunk_id); 1224 } 1225 } 1226 1227 if (vol->sbv_chunk_no != vol->sbv_dev_no) { 1228 printf("%s: not all chunks were provided; " 1229 "attempting to bring volume %d online\n", 1230 DEVNAME(sc), vol->sbv_volid); 1231 } 1232 1233 bzero(&bc, sizeof(bc)); 1234 bc.bc_level = vol->sbv_level; 1235 bc.bc_dev_list_len = vol->sbv_chunk_no * sizeof(dev_t); 1236 bc.bc_dev_list = devs; 1237 bc.bc_flags = BIOC_SCDEVT; 1238 1239 rw_enter_write(&sc->sc_lock); 1240 sr_ioctl_createraid(sc, &bc, 0); 1241 rw_exit_write(&sc->sc_lock); 1242 1243 rv++; 1244 } 1245 1246 /* done with metadata */ 1247 unwind: 1248 for (vp1 = SLIST_FIRST(&bvh); vp1 != SLIST_END(&bvh); vp1 = vp2) { 1249 vp2 = SLIST_NEXT(vp1, sbv_link); 1250 for (mle1 = SLIST_FIRST(&vp1->sml); 1251 mle1 != SLIST_END(&vp1->sml); mle1 = mle2) { 1252 mle2 = SLIST_NEXT(mle1, sml_link); 1253 free(mle1, M_DEVBUF); 1254 } 1255 free(vp1, M_DEVBUF); 1256 } 1257 for (mle = SLIST_FIRST(&mlh); mle != SLIST_END(&mlh); mle = mle2) { 1258 mle2 = SLIST_NEXT(mle, sml_link); 1259 free(mle, M_DEVBUF); 1260 } 1261 SLIST_INIT(&mlh); 1262 1263 if (devs) 1264 free(devs, M_DEVBUF); 1265 if (ondisk) 1266 free(ondisk, M_DEVBUF); 1267 1268 return (rv); 1269 } 1270 1271 int 1272 sr_meta_native_probe(struct sr_softc *sc, struct sr_chunk *ch_entry) 1273 { 1274 struct disklabel label; 1275 char *devname; 1276 int error, part; 1277 daddr64_t size; 1278 1279 DNPRINTF(SR_D_META, "%s: sr_meta_native_probe(%s)\n", 1280 DEVNAME(sc), ch_entry->src_devname); 1281 1282 devname = ch_entry->src_devname; 1283 part = DISKPART(ch_entry->src_dev_mm); 1284 1285 /* get disklabel */ 1286 error = VOP_IOCTL(ch_entry->src_vn, DIOCGDINFO, (caddr_t)&label, FREAD, 1287 NOCRED, 0); 1288 if (error) { 1289 DNPRINTF(SR_D_META, "%s: %s can't obtain disklabel\n", 1290 DEVNAME(sc), devname); 1291 goto unwind; 1292 } 1293 1294 /* 
make sure the partition is of the right type */ 1295 if (label.d_partitions[part].p_fstype != FS_RAID) { 1296 DNPRINTF(SR_D_META, 1297 "%s: %s partition not of type RAID (%d)\n", DEVNAME(sc), 1298 devname, 1299 label.d_partitions[part].p_fstype); 1300 goto unwind; 1301 } 1302 1303 size = DL_GETPSIZE(&label.d_partitions[part]) - 1304 SR_META_SIZE - SR_META_OFFSET; 1305 if (size <= 0) { 1306 DNPRINTF(SR_D_META, "%s: %s partition too small\n", DEVNAME(sc), 1307 devname); 1308 goto unwind; 1309 } 1310 ch_entry->src_size = size; 1311 1312 DNPRINTF(SR_D_META, "%s: probe found %s size %d\n", DEVNAME(sc), 1313 devname, size); 1314 1315 return (SR_META_F_NATIVE); 1316 unwind: 1317 DNPRINTF(SR_D_META, "%s: invalid device: %s\n", DEVNAME(sc), 1318 devname ? devname : "nodev"); 1319 return (SR_META_F_INVALID); 1320 } 1321 1322 int 1323 sr_meta_native_attach(struct sr_discipline *sd, int force) 1324 { 1325 struct sr_softc *sc = sd->sd_sc; 1326 struct sr_chunk_head *cl = &sd->sd_vol.sv_chunk_list; 1327 struct sr_metadata *md = NULL; 1328 struct sr_chunk *ch_entry, *ch_next; 1329 struct sr_uuid uuid; 1330 u_int64_t version = 0; 1331 int sr, not_sr, rv = 1, d, expected = -1, old_meta = 0; 1332 1333 DNPRINTF(SR_D_META, "%s: sr_meta_native_attach\n", DEVNAME(sc)); 1334 1335 md = malloc(SR_META_SIZE * 512, M_DEVBUF, M_ZERO); 1336 if (md == NULL) { 1337 printf("%s: not enough memory for metadata buffer\n", 1338 DEVNAME(sc)); 1339 goto bad; 1340 } 1341 1342 bzero(&uuid, sizeof uuid); 1343 1344 sr = not_sr = d = 0; 1345 SLIST_FOREACH(ch_entry, cl, src_link) { 1346 if (ch_entry->src_dev_mm == NODEV) 1347 continue; 1348 1349 if (sr_meta_native_read(sd, ch_entry->src_dev_mm, md, NULL)) { 1350 printf("%s: could not read native metadata\n", 1351 DEVNAME(sc)); 1352 goto bad; 1353 } 1354 1355 if (md->ssdi.ssd_magic == SR_MAGIC) { 1356 sr++; 1357 if (d == 0) { 1358 bcopy(&md->ssdi.ssd_uuid, &uuid, sizeof uuid); 1359 expected = md->ssdi.ssd_chunk_no; 1360 version = md->ssd_ondisk; 1361 d++; 1362 continue; 1363 } else if (bcmp(&md->ssdi.ssd_uuid, &uuid, 1364 sizeof uuid)) { 1365 printf("%s: not part of the same volume\n", 1366 DEVNAME(sc)); 1367 goto bad; 1368 } 1369 if (md->ssd_ondisk != version) { 1370 old_meta++; 1371 version = MAX(md->ssd_ondisk, version); 1372 } 1373 } else 1374 not_sr++; 1375 } 1376 1377 if (sr && not_sr) { 1378 printf("%s: not all chunks are of the native metadata format\n", 1379 DEVNAME(sc)); 1380 goto bad; 1381 } 1382 1383 /* mixed metadata versions; mark bad disks offline */ 1384 if (old_meta) { 1385 d = 0; 1386 for (ch_entry = SLIST_FIRST(cl); ch_entry != SLIST_END(cl); 1387 ch_entry = ch_next, d++) { 1388 ch_next = SLIST_NEXT(ch_entry, src_link); 1389 1390 /* XXX do we want to read this again? 
*/ 1391 if (ch_entry->src_dev_mm == NODEV) 1392 panic("src_dev_mm == NODEV"); 1393 if (sr_meta_native_read(sd, ch_entry->src_dev_mm, md, 1394 NULL)) 1395 printf("%s: could not read native metadata\n", 1396 DEVNAME(sc)); 1397 if (md->ssd_ondisk != version) 1398 sd->sd_vol.sv_chunks[d]->src_meta.scm_status = 1399 BIOC_SDOFFLINE; 1400 } 1401 } 1402 1403 if (expected != sr && !force && expected != -1) { 1404 DNPRINTF(SR_D_META, "%s: not all chunks were provided, trying " 1405 "anyway\n", DEVNAME(sc)); 1406 } 1407 1408 rv = 0; 1409 bad: 1410 if (md) 1411 free(md, M_DEVBUF); 1412 return (rv); 1413 } 1414 1415 int 1416 sr_meta_native_read(struct sr_discipline *sd, dev_t dev, 1417 struct sr_metadata *md, void *fm) 1418 { 1419 #ifdef SR_DEBUG 1420 struct sr_softc *sc = sd->sd_sc; 1421 #endif 1422 DNPRINTF(SR_D_META, "%s: sr_meta_native_read(0x%x, %p)\n", 1423 DEVNAME(sc), dev, md); 1424 1425 return (sr_meta_rw(sd, dev, md, SR_META_SIZE * 512, SR_META_OFFSET, 1426 B_READ)); 1427 } 1428 1429 int 1430 sr_meta_native_write(struct sr_discipline *sd, dev_t dev, 1431 struct sr_metadata *md, void *fm) 1432 { 1433 #ifdef SR_DEBUG 1434 struct sr_softc *sc = sd->sd_sc; 1435 #endif 1436 DNPRINTF(SR_D_META, "%s: sr_meta_native_write(0x%x, %p)\n", 1437 DEVNAME(sc), dev, md); 1438 1439 return (sr_meta_rw(sd, dev, md, SR_META_SIZE * 512, SR_META_OFFSET, 1440 B_WRITE)); 1441 } 1442 1443 void 1444 sr_hotplug_register(struct sr_discipline *sd, void *func) 1445 { 1446 struct sr_hotplug_list *mhe; 1447 1448 DNPRINTF(SR_D_MISC, "%s: sr_hotplug_register: %p\n", 1449 DEVNAME(sd->sd_sc), func); 1450 1451 /* make sure we aren't on the list yet */ 1452 SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link) 1453 if (mhe->sh_hotplug == func) 1454 return; 1455 1456 mhe = malloc(sizeof(struct sr_hotplug_list), M_DEVBUF, 1457 M_WAITOK | M_ZERO); 1458 mhe->sh_hotplug = func; 1459 mhe->sh_sd = sd; 1460 SLIST_INSERT_HEAD(&sr_hotplug_callbacks, mhe, shl_link); 1461 } 1462 1463 void 1464 sr_hotplug_unregister(struct sr_discipline *sd, void *func) 1465 { 1466 struct sr_hotplug_list *mhe; 1467 1468 DNPRINTF(SR_D_MISC, "%s: sr_hotplug_unregister: %s %p\n", 1469 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, func); 1470 1471 /* make sure we are on the list yet */ 1472 SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link) 1473 if (mhe->sh_hotplug == func) { 1474 SLIST_REMOVE(&sr_hotplug_callbacks, mhe, 1475 sr_hotplug_list, shl_link); 1476 free(mhe, M_DEVBUF); 1477 if (SLIST_EMPTY(&sr_hotplug_callbacks)) 1478 SLIST_INIT(&sr_hotplug_callbacks); 1479 return; 1480 } 1481 } 1482 1483 void 1484 sr_disk_attach(struct disk *diskp, int action) 1485 { 1486 struct sr_hotplug_list *mhe; 1487 1488 SLIST_FOREACH(mhe, &sr_hotplug_callbacks, shl_link) 1489 if (mhe->sh_sd->sd_ready) 1490 mhe->sh_hotplug(mhe->sh_sd, diskp, action); 1491 } 1492 1493 int 1494 sr_match(struct device *parent, void *match, void *aux) 1495 { 1496 return (1); 1497 } 1498 1499 void 1500 sr_attach(struct device *parent, struct device *self, void *aux) 1501 { 1502 struct sr_softc *sc = (void *)self; 1503 1504 DNPRINTF(SR_D_MISC, "\n%s: sr_attach", DEVNAME(sc)); 1505 1506 rw_init(&sc->sc_lock, "sr_lock"); 1507 rw_init(&sc->sc_hs_lock, "sr_hs_lock"); 1508 1509 SLIST_INIT(&sr_hotplug_callbacks); 1510 SLIST_INIT(&sc->sc_hotspare_list); 1511 1512 if (bio_register(&sc->sc_dev, sr_ioctl) != 0) 1513 printf("%s: controller registration failed", DEVNAME(sc)); 1514 else 1515 sc->sc_ioctl = sr_ioctl; 1516 1517 printf("\n"); 1518 1519 softraid_disk_attach = sr_disk_attach; 1520 1521 
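	/*
	 * Scan the already attached disks for existing softraid metadata
	 * and assemble any previously configured volumes (see
	 * sr_boot_assembly() above).
	 */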
sr_boot_assembly(sc); 1522 } 1523 1524 int 1525 sr_detach(struct device *self, int flags) 1526 { 1527 return (0); 1528 } 1529 1530 int 1531 sr_activate(struct device *self, int act) 1532 { 1533 return (1); 1534 } 1535 1536 void 1537 sr_minphys(struct buf *bp, struct scsi_link *sl) 1538 { 1539 DNPRINTF(SR_D_MISC, "sr_minphys: %d\n", bp->b_bcount); 1540 1541 /* XXX currently using SR_MAXFER = MAXPHYS */ 1542 if (bp->b_bcount > SR_MAXFER) 1543 bp->b_bcount = SR_MAXFER; 1544 minphys(bp); 1545 } 1546 1547 void 1548 sr_copy_internal_data(struct scsi_xfer *xs, void *v, size_t size) 1549 { 1550 size_t copy_cnt; 1551 1552 DNPRINTF(SR_D_MISC, "sr_copy_internal_data xs: %p size: %d\n", 1553 xs, size); 1554 1555 if (xs->datalen) { 1556 copy_cnt = MIN(size, xs->datalen); 1557 bcopy(v, xs->data, copy_cnt); 1558 } 1559 } 1560 1561 int 1562 sr_ccb_alloc(struct sr_discipline *sd) 1563 { 1564 struct sr_ccb *ccb; 1565 int i; 1566 1567 if (!sd) 1568 return (1); 1569 1570 DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc\n", DEVNAME(sd->sd_sc)); 1571 1572 if (sd->sd_ccb) 1573 return (1); 1574 1575 sd->sd_ccb = malloc(sizeof(struct sr_ccb) * 1576 sd->sd_max_wu * sd->sd_max_ccb_per_wu, M_DEVBUF, M_WAITOK | M_ZERO); 1577 TAILQ_INIT(&sd->sd_ccb_freeq); 1578 for (i = 0; i < sd->sd_max_wu * sd->sd_max_ccb_per_wu; i++) { 1579 ccb = &sd->sd_ccb[i]; 1580 ccb->ccb_dis = sd; 1581 sr_ccb_put(ccb); 1582 } 1583 1584 DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc ccb: %d\n", 1585 DEVNAME(sd->sd_sc), sd->sd_max_wu * sd->sd_max_ccb_per_wu); 1586 1587 return (0); 1588 } 1589 1590 void 1591 sr_ccb_free(struct sr_discipline *sd) 1592 { 1593 struct sr_ccb *ccb; 1594 1595 if (!sd) 1596 return; 1597 1598 DNPRINTF(SR_D_CCB, "%s: sr_ccb_free %p\n", DEVNAME(sd->sd_sc), sd); 1599 1600 while ((ccb = TAILQ_FIRST(&sd->sd_ccb_freeq)) != NULL) 1601 TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link); 1602 1603 if (sd->sd_ccb) 1604 free(sd->sd_ccb, M_DEVBUF); 1605 } 1606 1607 struct sr_ccb * 1608 sr_ccb_get(struct sr_discipline *sd) 1609 { 1610 struct sr_ccb *ccb; 1611 int s; 1612 1613 s = splbio(); 1614 1615 ccb = TAILQ_FIRST(&sd->sd_ccb_freeq); 1616 if (ccb) { 1617 TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link); 1618 ccb->ccb_state = SR_CCB_INPROGRESS; 1619 } 1620 1621 splx(s); 1622 1623 DNPRINTF(SR_D_CCB, "%s: sr_ccb_get: %p\n", DEVNAME(sd->sd_sc), 1624 ccb); 1625 1626 return (ccb); 1627 } 1628 1629 void 1630 sr_ccb_put(struct sr_ccb *ccb) 1631 { 1632 struct sr_discipline *sd = ccb->ccb_dis; 1633 int s; 1634 1635 DNPRINTF(SR_D_CCB, "%s: sr_ccb_put: %p\n", DEVNAME(sd->sd_sc), 1636 ccb); 1637 1638 s = splbio(); 1639 1640 ccb->ccb_wu = NULL; 1641 ccb->ccb_state = SR_CCB_FREE; 1642 ccb->ccb_target = -1; 1643 ccb->ccb_opaque = NULL; 1644 1645 TAILQ_INSERT_TAIL(&sd->sd_ccb_freeq, ccb, ccb_link); 1646 1647 splx(s); 1648 } 1649 1650 int 1651 sr_wu_alloc(struct sr_discipline *sd) 1652 { 1653 struct sr_workunit *wu; 1654 int i, no_wu; 1655 1656 if (!sd) 1657 return (1); 1658 1659 DNPRINTF(SR_D_WU, "%s: sr_wu_alloc %p %d\n", DEVNAME(sd->sd_sc), 1660 sd, sd->sd_max_wu); 1661 1662 if (sd->sd_wu) 1663 return (1); 1664 1665 no_wu = sd->sd_max_wu; 1666 sd->sd_wu_pending = no_wu; 1667 1668 sd->sd_wu = malloc(sizeof(struct sr_workunit) * no_wu, 1669 M_DEVBUF, M_WAITOK | M_ZERO); 1670 TAILQ_INIT(&sd->sd_wu_freeq); 1671 TAILQ_INIT(&sd->sd_wu_pendq); 1672 TAILQ_INIT(&sd->sd_wu_defq); 1673 for (i = 0; i < no_wu; i++) { 1674 wu = &sd->sd_wu[i]; 1675 wu->swu_dis = sd; 1676 sr_wu_put(wu); 1677 } 1678 1679 return (0); 1680 } 1681 1682 void 1683 sr_wu_free(struct sr_discipline *sd) 1684 { 1685 
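	/*
	 * Drain the free, pending and deferred queues before the work
	 * unit array itself is released below.
	 */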
struct sr_workunit *wu; 1686 1687 if (!sd) 1688 return; 1689 1690 DNPRINTF(SR_D_WU, "%s: sr_wu_free %p\n", DEVNAME(sd->sd_sc), sd); 1691 1692 while ((wu = TAILQ_FIRST(&sd->sd_wu_freeq)) != NULL) 1693 TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link); 1694 while ((wu = TAILQ_FIRST(&sd->sd_wu_pendq)) != NULL) 1695 TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link); 1696 while ((wu = TAILQ_FIRST(&sd->sd_wu_defq)) != NULL) 1697 TAILQ_REMOVE(&sd->sd_wu_defq, wu, swu_link); 1698 1699 if (sd->sd_wu) 1700 free(sd->sd_wu, M_DEVBUF); 1701 } 1702 1703 void 1704 sr_wu_put(struct sr_workunit *wu) 1705 { 1706 struct sr_discipline *sd = wu->swu_dis; 1707 struct sr_ccb *ccb; 1708 1709 int s; 1710 1711 DNPRINTF(SR_D_WU, "%s: sr_wu_put: %p\n", DEVNAME(sd->sd_sc), wu); 1712 1713 s = splbio(); 1714 1715 wu->swu_xs = NULL; 1716 wu->swu_state = SR_WU_FREE; 1717 wu->swu_ios_complete = 0; 1718 wu->swu_ios_failed = 0; 1719 wu->swu_ios_succeeded = 0; 1720 wu->swu_io_count = 0; 1721 wu->swu_blk_start = 0; 1722 wu->swu_blk_end = 0; 1723 wu->swu_collider = NULL; 1724 wu->swu_fake = 0; 1725 wu->swu_flags = 0; 1726 1727 while ((ccb = TAILQ_FIRST(&wu->swu_ccb)) != NULL) { 1728 TAILQ_REMOVE(&wu->swu_ccb, ccb, ccb_link); 1729 sr_ccb_put(ccb); 1730 } 1731 TAILQ_INIT(&wu->swu_ccb); 1732 1733 TAILQ_INSERT_TAIL(&sd->sd_wu_freeq, wu, swu_link); 1734 sd->sd_wu_pending--; 1735 1736 /* wake up sleepers */ 1737 #ifdef DIAGNOSTIC 1738 if (sd->sd_wu_sleep < 0) 1739 panic("negative wu sleepers"); 1740 #endif /* DIAGNOSTIC */ 1741 if (sd->sd_wu_sleep) 1742 wakeup(&sd->sd_wu_sleep); 1743 1744 splx(s); 1745 } 1746 1747 struct sr_workunit * 1748 sr_wu_get(struct sr_discipline *sd, int canwait) 1749 { 1750 struct sr_workunit *wu; 1751 int s; 1752 1753 s = splbio(); 1754 1755 for (;;) { 1756 wu = TAILQ_FIRST(&sd->sd_wu_freeq); 1757 if (wu) { 1758 TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link); 1759 wu->swu_state = SR_WU_INPROGRESS; 1760 sd->sd_wu_pending++; 1761 break; 1762 } else if (wu == NULL && canwait) { 1763 sd->sd_wu_sleep++; 1764 tsleep(&sd->sd_wu_sleep, PRIBIO, "sr_wu_get", 0); 1765 sd->sd_wu_sleep--; 1766 } else 1767 break; 1768 } 1769 1770 splx(s); 1771 1772 DNPRINTF(SR_D_WU, "%s: sr_wu_get: %p\n", DEVNAME(sd->sd_sc), wu); 1773 1774 return (wu); 1775 } 1776 1777 void 1778 sr_scsi_done(struct sr_discipline *sd, struct scsi_xfer *xs) 1779 { 1780 int s; 1781 1782 DNPRINTF(SR_D_DIS, "%s: sr_scsi_done: xs %p\n", DEVNAME(sd->sd_sc), xs); 1783 1784 s = splbio(); 1785 scsi_done(xs); 1786 splx(s); 1787 } 1788 1789 int 1790 sr_scsi_cmd(struct scsi_xfer *xs) 1791 { 1792 int s; 1793 struct scsi_link *link = xs->sc_link; 1794 struct sr_softc *sc = link->adapter_softc; 1795 struct sr_workunit *wu = NULL; 1796 struct sr_discipline *sd; 1797 1798 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: scsibus%d xs: %p " 1799 "flags: %#x\n", DEVNAME(sc), link->scsibus, xs, xs->flags); 1800 1801 sd = sc->sc_dis[link->scsibus]; 1802 if (sd == NULL) { 1803 s = splhigh(); 1804 sd = sc->sc_attach_dis; 1805 splx(s); 1806 1807 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: attaching %p\n", 1808 DEVNAME(sc), sd); 1809 if (sd == NULL) { 1810 printf("%s: sr_scsi_cmd NULL discipline\n", 1811 DEVNAME(sc)); 1812 goto stuffup; 1813 } 1814 } 1815 1816 if (sd->sd_deleted) { 1817 printf("%s: %s device is being deleted, failing io\n", 1818 DEVNAME(sc), sd->sd_meta->ssd_devname); 1819 goto stuffup; 1820 } 1821 1822 /* 1823 * we'll let the midlayer deal with stalls instead of being clever 1824 * and sending sr_wu_get !(xs->flags & SCSI_NOSLEEP) in cansleep 1825 */ 1826 if ((wu = sr_wu_get(sd, 0)) == NULL) { 
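		/*
		 * No free work units; return NO_CCB so the midlayer can
		 * retry the command later instead of sleeping here.
		 */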
1827 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd no wu\n", DEVNAME(sc)); 1828 return (NO_CCB); 1829 } 1830 1831 xs->error = XS_NOERROR; 1832 wu->swu_xs = xs; 1833 1834 /* the midlayer will query LUNs so report sense to stop scanning */ 1835 if (link->target != 0 || link->lun != 0) { 1836 DNPRINTF(SR_D_CMD, "%s: bad target:lun %d:%d\n", 1837 DEVNAME(sc), link->target, link->lun); 1838 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT | 1839 SSD_ERRCODE_VALID; 1840 sd->sd_scsi_sense.flags = SKEY_ILLEGAL_REQUEST; 1841 sd->sd_scsi_sense.add_sense_code = 0x25; 1842 sd->sd_scsi_sense.add_sense_code_qual = 0x00; 1843 sd->sd_scsi_sense.extra_len = 4; 1844 goto stuffup; 1845 } 1846 1847 switch (xs->cmd->opcode) { 1848 case READ_COMMAND: 1849 case READ_BIG: 1850 case READ_16: 1851 case WRITE_COMMAND: 1852 case WRITE_BIG: 1853 case WRITE_16: 1854 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: READ/WRITE %02x\n", 1855 DEVNAME(sc), xs->cmd->opcode); 1856 if (sd->sd_scsi_rw(wu)) 1857 goto stuffup; 1858 break; 1859 1860 case SYNCHRONIZE_CACHE: 1861 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: SYNCHRONIZE_CACHE\n", 1862 DEVNAME(sc)); 1863 if (sd->sd_scsi_sync(wu)) 1864 goto stuffup; 1865 goto complete; 1866 1867 case TEST_UNIT_READY: 1868 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: TEST_UNIT_READY\n", 1869 DEVNAME(sc)); 1870 if (sd->sd_scsi_tur(wu)) 1871 goto stuffup; 1872 goto complete; 1873 1874 case START_STOP: 1875 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: START_STOP\n", 1876 DEVNAME(sc)); 1877 if (sd->sd_scsi_start_stop(wu)) 1878 goto stuffup; 1879 goto complete; 1880 1881 case INQUIRY: 1882 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd: INQUIRY\n", 1883 DEVNAME(sc)); 1884 if (sd->sd_scsi_inquiry(wu)) 1885 goto stuffup; 1886 goto complete; 1887 1888 case READ_CAPACITY: 1889 case READ_CAPACITY_16: 1890 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd READ CAPACITY 0x%02x\n", 1891 DEVNAME(sc), xs->cmd->opcode); 1892 if (sd->sd_scsi_read_cap(wu)) 1893 goto stuffup; 1894 goto complete; 1895 1896 case REQUEST_SENSE: 1897 DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd REQUEST SENSE\n", 1898 DEVNAME(sc)); 1899 if (sd->sd_scsi_req_sense(wu)) 1900 goto stuffup; 1901 goto complete; 1902 1903 default: 1904 DNPRINTF(SR_D_CMD, "%s: unsupported scsi command %x\n", 1905 DEVNAME(sc), xs->cmd->opcode); 1906 /* XXX might need to add generic function to handle others */ 1907 goto stuffup; 1908 } 1909 1910 return (SUCCESSFULLY_QUEUED); 1911 stuffup: 1912 if (sd && sd->sd_scsi_sense.error_code) { 1913 xs->error = XS_SENSE; 1914 bcopy(&sd->sd_scsi_sense, &xs->sense, sizeof(xs->sense)); 1915 bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense)); 1916 } else { 1917 xs->error = XS_DRIVER_STUFFUP; 1918 xs->flags |= ITSDONE; 1919 } 1920 complete: 1921 if (wu) 1922 sr_wu_put(wu); 1923 sr_scsi_done(sd, xs); 1924 return (COMPLETE); 1925 } 1926 int 1927 sr_scsi_ioctl(struct scsi_link *link, u_long cmd, caddr_t addr, int flag, 1928 struct proc *p) 1929 { 1930 DNPRINTF(SR_D_IOCTL, "%s: sr_scsi_ioctl cmd: %#x\n", 1931 DEVNAME((struct sr_softc *)link->adapter_softc), cmd); 1932 1933 return (sr_ioctl(link->adapter_softc, cmd, addr)); 1934 } 1935 1936 int 1937 sr_ioctl(struct device *dev, u_long cmd, caddr_t addr) 1938 { 1939 struct sr_softc *sc = (struct sr_softc *)dev; 1940 int rv = 0; 1941 1942 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl ", DEVNAME(sc)); 1943 1944 rw_enter_write(&sc->sc_lock); 1945 1946 switch (cmd) { 1947 case BIOCINQ: 1948 DNPRINTF(SR_D_IOCTL, "inq\n"); 1949 rv = sr_ioctl_inq(sc, (struct bioc_inq *)addr); 1950 break; 1951 1952 case BIOCVOL: 1953 DNPRINTF(SR_D_IOCTL, "vol\n"); 1954 rv = 
sr_ioctl_vol(sc, (struct bioc_vol *)addr); 1955 break; 1956 1957 case BIOCDISK: 1958 DNPRINTF(SR_D_IOCTL, "disk\n"); 1959 rv = sr_ioctl_disk(sc, (struct bioc_disk *)addr); 1960 break; 1961 1962 case BIOCALARM: 1963 DNPRINTF(SR_D_IOCTL, "alarm\n"); 1964 /*rv = sr_ioctl_alarm(sc, (struct bioc_alarm *)addr); */ 1965 break; 1966 1967 case BIOCBLINK: 1968 DNPRINTF(SR_D_IOCTL, "blink\n"); 1969 /*rv = sr_ioctl_blink(sc, (struct bioc_blink *)addr); */ 1970 break; 1971 1972 case BIOCSETSTATE: 1973 DNPRINTF(SR_D_IOCTL, "setstate\n"); 1974 rv = sr_ioctl_setstate(sc, (struct bioc_setstate *)addr); 1975 break; 1976 1977 case BIOCCREATERAID: 1978 DNPRINTF(SR_D_IOCTL, "createraid\n"); 1979 rv = sr_ioctl_createraid(sc, (struct bioc_createraid *)addr, 1); 1980 break; 1981 1982 case BIOCDELETERAID: 1983 rv = sr_ioctl_deleteraid(sc, (struct bioc_deleteraid *)addr); 1984 break; 1985 1986 case BIOCDISCIPLINE: 1987 rv = sr_ioctl_discipline(sc, (struct bioc_discipline *)addr); 1988 break; 1989 1990 default: 1991 DNPRINTF(SR_D_IOCTL, "invalid ioctl\n"); 1992 rv = ENOTTY; 1993 } 1994 1995 rw_exit_write(&sc->sc_lock); 1996 1997 return (rv); 1998 } 1999 2000 int 2001 sr_ioctl_inq(struct sr_softc *sc, struct bioc_inq *bi) 2002 { 2003 int i, vol, disk; 2004 2005 for (i = 0, vol = 0, disk = 0; i < SR_MAXSCSIBUS; i++) 2006 /* XXX this will not work when we stagger disciplines */ 2007 if (sc->sc_dis[i]) { 2008 vol++; 2009 disk += sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no; 2010 } 2011 2012 strlcpy(bi->bi_dev, sc->sc_dev.dv_xname, sizeof(bi->bi_dev)); 2013 bi->bi_novol = vol + sc->sc_hotspare_no; 2014 bi->bi_nodisk = disk + sc->sc_hotspare_no; 2015 2016 return (0); 2017 } 2018 2019 int 2020 sr_ioctl_vol(struct sr_softc *sc, struct bioc_vol *bv) 2021 { 2022 int i, vol, rv = EINVAL; 2023 struct sr_discipline *sd; 2024 struct sr_chunk *hotspare; 2025 daddr64_t rb, sz; 2026 2027 for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { 2028 /* XXX this will not work when we stagger disciplines */ 2029 if (sc->sc_dis[i]) 2030 vol++; 2031 if (vol != bv->bv_volid) 2032 continue; 2033 2034 sd = sc->sc_dis[i]; 2035 bv->bv_status = sd->sd_vol_status; 2036 bv->bv_size = sd->sd_meta->ssdi.ssd_size << DEV_BSHIFT; 2037 bv->bv_level = sd->sd_meta->ssdi.ssd_level; 2038 bv->bv_nodisk = sd->sd_meta->ssdi.ssd_chunk_no; 2039 if (bv->bv_status == BIOC_SVREBUILD) { 2040 sz = sd->sd_meta->ssdi.ssd_size; 2041 rb = sd->sd_meta->ssd_rebuild; 2042 if (rb > 0) 2043 bv->bv_percent = 100 - 2044 ((sz * 100 - rb * 100) / sz) - 1; 2045 else 2046 bv->bv_percent = 0; 2047 } 2048 strlcpy(bv->bv_dev, sd->sd_meta->ssd_devname, 2049 sizeof(bv->bv_dev)); 2050 strlcpy(bv->bv_vendor, sd->sd_meta->ssdi.ssd_vendor, 2051 sizeof(bv->bv_vendor)); 2052 rv = 0; 2053 goto done; 2054 } 2055 2056 /* Check hotspares list. */ 2057 SLIST_FOREACH(hotspare, &sc->sc_hotspare_list, src_link) { 2058 vol++; 2059 if (vol != bv->bv_volid) 2060 continue; 2061 2062 bv->bv_status = BIOC_SVONLINE; 2063 bv->bv_size = hotspare->src_meta.scmi.scm_size << DEV_BSHIFT; 2064 bv->bv_level = -1; /* Hotspare. 
*/ 2065 bv->bv_nodisk = 1; 2066 strlcpy(bv->bv_dev, hotspare->src_meta.scmi.scm_devname, 2067 sizeof(bv->bv_dev)); 2068 strlcpy(bv->bv_vendor, hotspare->src_meta.scmi.scm_devname, 2069 sizeof(bv->bv_vendor)); 2070 rv = 0; 2071 goto done; 2072 } 2073 2074 done: 2075 return (rv); 2076 } 2077 2078 int 2079 sr_ioctl_disk(struct sr_softc *sc, struct bioc_disk *bd) 2080 { 2081 int i, vol, rv = EINVAL, id; 2082 struct sr_chunk *src, *hotspare; 2083 2084 for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { 2085 /* XXX this will not work when we stagger disciplines */ 2086 if (sc->sc_dis[i]) 2087 vol++; 2088 if (vol != bd->bd_volid) 2089 continue; 2090 2091 id = bd->bd_diskid; 2092 if (id >= sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no) 2093 break; 2094 2095 src = sc->sc_dis[i]->sd_vol.sv_chunks[id]; 2096 bd->bd_status = src->src_meta.scm_status; 2097 bd->bd_size = src->src_meta.scmi.scm_size << DEV_BSHIFT; 2098 bd->bd_channel = vol; 2099 bd->bd_target = id; 2100 strlcpy(bd->bd_vendor, src->src_meta.scmi.scm_devname, 2101 sizeof(bd->bd_vendor)); 2102 rv = 0; 2103 goto done; 2104 } 2105 2106 /* Check hotspares list. */ 2107 SLIST_FOREACH(hotspare, &sc->sc_hotspare_list, src_link) { 2108 vol++; 2109 if (vol != bd->bd_volid) 2110 continue; 2111 2112 if (bd->bd_diskid != 0) 2113 break; 2114 2115 bd->bd_status = hotspare->src_meta.scm_status; 2116 bd->bd_size = hotspare->src_meta.scmi.scm_size << DEV_BSHIFT; 2117 bd->bd_channel = vol; 2118 bd->bd_target = bd->bd_diskid; 2119 strlcpy(bd->bd_vendor, hotspare->src_meta.scmi.scm_devname, 2120 sizeof(bd->bd_vendor)); 2121 rv = 0; 2122 goto done; 2123 } 2124 2125 done: 2126 return (rv); 2127 } 2128 2129 int 2130 sr_ioctl_setstate(struct sr_softc *sc, struct bioc_setstate *bs) 2131 { 2132 int rv = EINVAL; 2133 int i, vol, found, c; 2134 struct sr_discipline *sd = NULL; 2135 struct sr_chunk *ch_entry; 2136 struct sr_chunk_head *cl; 2137 2138 if (bs->bs_other_id_type == BIOC_SSOTHER_UNUSED) 2139 goto done; 2140 2141 if (bs->bs_status == BIOC_SSHOTSPARE) { 2142 rv = sr_hotspare(sc, (dev_t)bs->bs_other_id); 2143 goto done; 2144 } 2145 2146 for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { 2147 /* XXX this will not work when we stagger disciplines */ 2148 if (sc->sc_dis[i]) 2149 vol++; 2150 if (vol != bs->bs_volid) 2151 continue; 2152 sd = sc->sc_dis[i]; 2153 break; 2154 } 2155 if (sd == NULL) 2156 goto done; 2157 2158 switch (bs->bs_status) { 2159 case BIOC_SSOFFLINE: 2160 /* Take chunk offline */ 2161 found = c = 0; 2162 cl = &sd->sd_vol.sv_chunk_list; 2163 SLIST_FOREACH(ch_entry, cl, src_link) { 2164 if (ch_entry->src_dev_mm == bs->bs_other_id) { 2165 found = 1; 2166 break; 2167 } 2168 c++; 2169 } 2170 if (found == 0) { 2171 printf("%s: chunk not part of array\n", DEVNAME(sc)); 2172 goto done; 2173 } 2174 2175 /* XXX: check current state first */ 2176 sd->sd_set_chunk_state(sd, c, BIOC_SSOFFLINE); 2177 2178 if (sr_meta_save(sd, SR_META_DIRTY)) { 2179 printf("%s: could not save metadata to %s\n", 2180 DEVNAME(sc), sd->sd_meta->ssd_devname); 2181 goto done; 2182 } 2183 rv = 0; 2184 break; 2185 2186 case BIOC_SDSCRUB: 2187 break; 2188 2189 case BIOC_SSREBUILD: 2190 rv = sr_rebuild_init(sd, (dev_t)bs->bs_other_id); 2191 break; 2192 2193 default: 2194 printf("%s: unsupported state request %d\n", 2195 DEVNAME(sc), bs->bs_status); 2196 } 2197 2198 done: 2199 return (rv); 2200 } 2201 2202 int 2203 sr_chunk_in_use(struct sr_softc *sc, dev_t dev) 2204 { 2205 struct sr_discipline *sd; 2206 struct sr_chunk *chunk; 2207 int i, c; 2208 2209 /* See if chunk is already in use. 
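A device counts as in use when it is a member of any attached discipline or sits on the hotspare list; in that case its current chunk status is returned so callers can decide whether the device may be reused, otherwise BIOC_SDINVALID is returned.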
*/ 2210 for (i = 0; i < SR_MAXSCSIBUS; i++) { 2211 if (!sc->sc_dis[i]) 2212 continue; 2213 sd = sc->sc_dis[i]; 2214 for (c = 0; c < sd->sd_meta->ssdi.ssd_chunk_no; c++) { 2215 chunk = sd->sd_vol.sv_chunks[c]; 2216 if (chunk->src_dev_mm == dev) 2217 return chunk->src_meta.scm_status; 2218 } 2219 } 2220 2221 /* Check hotspares list. */ 2222 SLIST_FOREACH(chunk, &sc->sc_hotspare_list, src_link) 2223 if (chunk->src_dev_mm == dev) 2224 return chunk->src_meta.scm_status; 2225 2226 return BIOC_SDINVALID; 2227 } 2228 2229 int 2230 sr_hotspare(struct sr_softc *sc, dev_t dev) 2231 { 2232 struct sr_discipline *sd = NULL; 2233 struct sr_metadata *sm = NULL; 2234 struct sr_meta_chunk *hm; 2235 struct sr_chunk_head *cl; 2236 struct sr_chunk *hotspare = NULL, *chunk, *last; 2237 struct sr_uuid uuid; 2238 struct disklabel label; 2239 struct vnode *vn; 2240 daddr64_t size; 2241 char devname[32]; 2242 int rv = EINVAL; 2243 int c, part, open = 0; 2244 2245 /* 2246 * Add device to global hotspares list. 2247 */ 2248 2249 sr_meta_getdevname(sc, dev, devname, sizeof(devname)); 2250 2251 /* Make sure chunk is not already in use. */ 2252 c = sr_chunk_in_use(sc, dev); 2253 if (c != BIOC_SDINVALID && c != BIOC_SDOFFLINE) { 2254 if (c == BIOC_SDHOTSPARE) 2255 printf("%s: %s is already a hotspare\n", 2256 DEVNAME(sc), devname); 2257 else 2258 printf("%s: %s is already in use\n", 2259 DEVNAME(sc), devname); 2260 goto done; 2261 } 2262 2263 /* XXX - See if there is an existing degraded volume... */ 2264 2265 /* Open device. */ 2266 if (bdevvp(dev, &vn)) { 2267 printf("%s: sr_hotspare: can't allocate vnode\n", DEVNAME(sc)); 2268 goto done; 2269 } 2270 if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, 0)) { 2271 DNPRINTF(SR_D_META,"%s: sr_hotspare cannot open %s\n", 2272 DEVNAME(sc), devname); 2273 vput(vn); 2274 goto fail; 2275 } 2276 open = 1; /* close dev on error */ 2277 2278 /* Get partition details. */ 2279 part = DISKPART(dev); 2280 if (VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD, NOCRED, 0)) { 2281 DNPRINTF(SR_D_META, "%s: sr_hotspare ioctl failed\n", 2282 DEVNAME(sc)); 2283 VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, 0); 2284 vput(vn); 2285 goto fail; 2286 } 2287 if (label.d_partitions[part].p_fstype != FS_RAID) { 2288 printf("%s: %s partition not of type RAID (%d)\n", 2289 DEVNAME(sc), devname, 2290 label.d_partitions[part].p_fstype); 2291 goto fail; 2292 } 2293 2294 /* Calculate partition size. */ 2295 size = DL_GETPSIZE(&label.d_partitions[part]) - 2296 SR_META_SIZE - SR_META_OFFSET; 2297 2298 /* 2299 * Create and populate chunk metadata. 2300 */ 2301 2302 sr_uuid_get(&uuid); 2303 hotspare = malloc(sizeof(struct sr_chunk), M_DEVBUF, M_WAITOK | M_ZERO); 2304 2305 hotspare->src_dev_mm = dev; 2306 hotspare->src_vn = vn; 2307 strlcpy(hotspare->src_devname, devname, sizeof(hotspare->src_devname)); 2308 hotspare->src_size = size; 2309 2310 hm = &hotspare->src_meta; 2311 hm->scmi.scm_volid = SR_HOTSPARE_VOLID; 2312 hm->scmi.scm_chunk_id = 0; 2313 hm->scmi.scm_size = size; 2314 hm->scmi.scm_coerced_size = size; 2315 strlcpy(hm->scmi.scm_devname, devname, sizeof(hm->scmi.scm_devname)); 2316 bcopy(&uuid, &hm->scmi.scm_uuid, sizeof(struct sr_uuid)); 2317 2318 sr_checksum(sc, hm, &hm->scm_checksum, 2319 sizeof(struct sr_meta_chunk_invariant)); 2320 2321 hm->scm_status = BIOC_SDHOTSPARE; 2322 2323 /* 2324 * Create and populate our own discipline and metadata. 
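* The hotspare is wrapped in a throw-away single-chunk discipline using the reserved hotspare volume id and level; it exists only so that sr_meta_save() can write valid metadata onto the spare and it is torn down again at the end of this function.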
2325 */ 2326 2327 sm = malloc(sizeof(struct sr_metadata), M_DEVBUF, M_WAITOK | M_ZERO); 2328 sm->ssdi.ssd_magic = SR_MAGIC; 2329 sm->ssdi.ssd_version = SR_META_VERSION; 2330 sm->ssd_ondisk = 0; 2331 sm->ssdi.ssd_flags = 0; 2332 bcopy(&uuid, &sm->ssdi.ssd_uuid, sizeof(struct sr_uuid)); 2333 sm->ssdi.ssd_chunk_no = 1; 2334 sm->ssdi.ssd_volid = SR_HOTSPARE_VOLID; 2335 sm->ssdi.ssd_level = SR_HOTSPARE_LEVEL; 2336 sm->ssdi.ssd_size = size; 2337 strlcpy(sm->ssdi.ssd_vendor, "OPENBSD", sizeof(sm->ssdi.ssd_vendor)); 2338 snprintf(sm->ssdi.ssd_product, sizeof(sm->ssdi.ssd_product), 2339 "SR %s", "HOTSPARE"); 2340 snprintf(sm->ssdi.ssd_revision, sizeof(sm->ssdi.ssd_revision), 2341 "%03d", SR_META_VERSION); 2342 2343 sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO); 2344 sd->sd_sc = sc; 2345 sd->sd_meta = sm; 2346 sd->sd_meta_type = SR_META_F_NATIVE; 2347 sd->sd_vol_status = BIOC_SVONLINE; 2348 strlcpy(sd->sd_name, "HOTSPARE", sizeof(sd->sd_name)); 2349 2350 /* Add chunk to volume. */ 2351 sd->sd_vol.sv_chunks = malloc(sizeof(struct sr_chunk *), M_DEVBUF, 2352 M_WAITOK | M_ZERO); 2353 sd->sd_vol.sv_chunks[0] = hotspare; 2354 SLIST_INIT(&sd->sd_vol.sv_chunk_list); 2355 SLIST_INSERT_HEAD(&sd->sd_vol.sv_chunk_list, hotspare, src_link); 2356 2357 /* Save metadata. */ 2358 if (sr_meta_save(sd, SR_META_DIRTY)) { 2359 printf("%s: could not save metadata to %s\n", 2360 DEVNAME(sc), devname); 2361 goto fail; 2362 } 2363 2364 /* 2365 * Add chunk to hotspare list. 2366 */ 2367 rw_enter_write(&sc->sc_hs_lock); 2368 cl = &sc->sc_hotspare_list; 2369 if (SLIST_EMPTY(cl)) 2370 SLIST_INSERT_HEAD(cl, hotspare, src_link); 2371 else { 2372 SLIST_FOREACH(chunk, cl, src_link) 2373 last = chunk; 2374 SLIST_INSERT_AFTER(last, hotspare, src_link); 2375 } 2376 sc->sc_hotspare_no++; 2377 rw_exit_write(&sc->sc_hs_lock); 2378 2379 rv = 0; 2380 goto done; 2381 2382 fail: 2383 if (hotspare) 2384 free(hotspare, M_DEVBUF); 2385 2386 done: 2387 if (sd && sd->sd_vol.sv_chunks) 2388 free(sd->sd_vol.sv_chunks, M_DEVBUF); 2389 if (sd) 2390 free(sd, M_DEVBUF); 2391 if (sm) 2392 free(sm, M_DEVBUF); 2393 if (open) { 2394 VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, 0); 2395 vput(vn); 2396 } 2397 2398 return (rv); 2399 } 2400 2401 void 2402 sr_hotspare_rebuild_callback(void *arg1, void *arg2) 2403 { 2404 sr_hotspare_rebuild((struct sr_discipline *)arg1); 2405 } 2406 2407 void 2408 sr_hotspare_rebuild(struct sr_discipline *sd) 2409 { 2410 struct sr_chunk_head *cl; 2411 struct sr_chunk *hotspare, *chunk = NULL; 2412 struct sr_workunit *wu; 2413 struct sr_ccb *ccb; 2414 int i, s, chunk_no, busy; 2415 2416 /* 2417 * Attempt to locate a hotspare and initiate rebuild. 2418 */ 2419 2420 for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) { 2421 if (sd->sd_vol.sv_chunks[i]->src_meta.scm_status == 2422 BIOC_SDOFFLINE) { 2423 chunk_no = i; 2424 chunk = sd->sd_vol.sv_chunks[i]; 2425 break; 2426 } 2427 } 2428 2429 if (chunk == NULL) { 2430 printf("%s: no offline chunk found on %s!\n", 2431 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname); 2432 return; 2433 } 2434 2435 /* See if we have a suitable hotspare... 
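The list is walked under sc_hs_lock and the first spare at least as large as the failed chunk is chosen; SLIST_FOREACH leaves the iterator at NULL when the list is exhausted, which is how the no-spare case is detected below.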
*/ 2436 rw_enter_write(&sd->sd_sc->sc_hs_lock); 2437 cl = &sd->sd_sc->sc_hotspare_list; 2438 SLIST_FOREACH(hotspare, cl, src_link) 2439 if (hotspare->src_size >= chunk->src_size) 2440 break; 2441 2442 if (hotspare != NULL) { 2443 2444 printf("%s: %s volume degraded, will attempt to " 2445 "rebuild on hotspare %s\n", DEVNAME(sd->sd_sc), 2446 sd->sd_meta->ssd_devname, hotspare->src_devname); 2447 2448 /* 2449 * Ensure that all pending I/O completes on the failed chunk 2450 * before trying to initiate a rebuild. 2451 */ 2452 i = 0; 2453 do { 2454 busy = 0; 2455 2456 s = splbio(); 2457 TAILQ_FOREACH(wu, &sd->sd_wu_pendq, swu_link) { 2458 TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) { 2459 if (ccb->ccb_target == chunk_no) 2460 busy = 1; 2461 } 2462 } 2463 TAILQ_FOREACH(wu, &sd->sd_wu_defq, swu_link) { 2464 TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) { 2465 if (ccb->ccb_target == chunk_no) 2466 busy = 1; 2467 } 2468 } 2469 splx(s); 2470 2471 if (busy) { 2472 tsleep(sd, PRIBIO, "sr_hotspare", hz); 2473 i++; 2474 } 2475 2476 } while (busy && i < 120); 2477 2478 DNPRINTF(SR_D_META, "%s: waited %i seconds for I/O to " 2479 "complete on failed chunk %s\n", DEVNAME(sd->sd_sc), 2480 i, chunk->src_devname); 2481 2482 if (busy) { 2483 printf("%s: pending I/O failed to complete on " 2484 "failed chunk %s, hotspare rebuild aborted...\n", 2485 DEVNAME(sd->sd_sc), chunk->src_devname); 2486 goto done; 2487 } 2488 2489 s = splbio(); 2490 rw_enter_write(&sd->sd_sc->sc_lock); 2491 if (sr_rebuild_init(sd, hotspare->src_dev_mm) == 0) { 2492 2493 /* Remove hotspare from available list. */ 2494 sd->sd_sc->sc_hotspare_no--; 2495 SLIST_REMOVE(cl, hotspare, sr_chunk, src_link); 2496 free(hotspare, M_DEVBUF); 2497 2498 } 2499 rw_exit_write(&sd->sd_sc->sc_lock); 2500 splx(s); 2501 } 2502 done: 2503 rw_exit_write(&sd->sd_sc->sc_hs_lock); 2504 } 2505 2506 int 2507 sr_rebuild_init(struct sr_discipline *sd, dev_t dev) 2508 { 2509 struct sr_softc *sc = sd->sd_sc; 2510 int rv = EINVAL, part; 2511 int c, found, open = 0; 2512 char devname[32]; 2513 struct vnode *vn; 2514 daddr64_t size, csize; 2515 struct disklabel label; 2516 struct sr_meta_chunk *old, *new; 2517 2518 /* 2519 * Attempt to initiate a rebuild onto the specified device. 
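* The target partition must carry an FS_RAID disklabel entry, be large enough to replace the offline chunk and not be in use elsewhere; once its chunk metadata has been rewritten the chunk is marked BIOC_SDREBUILD and the actual copy is handed off to the rebuild kernel thread.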
2520 */ 2521 2522 if (!(sd->sd_capabilities & SR_CAP_REBUILD)) { 2523 printf("%s: discipline does not support rebuild\n", 2524 DEVNAME(sc)); 2525 goto done; 2526 } 2527 2528 /* make sure volume is in the right state */ 2529 if (sd->sd_vol_status == BIOC_SVREBUILD) { 2530 printf("%s: rebuild already in progress\n", DEVNAME(sc)); 2531 goto done; 2532 } 2533 if (sd->sd_vol_status != BIOC_SVDEGRADED) { 2534 printf("%s: %s not degraded\n", DEVNAME(sc), 2535 sd->sd_meta->ssd_devname); 2536 goto done; 2537 } 2538 2539 /* find offline chunk */ 2540 for (c = 0, found = -1; c < sd->sd_meta->ssdi.ssd_chunk_no; c++) 2541 if (sd->sd_vol.sv_chunks[c]->src_meta.scm_status == 2542 BIOC_SDOFFLINE) { 2543 found = c; 2544 new = &sd->sd_vol.sv_chunks[c]->src_meta; 2545 if (c > 0) 2546 break; /* roll at least once over the for */ 2547 } else { 2548 csize = sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_size; 2549 old = &sd->sd_vol.sv_chunks[c]->src_meta; 2550 if (found != -1) 2551 break; 2552 } 2553 if (found == -1) { 2554 printf("%s: no offline chunks available for rebuild\n", 2555 DEVNAME(sc)); 2556 goto done; 2557 } 2558 2559 /* populate meta entry */ 2560 sr_meta_getdevname(sc, dev, devname, sizeof(devname)); 2561 if (bdevvp(dev, &vn)) { 2562 printf("%s: sr_rebuild_init: can't allocate vnode\n", 2563 DEVNAME(sc)); 2564 goto done; 2565 } 2566 2567 if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, 0)) { 2568 DNPRINTF(SR_D_META,"%s: sr_rebuild_init can't " 2569 "open %s\n", DEVNAME(sc), devname); 2570 vput(vn); 2571 goto done; 2572 } 2573 open = 1; /* close dev on error */ 2574 2575 /* get partition */ 2576 part = DISKPART(dev); 2577 if (VOP_IOCTL(vn, DIOCGDINFO, (caddr_t)&label, FREAD, NOCRED, 0)) { 2578 DNPRINTF(SR_D_META, "%s: sr_rebuild_init ioctl failed\n", 2579 DEVNAME(sc)); 2580 goto done; 2581 } 2582 if (label.d_partitions[part].p_fstype != FS_RAID) { 2583 printf("%s: %s partition not of type RAID (%d)\n", 2584 DEVNAME(sc), devname, 2585 label.d_partitions[part].p_fstype); 2586 goto done; 2587 } 2588 2589 /* is partition large enough? */ 2590 size = DL_GETPSIZE(&label.d_partitions[part]) - 2591 SR_META_SIZE - SR_META_OFFSET; 2592 if (size < csize) { 2593 printf("%s: partition too small, at least %llu B required\n", 2594 DEVNAME(sc), csize << DEV_BSHIFT); 2595 goto done; 2596 } else if (size > csize) 2597 printf("%s: partition too large, wasting %llu B\n", 2598 DEVNAME(sc), (size - csize) << DEV_BSHIFT); 2599 2600 /* make sure we are not stomping on some other partition */ 2601 c = sr_chunk_in_use(sc, dev); 2602 if (c != BIOC_SDINVALID && c != BIOC_SDOFFLINE) { 2603 printf("%s: %s is already in use\n", DEVNAME(sc), devname); 2604 goto done; 2605 } 2606 2607 /* Reset rebuild counter since we are rebuilding onto a new chunk. 
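Progress recorded against the previous chunk means nothing for its replacement, so the copy restarts from block zero.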
*/ 2608 sd->sd_meta->ssd_rebuild = 0; 2609 2610 /* recreate metadata */ 2611 open = 0; /* leave dev open from here on out */ 2612 sd->sd_vol.sv_chunks[found]->src_dev_mm = dev; 2613 sd->sd_vol.sv_chunks[found]->src_vn = vn; 2614 new->scmi.scm_volid = old->scmi.scm_volid; 2615 new->scmi.scm_chunk_id = found; 2616 strlcpy(new->scmi.scm_devname, devname, 2617 sizeof new->scmi.scm_devname); 2618 new->scmi.scm_size = size; 2619 new->scmi.scm_coerced_size = old->scmi.scm_coerced_size; 2620 bcopy(&old->scmi.scm_uuid, &new->scmi.scm_uuid, 2621 sizeof new->scmi.scm_uuid); 2622 sr_checksum(sc, new, &new->scm_checksum, 2623 sizeof(struct sr_meta_chunk_invariant)); 2624 sd->sd_set_chunk_state(sd, found, BIOC_SDREBUILD); 2625 if (sr_meta_save(sd, SR_META_DIRTY)) { 2626 printf("%s: could not save metadata to %s\n", 2627 DEVNAME(sc), devname); 2628 open = 1; 2629 goto done; 2630 } 2631 2632 printf("%s: rebuild of %s started on %s\n", DEVNAME(sc), 2633 sd->sd_meta->ssd_devname, devname); 2634 2635 sd->sd_reb_abort = 0; 2636 kthread_create_deferred(sr_rebuild, sd); 2637 2638 rv = 0; 2639 done: 2640 if (open) { 2641 VOP_CLOSE(vn, FREAD | FWRITE, NOCRED, 0); 2642 vput(vn); 2643 } 2644 2645 return (rv); 2646 } 2647 2648 void 2649 sr_roam_chunks(struct sr_discipline *sd) 2650 { 2651 struct sr_softc *sc = sd->sd_sc; 2652 struct sr_chunk *chunk; 2653 struct sr_meta_chunk *meta; 2654 int roamed = 0; 2655 2656 /* Have any chunks roamed? */ 2657 SLIST_FOREACH(chunk, &sd->sd_vol.sv_chunk_list, src_link) { 2658 2659 meta = &chunk->src_meta; 2660 2661 if (strncmp(meta->scmi.scm_devname, chunk->src_devname, 2662 sizeof(meta->scmi.scm_devname))) { 2663 2664 printf("%s: roaming device %s -> %s\n", DEVNAME(sc), 2665 meta->scmi.scm_devname, chunk->src_devname); 2666 2667 strlcpy(meta->scmi.scm_devname, chunk->src_devname, 2668 sizeof(meta->scmi.scm_devname)); 2669 2670 roamed++; 2671 } 2672 } 2673 2674 if (roamed) 2675 sr_meta_save(sd, SR_META_DIRTY); 2676 } 2677 2678 int 2679 sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user) 2680 { 2681 dev_t *dt; 2682 int i, s, no_chunk, rv = EINVAL, vol; 2683 int no_meta, updatemeta = 0; 2684 u_int64_t vol_size; 2685 int32_t strip_size = 0; 2686 struct sr_chunk_head *cl; 2687 struct sr_discipline *sd = NULL; 2688 struct sr_chunk *ch_entry; 2689 struct device *dev, *dev2; 2690 struct scsibus_attach_args saa; 2691 char devname[32]; 2692 2693 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_createraid(%d)\n", 2694 DEVNAME(sc), user); 2695 2696 /* user input */ 2697 if (bc->bc_dev_list_len > BIOC_CRMAXLEN) 2698 goto unwind; 2699 2700 dt = malloc(bc->bc_dev_list_len, M_DEVBUF, M_WAITOK | M_ZERO); 2701 if (user) { 2702 if (copyin(bc->bc_dev_list, dt, bc->bc_dev_list_len) != 0) 2703 goto unwind; 2704 } else 2705 bcopy(bc->bc_dev_list, dt, bc->bc_dev_list_len); 2706 2707 sd = malloc(sizeof(struct sr_discipline), M_DEVBUF, M_WAITOK | M_ZERO); 2708 sd->sd_sc = sc; 2709 2710 no_chunk = bc->bc_dev_list_len / sizeof(dev_t); 2711 cl = &sd->sd_vol.sv_chunk_list; 2712 SLIST_INIT(cl); 2713 2714 /* Ensure that chunks are not already in use. 
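Every device passed in from userland is checked against the attached disciplines and the hotspare list before any metadata is written; a single chunk that is already spoken for aborts the whole create operation.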
*/ 2715 for (i = 0; i < no_chunk; i++) { 2716 if (sr_chunk_in_use(sc, dt[i]) != BIOC_SDINVALID) { 2717 sr_meta_getdevname(sc, dt[i], devname, sizeof(devname)); 2718 printf("%s: chunk %s already in use\n", 2719 DEVNAME(sc), devname); 2720 goto unwind; 2721 } 2722 } 2723 2724 sd->sd_meta_type = sr_meta_probe(sd, dt, no_chunk); 2725 if (sd->sd_meta_type == SR_META_F_INVALID) { 2726 printf("%s: invalid metadata format\n", DEVNAME(sc)); 2727 goto unwind; 2728 } 2729 2730 if (sr_meta_attach(sd, bc->bc_flags & BIOC_SCFORCE)) { 2731 printf("%s: can't attach metadata type %d\n", DEVNAME(sc), 2732 sd->sd_meta_type); 2733 goto unwind; 2734 } 2735 2736 /* force the raid volume by clearing metadata region */ 2737 if (bc->bc_flags & BIOC_SCFORCE) { 2738 /* make sure disk isn't up and running */ 2739 if (sr_meta_read(sd)) 2740 if (sr_already_assembled(sd)) { 2741 printf("%s: disk ", DEVNAME(sc)); 2742 sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0); 2743 printf(" is currently in use; can't force " 2744 "create\n"); 2745 goto unwind; 2746 } 2747 2748 if (sr_meta_clear(sd)) { 2749 printf("%s: failed to clear metadata\n", DEVNAME(sc)); 2750 goto unwind; 2751 } 2752 } 2753 2754 if ((no_meta = sr_meta_read(sd)) == 0) { 2755 /* fill out all chunk metadata */ 2756 sr_meta_chunks_create(sc, cl); 2757 ch_entry = SLIST_FIRST(cl); 2758 2759 /* no metadata available */ 2760 switch (bc->bc_level) { 2761 case 0: 2762 if (no_chunk < 2) 2763 goto unwind; 2764 strlcpy(sd->sd_name, "RAID 0", sizeof(sd->sd_name)); 2765 /* 2766 * XXX add variable strip size later even though 2767 * MAXPHYS is really the clever value, users like 2768 * to tinker with that type of stuff 2769 */ 2770 strip_size = MAXPHYS; 2771 vol_size = 2772 (ch_entry->src_meta.scmi.scm_coerced_size & 2773 ~((strip_size >> DEV_BSHIFT) - 1)) * no_chunk; 2774 break; 2775 case 1: 2776 if (no_chunk < 2) 2777 goto unwind; 2778 strlcpy(sd->sd_name, "RAID 1", sizeof(sd->sd_name)); 2779 vol_size = ch_entry->src_meta.scmi.scm_coerced_size; 2780 break; 2781 case 4: 2782 case 5: 2783 if (no_chunk < 3) 2784 goto unwind; 2785 if (bc->bc_level == 4) 2786 strlcpy(sd->sd_name, "RAID 4", 2787 sizeof(sd->sd_name)); 2788 else 2789 strlcpy(sd->sd_name, "RAID 5", 2790 sizeof(sd->sd_name)); 2791 /* 2792 * XXX add variable strip size later even though 2793 * MAXPHYS is really the clever value, users like 2794 * to tinker with that type of stuff 2795 */ 2796 strip_size = MAXPHYS; 2797 vol_size = 2798 (ch_entry->src_meta.scmi.scm_coerced_size & 2799 ~((strip_size >> DEV_BSHIFT) - 1)) * (no_chunk - 1); 2800 break; 2801 //#ifdef not_yet 2802 case 6: 2803 if (no_chunk < 4) 2804 goto unwind; 2805 strlcpy(sd->sd_name, "RAID 6", 2806 sizeof(sd->sd_name)); 2807 /* 2808 * XXX add variable strip size later even though 2809 * MAXPHYS is really the clever value, users like 2810 * to tinker with that type of stuff 2811 */ 2812 strip_size = MAXPHYS; 2813 vol_size = 2814 (ch_entry->src_meta.scmi.scm_coerced_size & 2815 ~((strip_size >> DEV_BSHIFT) - 1)) * (no_chunk - 2); 2816 break; 2817 //#endif /* not_yet */ 2818 #ifdef AOE 2819 #ifdef not_yet 2820 case 'A': 2821 /* target */ 2822 if (no_chunk != 1) 2823 goto unwind; 2824 strlcpy(sd->sd_name, "AOE TARG", sizeof(sd->sd_name)); 2825 vol_size = ch_entry->src_meta.scmi.scm_coerced_size; 2826 break; 2827 case 'a': 2828 /* initiator */ 2829 if (no_chunk != 1) 2830 goto unwind; 2831 strlcpy(sd->sd_name, "AOE INIT", sizeof(sd->sd_name)); 2832 break; 2833 #endif /* not_yet */ 2834 #endif /* AOE */ 2835 #ifdef CRYPTO 2836 case 'C': 2837 DNPRINTF(SR_D_IOCTL, 
2838 "%s: sr_ioctl_createraid: no_chunk %d\n", 2839 DEVNAME(sc), no_chunk); 2840 2841 if (no_chunk != 1) 2842 goto unwind; 2843 2844 /* no hint available yet */ 2845 if (bc->bc_opaque_flags & BIOC_SOOUT) { 2846 bc->bc_opaque_status = BIOC_SOINOUT_FAILED; 2847 rv = 0; 2848 goto unwind; 2849 } 2850 2851 if (!(bc->bc_flags & BIOC_SCNOAUTOASSEMBLE)) 2852 goto unwind; 2853 2854 if (sr_crypto_get_kdf(bc, sd)) 2855 goto unwind; 2856 2857 strlcpy(sd->sd_name, "CRYPTO", sizeof(sd->sd_name)); 2858 vol_size = ch_entry->src_meta.scmi.scm_size; 2859 2860 sr_crypto_create_keys(sd); 2861 2862 break; 2863 #endif /* CRYPTO */ 2864 default: 2865 goto unwind; 2866 } 2867 2868 /* fill out all volume metadata */ 2869 DNPRINTF(SR_D_IOCTL, 2870 "%s: sr_ioctl_createraid: vol_size: %lld\n", 2871 DEVNAME(sc), vol_size); 2872 sd->sd_meta->ssdi.ssd_chunk_no = no_chunk; 2873 sd->sd_meta->ssdi.ssd_size = vol_size; 2874 sd->sd_vol_status = BIOC_SVONLINE; 2875 sd->sd_meta->ssdi.ssd_level = bc->bc_level; 2876 sd->sd_meta->ssdi.ssd_strip_size = strip_size; 2877 strlcpy(sd->sd_meta->ssdi.ssd_vendor, "OPENBSD", 2878 sizeof(sd->sd_meta->ssdi.ssd_vendor)); 2879 snprintf(sd->sd_meta->ssdi.ssd_product, 2880 sizeof(sd->sd_meta->ssdi.ssd_product), "SR %s", 2881 sd->sd_name); 2882 snprintf(sd->sd_meta->ssdi.ssd_revision, 2883 sizeof(sd->sd_meta->ssdi.ssd_revision), "%03d", 2884 SR_META_VERSION); 2885 2886 sd->sd_meta_flags = bc->bc_flags & BIOC_SCNOAUTOASSEMBLE; 2887 updatemeta = 1; 2888 } else if (no_meta == no_chunk) { 2889 if (sd->sd_meta->ssd_meta_flags & SR_META_DIRTY) 2890 printf("%s: %s was not shutdown properly\n", 2891 DEVNAME(sc), sd->sd_meta->ssd_devname); 2892 if (user == 0 && sd->sd_meta_flags & BIOC_SCNOAUTOASSEMBLE) { 2893 DNPRINTF(SR_D_META, "%s: disk not auto assembled from " 2894 "metadata\n", DEVNAME(sc)); 2895 goto unwind; 2896 } 2897 if (sr_already_assembled(sd)) { 2898 printf("%s: disk ", DEVNAME(sc)); 2899 sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0); 2900 printf(" already assembled\n"); 2901 goto unwind; 2902 } 2903 #ifdef CRYPTO 2904 /* provide userland with kdf hint */ 2905 if (bc->bc_opaque_flags & BIOC_SOOUT) { 2906 if (bc->bc_opaque == NULL) 2907 goto unwind; 2908 2909 if (sizeof(sd->mds.mdd_crypto.scr_meta.scm_kdfhint) < 2910 bc->bc_opaque_size) 2911 goto unwind; 2912 2913 if (copyout(sd->mds.mdd_crypto.scr_meta.scm_kdfhint, 2914 bc->bc_opaque, bc->bc_opaque_size)) 2915 goto unwind; 2916 2917 /* we're done */ 2918 bc->bc_opaque_status = BIOC_SOINOUT_OK; 2919 rv = 0; 2920 goto unwind; 2921 } 2922 /* get kdf with maskkey from userland */ 2923 if (bc->bc_opaque_flags & BIOC_SOIN) { 2924 if (sr_crypto_get_kdf(bc, sd)) 2925 goto unwind; 2926 } 2927 #endif /* CRYPTO */ 2928 DNPRINTF(SR_D_META, "%s: disk assembled from metadata\n", 2929 DEVNAME(sc)); 2930 updatemeta = 0; 2931 } else if (no_meta == -1) { 2932 printf("%s: one of the chunks has corrupt metadata; aborting " 2933 "assembly\n", DEVNAME(sc)); 2934 goto unwind; 2935 } else { 2936 if (sr_already_assembled(sd)) { 2937 printf("%s: disk ", DEVNAME(sc)); 2938 sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0); 2939 printf(" already assembled; will not partial " 2940 "assemble it\n"); 2941 goto unwind; 2942 } 2943 printf("%s: trying to bring up %s degraded\n", DEVNAME(sc), 2944 sd->sd_meta->ssd_devname); 2945 } 2946 2947 /* metadata SHALL be fully filled in at this point */ 2948 2949 /* Make sure that metadata level matches assembly level. 
*/ 2950 if (sd->sd_meta->ssdi.ssd_level != bc->bc_level) { 2951 printf("%s: volume level does not match metadata level!\n", 2952 DEVNAME(sc)); 2953 goto unwind; 2954 } 2955 2956 if (sr_discipline_init(sd, sd->sd_meta->ssdi.ssd_level)) { 2957 printf("%s: could not initialize discipline\n", DEVNAME(sc)); 2958 goto unwind; 2959 } 2960 2961 /* allocate all resources */ 2962 if ((rv = sd->sd_alloc_resources(sd))) 2963 goto unwind; 2964 2965 if (sd->sd_capabilities & SR_CAP_SYSTEM_DISK) { 2966 /* set volume status */ 2967 sd->sd_set_vol_state(sd); 2968 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 2969 printf("%s: %s offline, will not be brought online\n", 2970 DEVNAME(sc), sd->sd_meta->ssd_devname); 2971 goto unwind; 2972 } 2973 2974 /* setup scsi midlayer */ 2975 if (sd->sd_openings) 2976 sd->sd_link.openings = sd->sd_openings(sd); 2977 else 2978 sd->sd_link.openings = sd->sd_max_wu; 2979 sd->sd_link.device = &sr_dev; 2980 sd->sd_link.device_softc = sc; 2981 sd->sd_link.adapter_softc = sc; 2982 sd->sd_link.adapter = &sr_switch; 2983 sd->sd_link.adapter_target = SR_MAX_LD; 2984 sd->sd_link.adapter_buswidth = 1; 2985 bzero(&saa, sizeof(saa)); 2986 saa.saa_sc_link = &sd->sd_link; 2987 2988 /* 2989 * we passed all checks return ENXIO if volume can't be created 2990 */ 2991 rv = ENXIO; 2992 2993 /* clear sense data */ 2994 bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense)); 2995 2996 /* use temporary discipline pointer */ 2997 s = splhigh(); 2998 sc->sc_attach_dis = sd; 2999 splx(s); 3000 dev2 = config_found(&sc->sc_dev, &saa, scsiprint); 3001 s = splhigh(); 3002 sc->sc_attach_dis = NULL; 3003 splx(s); 3004 TAILQ_FOREACH(dev, &alldevs, dv_list) 3005 if (dev->dv_parent == dev2) 3006 break; 3007 if (dev == NULL) 3008 goto unwind; 3009 3010 DNPRINTF(SR_D_IOCTL, "%s: sr device added: %s on scsibus%d\n", 3011 DEVNAME(sc), dev->dv_xname, sd->sd_link.scsibus); 3012 3013 sc->sc_dis[sd->sd_link.scsibus] = sd; 3014 for (i = 0, vol = -1; i <= sd->sd_link.scsibus; i++) 3015 if (sc->sc_dis[i]) 3016 vol++; 3017 sd->sd_scsibus_dev = dev2; 3018 3019 rv = 0; 3020 if (updatemeta) { 3021 /* fill out remaining volume metadata */ 3022 sd->sd_meta->ssdi.ssd_volid = vol; 3023 strlcpy(sd->sd_meta->ssd_devname, dev->dv_xname, 3024 sizeof(sd->sd_meta->ssd_devname)); 3025 sr_meta_init(sd, cl); 3026 } else { 3027 if (strncmp(sd->sd_meta->ssd_devname, dev->dv_xname, 3028 sizeof(dev->dv_xname))) { 3029 printf("%s: volume %s is roaming, it used to " 3030 "be %s, updating metadata\n", 3031 DEVNAME(sc), dev->dv_xname, 3032 sd->sd_meta->ssd_devname); 3033 3034 sd->sd_meta->ssdi.ssd_volid = vol; 3035 strlcpy(sd->sd_meta->ssd_devname, dev->dv_xname, 3036 sizeof(sd->sd_meta->ssd_devname)); 3037 } 3038 } 3039 3040 /* Update device name on any chunks which roamed. 
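Chunks are matched by their metadata rather than by device name, so a disk that shows up under a different name after a reboot is simply renamed in the metadata instead of being treated as missing.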
*/ 3041 sr_roam_chunks(sd); 3042 3043 #ifndef SMALL_KERNEL 3044 if (sr_sensors_create(sd)) 3045 printf("%s: unable to create sensor for %s\n", 3046 DEVNAME(sc), dev->dv_xname); 3047 else 3048 sd->sd_vol.sv_sensor_valid = 1; 3049 #endif /* SMALL_KERNEL */ 3050 } else { 3051 /* we are not an os disk */ 3052 if (updatemeta) { 3053 /* fill out remaining volume metadata */ 3054 sd->sd_meta->ssdi.ssd_volid = 0; 3055 strlcpy(sd->sd_meta->ssd_devname, ch_entry->src_devname, 3056 sizeof(sd->sd_meta->ssd_devname)); 3057 sr_meta_init(sd, cl); 3058 } 3059 if (sd->sd_start_discipline(sd)) 3060 goto unwind; 3061 } 3062 3063 /* save metadata to disk */ 3064 rv = sr_meta_save(sd, SR_META_DIRTY); 3065 sd->sd_shutdownhook = shutdownhook_establish(sr_shutdown, sd); 3066 3067 if (sd->sd_vol_status == BIOC_SVREBUILD) 3068 kthread_create_deferred(sr_rebuild, sd); 3069 3070 sd->sd_ready = 1; 3071 3072 return (rv); 3073 unwind: 3074 sr_discipline_shutdown(sd); 3075 3076 return (rv); 3077 } 3078 3079 int 3080 sr_ioctl_deleteraid(struct sr_softc *sc, struct bioc_deleteraid *dr) 3081 { 3082 struct sr_discipline *sd = NULL; 3083 int rv = 1; 3084 int i; 3085 3086 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_deleteraid %s\n", DEVNAME(sc), 3087 dr->bd_dev); 3088 3089 for (i = 0; i < SR_MAXSCSIBUS; i++) 3090 if (sc->sc_dis[i]) { 3091 if (!strncmp(sc->sc_dis[i]->sd_meta->ssd_devname, 3092 dr->bd_dev, 3093 sizeof(sc->sc_dis[i]->sd_meta->ssd_devname))) { 3094 sd = sc->sc_dis[i]; 3095 break; 3096 } 3097 } 3098 3099 if (sd == NULL) 3100 goto bad; 3101 3102 sd->sd_deleted = 1; 3103 sd->sd_meta->ssdi.ssd_flags = BIOC_SCNOAUTOASSEMBLE; 3104 sr_shutdown(sd); 3105 3106 rv = 0; 3107 bad: 3108 return (rv); 3109 } 3110 3111 int 3112 sr_ioctl_discipline(struct sr_softc *sc, struct bioc_discipline *bd) 3113 { 3114 struct sr_discipline *sd = NULL; 3115 int i, rv = 1; 3116 3117 /* Dispatch a discipline specific ioctl. */ 3118 3119 DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_discipline %s\n", DEVNAME(sc), 3120 bd->bd_dev); 3121 3122 for (i = 0; i < SR_MAXSCSIBUS; i++) 3123 if (sc->sc_dis[i]) { 3124 if (!strncmp(sc->sc_dis[i]->sd_meta->ssd_devname, 3125 bd->bd_dev, 3126 sizeof(sc->sc_dis[i]->sd_meta->ssd_devname))) { 3127 sd = sc->sc_dis[i]; 3128 break; 3129 } 3130 } 3131 3132 if (sd && sd->sd_ioctl_handler) 3133 rv = sd->sd_ioctl_handler(sd, bd); 3134 3135 return (rv); 3136 } 3137 3138 void 3139 sr_chunks_unwind(struct sr_softc *sc, struct sr_chunk_head *cl) 3140 { 3141 struct sr_chunk *ch_entry, *ch_next; 3142 3143 DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind\n", DEVNAME(sc)); 3144 3145 if (!cl) 3146 return; 3147 3148 for (ch_entry = SLIST_FIRST(cl); 3149 ch_entry != SLIST_END(cl); ch_entry = ch_next) { 3150 ch_next = SLIST_NEXT(ch_entry, src_link); 3151 3152 DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind closing: %s\n", 3153 DEVNAME(sc), ch_entry->src_devname); 3154 if (ch_entry->src_vn) { 3155 VOP_CLOSE(ch_entry->src_vn, FREAD | FWRITE, NOCRED, 0); 3156 vput(ch_entry->src_vn); 3157 } 3158 free(ch_entry, M_DEVBUF); 3159 } 3160 SLIST_INIT(cl); 3161 } 3162 3163 void 3164 sr_discipline_free(struct sr_discipline *sd) 3165 { 3166 struct sr_softc *sc; 3167 int i; 3168 3169 if (!sd) 3170 return; 3171 3172 sc = sd->sd_sc; 3173 3174 DNPRINTF(SR_D_DIS, "%s: sr_discipline_free %s\n", 3175 DEVNAME(sc), 3176 sd->sd_meta ? 
sd->sd_meta->ssd_devname : "nodev"); 3177 if (sd->sd_free_resources) 3178 sd->sd_free_resources(sd); 3179 if (sd->sd_vol.sv_chunks) 3180 free(sd->sd_vol.sv_chunks, M_DEVBUF); 3181 if (sd->sd_meta) 3182 free(sd->sd_meta, M_DEVBUF); 3183 if (sd->sd_meta_foreign) 3184 free(sd->sd_meta_foreign, M_DEVBUF); 3185 3186 for (i = 0; i < SR_MAXSCSIBUS; i++) 3187 if (sc->sc_dis[i] == sd) { 3188 sc->sc_dis[i] = NULL; 3189 break; 3190 } 3191 3192 free(sd, M_DEVBUF); 3193 } 3194 3195 void 3196 sr_discipline_shutdown(struct sr_discipline *sd) 3197 { 3198 struct sr_softc *sc = sd->sd_sc; 3199 int s; 3200 3201 if (!sd || !sc) 3202 return; 3203 3204 DNPRINTF(SR_D_DIS, "%s: sr_discipline_shutdown %s\n", DEVNAME(sc), 3205 sd->sd_meta ? sd->sd_meta->ssd_devname : "nodev"); 3206 3207 s = splbio(); 3208 3209 sd->sd_ready = 0; 3210 3211 if (sd->sd_shutdownhook) 3212 shutdownhook_disestablish(sd->sd_shutdownhook); 3213 3214 /* make sure there isn't a sync pending and yield */ 3215 wakeup(sd); 3216 while (sd->sd_sync || sd->sd_must_flush) 3217 if (tsleep(&sd->sd_sync, MAXPRI, "sr_down", 60 * hz) == 3218 EWOULDBLOCK) 3219 break; 3220 3221 #ifndef SMALL_KERNEL 3222 sr_sensors_delete(sd); 3223 #endif /* SMALL_KERNEL */ 3224 3225 if (sd->sd_scsibus_dev) 3226 config_detach(sd->sd_scsibus_dev, DETACH_FORCE); 3227 3228 sr_chunks_unwind(sc, &sd->sd_vol.sv_chunk_list); 3229 3230 if (sd) 3231 sr_discipline_free(sd); 3232 3233 splx(s); 3234 } 3235 3236 int 3237 sr_discipline_init(struct sr_discipline *sd, int level) 3238 { 3239 int rv = 1; 3240 3241 switch (level) { 3242 case 0: 3243 sr_raid0_discipline_init(sd); 3244 break; 3245 case 1: 3246 sr_raid1_discipline_init(sd); 3247 break; 3248 case 4: 3249 sr_raidp_discipline_init(sd, SR_MD_RAID4); 3250 break; 3251 case 5: 3252 sr_raidp_discipline_init(sd, SR_MD_RAID5); 3253 break; 3254 case 6: 3255 sr_raid6_discipline_init(sd); 3256 break; 3257 #ifdef AOE 3258 /* AOE target. */ 3259 case 'A': 3260 sr_aoe_server_discipline_init(sd); 3261 break; 3262 /* AOE initiator. 
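Level 'a' attaches the ATA-over-Ethernet initiator discipline, the counterpart of the 'A' target case above; both variants are only compiled in when the kernel is configured with the AOE option.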
*/ 3263 case 'a': 3264 sr_aoe_discipline_init(sd); 3265 break; 3266 #endif 3267 #ifdef CRYPTO 3268 case 'C': 3269 sr_crypto_discipline_init(sd); 3270 break; 3271 #endif 3272 default: 3273 goto bad; 3274 } 3275 3276 rv = 0; 3277 bad: 3278 return (rv); 3279 } 3280 3281 int 3282 sr_raid_inquiry(struct sr_workunit *wu) 3283 { 3284 struct sr_discipline *sd = wu->swu_dis; 3285 struct scsi_xfer *xs = wu->swu_xs; 3286 struct scsi_inquiry_data inq; 3287 3288 DNPRINTF(SR_D_DIS, "%s: sr_raid_inquiry\n", DEVNAME(sd->sd_sc)); 3289 3290 bzero(&inq, sizeof(inq)); 3291 inq.device = T_DIRECT; 3292 inq.dev_qual2 = 0; 3293 inq.version = 2; 3294 inq.response_format = 2; 3295 inq.additional_length = 32; 3296 strlcpy(inq.vendor, sd->sd_meta->ssdi.ssd_vendor, 3297 sizeof(inq.vendor)); 3298 strlcpy(inq.product, sd->sd_meta->ssdi.ssd_product, 3299 sizeof(inq.product)); 3300 strlcpy(inq.revision, sd->sd_meta->ssdi.ssd_revision, 3301 sizeof(inq.revision)); 3302 sr_copy_internal_data(xs, &inq, sizeof(inq)); 3303 3304 return (0); 3305 } 3306 3307 int 3308 sr_raid_read_cap(struct sr_workunit *wu) 3309 { 3310 struct sr_discipline *sd = wu->swu_dis; 3311 struct scsi_xfer *xs = wu->swu_xs; 3312 struct scsi_read_cap_data rcd; 3313 struct scsi_read_cap_data_16 rcd16; 3314 int rv = 1; 3315 3316 DNPRINTF(SR_D_DIS, "%s: sr_raid_read_cap\n", DEVNAME(sd->sd_sc)); 3317 3318 if (xs->cmd->opcode == READ_CAPACITY) { 3319 bzero(&rcd, sizeof(rcd)); 3320 if (sd->sd_meta->ssdi.ssd_size > 0xffffffffllu) 3321 _lto4b(0xffffffff, rcd.addr); 3322 else 3323 _lto4b(sd->sd_meta->ssdi.ssd_size, rcd.addr); 3324 _lto4b(512, rcd.length); 3325 sr_copy_internal_data(xs, &rcd, sizeof(rcd)); 3326 rv = 0; 3327 } else if (xs->cmd->opcode == READ_CAPACITY_16) { 3328 bzero(&rcd16, sizeof(rcd16)); 3329 _lto8b(sd->sd_meta->ssdi.ssd_size, rcd16.addr); 3330 _lto4b(512, rcd16.length); 3331 sr_copy_internal_data(xs, &rcd16, sizeof(rcd16)); 3332 rv = 0; 3333 } 3334 3335 return (rv); 3336 } 3337 3338 int 3339 sr_raid_tur(struct sr_workunit *wu) 3340 { 3341 struct sr_discipline *sd = wu->swu_dis; 3342 3343 DNPRINTF(SR_D_DIS, "%s: sr_raid_tur\n", DEVNAME(sd->sd_sc)); 3344 3345 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 3346 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT; 3347 sd->sd_scsi_sense.flags = SKEY_NOT_READY; 3348 sd->sd_scsi_sense.add_sense_code = 0x04; 3349 sd->sd_scsi_sense.add_sense_code_qual = 0x11; 3350 sd->sd_scsi_sense.extra_len = 4; 3351 return (1); 3352 } else if (sd->sd_vol_status == BIOC_SVINVALID) { 3353 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT; 3354 sd->sd_scsi_sense.flags = SKEY_HARDWARE_ERROR; 3355 sd->sd_scsi_sense.add_sense_code = 0x05; 3356 sd->sd_scsi_sense.add_sense_code_qual = 0x00; 3357 sd->sd_scsi_sense.extra_len = 4; 3358 return (1); 3359 } 3360 3361 return (0); 3362 } 3363 3364 int 3365 sr_raid_request_sense(struct sr_workunit *wu) 3366 { 3367 struct sr_discipline *sd = wu->swu_dis; 3368 struct scsi_xfer *xs = wu->swu_xs; 3369 3370 DNPRINTF(SR_D_DIS, "%s: sr_raid_request_sense\n", 3371 DEVNAME(sd->sd_sc)); 3372 3373 /* use latest sense data */ 3374 bcopy(&sd->sd_scsi_sense, &xs->sense, sizeof(xs->sense)); 3375 3376 /* clear sense data */ 3377 bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense)); 3378 3379 return (0); 3380 } 3381 3382 int 3383 sr_raid_start_stop(struct sr_workunit *wu) 3384 { 3385 struct sr_discipline *sd = wu->swu_dis; 3386 struct scsi_xfer *xs = wu->swu_xs; 3387 struct scsi_start_stop *ss = (struct scsi_start_stop *)xs->cmd; 3388 int rv = 1; 3389 3390 DNPRINTF(SR_D_DIS, "%s: sr_raid_start_stop\n", 
3391 DEVNAME(sd->sd_sc)); 3392 3393 if (!ss) 3394 return (rv); 3395 3396 if (ss->byte2 == 0x00) { 3397 /* START */ 3398 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 3399 /* bring volume online */ 3400 /* XXX check to see if volume can be brought online */ 3401 sd->sd_vol_status = BIOC_SVONLINE; 3402 } 3403 rv = 0; 3404 } else /* XXX is this the check? if (byte == 0x01) */ { 3405 /* STOP */ 3406 if (sd->sd_vol_status == BIOC_SVONLINE) { 3407 /* bring volume offline */ 3408 sd->sd_vol_status = BIOC_SVOFFLINE; 3409 } 3410 rv = 0; 3411 } 3412 3413 return (rv); 3414 } 3415 3416 int 3417 sr_raid_sync(struct sr_workunit *wu) 3418 { 3419 struct sr_discipline *sd = wu->swu_dis; 3420 int s, rv = 0, ios; 3421 3422 DNPRINTF(SR_D_DIS, "%s: sr_raid_sync\n", DEVNAME(sd->sd_sc)); 3423 3424 /* when doing a fake sync don't count the wu */ 3425 ios = wu->swu_fake ? 0 : 1; 3426 3427 s = splbio(); 3428 sd->sd_sync = 1; 3429 3430 while (sd->sd_wu_pending > ios) 3431 if (tsleep(sd, PRIBIO, "sr_sync", 15 * hz) == EWOULDBLOCK) { 3432 DNPRINTF(SR_D_DIS, "%s: sr_raid_sync timeout\n", 3433 DEVNAME(sd->sd_sc)); 3434 rv = 1; 3435 break; 3436 } 3437 3438 sd->sd_sync = 0; 3439 splx(s); 3440 3441 wakeup(&sd->sd_sync); 3442 3443 return (rv); 3444 } 3445 3446 void 3447 sr_raid_startwu(struct sr_workunit *wu) 3448 { 3449 struct sr_discipline *sd = wu->swu_dis; 3450 struct sr_ccb *ccb; 3451 3452 splassert(IPL_BIO); 3453 3454 if (wu->swu_state == SR_WU_RESTART) 3455 /* 3456 * no need to put the wu on the pending queue since we 3457 * are restarting the io 3458 */ 3459 ; 3460 else 3461 /* move wu to pending queue */ 3462 TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link); 3463 3464 /* start all individual ios */ 3465 TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) { 3466 VOP_STRATEGY(&ccb->ccb_buf); 3467 } 3468 } 3469 3470 void 3471 sr_checksum_print(u_int8_t *md5) 3472 { 3473 int i; 3474 3475 for (i = 0; i < MD5_DIGEST_LENGTH; i++) 3476 printf("%02x", md5[i]); 3477 } 3478 3479 void 3480 sr_checksum(struct sr_softc *sc, void *src, void *md5, u_int32_t len) 3481 { 3482 MD5_CTX ctx; 3483 3484 DNPRINTF(SR_D_MISC, "%s: sr_checksum(%p %p %d)\n", DEVNAME(sc), src, 3485 md5, len); 3486 3487 MD5Init(&ctx); 3488 MD5Update(&ctx, src, len); 3489 MD5Final(md5, &ctx); 3490 } 3491 3492 void 3493 sr_uuid_get(struct sr_uuid *uuid) 3494 { 3495 arc4random_buf(uuid->sui_id, sizeof(uuid->sui_id)); 3496 /* UUID version 4: random */ 3497 uuid->sui_id[6] &= 0x0f; 3498 uuid->sui_id[6] |= 0x40; 3499 /* RFC4122 variant */ 3500 uuid->sui_id[8] &= 0x3f; 3501 uuid->sui_id[8] |= 0x80; 3502 } 3503 3504 void 3505 sr_uuid_print(struct sr_uuid *uuid, int cr) 3506 { 3507 printf("%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-" 3508 "%02x%02x%02x%02x%02x%02x", 3509 uuid->sui_id[0], uuid->sui_id[1], 3510 uuid->sui_id[2], uuid->sui_id[3], 3511 uuid->sui_id[4], uuid->sui_id[5], 3512 uuid->sui_id[6], uuid->sui_id[7], 3513 uuid->sui_id[8], uuid->sui_id[9], 3514 uuid->sui_id[10], uuid->sui_id[11], 3515 uuid->sui_id[12], uuid->sui_id[13], 3516 uuid->sui_id[14], uuid->sui_id[15]); 3517 3518 if (cr) 3519 printf("\n"); 3520 } 3521 3522 int 3523 sr_already_assembled(struct sr_discipline *sd) 3524 { 3525 struct sr_softc *sc = sd->sd_sc; 3526 int i; 3527 3528 for (i = 0; i < SR_MAXSCSIBUS; i++) 3529 if (sc->sc_dis[i]) 3530 if (!bcmp(&sd->sd_meta->ssdi.ssd_uuid, 3531 &sc->sc_dis[i]->sd_meta->ssdi.ssd_uuid, 3532 sizeof(sd->sd_meta->ssdi.ssd_uuid))) 3533 return (1); 3534 3535 return (0); 3536 } 3537 3538 int32_t 3539 sr_validate_stripsize(u_int32_t b) 3540 { 3541 int s = 0; 3542 3543 if (b 
% 512) 3544 return (-1); 3545 3546 while ((b & 1) == 0) { 3547 b >>= 1; 3548 s++; 3549 } 3550 3551 /* only multiple of twos */ 3552 b >>= 1; 3553 if (b) 3554 return (-1); 3555 3556 return (s); 3557 } 3558 3559 void 3560 sr_shutdown(void *arg) 3561 { 3562 struct sr_discipline *sd = arg; 3563 #ifdef SR_DEBUG 3564 struct sr_softc *sc = sd->sd_sc; 3565 #endif 3566 DNPRINTF(SR_D_DIS, "%s: sr_shutdown %s\n", 3567 DEVNAME(sc), sd->sd_meta->ssd_devname); 3568 3569 /* abort rebuild and drain io */ 3570 sd->sd_reb_abort = 1; 3571 while (sd->sd_reb_active) 3572 tsleep(sd, PWAIT, "sr_shutdown", 1); 3573 3574 sr_meta_save(sd, 0); 3575 3576 sr_discipline_shutdown(sd); 3577 } 3578 3579 int 3580 sr_validate_io(struct sr_workunit *wu, daddr64_t *blk, char *func) 3581 { 3582 struct sr_discipline *sd = wu->swu_dis; 3583 struct scsi_xfer *xs = wu->swu_xs; 3584 int rv = 1; 3585 3586 DNPRINTF(SR_D_DIS, "%s: %s 0x%02x\n", DEVNAME(sd->sd_sc), func, 3587 xs->cmd->opcode); 3588 3589 if (sd->sd_vol_status == BIOC_SVOFFLINE) { 3590 DNPRINTF(SR_D_DIS, "%s: %s device offline\n", 3591 DEVNAME(sd->sd_sc), func); 3592 goto bad; 3593 } 3594 3595 if (xs->datalen == 0) { 3596 printf("%s: %s: illegal block count for %s\n", 3597 DEVNAME(sd->sd_sc), func, sd->sd_meta->ssd_devname); 3598 goto bad; 3599 } 3600 3601 if (xs->cmdlen == 10) 3602 *blk = _4btol(((struct scsi_rw_big *)xs->cmd)->addr); 3603 else if (xs->cmdlen == 16) 3604 *blk = _8btol(((struct scsi_rw_16 *)xs->cmd)->addr); 3605 else if (xs->cmdlen == 6) 3606 *blk = _3btol(((struct scsi_rw *)xs->cmd)->addr); 3607 else { 3608 printf("%s: %s: illegal cmdlen for %s\n", 3609 DEVNAME(sd->sd_sc), func, sd->sd_meta->ssd_devname); 3610 goto bad; 3611 } 3612 3613 wu->swu_blk_start = *blk; 3614 wu->swu_blk_end = *blk + (xs->datalen >> DEV_BSHIFT) - 1; 3615 3616 if (wu->swu_blk_end > sd->sd_meta->ssdi.ssd_size) { 3617 DNPRINTF(SR_D_DIS, "%s: %s out of bounds start: %lld " 3618 "end: %lld length: %d\n", 3619 DEVNAME(sd->sd_sc), func, wu->swu_blk_start, 3620 wu->swu_blk_end, xs->datalen); 3621 3622 sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT | 3623 SSD_ERRCODE_VALID; 3624 sd->sd_scsi_sense.flags = SKEY_ILLEGAL_REQUEST; 3625 sd->sd_scsi_sense.add_sense_code = 0x21; 3626 sd->sd_scsi_sense.add_sense_code_qual = 0x00; 3627 sd->sd_scsi_sense.extra_len = 4; 3628 goto bad; 3629 } 3630 3631 rv = 0; 3632 bad: 3633 return (rv); 3634 } 3635 3636 int 3637 sr_check_io_collision(struct sr_workunit *wu) 3638 { 3639 struct sr_discipline *sd = wu->swu_dis; 3640 struct sr_workunit *wup; 3641 3642 splassert(IPL_BIO); 3643 3644 /* walk queue backwards and fill in collider if we have one */ 3645 TAILQ_FOREACH_REVERSE(wup, &sd->sd_wu_pendq, sr_wu_list, swu_link) { 3646 if (wu->swu_blk_end < wup->swu_blk_start || 3647 wup->swu_blk_end < wu->swu_blk_start) 3648 continue; 3649 3650 /* we have an LBA collision, defer wu */ 3651 wu->swu_state = SR_WU_DEFERRED; 3652 if (wup->swu_collider) 3653 /* wu is on deferred queue, append to last wu */ 3654 while (wup->swu_collider) 3655 wup = wup->swu_collider; 3656 3657 wup->swu_collider = wu; 3658 TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link); 3659 sd->sd_wu_collisions++; 3660 goto queued; 3661 } 3662 3663 return (0); 3664 queued: 3665 return (1); 3666 } 3667 3668 void 3669 sr_rebuild(void *arg) 3670 { 3671 struct sr_discipline *sd = arg; 3672 struct sr_softc *sc = sd->sd_sc; 3673 3674 if (kthread_create(sr_rebuild_thread, sd, &sd->sd_background_proc, 3675 DEVNAME(sc)) != 0) 3676 printf("%s: unable to start background operation\n", 3677 DEVNAME(sc)); 3678 }
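/*
 * Rebuild outline: sr_rebuild_thread() walks the volume in
 * SR_REBUILD_IO_SIZE blocks, issuing a READ_16 work unit followed by a
 * WRITE_16 work unit for the same LBA range.  The write is deferred and
 * registered as the read's collider, so the completion path kicks it off
 * as soon as the read finishes.  Progress is kept in ssd_rebuild and the
 * metadata is flushed roughly once per percent, which is what allows an
 * interrupted rebuild to be resumed near the point where it stopped.
 */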
3679 3680 void 3681 sr_rebuild_thread(void *arg) 3682 { 3683 struct sr_discipline *sd = arg; 3684 struct sr_softc *sc = sd->sd_sc; 3685 daddr64_t whole_blk, partial_blk, blk, sz, lba; 3686 daddr64_t psz, rb, restart; 3687 uint64_t mysize = 0; 3688 struct sr_workunit *wu_r, *wu_w; 3689 struct scsi_xfer xs_r, xs_w; 3690 struct scsi_rw_16 cr, cw; 3691 int c, s, slept, percent = 0, old_percent = -1; 3692 u_int8_t *buf; 3693 3694 whole_blk = sd->sd_meta->ssdi.ssd_size / SR_REBUILD_IO_SIZE; 3695 partial_blk = sd->sd_meta->ssdi.ssd_size % SR_REBUILD_IO_SIZE; 3696 3697 restart = sd->sd_meta->ssd_rebuild / SR_REBUILD_IO_SIZE; 3698 if (restart > whole_blk) { 3699 printf("%s: bogus rebuild restart offset, starting from 0\n", 3700 DEVNAME(sc)); 3701 restart = 0; 3702 } 3703 if (restart) { 3704 /* 3705 * XXX there is a hole here; there is a possibility that we 3706 * had a restart however the chunk that was supposed to 3707 * be rebuilt is no longer valid; we can reach this situation 3708 * when a rebuild is in progress and the box crashes and 3709 * on reboot the rebuild chunk is different (like zero'd or 3710 * replaced). We need to check the uuid of the chunk that is 3711 * being rebuilt to assert this. 3712 */ 3713 psz = sd->sd_meta->ssdi.ssd_size; 3714 rb = sd->sd_meta->ssd_rebuild; 3715 if (rb > 0) 3716 percent = 100 - ((psz * 100 - rb * 100) / psz) - 1; 3717 else 3718 percent = 0; 3719 printf("%s: resuming rebuild on %s at %d%%\n", 3720 DEVNAME(sc), sd->sd_meta->ssd_devname, percent); 3721 } 3722 3723 sd->sd_reb_active = 1; 3724 3725 buf = malloc(SR_REBUILD_IO_SIZE << DEV_BSHIFT, M_DEVBUF, M_WAITOK); 3726 for (blk = restart; blk <= whole_blk; blk++) { 3727 if (blk == whole_blk) 3728 sz = partial_blk; 3729 else 3730 sz = SR_REBUILD_IO_SIZE; 3731 mysize += sz; 3732 lba = blk * SR_REBUILD_IO_SIZE; 3733 3734 /* get some wu */ 3735 if ((wu_r = sr_wu_get(sd, 1)) == NULL) 3736 panic("%s: rebuild exhausted wu_r", DEVNAME(sc)); 3737 if ((wu_w = sr_wu_get(sd, 1)) == NULL) 3738 panic("%s: rebuild exhausted wu_w", DEVNAME(sc)); 3739 3740 /* setup read io */ 3741 bzero(&xs_r, sizeof xs_r); 3742 bzero(&cr, sizeof cr); 3743 xs_r.error = XS_NOERROR; 3744 xs_r.flags = SCSI_DATA_IN; 3745 xs_r.datalen = sz << DEV_BSHIFT; 3746 xs_r.data = buf; 3747 xs_r.cmdlen = 16; 3748 cr.opcode = READ_16; 3749 _lto4b(sz, cr.length); 3750 _lto8b(lba, cr.addr); 3751 xs_r.cmd = (struct scsi_generic *)&cr; 3752 wu_r->swu_flags |= SR_WUF_REBUILD; 3753 wu_r->swu_xs = &xs_r; 3754 if (sd->sd_scsi_rw(wu_r)) { 3755 printf("%s: could not create read io\n", 3756 DEVNAME(sc)); 3757 goto fail; 3758 } 3759 3760 /* setup write io */ 3761 bzero(&xs_w, sizeof xs_w); 3762 bzero(&cw, sizeof cw); 3763 xs_w.error = XS_NOERROR; 3764 xs_w.flags = SCSI_DATA_OUT; 3765 xs_w.datalen = sz << DEV_BSHIFT; 3766 xs_w.data = buf; 3767 xs_w.cmdlen = 16; 3768 cw.opcode = WRITE_16; 3769 _lto4b(sz, cw.length); 3770 _lto8b(lba, cw.addr); 3771 xs_w.cmd = (struct scsi_generic *)&cw; 3772 wu_w->swu_flags |= SR_WUF_REBUILD; 3773 wu_w->swu_xs = &xs_w; 3774 if (sd->sd_scsi_rw(wu_w)) { 3775 printf("%s: could not create write io\n", 3776 DEVNAME(sc)); 3777 goto fail; 3778 } 3779 3780 /* 3781 * collide with the read io so that we get automatically 3782 * started when the read is done 3783 */ 3784 wu_w->swu_state = SR_WU_DEFERRED; 3785 wu_r->swu_collider = wu_w; 3786 s = splbio(); 3787 TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu_w, swu_link); 3788 3789 /* schedule io */ 3790 if (sr_check_io_collision(wu_r)) 3791 goto queued; 3792 3793 sr_raid_startwu(wu_r); 3794 queued: 3795 splx(s); 3796 3797 /* 
wait for read completion */ 3798 slept = 0; 3799 while ((wu_w->swu_flags & SR_WUF_REBUILDIOCOMP) == 0) { 3800 tsleep(wu_w, PRIBIO, "sr_rebuild", 0); 3801 slept = 1; 3802 } 3803 /* yield if we didn't sleep */ 3804 if (slept == 0) 3805 tsleep(sc, PWAIT, "sr_yield", 1); 3806 3807 sr_wu_put(wu_r); 3808 sr_wu_put(wu_w); 3809 3810 sd->sd_meta->ssd_rebuild = lba; 3811 3812 /* save metadata every percent */ 3813 psz = sd->sd_meta->ssdi.ssd_size; 3814 rb = sd->sd_meta->ssd_rebuild; 3815 if (rb > 0) 3816 percent = 100 - ((psz * 100 - rb * 100) / psz) - 1; 3817 else 3818 percent = 0; 3819 if (percent != old_percent && blk != whole_blk) { 3820 if (sr_meta_save(sd, SR_META_DIRTY)) 3821 printf("%s: could not save metadata to %s\n", 3822 DEVNAME(sc), sd->sd_meta->ssd_devname); 3823 old_percent = percent; 3824 } 3825 3826 if (sd->sd_reb_abort) 3827 goto abort; 3828 } 3829 3830 /* all done */ 3831 sd->sd_meta->ssd_rebuild = 0; 3832 for (c = 0; c < sd->sd_meta->ssdi.ssd_chunk_no; c++) 3833 if (sd->sd_vol.sv_chunks[c]->src_meta.scm_status == 3834 BIOC_SDREBUILD) { 3835 sd->sd_set_chunk_state(sd, c, BIOC_SDONLINE); 3836 break; 3837 } 3838 3839 abort: 3840 if (sr_meta_save(sd, SR_META_DIRTY)) 3841 printf("%s: could not save metadata to %s\n", 3842 DEVNAME(sc), sd->sd_meta->ssd_devname); 3843 fail: 3844 free(buf, M_DEVBUF); 3845 sd->sd_reb_active = 0; 3846 kthread_exit(0); 3847 } 3848 3849 #ifndef SMALL_KERNEL 3850 int 3851 sr_sensors_create(struct sr_discipline *sd) 3852 { 3853 struct sr_softc *sc = sd->sd_sc; 3854 int rv = 1; 3855 3856 DNPRINTF(SR_D_STATE, "%s: %s: sr_sensors_create\n", 3857 DEVNAME(sc), sd->sd_meta->ssd_devname); 3858 3859 strlcpy(sd->sd_vol.sv_sensordev.xname, DEVNAME(sc), 3860 sizeof(sd->sd_vol.sv_sensordev.xname)); 3861 3862 sd->sd_vol.sv_sensor.type = SENSOR_DRIVE; 3863 sd->sd_vol.sv_sensor.status = SENSOR_S_UNKNOWN; 3864 strlcpy(sd->sd_vol.sv_sensor.desc, sd->sd_meta->ssd_devname, 3865 sizeof(sd->sd_vol.sv_sensor.desc)); 3866 3867 sensor_attach(&sd->sd_vol.sv_sensordev, &sd->sd_vol.sv_sensor); 3868 3869 if (sc->sc_sensors_running == 0) { 3870 if (sensor_task_register(sc, sr_sensors_refresh, 10) == NULL) 3871 goto bad; 3872 sc->sc_sensors_running = 1; 3873 } 3874 sensordev_install(&sd->sd_vol.sv_sensordev); 3875 3876 rv = 0; 3877 bad: 3878 return (rv); 3879 } 3880 3881 void 3882 sr_sensors_delete(struct sr_discipline *sd) 3883 { 3884 DNPRINTF(SR_D_STATE, "%s: sr_sensors_delete\n", DEVNAME(sd->sd_sc)); 3885 3886 if (sd->sd_vol.sv_sensor_valid) 3887 sensordev_deinstall(&sd->sd_vol.sv_sensordev); 3888 } 3889 3890 void 3891 sr_sensors_refresh(void *arg) 3892 { 3893 struct sr_softc *sc = arg; 3894 struct sr_volume *sv; 3895 struct sr_discipline *sd; 3896 int i, vol; 3897 3898 DNPRINTF(SR_D_STATE, "%s: sr_sensors_refresh\n", DEVNAME(sc)); 3899 3900 for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { 3901 /* XXX this will not work when we stagger disciplines */ 3902 if (!sc->sc_dis[i]) 3903 continue; 3904 3905 sd = sc->sc_dis[i]; 3906 sv = &sd->sd_vol; 3907 3908 switch(sd->sd_vol_status) { 3909 case BIOC_SVOFFLINE: 3910 sv->sv_sensor.value = SENSOR_DRIVE_FAIL; 3911 sv->sv_sensor.status = SENSOR_S_CRIT; 3912 break; 3913 3914 case BIOC_SVDEGRADED: 3915 sv->sv_sensor.value = SENSOR_DRIVE_PFAIL; 3916 sv->sv_sensor.status = SENSOR_S_WARN; 3917 break; 3918 3919 case BIOC_SVSCRUB: 3920 case BIOC_SVONLINE: 3921 sv->sv_sensor.value = SENSOR_DRIVE_ONLINE; 3922 sv->sv_sensor.status = SENSOR_S_OK; 3923 break; 3924 3925 default: 3926 sv->sv_sensor.value = 0; /* unknown */ 3927 sv->sv_sensor.status = 
SENSOR_S_UNKNOWN; 3928 } 3929 } 3930 } 3931 #endif /* SMALL_KERNEL */ 3932 3933 #ifdef SR_FANCY_STATS 3934 void sr_print_stats(void); 3935 3936 void 3937 sr_print_stats(void) 3938 { 3939 struct sr_softc *sc; 3940 struct sr_discipline *sd; 3941 int i, vol; 3942 3943 for (i = 0; i < softraid_cd.cd_ndevs; i++) 3944 if (softraid_cd.cd_devs[i]) { 3945 sc = softraid_cd.cd_devs[i]; 3946 /* we'll only have one softc */ 3947 break; 3948 } 3949 3950 if (!sc) { 3951 printf("no softraid softc found\n"); 3952 return; 3953 } 3954 3955 for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { 3956 /* XXX this will not work when we stagger disciplines */ 3957 if (!sc->sc_dis[i]) 3958 continue; 3959 3960 sd = sc->sc_dis[i]; 3961 printf("%s: ios pending: %d collisions %llu\n", 3962 sd->sd_meta->ssd_devname, 3963 sd->sd_wu_pending, 3964 sd->sd_wu_collisions); 3965 } 3966 } 3967 #endif /* SR_FANCY_STATS */ 3968 3969 #ifdef SR_DEBUG 3970 void 3971 sr_meta_print(struct sr_metadata *m) 3972 { 3973 int i; 3974 struct sr_meta_chunk *mc; 3975 struct sr_meta_opt *mo; 3976 3977 if (!(sr_debug & SR_D_META)) 3978 return; 3979 3980 printf("\tssd_magic 0x%llx\n", m->ssdi.ssd_magic); 3981 printf("\tssd_version %d\n", m->ssdi.ssd_version); 3982 printf("\tssd_flags 0x%x\n", m->ssdi.ssd_flags); 3983 printf("\tssd_uuid "); 3984 sr_uuid_print(&m->ssdi.ssd_uuid, 1); 3985 printf("\tssd_chunk_no %d\n", m->ssdi.ssd_chunk_no); 3986 printf("\tssd_chunk_id %d\n", m->ssdi.ssd_chunk_id); 3987 printf("\tssd_opt_no %d\n", m->ssdi.ssd_opt_no); 3988 printf("\tssd_volid %d\n", m->ssdi.ssd_volid); 3989 printf("\tssd_level %d\n", m->ssdi.ssd_level); 3990 printf("\tssd_size %lld\n", m->ssdi.ssd_size); 3991 printf("\tssd_devname %s\n", m->ssd_devname); 3992 printf("\tssd_vendor %s\n", m->ssdi.ssd_vendor); 3993 printf("\tssd_product %s\n", m->ssdi.ssd_product); 3994 printf("\tssd_revision %s\n", m->ssdi.ssd_revision); 3995 printf("\tssd_strip_size %d\n", m->ssdi.ssd_strip_size); 3996 printf("\tssd_checksum "); 3997 sr_checksum_print(m->ssd_checksum); 3998 printf("\n"); 3999 printf("\tssd_meta_flags 0x%x\n", m->ssd_meta_flags); 4000 printf("\tssd_ondisk %llu\n", m->ssd_ondisk); 4001 4002 mc = (struct sr_meta_chunk *)(m + 1); 4003 for (i = 0; i < m->ssdi.ssd_chunk_no; i++, mc++) { 4004 printf("\t\tscm_volid %d\n", mc->scmi.scm_volid); 4005 printf("\t\tscm_chunk_id %d\n", mc->scmi.scm_chunk_id); 4006 printf("\t\tscm_devname %s\n", mc->scmi.scm_devname); 4007 printf("\t\tscm_size %lld\n", mc->scmi.scm_size); 4008 printf("\t\tscm_coerced_size %lld\n",mc->scmi.scm_coerced_size); 4009 printf("\t\tscm_uuid "); 4010 sr_uuid_print(&mc->scmi.scm_uuid, 1); 4011 printf("\t\tscm_checksum "); 4012 sr_checksum_print(mc->scm_checksum); 4013 printf("\n"); 4014 printf("\t\tscm_status %d\n", mc->scm_status); 4015 } 4016 4017 mo = (struct sr_meta_opt *)(mc); 4018 for (i = 0; i < m->ssdi.ssd_opt_no; i++, mo++) { 4019 printf("\t\t\tsom_type %d\n", mo->somi.som_type); 4020 printf("\t\t\tsom_checksum "); 4021 sr_checksum_print(mo->som_checksum); 4022 printf("\n"); 4023 } 4024 } 4025 4026 void 4027 sr_dump_mem(u_int8_t *p, int len) 4028 { 4029 int i; 4030 4031 for (i = 0; i < len; i++) 4032 printf("%02x ", *p++); 4033 printf("\n"); 4034 } 4035 4036 #endif /* SR_DEBUG */ 4037