/*	$OpenBSD: mpath.c,v 1.36 2014/07/12 18:50:25 tedu Exp $ */

/*
 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/device.h>
#include <sys/proc.h>
#include <sys/conf.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/selinfo.h>

#include <scsi/scsi_all.h>
#include <scsi/scsiconf.h>
#include <scsi/mpathvar.h>

#define MPATH_BUSWIDTH 256

int		mpath_match(struct device *, void *, void *);
void		mpath_attach(struct device *, struct device *, void *);
void		mpath_shutdown(void *);

TAILQ_HEAD(mpath_paths, mpath_path);

struct mpath_group {
	TAILQ_ENTRY(mpath_group) g_entry;
	struct mpath_paths	 g_paths;
	struct mpath_dev	*g_dev;
	u_int			 g_id;
};
TAILQ_HEAD(mpath_groups, mpath_group);

struct mpath_dev {
	struct mutex		 d_mtx;

	struct scsi_xfer_list	 d_xfers;
	struct mpath_path	*d_next_path;

	struct mpath_groups	 d_groups;

	struct mpath_group	*d_failover_iter;
	struct timeout		 d_failover_tmo;
	u_int			 d_failover;

	const struct mpath_ops	*d_ops;
	struct devid		*d_id;
};

struct mpath_softc {
	struct device		 sc_dev;
	struct scsi_link	 sc_link;
	struct scsibus_softc	*sc_scsibus;
	struct mpath_dev	*sc_devs[MPATH_BUSWIDTH];
};
#define DEVNAME(_s) ((_s)->sc_dev.dv_xname)

struct mpath_softc	*mpath;

struct cfattach mpath_ca = {
	sizeof(struct mpath_softc),
	mpath_match,
	mpath_attach
};

struct cfdriver mpath_cd = {
	NULL,
	"mpath",
	DV_DULL
};

void		mpath_cmd(struct scsi_xfer *);
void		mpath_minphys(struct buf *, struct scsi_link *);
int		mpath_probe(struct scsi_link *);

struct mpath_path *mpath_next_path(struct mpath_dev *);
void		mpath_done(struct scsi_xfer *);

void		mpath_failover(struct mpath_dev *);
void		mpath_failover_start(void *);
void		mpath_failover_check(struct mpath_dev *);

struct scsi_adapter mpath_switch = {
	mpath_cmd,
	scsi_minphys,
	mpath_probe
};

void		mpath_xs_stuffup(struct scsi_xfer *);

int
mpath_match(struct device *parent, void *match, void *aux)
{
	return (1);
}

void
mpath_attach(struct device *parent, struct device *self, void *aux)
{
	struct mpath_softc *sc = (struct mpath_softc *)self;
	struct scsibus_attach_args saa;

	mpath = sc;

	printf("\n");

	sc->sc_link.adapter = &mpath_switch;
	sc->sc_link.adapter_softc = sc;
	sc->sc_link.adapter_target = MPATH_BUSWIDTH;
	sc->sc_link.adapter_buswidth = MPATH_BUSWIDTH;
	sc->sc_link.luns = 1;
	sc->sc_link.openings = 1024; /* XXX magical */

	bzero(&saa, sizeof(saa));
	saa.saa_sc_link = &sc->sc_link;

	sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev,
	    &saa, scsiprint);
}

void
mpath_xs_stuffup(struct scsi_xfer *xs)
{
	xs->error = XS_DRIVER_STUFFUP;
	scsi_done(xs);
}

int
mpath_probe(struct scsi_link *link)
{
	struct mpath_softc *sc = link->adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];

	if (link->lun != 0 || d == NULL)
		return (ENXIO);

	link->id = devid_copy(d->d_id);

	return (0);
}

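/*
 * Pick the next path to issue a transfer on.  Paths are used round-robin
 * within the group at the head of d_groups (the active group): advance
 * d_next_path to the next path in its group, wrapping back to the first
 * path of the first group when the list runs out.  Must be called with
 * d_mtx held.
 */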
struct mpath_path *
mpath_next_path(struct mpath_dev *d)
{
	struct mpath_group *g;
	struct mpath_path *p;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("%s: d is NULL", __func__);
#endif

	p = d->d_next_path;
	if (p != NULL) {
		d->d_next_path = TAILQ_NEXT(p, p_entry);
		if (d->d_next_path == NULL &&
		    (g = TAILQ_FIRST(&d->d_groups)) != NULL)
			d->d_next_path = TAILQ_FIRST(&g->g_paths);
	}

	return (p);
}

void
mpath_cmd(struct scsi_xfer *xs)
{
	struct scsi_link *link = xs->sc_link;
	struct mpath_softc *sc = link->adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_path *p;
	struct scsi_xfer *mxs;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("mpath_cmd issued against nonexistent device");
#endif

	if (ISSET(xs->flags, SCSI_POLL)) {
		mtx_enter(&d->d_mtx);
		p = mpath_next_path(d);
		mtx_leave(&d->d_mtx);
		if (p == NULL) {
			mpath_xs_stuffup(xs);
			return;
		}

		mxs = scsi_xs_get(p->p_link, xs->flags);
		if (mxs == NULL) {
			mpath_xs_stuffup(xs);
			return;
		}

		memcpy(mxs->cmd, xs->cmd, xs->cmdlen);
		mxs->cmdlen = xs->cmdlen;
		mxs->data = xs->data;
		mxs->datalen = xs->datalen;
		mxs->retries = xs->retries;
		mxs->timeout = xs->timeout;
		mxs->bp = xs->bp;

		scsi_xs_sync(mxs);

		xs->error = mxs->error;
		xs->status = mxs->status;
		xs->resid = mxs->resid;

		memcpy(&xs->sense, &mxs->sense, sizeof(xs->sense));

		scsi_xs_put(mxs);
		scsi_done(xs);
		return;
	}

	mtx_enter(&d->d_mtx);
	SIMPLEQ_INSERT_TAIL(&d->d_xfers, xs, xfer_list);
	p = mpath_next_path(d);
	mtx_leave(&d->d_mtx);

	if (p != NULL)
		scsi_xsh_add(&p->p_xsh);
}

void
mpath_start(struct mpath_path *p, struct scsi_xfer *mxs)
{
	struct mpath_dev *d = p->p_group->g_dev;
	struct scsi_xfer *xs;
	int addxsh = 0;

	if (ISSET(p->p_link->state, SDEV_S_DYING) || d == NULL)
		goto fail;

	mtx_enter(&d->d_mtx);
	xs = SIMPLEQ_FIRST(&d->d_xfers);
	if (xs != NULL) {
		SIMPLEQ_REMOVE_HEAD(&d->d_xfers, xfer_list);
		if (!SIMPLEQ_EMPTY(&d->d_xfers))
			addxsh = 1;
	}
	mtx_leave(&d->d_mtx);

	if (xs == NULL)
		goto fail;

	memcpy(mxs->cmd, xs->cmd, xs->cmdlen);
	mxs->cmdlen = xs->cmdlen;
	mxs->data = xs->data;
	mxs->datalen = xs->datalen;
	mxs->retries = xs->retries;
	mxs->timeout = xs->timeout;
	mxs->bp = xs->bp;
	mxs->flags = xs->flags;

	mxs->cookie = xs;
	mxs->done = mpath_done;

	scsi_xs_exec(mxs);

	if (addxsh)
		scsi_xsh_add(&p->p_xsh);

	return;
fail:
	scsi_xs_put(mxs);
}

void
mpath_done(struct scsi_xfer *mxs)
{
	struct scsi_xfer *xs = mxs->cookie;
	struct scsi_link *link = xs->sc_link;
	struct mpath_softc *sc = link->adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_path *p;

	switch (mxs->error) {
	case XS_SELTIMEOUT: /* physical path is gone, try the next */
	case XS_RESET:
		mtx_enter(&d->d_mtx);
		SIMPLEQ_INSERT_HEAD(&d->d_xfers, xs, xfer_list);
		p = mpath_next_path(d);
		mtx_leave(&d->d_mtx);

		scsi_xs_put(mxs);

		if (p != NULL)
			scsi_xsh_add(&p->p_xsh);
		return;
	case XS_SENSE:
		switch (d->d_ops->op_checksense(mxs)) {
		case MPATH_SENSE_FAILOVER:
			mtx_enter(&d->d_mtx);
			SIMPLEQ_INSERT_HEAD(&d->d_xfers, xs, xfer_list);
			p = mpath_next_path(d);
			mtx_leave(&d->d_mtx);

			scsi_xs_put(mxs);

			mpath_failover(d);
			return;
		case MPATH_SENSE_DECLINED:
			break;
#ifdef DIAGNOSTIC
		default:
			panic("unexpected return from checksense");
#endif
		}
		break;
	}

	xs->error = mxs->error;
	xs->status = mxs->status;
	xs->resid = mxs->resid;

	memcpy(&xs->sense, &mxs->sense, sizeof(xs->sense));

	scsi_xs_put(mxs);

	scsi_done(xs);
}

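/*
 * Path failover: walk the path groups asking each whether it can take
 * over.  scsi_pending_start() ensures only one failover runs per device
 * at a time.  op_status() completes asynchronously via mpath_path_status(),
 * which either promotes the responding group to the head of d_groups and
 * resumes i/o, or advances d_failover_iter to the next group.  If every
 * group is exhausted, the scan is retried a second later via
 * d_failover_tmo.
 */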
void
mpath_failover(struct mpath_dev *d)
{
	if (!scsi_pending_start(&d->d_mtx, &d->d_failover))
		return;

	mpath_failover_start(d);
}

void
mpath_failover_start(void *xd)
{
	struct mpath_dev *d = xd;

	mtx_enter(&d->d_mtx);
	d->d_failover_iter = TAILQ_FIRST(&d->d_groups);
	mtx_leave(&d->d_mtx);

	mpath_failover_check(d);
}

void
mpath_failover_check(struct mpath_dev *d)
{
	struct mpath_group *g = d->d_failover_iter;
	struct mpath_path *p;

	if (g == NULL)
		timeout_add_sec(&d->d_failover_tmo, 1);
	else {
		p = TAILQ_FIRST(&g->g_paths);
		d->d_ops->op_status(p->p_link);
	}
}

void
mpath_path_status(struct mpath_path *p, int status)
{
	struct mpath_group *g = p->p_group;
	struct mpath_dev *d = g->g_dev;

	mtx_enter(&d->d_mtx);
	if (status == MPATH_S_ACTIVE) {
		TAILQ_REMOVE(&d->d_groups, g, g_entry);
		TAILQ_INSERT_HEAD(&d->d_groups, g, g_entry);
		d->d_next_path = p;
	} else
		d->d_failover_iter = TAILQ_NEXT(d->d_failover_iter, g_entry);
	mtx_leave(&d->d_mtx);

	if (status == MPATH_S_ACTIVE) {
		scsi_xsh_add(&p->p_xsh);
		if (!scsi_pending_finish(&d->d_mtx, &d->d_failover))
			mpath_failover_start(d);
	} else
		mpath_failover_check(d);
}

void
mpath_minphys(struct buf *bp, struct scsi_link *link)
{
	struct mpath_softc *sc = link->adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_group *g;
	struct mpath_path *p;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("mpath_minphys against nonexistent device");
#endif

	mtx_enter(&d->d_mtx);
	TAILQ_FOREACH(g, &d->d_groups, g_entry) {
		TAILQ_FOREACH(p, &g->g_paths, p_entry) {
			/* XXX crossing layers with mutex held */
			p->p_link->adapter->scsi_minphys(bp, p->p_link);
		}
	}
	mtx_leave(&d->d_mtx);
}

int
mpath_path_probe(struct scsi_link *link)
{
	if (mpath == NULL)
		return (ENXIO);

	if (link->id == NULL)
		return (EINVAL);

	if (mpath == link->adapter_softc)
		return (ENXIO);

	return (0);
}

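/*
 * Attach a path to the mpath bus.  The device it belongs to is found by
 * matching the path's devid and ops against the existing targets; if none
 * matches, a new mpath_dev is allocated in the first free target slot and
 * that target is probed on the mpath scsibus.  Within the device, the path
 * is appended to the group identified by g_id, creating the group if
 * necessary.
 */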
int
mpath_path_attach(struct mpath_path *p, u_int g_id, const struct mpath_ops *ops)
{
	struct mpath_softc *sc = mpath;
	struct scsi_link *link = p->p_link;
	struct mpath_dev *d = NULL;
	struct mpath_group *g;
	int newdev = 0, addxsh = 0;
	int target;

#ifdef DIAGNOSTIC
	if (p->p_link == NULL)
		panic("mpath_path_attach: NULL link");
	if (p->p_group != NULL)
		panic("mpath_path_attach: group is not NULL");
#endif

	for (target = 0; target < MPATH_BUSWIDTH; target++) {
		if ((d = sc->sc_devs[target]) == NULL)
			continue;

		if (DEVID_CMP(d->d_id, link->id) && d->d_ops == ops)
			break;

		d = NULL;
	}

	if (d == NULL) {
		for (target = 0; target < MPATH_BUSWIDTH; target++) {
			if (sc->sc_devs[target] == NULL)
				break;
		}
		if (target >= MPATH_BUSWIDTH)
			return (ENXIO);

		d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK | M_CANFAIL | M_ZERO);
		if (d == NULL)
			return (ENOMEM);

		mtx_init(&d->d_mtx, IPL_BIO);
		TAILQ_INIT(&d->d_groups);
		SIMPLEQ_INIT(&d->d_xfers);
		d->d_id = devid_copy(link->id);
		d->d_ops = ops;

		timeout_set(&d->d_failover_tmo, mpath_failover_start, d);

		sc->sc_devs[target] = d;
		newdev = 1;
	} else {
		/*
		 * instead of carrying identical values in different devid
		 * instances, delete the new one and reference the old one in
		 * the new scsi_link.
		 */
		devid_free(link->id);
		link->id = devid_copy(d->d_id);
	}

	TAILQ_FOREACH(g, &d->d_groups, g_entry) {
		if (g->g_id == g_id)
			break;
	}

	if (g == NULL) {
		g = malloc(sizeof(*g), M_DEVBUF,
		    M_WAITOK | M_CANFAIL | M_ZERO);
		if (g == NULL) {
			if (newdev) {
				/* drop the devid reference taken above */
				devid_free(d->d_id);
				free(d, M_DEVBUF, 0);
				sc->sc_devs[target] = NULL;
			}

			return (ENOMEM);
		}

		TAILQ_INIT(&g->g_paths);
		g->g_dev = d;
		g->g_id = g_id;

		mtx_enter(&d->d_mtx);
		TAILQ_INSERT_TAIL(&d->d_groups, g, g_entry);
		mtx_leave(&d->d_mtx);
	}

	p->p_group = g;

	mtx_enter(&d->d_mtx);
	TAILQ_INSERT_TAIL(&g->g_paths, p, p_entry);
	if (!SIMPLEQ_EMPTY(&d->d_xfers))
		addxsh = 1;

	if (d->d_next_path == NULL)
		d->d_next_path = p;
	mtx_leave(&d->d_mtx);

	if (newdev)
		scsi_probe_target(mpath->sc_scsibus, target);
	else if (addxsh)
		scsi_xsh_add(&p->p_xsh);

	return (0);
}

int
mpath_path_detach(struct mpath_path *p)
{
	struct mpath_group *g = p->p_group;
	struct mpath_dev *d;
	struct mpath_path *np = NULL;

#ifdef DIAGNOSTIC
	if (g == NULL)
		panic("mpath: detaching a path from a nonexistent bus");
#endif
	d = g->g_dev;
	p->p_group = NULL;

	mtx_enter(&d->d_mtx);
	TAILQ_REMOVE(&g->g_paths, p, p_entry);
	if (d->d_next_path == p)
		d->d_next_path = TAILQ_FIRST(&g->g_paths);

	if (TAILQ_EMPTY(&g->g_paths))
		TAILQ_REMOVE(&d->d_groups, g, g_entry);
	else
		g = NULL;

	if (!SIMPLEQ_EMPTY(&d->d_xfers))
		np = d->d_next_path;
	mtx_leave(&d->d_mtx);

	if (g != NULL)
		free(g, M_DEVBUF, 0);

	scsi_xsh_del(&p->p_xsh);

	if (np == NULL)
		mpath_failover(d);
	else
		scsi_xsh_add(&np->p_xsh);

	return (0);
}

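/*
 * Map a boot device back to its mpath node: if dev is the softc of a
 * physical path device, return the softc of the device attached at lun 0
 * of the corresponding mpath target, so the system boots through the
 * multipath node rather than a single path.
 */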
struct device *
mpath_bootdv(struct device *dev)
{
	struct mpath_softc *sc = mpath;
	struct mpath_dev *d;
	struct mpath_group *g;
	struct mpath_path *p;
	int target;

	if (sc == NULL)
		return (dev);

	for (target = 0; target < MPATH_BUSWIDTH; target++) {
		if ((d = sc->sc_devs[target]) == NULL)
			continue;

		TAILQ_FOREACH(g, &d->d_groups, g_entry) {
			TAILQ_FOREACH(p, &g->g_paths, p_entry) {
				if (p->p_link->device_softc == dev) {
					return (scsi_get_link(mpath->sc_scsibus,
					    target, 0)->device_softc);
				}
			}
		}
	}

	return (dev);
}