/*	$OpenBSD: mpath.c,v 1.52 2020/07/20 14:41:14 krw Exp $ */

/*
 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * mpath: SCSI multipathing midlayer.  It attaches a virtual scsibus
 * whose targets are multipath devices; each device fans out to one or
 * more path groups, each of which holds one or more physical paths
 * registered by the path drivers via mpath_path_attach().
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/device.h>
#include <sys/conf.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/selinfo.h>

#include <scsi/scsi_all.h>
#include <scsi/scsiconf.h>
#include <scsi/mpathvar.h>

/* number of targets on the virtual mpath bus, i.e. max multipath devices */
#define MPATH_BUSWIDTH 256

int		mpath_match(struct device *, void *, void *);
void		mpath_attach(struct device *, struct device *, void *);
void		mpath_shutdown(void *);	/* NOTE(review): declared but not defined in this file */

TAILQ_HEAD(mpath_paths, mpath_path);

/*
 * A group of paths that share the same group id (g_id) as reported by the
 * path driver.  The group at the head of a device's d_groups list is the
 * one I/O is currently issued to; mpath_path_status() moves a group to the
 * head when it becomes active.
 */
struct mpath_group {
	TAILQ_ENTRY(mpath_group) g_entry;
	struct mpath_paths	 g_paths;	/* paths in this group */
	struct mpath_dev	*g_dev;		/* back pointer to the device */
	u_int			 g_id;		/* group id from the path driver */
};
TAILQ_HEAD(mpath_groups, mpath_group);

/*
 * One multipath device, i.e. one target on the mpath scsibus.  A device
 * is identified by the combination of its devid (d_id) and the path
 * driver operations vector (d_ops); see mpath_path_attach().
 */
struct mpath_dev {
	struct mutex		 d_mtx;		/* protects the fields below */

	struct scsi_xfer_list	 d_xfers;	/* xfers waiting for a path */
	struct mpath_path	*d_next_path;	/* round-robin path cursor */

	struct mpath_groups	 d_groups;	/* path groups; head is active */

	struct mpath_group	*d_failover_iter; /* group currently being probed during failover */
	struct timeout		 d_failover_tmo; /* retry timer when no group responds */
	u_int			 d_failover;	/* pending-work counter for scsi_pending_start/finish */

	const struct mpath_ops	*d_ops;		/* path driver callbacks */
	struct devid		*d_id;		/* identity shared by all paths */
};
/*
 * Softc of the single mpath(4) bus.  sc_devs maps virtual target numbers
 * to multipath devices.
 */
struct mpath_softc {
	struct device		sc_dev;
	struct scsi_link	sc_link;
	struct scsibus_softc	*sc_scsibus;
	struct mpath_dev	*sc_devs[MPATH_BUSWIDTH];	/* indexed by target */
};
#define DEVNAME(_s) ((_s)->sc_dev.dv_xname)

/* the single global mpath instance; NULL until mpath_attach() runs */
struct mpath_softc *mpath;

struct cfattach mpath_ca = {
	sizeof(struct mpath_softc),
	mpath_match,
	mpath_attach
};

struct cfdriver mpath_cd = {
	NULL,
	"mpath",
	DV_DULL
};

void		mpath_cmd(struct scsi_xfer *);
void		mpath_minphys(struct buf *, struct scsi_link *);
int		mpath_probe(struct scsi_link *);

struct mpath_path *mpath_next_path(struct mpath_dev *);
void		mpath_done(struct scsi_xfer *);

void		mpath_failover(struct mpath_dev *);
void		mpath_failover_start(void *);
void		mpath_failover_check(struct mpath_dev *);

/* adapter entry points for the virtual bus: cmd, probe; no dev_minphys hook */
struct scsi_adapter mpath_switch = {
	mpath_cmd, NULL, mpath_probe, NULL, NULL
};

void		mpath_xs_stuffup(struct scsi_xfer *);

/*
 * Autoconf match: mpath always attaches (exactly once, via config glue).
 */
int
mpath_match(struct device *parent, void *match, void *aux)
{
	return (1);
}

/*
 * Attach the virtual scsibus that exposes multipath devices as targets.
 */
void
mpath_attach(struct device *parent, struct device *self, void *aux)
{
	struct mpath_softc *sc = (struct mpath_softc *)self;
	struct scsibus_attach_args saa;

	mpath = sc;	/* publish the global instance */

	printf("\n");

	saa.saa_adapter = &mpath_switch;
	saa.saa_adapter_softc = sc;
	saa.saa_adapter_target = SDEV_NO_ADAPTER_TARGET;
	saa.saa_adapter_buswidth = MPATH_BUSWIDTH;
	saa.saa_luns = 1;
	saa.saa_openings = 1024; /* XXX magical */
	saa.saa_pool = NULL;
	saa.saa_quirks = saa.saa_flags = 0;
	saa.saa_wwpn = saa.saa_wwnn = 0;

	sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev,
	    &saa, scsiprint);
}

/*
 * Fail an xfer that could not be issued on any path.
 */
void
mpath_xs_stuffup(struct scsi_xfer *xs)
{
	xs->error = XS_DRIVER_STUFFUP;
	scsi_done(xs);
}

/*
 * Probe a target on the virtual bus: it exists iff a multipath device
 * occupies that target slot (LUNs other than 0 are never valid).  The
 * link inherits a copy of the device's devid.
 */
int
mpath_probe(struct scsi_link *link)
{
	struct mpath_softc *sc = link->bus->sb_adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];

	if (link->lun != 0 || d == NULL)
		return (ENXIO);

	link->id = devid_copy(d->d_id);

	return (0);
}

/*
 * Return the current path and advance the round-robin cursor within the
 * active (head) group, wrapping back to the first path of the head group
 * when the cursor runs off the end.  All callers hold d->d_mtx.
 * Returns NULL when no path is available.
 */
struct mpath_path *
mpath_next_path(struct mpath_dev *d)
{
	struct mpath_group *g;
	struct mpath_path *p;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("%s: d is NULL", __func__);
#endif /* DIAGNOSTIC */

	p = d->d_next_path;
	if (p != NULL) {
		d->d_next_path = TAILQ_NEXT(p, p_entry);
		if (d->d_next_path == NULL &&
		    (g = TAILQ_FIRST(&d->d_groups)) != NULL)
			d->d_next_path = TAILQ_FIRST(&g->g_paths);
	}

	return (p);
}

/*
 * scsi_adapter cmd entry point.  Polled xfers are executed synchronously
 * on the next path by cloning the xfer and running scsi_xs_sync().
 * Normal xfers are queued on the device and a path's xfer handler is
 * scheduled to pick them up (see mpath_start()).
 */
void
mpath_cmd(struct scsi_xfer *xs)
{
	struct scsi_link *link = xs->sc_link;
	struct mpath_softc *sc = link->bus->sb_adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_path *p;
	struct scsi_xfer *mxs;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("mpath_cmd issued against nonexistent device");
#endif /* DIAGNOSTIC */

	if (ISSET(xs->flags, SCSI_POLL)) {
		mtx_enter(&d->d_mtx);
		p = mpath_next_path(d);
		mtx_leave(&d->d_mtx);
		if (p == NULL) {
			mpath_xs_stuffup(xs);
			return;
		}

		mxs = scsi_xs_get(p->p_link, xs->flags);
		if (mxs == NULL) {
			mpath_xs_stuffup(xs);
			return;
		}

		/* clone the request onto the path's xfer */
		memcpy(mxs->cmd, xs->cmd, xs->cmdlen);
		mxs->cmdlen = xs->cmdlen;
		mxs->data = xs->data;
		mxs->datalen = xs->datalen;
		mxs->retries = xs->retries;
		mxs->timeout = xs->timeout;
		mxs->bp = xs->bp;

		scsi_xs_sync(mxs);

		/* copy the result back to the caller's xfer */
		xs->error = mxs->error;
		xs->status = mxs->status;
		xs->resid = mxs->resid;

		memcpy(&xs->sense, &mxs->sense, sizeof(xs->sense));

		scsi_xs_put(mxs);
		scsi_done(xs);
		return;
	}

	mtx_enter(&d->d_mtx);
	SIMPLEQ_INSERT_TAIL(&d->d_xfers, xs, xfer_list);
	p = mpath_next_path(d);
	mtx_leave(&d->d_mtx);

	if (p != NULL)
		scsi_xsh_add(&p->p_xsh);
}
/*
 * Xfer handler for a path: pull the next queued xfer off the device,
 * clone it onto the path's xfer mxs and issue it.  Called by the SCSI
 * midlayer once scsi_xsh_add() has scheduled the path and resources are
 * available.  mpath_done() completes the cloned xfer.
 */
void
mpath_start(struct mpath_path *p, struct scsi_xfer *mxs)
{
	struct mpath_dev *d = p->p_group->g_dev;
	struct scsi_xfer *xs;
	int addxsh = 0;

	if (ISSET(p->p_link->state, SDEV_S_DYING) || d == NULL)
		goto fail;

	mtx_enter(&d->d_mtx);
	xs = SIMPLEQ_FIRST(&d->d_xfers);
	if (xs != NULL) {
		SIMPLEQ_REMOVE_HEAD(&d->d_xfers, xfer_list);
		/* more work queued: reschedule ourselves after issuing */
		if (!SIMPLEQ_EMPTY(&d->d_xfers))
			addxsh = 1;
	}
	mtx_leave(&d->d_mtx);

	if (xs == NULL)
		goto fail;

	memcpy(mxs->cmd, xs->cmd, xs->cmdlen);
	mxs->cmdlen = xs->cmdlen;
	mxs->data = xs->data;
	mxs->datalen = xs->datalen;
	mxs->retries = xs->retries;
	mxs->timeout = xs->timeout;
	mxs->bp = xs->bp;
	mxs->flags = xs->flags;

	/* remember the original xfer so mpath_done() can complete it */
	mxs->cookie = xs;
	mxs->done = mpath_done;

	scsi_xs_exec(mxs);

	if (addxsh)
		scsi_xsh_add(&p->p_xsh);

	return;
fail:
	scsi_xs_put(mxs);
}

/*
 * Completion for a cloned path xfer.  Transport errors requeue the
 * original xfer at the head and retry on the next path; sense data is
 * shown to the path driver, which may request a group failover.
 * Everything else is copied back and completed.
 */
void
mpath_done(struct scsi_xfer *mxs)
{
	struct scsi_xfer *xs = mxs->cookie;
	struct scsi_link *link = xs->sc_link;
	struct mpath_softc *sc = link->bus->sb_adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_path *p;

	switch (mxs->error) {
	case XS_SELTIMEOUT: /* physical path is gone, try the next */
	case XS_RESET:
		mtx_enter(&d->d_mtx);
		SIMPLEQ_INSERT_HEAD(&d->d_xfers, xs, xfer_list);
		p = mpath_next_path(d);
		mtx_leave(&d->d_mtx);

		scsi_xs_put(mxs);

		if (p != NULL)
			scsi_xsh_add(&p->p_xsh);
		return;
	case XS_SENSE:
		/* let the path driver decide what the sense data means */
		switch (d->d_ops->op_checksense(mxs)) {
		case MPATH_SENSE_FAILOVER:
			mtx_enter(&d->d_mtx);
			SIMPLEQ_INSERT_HEAD(&d->d_xfers, xs, xfer_list);
			p = mpath_next_path(d);
			mtx_leave(&d->d_mtx);

			scsi_xs_put(mxs);

			mpath_failover(d);
			return;
		case MPATH_SENSE_DECLINED:
			/* not a path problem; complete normally below */
			break;
#ifdef DIAGNOSTIC
		default:
			panic("unexpected return from checksense");
#endif /* DIAGNOSTIC */
		}
		break;
	}

	xs->error = mxs->error;
	xs->status = mxs->status;
	xs->resid = mxs->resid;

	memcpy(&xs->sense, &mxs->sense, sizeof(xs->sense));

	scsi_xs_put(mxs);

	scsi_done(xs);
}

/*
 * Request a failover.  scsi_pending_start() collapses concurrent
 * requests: only the first caller actually starts the scan, later
 * callers just bump d_failover so the scan is rerun when it finishes.
 */
void
mpath_failover(struct mpath_dev *d)
{
	if (!scsi_pending_start(&d->d_mtx, &d->d_failover))
		return;

	mpath_failover_start(d);
}

/*
 * (Re)start a failover scan from the first group.  Also the handler for
 * d_failover_tmo, hence the void * argument.
 */
void
mpath_failover_start(void *xd)
{
	struct mpath_dev *d = xd;

	mtx_enter(&d->d_mtx);
	d->d_failover_iter = TAILQ_FIRST(&d->d_groups);
	mtx_leave(&d->d_mtx);

	mpath_failover_check(d);
}

/*
 * Probe the group the failover iterator points at by asking the path
 * driver for the status of its first path; the driver answers via
 * mpath_path_status().  When the iterator has run off the end of the
 * group list, retry the whole scan a second later.
 */
void
mpath_failover_check(struct mpath_dev *d)
{
	struct mpath_group *g = d->d_failover_iter;
	struct mpath_path *p;

	if (g == NULL)
		timeout_add_sec(&d->d_failover_tmo, 1);
	else {
		/* NOTE(review): assumes a group always has at least one
		 * path; mpath_path_detach() frees empty groups, which
		 * should uphold that invariant. */
		p = TAILQ_FIRST(&g->g_paths);
		d->d_ops->op_status(p->p_link);
	}
}

/*
 * Callback from the path driver with the result of an op_status probe.
 * An active path promotes its group to the head of the list and resumes
 * I/O; otherwise the failover scan moves on to the next group.
 */
void
mpath_path_status(struct mpath_path *p, int status)
{
	struct mpath_group *g = p->p_group;
	struct mpath_dev *d = g->g_dev;

	mtx_enter(&d->d_mtx);
	if (status == MPATH_S_ACTIVE) {
		TAILQ_REMOVE(&d->d_groups, g, g_entry);
		TAILQ_INSERT_HEAD(&d->d_groups, g, g_entry);
		d->d_next_path = p;
	} else
		d->d_failover_iter = TAILQ_NEXT(d->d_failover_iter, g_entry);
	mtx_leave(&d->d_mtx);

	if (status == MPATH_S_ACTIVE) {
		scsi_xsh_add(&p->p_xsh);
		/* if another failover was requested meanwhile, rescan */
		if (!scsi_pending_finish(&d->d_mtx, &d->d_failover))
			mpath_failover_start(d);
	} else
		mpath_failover_check(d);
}

/*
 * Constrain bp to the most restrictive dev_minphys of all underlying
 * physical paths' adapters.
 */
void
mpath_minphys(struct buf *bp, struct scsi_link *link)
{
	struct mpath_softc *sc = link->bus->sb_adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_group *g;
	struct mpath_path *p;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("mpath_minphys against nonexistent device");
#endif /* DIAGNOSTIC */

	mtx_enter(&d->d_mtx);
	TAILQ_FOREACH(g, &d->d_groups, g_entry) {
		TAILQ_FOREACH(p, &g->g_paths, p_entry) {
			/* XXX crossing layers with mutex held */
			if (p->p_link->bus->sb_adapter->dev_minphys != NULL)
				p->p_link->bus->sb_adapter->dev_minphys(bp,
				    p->p_link);
		}
	}
	mtx_leave(&d->d_mtx);
}

/*
 * Helper for path drivers: decide whether a scsi_link is a candidate
 * for multipathing at all (mpath attached, link has a devid, is not a
 * USB mass storage device, and is not one of mpath's own links).
 */
int
mpath_path_probe(struct scsi_link *link)
{
	if (mpath == NULL)
		return (ENXIO);

	if (link->id == NULL)
		return (EINVAL);

	if (ISSET(link->flags, SDEV_UMASS))
		return (EINVAL);

	/* don't multipath our own virtual links */
	if (mpath == link->bus->sb_adapter_softc)
		return (ENXIO);

	return (0);
}

/*
 * Register a physical path with mpath.  Finds (or creates) the device
 * matching the path's devid and ops, finds (or creates) the group g_id
 * within it, and appends the path.  A brand-new device triggers a probe
 * of its virtual target; an existing one gets the path put to work if
 * xfers are already queued.  Returns 0 or ENXIO/ENOMEM.
 */
int
mpath_path_attach(struct mpath_path *p, u_int g_id, const struct mpath_ops *ops)
{
	struct mpath_softc *sc = mpath;
	struct scsi_link *link = p->p_link;
	struct mpath_dev *d = NULL;
	struct mpath_group *g;
	int newdev = 0, addxsh = 0;
	int target;

#ifdef DIAGNOSTIC
	if (p->p_link == NULL)
		panic("mpath_path_attach: NULL link");
	if (p->p_group != NULL)
		panic("mpath_path_attach: group is not NULL");
#endif /* DIAGNOSTIC */

	/* look for an existing device with the same identity and ops */
	for (target = 0; target < MPATH_BUSWIDTH; target++) {
		if ((d = sc->sc_devs[target]) == NULL)
			continue;

		if (DEVID_CMP(d->d_id, link->id) && d->d_ops == ops)
			break;

		d = NULL;
	}

	if (d == NULL) {
		/* new device: claim the first free target slot */
		for (target = 0; target < MPATH_BUSWIDTH; target++) {
			if (sc->sc_devs[target] == NULL)
				break;
		}
		if (target >= MPATH_BUSWIDTH)
			return (ENXIO);

		d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK | M_CANFAIL | M_ZERO);
		if (d == NULL)
			return (ENOMEM);

		mtx_init(&d->d_mtx, IPL_BIO);
		TAILQ_INIT(&d->d_groups);
		SIMPLEQ_INIT(&d->d_xfers);
		d->d_id = devid_copy(link->id);
		d->d_ops = ops;

		timeout_set(&d->d_failover_tmo, mpath_failover_start, d);

		sc->sc_devs[target] = d;
		newdev = 1;
	} else {
		/*
		 * instead of carrying identical values in different devid
		 * instances, delete the new one and reference the old one in
		 * the new scsi_link.
		 */
		devid_free(link->id);
		link->id = devid_copy(d->d_id);
	}

	TAILQ_FOREACH(g, &d->d_groups, g_entry) {
		if (g->g_id == g_id)
			break;
	}

	if (g == NULL) {
		g = malloc(sizeof(*g), M_DEVBUF,
		    M_WAITOK | M_CANFAIL | M_ZERO);
		if (g == NULL) {
			/* unwind the device we just created, if any */
			if (newdev) {
				free(d, M_DEVBUF, sizeof(*d));
				sc->sc_devs[target] = NULL;
			}

			return (ENOMEM);
		}

		TAILQ_INIT(&g->g_paths);
		g->g_dev = d;
		g->g_id = g_id;

		mtx_enter(&d->d_mtx);
		TAILQ_INSERT_TAIL(&d->d_groups, g, g_entry);
		mtx_leave(&d->d_mtx);
	}

	p->p_group = g;

	mtx_enter(&d->d_mtx);
	TAILQ_INSERT_TAIL(&g->g_paths, p, p_entry);
	if (!SIMPLEQ_EMPTY(&d->d_xfers))
		addxsh = 1;

	if (d->d_next_path == NULL)
		d->d_next_path = p;
	mtx_leave(&d->d_mtx);

	if (newdev)
		scsi_probe_target(mpath->sc_scsibus, target);
	else if (addxsh)
		scsi_xsh_add(&p->p_xsh);

	return (0);
}

/*
 * Unregister a physical path.  Fixes up the round-robin cursor, frees
 * the group when it becomes empty, and either kicks the next path (if
 * xfers are still queued) or starts a failover scan.
 */
int
mpath_path_detach(struct mpath_path *p)
{
	struct mpath_group *g = p->p_group;
	struct mpath_dev *d;
	struct mpath_path *np = NULL;

#ifdef DIAGNOSTIC
	if (g == NULL)
		panic("mpath: detaching a path from a nonexistent bus");
#endif /* DIAGNOSTIC */
	d = g->g_dev;
	p->p_group = NULL;

	mtx_enter(&d->d_mtx);
	TAILQ_REMOVE(&g->g_paths, p, p_entry);
	if (d->d_next_path == p)
		d->d_next_path = TAILQ_FIRST(&g->g_paths);

	/* keep g non-NULL only when it emptied and must be freed */
	if (TAILQ_EMPTY(&g->g_paths))
		TAILQ_REMOVE(&d->d_groups, g, g_entry);
	else
		g = NULL;

	if (!SIMPLEQ_EMPTY(&d->d_xfers))
		np = d->d_next_path;
	mtx_leave(&d->d_mtx);

	if (g != NULL)
		free(g, M_DEVBUF, sizeof(*g));

	scsi_xsh_del(&p->p_xsh);

	if (np == NULL)
		mpath_failover(d);
	else
		scsi_xsh_add(&np->p_xsh);

	return (0);
}

/*
 * Boot device resolution: if dev is the softc of a physical path that
 * belongs to a multipath device, return the softc of the corresponding
 * mpath target instead, so the system roots on the multipath device.
 */
struct device *
mpath_bootdv(struct device *dev)
{
	struct mpath_softc *sc = mpath;
	struct mpath_dev *d;
	struct mpath_group *g;
	struct mpath_path *p;
	int target;

	if (sc == NULL)
		return (dev);

	for (target = 0; target < MPATH_BUSWIDTH; target++) {
		if ((d = sc->sc_devs[target]) == NULL)
			continue;

		TAILQ_FOREACH(g, &d->d_groups, g_entry) {
			TAILQ_FOREACH(p, &g->g_paths, p_entry) {
				if (p->p_link->device_softc == dev) {
					return (scsi_get_link(mpath->sc_scsibus,
					    target, 0)->device_softc);
				}
			}
		}
	}

	return (dev);
}