/*	$OpenBSD: mpath.c,v 1.47 2020/02/05 21:50:41 krw Exp $ */

/*
 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * mpath(4): SCSI multipathing midlayer.  Presents a single virtual
 * scsibus whose targets are devices reachable over one or more physical
 * paths; paths are grouped, and I/O is issued over the paths of the
 * head group, failing over to other groups when a path reports trouble.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/device.h>
#include <sys/conf.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/selinfo.h>

#include <scsi/scsi_all.h>
#include <scsi/scsiconf.h>
#include <scsi/mpathvar.h>

/* Width of the virtual bus, and therefore the maximum device count. */
#define MPATH_BUSWIDTH 256

int		mpath_match(struct device *, void *, void *);
void		mpath_attach(struct device *, struct device *, void *);
/* NOTE(review): mpath_shutdown is declared but no definition is visible here. */
void		mpath_shutdown(void *);

TAILQ_HEAD(mpath_paths, mpath_path);

/*
 * A group is an ordered set of paths that share the same access state
 * (e.g. an ALUA target port group).  The head group on a device's list
 * is the one I/O is currently issued on.
 */
struct mpath_group {
	TAILQ_ENTRY(mpath_group) g_entry;	/* entry on d_groups */
	struct mpath_paths	 g_paths;	/* paths in this group */
	struct mpath_dev	*g_dev;		/* backpointer to device */
	u_int			 g_id;		/* group id from the driver */
};
TAILQ_HEAD(mpath_groups, mpath_group);

/*
 * One logical device, identified by its devid, reachable via the paths
 * in d_groups.  All list/queue state is serialised by d_mtx.
 */
struct mpath_dev {
	struct mutex		 d_mtx;		/* protects the fields below */

	struct scsi_xfer_list	 d_xfers;	/* xfers waiting for a path */
	struct mpath_path	*d_next_path;	/* round-robin cursor */

	struct mpath_groups	 d_groups;	/* head group is active */

	struct mpath_group	*d_failover_iter; /* group being probed */
	struct timeout		 d_failover_tmo;  /* retry failover later */
	u_int			 d_failover;	  /* pending-failover state */

	const struct mpath_ops	*d_ops;		/* per-driver callbacks */
	struct devid		*d_id;		/* identity of the device */
};
66 67 struct mpath_softc { 68 struct device sc_dev; 69 struct scsi_link sc_link; 70 struct scsibus_softc *sc_scsibus; 71 struct mpath_dev *sc_devs[MPATH_BUSWIDTH]; 72 }; 73 #define DEVNAME(_s) ((_s)->sc_dev.dv_xname) 74 75 struct mpath_softc *mpath; 76 77 struct cfattach mpath_ca = { 78 sizeof(struct mpath_softc), 79 mpath_match, 80 mpath_attach 81 }; 82 83 struct cfdriver mpath_cd = { 84 NULL, 85 "mpath", 86 DV_DULL 87 }; 88 89 void mpath_cmd(struct scsi_xfer *); 90 void mpath_minphys(struct buf *, struct scsi_link *); 91 int mpath_probe(struct scsi_link *); 92 93 struct mpath_path *mpath_next_path(struct mpath_dev *); 94 void mpath_done(struct scsi_xfer *); 95 96 void mpath_failover(struct mpath_dev *); 97 void mpath_failover_start(void *); 98 void mpath_failover_check(struct mpath_dev *); 99 100 struct scsi_adapter mpath_switch = { 101 mpath_cmd, NULL, mpath_probe, NULL, NULL 102 }; 103 104 void mpath_xs_stuffup(struct scsi_xfer *); 105 106 int 107 mpath_match(struct device *parent, void *match, void *aux) 108 { 109 return (1); 110 } 111 112 void 113 mpath_attach(struct device *parent, struct device *self, void *aux) 114 { 115 struct mpath_softc *sc = (struct mpath_softc *)self; 116 struct scsibus_attach_args saa; 117 118 mpath = sc; 119 120 printf("\n"); 121 122 sc->sc_link.adapter = &mpath_switch; 123 sc->sc_link.adapter_softc = sc; 124 sc->sc_link.adapter_target = MPATH_BUSWIDTH; 125 sc->sc_link.adapter_buswidth = MPATH_BUSWIDTH; 126 sc->sc_link.luns = 1; 127 sc->sc_link.openings = 1024; /* XXX magical */ 128 129 bzero(&saa, sizeof(saa)); 130 saa.saa_sc_link = &sc->sc_link; 131 132 sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev, 133 &saa, scsiprint); 134 } 135 136 void 137 mpath_xs_stuffup(struct scsi_xfer *xs) 138 { 139 xs->error = XS_DRIVER_STUFFUP; 140 scsi_done(xs); 141 } 142 143 int 144 mpath_probe(struct scsi_link *link) 145 { 146 struct mpath_softc *sc = link->adapter_softc; 147 struct mpath_dev *d = sc->sc_devs[link->target]; 148 
149 if (link->lun != 0 || d == NULL) 150 return (ENXIO); 151 152 link->id = devid_copy(d->d_id); 153 154 return (0); 155 } 156 157 struct mpath_path * 158 mpath_next_path(struct mpath_dev *d) 159 { 160 struct mpath_group *g; 161 struct mpath_path *p; 162 163 #ifdef DIAGNOSTIC 164 if (d == NULL) 165 panic("%s: d is NULL", __func__); 166 #endif /* DIAGNOSTIC */ 167 168 p = d->d_next_path; 169 if (p != NULL) { 170 d->d_next_path = TAILQ_NEXT(p, p_entry); 171 if (d->d_next_path == NULL && 172 (g = TAILQ_FIRST(&d->d_groups)) != NULL) 173 d->d_next_path = TAILQ_FIRST(&g->g_paths); 174 } 175 176 return (p); 177 } 178 179 void 180 mpath_cmd(struct scsi_xfer *xs) 181 { 182 struct scsi_link *link = xs->sc_link; 183 struct mpath_softc *sc = link->adapter_softc; 184 struct mpath_dev *d = sc->sc_devs[link->target]; 185 struct mpath_path *p; 186 struct scsi_xfer *mxs; 187 188 #ifdef DIAGNOSTIC 189 if (d == NULL) 190 panic("mpath_cmd issued against nonexistent device"); 191 #endif /* DIAGNOSTIC */ 192 193 if (ISSET(xs->flags, SCSI_POLL)) { 194 mtx_enter(&d->d_mtx); 195 p = mpath_next_path(d); 196 mtx_leave(&d->d_mtx); 197 if (p == NULL) { 198 mpath_xs_stuffup(xs); 199 return; 200 } 201 202 mxs = scsi_xs_get(p->p_link, xs->flags); 203 if (mxs == NULL) { 204 mpath_xs_stuffup(xs); 205 return; 206 } 207 208 memcpy(mxs->cmd, xs->cmd, xs->cmdlen); 209 mxs->cmdlen = xs->cmdlen; 210 mxs->data = xs->data; 211 mxs->datalen = xs->datalen; 212 mxs->retries = xs->retries; 213 mxs->timeout = xs->timeout; 214 mxs->bp = xs->bp; 215 216 scsi_xs_sync(mxs); 217 218 xs->error = mxs->error; 219 xs->status = mxs->status; 220 xs->resid = mxs->resid; 221 222 memcpy(&xs->sense, &mxs->sense, sizeof(xs->sense)); 223 224 scsi_xs_put(mxs); 225 scsi_done(xs); 226 return; 227 } 228 229 mtx_enter(&d->d_mtx); 230 SIMPLEQ_INSERT_TAIL(&d->d_xfers, xs, xfer_list); 231 p = mpath_next_path(d); 232 mtx_leave(&d->d_mtx); 233 234 if (p != NULL) 235 scsi_xsh_add(&p->p_xsh); 236 } 237 238 void 239 mpath_start(struct 
mpath_path *p, struct scsi_xfer *mxs) 240 { 241 struct mpath_dev *d = p->p_group->g_dev; 242 struct scsi_xfer *xs; 243 int addxsh = 0; 244 245 if (ISSET(p->p_link->state, SDEV_S_DYING) || d == NULL) 246 goto fail; 247 248 mtx_enter(&d->d_mtx); 249 xs = SIMPLEQ_FIRST(&d->d_xfers); 250 if (xs != NULL) { 251 SIMPLEQ_REMOVE_HEAD(&d->d_xfers, xfer_list); 252 if (!SIMPLEQ_EMPTY(&d->d_xfers)) 253 addxsh = 1; 254 } 255 mtx_leave(&d->d_mtx); 256 257 if (xs == NULL) 258 goto fail; 259 260 memcpy(mxs->cmd, xs->cmd, xs->cmdlen); 261 mxs->cmdlen = xs->cmdlen; 262 mxs->data = xs->data; 263 mxs->datalen = xs->datalen; 264 mxs->retries = xs->retries; 265 mxs->timeout = xs->timeout; 266 mxs->bp = xs->bp; 267 mxs->flags = xs->flags; 268 269 mxs->cookie = xs; 270 mxs->done = mpath_done; 271 272 scsi_xs_exec(mxs); 273 274 if (addxsh) 275 scsi_xsh_add(&p->p_xsh); 276 277 return; 278 fail: 279 scsi_xs_put(mxs); 280 } 281 282 void 283 mpath_done(struct scsi_xfer *mxs) 284 { 285 struct scsi_xfer *xs = mxs->cookie; 286 struct scsi_link *link = xs->sc_link; 287 struct mpath_softc *sc = link->adapter_softc; 288 struct mpath_dev *d = sc->sc_devs[link->target]; 289 struct mpath_path *p; 290 291 switch (mxs->error) { 292 case XS_SELTIMEOUT: /* physical path is gone, try the next */ 293 case XS_RESET: 294 mtx_enter(&d->d_mtx); 295 SIMPLEQ_INSERT_HEAD(&d->d_xfers, xs, xfer_list); 296 p = mpath_next_path(d); 297 mtx_leave(&d->d_mtx); 298 299 scsi_xs_put(mxs); 300 301 if (p != NULL) 302 scsi_xsh_add(&p->p_xsh); 303 return; 304 case XS_SENSE: 305 switch (d->d_ops->op_checksense(mxs)) { 306 case MPATH_SENSE_FAILOVER: 307 mtx_enter(&d->d_mtx); 308 SIMPLEQ_INSERT_HEAD(&d->d_xfers, xs, xfer_list); 309 p = mpath_next_path(d); 310 mtx_leave(&d->d_mtx); 311 312 scsi_xs_put(mxs); 313 314 mpath_failover(d); 315 return; 316 case MPATH_SENSE_DECLINED: 317 break; 318 #ifdef DIAGNOSTIC 319 default: 320 panic("unexpected return from checksense"); 321 #endif /* DIAGNOSTIC */ 322 } 323 break; 324 } 325 326 
xs->error = mxs->error; 327 xs->status = mxs->status; 328 xs->resid = mxs->resid; 329 330 memcpy(&xs->sense, &mxs->sense, sizeof(xs->sense)); 331 332 scsi_xs_put(mxs); 333 334 scsi_done(xs); 335 } 336 337 void 338 mpath_failover(struct mpath_dev *d) 339 { 340 if (!scsi_pending_start(&d->d_mtx, &d->d_failover)) 341 return; 342 343 mpath_failover_start(d); 344 } 345 346 void 347 mpath_failover_start(void *xd) 348 { 349 struct mpath_dev *d = xd; 350 351 mtx_enter(&d->d_mtx); 352 d->d_failover_iter = TAILQ_FIRST(&d->d_groups); 353 mtx_leave(&d->d_mtx); 354 355 mpath_failover_check(d); 356 } 357 358 void 359 mpath_failover_check(struct mpath_dev *d) 360 { 361 struct mpath_group *g = d->d_failover_iter; 362 struct mpath_path *p; 363 364 if (g == NULL) 365 timeout_add_sec(&d->d_failover_tmo, 1); 366 else { 367 p = TAILQ_FIRST(&g->g_paths); 368 d->d_ops->op_status(p->p_link); 369 } 370 } 371 372 void 373 mpath_path_status(struct mpath_path *p, int status) 374 { 375 struct mpath_group *g = p->p_group; 376 struct mpath_dev *d = g->g_dev; 377 378 mtx_enter(&d->d_mtx); 379 if (status == MPATH_S_ACTIVE) { 380 TAILQ_REMOVE(&d->d_groups, g, g_entry); 381 TAILQ_INSERT_HEAD(&d->d_groups, g, g_entry); 382 d->d_next_path = p; 383 } else 384 d->d_failover_iter = TAILQ_NEXT(d->d_failover_iter, g_entry); 385 mtx_leave(&d->d_mtx); 386 387 if (status == MPATH_S_ACTIVE) { 388 scsi_xsh_add(&p->p_xsh); 389 if (!scsi_pending_finish(&d->d_mtx, &d->d_failover)) 390 mpath_failover_start(d); 391 } else 392 mpath_failover_check(d); 393 } 394 395 void 396 mpath_minphys(struct buf *bp, struct scsi_link *link) 397 { 398 struct mpath_softc *sc = link->adapter_softc; 399 struct mpath_dev *d = sc->sc_devs[link->target]; 400 struct mpath_group *g; 401 struct mpath_path *p; 402 403 #ifdef DIAGNOSTIC 404 if (d == NULL) 405 panic("mpath_minphys against nonexistent device"); 406 #endif /* DIAGNOSTIC */ 407 408 mtx_enter(&d->d_mtx); 409 TAILQ_FOREACH(g, &d->d_groups, g_entry) { 410 TAILQ_FOREACH(p, 
&g->g_paths, p_entry) { 411 /* XXX crossing layers with mutex held */ 412 if (p->p_link->adapter->dev_minphys != NULL) 413 p->p_link->adapter->dev_minphys(bp, p->p_link); 414 } 415 } 416 mtx_leave(&d->d_mtx); 417 } 418 419 int 420 mpath_path_probe(struct scsi_link *link) 421 { 422 if (mpath == NULL) 423 return (ENXIO); 424 425 if (link->id == NULL) 426 return (EINVAL); 427 428 if (ISSET(link->flags, SDEV_UMASS)) 429 return (EINVAL); 430 431 if (mpath == link->adapter_softc) 432 return (ENXIO); 433 434 return (0); 435 } 436 437 int 438 mpath_path_attach(struct mpath_path *p, u_int g_id, const struct mpath_ops *ops) 439 { 440 struct mpath_softc *sc = mpath; 441 struct scsi_link *link = p->p_link; 442 struct mpath_dev *d = NULL; 443 struct mpath_group *g; 444 int newdev = 0, addxsh = 0; 445 int target; 446 447 #ifdef DIAGNOSTIC 448 if (p->p_link == NULL) 449 panic("mpath_path_attach: NULL link"); 450 if (p->p_group != NULL) 451 panic("mpath_path_attach: group is not NULL"); 452 #endif /* DIAGNOSTIC */ 453 454 for (target = 0; target < MPATH_BUSWIDTH; target++) { 455 if ((d = sc->sc_devs[target]) == NULL) 456 continue; 457 458 if (DEVID_CMP(d->d_id, link->id) && d->d_ops == ops) 459 break; 460 461 d = NULL; 462 } 463 464 if (d == NULL) { 465 for (target = 0; target < MPATH_BUSWIDTH; target++) { 466 if (sc->sc_devs[target] == NULL) 467 break; 468 } 469 if (target >= MPATH_BUSWIDTH) 470 return (ENXIO); 471 472 d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK | M_CANFAIL | M_ZERO); 473 if (d == NULL) 474 return (ENOMEM); 475 476 mtx_init(&d->d_mtx, IPL_BIO); 477 TAILQ_INIT(&d->d_groups); 478 SIMPLEQ_INIT(&d->d_xfers); 479 d->d_id = devid_copy(link->id); 480 d->d_ops = ops; 481 482 timeout_set(&d->d_failover_tmo, mpath_failover_start, d); 483 484 sc->sc_devs[target] = d; 485 newdev = 1; 486 } else { 487 /* 488 * instead of carrying identical values in different devid 489 * instances, delete the new one and reference the old one in 490 * the new scsi_link. 
491 */ 492 devid_free(link->id); 493 link->id = devid_copy(d->d_id); 494 } 495 496 TAILQ_FOREACH(g, &d->d_groups, g_entry) { 497 if (g->g_id == g_id) 498 break; 499 } 500 501 if (g == NULL) { 502 g = malloc(sizeof(*g), M_DEVBUF, 503 M_WAITOK | M_CANFAIL | M_ZERO); 504 if (g == NULL) { 505 if (newdev) { 506 free(d, M_DEVBUF, sizeof(*d)); 507 sc->sc_devs[target] = NULL; 508 } 509 510 return (ENOMEM); 511 } 512 513 TAILQ_INIT(&g->g_paths); 514 g->g_dev = d; 515 g->g_id = g_id; 516 517 mtx_enter(&d->d_mtx); 518 TAILQ_INSERT_TAIL(&d->d_groups, g, g_entry); 519 mtx_leave(&d->d_mtx); 520 } 521 522 p->p_group = g; 523 524 mtx_enter(&d->d_mtx); 525 TAILQ_INSERT_TAIL(&g->g_paths, p, p_entry); 526 if (!SIMPLEQ_EMPTY(&d->d_xfers)) 527 addxsh = 1; 528 529 if (d->d_next_path == NULL) 530 d->d_next_path = p; 531 mtx_leave(&d->d_mtx); 532 533 if (newdev) 534 scsi_probe_target(mpath->sc_scsibus, target); 535 else if (addxsh) 536 scsi_xsh_add(&p->p_xsh); 537 538 return (0); 539 } 540 541 int 542 mpath_path_detach(struct mpath_path *p) 543 { 544 struct mpath_group *g = p->p_group; 545 struct mpath_dev *d; 546 struct mpath_path *np = NULL; 547 548 #ifdef DIAGNOSTIC 549 if (g == NULL) 550 panic("mpath: detaching a path from a nonexistent bus"); 551 #endif /* DIAGNOSTIC */ 552 d = g->g_dev; 553 p->p_group = NULL; 554 555 mtx_enter(&d->d_mtx); 556 TAILQ_REMOVE(&g->g_paths, p, p_entry); 557 if (d->d_next_path == p) 558 d->d_next_path = TAILQ_FIRST(&g->g_paths); 559 560 if (TAILQ_EMPTY(&g->g_paths)) 561 TAILQ_REMOVE(&d->d_groups, g, g_entry); 562 else 563 g = NULL; 564 565 if (!SIMPLEQ_EMPTY(&d->d_xfers)) 566 np = d->d_next_path; 567 mtx_leave(&d->d_mtx); 568 569 if (g != NULL) 570 free(g, M_DEVBUF, sizeof(*g)); 571 572 scsi_xsh_del(&p->p_xsh); 573 574 if (np == NULL) 575 mpath_failover(d); 576 else 577 scsi_xsh_add(&np->p_xsh); 578 579 return (0); 580 } 581 582 struct device * 583 mpath_bootdv(struct device *dev) 584 { 585 struct mpath_softc *sc = mpath; 586 struct mpath_dev *d; 587 
struct mpath_group *g; 588 struct mpath_path *p; 589 int target; 590 591 if (sc == NULL) 592 return (dev); 593 594 for (target = 0; target < MPATH_BUSWIDTH; target++) { 595 if ((d = sc->sc_devs[target]) == NULL) 596 continue; 597 598 TAILQ_FOREACH(g, &d->d_groups, g_entry) { 599 TAILQ_FOREACH(p, &g->g_paths, p_entry) { 600 if (p->p_link->device_softc == dev) { 601 return (scsi_get_link(mpath->sc_scsibus, 602 target, 0)->device_softc); 603 } 604 } 605 } 606 } 607 608 return (dev); 609 } 610