xref: /openbsd-src/sys/scsi/mpath.c (revision 50b7afb2c2c0993b0894d4e34bf857cb13ed9c80)
1 /*	$OpenBSD: mpath.c,v 1.36 2014/07/12 18:50:25 tedu Exp $ */
2 
3 /*
4  * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/systm.h>
21 #include <sys/buf.h>
22 #include <sys/kernel.h>
23 #include <sys/malloc.h>
24 #include <sys/device.h>
25 #include <sys/proc.h>
26 #include <sys/conf.h>
27 #include <sys/queue.h>
28 #include <sys/rwlock.h>
29 #include <sys/ioctl.h>
30 #include <sys/poll.h>
31 #include <sys/selinfo.h>
32 
33 #include <scsi/scsi_all.h>
34 #include <scsi/scsiconf.h>
35 #include <scsi/mpathvar.h>
36 
/* width of the virtual mpath bus: one target per multipathed device */
#define MPATH_BUSWIDTH 256

int		mpath_match(struct device *, void *, void *);
void		mpath_attach(struct device *, struct device *, void *);
void		mpath_shutdown(void *);
42 
/* a list of paths; used for the paths within one group */
TAILQ_HEAD(mpath_paths, mpath_path);

/*
 * A group of paths to one device, keyed by the id the backend driver
 * assigns.  The head of a device's group list is the group I/O is
 * currently issued on (see mpath_path_status()).
 */
struct mpath_group {
	TAILQ_ENTRY(mpath_group) g_entry;	/* link on mpath_dev d_groups */
	struct mpath_paths	 g_paths;	/* paths in this group */
	struct mpath_dev	*g_dev;		/* owning device */
	u_int			 g_id;		/* group id from the backend */
};
TAILQ_HEAD(mpath_groups, mpath_group);

/*
 * One logical device reachable over one or more paths.  Lives in a
 * slot of mpath_softc sc_devs; the slot index is the target number
 * on the virtual mpath bus.
 */
struct mpath_dev {
	struct mutex		 d_mtx;		/* protects the fields below */

	struct scsi_xfer_list	 d_xfers;	/* xfers waiting for a path */
	struct mpath_path	*d_next_path;	/* next path to issue I/O on */

	struct mpath_groups	 d_groups;	/* path groups, active first */

	struct mpath_group	*d_failover_iter; /* group currently being probed */
	struct timeout		 d_failover_tmo;  /* retry timer for failover scans */
	u_int			 d_failover;	  /* state for scsi_pending_start/finish */

	const struct mpath_ops	*d_ops;		/* backend driver operations */
	struct devid		*d_id;		/* identity of the device */
};

/* softc for the single mpath pseudo-device and its virtual scsibus */
struct mpath_softc {
	struct device		sc_dev;
	struct scsi_link	sc_link;	/* adapter link for the virtual bus */
	struct scsibus_softc	*sc_scsibus;	/* scsibus attached below us */
	struct mpath_dev	*sc_devs[MPATH_BUSWIDTH]; /* indexed by target */
};
#define DEVNAME(_s) ((_s)->sc_dev.dv_xname)
76 
/* the one and only mpath softc; set by mpath_attach(), NULL before that */
struct mpath_softc	*mpath;

/* autoconf glue */
struct cfattach mpath_ca = {
	sizeof(struct mpath_softc),
	mpath_match,
	mpath_attach
};

struct cfdriver mpath_cd = {
	NULL,
	"mpath",
	DV_DULL
};
90 
/* adapter entry points for the virtual bus */
void		mpath_cmd(struct scsi_xfer *);
void		mpath_minphys(struct buf *, struct scsi_link *);
int		mpath_probe(struct scsi_link *);

/* path selection and completion */
struct mpath_path *mpath_next_path(struct mpath_dev *);
void		mpath_done(struct scsi_xfer *);

/* failover scanning */
void		mpath_failover(struct mpath_dev *);
void		mpath_failover_start(void *);
void		mpath_failover_check(struct mpath_dev *);
101 
102 struct scsi_adapter mpath_switch = {
103 	mpath_cmd,
104 	scsi_minphys,
105 	mpath_probe
106 };
107 
/* fail an xfer back to its issuer when no path can carry it */
void		mpath_xs_stuffup(struct scsi_xfer *);
109 
/*
 * Autoconf match: mpath is a pseudo-device, so always match.
 */
int
mpath_match(struct device *parent, void *match, void *aux)
{
	return (1);
}
115 
/*
 * Attach the mpath pseudo-device: set up the virtual scsi_link (one
 * target per multipathed device, one lun) and attach a scsibus below.
 */
void
mpath_attach(struct device *parent, struct device *self, void *aux)
{
	struct mpath_softc		*sc = (struct mpath_softc *)self;
	struct scsibus_attach_args	saa;

	/* publish the softc for mpath_path_attach() and friends */
	mpath = sc;

	printf("\n");

	sc->sc_link.adapter = &mpath_switch;
	sc->sc_link.adapter_softc = sc;
	sc->sc_link.adapter_target = MPATH_BUSWIDTH;
	sc->sc_link.adapter_buswidth = MPATH_BUSWIDTH;
	sc->sc_link.luns = 1;
	sc->sc_link.openings = 1024; /* XXX magical */

	bzero(&saa, sizeof(saa));
	saa.saa_sc_link = &sc->sc_link;

	sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev,
	    &saa, scsiprint);
}
139 
/*
 * Complete an xfer with XS_DRIVER_STUFFUP; used when no path is
 * available to carry it.
 */
void
mpath_xs_stuffup(struct scsi_xfer *xs)
{
	xs->error = XS_DRIVER_STUFFUP;
	scsi_done(xs);
}
146 
147 int
148 mpath_probe(struct scsi_link *link)
149 {
150 	struct mpath_softc *sc = link->adapter_softc;
151 	struct mpath_dev *d = sc->sc_devs[link->target];
152 
153 	if (link->lun != 0 || d == NULL)
154 		return (ENXIO);
155 
156 	link->id = devid_copy(d->d_id);
157 
158 	return (0);
159 }
160 
161 struct mpath_path *
162 mpath_next_path(struct mpath_dev *d)
163 {
164 	struct mpath_group *g;
165 	struct mpath_path *p;
166 
167 #ifdef DIAGNOSTIC
168 	if (d == NULL)
169 		panic("%s: d is NULL", __func__);
170 #endif
171 
172 	p = d->d_next_path;
173 	if (p != NULL) {
174 		d->d_next_path = TAILQ_NEXT(p, p_entry);
175 		if (d->d_next_path == NULL &&
176 		    (g = TAILQ_FIRST(&d->d_groups)) != NULL)
177 			d->d_next_path = TAILQ_FIRST(&g->g_paths);
178 	}
179 
180 	return (p);
181 }
182 
/*
 * Adapter scsi_cmd entry point for the mpath bus.  SCSI_POLL xfers are
 * cloned onto a path xfer and run synchronously right here; all other
 * xfers are queued on d_xfers and the selected path's xsh is scheduled
 * (which eventually leads to mpath_start() — see the backend drivers).
 */
void
mpath_cmd(struct scsi_xfer *xs)
{
	struct scsi_link *link = xs->sc_link;
	struct mpath_softc *sc = link->adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_path *p;
	struct scsi_xfer *mxs;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("mpath_cmd issued against nonexistant device");
#endif

	if (ISSET(xs->flags, SCSI_POLL)) {
		mtx_enter(&d->d_mtx);
		p = mpath_next_path(d);
		mtx_leave(&d->d_mtx);
		if (p == NULL) {
			/* no usable path: fail the xfer */
			mpath_xs_stuffup(xs);
			return;
		}

		mxs = scsi_xs_get(p->p_link, xs->flags);
		if (mxs == NULL) {
			mpath_xs_stuffup(xs);
			return;
		}

		/* clone the original command onto the path's xfer */
		memcpy(mxs->cmd, xs->cmd, xs->cmdlen);
		mxs->cmdlen = xs->cmdlen;
		mxs->data = xs->data;
		mxs->datalen = xs->datalen;
		mxs->retries = xs->retries;
		mxs->timeout = xs->timeout;
		mxs->bp = xs->bp;

		scsi_xs_sync(mxs);

		/* copy the result back to the original xfer */
		xs->error = mxs->error;
		xs->status = mxs->status;
		xs->resid = mxs->resid;

		memcpy(&xs->sense, &mxs->sense, sizeof(xs->sense));

		scsi_xs_put(mxs);
		scsi_done(xs);
		return;
	}

	mtx_enter(&d->d_mtx);
	SIMPLEQ_INSERT_TAIL(&d->d_xfers, xs, xfer_list);
	p = mpath_next_path(d);
	mtx_leave(&d->d_mtx);

	if (p != NULL)
		scsi_xsh_add(&p->p_xsh);
}
241 
/*
 * Move the next queued xfer for the device onto path p and issue it
 * using the path xfer mxs.  NOTE(review): appears to be driven by the
 * path's p_xsh handler once the underlying link has an opening —
 * confirm against the mpath backend drivers.
 */
void
mpath_start(struct mpath_path *p, struct scsi_xfer *mxs)
{
	struct mpath_dev *d = p->p_group->g_dev;
	struct scsi_xfer *xs;
	int addxsh = 0;

	/* a dying link or a detached device cannot take I/O */
	if (ISSET(p->p_link->state, SDEV_S_DYING) || d == NULL)
		goto fail;

	mtx_enter(&d->d_mtx);
	xs = SIMPLEQ_FIRST(&d->d_xfers);
	if (xs != NULL) {
		SIMPLEQ_REMOVE_HEAD(&d->d_xfers, xfer_list);
		/* more work queued: reschedule ourselves after issuing */
		if (!SIMPLEQ_EMPTY(&d->d_xfers))
			addxsh = 1;
	}
	mtx_leave(&d->d_mtx);

	if (xs == NULL)
		goto fail;

	/* clone the original command onto the path's xfer */
	memcpy(mxs->cmd, xs->cmd, xs->cmdlen);
	mxs->cmdlen = xs->cmdlen;
	mxs->data = xs->data;
	mxs->datalen = xs->datalen;
	mxs->retries = xs->retries;
	mxs->timeout = xs->timeout;
	mxs->bp = xs->bp;
	mxs->flags = xs->flags;

	/* remember the original xfer so mpath_done() can complete it */
	mxs->cookie = xs;
	mxs->done = mpath_done;

	scsi_xs_exec(mxs);

	if (addxsh)
		scsi_xsh_add(&p->p_xsh);

	return;
fail:
	scsi_xs_put(mxs);
}
285 
/*
 * Completion callback for path xfers issued by mpath_start().  Path
 * errors requeue the original xfer and retry it on the next path;
 * sense data may trigger a failover via the backend's op_checksense;
 * everything else is copied back and completed to the issuer.
 */
void
mpath_done(struct scsi_xfer *mxs)
{
	struct scsi_xfer *xs = mxs->cookie;
	struct scsi_link *link = xs->sc_link;
	struct mpath_softc *sc = link->adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_path *p;

	switch (mxs->error) {
	case XS_SELTIMEOUT: /* physical path is gone, try the next */
	case XS_RESET:
		/* requeue at the head so it is retried first */
		mtx_enter(&d->d_mtx);
		SIMPLEQ_INSERT_HEAD(&d->d_xfers, xs, xfer_list);
		p = mpath_next_path(d);
		mtx_leave(&d->d_mtx);

		scsi_xs_put(mxs);

		if (p != NULL)
			scsi_xsh_add(&p->p_xsh);
		return;
	case XS_SENSE:
		/* let the backend decide if this sense means failover */
		switch (d->d_ops->op_checksense(mxs)) {
		case MPATH_SENSE_FAILOVER:
			mtx_enter(&d->d_mtx);
			SIMPLEQ_INSERT_HEAD(&d->d_xfers, xs, xfer_list);
			/* return value unused; this advances d_next_path */
			p = mpath_next_path(d);
			mtx_leave(&d->d_mtx);

			scsi_xs_put(mxs);

			mpath_failover(d);
			return;
		case MPATH_SENSE_DECLINED:
			break;
#ifdef DIAGNOSTIC
		default:
			panic("unexpected return from checksense");
#endif
		}
		break;
	}

	/* copy the result back to the original xfer and complete it */
	xs->error = mxs->error;
	xs->status = mxs->status;
	xs->resid = mxs->resid;

	memcpy(&xs->sense, &mxs->sense, sizeof(xs->sense));

	scsi_xs_put(mxs);

	scsi_done(xs);
}
340 
/*
 * Request a failover scan for d.  scsi_pending_start() collapses
 * concurrent requests so only one scan runs at a time; the matching
 * scsi_pending_finish() is in mpath_path_status().
 */
void
mpath_failover(struct mpath_dev *d)
{
	if (!scsi_pending_start(&d->d_mtx, &d->d_failover))
		return;

	mpath_failover_start(d);
}
349 
/*
 * (Re)start a failover scan from the first group.  Also serves as the
 * d_failover_tmo handler, retrying the scan when a previous pass found
 * no usable group.
 */
void
mpath_failover_start(void *xd)
{
	struct mpath_dev *d = xd;

	mtx_enter(&d->d_mtx);
	d->d_failover_iter = TAILQ_FIRST(&d->d_groups);
	mtx_leave(&d->d_mtx);

	mpath_failover_check(d);
}
361 
/*
 * Query the status of the current failover candidate group.  When the
 * group list is exhausted, rearm the timeout to retry the whole scan
 * in a second.  The backend's op_status is expected to report back via
 * mpath_path_status() — confirm against the backend drivers.
 */
void
mpath_failover_check(struct mpath_dev *d)
{
	struct mpath_group *g = d->d_failover_iter;
	struct mpath_path *p;

	if (g == NULL)
		timeout_add_sec(&d->d_failover_tmo, 1);
	else {
		p = TAILQ_FIRST(&g->g_paths);
		d->d_ops->op_status(p->p_link);
	}
}
375 
/*
 * Receive the result of an op_status query on path p (status is an
 * MPATH_S_* value).  An active path promotes its group to the head of
 * the group list and restarts I/O on it; any other status advances the
 * failover iterator and keeps scanning.
 */
void
mpath_path_status(struct mpath_path *p, int status)
{
	struct mpath_group *g = p->p_group;
	struct mpath_dev *d = g->g_dev;

	mtx_enter(&d->d_mtx);
	if (status == MPATH_S_ACTIVE) {
		/* this group becomes the active (head) group */
		TAILQ_REMOVE(&d->d_groups, g, g_entry);
		TAILQ_INSERT_HEAD(&d->d_groups, g, g_entry);
		d->d_next_path = p;
	} else
		d->d_failover_iter = TAILQ_NEXT(d->d_failover_iter, g_entry);
	mtx_leave(&d->d_mtx);

	if (status == MPATH_S_ACTIVE) {
		/* restart queued I/O; rescan if another failover was requested */
		scsi_xsh_add(&p->p_xsh);
		if (!scsi_pending_finish(&d->d_mtx, &d->d_failover))
			mpath_failover_start(d);
	} else
		mpath_failover_check(d);
}
398 
/*
 * Adapter minphys for the mpath bus: apply every underlying path's
 * minphys to bp so the transfer fits whichever path ends up carrying
 * it.
 */
void
mpath_minphys(struct buf *bp, struct scsi_link *link)
{
	struct mpath_softc *sc = link->adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_group *g;
	struct mpath_path *p;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("mpath_minphys against nonexistant device");
#endif

	mtx_enter(&d->d_mtx);
	TAILQ_FOREACH(g, &d->d_groups, g_entry) {
		TAILQ_FOREACH(p, &g->g_paths, p_entry) {
			/* XXX crossing layers with mutex held */
			p->p_link->adapter->scsi_minphys(bp, p->p_link);
		}
	}
	mtx_leave(&d->d_mtx);
}
421 
/*
 * Decide whether a scsi_link is usable as an mpath path: mpath must be
 * attached, the link must carry a device id, and the link must not sit
 * on the mpath bus itself (no recursion).
 */
int
mpath_path_probe(struct scsi_link *link)
{
	if (mpath == NULL)
		return (ENXIO);

	if (link->id == NULL)
		return (EINVAL);

	/* don't attach mpath on top of its own virtual links */
	if (mpath == link->adapter_softc)
		return (ENXIO);

	return (0);
}
436 
437 int
438 mpath_path_attach(struct mpath_path *p, u_int g_id, const struct mpath_ops *ops)
439 {
440 	struct mpath_softc *sc = mpath;
441 	struct scsi_link *link = p->p_link;
442 	struct mpath_dev *d = NULL;
443 	struct mpath_group *g;
444 	int newdev = 0, addxsh = 0;
445 	int target;
446 
447 #ifdef DIAGNOSTIC
448 	if (p->p_link == NULL)
449 		panic("mpath_path_attach: NULL link");
450 	if (p->p_group != NULL)
451 		panic("mpath_path_attach: group is not NULL");
452 #endif
453 
454 	for (target = 0; target < MPATH_BUSWIDTH; target++) {
455 		if ((d = sc->sc_devs[target]) == NULL)
456 			continue;
457 
458 		if (DEVID_CMP(d->d_id, link->id) && d->d_ops == ops)
459 			break;
460 
461 		d = NULL;
462 	}
463 
464 	if (d == NULL) {
465 		for (target = 0; target < MPATH_BUSWIDTH; target++) {
466 			if (sc->sc_devs[target] == NULL)
467 				break;
468 		}
469 		if (target >= MPATH_BUSWIDTH)
470 			return (ENXIO);
471 
472 		d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK | M_CANFAIL | M_ZERO);
473 		if (d == NULL)
474 			return (ENOMEM);
475 
476 		mtx_init(&d->d_mtx, IPL_BIO);
477 		TAILQ_INIT(&d->d_groups);
478 		SIMPLEQ_INIT(&d->d_xfers);
479 		d->d_id = devid_copy(link->id);
480 		d->d_ops = ops;
481 
482 		timeout_set(&d->d_failover_tmo, mpath_failover_start, d);
483 
484 		sc->sc_devs[target] = d;
485 		newdev = 1;
486 	} else {
487 		/*
488 		 * instead of carrying identical values in different devid
489 		 * instances, delete the new one and reference the old one in
490 		 * the new scsi_link.
491 		 */
492 		devid_free(link->id);
493 		link->id = devid_copy(d->d_id);
494 	}
495 
496 	TAILQ_FOREACH(g, &d->d_groups, g_entry) {
497 		if (g->g_id == g_id)
498 			break;
499 	}
500 
501 	if (g == NULL) {
502 		g = malloc(sizeof(*g),  M_DEVBUF,
503 		    M_WAITOK | M_CANFAIL | M_ZERO);
504 		if (g == NULL) {
505 			if (newdev) {
506 				free(d, M_DEVBUF, 0);
507 				sc->sc_devs[target] = NULL;
508 			}
509 
510 			return (ENOMEM);
511 		}
512 
513 		TAILQ_INIT(&g->g_paths);
514 		g->g_dev = d;
515 		g->g_id = g_id;
516 
517 		mtx_enter(&d->d_mtx);
518 		TAILQ_INSERT_TAIL(&d->d_groups, g, g_entry);
519 		mtx_leave(&d->d_mtx);
520 	}
521 
522 	p->p_group = g;
523 
524 	mtx_enter(&d->d_mtx);
525 	TAILQ_INSERT_TAIL(&g->g_paths, p, p_entry);
526 	if (!SIMPLEQ_EMPTY(&d->d_xfers))
527 		addxsh = 1;
528 
529 	if (d->d_next_path == NULL)
530 		d->d_next_path = p;
531 	mtx_leave(&d->d_mtx);
532 
533 	if (newdev)
534 		scsi_probe_target(mpath->sc_scsibus, target);
535 	else if (addxsh)
536 		scsi_xsh_add(&p->p_xsh);
537 
538 	return (0);
539 }
540 
/*
 * Remove path p from its group.  A group left empty is unlinked from
 * the device and freed.  If xfers are still queued they are pushed to
 * the next remaining path; with no path to hand them to, a failover
 * scan is started instead.
 */
int
mpath_path_detach(struct mpath_path *p)
{
	struct mpath_group *g = p->p_group;
	struct mpath_dev *d;
	struct mpath_path *np = NULL;

#ifdef DIAGNOSTIC
	if (g == NULL)
		panic("mpath: detaching a path from a nonexistant bus");
#endif
	d = g->g_dev;
	p->p_group = NULL;

	mtx_enter(&d->d_mtx);
	TAILQ_REMOVE(&g->g_paths, p, p_entry);
	if (d->d_next_path == p)
		d->d_next_path = TAILQ_FIRST(&g->g_paths);

	/* unlink the group if this was its last path */
	if (TAILQ_EMPTY(&g->g_paths))
		TAILQ_REMOVE(&d->d_groups, g, g_entry);
	else
		g = NULL;	/* group survives; nothing to free below */

	if (!SIMPLEQ_EMPTY(&d->d_xfers))
		np = d->d_next_path;
	mtx_leave(&d->d_mtx);

	if (g != NULL)
		free(g, M_DEVBUF, 0);

	scsi_xsh_del(&p->p_xsh);

	if (np == NULL)
		mpath_failover(d);
	else
		scsi_xsh_add(&np->p_xsh);

	return (0);
}
581 
582 struct device *
583 mpath_bootdv(struct device *dev)
584 {
585 	struct mpath_softc *sc = mpath;
586 	struct mpath_dev *d;
587 	struct mpath_group *g;
588 	struct mpath_path *p;
589 	int target;
590 
591 	if (sc == NULL)
592 		return (dev);
593 
594 	for (target = 0; target < MPATH_BUSWIDTH; target++) {
595 		if ((d = sc->sc_devs[target]) == NULL)
596 			continue;
597 
598 		TAILQ_FOREACH(g, &d->d_groups, g_entry) {
599 			TAILQ_FOREACH(p, &g->g_paths, p_entry) {
600 				if (p->p_link->device_softc == dev) {
601 					return (scsi_get_link(mpath->sc_scsibus,
602 					    target, 0)->device_softc);
603 				}
604 			}
605 		}
606 	}
607 
608 	return (dev);
609 }
610