xref: /netbsd-src/sys/dev/dmover/dmover_io.c (revision b7b7574d3bf8eeb51a1fa3977b59142ec6434a55)
1 /*	$NetBSD: dmover_io.c,v 1.42 2014/03/16 05:20:27 dholland Exp $	*/
2 
3 /*
4  * Copyright (c) 2002, 2003 Wasabi Systems, Inc.
5  * All rights reserved.
6  *
7  * Written by Jason R. Thorpe for Wasabi Systems, Inc.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *	This product includes software developed for the NetBSD Project by
20  *	Wasabi Systems, Inc.
21  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22  *    or promote products derived from this software without specific prior
23  *    written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  */
37 
38 /*
39  * dmover_io.c: Support for user-space access to the dmover API
40  *
41  * This interface is quite simple:
42  *
43  *	1.  The user opens /dev/dmover, which is a cloning device.  This
44  *	    allocates internal state for the session.
45  *
46  *	2.  The user does a DMIO_SETFUNC to select the data movement
47  *	    function.  This actually creates the dmover session.
48  *
49  *	3.  The user writes request messages to its dmover handle.
50  *
51  *	4.  The user reads request responses from its dmover handle.
52  *
53  *	5.  The user closes the file descriptor and the session is
54  *	    torn down.
55  */
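
/*
 * A minimal user-space sketch of the interface above (illustrative only,
 * not part of this driver).  It assumes a zero-input dmover function
 * named "zero" is configured; the function name and buffer size are
 * examples, and headers and error checking are omitted:
 *
 *	int fd = open("/dev/dmover", O_RDWR);
 *
 *	struct dmio_setfunc dsf;
 *	memset(&dsf, 0, sizeof(dsf));
 *	strlcpy(dsf.dsf_name, "zero", sizeof(dsf.dsf_name));
 *	ioctl(fd, DMIO_SETFUNC, &dsf);
 *
 *	char buf[4096];
 *	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
 *	struct dmio_usrreq req;
 *	memset(&req, 0, sizeof(req));
 *	req.req_id = 1;
 *	req.req_outbuf.dmbuf_iov = &iov;
 *	req.req_outbuf.dmbuf_iovcnt = 1;
 *	write(fd, &req, sizeof(req));
 *
 *	struct dmio_usrresp resp;
 *	read(fd, &resp, sizeof(resp));
 *
 *	The read blocks until the request completes; resp.resp_id matches
 *	req.req_id and resp.resp_error is 0 on success.
 *
 *	close(fd);
 */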
56 
57 #include <sys/cdefs.h>
58 __KERNEL_RCSID(0, "$NetBSD: dmover_io.c,v 1.42 2014/03/16 05:20:27 dholland Exp $");
59 
60 #include <sys/param.h>
61 #include <sys/queue.h>
62 #include <sys/conf.h>
63 #include <sys/pool.h>
64 #include <sys/proc.h>
65 #include <sys/poll.h>
66 #include <sys/malloc.h>
67 #include <sys/file.h>
68 #include <sys/filedesc.h>
69 #include <sys/filio.h>
70 #include <sys/select.h>
71 #include <sys/systm.h>
72 #include <sys/workqueue.h>
73 #include <sys/once.h>
74 #include <sys/stat.h>
75 #include <sys/kauth.h>
76 #include <sys/mutex.h>
77 #include <sys/condvar.h>
78 
79 #include <uvm/uvm_extern.h>
80 
81 #include <dev/dmover/dmovervar.h>
82 #include <dev/dmover/dmover_io.h>
83 
84 struct dmio_usrreq_state {
85 	union {
86 		struct work u_work;
87 		TAILQ_ENTRY(dmio_usrreq_state) u_q;
88 	} dus_u;
89 #define	dus_q		dus_u.u_q
90 #define	dus_work	dus_u.u_work
91 	struct uio dus_uio_out;
92 	struct uio *dus_uio_in;
93 	struct dmover_request *dus_req;
94 	uint32_t dus_id;
95 	struct vmspace *dus_vmspace;
96 };
97 
98 struct dmio_state {
99 	struct dmover_session *ds_session;
100 	TAILQ_HEAD(, dmio_usrreq_state) ds_pending;
101 	TAILQ_HEAD(, dmio_usrreq_state) ds_complete;
102 	struct selinfo ds_selq;
103 	volatile int ds_flags;
104 	u_int ds_nreqs;
105 	kmutex_t ds_lock;
106 	kcondvar_t ds_complete_cv;
107 	kcondvar_t ds_nreqs_cv;
108 	struct timespec ds_atime;
109 	struct timespec ds_mtime;
110 	struct timespec ds_btime;
111 };
112 
113 static ONCE_DECL(dmio_cleaner_control);
114 static struct workqueue *dmio_cleaner;
115 static int dmio_cleaner_init(void);
116 static struct dmio_state *dmio_state_get(void);
117 static void dmio_state_put(struct dmio_state *);
118 static void dmio_usrreq_fini1(struct work *wk, void *);
119 
120 #define	DMIO_STATE_SEL		0x0001
121 #define	DMIO_STATE_DEAD		0x0002
122 #define	DMIO_STATE_LARVAL	0x0004
123 #define	DMIO_STATE_READ_WAIT	0x0008
124 #define	DMIO_STATE_WRITE_WAIT	0x0010
125 
126 #define	DMIO_NREQS_MAX		64	/* XXX pulled out of a hat */
127 
128 struct pool dmio_state_pool;
129 struct pool dmio_usrreq_state_pool;
130 
131 void	dmoverioattach(int);
132 
133 dev_type_open(dmoverioopen);
134 
135 const struct cdevsw dmoverio_cdevsw = {
136 	.d_open = dmoverioopen,
137 	.d_close = noclose,
138 	.d_read = noread,
139 	.d_write = nowrite,
140 	.d_ioctl = noioctl,
141 	.d_stop = nostop,
142 	.d_tty = notty,
143 	.d_poll = nopoll,
144 	.d_mmap = nommap,
145 	.d_kqfilter = nokqfilter,
146 	.d_flag = D_OTHER
147 };
148 
149 /*
150  * dmoverioattach:
151  *
152  *	Pseudo-device attach routine.
153  */
154 void
155 dmoverioattach(int count)
156 {
157 
158 	pool_init(&dmio_state_pool, sizeof(struct dmio_state),
159 	    0, 0, 0, "dmiostate", NULL, IPL_SOFTCLOCK);
160 	pool_init(&dmio_usrreq_state_pool, sizeof(struct dmio_usrreq_state),
161 	    0, 0, 0, "dmiourstate", NULL, IPL_SOFTCLOCK);
162 }
163 
164 /*
165  * dmio_cleaner_init:
166  *
167  *	Create cleaner thread.
168  */
169 static int
170 dmio_cleaner_init(void)
171 {
172 
173 	return workqueue_create(&dmio_cleaner, "dmioclean", dmio_usrreq_fini1,
174 	    NULL, PWAIT, IPL_SOFTCLOCK, 0);
175 }
176 
177 static struct dmio_state *
178 dmio_state_get(void)
179 {
180 	struct dmio_state *ds;
181 
182 	ds = pool_get(&dmio_state_pool, PR_WAITOK);
183 
184 	memset(ds, 0, sizeof(*ds));
185 
186 	getnanotime(&ds->ds_btime);
187 	ds->ds_atime = ds->ds_mtime = ds->ds_btime;
188 
189 	mutex_init(&ds->ds_lock, MUTEX_DEFAULT, IPL_SOFTCLOCK);
190 	cv_init(&ds->ds_complete_cv, "dmvrrd");
191 	cv_init(&ds->ds_nreqs_cv, "dmiowr");
192 	TAILQ_INIT(&ds->ds_pending);
193 	TAILQ_INIT(&ds->ds_complete);
194 	selinit(&ds->ds_selq);
195 
196 	return ds;
197 }
198 
199 static void
200 dmio_state_put(struct dmio_state *ds)
201 {
202 
203 	seldestroy(&ds->ds_selq);
204 	cv_destroy(&ds->ds_nreqs_cv);
205 	cv_destroy(&ds->ds_complete_cv);
206 	mutex_destroy(&ds->ds_lock);
207 
208 	pool_put(&dmio_state_pool, ds);
209 }
210 
211 /*
212  * dmio_usrreq_init:
213  *
214  *	Build a request structure.
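 *
 *	The caller-supplied dmio_usrreq names an optional output buffer and
 *	dses_ninputs input buffers, each described by a user-space iovec
 *	array (dmbuf_iov/dmbuf_iovcnt).  Each iovec count is limited to
 *	IOV_MAX, each buffer's total length to SSIZE_MAX, and every input
 *	buffer must be exactly as long as the output buffer when one is
 *	present.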
215  */
216 static int
217 dmio_usrreq_init(struct file *fp, struct dmio_usrreq_state *dus,
218     struct dmio_usrreq *req, struct dmover_request *dreq)
219 {
220 	struct dmio_state *ds = (struct dmio_state *) fp->f_data;
221 	struct dmover_session *dses = ds->ds_session;
222 	struct uio *uio_out = &dus->dus_uio_out;
223 	struct uio *uio_in;
224 	dmio_buffer inbuf;
225 	size_t len;
226 	int i, error;
227 	u_int j;
228 
229 	/* XXX How should malloc interact w/ FNONBLOCK? */
230 
231 	error = RUN_ONCE(&dmio_cleaner_control, dmio_cleaner_init);
232 	if (error) {
233 		return error;
234 	}
235 
236 	error = proc_vmspace_getref(curproc, &dus->dus_vmspace);
237 	if (error) {
238 		return error;
239 	}
240 
241 	if (req->req_outbuf.dmbuf_iovcnt != 0) {
242 		if (req->req_outbuf.dmbuf_iovcnt > IOV_MAX)
243 			return (EINVAL);
244 		len = sizeof(struct iovec) * req->req_outbuf.dmbuf_iovcnt;
245 		uio_out->uio_iov = malloc(len, M_TEMP, M_WAITOK);
246 		error = copyin(req->req_outbuf.dmbuf_iov, uio_out->uio_iov,
247 		    len);
248 		if (error) {
249 			free(uio_out->uio_iov, M_TEMP);
250 			return (error);
251 		}
252 
253 		for (j = 0, len = 0; j < req->req_outbuf.dmbuf_iovcnt; j++) {
254 			len += uio_out->uio_iov[j].iov_len;
255 			if (len > SSIZE_MAX) {
256 				free(uio_out->uio_iov, M_TEMP);
257 				return (EINVAL);
258 			}
259 		}
260 
261 		uio_out->uio_iovcnt = req->req_outbuf.dmbuf_iovcnt;
262 		uio_out->uio_resid = len;
263 		uio_out->uio_rw = UIO_READ;
264 		uio_out->uio_vmspace = dus->dus_vmspace;
265 
266 		dreq->dreq_outbuf_type = DMOVER_BUF_UIO;
267 		dreq->dreq_outbuf.dmbuf_uio = uio_out;
268 	} else {
269 		uio_out->uio_iov = NULL;
270 		uio_out = NULL;
271 		dreq->dreq_outbuf_type = DMOVER_BUF_NONE;
272 	}
273 
274 	memcpy(dreq->dreq_immediate, req->req_immediate,
275 	    sizeof(dreq->dreq_immediate));
276 
277 	if (dses->dses_ninputs == 0) {
278 		/* No inputs; all done. */
279 		return (0);
280 	}
281 
282 	dreq->dreq_inbuf_type = DMOVER_BUF_UIO;
283 
284 	dus->dus_uio_in = malloc(sizeof(struct uio) * dses->dses_ninputs,
285 	    M_TEMP, M_WAITOK);
286 	memset(dus->dus_uio_in, 0, sizeof(struct uio) * dses->dses_ninputs);
287 
288 	for (i = 0; i < dses->dses_ninputs; i++) {
289 		uio_in = &dus->dus_uio_in[i];
290 
291 		error = copyin(&req->req_inbuf[i], &inbuf, sizeof(inbuf));
292 		if (error)
293 			goto bad;
294 
295 		if (inbuf.dmbuf_iovcnt > IOV_MAX) {
296 			error = EINVAL;
297 			goto bad;
298 		}
299 		len = sizeof(struct iovec) * inbuf.dmbuf_iovcnt;
300 		if (len == 0) {
301 			error = EINVAL;
302 			goto bad;
303 		}
304 		uio_in->uio_iov = malloc(len, M_TEMP, M_WAITOK);
305 
306 		error = copyin(inbuf.dmbuf_iov, uio_in->uio_iov, len);
307 		if (error) {
308 			free(uio_in->uio_iov, M_TEMP);
309 			goto bad;
310 		}
311 
312 		for (j = 0, len = 0; j < inbuf.dmbuf_iovcnt; j++) {
313 			len += uio_in->uio_iov[j].iov_len;
314 			if (len > SSIZE_MAX) {
315 				free(uio_in->uio_iov, M_TEMP);
316 				error = EINVAL;
317 				goto bad;
318 			}
319 		}
320 
321 		if (uio_out != NULL && len != uio_out->uio_resid) {
322 			free(uio_in->uio_iov, M_TEMP);
323 			error = EINVAL;
324 			goto bad;
325 		}
326 
327 		uio_in->uio_iovcnt = inbuf.dmbuf_iovcnt;
328 		uio_in->uio_resid = len;
329 		uio_in->uio_rw = UIO_WRITE;
330 		uio_in->uio_vmspace = dus->dus_vmspace;
331 
332 		dreq->dreq_inbuf[i].dmbuf_uio = uio_in;
333 	}
334 
335 	return (0);
336 
337  bad:
338 	if (i > 0) {
339 		for (--i; i >= 0; i--) {
340 			uio_in = &dus->dus_uio_in[i];
341 			free(uio_in->uio_iov, M_TEMP);
342 		}
343 	}
344 	free(dus->dus_uio_in, M_TEMP);
345 	if (uio_out != NULL)
346 		free(uio_out->uio_iov, M_TEMP);
347 	uvmspace_free(dus->dus_vmspace);
348 	return (error);
349 }
350 
351 /*
352  * dmio_usrreq_fini:
353  *
354  *	Tear down a request; final teardown runs via the dmio_cleaner workqueue.
355  */
356 static void
357 dmio_usrreq_fini(struct dmio_state *ds, struct dmio_usrreq_state *dus)
358 {
359 	struct dmover_session *dses = ds->ds_session;
360 	struct uio *uio_out = &dus->dus_uio_out;
361 	struct uio *uio_in;
362 	int i;
363 
364 	if (uio_out->uio_iov != NULL)
365 		free(uio_out->uio_iov, M_TEMP);
366 
367 	if (dses->dses_ninputs) {
368 		for (i = 0; i < dses->dses_ninputs; i++) {
369 			uio_in = &dus->dus_uio_in[i];
370 			free(uio_in->uio_iov, M_TEMP);
371 		}
372 		free(dus->dus_uio_in, M_TEMP);
373 	}
374 
375 	workqueue_enqueue(dmio_cleaner, &dus->dus_work, NULL);
376 }
377 
378 static void
379 dmio_usrreq_fini1(struct work *wk, void *dummy)
380 {
381 	struct dmio_usrreq_state *dus = (void *)wk;
382 
383 	KASSERT(wk == &dus->dus_work);
384 
385 	uvmspace_free(dus->dus_vmspace);
386 	pool_put(&dmio_usrreq_state_pool, dus);
387 }
388 
389 /*
390  * dmio_read:
391  *
392  *	Read file op.
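 *
 *	Copies out one struct dmio_usrresp per completed request, in
 *	completion order, and sleeps for completions unless the descriptor
 *	is in non-blocking mode.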
393  */
394 static int
395 dmio_read(struct file *fp, off_t *offp, struct uio *uio,
396     kauth_cred_t cred, int flags)
397 {
398 	struct dmio_state *ds = (struct dmio_state *) fp->f_data;
399 	struct dmio_usrreq_state *dus;
400 	struct dmover_request *dreq;
401 	struct dmio_usrresp resp;
402 	int error = 0, progress = 0;
403 
404 	if ((uio->uio_resid % sizeof(resp)) != 0)
405 		return (EINVAL);
406 
407 	if (ds->ds_session == NULL)
408 		return (ENXIO);
409 
410 	getnanotime(&ds->ds_atime);
411 	mutex_enter(&ds->ds_lock);
412 
413 	while (uio->uio_resid != 0) {
414 
415 		for (;;) {
416 			dus = TAILQ_FIRST(&ds->ds_complete);
417 			if (dus == NULL) {
418 				if (fp->f_flag & FNONBLOCK) {
419 					error = progress ? 0 : EWOULDBLOCK;
420 					goto out;
421 				}
422 				ds->ds_flags |= DMIO_STATE_READ_WAIT;
423 				error = cv_wait_sig(&ds->ds_complete_cv, &ds->ds_lock);
424 				if (error)
425 					goto out;
426 				continue;
427 			}
428 			/* Have a completed request. */
429 			TAILQ_REMOVE(&ds->ds_complete, dus, dus_q);
430 			ds->ds_nreqs--;
431 			if (ds->ds_flags & DMIO_STATE_WRITE_WAIT) {
432 				ds->ds_flags &= ~DMIO_STATE_WRITE_WAIT;
433 				cv_broadcast(&ds->ds_nreqs_cv);
434 			}
435 			if (ds->ds_flags & DMIO_STATE_SEL) {
436 				ds->ds_flags &= ~DMIO_STATE_SEL;
437 				selnotify(&ds->ds_selq, POLLOUT | POLLWRNORM, 0);
438 			}
439 			break;
440 		}
441 
442 		dreq = dus->dus_req;
443 		resp.resp_id = dus->dus_id;
444 		if (dreq->dreq_flags & DMOVER_REQ_ERROR)
445 			resp.resp_error = dreq->dreq_error;
446 		else {
447 			resp.resp_error = 0;
448 			memcpy(resp.resp_immediate, dreq->dreq_immediate,
449 			    sizeof(resp.resp_immediate));
450 		}
451 
452 		dmio_usrreq_fini(ds, dus);
453 
454 		mutex_exit(&ds->ds_lock);
455 
456 		progress = 1;
457 
458 		dmover_request_free(dreq);
459 
460 		error = uiomove(&resp, sizeof(resp), uio);
461 		if (error)
462 			return (error);
463 
464 		mutex_enter(&ds->ds_lock);
465 	}
466 
467  out:
468 	mutex_exit(&ds->ds_lock);
469 
470 	return (error);
471 }
472 
473 /*
474  * dmio_usrreq_done:
475  *
476  *	Dmover completion callback.
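 *
 *	Moves the finished request from the pending to the complete queue
 *	and wakes any sleeping reader or poller.  If the descriptor has
 *	already been closed, the request is torn down here instead, and the
 *	last completion also releases the dmio_state.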
477  */
478 static void
479 dmio_usrreq_done(struct dmover_request *dreq)
480 {
481 	struct dmio_usrreq_state *dus = dreq->dreq_cookie;
482 	struct dmio_state *ds = dreq->dreq_session->dses_cookie;
483 
484 	/* We're already at splsoftclock(). */
485 
486 	mutex_enter(&ds->ds_lock);
487 	TAILQ_REMOVE(&ds->ds_pending, dus, dus_q);
488 	if (ds->ds_flags & DMIO_STATE_DEAD) {
489 		int nreqs = --ds->ds_nreqs;
490 		mutex_exit(&ds->ds_lock);
491 		dmio_usrreq_fini(ds, dus);
492 		dmover_request_free(dreq);
493 		if (nreqs == 0) {
494 			dmio_state_put(ds);
495 		}
496 		return;
497 	}
498 
499 	TAILQ_INSERT_TAIL(&ds->ds_complete, dus, dus_q);
500 	if (ds->ds_flags & DMIO_STATE_READ_WAIT) {
501 		ds->ds_flags &= ~DMIO_STATE_READ_WAIT;
502 		cv_broadcast(&ds->ds_complete_cv);
503 	}
504 	if (ds->ds_flags & DMIO_STATE_SEL) {
505 		ds->ds_flags &= ~DMIO_STATE_SEL;
506 		selnotify(&ds->ds_selq, POLLIN | POLLRDNORM, 0);
507 	}
508 	mutex_exit(&ds->ds_lock);
509 }
510 
511 /*
512  * dmio_write:
513  *
514  *	Write file op.
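 *
 *	Consumes one struct dmio_usrreq per record written, builds a dmover
 *	request for each and submits it with dmover_process().  At most
 *	DMIO_NREQS_MAX requests may be outstanding at once; beyond that the
 *	writer sleeps, or returns EWOULDBLOCK when non-blocking and no
 *	request has been queued yet.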
515  */
516 static int
517 dmio_write(struct file *fp, off_t *offp, struct uio *uio,
518     kauth_cred_t cred, int flags)
519 {
520 	struct dmio_state *ds = (struct dmio_state *) fp->f_data;
521 	struct dmio_usrreq_state *dus;
522 	struct dmover_request *dreq;
523 	struct dmio_usrreq req;
524 	int error = 0, progress = 0;
525 
526 	if ((uio->uio_resid % sizeof(req)) != 0)
527 		return (EINVAL);
528 
529 	if (ds->ds_session == NULL)
530 		return (ENXIO);
531 
532 	getnanotime(&ds->ds_mtime);
533 	mutex_enter(&ds->ds_lock);
534 
535 	while (uio->uio_resid != 0) {
536 
537 		if (ds->ds_nreqs == DMIO_NREQS_MAX) {
538 			if (fp->f_flag & FNONBLOCK) {
539 				error = progress ? 0 : EWOULDBLOCK;
540 				break;
541 			}
542 			ds->ds_flags |= DMIO_STATE_WRITE_WAIT;
543 			error = cv_wait_sig(&ds->ds_nreqs_cv, &ds->ds_lock);
544 			if (error)
545 				break;
546 			continue;
547 		}
548 
549 		ds->ds_nreqs++;
550 
551 		mutex_exit(&ds->ds_lock);
552 
553 		progress = 1;
554 
555 		error = uiomove(&req, sizeof(req), uio);
556 		if (error) {
557 			mutex_enter(&ds->ds_lock);
558 			ds->ds_nreqs--;
559 			break;
560 		}
561 
562 		/* XXX How should this interact with FNONBLOCK? */
563 		dreq = dmover_request_alloc(ds->ds_session, NULL);
564 		if (dreq == NULL) {
565 			/* XXX */
566 			ds->ds_nreqs--;
567 			error = ENOMEM;
568 			return error;
569 		}
570 		dus = pool_get(&dmio_usrreq_state_pool, PR_WAITOK);
571 
572 		error = dmio_usrreq_init(fp, dus, &req, dreq);
573 		if (error) {
574 			dmover_request_free(dreq);
575 			pool_put(&dmio_usrreq_state_pool, dus);
576 			return error;
577 		}
578 
579 		dreq->dreq_callback = dmio_usrreq_done;
580 		dreq->dreq_cookie = dus;
581 
582 		dus->dus_req = dreq;
583 		dus->dus_id = req.req_id;
584 
585 		mutex_enter(&ds->ds_lock);
586 
587 		TAILQ_INSERT_TAIL(&ds->ds_pending, dus, dus_q);
588 
589 		mutex_exit(&ds->ds_lock);
590 
591 		dmover_process(dreq);
592 
593 		mutex_enter(&ds->ds_lock);
594 	}
595 
596 	mutex_exit(&ds->ds_lock);
597 
598 	return (error);
599 }
600 
601 static int
602 dmio_stat(struct file *fp, struct stat *st)
603 {
604 	struct dmio_state *ds = fp->f_data;
605 
606 	(void)memset(st, 0, sizeof(*st));
607 	KERNEL_LOCK(1, NULL);
608 	st->st_dev = makedev(cdevsw_lookup_major(&dmoverio_cdevsw), 0);
609 	st->st_atimespec = ds->ds_atime;
610 	st->st_mtimespec = ds->ds_mtime;
611 	st->st_ctimespec = st->st_birthtimespec = ds->ds_btime;
612 	st->st_uid = kauth_cred_geteuid(fp->f_cred);
613 	st->st_gid = kauth_cred_getegid(fp->f_cred);
614 	KERNEL_UNLOCK_ONE(NULL);
615 	return 0;
616 }
617 
618 /*
619  * dmio_ioctl:
620  *
621  *	Ioctl file op.
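 *
 *	DMIO_SETFUNC selects the data-movement function and creates the
 *	dmover session; it may only be issued once per descriptor (EBUSY
 *	otherwise).  FIONBIO and FIOASYNC are accepted as no-ops.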
622  */
623 static int
624 dmio_ioctl(struct file *fp, u_long cmd, void *data)
625 {
626 	struct dmio_state *ds = (struct dmio_state *) fp->f_data;
627 	int error;
628 
629 	switch (cmd) {
630 	case FIONBIO:
631 	case FIOASYNC:
632 		return (0);
633 
634 	case DMIO_SETFUNC:
635 	    {
636 		struct dmio_setfunc *dsf = data;
637 		struct dmover_session *dses;
638 
639 		mutex_enter(&ds->ds_lock);
640 
641 		if (ds->ds_session != NULL ||
642 		    (ds->ds_flags & DMIO_STATE_LARVAL) != 0) {
643 			mutex_exit(&ds->ds_lock);
644 			return (EBUSY);
645 		}
646 
647 		ds->ds_flags |= DMIO_STATE_LARVAL;
648 
649 		mutex_exit(&ds->ds_lock);
650 
651 		dsf->dsf_name[DMIO_MAX_FUNCNAME - 1] = '\0';
652 		error = dmover_session_create(dsf->dsf_name, &dses);
653 
654 		mutex_enter(&ds->ds_lock);
655 
656 		if (error == 0) {
657 			dses->dses_cookie = ds;
658 			ds->ds_session = dses;
659 		}
660 		ds->ds_flags &= ~DMIO_STATE_LARVAL;
661 
662 		mutex_exit(&ds->ds_lock);
663 		break;
664 	    }
665 
666 	default:
667 		error = ENOTTY;
668 	}
669 
670 	return (error);
671 }
672 
673 /*
674  * dmio_poll:
675  *
676  *	Poll file op.
677  */
678 static int
679 dmio_poll(struct file *fp, int events)
680 {
681 	struct dmio_state *ds = (struct dmio_state *) fp->f_data;
682 	int revents = 0;
683 
684 	if ((events & (POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM)) == 0)
685 		return (revents);
686 
687 	mutex_enter(&ds->ds_lock);
688 
689 	if (ds->ds_flags & DMIO_STATE_DEAD) {
690 		/* EOF */
691 		revents |= events & (POLLIN | POLLRDNORM |
692 		    POLLOUT | POLLWRNORM);
693 		goto out;
694 	}
695 
696 	/* We can read if there are completed requests. */
697 	if (events & (POLLIN | POLLRDNORM))
698 		if (TAILQ_EMPTY(&ds->ds_complete) == 0)
699 			revents |= events & (POLLIN | POLLRDNORM);
700 
701 	/*
702 	 * We can write if there are fewer than DMIO_NREQS_MAX requests
703 	 * already in the queue.
704 	 */
705 	if (events & (POLLOUT | POLLWRNORM))
706 		if (ds->ds_nreqs < DMIO_NREQS_MAX)
707 			revents |= events & (POLLOUT | POLLWRNORM);
708 
709 	if (revents == 0) {
710 		selrecord(curlwp, &ds->ds_selq);
711 		ds->ds_flags |= DMIO_STATE_SEL;
712 	}
713 
714  out:
715 	mutex_exit(&ds->ds_lock);
716 
717 	return (revents);
718 }
719 
720 /*
721  * dmio_close:
722  *
723  *	Close file op.
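 *
 *	Marks the state dead and frees all completed requests.  If requests
 *	are still pending, the dmio_state is left for the final completion
 *	callback to release; otherwise it is destroyed here together with
 *	the dmover session.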
724  */
725 static int
726 dmio_close(struct file *fp)
727 {
728 	struct dmio_state *ds = (struct dmio_state *) fp->f_data;
729 	struct dmio_usrreq_state *dus;
730 	struct dmover_session *dses;
731 
732 	mutex_enter(&ds->ds_lock);
733 
734 	ds->ds_flags |= DMIO_STATE_DEAD;
735 
736 	/* Garbage-collect all the responses on the queue. */
737 	while ((dus = TAILQ_FIRST(&ds->ds_complete)) != NULL) {
738 		TAILQ_REMOVE(&ds->ds_complete, dus, dus_q);
739 		ds->ds_nreqs--;
740 		mutex_exit(&ds->ds_lock);
741 		dmover_request_free(dus->dus_req);
742 		dmio_usrreq_fini(ds, dus);
743 		mutex_enter(&ds->ds_lock);
744 	}
745 
746 	/*
747 	 * If there are any requests pending, we have to wait for
748 	 * them.  Don't free the dmio_state in this case.
749 	 */
750 	if (ds->ds_nreqs == 0) {
751 		dses = ds->ds_session;
752 		mutex_exit(&ds->ds_lock);
753 		dmio_state_put(ds);
754 	} else {
755 		dses = NULL;
756 		mutex_exit(&ds->ds_lock);
757 	}
758 
759 	fp->f_data = NULL;
760 
761 	if (dses != NULL)
762 		dmover_session_destroy(dses);
763 
764 	return (0);
765 }
766 
767 static const struct fileops dmio_fileops = {
768 	.fo_read = dmio_read,
769 	.fo_write = dmio_write,
770 	.fo_ioctl = dmio_ioctl,
771 	.fo_fcntl = fnullop_fcntl,
772 	.fo_poll = dmio_poll,
773 	.fo_stat = dmio_stat,
774 	.fo_close = dmio_close,
775 	.fo_kqfilter = fnullop_kqfilter,
776 	.fo_restart = fnullop_restart,
777 };
778 
779 /*
780  * dmoverioopen:
781  *
782  *	Device switch open routine.
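 *
 *	Each open() clones a new descriptor backed by dmio_fileops with its
 *	own dmio_state; the dmover session itself is not created until the
 *	DMIO_SETFUNC ioctl.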
783  */
784 int
785 dmoverioopen(dev_t dev, int flag, int mode, struct lwp *l)
786 {
787 	struct dmio_state *ds;
788 	struct file *fp;
789 	int error, fd;
790 
791 	if ((error = fd_allocfile(&fp, &fd)) != 0)
792 		return (error);
793 
794 	ds = dmio_state_get();
795 
796 	return fd_clone(fp, fd, flag, &dmio_fileops, ds);
797 }
798