/*	$NetBSD: puffs_msgif.c,v 1.55 2007/11/12 16:39:34 pooka Exp $	*/

/*
 * Copyright (c) 2005, 2006, 2007  Antti Kantee.  All Rights Reserved.
 *
 * Development of this software was supported by the
 * Google Summer of Code program and the Ulla Tuominen Foundation.
 * The Google SoC project was mentored by Bill Studenmund.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: puffs_msgif.c,v 1.55 2007/11/12 16:39:34 pooka Exp $");

#include <sys/param.h>
#include <sys/fstrans.h>
#include <sys/kmem.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/vnode.h>

#include <dev/putter/putter_sys.h>

#include <fs/puffs/puffs_msgif.h>
#include <fs/puffs/puffs_sys.h>

#include <miscfs/syncfs/syncfs.h> /* XXX: for syncer_mutex reference */

/*
 * waitq data structures
 */

/*
 * While a request is going to userspace, park the caller within the
 * kernel.  This is the kernel counterpart of "struct puffs_req".
 */
struct puffs_msgpark {
	struct puffs_req	*park_preq;	/* req followed by buf	*/

	size_t			park_copylen;	/* userspace copylength	*/
	size_t			park_maxlen;	/* max size in comeback */

	parkdone_fn		park_done;	/* "biodone" a'la puffs	*/
	void			*park_donearg;

	int			park_flags;
	int			park_refcount;

	kcondvar_t		park_cv;
	kmutex_t		park_mtx;

	TAILQ_ENTRY(puffs_msgpark) park_entries;
};
#define PARKFLAG_WAITERGONE	0x01
#define PARKFLAG_DONE		0x02
#define PARKFLAG_ONQUEUE1	0x04
#define PARKFLAG_ONQUEUE2	0x08
#define PARKFLAG_CALL		0x10
#define PARKFLAG_WANTREPLY	0x20
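
/*
 * Park lifecycle in a nutshell: a park is allocated with a single
 * reference and PARKFLAG_WANTREPLY set.  touser() puts it on the
 * outgoing queue (PARKFLAG_ONQUEUE1), puffs_msgif_getout() hands it
 * towards userspace and puffs_msgif_releaseout() moves it onto the
 * reply-waiting queue (PARKFLAG_ONQUEUE2), where puffs_msgif_incoming()
 * eventually completes it (PARKFLAG_DONE).  PARKFLAG_WAITERGONE marks
 * a caller which gave up waiting, and PARKFLAG_CALL requests completion
 * through park_done() instead of waking up a sleeper.
 */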

static pool_cache_t parkpc;

static int
makepark(void *arg, void *obj, int flags)
{
	struct puffs_msgpark *park = obj;

	mutex_init(&park->park_mtx, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&park->park_cv, "puffsrpl");

	return 0;
}

static void
nukepark(void *arg, void *obj)
{
	struct puffs_msgpark *park = obj;

	cv_destroy(&park->park_cv);
	mutex_destroy(&park->park_mtx);
}

void
puffs_msgif_init(void)
{

	parkpc = pool_cache_init(sizeof(struct puffs_msgpark), 0, 0, 0,
	    "puffprkl", NULL, IPL_NONE, makepark, nukepark, NULL);
}

void
puffs_msgif_destroy(void)
{

	pool_cache_destroy(parkpc);
}

static int alloced;

static struct puffs_msgpark *
puffs_msgpark_alloc(int waitok)
{
	struct puffs_msgpark *park;

	park = pool_cache_get(parkpc, waitok ? PR_WAITOK : PR_NOWAIT);
	if (park == NULL)
		return park;

	park->park_refcount = 1;
	park->park_preq = NULL;
	park->park_flags = PARKFLAG_WANTREPLY;

	alloced++;	/* balances the decrement in puffs_msgpark_release1() */
	return park;
}

static void
puffs_msgpark_reference(struct puffs_msgpark *park)
{

	KASSERT(mutex_owned(&park->park_mtx));
	park->park_refcount++;
}

/*
 * Release reference to park structure.
 */
static void
puffs_msgpark_release1(struct puffs_msgpark *park, int howmany)
{
	struct puffs_req *preq = park->park_preq;
	int refcnt;

	KASSERT(mutex_owned(&park->park_mtx));
	refcnt = park->park_refcount -= howmany;
	mutex_exit(&park->park_mtx);

	KASSERT(refcnt >= 0);

	if (refcnt == 0) {
		alloced--;
		if (preq)
			kmem_free(preq, park->park_maxlen);
		pool_cache_put(parkpc, park);
	}
}
#define puffs_msgpark_release(a) puffs_msgpark_release1(a, 1)

#ifdef PUFFSDEBUG
static void
parkdump(struct puffs_msgpark *park)
{

	DPRINTF(("park %p, preq %p, id %" PRIu64 "\n"
	    "\tcopy %zu, max %zu - done: %p/%p\n"
	    "\tflags 0x%08x, refcount %d, cv/mtx: %p/%p\n",
	    park, park->park_preq, park->park_preq->preq_id,
	    park->park_copylen, park->park_maxlen,
	    park->park_done, park->park_donearg,
	    park->park_flags, park->park_refcount,
	    &park->park_cv, &park->park_mtx));
}

static void
parkqdump(struct puffs_wq *q, int dumpall)
{
	struct puffs_msgpark *park;
	int total = 0;

	TAILQ_FOREACH(park, q, park_entries) {
		if (dumpall)
			parkdump(park);
		total++;
	}
	DPRINTF(("puffs waitqueue at %p dumped, %d total\n", q, total));
}
#endif /* PUFFSDEBUG */

/*
 * A word about locking in the park structures: the lock protects the
 * fields of the *park* structure (not preq) and acts as an interlock
 * in cv operations.  The lock is always internal to this module and
 * callers do not need to worry about it.
 */
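
/*
 * Example (a sketch only, not compiled code): a typical caller pairs
 * the routines below roughly like this.  The message structure and
 * optype are illustrative placeholders; the real callers live in
 * puffs_vnops.c and puffs_vfsops.c.
 *
 *	struct puffs_msgpark *park;
 *	struct my_vfs_arg *arg;		(hypothetical message layout)
 *	int error;
 *
 *	puffs_msgmem_alloc(sizeof(*arg), &park, (void **)&arg, 1);
 *	... fill in *arg ...
 *	error = puffs_msg_vfs(pmp, park, optype);
 *	... on success, *arg now holds the file server's reply ...
 *	puffs_msgmem_release(park);
 */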

int
puffs_msgmem_alloc(size_t len, struct puffs_msgpark **ppark, void **mem,
	int cansleep)
{
	struct puffs_msgpark *park;
	void *m;

	m = kmem_zalloc(len, cansleep ? KM_SLEEP : KM_NOSLEEP);
	if (m == NULL) {
		KASSERT(cansleep == 0);
		return ENOMEM;
	}

	park = puffs_msgpark_alloc(cansleep);
	if (park == NULL) {
		KASSERT(cansleep == 0);
		kmem_free(m, len);
		return ENOMEM;
	}

	park->park_preq = m;
	park->park_maxlen = len;

	*ppark = park;
	*mem = m;

	return 0;
}

void
puffs_msgmem_release(struct puffs_msgpark *park)
{

	if (park == NULL)
		return;

	mutex_enter(&park->park_mtx);
	puffs_msgpark_release(park);
}

void
puffs_msg_setfaf(struct puffs_msgpark *park)
{

	park->park_flags &= ~PARKFLAG_WANTREPLY;
}

/*
 * kernel-user-kernel waitqueues
 */

static int touser(struct puffs_mount *, struct puffs_msgpark *);

static uint64_t
puffs_getmsgid(struct puffs_mount *pmp)
{
	uint64_t rv;

	mutex_enter(&pmp->pmp_lock);
	rv = pmp->pmp_nextmsgid++;
	mutex_exit(&pmp->pmp_lock);

	return rv;
}
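
/*
 * Note that an id is fetched only for operations which wait for a
 * reply; fire-and-forget requests leave preq_id untouched (cf. the
 * branch in touser()), and incoming replies are matched against
 * waiting parks purely by this id.
 */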

/* vfs request */
int
puffs_msg_vfs(struct puffs_mount *pmp, struct puffs_msgpark *park, int optype)
{

	park->park_preq->preq_opclass = PUFFSOP_VFS;
	park->park_preq->preq_optype = optype;

	park->park_copylen = park->park_maxlen;

	return touser(pmp, park);
}

/*
 * vnode level request
 */
int
puffs_msg_vn(struct puffs_mount *pmp, struct puffs_msgpark *park,
	int optype, size_t delta, struct vnode *vp_opc, struct vnode *vp_aux)
{
	struct puffs_req *preq;
	void *cookie = VPTOPNC(vp_opc);
	struct puffs_node *pnode;
	int rv;

	park->park_preq->preq_opclass = PUFFSOP_VN;
	park->park_preq->preq_optype = optype;
	park->park_preq->preq_cookie = cookie;

	KASSERT(delta < park->park_maxlen); /* "<=" wouldn't make sense */
	park->park_copylen = park->park_maxlen - delta;

	rv = touser(pmp, park);

	/*
	 * Check if the user server requests that inactive be called
	 * when the time is right.
	 */
	preq = park->park_preq;
	if (preq->preq_setbacks & PUFFS_SETBACK_INACT_N1) {
		pnode = vp_opc->v_data;
		pnode->pn_stat |= PNODE_DOINACT;
	}
	if (preq->preq_setbacks & PUFFS_SETBACK_INACT_N2) {
		/* if no vp_aux, just ignore */
		if (vp_aux) {
			pnode = vp_aux->v_data;
			pnode->pn_stat |= PNODE_DOINACT;
		}
	}
	if (preq->preq_setbacks & PUFFS_SETBACK_NOREF_N1) {
		pnode = vp_opc->v_data;
		pnode->pn_stat |= PNODE_NOREFS;
	}
	if (preq->preq_setbacks & PUFFS_SETBACK_NOREF_N2) {
		/* if no vp_aux, just ignore */
		if (vp_aux) {
			pnode = vp_aux->v_data;
			pnode->pn_stat |= PNODE_NOREFS;
		}
	}

	return rv;
}

void
puffs_msg_vncall(struct puffs_mount *pmp, struct puffs_msgpark *park,
	int optype, size_t delta, parkdone_fn donefn, void *donearg,
	struct vnode *vp_opc)
{
	void *cookie = VPTOPNC(vp_opc);

	park->park_preq->preq_opclass = PUFFSOP_VN;
	park->park_preq->preq_optype = optype;
	park->park_preq->preq_cookie = cookie;

	KASSERT(delta < park->park_maxlen);
	park->park_copylen = park->park_maxlen - delta;
	park->park_done = donefn;
	park->park_donearg = donearg;
	park->park_flags |= PARKFLAG_CALL;

	(void) touser(pmp, park);
}

int
puffs_msg_raw(struct puffs_mount *pmp, struct puffs_msgpark *park)
{

	park->park_copylen = park->park_maxlen;

	return touser(pmp, park);
}

void
puffs_msg_errnotify(struct puffs_mount *pmp, uint8_t type, int error,
	const char *str, void *cookie)
{
	struct puffs_msgpark *park;
	struct puffs_error *perr;

	puffs_msgmem_alloc(sizeof(struct puffs_error), &park, (void **)&perr, 1);

	perr->perr_error = error;
	strlcpy(perr->perr_str, str, sizeof(perr->perr_str));

	park->park_preq->preq_opclass |= PUFFSOP_ERROR | PUFFSOPFLAG_FAF;
	park->park_preq->preq_optype = type;
	park->park_preq->preq_cookie = cookie;

	park->park_copylen = park->park_maxlen;

	/*
	 * Error notifications are fire-and-forget: mark the park FAF
	 * so that touser() does not sleep waiting for a reply which
	 * the file server will never send, and drop our reference
	 * once the message is on its way.
	 */
	puffs_msg_setfaf(park);
	(void)touser(pmp, park);
	puffs_msgmem_release(park);
}

/*
 * Wait out the userspace ping-pong game in the calling process
 * context, unless this is a FAF / async call, in which case the
 * request is merely enqueued and we return immediately.
 */
static int
touser(struct puffs_mount *pmp, struct puffs_msgpark *park)
{
	struct lwp *l = curlwp;
	struct mount *mp;
	struct puffs_req *preq;
	int rv = 0;

	mp = PMPTOMP(pmp);
	preq = park->park_preq;
	preq->preq_buflen = park->park_maxlen;
	KASSERT(preq->preq_id == 0);

	if ((park->park_flags & PARKFLAG_WANTREPLY) == 0)
		preq->preq_opclass |= PUFFSOPFLAG_FAF;
	else
		preq->preq_id = puffs_getmsgid(pmp);

	/* fill in caller information */
	preq->preq_pid = l->l_proc->p_pid;
	preq->preq_lid = l->l_lid;

	/*
	 * To support cv_sig, yet another movie: check if there are signals
	 * pending and we are issuing a non-FAF.  If so, return an error
	 * directly UNLESS we are issuing INACTIVE.  In that case, convert
	 * it to a FAF, fire it off to the file server and return an error.
	 * Yes, this is bordering on disgusting.  Barfbags are on me.
	 */
	if ((park->park_flags & PARKFLAG_WANTREPLY)
	   && (park->park_flags & PARKFLAG_CALL) == 0
	   && (l->l_flag & LW_PENDSIG) != 0 && sigispending(l, 0)) {
		if (PUFFSOP_OPCLASS(preq->preq_opclass) == PUFFSOP_VN
		    && preq->preq_optype == PUFFS_VN_INACTIVE) {
			park->park_preq->preq_opclass |= PUFFSOPFLAG_FAF;
			park->park_flags &= ~PARKFLAG_WANTREPLY;
			DPRINTF(("puffs touser: converted to FAF %p\n", park));
			rv = EINTR;
		} else {
			return EINTR;
		}
	}

	/*
	 * test for suspension lock.
	 *
	 * Note that we *DO NOT* keep the lock, since that might block
	 * lock acquiring PLUS it would give userlandia control over
	 * the lock.  The operation queue enforces a strict ordering:
	 * when the fs server gets in the op stream, it knows things
	 * are in order.  The kernel locks can't guarantee that for
	 * userspace, in any case.
	 *
	 * BUT: this presents a problem for ops which have a consistency
	 * clause based on more than one operation.  Unfortunately such
	 * operations (read, write) do not reliably work yet.
	 *
	 * Ya, Ya, it's wrong wong wrong, me be fixink this someday.
	 *
	 * XXX: and there is one more problem.  We sometimes need to
	 * take a lazy lock in case the fs is suspending and we are
	 * executing as the fs server context.  This might happen
	 * e.g. in the case that the user server triggers a reclaim
	 * in the kernel while the fs is suspending.  It's not a very
	 * likely event, but it needs to be fixed some day.
	 */

	/*
	 * MOREXXX: once PUFFS_WCACHEINFO is enabled, we can't take
	 * the mutex here, since getpages() might be called locked.
	 */
	fstrans_start(mp, FSTRANS_NORMAL);
	mutex_enter(&pmp->pmp_lock);
	fstrans_done(mp);

	if (pmp->pmp_status != PUFFSTAT_RUNNING) {
		mutex_exit(&pmp->pmp_lock);
		return ENXIO;
	}

#ifdef PUFFSDEBUG
	parkqdump(&pmp->pmp_msg_touser, puffsdebug > 1);
	parkqdump(&pmp->pmp_msg_replywait, puffsdebug > 1);
#endif

	mutex_enter(&park->park_mtx);
	TAILQ_INSERT_TAIL(&pmp->pmp_msg_touser, park, park_entries);
	park->park_flags |= PARKFLAG_ONQUEUE1;
	puffs_mp_reference(pmp);
	pmp->pmp_msg_touser_count++;
	mutex_exit(&pmp->pmp_lock);

	DPRINTF(("touser: req %" PRIu64 ", preq: %p, park: %p, "
	    "c/t: 0x%x/0x%x, f: 0x%x\n", preq->preq_id, preq, park,
	    preq->preq_opclass, preq->preq_optype, park->park_flags));

	cv_broadcast(&pmp->pmp_msg_waiter_cv);
	putter_notify(pmp->pmp_pi);

	if ((park->park_flags & PARKFLAG_WANTREPLY)
	    && (park->park_flags & PARKFLAG_CALL) == 0) {
		int error;

		error = cv_wait_sig(&park->park_cv, &park->park_mtx);
		DPRINTF(("puffs_touser: waiter for %p woke up with %d\n",
		    park, error));
		if (error) {
			park->park_flags |= PARKFLAG_WAITERGONE;
			if (park->park_flags & PARKFLAG_DONE) {
				rv = preq->preq_rv;
			} else {
				/*
				 * Ok, we marked it as going away, but
				 * we still need to do queue ops.  Take
				 * the locks in the correct order.
				 *
				 * We don't want to release our reference
				 * if it's on the replywait queue, to avoid
				 * erroring out towards the file server.
				 * The putop() code will DTRT.
				 */
				mutex_exit(&park->park_mtx);
				mutex_enter(&pmp->pmp_lock);
				mutex_enter(&park->park_mtx);

				/* remove from queue1 */
				if (park->park_flags & PARKFLAG_ONQUEUE1) {
					TAILQ_REMOVE(&pmp->pmp_msg_touser,
					    park, park_entries);
					pmp->pmp_msg_touser_count--;
					park->park_flags &= ~PARKFLAG_ONQUEUE1;
				}

				/*
				 * If it's waiting for a response already,
				 * boost the reference count.  The park will
				 * get nuked once the response arrives from
				 * the file server.
				 */
				if (park->park_flags & PARKFLAG_ONQUEUE2)
					puffs_msgpark_reference(park);

				mutex_exit(&pmp->pmp_lock);

				rv = error;
			}
		} else {
			rv = preq->preq_rv;
		}

		/*
		 * Retake the lock and release.  This makes sure (haha,
		 * I'm humorous) that we don't process the same vnode in
		 * multiple threads due to the lock hacks we have in
		 * puffs_lock().  In reality this is well protected by
		 * the biglock, but once that's gone, well, hopefully
		 * this will be fixed for real.  (and when you read this
		 * comment in 2017 and subsequently barf, my condolences ;).
		 */
		if (rv == 0 && !fstrans_is_owner(mp)) {
			fstrans_start(mp, FSTRANS_NORMAL);
			fstrans_done(mp);
		}

	} else {
		/*
		 * Take an extra reference for FAF, i.e. don't free us
		 * immediately upon return to the caller, but rather
		 * only when the message has been transported.
		 */
		puffs_msgpark_reference(park);
	}

	mutex_exit(&park->park_mtx);

	mutex_enter(&pmp->pmp_lock);
	puffs_mp_release(pmp);
	mutex_exit(&pmp->pmp_lock);

	return rv;
}

/*
 * Get the next request in the outgoing queue.  "maxsize" controls the
 * size the caller can accommodate and "nonblock" tells whether to
 * return immediately instead of sleeping when no input is pending.
 * Handles all locking internally.
 */
int
puffs_msgif_getout(void *this, size_t maxsize, int nonblock,
	uint8_t **data, size_t *dlen, void **parkptr)
{
	struct puffs_mount *pmp = this;
	struct puffs_msgpark *park;
	struct puffs_req *preq;
	int error;

	error = 0;
	mutex_enter(&pmp->pmp_lock);
	puffs_mp_reference(pmp);
	for (;;) {
		/* RIP? */
		if (pmp->pmp_status != PUFFSTAT_RUNNING) {
			error = ENXIO;
			break;
		}

		/* need platinum yendorian express card? */
		if (TAILQ_EMPTY(&pmp->pmp_msg_touser)) {
			DPRINTF(("puffs_getout: no outgoing op, "));
			if (nonblock) {
				DPRINTF(("returning EWOULDBLOCK\n"));
				error = EWOULDBLOCK;
				break;
			}
			DPRINTF(("waiting ...\n"));

			error = cv_wait_sig(&pmp->pmp_msg_waiter_cv,
			    &pmp->pmp_lock);
			if (error)
				break;
			else
				continue;
		}

		park = TAILQ_FIRST(&pmp->pmp_msg_touser);
		if (park == NULL)
			continue;

		mutex_enter(&park->park_mtx);
		puffs_msgpark_reference(park);

		DPRINTF(("puffs_getout: found park at %p, ", park));

		/* If it's a goner, don't process it any further */
		if (park->park_flags & PARKFLAG_WAITERGONE) {
			DPRINTF(("waitergone!\n"));
			puffs_msgpark_release(park);
			continue;
		}
		preq = park->park_preq;

#if 0
		/* check size */
		/*
		 * XXX: this check is not valid for now, we don't know
		 * the size of the caller's input buffer.  i.e. this
		 * will most likely go away
		 */
		if (maxsize < preq->preq_frhdr.pfr_len) {
			DPRINTF(("buffer too small\n"));
			puffs_msgpark_release(park);
			error = E2BIG;
			break;
		}
#endif

		DPRINTF(("returning\n"));

		/*
		 * Ok, we found what we came for.  Release it from the
		 * outgoing queue but do not unlock.  We will unlock
		 * only after we "releaseout" it to avoid complications:
		 * otherwise it is (theoretically) possible for userland
		 * to race us into "put" before we have a chance to put
		 * this baby on the receiving queue.
		 */
		TAILQ_REMOVE(&pmp->pmp_msg_touser, park, park_entries);
		KASSERT(park->park_flags & PARKFLAG_ONQUEUE1);
		park->park_flags &= ~PARKFLAG_ONQUEUE1;
		mutex_exit(&park->park_mtx);

		pmp->pmp_msg_touser_count--;
		KASSERT(pmp->pmp_msg_touser_count >= 0);

		break;
	}
	puffs_mp_release(pmp);
	mutex_exit(&pmp->pmp_lock);

	if (error == 0) {
		*data = (uint8_t *)preq;
		preq->preq_pth.pth_framelen = park->park_copylen;
		*dlen = preq->preq_pth.pth_framelen;
		*parkptr = park;
	}

	return error;
}
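
/*
 * The transport flow in short: putter calls puffs_msgif_getout() to
 * borrow the head of the outgoing queue, copies *dlen bytes starting
 * at *data out to the file server and then hands the park back via
 * puffs_msgif_releaseout() together with the copy status.  Only at
 * that point does the park either enter the replywait queue or get
 * freed, so userspace cannot reply to a message before the kernel is
 * ready to match the reply.
 */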

/*
 * Release outgoing structure.  Now, depending on the success of the
 * outgoing send, it is either going onto the result waiting queue
 * or the death chamber.
 */
void
puffs_msgif_releaseout(void *this, void *parkptr, int status)
{
	struct puffs_mount *pmp = this;
	struct puffs_msgpark *park = parkptr;

	DPRINTF(("puffs_releaseout: returning park %p, errno %d: ",
	    park, status));
	mutex_enter(&pmp->pmp_lock);
	mutex_enter(&park->park_mtx);
	if (park->park_flags & PARKFLAG_WANTREPLY) {
		if (status == 0) {
			DPRINTF(("enqueue replywait\n"));
			TAILQ_INSERT_TAIL(&pmp->pmp_msg_replywait, park,
			    park_entries);
			park->park_flags |= PARKFLAG_ONQUEUE2;
		} else {
			DPRINTF(("error path!\n"));
			park->park_preq->preq_rv = status;
			park->park_flags |= PARKFLAG_DONE;
			cv_signal(&park->park_cv);
		}
		puffs_msgpark_release(park);
	} else {
		DPRINTF(("release\n"));
		puffs_msgpark_release1(park, 2);
	}
	mutex_exit(&pmp->pmp_lock);
}

size_t
puffs_msgif_waitcount(void *this)
{
	struct puffs_mount *pmp = this;
	size_t rv;

	mutex_enter(&pmp->pmp_lock);
	rv = pmp->pmp_msg_touser_count;
	mutex_exit(&pmp->pmp_lock);

	return rv;
}

/*
 * XXX: locking with this one?
 */
static void
puffs_msgif_incoming(void *this, struct puffs_req *preq)
{
	struct puffs_mount *pmp = this;
	struct putter_hdr *pth = &preq->preq_pth;
	struct puffs_msgpark *park;
	int release, wgone;

	/* XXX */
	if (PUFFSOP_OPCLASS(preq->preq_opclass) != PUFFSOP_VN
	    && PUFFSOP_OPCLASS(preq->preq_opclass) != PUFFSOP_VFS)
		return;

	mutex_enter(&pmp->pmp_lock);

	/* Locate waiter */
	TAILQ_FOREACH(park, &pmp->pmp_msg_replywait, park_entries) {
		if (park->park_preq->preq_id == preq->preq_id)
			break;
	}
	if (park == NULL) {
		DPRINTF(("puffs_msgif_incoming: no request: %" PRIu64 "\n",
		    preq->preq_id));
		mutex_exit(&pmp->pmp_lock);
		return; /* XXX send error */
	}

	mutex_enter(&park->park_mtx);
	puffs_msgpark_reference(park);
	if (pth->pth_framelen > park->park_maxlen) {
		DPRINTF(("puffs_msgif_incoming: invalid buffer length: "
		    "%" PRIu64 " (req %" PRIu64 ")\n", pth->pth_framelen,
		    preq->preq_id));
		park->park_preq->preq_rv = EPROTO;
		cv_signal(&park->park_cv);
		puffs_msgpark_release(park);
		mutex_exit(&pmp->pmp_lock);
		return; /* XXX: error */
	}
	wgone = park->park_flags & PARKFLAG_WAITERGONE;

	KASSERT(park->park_flags & PARKFLAG_ONQUEUE2);
	TAILQ_REMOVE(&pmp->pmp_msg_replywait, park, park_entries);
	park->park_flags &= ~PARKFLAG_ONQUEUE2;
	mutex_exit(&pmp->pmp_lock);

	if (wgone) {
		DPRINTF(("puffs_msgif_incoming: bad service - waiter gone "
		    "for park %p\n", park));
		release = 2;
	} else {
		if (park->park_flags & PARKFLAG_CALL) {
			DPRINTF(("puffs_msgif_incoming: call for %p, arg %p\n",
			    park->park_preq, park->park_donearg));
			park->park_done(pmp, preq, park->park_donearg);
			release = 2;
		} else {
			/* XXX: yes, I know */
			memcpy(park->park_preq, preq, pth->pth_framelen);
			release = 1;
		}
	}

	if (!wgone) {
		DPRINTF(("puffs_msgif_incoming: flagging done for "
		    "park %p\n", park));
		cv_signal(&park->park_cv);
	}

	park->park_flags |= PARKFLAG_DONE;
	puffs_msgpark_release1(park, release);
}
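
/*
 * A note on the reference counting above: "release" is 2 when nobody
 * will look at the park after us, i.e. when the waiter is gone or
 * when PARKFLAG_CALL made park_done() consume the reply; in those
 * cases we drop both our temporary reference and the waiter's one.
 * When a live sleeper will copy out the reply itself, we drop only
 * our own reference and cv_signal() hands the park back to it.
 */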

/*
 * helpers
 */
static void
dosuspendresume(void *arg)
{
	struct puffs_mount *pmp = arg;
	struct mount *mp;
	int rv;

	mp = PMPTOMP(pmp);
	/*
	 * XXX?  does this really do any good or is it just
	 * paranoid stupidity?  or stupid paranoia?
	 */
	if (mp->mnt_iflag & IMNT_UNMOUNT) {
		printf("puffs dosuspendresume(): detected suspend on "
		    "unmounting fs\n");
		goto out;
	}

	/* Do the dance.  Allow only one concurrent suspend */
	rv = vfs_suspend(PMPTOMP(pmp), 1);
	if (rv == 0)
		vfs_resume(PMPTOMP(pmp));

 out:
	mutex_enter(&pmp->pmp_lock);
	KASSERT(pmp->pmp_suspend == 1);
	pmp->pmp_suspend = 0;
	puffs_mp_release(pmp);
	mutex_exit(&pmp->pmp_lock);

	kthread_exit(0);
}

static void
puffsop_suspend(struct puffs_mount *pmp)
{
	int rv = 0;

	mutex_enter(&pmp->pmp_lock);
	if (pmp->pmp_suspend || pmp->pmp_status != PUFFSTAT_RUNNING) {
		rv = EBUSY;
	} else {
		puffs_mp_reference(pmp);
		pmp->pmp_suspend = 1;
	}
	mutex_exit(&pmp->pmp_lock);
	if (rv)
		return;
	rv = kthread_create(PRI_NONE, 0, NULL, dosuspendresume,
	    pmp, NULL, "puffsusp");

	/* XXX: "return" rv */
}

static int
puffsop_flush(struct puffs_mount *pmp, struct puffs_flush *pf)
{
	struct vnode *vp;
	voff_t offlo, offhi;
	int rv, flags = 0;

	/* XXX: slurry */
	if (pf->pf_op == PUFFS_INVAL_NAMECACHE_ALL) {
		cache_purgevfs(PMPTOMP(pmp));
		return 0;
	}

	/*
	 * Get vnode, don't lock it.  Namecache is protected by its own lock
	 * and we have a reference to protect against premature harvesting.
	 *
	 * The node we want here might be locked and the op is in
	 * userspace waiting for us to complete ==> deadlock.  Another
	 * reason we need to eventually bump locking to userspace, as we
	 * will need to lock the node if we wish to do flushes.
	 */
	rv = puffs_cookie2vnode(pmp, pf->pf_cookie, 0, 0, &vp);
	if (rv) {
		if (rv == PUFFS_NOSUCHCOOKIE)
			return ENOENT;
		return rv;
	}

	switch (pf->pf_op) {
#if 0
	/* not quite ready, yet */
	case PUFFS_INVAL_NAMECACHE_NODE:
	struct componentname *pf_cn;
	char *name;
		/* get comfortab^Wcomponentname */
		MALLOC(pf_cn, struct componentname *,
		    sizeof(struct componentname), M_PUFFS, M_WAITOK | M_ZERO);
		memset(pf_cn, 0, sizeof(struct componentname));
		break;

#endif
	case PUFFS_INVAL_NAMECACHE_DIR:
		if (vp->v_type != VDIR) {
			rv = EINVAL;
			break;
		}
		cache_purge1(vp, NULL, PURGE_CHILDREN);
		break;

	case PUFFS_INVAL_PAGECACHE_NODE_RANGE:
		flags = PGO_FREE;
		/*FALLTHROUGH*/
	case PUFFS_FLUSH_PAGECACHE_NODE_RANGE:
		if (flags == 0)
			flags = PGO_CLEANIT;

		if (pf->pf_end > vp->v_size || vp->v_type != VREG) {
			rv = EINVAL;
			break;
		}

		offlo = trunc_page(pf->pf_start);
		offhi = round_page(pf->pf_end);
		if (offhi != 0 && offlo >= offhi) {
			rv = EINVAL;
			break;
		}

		simple_lock(&vp->v_uobj.vmobjlock);
		rv = VOP_PUTPAGES(vp, offlo, offhi, flags);
		break;

	default:
		rv = EINVAL;
	}

	vrele(vp);

	return rv;
}
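
/*
 * Example (a sketch): a file server invalidates the page cache of a
 * regular file by sending a PUFFSOP_FLUSH frame along these lines
 * (field names as used above; the exact message layout is defined
 * in puffs_msgif.h):
 *
 *	struct puffs_flush pf;
 *
 *	pf.pf_op = PUFFS_INVAL_PAGECACHE_NODE_RANGE;
 *	pf.pf_cookie = cookie_of_the_node;
 *	pf.pf_start = 0;
 *	pf.pf_end = len;
 *
 * which puffsop_flush() above turns into VOP_PUTPAGES(vp,
 * trunc_page(pf_start), round_page(pf_end), PGO_FREE).
 */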

int
puffs_msgif_dispatch(void *this, uint8_t *buf)
{
	struct puffs_mount *pmp = this;
	struct puffs_req *preq = (struct puffs_req *)buf;

	switch (PUFFSOP_OPCLASS(preq->preq_opclass)) {
	case PUFFSOP_VN:
	case PUFFSOP_VFS:
		puffs_msgif_incoming(pmp, preq);
		break;
	case PUFFSOP_FLUSH:
		puffsop_flush(pmp, (void *)buf);
		break;
	case PUFFSOP_SUSPEND:
		puffsop_suspend(pmp);
		break;
	default:
		/* XXX: send error */
		break;
	}

	return 0;
}

int
puffs_msgif_close(void *this)
{
	struct puffs_mount *pmp = this;
	struct mount *mp = PMPTOMP(pmp);
	int gone, rv;

	mutex_enter(&pmp->pmp_lock);
	puffs_mp_reference(pmp);

	/*
	 * Free the waiting callers before proceeding any further.
	 * The syncer might be jogging around in this file system
	 * currently.  If we allow it to go to the userspace of no
	 * return while trying to get the syncer lock, well ...
	 * synclk: I feel happy, I feel fine.
	 * lockmgr: You're not fooling anyone, you know.
	 */
	puffs_userdead(pmp);

	/*
	 * Make sure someone from puffs_unmount() isn't currently in
	 * userspace.  If we don't take this precautionary step,
	 * they might notice that the mountpoint has disappeared
	 * from under them once they return.  Especially note that we
	 * cannot simply test for an unmounter before calling
	 * dounmount(), since it might be possible that that particular
	 * invocation of unmount was called without MNT_FORCE.  Here we
	 * *must* make sure unmount succeeds.  Also, restart is necessary
	 * since pmp isn't locked.  We might end up with PUTTER_DEAD after
	 * restart and exit from there.
	 */
	if (pmp->pmp_unmounting) {
		cv_wait(&pmp->pmp_unmounting_cv, &pmp->pmp_lock);
		puffs_mp_release(pmp);
		mutex_exit(&pmp->pmp_lock);
		DPRINTF(("puffs_fop_close: unmount was in progress for pmp %p, "
		    "restart\n", pmp));
		return ERESTART;
	}

	/* Won't access pmp from here anymore */
	puffs_mp_release(pmp);
	mutex_exit(&pmp->pmp_lock);

	/*
	 * Detach from VFS.  First do the necessary XXX-dance (from
	 * sys_unmount() & other callers of dounmount()).
	 *
	 * XXX Freeze syncer.  Must do this before locking the
	 * mount point.  See dounmount() for details.
	 *
	 * XXX2: take a reference to the mountpoint before starting to
	 * wait for syncer_mutex.  Otherwise the mountpoint can be
	 * wiped out while we wait.
	 */
	simple_lock(&mp->mnt_slock);
	mp->mnt_wcnt++;
	simple_unlock(&mp->mnt_slock);

	mutex_enter(&syncer_mutex);

	simple_lock(&mp->mnt_slock);
	mp->mnt_wcnt--;
	if (mp->mnt_wcnt == 0)
		wakeup(&mp->mnt_wcnt);
	gone = mp->mnt_iflag & IMNT_GONE;
	simple_unlock(&mp->mnt_slock);
	if (gone) {
		mutex_exit(&syncer_mutex);
		return 0;
	}

	/*
	 * Microscopic race condition here (although not with the current
	 * kernel), but we can't really fix it without starting a crusade
	 * against vfs_busy(), so let it be, let it be, let it be.
	 */

	/*
	 * The only way vfs_busy() will fail for us is if the filesystem
	 * is already a goner.
	 * XXX: skating on the thin ice of modern calling conventions ...
	 */
	if (vfs_busy(mp, 0, 0)) {
		mutex_exit(&syncer_mutex);
		return 0;
	}

	/*
	 * Once we have the mount point, unmount() can't interfere..
	 * or at least in theory it shouldn't.  dounmount() reentrancy
	 * might require some visiting at some point.
	 */
	rv = dounmount(mp, MNT_FORCE, curlwp);
	KASSERT(rv == 0);

	return 0;
}

/*
 * We're dead, kaput, RIP, slightly more than merely pining for the
 * fjords, belly-up, fallen, lifeless, finished, expired, gone to meet
 * our maker, ceased to be, etcetc.  YASD.  It's a dead FS!
 *
 * Caller must hold puffs mutex.
 */
void
puffs_userdead(struct puffs_mount *pmp)
{
	struct puffs_msgpark *park, *park_next;

	/*
	 * Mark filesystem status as dying so that operations don't
	 * attempt to march to userspace any longer.
	 */
	pmp->pmp_status = PUFFSTAT_DYING;

	/* signal waiters on REQUEST TO file server queue */
	for (park = TAILQ_FIRST(&pmp->pmp_msg_touser); park; park = park_next) {
		mutex_enter(&park->park_mtx);
		puffs_msgpark_reference(park);
		park_next = TAILQ_NEXT(park, park_entries);

		KASSERT(park->park_flags & PARKFLAG_ONQUEUE1);
		TAILQ_REMOVE(&pmp->pmp_msg_touser, park, park_entries);
		park->park_flags &= ~PARKFLAG_ONQUEUE1;
		pmp->pmp_msg_touser_count--;

		/*
		 * Even though waiters on QUEUE1 are removed in touser()
		 * in case of WAITERGONE, it is still possible for us to
		 * get raced here due to having to retake locks in said
		 * touser().  In the race case simply "ignore" the item
		 * on the queue and move on to the next one.
		 */
		if (park->park_flags & PARKFLAG_WAITERGONE) {
			KASSERT((park->park_flags & PARKFLAG_CALL) == 0);
			KASSERT(park->park_flags & PARKFLAG_WANTREPLY);
			puffs_msgpark_release(park);

		} else {
			park->park_preq->preq_rv = ENXIO;

			if (park->park_flags & PARKFLAG_CALL) {
				park->park_done(pmp, park->park_preq,
				    park->park_donearg);
				puffs_msgpark_release1(park, 2);
			} else if ((park->park_flags & PARKFLAG_WANTREPLY) == 0) {
				puffs_msgpark_release1(park, 2);
			} else {
				cv_signal(&park->park_cv);
				puffs_msgpark_release(park);
			}
		}
	}

	/* signal waiters on RESPONSE FROM file server queue */
	for (park = TAILQ_FIRST(&pmp->pmp_msg_replywait); park; park = park_next) {
		mutex_enter(&park->park_mtx);
		puffs_msgpark_reference(park);
		park_next = TAILQ_NEXT(park, park_entries);

		KASSERT(park->park_flags & PARKFLAG_ONQUEUE2);
		KASSERT(park->park_flags & PARKFLAG_WANTREPLY);

		TAILQ_REMOVE(&pmp->pmp_msg_replywait, park, park_entries);
		park->park_flags &= ~PARKFLAG_ONQUEUE2;

		if (park->park_flags & PARKFLAG_WAITERGONE) {
			KASSERT((park->park_flags & PARKFLAG_CALL) == 0);
			puffs_msgpark_release(park);
		} else {
			park->park_preq->preq_rv = ENXIO;
			if (park->park_flags & PARKFLAG_CALL) {
				park->park_done(pmp, park->park_preq,
				    park->park_donearg);
				puffs_msgpark_release1(park, 2);
			} else {
				cv_signal(&park->park_cv);
				puffs_msgpark_release(park);
			}
		}
	}

	cv_broadcast(&pmp->pmp_msg_waiter_cv);
}