xref: /netbsd-src/sys/fs/puffs/puffs_msgif.c (revision 8b0f9554ff8762542c4defc4f70e1eb76fb508fa)
1 /*	$NetBSD: puffs_msgif.c,v 1.61 2007/12/05 12:11:56 pooka Exp $	*/
2 
3 /*
4  * Copyright (c) 2005, 2006, 2007  Antti Kantee.  All Rights Reserved.
5  *
6  * Development of this software was supported by the
7  * Google Summer of Code program and the Ulla Tuominen Foundation.
8  * The Google SoC project was mentored by Bill Studenmund.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
20  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: puffs_msgif.c,v 1.61 2007/12/05 12:11:56 pooka Exp $");
34 
35 #include <sys/param.h>
36 #include <sys/fstrans.h>
37 #include <sys/kmem.h>
38 #include <sys/kthread.h>
39 #include <sys/lock.h>
40 #include <sys/malloc.h>
41 #include <sys/mount.h>
42 #include <sys/namei.h>
43 #include <sys/proc.h>
44 #include <sys/vnode.h>
45 
46 #include <dev/putter/putter_sys.h>
47 
48 #include <fs/puffs/puffs_msgif.h>
49 #include <fs/puffs/puffs_sys.h>
50 
51 #include <miscfs/syncfs/syncfs.h> /* XXX: for syncer_mutex reference */
52 
53 /*
54  * waitq data structures
55  */
56 
57 /*
58  * While a request is going to userspace, park the caller within the
59  * kernel.  This is the kernel counterpart of "struct puffs_req".
60  */
61 struct puffs_msgpark {
62 	struct puffs_req	*park_preq;	/* req followed by buf	*/
63 
64 	size_t			park_copylen;	/* userspace copylength	*/
65 	size_t			park_maxlen;	/* max size in comeback */
66 
67 	parkdone_fn		park_done;	/* "biodone" a'la puffs	*/
68 	void			*park_donearg;
69 
70 	int			park_flags;
71 	int			park_refcount;
72 
73 	kcondvar_t		park_cv;
74 	kmutex_t		park_mtx;
75 
76 	TAILQ_ENTRY(puffs_msgpark) park_entries;
77 };
78 #define PARKFLAG_WAITERGONE	0x01
79 #define PARKFLAG_DONE		0x02
80 #define PARKFLAG_ONQUEUE1	0x04
81 #define PARKFLAG_ONQUEUE2	0x08
82 #define PARKFLAG_CALL		0x10
83 #define PARKFLAG_WANTREPLY	0x20
84 #define	PARKFLAG_HASERROR	0x40
85 
86 static pool_cache_t parkpc;
87 #ifdef PUFFSDEBUG
88 static int totalpark;
89 #endif
90 
91 static int
92 makepark(void *arg, void *obj, int flags)
93 {
94 	struct puffs_msgpark *park = obj;
95 
96 	mutex_init(&park->park_mtx, MUTEX_DEFAULT, IPL_NONE);
97 	cv_init(&park->park_cv, "puffsrpl");
98 
99 	return 0;
100 }
101 
102 static void
103 nukepark(void *arg, void *obj)
104 {
105 	struct puffs_msgpark *park = obj;
106 
107 	cv_destroy(&park->park_cv);
108 	mutex_destroy(&park->park_mtx);
109 }
110 
111 void
112 puffs_msgif_init()
113 {
114 
115 	parkpc = pool_cache_init(sizeof(struct puffs_msgpark), 0, 0, 0,
116 	    "puffprkl", NULL, IPL_NONE, makepark, nukepark, NULL);
117 }
118 
119 void
120 puffs_msgif_destroy()
121 {
122 
123 	pool_cache_destroy(parkpc);
124 }
125 
126 static int alloced;
127 
128 static struct puffs_msgpark *
129 puffs_msgpark_alloc(int waitok)
130 {
131 	struct puffs_msgpark *park;
132 
133 	park = pool_cache_get(parkpc, waitok ? PR_WAITOK : PR_NOWAIT);
134 	if (park == NULL)
135 		return park;
136 
137 	park->park_refcount = 1;
138 	park->park_preq = NULL;
139 	park->park_flags = PARKFLAG_WANTREPLY;
140 
141 #ifdef PUFFSDEBUG
142 	totalpark++;
143 #endif
144 
145 	return park;
146 }
147 
148 static void
149 puffs_msgpark_reference(struct puffs_msgpark *park)
150 {
151 
152 	KASSERT(mutex_owned(&park->park_mtx));
153 	park->park_refcount++;
154 }
155 
156 /*
157  * Release reference to park structure.
158  */
159 static void
160 puffs_msgpark_release1(struct puffs_msgpark *park, int howmany)
161 {
162 	struct puffs_req *preq = park->park_preq;
163 	int refcnt;
164 
165 	KASSERT(mutex_owned(&park->park_mtx));
166 	refcnt = park->park_refcount -= howmany;
167 	mutex_exit(&park->park_mtx);
168 
169 	KASSERT(refcnt >= 0);
170 
171 	if (refcnt == 0) {
172 		alloced--;
173 		if (preq)
174 			kmem_free(preq, park->park_maxlen);
175 		pool_cache_put(parkpc, park);
176 
177 #ifdef PUFFSDEBUG
178 		totalpark--;
179 #endif
180 	}
181 }
182 #define puffs_msgpark_release(a) puffs_msgpark_release1(a, 1)
183 
184 #ifdef PUFFSDEBUG
185 static void
186 parkdump(struct puffs_msgpark *park)
187 {
188 
189 	DPRINTF(("park %p, preq %p, id %" PRIu64 "\n"
190 	    "\tcopy %zu, max %zu - done: %p/%p\n"
191 	    "\tflags 0x%08x, refcount %d, cv/mtx: %p/%p\n",
192 	    park, park->park_preq, park->park_preq->preq_id,
193 	    park->park_copylen, park->park_maxlen,
194 	    park->park_done, park->park_donearg,
195 	    park->park_flags, park->park_refcount,
196 	    &park->park_cv, &park->park_mtx));
197 }
198 
199 static void
200 parkqdump(struct puffs_wq *q, int dumpall)
201 {
202 	struct puffs_msgpark *park;
203 	int total = 0;
204 
205 	TAILQ_FOREACH(park, q, park_entries) {
206 		if (dumpall)
207 			parkdump(park);
208 		total++;
209 	}
210 	DPRINTF(("puffs waitqueue at %p dumped, %d total\n", q, total));
211 
212 }
213 #endif /* PUFFSDEBUG */
214 
215 /*
216  * A word about locking in the park structures: the lock protects the
217  * fields of the *park* structure (not preq) and acts as an interlock
218  * in cv operations.  The lock is always internal to this module and
219  * callers do not need to worry about it.
220  */
221 
222 int
223 puffs_msgmem_alloc(size_t len, struct puffs_msgpark **ppark, void **mem,
224 	int cansleep)
225 {
226 	struct puffs_msgpark *park;
227 	void *m;
228 
229 	m = kmem_zalloc(len, cansleep ? KM_SLEEP : KM_NOSLEEP);
230 	if (m == NULL) {
231 		KASSERT(cansleep == 0);
232 		return ENOMEM;
233 	}
234 
235 	park = puffs_msgpark_alloc(cansleep);
236 	if (park == NULL) {
237 		KASSERT(cansleep == 0);
238 		kmem_free(m, len);
239 		return ENOMEM;
240 	}
241 
242 	park->park_preq = m;
243 	park->park_maxlen = park->park_copylen = len;
244 
245 	*ppark = park;
246 	*mem = m;
247 
248 	return 0;
249 }
250 
251 void
252 puffs_msgmem_release(struct puffs_msgpark *park)
253 {
254 
255 	if (park == NULL)
256 		return;
257 
258 	mutex_enter(&park->park_mtx);
259 	puffs_msgpark_release(park);
260 }
261 
262 void
263 puffs_msg_setfaf(struct puffs_msgpark *park)
264 {
265 
266 	KASSERT((park->park_flags & PARKFLAG_CALL) == 0);
267 	park->park_flags &= ~PARKFLAG_WANTREPLY;
268 }
269 
270 void
271 puffs_msg_setdelta(struct puffs_msgpark *park, size_t delta)
272 {
273 
274 	KASSERT(delta < park->park_maxlen); /* "<=" wouldn't make sense */
275 	park->park_copylen = park->park_maxlen - delta;
276 }
277 
278 void
279 puffs_msg_setinfo(struct puffs_msgpark *park, int class, int type, void *cookie)
280 {
281 
282 	park->park_preq->preq_opclass = PUFFSOP_OPCLASS(class);
283 	park->park_preq->preq_optype = type;
284 	park->park_preq->preq_cookie = cookie;
285 }
286 
287 void
288 puffs_msg_setcall(struct puffs_msgpark *park, parkdone_fn donefn, void *donearg)
289 {
290 
291 	KASSERT(park->park_flags & PARKFLAG_WANTREPLY);
292 	park->park_done = donefn;
293 	park->park_donearg = donearg;
294 	park->park_flags |= PARKFLAG_CALL;
295 }
296 
297 /*
298  * kernel-user-kernel waitqueues
299  */
300 
301 static uint64_t
302 puffs_getmsgid(struct puffs_mount *pmp)
303 {
304 	uint64_t rv;
305 
306 	mutex_enter(&pmp->pmp_lock);
307 	rv = pmp->pmp_nextmsgid++;
308 	mutex_exit(&pmp->pmp_lock);
309 
310 	return rv;
311 }
312 
313 /*
314  * A word about reference counting of parks.  A reference must be taken
315  * when accessing a park and additionally when it is on a queue.  So
316  * when taking it off a queue and releasing the access reference, the
317  * reference count is generally decremented by 2.
318  */
319 
320 void
321 puffs_msg_enqueue(struct puffs_mount *pmp, struct puffs_msgpark *park)
322 {
323 	struct lwp *l = curlwp;
324 	struct mount *mp;
325 	struct puffs_req *preq;
326 
327 	mp = PMPTOMP(pmp);
328 	preq = park->park_preq;
329 	preq->preq_buflen = park->park_maxlen;
330 	KASSERT(preq->preq_id == 0
331 	    || (preq->preq_opclass & PUFFSOPFLAG_ISRESPONSE));
332 
333 	if ((park->park_flags & PARKFLAG_WANTREPLY) == 0)
334 		preq->preq_opclass |= PUFFSOPFLAG_FAF;
335 	else
336 		preq->preq_id = puffs_getmsgid(pmp);
337 
338 	/* fill in caller information */
339 	preq->preq_pid = l->l_proc->p_pid;
340 	preq->preq_lid = l->l_lid;
341 
342 	/*
343 	 * To support cv_sig, yet another movie: check if there are signals
344 	 * pending and we are issueing a non-FAF.  If so, return an error
345 	 * directly UNLESS we are issueing INACTIVE/RECLAIM.  In that case,
346 	 * convert it to a FAF, fire off to the file server and return
347 	 * an error.  Yes, this is bordering disgusting.  Barfbags are on me.
348 	 */
349 	if (__predict_false((park->park_flags & PARKFLAG_WANTREPLY)
350 	   && (park->park_flags & PARKFLAG_CALL) == 0
351 	   && (l->l_flag & LW_PENDSIG) != 0 && sigispending(l, 0))) {
352 		park->park_flags |= PARKFLAG_HASERROR;
353 		preq->preq_rv = EINTR;
354 		if (PUFFSOP_OPCLASS(preq->preq_opclass) == PUFFSOP_VN
355 		    && (preq->preq_optype == PUFFS_VN_INACTIVE
356 		     || preq->preq_optype == PUFFS_VN_RECLAIM)) {
357 			park->park_preq->preq_opclass |= PUFFSOPFLAG_FAF;
358 			park->park_flags &= ~PARKFLAG_WANTREPLY;
359 			DPRINTF(("puffs_msg_enqueue: converted to FAF %p\n",
360 			    park));
361 		} else {
362 			return;
363 		}
364 	}
365 
366 	/*
367 	 * test for suspension lock.
368 	 *
369 	 * Note that we *DO NOT* keep the lock, since that might block
370 	 * lock acquiring PLUS it would give userlandia control over
371 	 * the lock.  The operation queue enforces a strict ordering:
372 	 * when the fs server gets in the op stream, it knows things
373 	 * are in order.  The kernel locks can't guarantee that for
374 	 * userspace, in any case.
375 	 *
376 	 * BUT: this presents a problem for ops which have a consistency
377 	 * clause based on more than one operation.  Unfortunately such
378 	 * operations (read, write) do not reliably work yet.
379 	 *
380 	 * Ya, Ya, it's wrong wong wrong, me be fixink this someday.
381 	 *
382 	 * XXX: and there is one more problem.  We sometimes need to
383 	 * take a lazy lock in case the fs is suspending and we are
384 	 * executing as the fs server context.  This might happen
385 	 * e.g. in the case that the user server triggers a reclaim
386 	 * in the kernel while the fs is suspending.  It's not a very
387 	 * likely event, but it needs to be fixed some day.
388 	 */
389 
390 	/*
391 	 * MOREXXX: once PUFFS_WCACHEINFO is enabled, we can't take
392 	 * the mutex here, since getpages() might be called locked.
393 	 */
394 	fstrans_start(mp, FSTRANS_NORMAL);
395 	mutex_enter(&pmp->pmp_lock);
396 	fstrans_done(mp);
397 
398 	if (pmp->pmp_status != PUFFSTAT_RUNNING) {
399 		mutex_exit(&pmp->pmp_lock);
400 		park->park_flags |= PARKFLAG_HASERROR;
401 		preq->preq_rv = ENXIO;
402 		return;
403 	}
404 
405 #ifdef PUFFSDEBUG
406 	parkqdump(&pmp->pmp_msg_touser, puffsdebug > 1);
407 	parkqdump(&pmp->pmp_msg_replywait, puffsdebug > 1);
408 #endif
409 
410 	/*
411 	 * Note: we don't need to lock park since we have the only
412 	 * reference to it at this point.
413 	 */
414 	TAILQ_INSERT_TAIL(&pmp->pmp_msg_touser, park, park_entries);
415 	park->park_flags |= PARKFLAG_ONQUEUE1;
416 	pmp->pmp_msg_touser_count++;
417 	park->park_refcount++;
418 	mutex_exit(&pmp->pmp_lock);
419 
420 	cv_broadcast(&pmp->pmp_msg_waiter_cv);
421 	putter_notify(pmp->pmp_pi);
422 
423 	DPRINTF(("touser: req %" PRIu64 ", preq: %p, park: %p, "
424 	    "c/t: 0x%x/0x%x, f: 0x%x\n", preq->preq_id, preq, park,
425 	    preq->preq_opclass, preq->preq_optype, park->park_flags));
426 }
427 
428 int
429 puffs_msg_wait(struct puffs_mount *pmp, struct puffs_msgpark *park)
430 {
431 	struct puffs_req *preq = park->park_preq; /* XXX: hmmm */
432 	struct mount *mp = PMPTOMP(pmp);
433 	int error = 0;
434 	int rv;
435 
436 	mutex_enter(&pmp->pmp_lock);
437 	puffs_mp_reference(pmp);
438 	mutex_exit(&pmp->pmp_lock);
439 
440 	mutex_enter(&park->park_mtx);
441 	if ((park->park_flags & PARKFLAG_WANTREPLY) == 0
442 	    || (park->park_flags & PARKFLAG_CALL)) {
443 		mutex_exit(&park->park_mtx);
444 		rv = 0;
445 		goto skipwait;
446 	}
447 
448 	/* did the response beat us to the wait? */
449 	if (__predict_false((park->park_flags & PARKFLAG_DONE)
450 	    || (park->park_flags & PARKFLAG_HASERROR))) {
451 		rv = park->park_preq->preq_rv;
452 		mutex_exit(&park->park_mtx);
453 		goto skipwait;
454 	}
455 
456 	error = cv_wait_sig(&park->park_cv, &park->park_mtx);
457 	DPRINTF(("puffs_touser: waiter for %p woke up with %d\n",
458 	    park, error));
459 	if (error) {
460 		park->park_flags |= PARKFLAG_WAITERGONE;
461 		if (park->park_flags & PARKFLAG_DONE) {
462 			rv = preq->preq_rv;
463 			mutex_exit(&park->park_mtx);
464 		} else {
465 			/*
466 			 * ok, we marked it as going away, but
467 			 * still need to do queue ops.  take locks
468 			 * in correct order.
469 			 *
470 			 * We don't want to release our reference
471 			 * if it's on replywait queue to avoid error
472 			 * to file server.  putop() code will DTRT.
473 			 */
474 			mutex_exit(&park->park_mtx);
475 			mutex_enter(&pmp->pmp_lock);
476 			mutex_enter(&park->park_mtx);
477 
478 			/*
479 			 * Still on queue1?  We can safely remove it
480 			 * without any consequences since the file
481 			 * server hasn't seen it.  "else" we need to
482 			 * wait for the response and just ignore it
483 			 * to avoid signalling an incorrect error to
484 			 * the file server.
485 			 */
486 			if (park->park_flags & PARKFLAG_ONQUEUE1) {
487 				TAILQ_REMOVE(&pmp->pmp_msg_touser,
488 				    park, park_entries);
489 				puffs_msgpark_release(park);
490 				pmp->pmp_msg_touser_count--;
491 				park->park_flags &= ~PARKFLAG_ONQUEUE1;
492 			} else {
493 				mutex_exit(&park->park_mtx);
494 			}
495 			mutex_exit(&pmp->pmp_lock);
496 
497 			rv = EINTR;
498 		}
499 	} else {
500 		rv = preq->preq_rv;
501 		mutex_exit(&park->park_mtx);
502 	}
503 
504 	/*
505 	 * retake the lock and release.  This makes sure (haha,
506 	 * I'm humorous) that we don't process the same vnode in
507 	 * multiple threads due to the locks hacks we have in
508 	 * puffs_lock().  In reality this is well protected by
509 	 * the biglock, but once that's gone, well, hopefully
510 	 * this will be fixed for real.  (and when you read this
511 	 * comment in 2017 and subsequently barf, my condolences ;).
512 	 */
513 	if (rv == 0 && !fstrans_is_owner(mp)) {
514 		fstrans_start(mp, FSTRANS_NORMAL);
515 		fstrans_done(mp);
516 	}
517 
518  skipwait:
519 	mutex_enter(&pmp->pmp_lock);
520 	puffs_mp_release(pmp);
521 	mutex_exit(&pmp->pmp_lock);
522 
523 	return rv;
524 }
525 
526 /*
527  * XXX: this suuuucks.  Hopefully I'll get rid of this lossage once
528  * the whole setback-nonsense gets fixed.
529  */
530 int
531 puffs_msg_wait2(struct puffs_mount *pmp, struct puffs_msgpark *park,
532 	struct puffs_node *pn1, struct puffs_node *pn2)
533 {
534 	struct puffs_req *preq;
535 	int rv;
536 
537 	rv = puffs_msg_wait(pmp, park);
538 
539 	preq = park->park_preq;
540 	if (pn1 && preq->preq_setbacks & PUFFS_SETBACK_INACT_N1)
541 		pn1->pn_stat |= PNODE_DOINACT;
542 	if (pn2 && preq->preq_setbacks & PUFFS_SETBACK_INACT_N2)
543 		pn2->pn_stat |= PNODE_DOINACT;
544 
545 	if (pn1 && preq->preq_setbacks & PUFFS_SETBACK_NOREF_N1)
546 		pn1->pn_stat |= PNODE_NOREFS;
547 	if (pn2 && preq->preq_setbacks & PUFFS_SETBACK_NOREF_N2)
548 		pn2->pn_stat |= PNODE_NOREFS;
549 
550 	return rv;
551 
552 }
553 
554 /*
555  * XXX: lazy bum.  please, for the love of foie gras, fix me.
556  * This should *NOT* depend on setfaf.  Also "memcpy" could
557  * be done more nicely.
558  */
559 void
560 puffs_msg_sendresp(struct puffs_mount *pmp, struct puffs_req *origpreq, int rv)
561 {
562 	struct puffs_msgpark *park;
563 	struct puffs_req *preq;
564 
565 	puffs_msgmem_alloc(sizeof(struct puffs_req), &park, (void **)&preq, 1);
566 	puffs_msg_setfaf(park); /* XXXXXX: avoids reqid override */
567 
568 	memcpy(preq, origpreq, sizeof(struct puffs_req));
569 	preq->preq_rv = rv;
570 	preq->preq_opclass |= PUFFSOPFLAG_ISRESPONSE;
571 
572 	puffs_msg_enqueue(pmp, park);
573 	puffs_msgmem_release(park);
574 }
575 
576 /*
577  * Get next request in the outgoing queue.  "maxsize" controls the
578  * size the caller can accommodate and "nonblock" signals if this
579  * should block while waiting for input.  Handles all locking internally.
580  */
581 int
582 puffs_msgif_getout(void *this, size_t maxsize, int nonblock,
583 	uint8_t **data, size_t *dlen, void **parkptr)
584 {
585 	struct puffs_mount *pmp = this;
586 	struct puffs_msgpark *park;
587 	struct puffs_req *preq;
588 	int error;
589 
590 	error = 0;
591 	mutex_enter(&pmp->pmp_lock);
592 	puffs_mp_reference(pmp);
593 	for (;;) {
594 		/* RIP? */
595 		if (pmp->pmp_status != PUFFSTAT_RUNNING) {
596 			error = ENXIO;
597 			break;
598 		}
599 
600 		/* need platinum yendorian express card? */
601 		if (TAILQ_EMPTY(&pmp->pmp_msg_touser)) {
602 			DPRINTF(("puffs_getout: no outgoing op, "));
603 			if (nonblock) {
604 				DPRINTF(("returning EWOULDBLOCK\n"));
605 				error = EWOULDBLOCK;
606 				break;
607 			}
608 			DPRINTF(("waiting ...\n"));
609 
610 			error = cv_wait_sig(&pmp->pmp_msg_waiter_cv,
611 			    &pmp->pmp_lock);
612 			if (error)
613 				break;
614 			else
615 				continue;
616 		}
617 
618 		park = TAILQ_FIRST(&pmp->pmp_msg_touser);
619 		if (park == NULL)
620 			continue;
621 
622 		mutex_enter(&park->park_mtx);
623 		puffs_msgpark_reference(park);
624 
625 		DPRINTF(("puffs_getout: found park at %p, ", park));
626 
627 		/* If it's a goner, don't process any furher */
628 		if (park->park_flags & PARKFLAG_WAITERGONE) {
629 			DPRINTF(("waitergone!\n"));
630 			puffs_msgpark_release(park);
631 			continue;
632 		}
633 		preq = park->park_preq;
634 
635 #if 0
636 		/* check size */
637 		/*
638 		 * XXX: this check is not valid for now, we don't know
639 		 * the size of the caller's input buffer.  i.e. this
640 		 * will most likely go away
641 		 */
642 		if (maxsize < preq->preq_frhdr.pfr_len) {
643 			DPRINTF(("buffer too small\n"));
644 			puffs_msgpark_release(park);
645 			error = E2BIG;
646 			break;
647 		}
648 #endif
649 
650 		DPRINTF(("returning\n"));
651 
652 		/*
653 		 * Ok, we found what we came for.  Release it from the
654 		 * outgoing queue but do not unlock.  We will unlock
655 		 * only after we "releaseout" it to avoid complications:
656 		 * otherwise it is (theoretically) possible for userland
657 		 * to race us into "put" before we have a change to put
658 		 * this baby on the receiving queue.
659 		 */
660 		TAILQ_REMOVE(&pmp->pmp_msg_touser, park, park_entries);
661 		KASSERT(park->park_flags & PARKFLAG_ONQUEUE1);
662 		park->park_flags &= ~PARKFLAG_ONQUEUE1;
663 		mutex_exit(&park->park_mtx);
664 
665 		pmp->pmp_msg_touser_count--;
666 		KASSERT(pmp->pmp_msg_touser_count >= 0);
667 
668 		break;
669 	}
670 	puffs_mp_release(pmp);
671 	mutex_exit(&pmp->pmp_lock);
672 
673 	if (error == 0) {
674 		*data = (uint8_t *)preq;
675 		preq->preq_pth.pth_framelen = park->park_copylen;
676 		*dlen = preq->preq_pth.pth_framelen;
677 		*parkptr = park;
678 	}
679 
680 	return error;
681 }
682 
683 /*
684  * Release outgoing structure.  Now, depending on the success of the
685  * outgoing send, it is either going onto the result waiting queue
686  * or the death chamber.
687  */
688 void
689 puffs_msgif_releaseout(void *this, void *parkptr, int status)
690 {
691 	struct puffs_mount *pmp = this;
692 	struct puffs_msgpark *park = parkptr;
693 
694 	DPRINTF(("puffs_releaseout: returning park %p, errno %d: " ,
695 	    park, status));
696 	mutex_enter(&pmp->pmp_lock);
697 	mutex_enter(&park->park_mtx);
698 	if (park->park_flags & PARKFLAG_WANTREPLY) {
699 		if (status == 0) {
700 			DPRINTF(("enqueue replywait\n"));
701 			TAILQ_INSERT_TAIL(&pmp->pmp_msg_replywait, park,
702 			    park_entries);
703 			park->park_flags |= PARKFLAG_ONQUEUE2;
704 		} else {
705 			DPRINTF(("error path!\n"));
706 			park->park_preq->preq_rv = status;
707 			park->park_flags |= PARKFLAG_DONE;
708 			cv_signal(&park->park_cv);
709 		}
710 		puffs_msgpark_release(park);
711 	} else {
712 		DPRINTF(("release\n"));
713 		puffs_msgpark_release1(park, 2);
714 	}
715 	mutex_exit(&pmp->pmp_lock);
716 }
717 
718 size_t
719 puffs_msgif_waitcount(void *this)
720 {
721 	struct puffs_mount *pmp = this;
722 	size_t rv;
723 
724 	mutex_enter(&pmp->pmp_lock);
725 	rv = pmp->pmp_msg_touser_count;
726 	mutex_exit(&pmp->pmp_lock);
727 
728 	return rv;
729 }
730 
731 /*
732  * XXX: locking with this one?
733  */
734 static void
735 puffsop_msg(void *this, struct puffs_req *preq)
736 {
737 	struct puffs_mount *pmp = this;
738 	struct putter_hdr *pth = &preq->preq_pth;
739 	struct puffs_msgpark *park;
740 	int wgone;
741 
742 	mutex_enter(&pmp->pmp_lock);
743 
744 	/* Locate waiter */
745 	TAILQ_FOREACH(park, &pmp->pmp_msg_replywait, park_entries) {
746 		if (park->park_preq->preq_id == preq->preq_id)
747 			break;
748 	}
749 	if (park == NULL) {
750 		DPRINTF(("puffsop_msg: no request: %" PRIu64 "\n",
751 		    preq->preq_id));
752 		mutex_exit(&pmp->pmp_lock);
753 		return; /* XXX send error */
754 	}
755 
756 	mutex_enter(&park->park_mtx);
757 	puffs_msgpark_reference(park);
758 	if (pth->pth_framelen > park->park_maxlen) {
759 		DPRINTF(("puffsop_msg: invalid buffer length: "
760 		    "%" PRIu64 " (req %" PRIu64 ", \n", pth->pth_framelen,
761 		    preq->preq_id));
762 		park->park_preq->preq_rv = EPROTO;
763 		cv_signal(&park->park_cv);
764 		puffs_msgpark_release1(park, 2);
765 		mutex_exit(&pmp->pmp_lock);
766 		return; /* XXX: error */
767 	}
768 	wgone = park->park_flags & PARKFLAG_WAITERGONE;
769 
770 	KASSERT(park->park_flags & PARKFLAG_ONQUEUE2);
771 	TAILQ_REMOVE(&pmp->pmp_msg_replywait, park, park_entries);
772 	park->park_flags &= ~PARKFLAG_ONQUEUE2;
773 	mutex_exit(&pmp->pmp_lock);
774 
775 	if (wgone) {
776 		DPRINTF(("puffsop_msg: bad service - waiter gone for "
777 		    "park %p\n", park));
778 	} else {
779 		if (park->park_flags & PARKFLAG_CALL) {
780 			DPRINTF(("puffsop_msg: call for %p, arg %p\n",
781 			    park->park_preq, park->park_donearg));
782 			park->park_done(pmp, preq, park->park_donearg);
783 		} else {
784 			/* XXX: yes, I know */
785 			memcpy(park->park_preq, preq, pth->pth_framelen);
786 		}
787 	}
788 
789 	if (!wgone) {
790 		DPRINTF(("puffs_putop: flagging done for "
791 		    "park %p\n", park));
792 		cv_signal(&park->park_cv);
793 	}
794 
795 	park->park_flags |= PARKFLAG_DONE;
796 	puffs_msgpark_release1(park, 2);
797 }
798 
799 /*
800  * helpers
801  */
802 static void
803 dosuspendresume(void *arg)
804 {
805 	struct puffs_mount *pmp = arg;
806 	struct mount *mp;
807 	int rv;
808 
809 	mp = PMPTOMP(pmp);
810 	/*
811 	 * XXX?  does this really do any good or is it just
812 	 * paranoid stupidity?  or stupid paranoia?
813 	 */
814 	if (mp->mnt_iflag & IMNT_UNMOUNT) {
815 		printf("puffs dosuspendresume(): detected suspend on "
816 		    "unmounting fs\n");
817 		goto out;
818 	}
819 
820 	/* Do the dance.  Allow only one concurrent suspend */
821 	rv = vfs_suspend(PMPTOMP(pmp), 1);
822 	if (rv == 0)
823 		vfs_resume(PMPTOMP(pmp));
824 
825  out:
826 	mutex_enter(&pmp->pmp_lock);
827 	KASSERT(pmp->pmp_suspend == 1);
828 	pmp->pmp_suspend = 0;
829 	puffs_mp_release(pmp);
830 	mutex_exit(&pmp->pmp_lock);
831 
832 	kthread_exit(0);
833 }
834 
835 static void
836 puffsop_suspend(struct puffs_mount *pmp)
837 {
838 	int rv = 0;
839 
840 	mutex_enter(&pmp->pmp_lock);
841 	if (pmp->pmp_suspend || pmp->pmp_status != PUFFSTAT_RUNNING) {
842 		rv = EBUSY;
843 	} else {
844 		puffs_mp_reference(pmp);
845 		pmp->pmp_suspend = 1;
846 	}
847 	mutex_exit(&pmp->pmp_lock);
848 	if (rv)
849 		return;
850 	rv = kthread_create(PRI_NONE, 0, NULL, dosuspendresume,
851 	    pmp, NULL, "puffsusp");
852 
853 	/* XXX: "return" rv */
854 }
855 
856 static void
857 puffsop_flush(struct puffs_mount *pmp, struct puffs_flush *pf)
858 {
859 	struct vnode *vp;
860 	voff_t offlo, offhi;
861 	int rv, flags = 0;
862 
863 	if (pf->pf_req.preq_pth.pth_framelen != sizeof(struct puffs_flush)) {
864 		rv = EINVAL;
865 		goto out;
866 	}
867 
868 	/* XXX: slurry */
869 	if (pf->pf_op == PUFFS_INVAL_NAMECACHE_ALL) {
870 		cache_purgevfs(PMPTOMP(pmp));
871 		rv = 0;
872 		goto out;
873 	}
874 
875 	/*
876 	 * Get vnode, don't lock it.  Namecache is protected by its own lock
877 	 * and we have a reference to protect against premature harvesting.
878 	 *
879 	 * The node we want here might be locked and the op is in
880 	 * userspace waiting for us to complete ==> deadlock.  Another
881 	 * reason we need to eventually bump locking to userspace, as we
882 	 * will need to lock the node if we wish to do flushes.
883 	 */
884 	rv = puffs_cookie2vnode(pmp, pf->pf_cookie, 0, 0, &vp);
885 	if (rv) {
886 		if (rv == PUFFS_NOSUCHCOOKIE)
887 			rv = ENOENT;
888 		goto out;
889 	}
890 
891 	switch (pf->pf_op) {
892 #if 0
893 	/* not quite ready, yet */
894 	case PUFFS_INVAL_NAMECACHE_NODE:
895 	struct componentname *pf_cn;
896 	char *name;
897 		/* get comfortab^Wcomponentname */
898 		pf_cn = kmem_alloc(componentname);
899 		memset(pf_cn, 0, sizeof(struct componentname));
900 		break;
901 
902 #endif
903 	case PUFFS_INVAL_NAMECACHE_DIR:
904 		if (vp->v_type != VDIR) {
905 			rv = EINVAL;
906 			break;
907 		}
908 		cache_purge1(vp, NULL, PURGE_CHILDREN);
909 		break;
910 
911 	case PUFFS_INVAL_PAGECACHE_NODE_RANGE:
912 		flags = PGO_FREE;
913 		/*FALLTHROUGH*/
914 	case PUFFS_FLUSH_PAGECACHE_NODE_RANGE:
915 		if (flags == 0)
916 			flags = PGO_CLEANIT;
917 
918 		if (pf->pf_end > vp->v_size || vp->v_type != VREG) {
919 			rv = EINVAL;
920 			break;
921 		}
922 
923 		offlo = trunc_page(pf->pf_start);
924 		offhi = round_page(pf->pf_end);
925 		if (offhi != 0 && offlo >= offhi) {
926 			rv = EINVAL;
927 			break;
928 		}
929 
930 		simple_lock(&vp->v_uobj.vmobjlock);
931 		rv = VOP_PUTPAGES(vp, offlo, offhi, flags);
932 		break;
933 
934 	default:
935 		rv = EINVAL;
936 	}
937 
938 	vrele(vp);
939 
940  out:
941 	puffs_msg_sendresp(pmp, &pf->pf_req, rv);
942 }
943 
944 int
945 puffs_msgif_dispatch(void *this, struct putter_hdr *pth)
946 {
947 	struct puffs_mount *pmp = this;
948 	struct puffs_req *preq = (struct puffs_req *)pth;
949 
950 	/* XXX: need to send error to userspace */
951 	if (pth->pth_framelen < sizeof(struct puffs_req)) {
952 		puffs_msg_sendresp(pmp, preq, EINVAL); /* E2SMALL */
953 		return 0;
954 	}
955 
956 	switch (PUFFSOP_OPCLASS(preq->preq_opclass)) {
957 	case PUFFSOP_VN:
958 	case PUFFSOP_VFS:
959 		DPRINTF(("dispatch: vn/vfs message 0x%x\n", preq->preq_optype));
960 		puffsop_msg(pmp, preq);
961 		break;
962 	case PUFFSOP_FLUSH:
963 		DPRINTF(("dispatch: flush 0x%x\n", preq->preq_optype));
964 		puffsop_flush(pmp, (struct puffs_flush *)preq);
965 		break;
966 	case PUFFSOP_SUSPEND:
967 		DPRINTF(("dispatch: suspend\n"));
968 		puffsop_suspend(pmp);
969 		break;
970 	default:
971 		DPRINTF(("dispatch: invalid class 0x%x\n", preq->preq_opclass));
972 		puffs_msg_sendresp(pmp, preq, EINVAL);
973 		break;
974 	}
975 
976 	return 0;
977 }
978 
979 int
980 puffs_msgif_close(void *this)
981 {
982 	struct puffs_mount *pmp = this;
983 	struct mount *mp = PMPTOMP(pmp);
984 	int gone, rv;
985 
986 	mutex_enter(&pmp->pmp_lock);
987 	puffs_mp_reference(pmp);
988 
989 	/*
990 	 * Free the waiting callers before proceeding any further.
991 	 * The syncer might be jogging around in this file system
992 	 * currently.  If we allow it to go to the userspace of no
993 	 * return while trying to get the syncer lock, well ...
994 	 * synclk: I feel happy, I feel fine.
995 	 * lockmgr: You're not fooling anyone, you know.
996 	 */
997 	puffs_userdead(pmp);
998 
999 	/*
1000 	 * Make sure someone from puffs_unmount() isn't currently in
1001 	 * userspace.  If we don't take this precautionary step,
1002 	 * they might notice that the mountpoint has disappeared
1003 	 * from under them once they return.  Especially note that we
1004 	 * cannot simply test for an unmounter before calling
1005 	 * dounmount(), since it might be possible that that particular
1006 	 * invocation of unmount was called without MNT_FORCE.  Here we
1007 	 * *must* make sure unmount succeeds.  Also, restart is necessary
1008 	 * since pmp isn't locked.  We might end up with PUTTER_DEAD after
1009 	 * restart and exit from there.
1010 	 */
1011 	if (pmp->pmp_unmounting) {
1012 		cv_wait(&pmp->pmp_unmounting_cv, &pmp->pmp_lock);
1013 		puffs_mp_release(pmp);
1014 		mutex_exit(&pmp->pmp_lock);
1015 		DPRINTF(("puffs_fop_close: unmount was in progress for pmp %p, "
1016 		    "restart\n", pmp));
1017 		return ERESTART;
1018 	}
1019 
1020 	/* Won't access pmp from here anymore */
1021 	puffs_mp_release(pmp);
1022 	mutex_exit(&pmp->pmp_lock);
1023 
1024 	/*
1025 	 * Detach from VFS.  First do necessary XXX-dance (from
1026 	 * sys_unmount() & other callers of dounmount()
1027 	 *
1028 	 * XXX Freeze syncer.  Must do this before locking the
1029 	 * mount point.  See dounmount() for details.
1030 	 *
1031 	 * XXX2: take a reference to the mountpoint before starting to
1032 	 * wait for syncer_mutex.  Otherwise the mointpoint can be
1033 	 * wiped out while we wait.
1034 	 */
1035 	simple_lock(&mp->mnt_slock);
1036 	mp->mnt_wcnt++;
1037 	simple_unlock(&mp->mnt_slock);
1038 
1039 	mutex_enter(&syncer_mutex);
1040 
1041 	simple_lock(&mp->mnt_slock);
1042 	mp->mnt_wcnt--;
1043 	if (mp->mnt_wcnt == 0)
1044 		wakeup(&mp->mnt_wcnt);
1045 	gone = mp->mnt_iflag & IMNT_GONE;
1046 	simple_unlock(&mp->mnt_slock);
1047 	if (gone) {
1048 		mutex_exit(&syncer_mutex);
1049 		return 0;
1050 	}
1051 
1052 	/*
1053 	 * microscopic race condition here (although not with the current
1054 	 * kernel), but can't really fix it without starting a crusade
1055 	 * against vfs_busy(), so let it be, let it be, let it be
1056 	 */
1057 
1058 	/*
1059 	 * The only way vfs_busy() will fail for us is if the filesystem
1060 	 * is already a goner.
1061 	 * XXX: skating on the thin ice of modern calling conventions ...
1062 	 */
1063 	if (vfs_busy(mp, 0, 0)) {
1064 		mutex_exit(&syncer_mutex);
1065 		return 0;
1066 	}
1067 
1068 	/*
1069 	 * Once we have the mount point, unmount() can't interfere..
1070 	 * or at least in theory it shouldn't.  dounmount() reentracy
1071 	 * might require some visiting at some point.
1072 	 */
1073 	rv = dounmount(mp, MNT_FORCE, curlwp);
1074 	KASSERT(rv == 0);
1075 
1076 	return 0;
1077 }
1078 
1079 /*
1080  * We're dead, kaput, RIP, slightly more than merely pining for the
1081  * fjords, belly-up, fallen, lifeless, finished, expired, gone to meet
1082  * our maker, ceased to be, etcetc.  YASD.  It's a dead FS!
1083  *
1084  * Caller must hold puffs mutex.
1085  */
1086 void
1087 puffs_userdead(struct puffs_mount *pmp)
1088 {
1089 	struct puffs_msgpark *park, *park_next;
1090 
1091 	/*
1092 	 * Mark filesystem status as dying so that operations don't
1093 	 * attempt to march to userspace any longer.
1094 	 */
1095 	pmp->pmp_status = PUFFSTAT_DYING;
1096 
1097 	/* signal waiters on REQUEST TO file server queue */
1098 	for (park = TAILQ_FIRST(&pmp->pmp_msg_touser); park; park = park_next) {
1099 		uint8_t opclass;
1100 
1101 		mutex_enter(&park->park_mtx);
1102 		puffs_msgpark_reference(park);
1103 		park_next = TAILQ_NEXT(park, park_entries);
1104 
1105 		KASSERT(park->park_flags & PARKFLAG_ONQUEUE1);
1106 		TAILQ_REMOVE(&pmp->pmp_msg_touser, park, park_entries);
1107 		park->park_flags &= ~PARKFLAG_ONQUEUE1;
1108 		pmp->pmp_msg_touser_count--;
1109 
1110 		/*
1111 		 * Even though waiters on QUEUE1 are removed in touser()
1112 		 * in case of WAITERGONE, it is still possible for us to
1113 		 * get raced here due to having to retake locks in said
1114 		 * touser().  In the race case simply "ignore" the item
1115 		 * on the queue and move on to the next one.
1116 		 */
1117 		if (park->park_flags & PARKFLAG_WAITERGONE) {
1118 			KASSERT((park->park_flags & PARKFLAG_CALL) == 0);
1119 			KASSERT(park->park_flags & PARKFLAG_WANTREPLY);
1120 			puffs_msgpark_release(park);
1121 
1122 		} else {
1123 			opclass = park->park_preq->preq_opclass;
1124 			park->park_preq->preq_rv = ENXIO;
1125 
1126 			if (park->park_flags & PARKFLAG_CALL) {
1127 				park->park_done(pmp, park->park_preq,
1128 				    park->park_donearg);
1129 				puffs_msgpark_release1(park, 2);
1130 			} else if ((park->park_flags & PARKFLAG_WANTREPLY)==0) {
1131 				puffs_msgpark_release1(park, 2);
1132 			} else {
1133 				park->park_preq->preq_rv = ENXIO;
1134 				cv_signal(&park->park_cv);
1135 				puffs_msgpark_release(park);
1136 			}
1137 		}
1138 	}
1139 
1140 	/* signal waiters on RESPONSE FROM file server queue */
1141 	for (park=TAILQ_FIRST(&pmp->pmp_msg_replywait); park; park=park_next) {
1142 		mutex_enter(&park->park_mtx);
1143 		puffs_msgpark_reference(park);
1144 		park_next = TAILQ_NEXT(park, park_entries);
1145 
1146 		KASSERT(park->park_flags & PARKFLAG_ONQUEUE2);
1147 		KASSERT(park->park_flags & PARKFLAG_WANTREPLY);
1148 
1149 		TAILQ_REMOVE(&pmp->pmp_msg_replywait, park, park_entries);
1150 		park->park_flags &= ~PARKFLAG_ONQUEUE2;
1151 
1152 		if (park->park_flags & PARKFLAG_WAITERGONE) {
1153 			KASSERT((park->park_flags & PARKFLAG_CALL) == 0);
1154 			puffs_msgpark_release(park);
1155 		} else {
1156 			park->park_preq->preq_rv = ENXIO;
1157 			if (park->park_flags & PARKFLAG_CALL) {
1158 				park->park_done(pmp, park->park_preq,
1159 				    park->park_donearg);
1160 				puffs_msgpark_release1(park, 2);
1161 			} else {
1162 				cv_signal(&park->park_cv);
1163 				puffs_msgpark_release(park);
1164 			}
1165 		}
1166 	}
1167 
1168 	cv_broadcast(&pmp->pmp_msg_waiter_cv);
1169 }
1170