1 /* $NetBSD: nfs_kq.c,v 1.26 2017/10/25 08:12:40 maya Exp $ */ 2 3 /*- 4 * Copyright (c) 2002, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jaromir Dolecek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __KERNEL_RCSID(0, "$NetBSD: nfs_kq.c,v 1.26 2017/10/25 08:12:40 maya Exp $"); 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/condvar.h> 38 #include <sys/kernel.h> 39 #include <sys/proc.h> 40 #include <sys/kmem.h> 41 #include <sys/mount.h> 42 #include <sys/mutex.h> 43 #include <sys/vnode.h> 44 #include <sys/unistd.h> 45 #include <sys/file.h> 46 #include <sys/kthread.h> 47 48 #include <uvm/uvm_extern.h> 49 #include <uvm/uvm.h> 50 51 #include <nfs/rpcv2.h> 52 #include <nfs/nfsproto.h> 53 #include <nfs/nfs.h> 54 #include <nfs/nfsnode.h> 55 #include <nfs/nfs_var.h> 56 57 struct kevq { 58 SLIST_ENTRY(kevq) kev_link; 59 struct vnode *vp; 60 u_int usecount; 61 u_int flags; 62 #define KEVQ_BUSY 0x01 /* currently being processed */ 63 struct timespec omtime; /* old modification time */ 64 struct timespec octime; /* old change time */ 65 nlink_t onlink; /* old number of references to file */ 66 kcondvar_t cv; 67 }; 68 SLIST_HEAD(kevqlist, kevq); 69 70 static kmutex_t nfskq_lock; 71 static struct lwp *nfskq_thread; 72 static kcondvar_t nfskq_cv; 73 static struct kevqlist kevlist = SLIST_HEAD_INITIALIZER(kevlist); 74 static bool nfskq_thread_exit; 75 76 void 77 nfs_kqinit(void) 78 { 79 80 mutex_init(&nfskq_lock, MUTEX_DEFAULT, IPL_NONE); 81 cv_init(&nfskq_cv, "nfskqpw"); 82 } 83 84 void 85 nfs_kqfini(void) 86 { 87 88 if (nfskq_thread != NULL) { 89 mutex_enter(&nfskq_lock); 90 nfskq_thread_exit = true; 91 cv_broadcast(&nfskq_cv); 92 do { 93 cv_wait(&nfskq_cv, &nfskq_lock); 94 } while (nfskq_thread != NULL); 95 mutex_exit(&nfskq_lock); 96 } 97 mutex_destroy(&nfskq_lock); 98 cv_destroy(&nfskq_cv); 99 } 100 101 /* 102 * This quite simplistic routine periodically checks for server changes 103 * of any of the watched files every NFS_MINATTRTIMO/2 seconds. 104 * Only changes in size, modification time, change time and nlinks 105 * are being checked, everything else is ignored. 106 * The routine only calls VOP_GETATTR() when it's likely it would get 107 * some new data, i.e. when the vnode expires from attrcache. This 108 * should give same result as periodically running stat(2) from userland, 109 * while keeping CPU/network usage low, and still provide proper kevent 110 * semantics. 111 * The poller thread is created when first vnode is added to watch list, 112 * and exits when the watch list is empty. The overhead of thread creation 113 * isn't really important, neither speed of attach and detach of knote. 114 */ 115 /* ARGSUSED */ 116 static void 117 nfs_kqpoll(void *arg) 118 { 119 struct kevq *ke; 120 struct vattr attr; 121 struct lwp *l = curlwp; 122 u_quad_t osize; 123 124 mutex_enter(&nfskq_lock); 125 while (!nfskq_thread_exit) { 126 SLIST_FOREACH(ke, &kevlist, kev_link) { 127 /* skip if still in attrcache */ 128 if (nfs_getattrcache(ke->vp, &attr) != ENOENT) 129 continue; 130 131 /* 132 * Mark entry busy, release lock and check 133 * for changes. 134 */ 135 ke->flags |= KEVQ_BUSY; 136 mutex_exit(&nfskq_lock); 137 138 /* save v_size, nfs_getattr() updates it */ 139 osize = ke->vp->v_size; 140 141 memset(&attr, 0, sizeof(attr)); 142 vn_lock(ke->vp, LK_SHARED | LK_RETRY); 143 (void) VOP_GETATTR(ke->vp, &attr, l->l_cred); 144 VOP_UNLOCK(ke->vp); 145 146 /* following is a bit fragile, but about best 147 * we can get */ 148 if (attr.va_size != osize) { 149 int extended = (attr.va_size > osize); 150 VN_KNOTE(ke->vp, NOTE_WRITE 151 | (extended ? NOTE_EXTEND : 0)); 152 ke->omtime = attr.va_mtime; 153 } else if (attr.va_mtime.tv_sec != ke->omtime.tv_sec 154 || attr.va_mtime.tv_nsec != ke->omtime.tv_nsec) { 155 VN_KNOTE(ke->vp, NOTE_WRITE); 156 ke->omtime = attr.va_mtime; 157 } 158 159 if (attr.va_ctime.tv_sec != ke->octime.tv_sec 160 || attr.va_ctime.tv_nsec != ke->octime.tv_nsec) { 161 VN_KNOTE(ke->vp, NOTE_ATTRIB); 162 ke->octime = attr.va_ctime; 163 } 164 165 if (attr.va_nlink != ke->onlink) { 166 VN_KNOTE(ke->vp, NOTE_LINK); 167 ke->onlink = attr.va_nlink; 168 } 169 170 mutex_enter(&nfskq_lock); 171 ke->flags &= ~KEVQ_BUSY; 172 cv_signal(&ke->cv); 173 } 174 175 if (SLIST_EMPTY(&kevlist)) { 176 /* Nothing more to watch, exit */ 177 nfskq_thread = NULL; 178 mutex_exit(&nfskq_lock); 179 kthread_exit(0); 180 } 181 182 /* wait a while before checking for changes again */ 183 cv_timedwait(&nfskq_cv, &nfskq_lock, 184 NFS_MINATTRTIMO * hz / 2); 185 } 186 nfskq_thread = NULL; 187 cv_broadcast(&nfskq_cv); 188 mutex_exit(&nfskq_lock); 189 } 190 191 static void 192 filt_nfsdetach(struct knote *kn) 193 { 194 struct vnode *vp = (struct vnode *)kn->kn_hook; 195 struct kevq *ke; 196 197 mutex_enter(vp->v_interlock); 198 SLIST_REMOVE(&vp->v_klist, kn, knote, kn_selnext); 199 mutex_exit(vp->v_interlock); 200 201 /* Remove the vnode from watch list */ 202 mutex_enter(&nfskq_lock); 203 SLIST_FOREACH(ke, &kevlist, kev_link) { 204 if (ke->vp == vp) { 205 while (ke->flags & KEVQ_BUSY) { 206 cv_wait(&ke->cv, &nfskq_lock); 207 } 208 209 if (ke->usecount > 1) { 210 /* keep, other kevents need this */ 211 ke->usecount--; 212 } else { 213 /* last user, g/c */ 214 cv_destroy(&ke->cv); 215 SLIST_REMOVE(&kevlist, ke, kevq, kev_link); 216 kmem_free(ke, sizeof(*ke)); 217 } 218 break; 219 } 220 } 221 mutex_exit(&nfskq_lock); 222 } 223 224 static int 225 filt_nfsread(struct knote *kn, long hint) 226 { 227 struct vnode *vp = (struct vnode *)kn->kn_hook; 228 int rv; 229 230 /* 231 * filesystem is gone, so set the EOF flag and schedule 232 * the knote for deletion. 233 */ 234 switch (hint) { 235 case NOTE_REVOKE: 236 KASSERT(mutex_owned(vp->v_interlock)); 237 kn->kn_flags |= (EV_EOF | EV_ONESHOT); 238 return (1); 239 case 0: 240 mutex_enter(vp->v_interlock); 241 kn->kn_data = vp->v_size - ((file_t *)kn->kn_obj)->f_offset; 242 rv = (kn->kn_data != 0); 243 mutex_exit(vp->v_interlock); 244 return rv; 245 default: 246 KASSERT(mutex_owned(vp->v_interlock)); 247 kn->kn_data = vp->v_size - ((file_t *)kn->kn_obj)->f_offset; 248 return (kn->kn_data != 0); 249 } 250 } 251 252 static int 253 filt_nfsvnode(struct knote *kn, long hint) 254 { 255 struct vnode *vp = (struct vnode *)kn->kn_hook; 256 int fflags; 257 258 switch (hint) { 259 case NOTE_REVOKE: 260 KASSERT(mutex_owned(vp->v_interlock)); 261 kn->kn_flags |= EV_EOF; 262 if ((kn->kn_sfflags & hint) != 0) 263 kn->kn_fflags |= hint; 264 return (1); 265 case 0: 266 mutex_enter(vp->v_interlock); 267 fflags = kn->kn_fflags; 268 mutex_exit(vp->v_interlock); 269 break; 270 default: 271 KASSERT(mutex_owned(vp->v_interlock)); 272 if ((kn->kn_sfflags & hint) != 0) 273 kn->kn_fflags |= hint; 274 fflags = kn->kn_fflags; 275 break; 276 } 277 278 return (fflags != 0); 279 } 280 281 282 static const struct filterops nfsread_filtops = { 283 .f_isfd = 1, 284 .f_attach = NULL, 285 .f_detach = filt_nfsdetach, 286 .f_event = filt_nfsread, 287 }; 288 289 static const struct filterops nfsvnode_filtops = { 290 .f_isfd = 1, 291 .f_attach = NULL, 292 .f_detach = filt_nfsdetach, 293 .f_event = filt_nfsvnode, 294 }; 295 296 int 297 nfs_kqfilter(void *v) 298 { 299 struct vop_kqfilter_args /* { 300 struct vnode *a_vp; 301 struct knote *a_kn; 302 } */ *ap = v; 303 struct vnode *vp; 304 struct knote *kn; 305 struct kevq *ke; 306 int error = 0; 307 struct vattr attr; 308 struct lwp *l = curlwp; 309 310 vp = ap->a_vp; 311 kn = ap->a_kn; 312 switch (kn->kn_filter) { 313 case EVFILT_READ: 314 kn->kn_fop = &nfsread_filtops; 315 break; 316 case EVFILT_VNODE: 317 kn->kn_fop = &nfsvnode_filtops; 318 break; 319 default: 320 return (EINVAL); 321 } 322 323 /* 324 * Put the vnode to watched list. 325 */ 326 327 /* 328 * Fetch current attributes. It's only needed when the vnode 329 * is not watched yet, but we need to do this without lock 330 * held. This is likely cheap due to attrcache, so do it now. 331 */ 332 memset(&attr, 0, sizeof(attr)); 333 vn_lock(vp, LK_SHARED | LK_RETRY); 334 (void) VOP_GETATTR(vp, &attr, l->l_cred); 335 VOP_UNLOCK(vp); 336 337 mutex_enter(&nfskq_lock); 338 339 /* ensure the poller is running */ 340 if (!nfskq_thread) { 341 error = kthread_create(PRI_NONE, 0, NULL, nfs_kqpoll, 342 NULL, &nfskq_thread, "nfskqpoll"); 343 if (error) { 344 mutex_exit(&nfskq_lock); 345 return error; 346 } 347 } 348 349 SLIST_FOREACH(ke, &kevlist, kev_link) { 350 if (ke->vp == vp) 351 break; 352 } 353 354 if (ke) { 355 /* already watched, so just bump usecount */ 356 ke->usecount++; 357 } else { 358 /* need a new one */ 359 ke = kmem_alloc(sizeof(*ke), KM_SLEEP); 360 ke->vp = vp; 361 ke->usecount = 1; 362 ke->flags = 0; 363 ke->omtime = attr.va_mtime; 364 ke->octime = attr.va_ctime; 365 ke->onlink = attr.va_nlink; 366 cv_init(&ke->cv, "nfskqdet"); 367 SLIST_INSERT_HEAD(&kevlist, ke, kev_link); 368 } 369 370 mutex_enter(vp->v_interlock); 371 SLIST_INSERT_HEAD(&vp->v_klist, kn, kn_selnext); 372 kn->kn_hook = vp; 373 mutex_exit(vp->v_interlock); 374 375 /* kick the poller */ 376 cv_signal(&nfskq_cv); 377 mutex_exit(&nfskq_lock); 378 379 return (error); 380 } 381