1 /* $OpenBSD: nfs_kq.c,v 1.35 2023/03/08 04:43:09 guenther Exp $ */ 2 /* $NetBSD: nfs_kq.c,v 1.7 2003/10/30 01:43:10 simonb Exp $ */ 3 4 /*- 5 * Copyright (c) 2002 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Jaromir Dolecek. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/unistd.h>
#include <sys/file.h>
#include <sys/kthread.h>
#include <sys/rwlock.h>
#include <sys/queue.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_var.h>

void	nfs_kqpoll(void *);
int	nfs_kqwatch(struct vnode *);
void	nfs_kqunwatch(struct vnode *);

void	filt_nfsdetach(struct knote *);
int	filt_nfsread(struct knote *, long);
int	filt_nfswrite(struct knote *, long);
int	filt_nfsvnode(struct knote *, long);

/*
 * One watch-list entry per watched vnode.  All knotes attached to the
 * same vnode share a single entry; usecount tracks how many.  The
 * cached "old" attributes are compared against fresh ones by the
 * poller thread to decide which kevents to post.
 */
struct kevq {
	SLIST_ENTRY(kevq)	kev_link;
	struct vnode		*vp;
	u_int			usecount;	/* knotes sharing this entry */
	u_int			flags;
#define KEVQ_BUSY	0x01	/* currently being processed */
#define KEVQ_WANT	0x02	/* want to change this entry */
	struct timespec		omtime;	/* old modification time */
	struct timespec		octime;	/* old change time */
	nlink_t			onlink;	/* old number of references to file */
};
SLIST_HEAD(kevqlist, kevq);

/* Serializes access to pnfskq, kevlist and each entry's flags/usecount. */
struct rwlock nfskevq_lock = RWLOCK_INITIALIZER("nfskqlk");
struct proc *pnfskq;			/* poller thread; NULL when not running */
struct kevqlist kevlist = SLIST_HEAD_INITIALIZER(kevlist);

/*
 * This quite simplistic routine periodically checks for server changes
 * of any of the watched files every NFS_MINATTRTIMO/2 seconds.
 * Only changes in size, modification time, change time and nlinks
 * are being checked, everything else is ignored.
 * The routine only calls VOP_GETATTR() when it's likely it would get
 * some new data, i.e. when the vnode expires from attrcache. This
 * should give same result as periodically running stat(2) from userland,
 * while keeping CPU/network usage low, and still provide proper kevent
 * semantics.
 * The poller thread is created when first vnode is added to watch list,
 * and exits when the watch list is empty. The overhead of thread creation
 * isn't really important, neither speed of attach and detach of knote.
 */
void
nfs_kqpoll(void *arg)
{
	struct kevq *ke;
	struct vattr attr;
	struct proc *p = pnfskq;
	u_quad_t osize;
	int error;

	for(;;) {
		rw_enter_write(&nfskevq_lock);
		SLIST_FOREACH(ke, &kevlist, kev_link) {
			struct nfsnode *np = VTONFS(ke->vp);

#ifdef DEBUG
			printf("nfs_kqpoll on: ");
			VOP_PRINT(ke->vp);
#endif
			/* skip if still in attrcache */
			if (nfs_getattrcache(ke->vp, &attr) != ENOENT)
				continue;

			/*
			 * Mark entry busy, release lock and check
			 * for changes.  KEVQ_BUSY keeps nfs_kqunwatch()
			 * from freeing the entry under us while the
			 * lock is dropped for the RPC below.
			 */
			ke->flags |= KEVQ_BUSY;
			rw_exit_write(&nfskevq_lock);

			/* save v_size, nfs_getattr() updates it */
			osize = np->n_size;

			error = VOP_GETATTR(ke->vp, &attr, p->p_ucred, p);
			if (error == ESTALE) {
				/* file is gone on the server */
				NFS_INVALIDATE_ATTRCACHE(np);
				VN_KNOTE(ke->vp, NOTE_DELETE);
				goto next;
			}

			/* following is a bit fragile, but about best
			 * we can get */
			if (attr.va_size != osize) {
				int flags = NOTE_WRITE;

				if (attr.va_size > osize)
					flags |= NOTE_EXTEND;
				else
					flags |= NOTE_TRUNCATE;

				VN_KNOTE(ke->vp, flags);
				ke->omtime = attr.va_mtime;
			} else if (attr.va_mtime.tv_sec != ke->omtime.tv_sec
			    || attr.va_mtime.tv_nsec != ke->omtime.tv_nsec) {
				VN_KNOTE(ke->vp, NOTE_WRITE);
				ke->omtime = attr.va_mtime;
			}

			if (attr.va_ctime.tv_sec != ke->octime.tv_sec
			    || attr.va_ctime.tv_nsec != ke->octime.tv_nsec) {
				VN_KNOTE(ke->vp, NOTE_ATTRIB);
				ke->octime = attr.va_ctime;
			}

			if (attr.va_nlink != ke->onlink) {
				VN_KNOTE(ke->vp, NOTE_LINK);
				ke->onlink = attr.va_nlink;
			}

		next:
			/* clear busy flag, wake anyone waiting to detach */
			rw_enter_write(&nfskevq_lock);
			ke->flags &= ~KEVQ_BUSY;
			if (ke->flags & KEVQ_WANT) {
				ke->flags &= ~KEVQ_WANT;
				wakeup(ke);
			}
		}

		if (SLIST_EMPTY(&kevlist)) {
			/* Nothing more to watch, exit */
			pnfskq = NULL;
			rw_exit_write(&nfskevq_lock);
			kthread_exit(0);
		}
		rw_exit_write(&nfskevq_lock);

		/* wait a while before checking for changes again */
		tsleep_nsec(pnfskq, PSOCK, "nfskqpw",
		    SEC_TO_NSEC(NFS_MINATTRTIMO) / 2);
	}
}

/*
 * Detach a knote from the vnode's klist and, unless the knote is a
 * poll(2)/select(2) backend (__EV_POLL/__EV_SELECT), drop the watch
 * list reference taken by nfs_kqfilter().
 */
void
filt_nfsdetach(struct knote *kn)
{
	struct vnode *vp = (struct vnode *)kn->kn_hook;

	klist_remove_locked(&vp->v_selectinfo.si_note, kn);

	/* Remove the vnode from watch list */
	if ((kn->kn_flags & (__EV_POLL | __EV_SELECT)) == 0)
		nfs_kqunwatch(vp);
}

/*
 * Drop one reference on vp's watch-list entry; the last reference
 * removes and frees the entry.  If the poller is currently working on
 * the entry (KEVQ_BUSY), sleep until it is done before touching it.
 */
void
nfs_kqunwatch(struct vnode *vp)
{
	struct kevq *ke;

	rw_enter_write(&nfskevq_lock);
	SLIST_FOREACH(ke, &kevlist, kev_link) {
		if (ke->vp == vp) {
			while (ke->flags & KEVQ_BUSY) {
				ke->flags |= KEVQ_WANT;
				rw_exit_write(&nfskevq_lock);
				tsleep_nsec(ke, PSOCK, "nfskqdet", INFSLP);
				rw_enter_write(&nfskevq_lock);
			}

			if (ke->usecount > 1) {
				/* keep, other kevents need this */
				ke->usecount--;
			} else {
				/* last user, g/c */
				SLIST_REMOVE(&kevlist, ke, kevq, kev_link);
				free(ke, M_KEVENT, sizeof(*ke));
			}
			break;
		}
	}
	rw_exit_write(&nfskevq_lock);
}

/*
 * EVFILT_READ event: report bytes readable past the file offset;
 * active when data remains (or NOTE_EOF was requested at EOF).
 */
int
filt_nfsread(struct knote *kn, long hint)
{
	struct vnode *vp = (struct vnode *)kn->kn_hook;
	struct nfsnode *np = VTONFS(vp);

	/*
	 * filesystem is gone, so set the EOF flag and schedule
	 * the knote for deletion.
	 */
	if (hint == NOTE_REVOKE) {
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		return (1);
	}

	kn->kn_data = np->n_size - foffset(kn->kn_fp);
#ifdef DEBUG
	printf("nfsread event. %lld\n", kn->kn_data);
#endif
	if (kn->kn_data == 0 && kn->kn_sfflags & NOTE_EOF) {
		kn->kn_fflags |= NOTE_EOF;
		return (1);
	}

	/* poll(2)/select(2) consider a regular file always readable */
	if (kn->kn_flags & (__EV_POLL | __EV_SELECT))
		return (1);

	return (kn->kn_data != 0);
}

/*
 * EVFILT_WRITE event: always active (writes never block on file size).
 */
int
filt_nfswrite(struct knote *kn, long hint)
{
	/*
	 * filesystem is gone, so set the EOF flag and schedule
	 * the knote for deletion.
	 */
	if (hint == NOTE_REVOKE) {
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		return (1);
	}

	kn->kn_data = 0;
	return (1);
}

/*
 * EVFILT_VNODE event: latch hint bits the watcher asked for into
 * kn_fflags; active once any requested event has been seen.
 */
int
filt_nfsvnode(struct knote *kn, long hint)
{
	if (kn->kn_sfflags & hint)
		kn->kn_fflags |= hint;
	if (hint == NOTE_REVOKE) {
		kn->kn_flags |= EV_EOF;
		return (1);
	}
	return (kn->kn_fflags != 0);
}

static const struct filterops nfsread_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_nfsdetach,
	.f_event	= filt_nfsread,
};

static const struct filterops nfswrite_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_nfsdetach,
	.f_event	= filt_nfswrite,
};

static const struct filterops nfsvnode_filtops = {
	.f_flags	= FILTEROP_ISFD,
	.f_attach	= NULL,
	.f_detach	= filt_nfsdetach,
	.f_event	= filt_nfsvnode,
};

/*
 * VOP_KQFILTER for NFS: pick the filterops matching kn_filter, hook
 * the knote to the vnode, and (for real kevent users, not the
 * poll/select backends) register the vnode with the poller thread.
 * Returns 0 on success or EINVAL for an unsupported filter.
 */
int
nfs_kqfilter(void *v)
{
	struct vop_kqfilter_args *ap = v;
	struct vnode *vp;
	struct knote *kn;

	vp = ap->a_vp;
	kn = ap->a_kn;

#ifdef DEBUG
	printf("nfs_kqfilter(%d) on: ", kn->kn_filter);
	VOP_PRINT(vp);
#endif

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &nfsread_filtops;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &nfswrite_filtops;
		break;
	case EVFILT_VNODE:
		kn->kn_fop = &nfsvnode_filtops;
		break;
	default:
		return (EINVAL);
	}

	kn->kn_hook = vp;

	/*
	 * Put the vnode to watched list.
	 */
	if ((kn->kn_flags & (__EV_POLL | __EV_SELECT)) == 0) {
		int error;

		error = nfs_kqwatch(vp);
		if (error)
			return (error);
	}

	klist_insert_locked(&vp->v_selectinfo.si_note, kn);

	return (0);
}

/*
 * Add vp to the watch list (or bump the existing entry's usecount),
 * creating the poller thread first if it is not running.  Returns 0
 * or the error from kthread_create().
 */
int
nfs_kqwatch(struct vnode *vp)
{
	struct proc *p = curproc;	/* XXX */
	struct vattr attr;
	struct kevq *ke;
	int error = 0;

	/*
	 * Fetch current attributes. It's only needed when the vnode
	 * is not watched yet, but we need to do this without lock
	 * held. This is likely cheap due to attrcache, so do it now.
	 */
	memset(&attr, 0, sizeof(attr));
	(void) VOP_GETATTR(vp, &attr, p->p_ucred, p);

	rw_enter_write(&nfskevq_lock);

	/* ensure the poller is running */
	if (!pnfskq) {
		error = kthread_create(nfs_kqpoll, NULL, &pnfskq,
		    "nfskqpoll");
		if (error)
			goto out;
	}

	SLIST_FOREACH(ke, &kevlist, kev_link)
		if (ke->vp == vp)
			break;

	if (ke) {
		/* already watched, so just bump usecount */
		ke->usecount++;
	} else {
		/* need a new one */
		ke = malloc(sizeof(*ke), M_KEVENT, M_WAITOK);
		ke->vp = vp;
		ke->usecount = 1;
		ke->flags = 0;
		/* seed the "old" attributes for the poller's comparisons */
		ke->omtime = attr.va_mtime;
		ke->octime = attr.va_ctime;
		ke->onlink = attr.va_nlink;
		SLIST_INSERT_HEAD(&kevlist, ke, kev_link);
	}

	/* kick the poller */
	wakeup(pnfskq);

out:
	rw_exit_write(&nfskevq_lock);
	return (error);
}