1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 /*
26 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
27 * All rights reserved.
28 */
29
30 #include <sys/param.h>
31 #include <sys/types.h>
32 #include <sys/systm.h>
33 #include <sys/cred.h>
34 #include <sys/buf.h>
35 #include <sys/vfs.h>
36 #include <sys/vnode.h>
37 #include <sys/uio.h>
38 #include <sys/stat.h>
39 #include <sys/errno.h>
40 #include <sys/sysmacros.h>
41 #include <sys/statvfs.h>
42 #include <sys/kmem.h>
43 #include <sys/kstat.h>
44 #include <sys/dirent.h>
45 #include <sys/cmn_err.h>
46 #include <sys/debug.h>
47 #include <sys/vtrace.h>
48 #include <sys/mode.h>
49 #include <sys/acl.h>
50 #include <sys/nbmlock.h>
51 #include <sys/policy.h>
52 #include <sys/sdt.h>
53
54 #include <rpc/types.h>
55 #include <rpc/auth.h>
56 #include <rpc/svc.h>
57
58 #include <nfs/nfs.h>
59 #include <nfs/export.h>
60 #include <nfs/nfs_cmd.h>
61
62 #include <vm/hat.h>
63 #include <vm/as.h>
64 #include <vm/seg.h>
65 #include <vm/seg_map.h>
66 #include <vm/seg_kmem.h>
67
68 #include <sys/strsubr.h>
69
70 /*
71 * These are the interface routines for the server side of the
72 * Network File System. See the NFS version 2 protocol specification
73 * for a description of this interface.
74 */
75
76 static int sattr_to_vattr(struct nfssattr *, struct vattr *);
77 static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
78 cred_t *);
79
80 /*
81 * Some "over the wire" UNIX file types. These are encoded
82 * into the mode. This needs to be fixed in the next rev.
83 */
84 #define IFMT 0170000 /* type of file */
85 #define IFCHR 0020000 /* character special */
86 #define IFBLK 0060000 /* block special */
87 #define IFSOCK 0140000 /* socket */
88
89 u_longlong_t nfs2_srv_caller_id;
90
91 /*
92 * Get file attributes.
93 * Returns the current attributes of the file with the given fhandle.
94 */
/* ARGSUSED */
void
rfs_getattr(fhandle_t *fhp, struct nfsattrstat *ns, struct exportinfo *exi,
	struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *vp;
	struct vattr va;

	/* Translate the file handle into a held vnode. */
	vp = nfs_fhtovp(fhp, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	/*
	 * Do the getattr.
	 */
	va.va_mask = AT_ALL;	/* we want all the attributes */

	/*
	 * Delegation-aware getattr: picks up attributes cached by a
	 * v4 delegation holder rather than possibly stale on-disk state.
	 */
	error = rfs4_delegated_getattr(vp, &va, 0, cr);

	/* check for overflows */
	if (!error) {
		/* Lie about the object type for a referral */
		if (vn_is_nfs_reparse(vp, cr))
			va.va_type = VLNK;

		/* Mask permissions the client can't use (ACL-aware exports). */
		acl_perm(vp, exi, &va, cr);
		/* vattr_to_nattr can fail if values overflow the v2 wire format. */
		error = vattr_to_nattr(&va, &ns->ns_attr);
	}

	VN_RELE(vp);

	ns->ns_status = puterrno(error);
}
/*
 * Return the file handle embedded in the GETATTR arguments, for use by
 * the common dispatch code (e.g. export lookup) before the op runs.
 */
void *
rfs_getattr_getfh(fhandle_t *fhp)
{
	return (fhp);
}
136
137 /*
138 * Set file attributes.
139 * Sets the attributes of the file with the given fhandle. Returns
140 * the new attributes.
141 */
void
rfs_setattr(struct nfssaargs *args, struct nfsattrstat *ns,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	int flag;		/* flags for VOP_SETATTR (ATTR_UTIME or 0) */
	int in_crit = 0;	/* nonzero while inside nbmand critical region */
	vnode_t *vp;
	struct vattr va;	/* attributes requested by the client */
	struct vattr bva;	/* attributes currently on the file */
	struct flock64 bf;	/* describes region freed by VOP_SPACE */
	caller_context_t ct;


	vp = nfs_fhtovp(&args->saa_fh, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	/* Reject writes to read-only exports or read-only filesystems. */
	if (rdonly(exi, req) || vn_is_readonly(vp)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ROFS;
		return;
	}

	/* Convert over-the-wire sattr into a native vattr. */
	error = sattr_to_vattr(&args->saa_sa, &va);
	if (error) {
		VN_RELE(vp);
		ns->ns_status = puterrno(error);
		return;
	}

	/*
	 * If the client is requesting a change to the mtime,
	 * but the nanosecond field is set to 1 billion, then
	 * this is a flag to the server that it should set the
	 * atime and mtime fields to the server's current time.
	 * The 1 billion number actually came from the client
	 * as 1 million, but the units in the over the wire
	 * request are microseconds instead of nanoseconds.
	 *
	 * This is an overload of the protocol and should be
	 * documented in the NFS Version 2 protocol specification.
	 */
	if (va.va_mask & AT_MTIME) {
		if (va.va_mtime.tv_nsec == 1000000000) {
			gethrestime(&va.va_mtime);
			va.va_atime = va.va_mtime;
			va.va_mask |= AT_ATIME;
			flag = 0;
		} else
			flag = ATTR_UTIME;
	} else
		flag = 0;

	/*
	 * If the filesystem is exported with nosuid, then mask off
	 * the setuid and setgid bits.
	 */
	if ((va.va_mask & AT_MODE) && vp->v_type == VREG &&
	    (exi->exi_export.ex_flags & EX_NOSUID))
		va.va_mode &= ~(VSUID | VSGID);

	/*
	 * Identify ourselves as an NFSv2 server thread; CC_DONTBLOCK asks
	 * delegation monitors to fail with EAGAIN rather than block us.
	 */
	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * We need to specially handle size changes because it is
	 * possible for the client to create a file with modes
	 * which indicate read-only, but with the file opened for
	 * writing. If the client then tries to set the size of
	 * the file, then the normal access checking done in
	 * VOP_SETATTR would prevent the client from doing so,
	 * although it should be legal for it to do so. To get
	 * around this, we do the access checking for ourselves
	 * and then use VOP_SPACE which doesn't do the access
	 * checking which VOP_SETATTR does. VOP_SPACE can only
	 * operate on VREG files, let VOP_SETATTR handle the other
	 * extremely rare cases.
	 * Also the client should not be allowed to change the
	 * size of the file if there is a conflicting non-blocking
	 * mandatory lock in the region of change.
	 */
	if (vp->v_type == VREG && va.va_mask & AT_SIZE) {
		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}

		/* Need the current owner and size to validate the truncate. */
		bva.va_mask = AT_UID | AT_SIZE;

		error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

		if (error) {
			if (in_crit)
				nbl_end_crit(vp);
			VN_RELE(vp);
			ns->ns_status = puterrno(error);
			return;
		}

		if (in_crit) {
			u_offset_t offset;
			ssize_t length;

			/*
			 * The region affected by the size change is the span
			 * between the old and new sizes, whichever way the
			 * file is moving.
			 */
			if (va.va_size < bva.va_size) {
				offset = va.va_size;
				length = bva.va_size - va.va_size;
			} else {
				offset = bva.va_size;
				length = va.va_size - bva.va_size;
			}
			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
			    NULL)) {
				error = EACCES;
			}
		}

		/*
		 * Only the file's owner gets the VOP_SPACE shortcut; others
		 * fall through to VOP_SETATTR, which enforces access checks.
		 */
		if (crgetuid(cr) == bva.va_uid && !error &&
		    va.va_size != bva.va_size) {
			va.va_mask &= ~AT_SIZE;
			bf.l_type = F_WRLCK;
			bf.l_whence = 0;
			bf.l_start = (off64_t)va.va_size;
			bf.l_len = 0;
			bf.l_sysid = 0;
			bf.l_pid = 0;

			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
			    (offset_t)va.va_size, cr, &ct);
		}
		if (in_crit)
			nbl_end_crit(vp);
	} else
		error = 0;

	/*
	 * Do the setattr.
	 */
	if (!error && va.va_mask) {
		error = VOP_SETATTR(vp, &va, flag, cr, &ct);
	}

	/*
	 * check if the monitor on either vop_space or vop_setattr detected
	 * a delegation conflict and if so, mark the thread flag as
	 * wouldblock so that the response is dropped and the client will
	 * try again.
	 */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		VN_RELE(vp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	if (!error) {
		va.va_mask = AT_ALL;	/* get everything */

		error = rfs4_delegated_getattr(vp, &va, 0, cr);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &ns->ns_attr);
		}
	}

	/* Clear CC_DONTBLOCK: the fsync below is allowed to block. */
	ct.cc_flags = 0;

	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);

	VN_RELE(vp);

	ns->ns_status = puterrno(error);
}
/*
 * Return the file handle embedded in the SETATTR arguments, for use by
 * the common dispatch code before the op runs.
 */
void *
rfs_setattr_getfh(struct nfssaargs *args)
{
	return (&args->saa_fh);
}
328
329 /*
330 * Directory lookup.
331 * Returns an fhandle and file attributes for file name in a directory.
332 */
333 /* ARGSUSED */
334 void
rfs_lookup(struct nfsdiropargs * da,struct nfsdiropres * dr,struct exportinfo * exi,struct svc_req * req,cred_t * cr)335 rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
336 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
337 {
338 int error;
339 vnode_t *dvp;
340 vnode_t *vp;
341 struct vattr va;
342 fhandle_t *fhp = da->da_fhandle;
343 struct sec_ol sec = {0, 0};
344 bool_t publicfh_flag = FALSE, auth_weak = FALSE;
345 char *name;
346 struct sockaddr *ca;
347
348 /*
349 * Trusted Extension doesn't support NFSv2. MOUNT
350 * will reject v2 clients. Need to prevent v2 client
351 * access via WebNFS here.
352 */
353 if (is_system_labeled() && req->rq_vers == 2) {
354 dr->dr_status = NFSERR_ACCES;
355 return;
356 }
357
358 /*
359 * Disallow NULL paths
360 */
361 if (da->da_name == NULL || *da->da_name == '\0') {
362 dr->dr_status = NFSERR_ACCES;
363 return;
364 }
365
366 /*
367 * Allow lookups from the root - the default
368 * location of the public filehandle.
369 */
370 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
371 dvp = rootdir;
372 VN_HOLD(dvp);
373 } else {
374 dvp = nfs_fhtovp(fhp, exi);
375 if (dvp == NULL) {
376 dr->dr_status = NFSERR_STALE;
377 return;
378 }
379 }
380
381 /*
382 * Not allow lookup beyond root.
383 * If the filehandle matches a filehandle of the exi,
384 * then the ".." refers beyond the root of an exported filesystem.
385 */
386 if (strcmp(da->da_name, "..") == 0 &&
387 EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
388 VN_RELE(dvp);
389 dr->dr_status = NFSERR_NOENT;
390 return;
391 }
392
393 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
394 name = nfscmd_convname(ca, exi, da->da_name, NFSCMD_CONV_INBOUND,
395 MAXPATHLEN);
396
397 if (name == NULL) {
398 dr->dr_status = NFSERR_ACCES;
399 return;
400 }
401
402 /*
403 * If the public filehandle is used then allow
404 * a multi-component lookup, i.e. evaluate
405 * a pathname and follow symbolic links if
406 * necessary.
407 *
408 * This may result in a vnode in another filesystem
409 * which is OK as long as the filesystem is exported.
410 */
411 if (PUBLIC_FH2(fhp)) {
412 publicfh_flag = TRUE;
413 error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
414 &sec);
415 } else {
416 /*
417 * Do a normal single component lookup.
418 */
419 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
420 NULL, NULL, NULL);
421 }
422
423 if (name != da->da_name)
424 kmem_free(name, MAXPATHLEN);
425
426
427 if (!error) {
428 va.va_mask = AT_ALL; /* we want everything */
429
430 error = rfs4_delegated_getattr(vp, &va, 0, cr);
431
432 /* check for overflows */
433 if (!error) {
434 acl_perm(vp, exi, &va, cr);
435 error = vattr_to_nattr(&va, &dr->dr_attr);
436 if (!error) {
437 if (sec.sec_flags & SEC_QUERY)
438 error = makefh_ol(&dr->dr_fhandle, exi,
439 sec.sec_index);
440 else {
441 error = makefh(&dr->dr_fhandle, vp,
442 exi);
443 if (!error && publicfh_flag &&
444 !chk_clnt_sec(exi, req))
445 auth_weak = TRUE;
446 }
447 }
448 }
449 VN_RELE(vp);
450 }
451
452 VN_RELE(dvp);
453
454 /*
455 * If publicfh_flag is true then we have called rfs_publicfh_mclookup
456 * and have obtained a new exportinfo in exi which needs to be
457 * released. Note the the original exportinfo pointed to by exi
458 * will be released by the caller, comon_dispatch.
459 */
460 if (publicfh_flag && exi != NULL)
461 exi_rele(exi);
462
463 /*
464 * If it's public fh, no 0x81, and client's flavor is
465 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
466 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
467 */
468 if (auth_weak)
469 dr->dr_status = (enum nfsstat)WNFSERR_CLNT_FLAVOR;
470 else
471 dr->dr_status = puterrno(error);
472 }
/*
 * Return the directory file handle embedded in the LOOKUP arguments,
 * for use by the common dispatch code before the op runs.
 */
void *
rfs_lookup_getfh(struct nfsdiropargs *da)
{
	return (da->da_fhandle);
}
478
479 /*
480 * Read symbolic link.
481 * Returns the string in the symbolic link at the given fhandle.
482 */
/* ARGSUSED */
void
rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi,
	struct svc_req *req, cred_t *cr)
{
	int error;
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	struct vattr va;
	struct sockaddr *ca;
	char *name = NULL;
	int is_referral = 0;

	vp = nfs_fhtovp(fhp, exi);
	if (vp == NULL) {
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_STALE;
		return;
	}

	/* Only the mode is needed, for the mandatory-lock check below. */
	va.va_mask = AT_MODE;

	error = VOP_GETATTR(vp, &va, 0, cr, NULL);

	if (error) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = puterrno(error);
		return;
	}

	/*
	 * Refuse mandatory-locked files: a service thread could block
	 * forever waiting on a lock that is never released.
	 */
	if (MANDLOCK(vp, va.va_mode)) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_ACCES;
		return;
	}

	/* We lied about the object type for a referral */
	if (vn_is_nfs_reparse(vp, cr))
		is_referral = 1;

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. BUGID 1138002.
	 */
	if (vp->v_type != VLNK && !is_referral) {
		VN_RELE(vp);
		rl->rl_data = NULL;
		rl->rl_status = NFSERR_NXIO;
		return;
	}

	/*
	 * Allocate data for pathname. This will be freed by rfs_rlfree.
	 */
	rl->rl_data = kmem_alloc(NFS_MAXPATHLEN, KM_SLEEP);

	if (is_referral) {
		char *s;
		size_t strsz;

		/* Get an artificial symlink based on a referral */
		s = build_symlink(vp, cr, &strsz);
		global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++;
		DTRACE_PROBE2(nfs2serv__func__referral__reflink,
		    vnode_t *, vp, char *, s);
		if (s == NULL)
			error = EINVAL;
		else {
			error = 0;
			(void) strlcpy(rl->rl_data, s, NFS_MAXPATHLEN);
			rl->rl_count = (uint32_t)MIN(strsz, NFS_MAXPATHLEN);
			kmem_free(s, strsz);
		}

	} else {

		/*
		 * Set up io vector to read sym link data
		 */
		iov.iov_base = rl->rl_data;
		iov.iov_len = NFS_MAXPATHLEN;
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_CACHED;
		uio.uio_loffset = (offset_t)0;
		uio.uio_resid = NFS_MAXPATHLEN;

		/*
		 * Do the readlink.
		 */
		error = VOP_READLINK(vp, &uio, cr, NULL);

		rl->rl_count = (uint32_t)(NFS_MAXPATHLEN - uio.uio_resid);

		/*
		 * NOTE(review): if the link target is exactly
		 * NFS_MAXPATHLEN bytes, rl_count == NFS_MAXPATHLEN and
		 * this NUL lands one byte past the kmem_alloc'd buffer.
		 * Presumably filesystems cap link targets below this;
		 * confirm, or allocate NFS_MAXPATHLEN + 1.
		 */
		if (!error)
			rl->rl_data[rl->rl_count] = '\0';

	}


	VN_RELE(vp);

	/* Translate the path into the client's character set, if needed. */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, rl->rl_data,
	    NFSCMD_CONV_OUTBOUND, MAXPATHLEN);

	if (name != NULL && name != rl->rl_data) {
		kmem_free(rl->rl_data, NFS_MAXPATHLEN);
		rl->rl_data = name;
	}

	/*
	 * XNFS and RFC1094 require us to return ENXIO if argument
	 * is not a link. UFS returns EINVAL if this is the case,
	 * so we do the mapping here. BUGID 1138002.
	 */
	if (error == EINVAL)
		rl->rl_status = NFSERR_NXIO;
	else
		rl->rl_status = puterrno(error);

}
609 void *
rfs_readlink_getfh(fhandle_t * fhp)610 rfs_readlink_getfh(fhandle_t *fhp)
611 {
612 return (fhp);
613 }
614 /*
615 * Free data allocated by rfs_readlink
616 */
617 void
rfs_rlfree(struct nfsrdlnres * rl)618 rfs_rlfree(struct nfsrdlnres *rl)
619 {
620 if (rl->rl_data != NULL)
621 kmem_free(rl->rl_data, NFS_MAXPATHLEN);
622 }
623
624 static int rdma_setup_read_data2(struct nfsreadargs *, struct nfsrdresult *);
625
626 /*
627 * Read data.
628 * Returns some data read from the file at the given fhandle.
629 */
/* ARGSUSED */
void
rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	vnode_t *vp;
	int error;
	struct vattr va;
	struct iovec iov;
	struct uio uio;
	mblk_t *mp;		/* reply data buffer (non-RDMA path) */
	int alloc_err = 0;
	int in_crit = 0;	/* nonzero while inside nbmand critical region */
	caller_context_t ct;

	vp = nfs_fhtovp(&ra->ra_fhandle, exi);
	if (vp == NULL) {
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_STALE;
		return;
	}

	if (vp->v_type != VREG) {
		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_ISDIR;
		return;
	}

	/*
	 * Identify ourselves as an NFSv2 server thread; CC_DONTBLOCK asks
	 * delegation monitors to fail with EAGAIN rather than block us.
	 */
	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with write requests.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		if (nbl_conflict(vp, NBL_READ, ra->ra_offset, ra->ra_count,
		    0, NULL)) {
			nbl_end_crit(vp);
			VN_RELE(vp);
			rr->rr_data = NULL;
			rr->rr_status = NFSERR_ACCES;
			return;
		}
		in_crit = 1;
	}

	error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;

		rr->rr_data = NULL;
		return;
	}

	/* Full attributes: needed for uid kludge, mandlock and EOF checks. */
	va.va_mask = AT_ALL;

	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = puterrno(error);

		return;
	}

	/*
	 * This is a kludge to allow reading of files created
	 * with no read permission. The owner of the file
	 * is always allowed to read it.
	 */
	if (crgetuid(cr) != va.va_uid) {
		error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);

		if (error) {
			/*
			 * Exec is the same as read over the net because
			 * of demand loading.
			 */
			error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
		}
		if (error) {
			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
			if (in_crit)
				nbl_end_crit(vp);
			VN_RELE(vp);
			rr->rr_data = NULL;
			rr->rr_status = puterrno(error);

			return;
		}
	}

	/* Refuse mandatory-locked files (service thread could block forever). */
	if (MANDLOCK(vp, va.va_mode)) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = NFSERR_ACCES;

		return;
	}

	rr->rr_ok.rrok_wlist_len = 0;
	rr->rr_ok.rrok_wlist = NULL;

	/* Read starting at or past EOF: success with zero bytes. */
	if ((u_offset_t)ra->ra_offset >= va.va_size) {
		rr->rr_count = 0;
		rr->rr_data = NULL;
		/*
		 * In this case, status is NFS_OK, but there is no data
		 * to encode. So set rr_mp to NULL.
		 */
		rr->rr_mp = NULL;
		rr->rr_ok.rrok_wlist = ra->ra_wlist;
		if (rr->rr_ok.rrok_wlist)
			clist_zero_len(rr->rr_ok.rrok_wlist);
		goto done;
	}

	if (ra->ra_wlist) {
		/* RDMA path: read directly into the client-provided chunk. */
		mp = NULL;
		rr->rr_mp = NULL;
		(void) rdma_get_wchunk(req, &iov, ra->ra_wlist);
		if (ra->ra_count > iov.iov_len) {
			rr->rr_data = NULL;
			rr->rr_status = NFSERR_INVAL;
			goto done;
		}
	} else {
		/*
		 * mp will contain the data to be sent out in the read reply.
		 * This will be freed after the reply has been sent out (by the
		 * driver).
		 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
		 * that the call to xdrmblk_putmblk() never fails.
		 */
		mp = allocb_wait(RNDUP(ra->ra_count), BPRI_MED, STR_NOSIG,
		    &alloc_err);
		ASSERT(mp != NULL);
		ASSERT(alloc_err == 0);

		rr->rr_mp = mp;

		/*
		 * Set up io vector
		 */
		iov.iov_base = (caddr_t)mp->b_datap->db_base;
		iov.iov_len = ra->ra_count;
	}

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)ra->ra_offset;
	uio.uio_resid = ra->ra_count;

	error = VOP_READ(vp, &uio, 0, cr, &ct);

	if (error) {
		if (mp)
			freeb(mp);

		/*
		 * check if a monitor detected a delegation conflict and
		 * mark as wouldblock so response is dropped
		 */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			curthread->t_flag |= T_WOULDBLOCK;
		else
			rr->rr_status = puterrno(error);

		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;

		return;
	}

	/*
	 * Get attributes again so we can send the latest access
	 * time to the client side for his cache.
	 */
	va.va_mask = AT_ALL;

	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error) {
		if (mp)
			freeb(mp);

		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
		if (in_crit)
			nbl_end_crit(vp);

		VN_RELE(vp);
		rr->rr_data = NULL;
		rr->rr_status = puterrno(error);

		return;
	}

	/* Bytes actually transferred (short reads leave resid nonzero). */
	rr->rr_count = (uint32_t)(ra->ra_count - uio.uio_resid);

	if (mp) {
		rr->rr_data = (char *)mp->b_datap->db_base;
	} else {
		if (ra->ra_wlist) {
			rr->rr_data = (caddr_t)iov.iov_base;
			if (!rdma_setup_read_data2(ra, rr)) {
				rr->rr_data = NULL;
				rr->rr_status = puterrno(NFSERR_INVAL);
			}
		}
	}
done:
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
	if (in_crit)
		nbl_end_crit(vp);

	acl_perm(vp, exi, &va, cr);

	/* check for overflows */
	error = vattr_to_nattr(&va, &rr->rr_attr);

	VN_RELE(vp);

	rr->rr_status = puterrno(error);
}
878
879 /*
880 * Free data allocated by rfs_read
881 */
882 void
rfs_rdfree(struct nfsrdresult * rr)883 rfs_rdfree(struct nfsrdresult *rr)
884 {
885 mblk_t *mp;
886
887 if (rr->rr_status == NFS_OK) {
888 mp = rr->rr_mp;
889 if (mp != NULL)
890 freeb(mp);
891 }
892 }
893
/*
 * Return the file handle embedded in the READ arguments, for use by
 * the common dispatch code before the op runs.
 */
void *
rfs_read_getfh(struct nfsreadargs *ra)
{
	return (&ra->ra_fhandle);
}
899
900 #define MAX_IOVECS 12
901
902 #ifdef DEBUG
903 static int rfs_write_sync_hits = 0;
904 static int rfs_write_sync_misses = 0;
905 #endif
906
907 /*
908 * Write data to file.
909 * Returns attributes of a file after writing some data to it.
910 *
911 * Any changes made here, especially in error handling might have
912 * to also be done in rfs_write (which clusters write requests).
913 */
void
rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *vp;
	rlim64_t rlimit;
	struct vattr va;
	struct uio uio;
	struct iovec iov[MAX_IOVECS];	/* on-stack iovecs for small mblk chains */
	mblk_t *m;
	struct iovec *iovp;
	int iovcnt;
	cred_t *savecred;
	int in_crit = 0;	/* nonzero while inside nbmand critical region */
	caller_context_t ct;

	vp = nfs_fhtovp(&wa->wa_fhandle, exi);
	if (vp == NULL) {
		ns->ns_status = NFSERR_STALE;
		return;
	}

	if (rdonly(exi, req)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ROFS;
		return;
	}

	if (vp->v_type != VREG) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ISDIR;
		return;
	}

	/*
	 * Identify ourselves as an NFSv2 server thread; CC_DONTBLOCK asks
	 * delegation monitors to fail with EAGAIN rather than block us.
	 */
	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/* Owner uid for the access kludge, mode for the mandlock check. */
	va.va_mask = AT_UID|AT_MODE;

	error = VOP_GETATTR(vp, &va, 0, cr, &ct);

	if (error) {
		VN_RELE(vp);
		ns->ns_status = puterrno(error);

		return;
	}

	if (crgetuid(cr) != va.va_uid) {
		/*
		 * This is a kludge to allow writes of files created
		 * with read only permission. The owner of the file
		 * is always allowed to write it.
		 */
		error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct);

		if (error) {
			VN_RELE(vp);
			ns->ns_status = puterrno(error);
			return;
		}
	}

	/*
	 * Can't access a mandatory lock file. This might cause
	 * the NFS service thread to block forever waiting for a
	 * lock to be released that will never be released.
	 */
	if (MANDLOCK(vp, va.va_mode)) {
		VN_RELE(vp);
		ns->ns_status = NFSERR_ACCES;
		return;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(vp, NBL_WRITE, wa->wa_offset,
		    wa->wa_count, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	if (wa->wa_data || wa->wa_rlist) {
		/* Do the RDMA thing if necessary */
		if (wa->wa_rlist) {
			iov[0].iov_base = (char *)((wa->wa_rlist)->u.c_daddr3);
			iov[0].iov_len = wa->wa_count;
		} else {
			iov[0].iov_base = wa->wa_data;
			iov[0].iov_len = wa->wa_count;
		}
		uio.uio_iov = iov;
		uio.uio_iovcnt = 1;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)wa->wa_offset;
		uio.uio_resid = wa->wa_count;
		/*
		 * The limit is checked on the client. We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		rlimit = uio.uio_llimit - wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * for now we assume no append mode
		 */
		/*
		 * We're changing creds because VM may fault and we need
		 * the cred of the current thread to be used if quota
		 * checking is enabled.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		/* FSYNC: v2 writes must be on stable storage before replying. */
		error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
		curthread->t_cred = savecred;
	} else {
		/* Data arrived as an mblk chain; build an iovec per mblk. */
		iovcnt = 0;
		for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
			iovcnt++;
		if (iovcnt <= MAX_IOVECS) {
#ifdef DEBUG
			rfs_write_sync_hits++;
#endif
			iovp = iov;
		} else {
#ifdef DEBUG
			rfs_write_sync_misses++;
#endif
			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
		}
		mblk_to_iov(wa->wa_mblk, iovcnt, iovp);
		uio.uio_iov = iovp;
		uio.uio_iovcnt = iovcnt;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)wa->wa_offset;
		uio.uio_resid = wa->wa_count;
		/*
		 * The limit is checked on the client. We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		rlimit = uio.uio_llimit - wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * For now we assume no append mode.
		 */
		/*
		 * We're changing creds because VM may fault and we need
		 * the cred of the current thread to be used if quota
		 * checking is enabled.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
		curthread->t_cred = savecred;

		if (iovp != iov)
			kmem_free(iovp, sizeof (*iovp) * iovcnt);
	}

	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);

	if (!error) {
		/*
		 * Get attributes again so we send the latest mod
		 * time to the client side for his cache.
		 */
		va.va_mask = AT_ALL;	/* now we want everything */

		error = VOP_GETATTR(vp, &va, 0, cr, &ct);

		/* check for overflows */
		if (!error) {
			acl_perm(vp, exi, &va, cr);
			error = vattr_to_nattr(&va, &ns->ns_attr);
		}
	}

out:
	if (in_crit)
		nbl_end_crit(vp);
	VN_RELE(vp);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;
	else
		ns->ns_status = puterrno(error);

}
1130
/*
 * One pending WRITE request within a write cluster.  Requests to the
 * same file are linked (sorted by offset) so that one service thread
 * can issue them together while the others wait on the cluster's cv.
 */
struct rfs_async_write {
	struct nfswriteargs *wa;	/* client's write arguments */
	struct nfsattrstat *ns;		/* reply to fill in for this request */
	struct svc_req *req;		/* RPC request handle */
	cred_t *cr;			/* credentials of this request */
	kthread_t *thread;		/* service thread parked on this entry */
	struct rfs_async_write *list;	/* next request in the same cluster */
};

/*
 * One write cluster: all currently queued WRITEs to a single file,
 * identified by file handle.  Clusters are kept on a global singly
 * linked list headed by rfs_async_write_head.
 */
struct rfs_async_write_list {
	fhandle_t *fhp;			/* file handle shared by the cluster */
	kcondvar_t cv;			/* waiters signalled when cluster done */
	struct rfs_async_write *list;	/* requests, sorted by wa_offset */
	struct rfs_async_write_list *next;	/* next cluster (other file) */
};

/* Global cluster list and its lock. */
static struct rfs_async_write_list *rfs_async_write_head = NULL;
static kmutex_t rfs_async_write_lock;
static int rfs_write_async = 1;	/* enables write clustering if == 1 */

#define	MAXCLIOVECS	42
/*
 * Sentinel status meaning "not yet processed"; distinct from 0 (NFS_OK)
 * so a waiter can tell when its entry has been completed by another thread.
 */
#define	RFSWRITE_INITVAL (enum nfsstat) -1

#ifdef DEBUG
static int rfs_write_hits = 0;
static int rfs_write_misses = 0;
#endif
1158
1159 /*
1160 * Write data to file.
1161 * Returns attributes of a file after writing some data to it.
1162 */
1163 void
rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *vp;
	rlim64_t rlimit;
	struct vattr va;
	struct uio uio;
	struct rfs_async_write_list *lp;
	struct rfs_async_write_list *nlp;
	struct rfs_async_write *rp;
	struct rfs_async_write *nrp;
	struct rfs_async_write *trp;
	struct rfs_async_write *lrp;
	int data_written;
	int iovcnt;
	mblk_t *m;
	struct iovec *iovp;
	struct iovec *niovp;
	struct iovec iov[MAXCLIOVECS];
	int count;
	int rcount;
	uint_t off;
	uint_t len;
	struct rfs_async_write nrpsp;
	struct rfs_async_write_list nlpsp;
	ushort_t t_flag;
	cred_t *savecred;
	int in_crit = 0;
	caller_context_t ct;

	/*
	 * If write clustering is disabled, fall back to the simple
	 * one-request-at-a-time synchronous write path.
	 */
	if (!rfs_write_async) {
		rfs_write_sync(wa, ns, exi, req, cr);
		return;
	}

	/*
	 * Initialize status to RFSWRITE_INITVAL instead of 0, since value of 0
	 * is considered an OK.
	 */
	ns->ns_status = RFSWRITE_INITVAL;

	/*
	 * Build this thread's request entry on the stack; it is linked
	 * into the per-file cluster list below and the cluster leader
	 * fills in ns_status when the write completes.
	 */
	nrp = &nrpsp;
	nrp->wa = wa;
	nrp->ns = ns;
	nrp->req = req;
	nrp->cr = cr;
	nrp->thread = curthread;

	/* Stack entries must stay resident while other threads reference them. */
	ASSERT(curthread->t_schedflag & TS_DONT_SWAP);

	/*
	 * Look to see if there is already a cluster started
	 * for this file.
	 */
	mutex_enter(&rfs_async_write_lock);
	for (lp = rfs_async_write_head; lp != NULL; lp = lp->next) {
		if (bcmp(&wa->wa_fhandle, lp->fhp,
		    sizeof (fhandle_t)) == 0)
			break;
	}

	/*
	 * If lp is non-NULL, then there is already a cluster
	 * started.  We need to place ourselves in the cluster
	 * list in the right place as determined by starting
	 * offset.  Conflicts with non-blocking mandatory locked
	 * regions will be checked when the cluster is processed.
	 */
	if (lp != NULL) {
		rp = lp->list;
		trp = NULL;
		/* Insertion sort by starting offset. */
		while (rp != NULL && rp->wa->wa_offset < wa->wa_offset) {
			trp = rp;
			rp = rp->list;
		}
		nrp->list = rp;
		if (trp == NULL)
			lp->list = nrp;
		else
			trp->list = nrp;
		/*
		 * Wait until the cluster leader has processed this
		 * request; it signals completion by replacing the
		 * RFSWRITE_INITVAL sentinel with a real status.
		 */
		while (nrp->ns->ns_status == RFSWRITE_INITVAL)
			cv_wait(&lp->cv, &rfs_async_write_lock);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * No cluster started yet, start one and add ourselves
	 * to the list of clusters.
	 */
	nrp->list = NULL;

	nlp = &nlpsp;
	nlp->fhp = &wa->wa_fhandle;
	cv_init(&nlp->cv, NULL, CV_DEFAULT, NULL);
	nlp->list = nrp;
	nlp->next = NULL;

	if (rfs_async_write_head == NULL) {
		rfs_async_write_head = nlp;
	} else {
		lp = rfs_async_write_head;
		while (lp->next != NULL)
			lp = lp->next;
		lp->next = nlp;
	}
	mutex_exit(&rfs_async_write_lock);

	/*
	 * Convert the file handle common to all of the requests
	 * in this cluster to a vnode.
	 */
	vp = nfs_fhtovp(&wa->wa_fhandle, exi);
	if (vp == NULL) {
		/* Unlink the cluster and fail every queued request. */
		mutex_enter(&rfs_async_write_lock);
		if (rfs_async_write_head == nlp)
			rfs_async_write_head = nlp->next;
		else {
			lp = rfs_async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_STALE;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * Can only write regular files.  Attempts to write any
	 * other file types fail with EISDIR.
	 */
	if (vp->v_type != VREG) {
		VN_RELE(vp);
		mutex_enter(&rfs_async_write_lock);
		if (rfs_async_write_head == nlp)
			rfs_async_write_head = nlp->next;
		else {
			lp = rfs_async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			rp->ns->ns_status = NFSERR_ISDIR;
			rp->thread->t_flag |= t_flag;
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * Enter the critical region before calling VOP_RWLOCK, to avoid a
	 * deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
	}

	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs2_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * Lock the file for writing.  This operation provides
	 * the delay which allows clusters to grow.
	 */
	error = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
		/* mark as wouldblock so response is dropped */
		curthread->t_flag |= T_WOULDBLOCK;
		mutex_enter(&rfs_async_write_lock);
		if (rfs_async_write_head == nlp)
			rfs_async_write_head = nlp->next;
		else {
			lp = rfs_async_write_head;
			while (lp->next != nlp)
				lp = lp->next;
			lp->next = nlp->next;
		}
		for (rp = nlp->list; rp != NULL; rp = rp->list) {
			if (rp->ns->ns_status == RFSWRITE_INITVAL) {
				rp->ns->ns_status = puterrno(error);
				rp->thread->t_flag |= T_WOULDBLOCK;
			}
		}
		cv_broadcast(&nlp->cv);
		mutex_exit(&rfs_async_write_lock);

		return;
	}

	/*
	 * Disconnect this cluster from the list of clusters.
	 * The cluster that is being dealt with must be fixed
	 * in size after this point, so there is no reason
	 * to leave it on the list so that new requests can
	 * find it.
	 *
	 * The algorithm is that the first write request will
	 * create a cluster, convert the file handle to a
	 * vnode pointer, and then lock the file for writing.
	 * This request is not likely to be clustered with
	 * any others.  However, the next request will create
	 * a new cluster and be blocked in VOP_RWLOCK while
	 * the first request is being processed.  This delay
	 * will allow more requests to be clustered in this
	 * second cluster.
	 */
	mutex_enter(&rfs_async_write_lock);
	if (rfs_async_write_head == nlp)
		rfs_async_write_head = nlp->next;
	else {
		lp = rfs_async_write_head;
		while (lp->next != nlp)
			lp = lp->next;
		lp->next = nlp->next;
	}
	mutex_exit(&rfs_async_write_lock);

	/*
	 * Step through the list of requests in this cluster.
	 * We need to check permissions to make sure that all
	 * of the requests have sufficient permission to write
	 * the file.  A cluster can be composed of requests
	 * from different clients and different users on each
	 * client.
	 *
	 * As a side effect, we also calculate the size of the
	 * byte range that this cluster encompasses.
	 */
	rp = nlp->list;
	off = rp->wa->wa_offset;
	len = (uint_t)0;
	do {
		if (rdonly(exi, rp->req)) {
			rp->ns->ns_status = NFSERR_ROFS;
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}

		va.va_mask = AT_UID|AT_MODE;

		error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

		if (!error) {
			if (crgetuid(rp->cr) != va.va_uid) {
				/*
				 * This is a kludge to allow writes of files
				 * created with read only permission.  The
				 * owner of the file is always allowed to
				 * write it.
				 */
				error = VOP_ACCESS(vp, VWRITE, 0, rp->cr, &ct);
			}
			/* Mandatory-locked files are not writable over NFSv2. */
			if (!error && MANDLOCK(vp, va.va_mode))
				error = EACCES;
		}

		/*
		 * Check for a conflict with a nbmand-locked region.
		 */
		if (in_crit && nbl_conflict(vp, NBL_WRITE, rp->wa->wa_offset,
		    rp->wa->wa_count, 0, NULL)) {
			error = EACCES;
		}

		if (error) {
			rp->ns->ns_status = puterrno(error);
			t_flag = curthread->t_flag & T_WOULDBLOCK;
			rp->thread->t_flag |= t_flag;
			continue;
		}
		/* Grow the cluster byte range to cover this request. */
		if (len < rp->wa->wa_offset + rp->wa->wa_count - off)
			len = rp->wa->wa_offset + rp->wa->wa_count - off;
	} while ((rp = rp->list) != NULL);

	/*
	 * Step through the cluster attempting to gather as many
	 * requests which are contiguous as possible.  These
	 * contiguous requests are handled via one call to VOP_WRITE
	 * instead of different calls to VOP_WRITE.  We also keep
	 * track of the fact that any data was written.
	 */
	rp = nlp->list;
	data_written = 0;
	do {
		/*
		 * Skip any requests which are already marked as having an
		 * error.
		 */
		if (rp->ns->ns_status != RFSWRITE_INITVAL) {
			rp = rp->list;
			continue;
		}

		/*
		 * Count the number of iovec's which are required
		 * to handle this set of requests.  One iovec is
		 * needed for each data buffer, whether addressed
		 * by wa_data or by the b_rptr pointers in the
		 * mblk chains.
		 */
		iovcnt = 0;
		lrp = rp;
		for (;;) {
			if (lrp->wa->wa_data || lrp->wa->wa_rlist)
				iovcnt++;
			else {
				m = lrp->wa->wa_mblk;
				while (m != NULL) {
					iovcnt++;
					m = m->b_cont;
				}
			}
			/*
			 * The run ends at the last queued request, at a
			 * request that already failed, or at the first
			 * non-contiguous offset.  lrp is left pointing
			 * one past the run.
			 */
			if (lrp->list == NULL ||
			    lrp->list->ns->ns_status != RFSWRITE_INITVAL ||
			    lrp->wa->wa_offset + lrp->wa->wa_count !=
			    lrp->list->wa->wa_offset) {
				lrp = lrp->list;
				break;
			}
			lrp = lrp->list;
		}

		if (iovcnt <= MAXCLIOVECS) {
#ifdef DEBUG
			rfs_write_hits++;
#endif
			niovp = iov;
		} else {
#ifdef DEBUG
			rfs_write_misses++;
#endif
			niovp = kmem_alloc(sizeof (*niovp) * iovcnt, KM_SLEEP);
		}
		/*
		 * Put together the scatter/gather iovecs.
		 */
		iovp = niovp;
		trp = rp;
		count = 0;
		do {
			if (trp->wa->wa_data || trp->wa->wa_rlist) {
				if (trp->wa->wa_rlist) {
					iovp->iov_base =
					    (char *)((trp->wa->wa_rlist)->
					    u.c_daddr3);
					iovp->iov_len = trp->wa->wa_count;
				} else {
					iovp->iov_base = trp->wa->wa_data;
					iovp->iov_len = trp->wa->wa_count;
				}
				iovp++;
			} else {
				m = trp->wa->wa_mblk;
				rcount = trp->wa->wa_count;
				/*
				 * Walk the mblk chain, clamping the final
				 * fragment so no more than wa_count bytes
				 * are mapped for this request.
				 */
				while (m != NULL) {
					iovp->iov_base = (caddr_t)m->b_rptr;
					iovp->iov_len = (m->b_wptr - m->b_rptr);
					rcount -= iovp->iov_len;
					if (rcount < 0)
						iovp->iov_len += rcount;
					iovp++;
					if (rcount <= 0)
						break;
					m = m->b_cont;
				}
			}
			count += trp->wa->wa_count;
			trp = trp->list;
		} while (trp != lrp);

		uio.uio_iov = niovp;
		uio.uio_iovcnt = iovcnt;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		uio.uio_loffset = (offset_t)rp->wa->wa_offset;
		uio.uio_resid = count;
		/*
		 * The limit is checked on the client.  We
		 * should allow any size writes here.
		 */
		uio.uio_llimit = curproc->p_fsz_ctl;
		rlimit = uio.uio_llimit - rp->wa->wa_offset;
		if (rlimit < (rlim64_t)uio.uio_resid)
			uio.uio_resid = (uint_t)rlimit;

		/*
		 * For now we assume no append mode.
		 */

		/*
		 * We're changing creds because VM may fault
		 * and we need the cred of the current
		 * thread to be used if quota * checking is
		 * enabled.
		 */
		savecred = curthread->t_cred;
		curthread->t_cred = cr;
		error = VOP_WRITE(vp, &uio, 0, rp->cr, &ct);
		curthread->t_cred = savecred;

		/* check if a monitor detected a delegation conflict */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			/* mark as wouldblock so response is dropped */
			curthread->t_flag |= T_WOULDBLOCK;

		if (niovp != iov)
			kmem_free(niovp, sizeof (*niovp) * iovcnt);

		if (!error) {
			data_written = 1;
			/*
			 * Get attributes again so we send the latest mod
			 * time to the client side for his cache.
			 */
			va.va_mask = AT_ALL;	/* now we want everything */

			error = VOP_GETATTR(vp, &va, 0, rp->cr, &ct);

			if (!error)
				acl_perm(vp, exi, &va, rp->cr);
		}

		/*
		 * Fill in the status responses for each request
		 * which was just handled.  Also, copy the latest
		 * attributes in to the attribute responses if
		 * appropriate.
		 */
		t_flag = curthread->t_flag & T_WOULDBLOCK;
		do {
			rp->thread->t_flag |= t_flag;
			/* check for overflows */
			if (!error) {
				error = vattr_to_nattr(&va, &rp->ns->ns_attr);
			}
			rp->ns->ns_status = puterrno(error);
			rp = rp->list;
		} while (rp != lrp);
	} while (rp != NULL);

	/*
	 * If any data was written at all, then we need to flush
	 * the data and metadata to stable storage.
	 */
	if (data_written) {
		error = VOP_PUTPAGE(vp, (u_offset_t)off, len, 0, cr, &ct);

		if (!error) {
			error = VOP_FSYNC(vp, FNODSYNC, cr, &ct);
		}
	}

	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);

	if (in_crit)
		nbl_end_crit(vp);
	VN_RELE(vp);

	/*
	 * Wake all waiters, propagating any final error (e.g. from the
	 * flush above) to requests that have not yet been resolved.
	 */
	t_flag = curthread->t_flag & T_WOULDBLOCK;
	mutex_enter(&rfs_async_write_lock);
	for (rp = nlp->list; rp != NULL; rp = rp->list) {
		if (rp->ns->ns_status == RFSWRITE_INITVAL) {
			rp->ns->ns_status = puterrno(error);
			rp->thread->t_flag |= t_flag;
		}
	}
	cv_broadcast(&nlp->cv);
	mutex_exit(&rfs_async_write_lock);

}
1656
1657 void *
rfs_write_getfh(struct nfswriteargs * wa)1658 rfs_write_getfh(struct nfswriteargs *wa)
1659 {
1660 return (&wa->wa_fhandle);
1661 }
1662
1663 /*
1664 * Create a file.
1665 * Creates a file with given attributes and returns those attributes
1666 * and an fhandle for the new file.
1667 */
1668 void
rfs_create(struct nfscreatargs * args,struct nfsdiropres * dr,struct exportinfo * exi,struct svc_req * req,cred_t * cr)1669 rfs_create(struct nfscreatargs *args, struct nfsdiropres *dr,
1670 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
1671 {
1672 int error;
1673 int lookuperr;
1674 int in_crit = 0;
1675 struct vattr va;
1676 vnode_t *vp;
1677 vnode_t *realvp;
1678 vnode_t *dvp;
1679 char *name = args->ca_da.da_name;
1680 vnode_t *tvp = NULL;
1681 int mode;
1682 int lookup_ok;
1683 bool_t trunc;
1684 struct sockaddr *ca;
1685
1686 /*
1687 * Disallow NULL paths
1688 */
1689 if (name == NULL || *name == '\0') {
1690 dr->dr_status = NFSERR_ACCES;
1691 return;
1692 }
1693
1694 dvp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
1695 if (dvp == NULL) {
1696 dr->dr_status = NFSERR_STALE;
1697 return;
1698 }
1699
1700 error = sattr_to_vattr(args->ca_sa, &va);
1701 if (error) {
1702 dr->dr_status = puterrno(error);
1703 return;
1704 }
1705
1706 /*
1707 * Must specify the mode.
1708 */
1709 if (!(va.va_mask & AT_MODE)) {
1710 VN_RELE(dvp);
1711 dr->dr_status = NFSERR_INVAL;
1712 return;
1713 }
1714
1715 /*
1716 * This is a completely gross hack to make mknod
1717 * work over the wire until we can wack the protocol
1718 */
1719 if ((va.va_mode & IFMT) == IFCHR) {
1720 if (args->ca_sa->sa_size == (uint_t)NFS_FIFO_DEV)
1721 va.va_type = VFIFO; /* xtra kludge for named pipe */
1722 else {
1723 va.va_type = VCHR;
1724 /*
1725 * uncompress the received dev_t
1726 * if the top half is zero indicating a request
1727 * from an `older style' OS.
1728 */
1729 if ((va.va_size & 0xffff0000) == 0)
1730 va.va_rdev = nfsv2_expdev(va.va_size);
1731 else
1732 va.va_rdev = (dev_t)va.va_size;
1733 }
1734 va.va_mask &= ~AT_SIZE;
1735 } else if ((va.va_mode & IFMT) == IFBLK) {
1736 va.va_type = VBLK;
1737 /*
1738 * uncompress the received dev_t
1739 * if the top half is zero indicating a request
1740 * from an `older style' OS.
1741 */
1742 if ((va.va_size & 0xffff0000) == 0)
1743 va.va_rdev = nfsv2_expdev(va.va_size);
1744 else
1745 va.va_rdev = (dev_t)va.va_size;
1746 va.va_mask &= ~AT_SIZE;
1747 } else if ((va.va_mode & IFMT) == IFSOCK) {
1748 va.va_type = VSOCK;
1749 } else {
1750 va.va_type = VREG;
1751 }
1752 va.va_mode &= ~IFMT;
1753 va.va_mask |= AT_TYPE;
1754
1755 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1756 name = nfscmd_convname(ca, exi, name, NFSCMD_CONV_INBOUND,
1757 MAXPATHLEN);
1758 if (name == NULL) {
1759 dr->dr_status = puterrno(EINVAL);
1760 return;
1761 }
1762
1763 /*
1764 * Why was the choice made to use VWRITE as the mode to the
1765 * call to VOP_CREATE ? This results in a bug. When a client
1766 * opens a file that already exists and is RDONLY, the second
1767 * open fails with an EACESS because of the mode.
1768 * bug ID 1054648.
1769 */
1770 lookup_ok = 0;
1771 mode = VWRITE;
1772 if (!(va.va_mask & AT_SIZE) || va.va_type != VREG) {
1773 error = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1774 NULL, NULL, NULL);
1775 if (!error) {
1776 struct vattr at;
1777
1778 lookup_ok = 1;
1779 at.va_mask = AT_MODE;
1780 error = VOP_GETATTR(tvp, &at, 0, cr, NULL);
1781 if (!error)
1782 mode = (at.va_mode & S_IWUSR) ? VWRITE : VREAD;
1783 VN_RELE(tvp);
1784 tvp = NULL;
1785 }
1786 }
1787
1788 if (!lookup_ok) {
1789 if (rdonly(exi, req)) {
1790 error = EROFS;
1791 } else if (va.va_type != VREG && va.va_type != VFIFO &&
1792 va.va_type != VSOCK && secpolicy_sys_devices(cr) != 0) {
1793 error = EPERM;
1794 } else {
1795 error = 0;
1796 }
1797 }
1798
1799 /*
1800 * If file size is being modified on an already existing file
1801 * make sure that there are no conflicting non-blocking mandatory
1802 * locks in the region being manipulated. Return EACCES if there
1803 * are conflicting locks.
1804 */
1805 if (!error && (va.va_type == VREG) && (va.va_mask & AT_SIZE)) {
1806 lookuperr = VOP_LOOKUP(dvp, name, &tvp, NULL, 0, NULL, cr,
1807 NULL, NULL, NULL);
1808
1809 if (!lookuperr &&
1810 rfs4_check_delegated(FWRITE, tvp, va.va_size == 0)) {
1811 VN_RELE(tvp);
1812 curthread->t_flag |= T_WOULDBLOCK;
1813 goto out;
1814 }
1815
1816 if (!lookuperr && nbl_need_check(tvp)) {
1817 /*
1818 * The file exists. Now check if it has any
1819 * conflicting non-blocking mandatory locks
1820 * in the region being changed.
1821 */
1822 struct vattr bva;
1823 u_offset_t offset;
1824 ssize_t length;
1825
1826 nbl_start_crit(tvp, RW_READER);
1827 in_crit = 1;
1828
1829 bva.va_mask = AT_SIZE;
1830 error = VOP_GETATTR(tvp, &bva, 0, cr, NULL);
1831 if (!error) {
1832 if (va.va_size < bva.va_size) {
1833 offset = va.va_size;
1834 length = bva.va_size - va.va_size;
1835 } else {
1836 offset = bva.va_size;
1837 length = va.va_size - bva.va_size;
1838 }
1839 if (length) {
1840 if (nbl_conflict(tvp, NBL_WRITE,
1841 offset, length, 0, NULL)) {
1842 error = EACCES;
1843 }
1844 }
1845 }
1846 if (error) {
1847 nbl_end_crit(tvp);
1848 VN_RELE(tvp);
1849 in_crit = 0;
1850 }
1851 } else if (tvp != NULL) {
1852 VN_RELE(tvp);
1853 }
1854 }
1855
1856 if (!error) {
1857 /*
1858 * If filesystem is shared with nosuid the remove any
1859 * setuid/setgid bits on create.
1860 */
1861 if (va.va_type == VREG &&
1862 exi->exi_export.ex_flags & EX_NOSUID)
1863 va.va_mode &= ~(VSUID | VSGID);
1864
1865 error = VOP_CREATE(dvp, name, &va, NONEXCL, mode, &vp, cr, 0,
1866 NULL, NULL);
1867
1868 if (!error) {
1869
1870 if ((va.va_mask & AT_SIZE) && (va.va_size == 0))
1871 trunc = TRUE;
1872 else
1873 trunc = FALSE;
1874
1875 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1876 VN_RELE(vp);
1877 curthread->t_flag |= T_WOULDBLOCK;
1878 goto out;
1879 }
1880 va.va_mask = AT_ALL;
1881
1882 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1883
1884 /* check for overflows */
1885 if (!error) {
1886 acl_perm(vp, exi, &va, cr);
1887 error = vattr_to_nattr(&va, &dr->dr_attr);
1888 if (!error) {
1889 error = makefh(&dr->dr_fhandle, vp,
1890 exi);
1891 }
1892 }
1893 /*
1894 * Force modified metadata out to stable storage.
1895 *
1896 * if a underlying vp exists, pass it to VOP_FSYNC
1897 */
1898 if (VOP_REALVP(vp, &realvp, NULL) == 0)
1899 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
1900 else
1901 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1902 VN_RELE(vp);
1903 }
1904
1905 if (in_crit) {
1906 nbl_end_crit(tvp);
1907 VN_RELE(tvp);
1908 }
1909 }
1910
1911 /*
1912 * Force modified data and metadata out to stable storage.
1913 */
1914 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1915
1916 out:
1917
1918 VN_RELE(dvp);
1919
1920 dr->dr_status = puterrno(error);
1921
1922 if (name != args->ca_da.da_name)
1923 kmem_free(name, MAXPATHLEN);
1924 }
1925 void *
rfs_create_getfh(struct nfscreatargs * args)1926 rfs_create_getfh(struct nfscreatargs *args)
1927 {
1928 return (args->ca_da.da_fhandle);
1929 }
1930
1931 /*
1932 * Remove a file.
1933 * Remove named file from parent directory.
1934 */
1935 void
rfs_remove(struct nfsdiropargs * da,enum nfsstat * status,struct exportinfo * exi,struct svc_req * req,cred_t * cr)1936 rfs_remove(struct nfsdiropargs *da, enum nfsstat *status,
1937 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
1938 {
1939 int error = 0;
1940 vnode_t *vp;
1941 vnode_t *targvp;
1942 int in_crit = 0;
1943
1944 /*
1945 * Disallow NULL paths
1946 */
1947 if (da->da_name == NULL || *da->da_name == '\0') {
1948 *status = NFSERR_ACCES;
1949 return;
1950 }
1951
1952 vp = nfs_fhtovp(da->da_fhandle, exi);
1953 if (vp == NULL) {
1954 *status = NFSERR_STALE;
1955 return;
1956 }
1957
1958 if (rdonly(exi, req)) {
1959 VN_RELE(vp);
1960 *status = NFSERR_ROFS;
1961 return;
1962 }
1963
1964 /*
1965 * Check for a conflict with a non-blocking mandatory share reservation.
1966 */
1967 error = VOP_LOOKUP(vp, da->da_name, &targvp, NULL, 0,
1968 NULL, cr, NULL, NULL, NULL);
1969 if (error != 0) {
1970 VN_RELE(vp);
1971 *status = puterrno(error);
1972 return;
1973 }
1974
1975 /*
1976 * If the file is delegated to an v4 client, then initiate
1977 * recall and drop this request (by setting T_WOULDBLOCK).
1978 * The client will eventually re-transmit the request and
1979 * (hopefully), by then, the v4 client will have returned
1980 * the delegation.
1981 */
1982
1983 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
1984 VN_RELE(vp);
1985 VN_RELE(targvp);
1986 curthread->t_flag |= T_WOULDBLOCK;
1987 return;
1988 }
1989
1990 if (nbl_need_check(targvp)) {
1991 nbl_start_crit(targvp, RW_READER);
1992 in_crit = 1;
1993 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
1994 error = EACCES;
1995 goto out;
1996 }
1997 }
1998
1999 error = VOP_REMOVE(vp, da->da_name, cr, NULL, 0);
2000
2001 /*
2002 * Force modified data and metadata out to stable storage.
2003 */
2004 (void) VOP_FSYNC(vp, 0, cr, NULL);
2005
2006 out:
2007 if (in_crit)
2008 nbl_end_crit(targvp);
2009 VN_RELE(targvp);
2010 VN_RELE(vp);
2011
2012 *status = puterrno(error);
2013
2014 }
2015
2016 void *
rfs_remove_getfh(struct nfsdiropargs * da)2017 rfs_remove_getfh(struct nfsdiropargs *da)
2018 {
2019 return (da->da_fhandle);
2020 }
2021
2022 /*
2023 * rename a file
2024 * Give a file (from) a new name (to).
2025 */
void
rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
    struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error = 0;
	vnode_t *fromvp;
	vnode_t *tovp;
	struct exportinfo *to_exi;
	fhandle_t *fh;
	vnode_t *srcvp;
	vnode_t *targvp;
	int in_crit = 0;

	fromvp = nfs_fhtovp(args->rna_from.da_fhandle, exi);
	if (fromvp == NULL) {
		*status = NFSERR_STALE;
		return;
	}

	/*
	 * The destination directory must belong to the same export as
	 * the source; cross-export renames are refused with NFSERR_XDEV.
	 * The reference from checkexport() is dropped immediately since
	 * only the pointer comparison against exi is needed.
	 */
	fh = args->rna_to.da_fhandle;
	to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
	if (to_exi == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}
	exi_rele(to_exi);

	if (to_exi != exi) {
		VN_RELE(fromvp);
		*status = NFSERR_XDEV;
		return;
	}

	tovp = nfs_fhtovp(args->rna_to.da_fhandle, exi);
	if (tovp == NULL) {
		VN_RELE(fromvp);
		*status = NFSERR_STALE;
		return;
	}

	/* Both file handles must name directories. */
	if (fromvp->v_type != VDIR || tovp->v_type != VDIR) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_NOTDIR;
		return;
	}

	/*
	 * Disallow NULL paths
	 */
	if (args->rna_from.da_name == NULL || *args->rna_from.da_name == '\0' ||
	    args->rna_to.da_name == NULL || *args->rna_to.da_name == '\0') {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ACCES;
		return;
	}

	if (rdonly(exi, req)) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = NFSERR_ROFS;
		return;
	}

	/*
	 * Check for a conflict with a non-blocking mandatory share reservation.
	 */
	error = VOP_LOOKUP(fromvp, args->rna_from.da_name, &srcvp, NULL, 0,
	    NULL, cr, NULL, NULL, NULL);
	if (error != 0) {
		VN_RELE(tovp);
		VN_RELE(fromvp);
		*status = puterrno(error);
		return;
	}

	/* Check for delegations on the source file */

	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
		/* drop the request; client retries after delegation recall */
		VN_RELE(tovp);
		VN_RELE(fromvp);
		VN_RELE(srcvp);
		curthread->t_flag |= T_WOULDBLOCK;
		return;
	}

	/* Check for delegation on the file being renamed over, if it exists */

	if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
	    VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
	    NULL, NULL, NULL) == 0) {

		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
			VN_RELE(tovp);
			VN_RELE(fromvp);
			VN_RELE(srcvp);
			VN_RELE(targvp);
			curthread->t_flag |= T_WOULDBLOCK;
			return;
		}
		VN_RELE(targvp);
	}


	if (nbl_need_check(srcvp)) {
		nbl_start_crit(srcvp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	error = VOP_RENAME(fromvp, args->rna_from.da_name,
	    tovp, args->rna_to.da_name, cr, NULL, 0);

	/* keep the vnode's cached pathname up to date after the rename */
	if (error == 0)
		vn_renamepath(tovp, srcvp, args->rna_to.da_name,
		    strlen(args->rna_to.da_name));

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(tovp, 0, cr, NULL);
	(void) VOP_FSYNC(fromvp, 0, cr, NULL);

out:
	if (in_crit)
		nbl_end_crit(srcvp);
	VN_RELE(srcvp);
	VN_RELE(tovp);
	VN_RELE(fromvp);

	*status = puterrno(error);

}
2164 void *
rfs_rename_getfh(struct nfsrnmargs * args)2165 rfs_rename_getfh(struct nfsrnmargs *args)
2166 {
2167 return (args->rna_from.da_fhandle);
2168 }
2169
2170 /*
2171 * Link to a file.
2172 * Create a file (to) which is a hard link to the given file (from).
2173 */
2174 void
rfs_link(struct nfslinkargs * args,enum nfsstat * status,struct exportinfo * exi,struct svc_req * req,cred_t * cr)2175 rfs_link(struct nfslinkargs *args, enum nfsstat *status,
2176 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
2177 {
2178 int error;
2179 vnode_t *fromvp;
2180 vnode_t *tovp;
2181 struct exportinfo *to_exi;
2182 fhandle_t *fh;
2183
2184 fromvp = nfs_fhtovp(args->la_from, exi);
2185 if (fromvp == NULL) {
2186 *status = NFSERR_STALE;
2187 return;
2188 }
2189
2190 fh = args->la_to.da_fhandle;
2191 to_exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen);
2192 if (to_exi == NULL) {
2193 VN_RELE(fromvp);
2194 *status = NFSERR_ACCES;
2195 return;
2196 }
2197 exi_rele(to_exi);
2198
2199 if (to_exi != exi) {
2200 VN_RELE(fromvp);
2201 *status = NFSERR_XDEV;
2202 return;
2203 }
2204
2205 tovp = nfs_fhtovp(args->la_to.da_fhandle, exi);
2206 if (tovp == NULL) {
2207 VN_RELE(fromvp);
2208 *status = NFSERR_STALE;
2209 return;
2210 }
2211
2212 if (tovp->v_type != VDIR) {
2213 VN_RELE(tovp);
2214 VN_RELE(fromvp);
2215 *status = NFSERR_NOTDIR;
2216 return;
2217 }
2218 /*
2219 * Disallow NULL paths
2220 */
2221 if (args->la_to.da_name == NULL || *args->la_to.da_name == '\0') {
2222 VN_RELE(tovp);
2223 VN_RELE(fromvp);
2224 *status = NFSERR_ACCES;
2225 return;
2226 }
2227
2228 if (rdonly(exi, req)) {
2229 VN_RELE(tovp);
2230 VN_RELE(fromvp);
2231 *status = NFSERR_ROFS;
2232 return;
2233 }
2234
2235 error = VOP_LINK(tovp, fromvp, args->la_to.da_name, cr, NULL, 0);
2236
2237 /*
2238 * Force modified data and metadata out to stable storage.
2239 */
2240 (void) VOP_FSYNC(tovp, 0, cr, NULL);
2241 (void) VOP_FSYNC(fromvp, FNODSYNC, cr, NULL);
2242
2243 VN_RELE(tovp);
2244 VN_RELE(fromvp);
2245
2246 *status = puterrno(error);
2247
2248 }
2249 void *
rfs_link_getfh(struct nfslinkargs * args)2250 rfs_link_getfh(struct nfslinkargs *args)
2251 {
2252 return (args->la_from);
2253 }
2254
2255 /*
2256 * Symbolicly link to a file.
2257 * Create a file (to) with the given attributes which is a symbolic link
2258 * to the given path name (to).
2259 */
2260 void
rfs_symlink(struct nfsslargs * args,enum nfsstat * status,struct exportinfo * exi,struct svc_req * req,cred_t * cr)2261 rfs_symlink(struct nfsslargs *args, enum nfsstat *status,
2262 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
2263 {
2264 int error;
2265 struct vattr va;
2266 vnode_t *vp;
2267 vnode_t *svp;
2268 int lerror;
2269 struct sockaddr *ca;
2270 char *name = NULL;
2271
2272 /*
2273 * Disallow NULL paths
2274 */
2275 if (args->sla_from.da_name == NULL || *args->sla_from.da_name == '\0') {
2276 *status = NFSERR_ACCES;
2277 return;
2278 }
2279
2280 vp = nfs_fhtovp(args->sla_from.da_fhandle, exi);
2281 if (vp == NULL) {
2282 *status = NFSERR_STALE;
2283 return;
2284 }
2285
2286 if (rdonly(exi, req)) {
2287 VN_RELE(vp);
2288 *status = NFSERR_ROFS;
2289 return;
2290 }
2291
2292 error = sattr_to_vattr(args->sla_sa, &va);
2293 if (error) {
2294 VN_RELE(vp);
2295 *status = puterrno(error);
2296 return;
2297 }
2298
2299 if (!(va.va_mask & AT_MODE)) {
2300 VN_RELE(vp);
2301 *status = NFSERR_INVAL;
2302 return;
2303 }
2304
2305 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2306 name = nfscmd_convname(ca, exi, args->sla_tnm,
2307 NFSCMD_CONV_INBOUND, MAXPATHLEN);
2308
2309 if (name == NULL) {
2310 *status = NFSERR_ACCES;
2311 return;
2312 }
2313
2314 va.va_type = VLNK;
2315 va.va_mask |= AT_TYPE;
2316
2317 error = VOP_SYMLINK(vp, args->sla_from.da_name, &va, name, cr, NULL, 0);
2318
2319 /*
2320 * Force new data and metadata out to stable storage.
2321 */
2322 lerror = VOP_LOOKUP(vp, args->sla_from.da_name, &svp, NULL, 0,
2323 NULL, cr, NULL, NULL, NULL);
2324
2325 if (!lerror) {
2326 (void) VOP_FSYNC(svp, 0, cr, NULL);
2327 VN_RELE(svp);
2328 }
2329
2330 /*
2331 * Force modified data and metadata out to stable storage.
2332 */
2333 (void) VOP_FSYNC(vp, 0, cr, NULL);
2334
2335 VN_RELE(vp);
2336
2337 *status = puterrno(error);
2338 if (name != args->sla_tnm)
2339 kmem_free(name, MAXPATHLEN);
2340
2341 }
2342 void *
rfs_symlink_getfh(struct nfsslargs * args)2343 rfs_symlink_getfh(struct nfsslargs *args)
2344 {
2345 return (args->sla_from.da_fhandle);
2346 }
2347
2348 /*
2349 * Make a directory.
2350 * Create a directory with the given name, parent directory, and attributes.
2351 * Returns a file handle and attributes for the new directory.
2352 */
2353 void
rfs_mkdir(struct nfscreatargs * args,struct nfsdiropres * dr,struct exportinfo * exi,struct svc_req * req,cred_t * cr)2354 rfs_mkdir(struct nfscreatargs *args, struct nfsdiropres *dr,
2355 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
2356 {
2357 int error;
2358 struct vattr va;
2359 vnode_t *dvp = NULL;
2360 vnode_t *vp;
2361 char *name = args->ca_da.da_name;
2362
2363 /*
2364 * Disallow NULL paths
2365 */
2366 if (name == NULL || *name == '\0') {
2367 dr->dr_status = NFSERR_ACCES;
2368 return;
2369 }
2370
2371 vp = nfs_fhtovp(args->ca_da.da_fhandle, exi);
2372 if (vp == NULL) {
2373 dr->dr_status = NFSERR_STALE;
2374 return;
2375 }
2376
2377 if (rdonly(exi, req)) {
2378 VN_RELE(vp);
2379 dr->dr_status = NFSERR_ROFS;
2380 return;
2381 }
2382
2383 error = sattr_to_vattr(args->ca_sa, &va);
2384 if (error) {
2385 VN_RELE(vp);
2386 dr->dr_status = puterrno(error);
2387 return;
2388 }
2389
2390 if (!(va.va_mask & AT_MODE)) {
2391 VN_RELE(vp);
2392 dr->dr_status = NFSERR_INVAL;
2393 return;
2394 }
2395
2396 va.va_type = VDIR;
2397 va.va_mask |= AT_TYPE;
2398
2399 error = VOP_MKDIR(vp, name, &va, &dvp, cr, NULL, 0, NULL);
2400
2401 if (!error) {
2402 /*
2403 * Attribtutes of the newly created directory should
2404 * be returned to the client.
2405 */
2406 va.va_mask = AT_ALL; /* We want everything */
2407 error = VOP_GETATTR(dvp, &va, 0, cr, NULL);
2408
2409 /* check for overflows */
2410 if (!error) {
2411 acl_perm(vp, exi, &va, cr);
2412 error = vattr_to_nattr(&va, &dr->dr_attr);
2413 if (!error) {
2414 error = makefh(&dr->dr_fhandle, dvp, exi);
2415 }
2416 }
2417 /*
2418 * Force new data and metadata out to stable storage.
2419 */
2420 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2421 VN_RELE(dvp);
2422 }
2423
2424 /*
2425 * Force modified data and metadata out to stable storage.
2426 */
2427 (void) VOP_FSYNC(vp, 0, cr, NULL);
2428
2429 VN_RELE(vp);
2430
2431 dr->dr_status = puterrno(error);
2432
2433 }
2434 void *
rfs_mkdir_getfh(struct nfscreatargs * args)2435 rfs_mkdir_getfh(struct nfscreatargs *args)
2436 {
2437 return (args->ca_da.da_fhandle);
2438 }
2439
2440 /*
2441 * Remove a directory.
2442 * Remove the given directory name from the given parent directory.
2443 */
2444 void
rfs_rmdir(struct nfsdiropargs * da,enum nfsstat * status,struct exportinfo * exi,struct svc_req * req,cred_t * cr)2445 rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status,
2446 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
2447 {
2448 int error;
2449 vnode_t *vp;
2450
2451
2452 /*
2453 * Disallow NULL paths
2454 */
2455 if (da->da_name == NULL || *da->da_name == '\0') {
2456 *status = NFSERR_ACCES;
2457 return;
2458 }
2459
2460 vp = nfs_fhtovp(da->da_fhandle, exi);
2461 if (vp == NULL) {
2462 *status = NFSERR_STALE;
2463 return;
2464 }
2465
2466 if (rdonly(exi, req)) {
2467 VN_RELE(vp);
2468 *status = NFSERR_ROFS;
2469 return;
2470 }
2471
2472 /*
2473 * VOP_RMDIR now takes a new third argument (the current
2474 * directory of the process). That's because someone
2475 * wants to return EINVAL if one tries to remove ".".
2476 * Of course, NFS servers have no idea what their
2477 * clients' current directories are. We fake it by
2478 * supplying a vnode known to exist and illegal to
2479 * remove.
2480 */
2481 error = VOP_RMDIR(vp, da->da_name, rootdir, cr, NULL, 0);
2482
2483 /*
2484 * Force modified data and metadata out to stable storage.
2485 */
2486 (void) VOP_FSYNC(vp, 0, cr, NULL);
2487
2488 VN_RELE(vp);
2489
2490 /*
2491 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2492 * if the directory is not empty. A System V NFS server
2493 * needs to map NFSERR_EXIST to NFSERR_NOTEMPTY to transmit
2494 * over the wire.
2495 */
2496 if (error == EEXIST)
2497 *status = NFSERR_NOTEMPTY;
2498 else
2499 *status = puterrno(error);
2500
2501 }
2502 void *
rfs_rmdir_getfh(struct nfsdiropargs * da)2503 rfs_rmdir_getfh(struct nfsdiropargs *da)
2504 {
2505 return (da->da_fhandle);
2506 }
2507
/*
 * Read entries from a directory.
 * Fills rd with a buffer of dirent64 records read starting at the
 * client-supplied cookie (rda_offset), plus size and eof indications.
 * The entry buffer is allocated here and later freed by rfs_rddirfree().
 */
/* ARGSUSED */
void
rfs_readdir(struct nfsrddirargs *rda, struct nfsrddirres *rd,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	int iseof;
	struct iovec iov;
	struct uio uio;
	vnode_t *vp;
	char *ndata = NULL;
	struct sockaddr *ca;
	size_t nents;
	int ret;

	vp = nfs_fhtovp(&rda->rda_fh, exi);
	if (vp == NULL) {
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_STALE;
		return;
	}

	if (vp->v_type != VDIR) {
		VN_RELE(vp);
		rd->rd_entries = NULL;
		rd->rd_status = NFSERR_NOTDIR;
		return;
	}

	/* hold the directory read-locked for the duration of the read */
	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);

	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);

	if (error) {
		rd->rd_entries = NULL;
		goto bad;
	}

	/* a zero count asks for nothing; reply with an empty result */
	if (rda->rda_count == 0) {
		rd->rd_entries = NULL;
		rd->rd_size = 0;
		rd->rd_eof = FALSE;
		goto bad;
	}

	/* clamp the request to the NFSv2 maximum transfer size */
	rda->rda_count = MIN(rda->rda_count, NFS_MAXDATA);

	/*
	 * Allocate data for entries.  This will be freed by rfs_rddirfree.
	 */
	rd->rd_bufsize = (uint_t)rda->rda_count;
	rd->rd_entries = kmem_alloc(rd->rd_bufsize, KM_SLEEP);

	/*
	 * Set up io vector to read directory data
	 */
	iov.iov_base = (caddr_t)rd->rd_entries;
	iov.iov_len = rda->rda_count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)rda->rda_offset;
	uio.uio_resid = rda->rda_count;

	/*
	 * read directory
	 */
	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);

	/*
	 * Clean up
	 */
	if (!error) {
		/*
		 * set size and eof
		 */
		if (uio.uio_resid == rda->rda_count) {
			/* no entries were returned; report an empty result */
			rd->rd_size = 0;
			rd->rd_eof = TRUE;
		} else {
			rd->rd_size = (uint32_t)(rda->rda_count -
			    uio.uio_resid);
			rd->rd_eof = iseof ? TRUE : FALSE;
		}
	}

	/*
	 * Character-set conversion of the entry names for this client
	 * (see nfscmd_*).  The converted entries may not all fit in a
	 * buffer of the original size.
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	nents = nfscmd_countents((char *)rd->rd_entries, rd->rd_size);
	ret = nfscmd_convdirplus(ca, exi, (char *)rd->rd_entries, nents,
	    rda->rda_count, &ndata);

	if (ret != 0) {
		size_t dropbytes;
		/*
		 * We had to drop one or more entries in order to fit
		 * during the character conversion.  We need to patch
		 * up the size and eof info.
		 */
		if (rd->rd_eof)
			rd->rd_eof = FALSE;
		dropbytes = nfscmd_dropped_entrysize(
		    (struct dirent64 *)rd->rd_entries, nents, ret);
		rd->rd_size -= dropbytes;
	}
	/*
	 * If conversion produced a new buffer, swap it in and free the
	 * original; otherwise keep replying from the original buffer.
	 */
	if (ndata == NULL) {
		ndata = (char *)rd->rd_entries;
	} else if (ndata != (char *)rd->rd_entries) {
		kmem_free(rd->rd_entries, rd->rd_bufsize);
		rd->rd_entries = (void *)ndata;
		rd->rd_bufsize = rda->rda_count;
	}

bad:
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);

#if 0 /* notyet */
	/*
	 * Don't do this.  It causes local disk writes when just
	 * reading the file and the overhead is deemed larger
	 * than the benefit.
	 */
	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
#endif

	VN_RELE(vp);

	rd->rd_status = puterrno(error);

}
2641 void *
rfs_readdir_getfh(struct nfsrddirargs * rda)2642 rfs_readdir_getfh(struct nfsrddirargs *rda)
2643 {
2644 return (&rda->rda_fh);
2645 }
2646 void
rfs_rddirfree(struct nfsrddirres * rd)2647 rfs_rddirfree(struct nfsrddirres *rd)
2648 {
2649 if (rd->rd_entries != NULL)
2650 kmem_free(rd->rd_entries, rd->rd_bufsize);
2651 }
2652
2653 /* ARGSUSED */
2654 void
rfs_statfs(fhandle_t * fh,struct nfsstatfs * fs,struct exportinfo * exi,struct svc_req * req,cred_t * cr)2655 rfs_statfs(fhandle_t *fh, struct nfsstatfs *fs, struct exportinfo *exi,
2656 struct svc_req *req, cred_t *cr)
2657 {
2658 int error;
2659 struct statvfs64 sb;
2660 vnode_t *vp;
2661
2662 vp = nfs_fhtovp(fh, exi);
2663 if (vp == NULL) {
2664 fs->fs_status = NFSERR_STALE;
2665 return;
2666 }
2667
2668 error = VFS_STATVFS(vp->v_vfsp, &sb);
2669
2670 if (!error) {
2671 fs->fs_tsize = nfstsize();
2672 fs->fs_bsize = sb.f_frsize;
2673 fs->fs_blocks = sb.f_blocks;
2674 fs->fs_bfree = sb.f_bfree;
2675 fs->fs_bavail = sb.f_bavail;
2676 }
2677
2678 VN_RELE(vp);
2679
2680 fs->fs_status = puterrno(error);
2681
2682 }
2683 void *
rfs_statfs_getfh(fhandle_t * fh)2684 rfs_statfs_getfh(fhandle_t *fh)
2685 {
2686 return (fh);
2687 }
2688
/*
 * Convert the over-the-wire NFSv2 settable attributes (sattr) into a
 * vattr for the VOP layer.  A wire value of all ones means "do not set
 * this attribute" and that attribute is left out of va_mask.
 * Returns 0, or EOVERFLOW on 32-bit kernels when a client-supplied
 * time value cannot be represented in the kernel's time type.
 */
static int
sattr_to_vattr(struct nfssattr *sa, struct vattr *vap)
{
	vap->va_mask = 0;

	/*
	 * There was a sign extension bug in some VFS based systems
	 * which stored the mode as a short.  When it would get
	 * assigned to a u_long, no sign extension would occur.
	 * It needed to, but this wasn't noticed because sa_mode
	 * would then get assigned back to the short, thus ignoring
	 * the upper 16 bits of sa_mode.
	 *
	 * To make this implementation work for both broken
	 * clients and good clients, we check for both versions
	 * of the mode.
	 */
	if (sa->sa_mode != (uint32_t)((ushort_t)-1) &&
	    sa->sa_mode != (uint32_t)-1) {
		vap->va_mask |= AT_MODE;
		vap->va_mode = sa->sa_mode;
	}
	if (sa->sa_uid != (uint32_t)-1) {
		vap->va_mask |= AT_UID;
		vap->va_uid = sa->sa_uid;
	}
	if (sa->sa_gid != (uint32_t)-1) {
		vap->va_mask |= AT_GID;
		vap->va_gid = sa->sa_gid;
	}
	if (sa->sa_size != (uint32_t)-1) {
		vap->va_mask |= AT_SIZE;
		vap->va_size = sa->sa_size;
	}
	/* both tv_sec and tv_usec must be set for the time to count */
	if (sa->sa_atime.tv_sec != (int32_t)-1 &&
	    sa->sa_atime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_atime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_ATIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec, sa->sa_atime.tv_sec);
		vap->va_atime.tv_nsec = (uint32_t)(sa->sa_atime.tv_usec * 1000);
	}
	if (sa->sa_mtime.tv_sec != (int32_t)-1 &&
	    sa->sa_mtime.tv_usec != (int32_t)-1) {
#ifndef _LP64
		/* return error if time overflow */
		if (!NFS2_TIME_OK(sa->sa_mtime.tv_sec))
			return (EOVERFLOW);
#endif
		vap->va_mask |= AT_MTIME;
		/*
		 * nfs protocol defines times as unsigned so don't extend sign,
		 * unless sysadmin set nfs_allow_preepoch_time.
		 */
		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec, sa->sa_mtime.tv_sec);
		vap->va_mtime.tv_nsec = (uint32_t)(sa->sa_mtime.tv_usec * 1000);
	}
	return (0);
}
2755
/*
 * Map vnode types (indexed by vtype_t, VNON .. VBAD) to the NFSv2
 * on-the-wire file types.  Entries left 0 have no wire equivalent.
 */
static enum nfsftype vt_to_nf[] = {
	0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
};
2759
2760 /*
2761 * check the following fields for overflow: nodeid, size, and time.
2762 * There could be a problem when converting 64-bit LP64 fields
2763 * into 32-bit ones. Return an error if there is an overflow.
2764 */
2765 int
vattr_to_nattr(struct vattr * vap,struct nfsfattr * na)2766 vattr_to_nattr(struct vattr *vap, struct nfsfattr *na)
2767 {
2768 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
2769 na->na_type = vt_to_nf[vap->va_type];
2770
2771 if (vap->va_mode == (unsigned short) -1)
2772 na->na_mode = (uint32_t)-1;
2773 else
2774 na->na_mode = VTTOIF(vap->va_type) | vap->va_mode;
2775
2776 if (vap->va_uid == (unsigned short)(-1))
2777 na->na_uid = (uint32_t)(-1);
2778 else if (vap->va_uid == UID_NOBODY)
2779 na->na_uid = (uint32_t)NFS_UID_NOBODY;
2780 else
2781 na->na_uid = vap->va_uid;
2782
2783 if (vap->va_gid == (unsigned short)(-1))
2784 na->na_gid = (uint32_t)-1;
2785 else if (vap->va_gid == GID_NOBODY)
2786 na->na_gid = (uint32_t)NFS_GID_NOBODY;
2787 else
2788 na->na_gid = vap->va_gid;
2789
2790 /*
2791 * Do we need to check fsid for overflow? It is 64-bit in the
2792 * vattr, but are bigger than 32 bit values supported?
2793 */
2794 na->na_fsid = vap->va_fsid;
2795
2796 na->na_nodeid = vap->va_nodeid;
2797
2798 /*
2799 * Check to make sure that the nodeid is representable over the
2800 * wire without losing bits.
2801 */
2802 if (vap->va_nodeid != (u_longlong_t)na->na_nodeid)
2803 return (EFBIG);
2804 na->na_nlink = vap->va_nlink;
2805
2806 /*
2807 * Check for big files here, instead of at the caller. See
2808 * comments in cstat for large special file explanation.
2809 */
2810 if (vap->va_size > (u_longlong_t)MAXOFF32_T) {
2811 if ((vap->va_type == VREG) || (vap->va_type == VDIR))
2812 return (EFBIG);
2813 if ((vap->va_type == VBLK) || (vap->va_type == VCHR)) {
2814 /* UNKNOWN_SIZE | OVERFLOW */
2815 na->na_size = MAXOFF32_T;
2816 } else
2817 na->na_size = vap->va_size;
2818 } else
2819 na->na_size = vap->va_size;
2820
2821 /*
2822 * If the vnode times overflow the 32-bit times that NFS2
2823 * uses on the wire then return an error.
2824 */
2825 if (!NFS_VAP_TIME_OK(vap)) {
2826 return (EOVERFLOW);
2827 }
2828 na->na_atime.tv_sec = vap->va_atime.tv_sec;
2829 na->na_atime.tv_usec = vap->va_atime.tv_nsec / 1000;
2830
2831 na->na_mtime.tv_sec = vap->va_mtime.tv_sec;
2832 na->na_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;
2833
2834 na->na_ctime.tv_sec = vap->va_ctime.tv_sec;
2835 na->na_ctime.tv_usec = vap->va_ctime.tv_nsec / 1000;
2836
2837 /*
2838 * If the dev_t will fit into 16 bits then compress
2839 * it, otherwise leave it alone. See comments in
2840 * nfs_client.c.
2841 */
2842 if (getminor(vap->va_rdev) <= SO4_MAXMIN &&
2843 getmajor(vap->va_rdev) <= SO4_MAXMAJ)
2844 na->na_rdev = nfsv2_cmpdev(vap->va_rdev);
2845 else
2846 (void) cmpldev(&na->na_rdev, vap->va_rdev);
2847
2848 na->na_blocks = vap->va_nblocks;
2849 na->na_blocksize = vap->va_blksize;
2850
2851 /*
2852 * This bit of ugliness is a *TEMPORARY* hack to preserve the
2853 * over-the-wire protocols for named-pipe vnodes. It remaps the
2854 * VFIFO type to the special over-the-wire type. (see note in nfs.h)
2855 *
2856 * BUYER BEWARE:
2857 * If you are porting the NFS to a non-Sun server, you probably
2858 * don't want to include the following block of code. The
2859 * over-the-wire special file types will be changing with the
2860 * NFS Protocol Revision.
2861 */
2862 if (vap->va_type == VFIFO)
2863 NA_SETFIFO(na);
2864 return (0);
2865 }
2866
2867 /*
2868 * acl v2 support: returns approximate permission.
2869 * default: returns minimal permission (more restrictive)
2870 * aclok: returns maximal permission (less restrictive)
2871 * This routine changes the permissions that are alaredy in *va.
2872 * If a file has minimal ACL, i.e. aclcnt == MIN_ACL_ENTRIES,
2873 * CLASS_OBJ is always the same as GROUP_OBJ entry.
2874 */
2875 static void
acl_perm(struct vnode * vp,struct exportinfo * exi,struct vattr * va,cred_t * cr)2876 acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr)
2877 {
2878 vsecattr_t vsa;
2879 int aclcnt;
2880 aclent_t *aclentp;
2881 mode_t mask_perm;
2882 mode_t grp_perm;
2883 mode_t other_perm;
2884 mode_t other_orig;
2885 int error;
2886
2887 /* dont care default acl */
2888 vsa.vsa_mask = (VSA_ACL | VSA_ACLCNT);
2889 error = VOP_GETSECATTR(vp, &vsa, 0, cr, NULL);
2890
2891 if (!error) {
2892 aclcnt = vsa.vsa_aclcnt;
2893 if (aclcnt > MIN_ACL_ENTRIES) {
2894 /* non-trivial ACL */
2895 aclentp = vsa.vsa_aclentp;
2896 if (exi->exi_export.ex_flags & EX_ACLOK) {
2897 /* maximal permissions */
2898 grp_perm = 0;
2899 other_perm = 0;
2900 for (; aclcnt > 0; aclcnt--, aclentp++) {
2901 switch (aclentp->a_type) {
2902 case USER_OBJ:
2903 break;
2904 case USER:
2905 grp_perm |=
2906 aclentp->a_perm << 3;
2907 other_perm |= aclentp->a_perm;
2908 break;
2909 case GROUP_OBJ:
2910 grp_perm |=
2911 aclentp->a_perm << 3;
2912 break;
2913 case GROUP:
2914 other_perm |= aclentp->a_perm;
2915 break;
2916 case OTHER_OBJ:
2917 other_orig = aclentp->a_perm;
2918 break;
2919 case CLASS_OBJ:
2920 mask_perm = aclentp->a_perm;
2921 break;
2922 default:
2923 break;
2924 }
2925 }
2926 grp_perm &= mask_perm << 3;
2927 other_perm &= mask_perm;
2928 other_perm |= other_orig;
2929
2930 } else {
2931 /* minimal permissions */
2932 grp_perm = 070;
2933 other_perm = 07;
2934 for (; aclcnt > 0; aclcnt--, aclentp++) {
2935 switch (aclentp->a_type) {
2936 case USER_OBJ:
2937 break;
2938 case USER:
2939 case CLASS_OBJ:
2940 grp_perm &=
2941 aclentp->a_perm << 3;
2942 other_perm &=
2943 aclentp->a_perm;
2944 break;
2945 case GROUP_OBJ:
2946 grp_perm &=
2947 aclentp->a_perm << 3;
2948 break;
2949 case GROUP:
2950 other_perm &=
2951 aclentp->a_perm;
2952 break;
2953 case OTHER_OBJ:
2954 other_perm &=
2955 aclentp->a_perm;
2956 break;
2957 default:
2958 break;
2959 }
2960 }
2961 }
2962 /* copy to va */
2963 va->va_mode &= ~077;
2964 va->va_mode |= grp_perm | other_perm;
2965 }
2966 if (vsa.vsa_aclcnt)
2967 kmem_free(vsa.vsa_aclentp,
2968 vsa.vsa_aclcnt * sizeof (aclent_t));
2969 }
2970 }
2971
2972 void
rfs_srvrinit(void)2973 rfs_srvrinit(void)
2974 {
2975 mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL);
2976 nfs2_srv_caller_id = fs_new_caller_id();
2977 }
2978
2979 void
rfs_srvrfini(void)2980 rfs_srvrfini(void)
2981 {
2982 mutex_destroy(&rfs_async_write_lock);
2983 }
2984
2985 static int
rdma_setup_read_data2(struct nfsreadargs * ra,struct nfsrdresult * rr)2986 rdma_setup_read_data2(struct nfsreadargs *ra, struct nfsrdresult *rr)
2987 {
2988 struct clist *wcl;
2989 int wlist_len;
2990 uint32_t count = rr->rr_count;
2991
2992 wcl = ra->ra_wlist;
2993
2994 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
2995 return (FALSE);
2996 }
2997
2998 wcl = ra->ra_wlist;
2999 rr->rr_ok.rrok_wlist_len = wlist_len;
3000 rr->rr_ok.rrok_wlist = wcl;
3001
3002 return (TRUE);
3003 }
3004